ru_tts-for-nvda

Форк
0
536 строк · 18.8 Кб
1
# Copyright (C) 2021 - 2024 Александр Линьков <kvark128@yandex.ru>
2
# This file is covered by the GNU General Public License.
3
# See the file COPYING.txt for more details.
4

5
import os.path
6
import threading
7
import queue
8
import re
9
import unicodedata
10
from collections import OrderedDict
11
from ctypes import *
12

13
import config
14
import addonHandler
15
import globalVars
16
import nvwave
17
from configobj import ConfigObj
18
from configobj.validate import Validator
19
from speech.commands import IndexCommand, PitchCommand, SpeechCommand
20
from synthDriverHandler import SynthDriver, VoiceInfo, synthIndexReached, synthDoneSpeaking
21
from autoSettingsUtils.driverSetting import NumericDriverSetting, BooleanDriverSetting
22
from logHandler import log
23

24
# Initialise the add-on translations; _() is used below for user-visible setting and voice names
addonHandler.initTranslation()

# Files shipped next to this module: the engine, the RuLex driver with its database, and the config spec
MODULE_DIR = os.path.dirname(__file__)
RU_TTS_LIB_PATH = os.path.join(MODULE_DIR, "ru_tts.dll")
RULEX_LIB_PATH = os.path.join(MODULE_DIR, "rulex.dll")
RULEX_DB_PATH = os.path.join(MODULE_DIR, "rulex.db")
# The user-editable configuration lives in the NVDA configuration directory
CONFIG_FILE_PATH = os.path.join(globalVars.appArgs.configPath, "ru_tts.ini")
CONFIG_SPEC_PATH = os.path.join(MODULE_DIR, "config.spec")
# C prototype of the audio callback: int callback(void *buffer, size_t size, void *user_data)
RU_TTS_CALLBACK = CFUNCTYPE(c_int, c_void_p, c_size_t, c_void_p)
# Spoken (Russian) ordinal names of braille dots 1-8, indexed by bit position in the braille pattern
BRAILLE_DOT_LABELS = ("первая", "вторая", "третья", "четвёртая", "пятая", "шестая", "седьмая", "восьмая")
34

35
# Translation table for str.translate(), applied to the text right before it is sent to the engine
SINGLE_CHARACTER_TRANSLATION_DICT = str.maketrans({
	# Suppress the pronunciation of parentheses by replacing them with spaces
	'(': ' ',
	')': ' ',
	# Support the stress mark: the combining acute accent is replaced with '+'
	'\u0301': '+',
})
42

43
# Regular expressions used to correct pronunciation

# A run of Cyrillic letters, optionally containing combining acute accents (stress marks)
RE_WORDS = re.compile("[а-яё\u0301]+", re.I)
# Two or more consonants (Latin or Cyrillic) that are not adjacent to any other letter
RE_ABBREVIATIONS = re.compile(r"(?<![а-яёa-z])[bcdfghjklmnpqrstvwxzбвгджзклмнпрстфхцчшщ]{2,}(?![а-яёa-z])", re.I)
# A digit immediately followed by a letter
RE_LETTER_AFTER_NUMBER = re.compile(r"\d[а-яёa-z]", re.I)
# A single Latin letter that is not adjacent to any other letter
RE_SINGLE_LATIN = re.compile(r"(?<![а-яёa-z])[a-z](?![а-яёa-z])", re.I)
# Any character of the Unicode "Braille Patterns" block
RE_BRAILLE_PATTERNS = re.compile(r"[\u2800-\u28ff]")
49

50
# Allowed value ranges for speech rate, pitch and intonation
RATE_MIN = 20
# The engine accepts values up to 500, but the sound becomes distorted above 250
RATE_MAX = 250
PITCH_MIN = 50
PITCH_MAX = 300
INTONATION_MIN = 0
INTONATION_MAX = 140
57

58
@POINTER
class TTS(Structure):
	"""Opaque ru_tts engine instance.

	The POINTER decorator rebinds the name TTS to the pointer type, so it can be used directly
	in argtypes/restype, and a NULL value returned by the library evaluates as false.
	"""
60

61
@POINTER
class RULEXDB(Structure):
	"""Opaque handle of an opened rulex database.

	The POINTER decorator rebinds the name RULEXDB to the pointer type, so it can be used directly
	in argtypes/restype, and a NULL value returned by the library evaluates as false.
	"""
63

64
# Size limits of some values when working with the rulex database
RULEXDB_MAX_KEY_SIZE = 50
RULEXDB_MAX_RECORD_SIZE = 200
RULEXDB_BUFSIZE = 256

# Access modes of the rulex database
RULEXDB_SEARCH = 0
RULEXDB_UPDATE = 1
RULEXDB_CREATE = 2

# Return codes of the rulex database functions
RULEXDB_SUCCESS = 0
RULEXDB_SPECIAL = 1
RULEXDB_FAILURE = -1
RULEXDB_EMALLOC = -2
RULEXDB_EINVKEY = -3
RULEXDB_EINVREC = -4
RULEXDB_EPARM = -5
RULEXDB_EACCESS = -6

# Control flags for the "flags" field of the RU_TTS_CONF_T structure
DEC_SEP_POINT = 1 # Treat the point as a decimal separator
DEC_SEP_COMMA = 2 # Treat the comma as a decimal separator
USE_ALTERNATIVE_VOICE = 4 # Use the female voice
88

89
class RU_TTS_CONF_T(Structure):
	"""Engine configuration structure passed by reference to ru_tts_config_init and tts_speak.

	All members are C ints; "flags" is a bit mask built from the DEC_SEP_* and
	USE_ALTERNATIVE_VOICE constants.
	"""
	_fields_ = [
		("speech_rate", c_int),
		("voice_pitch", c_int),
		("intonation", c_int),
		("general_gap_factor", c_int),
		("comma_gap_factor", c_int),
		("dot_gap_factor", c_int),
		("semicolon_gap_factor", c_int),
		("colon_gap_factor", c_int),
		("question_gap_factor", c_int),
		("exclamation_gap_factor", c_int),
		("intonational_gap_factor", c_int),
		("flags", c_int),
	]
104

105
# Synthesizer parameters adjustable through the graphical interface.
# Each value is the name of the corresponding RU_TTS_CONF_T field.
SPEECH_RATE_PARAM = "speech_rate"
VOICE_PITCH_PARAM = "voice_pitch"
INTONATION_PARAM = "intonation"
GENERAL_GAP_FACTOR_PARAM = "general_gap_factor"
FLAGS_PARAM = "flags"

# Values returned by the callback function that handles audio data
CALLBACK_CONTINUE_SYNTHESIS = 0
CALLBACK_ABORT_SYNTHESIS = 1
115

116
class AudioCallback(object):
	"""Callable that receives synthesized audio from the engine and feeds it to a player.

	The return value tells the engine whether to continue (CALLBACK_CONTINUE_SYNTHESIS)
	or to stop (CALLBACK_ABORT_SYNTHESIS).
	"""

	def __init__(self, silence_flag, player):
		"""Remember the collaborators.

		silence_flag: object with is_set(); synthesis is aborted as soon as it reports True.
		player: object with feed(data) that accepts the raw audio bytes.
		"""
		self.__silence_flag = silence_flag
		self.__player = player

	def __call__(self, buffer, size, user_data):
		"""Handle one chunk of audio.

		buffer: address of the samples.
		size: number of c_short sized samples at that address.
		user_data: unused.
		"""
		if self.__silence_flag.is_set():
			return CALLBACK_ABORT_SYNTHESIS
		try:
			if size > 0:
				# Copy the samples out of the engine's buffer before handing them to the player
				data = string_at(buffer, size*sizeof(c_short))
				self.__player.feed(data)
			# Check again: the flag may have been raised while the chunk was being fed
			if self.__silence_flag.is_set():
				return CALLBACK_ABORT_SYNTHESIS
			return CALLBACK_CONTINUE_SYNTHESIS
		except Exception:
			# An exception cannot be propagated to the native caller, so log it and stop the synthesis
			log.error("ru_tts AudioCallback", exc_info=True)
			return CALLBACK_ABORT_SYNTHESIS
135

136
class RulexDict(object):
	"""Read-only access to the RuLex pronunciation dictionary through rulex.dll."""

	def __init__(self, db_path):
		"""Load the rulex driver and open the database file db_path for searching.

		Raises:
			RuntimeError: if the driver can not open the database.
			Whatever CDLL raises if rulex.dll can not be loaded.
		"""
		self.__db = None
		# Load rulex.dll, the database driver for the pronunciation dictionary
		self.__rulexdb = CDLL(RULEX_LIB_PATH)
		self.__rulexdb.rulexdb_open.argtypes = (c_char_p, c_int)
		self.__rulexdb.rulexdb_open.restype = RULEXDB
		self.__rulexdb.rulexdb_search.argtypes = (RULEXDB, c_char_p, c_char_p, c_int)
		self.__rulexdb.rulexdb_search.restype = c_int
		self.__rulexdb.rulexdb_close.argtypes = (RULEXDB,)

		# Buffer that receives the results of database lookups
		self.__searchBuf = create_string_buffer(RULEXDB_BUFSIZE)
		# The driver receives a pointer to the path string and uses it later,
		# so the encoded path is kept as an attribute to protect it from garbage collection
		self.__db_path = db_path.encode("mbcs")
		self.__db = self.__rulexdb.rulexdb_open(self.__db_path, RULEXDB_SEARCH)
		if not self.__db:
			# Do not leave the driver loaded when the database is unusable
			self.close()
			raise RuntimeError("rulex: failed to open the dictionary database")

	def search(self, match):
		"""Return the dictionary record for the matched word, or the word itself if there is none.

		match: regular expression match object; intended to be used as the replacement
		function of re.sub. The lookup key is the lowercased word encoded in KOI8-R.
		"""
		word = match.group()
		key = word.lower().encode("koi8-r", "replace")
		if len(key) <= RULEXDB_MAX_KEY_SIZE:
			if self.__rulexdb.rulexdb_search(self.__db, key, self.__searchBuf, 0) == RULEXDB_SUCCESS:
				return self.__searchBuf.value.decode("koi8-r")
		return word

	def close(self):
		"""Close the database and unload the driver. Calling it more than once is harmless."""
		if self.__rulexdb is None:
			return
		try:
			if self.__db:
				self.__rulexdb.rulexdb_close(self.__db)
			# The handle is wrapped in c_void_p because a plain Python int is passed as a C int,
			# which would truncate the module handle in a 64-bit process
			if not windll.kernel32.FreeLibrary(c_void_p(self.__rulexdb._handle)):
				log.error("rulex: can not unload dll")
		except Exception:
			log.error("rulex: can not unload dll", exc_info=True)
		finally:
			self.__db = None
			self.__rulexdb = None
172

173
class SpeakText(object):
	"""Task that synthesizes one piece of text and then reports its index."""

	def __init__(self, text, lib, tts, tts_config, silence_flag, index, onIndexReached):
		"""Store everything needed to run the task later.

		text: string to speak; may be empty when only the index has to be reported.
		lib: object providing tts_speak(tts, config_ref, text_bytes).
		tts: engine instance passed through to tts_speak.
		tts_config: ctypes object passed to tts_speak by reference.
		silence_flag: object with is_set(); when set, the task does nothing more.
		index: value handed to onIndexReached after speaking, or None for no notification.
		onIndexReached: callable taking the index.
		"""
		self.__text = text
		self.__lib = lib
		self.__tts = tts
		self.__config = tts_config
		self.__silence_flag = silence_flag
		self.__index = index
		self.__onIndexReached = onIndexReached

	def __call__(self):
		"""Speak the text unless silenced, then report the index unless silenced meanwhile."""
		if self.__silence_flag.is_set():
			return
		# The engine receives KOI8-R bytes; characters outside of it are replaced
		text = self.__text.encode("koi8-r", "replace")
		if text:
			self.__lib.tts_speak(self.__tts, byref(self.__config), text)
		if self.__index is None or self.__silence_flag.is_set():
			return
		self.__onIndexReached(self.__index)
193

194
class DoneSpeaking(object):
	"""Task that finishes an utterance: idles the player, then reports completion."""

	def __init__(self, player, onIndexReached):
		"""
		player: object with idle().
		onIndexReached: callable; it is invoked with None to signal that speaking is done.
		"""
		self.__player = player
		self.__onIndexReached = onIndexReached

	def __call__(self):
		"""Call player.idle() and then onIndexReached(None)."""
		self.__player.idle()
		self.__onIndexReached(None)
203

204
class SetParameter(object):
	"""Task that assigns one attribute of a configuration object when it is executed."""

	def __init__(self, conf, param, value):
		"""
		conf: object whose attribute will be set.
		param: attribute name.
		value: value to assign.
		"""
		self.__config = conf
		self.param = param
		self.value = value

	def __call__(self):
		"""Perform the deferred assignment conf.<param> = value."""
		setattr(self.__config, self.param, self.value)
213

214
class TaskThread(threading.Thread):
	"""Daemon thread that executes callables taken from a queue, one after another."""

	def __init__(self, task_queue):
		"""task_queue: queue whose items are callables; a None item stops the thread."""
		super().__init__(daemon=True)
		self.__queue = task_queue

	def run(self):
		"""Process tasks until None is received. A failing task is logged and does not stop the loop."""
		while True:
			task = self.__queue.get()
			if task is None:
				break
			# Only the task itself is guarded, so that a broken queue can not cause an endless error loop
			try:
				task()
			except Exception:
				log.error("ru_tts: error while processing a task", exc_info=True)
230

231
class SynthDriver(SynthDriver):
	"""NVDA synthesizer driver for the ru_tts engine.

	Text is prepared in speak()/do_speak(); every interaction with the native engine
	(synthesis, parameter changes, volume and speed) is put on a queue and executed
	in order by a single TaskThread.
	"""
	name = "ru_tts"
	description = "ru_tts"

	supportedSettings = [
		SynthDriver.VoiceSetting(),
		SynthDriver.RateSetting(),
		SynthDriver.RateBoostSetting(),
		SynthDriver.PitchSetting(),
		SynthDriver.VolumeSetting(),
		SynthDriver.InflectionSetting(),
		NumericDriverSetting("gapFactor", _("Pause between phrases"), availableInSettingsRing=True),
	]

	supportedCommands = {IndexCommand, PitchCommand}
	supportedNotifications = {synthIndexReached, synthDoneSpeaking}

	def __init__(self):
		"""Load the engine, apply the user configuration and start the worker thread.

		Raises:
			RuntimeError: if the engine can not create a TTS instance.
		"""
		# Load the main synthesizer engine first
		self.__ru_tts_lib = CDLL(RU_TTS_LIB_PATH)
		self.__ru_tts_lib.tts_create.argtypes = (RU_TTS_CALLBACK,)
		self.__ru_tts_lib.tts_create.restype = TTS
		self.__ru_tts_lib.tts_destroy.argtypes = (TTS,)
		self.__ru_tts_lib.tts_speak.argtypes = (TTS, POINTER(RU_TTS_CONF_T), c_char_p)
		self.__ru_tts_lib.tts_setVolume.argtypes = (TTS, c_float)
		self.__ru_tts_lib.tts_setSpeed.argtypes = (TTS, c_float)
		self.__ru_tts_lib.ru_tts_config_init.argtypes = (POINTER(RU_TTS_CONF_T),)

		# Start from the engine defaults and override them with the values from the user configuration
		self.__config = RU_TTS_CONF_T()
		self.__ru_tts_lib.ru_tts_config_init(byref(self.__config))
		self.__user_config = self._getUserConfiguration()

		params = self.__user_config["Parameters"]
		for field in (
			"comma_gap_factor",
			"dot_gap_factor",
			"semicolon_gap_factor",
			"colon_gap_factor",
			"question_gap_factor",
			"exclamation_gap_factor",
			"intonational_gap_factor",
		):
			setattr(self.__config, field, params[field])

		self.__config.flags = 0
		if params["dec_sep_point"]:
			self.__config.flags |= DEC_SEP_POINT
		if params["dec_sep_comma"]:
			self.__config.flags |= DEC_SEP_COMMA

		# Unicode normalization is applied only when enabled and the configured form is a valid one
		self.__normalizationForm = None
		if params["use_unicode_normalization"]:
			validForms = ("NFC", "NFKC", "NFD", "NFKD")
			form = params["unicode_normalization_form"]
			if form in validForms:
				self.__normalizationForm = form

		# The pronunciation dictionary is optional: the driver works without it
		try:
			self.__rulex_dict = RulexDict(RULEX_DB_PATH)
		except Exception:
			self.__rulex_dict = None
			log.warning("rulex not available", exc_info=True)

		self.__silence_flag = threading.Event()
		self.__player = None
		try:
			self.__player = nvwave.WavePlayer(channels=1, samplesPerSec=params["samples_per_sec"], bitsPerSample=16, outputDevice=config.conf["speech"]["outputDevice"])
			self.__audio_callback = AudioCallback(self.__silence_flag, self.__player)
			# The ctypes callback object is kept as an attribute: it must stay alive while the engine may call it
			self.__c_audio_callback = RU_TTS_CALLBACK(self.__audio_callback)
			self.__tts = self.__ru_tts_lib.tts_create(self.__c_audio_callback)
			if not self.__tts:
				raise RuntimeError("ru_tts: failed to create a TTS instance")
		except Exception:
			# terminate() will never run for a half-built driver, so release what was acquired here
			if self.__player is not None:
				self.__player.close()
			if self.__rulex_dict is not None:
				self.__rulex_dict.close()
				self.__rulex_dict = None
			raise

		# The RuLex setting is registered only once nothing else can fail. supportedSettings is a
		# class-level list, so an entry added by a failed initialization would never be removed
		if self.__rulex_dict is not None:
			self.__rulexSetting = BooleanDriverSetting("useRulex", _("Use RuLex pronunciation dictionary"), availableInSettingsRing=True)
			self.supportedSettings.append(self.__rulexSetting)

		# Values exposed to NVDA are percentages; the engine values are converted on the way in and out.
		# _paramToPercent/_percentToParam are not defined in this class - presumably inherited from the NVDA base class
		self.__speechFlags = self.__config.flags
		self.__rate = self._paramToPercent(self.__config.speech_rate, RATE_MIN, RATE_MAX)
		self.__rateBoost = False
		self.__pitch = self._paramToPercent(self.__config.voice_pitch, PITCH_MIN, PITCH_MAX)
		self.__volume = 50
		self.__ru_tts_lib.tts_setVolume(self.__tts, self.__volume/100)
		self.__inflection = self._paramToPercent(self.__config.intonation, INTONATION_MIN, INTONATION_MAX)
		self.__gap_factor_max = self._maxGapRange(self.__config.speech_rate)
		self.__gapFactor = self._paramToPercent(self.__config.general_gap_factor, 0, self.__gap_factor_max)
		self.__useRulex = False

		self.__task_queue = queue.Queue()
		self.__task_thread = TaskThread(self.__task_queue)
		self.__task_thread.start()

	@classmethod
	def check(cls):
		"""Report the driver as always available."""
		return True

	def terminate(self):
		"""Stop speech, shut down the worker thread and release the player, the dictionary and the engine."""
		self.cancel()
		# None asks the worker thread to exit
		self.__task_queue.put(None)
		self.__task_thread.join()
		self.__player.close()
		if self.__rulex_dict is not None:
			self.supportedSettings.remove(self.__rulexSetting)
			self.__rulex_dict.close()
			self.__rulex_dict = None
		self.__config = None
		self.__ru_tts_lib.tts_destroy(self.__tts)
		self.__tts = None
		# Prevent reference cycles
		self.__audio_callback = None
		self.__c_audio_callback = None
		# Try to unload the main synthesizer engine
		try:
			# The handle is wrapped in c_void_p because a plain Python int is passed as a C int,
			# which would truncate the module handle in a 64-bit process
			if not windll.kernel32.FreeLibrary(c_void_p(self.__ru_tts_lib._handle)):
				log.error("ru_tts: can not unload dll")
		except Exception:
			log.error("ru_tts: can not unload dll", exc_info=True)
		finally:
			self.__ru_tts_lib = None

	def _getUserConfiguration(self):
		"""Read the user configuration, validate it against config.spec and return the ConfigObj.

		Missing values are filled in from the spec defaults (copy=True) and, outside of secure mode,
		the completed configuration is written back to the file.
		"""
		with open(CONFIG_SPEC_PATH, encoding="utf-8") as spec:
			conf = ConfigObj(infile=CONFIG_FILE_PATH, configspec=spec, encoding="utf-8", default_encoding="utf-8")
		result = conf.validate(Validator(), copy=True)
		if result is not True:
			# validate() returns True only when every value passed; otherwise it describes the failures
			log.warning(f"ru_tts: configuration validation failed: {result}")
		if not globalVars.appArgs.secure:
			try:
				conf.write()
			except OSError:
				log.error("ru_tts: failed to write config file", exc_info=True)
		return conf

	def _setParameter(self, param, value):
		"""Queue the assignment of an engine configuration field so it takes effect in order with speech."""
		task = SetParameter(self.__config, param, value)
		self.__task_queue.put(task)

	def speak(self, speechSequence):
		"""Queue a speech sequence made of strings, IndexCommand and PitchCommand items.

		Other speech commands are logged and ignored. A pitch changed by a PitchCommand is
		restored to the configured pitch at the end of the sequence.
		"""
		textList = []
		pitchChanged = False
		for item in speechSequence:
			if isinstance(item, str):
				textList.append(item)
			elif isinstance(item, IndexCommand):
				self.do_speak(textList, item.index)
				textList = []
			elif isinstance(item, PitchCommand):
				# Text collected so far must be spoken with the previous pitch
				self.do_speak(textList)
				textList = []
				pitch = self._percentToParam(item.newValue, PITCH_MIN, PITCH_MAX)
				self._setParameter(VOICE_PITCH_PARAM, pitch)
				pitchChanged = True
			elif isinstance(item, SpeechCommand):
				log.debugWarning(f"Unsupported speech command: {item}")
			else:
				log.error(f"Unknown speech: {item}")
		self.do_speak(textList)
		if pitchChanged:
			pitch = self._percentToParam(self.__pitch, PITCH_MIN, PITCH_MAX)
			self._setParameter(VOICE_PITCH_PARAM, pitch)
		self.__task_queue.put(DoneSpeaking(self.__player, self._onIndexReached))

	def do_speak(self, textList, index=None):
		"""Prepare the joined text for the engine and queue it together with an optional index.

		textList: list of strings that form one chunk of text.
		index: index to report after the chunk has been spoken, or None.
		"""
		text = "".join(textList).strip()
		if not text and index is None:
			# Nothing to say and nothing to report: such a task would do nothing
			return
		if self.__normalizationForm is not None:
			text = unicodedata.normalize(self.__normalizationForm, text)
		if len(text) == 1:
			# A lone character is spoken by its name from the user configuration
			text = self.__user_config["SingleCharacters"].get(text.lower(), text)
		else:
			text = RE_SINGLE_LATIN.sub(self._singleLatinSearch, text)
			text = RE_ABBREVIATIONS.sub(self._abbreviationSearch, text)
			text = RE_LETTER_AFTER_NUMBER.sub(self._letterAfterNumberSearch, text)
			text = "".join([self.__user_config["Characters"].get(ch.lower(), ch) for ch in text])
		if self.__useRulex and (self.__rulex_dict is not None):
			text = RE_WORDS.sub(self.__rulex_dict.search, text)
		text = text.translate(SINGLE_CHARACTER_TRANSLATION_DICT)
		text = RE_BRAILLE_PATTERNS.sub(self._brailleDotsSearch, text)
		task = SpeakText(text, self.__ru_tts_lib, self.__tts, self.__config, self.__silence_flag, index, self._onIndexReached)
		self.__task_queue.put(task)

	def pause(self, switch):
		"""Pause (True) or resume (False) the audio player."""
		self.__player.pause(switch)

	def cancel(self):
		"""Drop all pending speech and stop the current one.

		Queued SpeakText tasks are discarded; all other queued tasks are put back in their original order.
		"""
		tasks = []
		try:
			while True:
				task = self.__task_queue.get_nowait()
				if not isinstance(task, SpeakText):
					tasks.append(task)
		except queue.Empty:
			pass
		for task in tasks:
			self.__task_queue.put(task)
		# Raise the flag to abort the running synthesis; the worker thread lowers it again
		# once it reaches the queued clear() call
		self.__silence_flag.set()
		self.__task_queue.put(self.__silence_flag.clear)
		self.__player.stop()

	def _singleLatinSearch(self, match):
		"""Replace a standalone Latin letter with its entry from the "SingleCharacters" section, if any."""
		ch = match.group().lower()
		return self.__user_config["SingleCharacters"].get(ch, ch)

	def _abbreviationSearch(self, match):
		"""Spell a consonant-only abbreviation letter by letter, separated by spaces."""
		word = match.group().lower()
		return " ".join([self.__user_config["SingleCharacters"].get(ch, ch) for ch in word])

	def _letterAfterNumberSearch(self, match):
		"""Insert a space between a digit and the letter that follows it."""
		return " ".join(match.group())

	def _brailleDotsSearch(self, match):
		"""Return the spoken (Russian) description of a braille pattern character."""
		ch = match.group()
		# Bit N of the code point is set when dot N+1 is raised
		dotLabels = [label for offset, label in enumerate(BRAILLE_DOT_LABELS) if (ord(ch) >> offset) & 1]
		if not dotLabels:
			return " брайлевский пробел "
		if len(dotLabels) == 8:
			return " брайлевское восьмиточие "
		dotLabels.append("брайлевские точки" if len(dotLabels) > 1 else "брайлевская точка")
		return f" {' '.join(dotLabels)} "

	def _onIndexReached(self, index):
		"""Notify NVDA that an index was reached, or, for None, that speaking is done."""
		if index is not None:
			synthIndexReached.notify(synth=self, index=index)
		else:
			synthDoneSpeaking.notify(synth=self)

	def _maxGapRange(self, rate):
		"""Return the upper bound of the general gap factor for the given engine speech rate."""
		return 125 * rate // RATE_MIN

	def _get_language(self):
		return "ru"

	def _get_rate(self):
		return self.__rate

	def _set_rate(self, value):
		"""Set the speech rate from a percentage and rescale the pause between phrases accordingly."""
		self.__rate = value
		rate = self._percentToParam(self.__rate, RATE_MIN, RATE_MAX)
		self._setParameter(SPEECH_RATE_PARAM, rate)
		# The gap factor depends on the speech rate, so it has to be recalculated
		self.__gap_factor_max = self._maxGapRange(rate)
		gap_factor = self._percentToParam(self.__gapFactor, 0, self.__gap_factor_max)
		self._setParameter(GENERAL_GAP_FACTOR_PARAM, gap_factor)

	def _get_pitch(self):
		return self.__pitch

	def _set_pitch(self, value):
		"""Set the voice pitch from a percentage."""
		self.__pitch = value
		pitch = self._percentToParam(self.__pitch, PITCH_MIN, PITCH_MAX)
		self._setParameter(VOICE_PITCH_PARAM, pitch)

	def _get_volume(self):
		return self.__volume

	def _set_volume(self, volume):
		"""Set the volume from a percentage; the engine receives it as a fraction of 1."""
		self.__volume = volume
		self.__task_queue.put(lambda: self.__ru_tts_lib.tts_setVolume(self.__tts, volume/100))

	def _getAvailableVoices(self):
		"""Return the two voices; the identifiers are "0" (male) and "1" (female)."""
		voices = OrderedDict()
		for number, displayName in enumerate((_("Male"), _("Female"))):
			voiceId = str(number)
			voices[voiceId] = VoiceInfo(voiceId, displayName, "ru")
		return voices

	def _get_voice(self):
		# USE_ALTERNATIVE_VOICE is bit 2, so the shifted flag gives "0" or "1"
		return str((self.__speechFlags & USE_ALTERNATIVE_VOICE) >> 2)

	def _set_voice(self, voice):
		"""Select a voice by identifier; unknown identifiers are ignored."""
		if voice in self.availableVoices:
			if (int(voice) << 2) == USE_ALTERNATIVE_VOICE:
				self.__speechFlags |= USE_ALTERNATIVE_VOICE
			else:
				self.__speechFlags &= ~USE_ALTERNATIVE_VOICE
			self._setParameter(FLAGS_PARAM, self.__speechFlags)

	def _get_rateBoost(self):
		return self.__rateBoost

	def _set_rateBoost(self, enable):
		"""Switch the rate boost: the engine speed factor is 2.0 when enabled and 1.0 otherwise."""
		if enable != self.__rateBoost:
			self.__rateBoost = enable
			speed = 2.0 if self.__rateBoost else 1.0
			self.__task_queue.put(lambda: self.__ru_tts_lib.tts_setSpeed(self.__tts, speed))

	def _get_gapFactor(self):
		return self.__gapFactor

	def _set_gapFactor(self, value):
		"""Set the pause between phrases from a percentage of the current maximum gap factor."""
		self.__gapFactor = value
		gap_factor = self._percentToParam(self.__gapFactor, 0, self.__gap_factor_max)
		self._setParameter(GENERAL_GAP_FACTOR_PARAM, gap_factor)

	def _get_inflection(self):
		return self.__inflection

	def _set_inflection(self, value):
		"""Set the intonation from a percentage."""
		self.__inflection = value
		intonation = self._percentToParam(self.__inflection, INTONATION_MIN, INTONATION_MAX)
		self._setParameter(INTONATION_PARAM, intonation)

	def _get_useRulex(self):
		return self.__useRulex

	def _set_useRulex(self, value):
		self.__useRulex = value
537

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.