# TTS-with-RVC / inference.py (136 lines, 4.6 KB)
1from rvc_infer import rvc_convert
2
3import os
4import edge_tts as tts
5from edge_tts import VoicesManager
6import asyncio, concurrent.futures
7import gradio as gr
8from rvc_infer import rvc_convert
9import config
10import hashlib
11from datetime import datetime
12
13
class TTS_RVC:
    """Callable front end that turns text into RVC-converted speech.

    An instance stores the conversion settings (RVC path, model path, input
    directory and TTS voice) and, when called, runs the module-level
    ``speech`` coroutine on a worker thread and returns its result.
    """

    def __init__(self, rvc_path, input_directory, model_path, voice="ru-RU-DmitryNeural"):
        """Create the working directories and store the settings.

        Args:
            rvc_path: Forwarded to ``speech`` as ``rvc_path``.
            input_directory: Directory in which ``speech`` stores the
                intermediate TTS audio.
            model_path: Forwarded to ``speech`` as ``model_path``.
            voice: Name of the TTS voice used until ``set_voice`` is called.
        """
        # exist_ok=True replaces the racy "check, then create" sequence: the
        # directory may appear between the check and the mkdir call.
        for directory in ("input", "output"):
            os.makedirs(directory, exist_ok=True)

        # Each call runs its own event loop on a thread from this pool.
        self.pool = concurrent.futures.ThreadPoolExecutor()
        self.current_voice = voice
        self.input_directory = input_directory
        # Not read anywhere in this class; kept because external code may
        # inspect it.
        self.can_speak = True
        self.current_model = model_path
        self.rvc_path = rvc_path

    def set_voice(self, voice):
        """Select the TTS voice used by subsequent calls."""
        self.current_voice = voice

    def get_voices(self):
        """Return the coroutine created by the module-level ``get_voices``.

        The module-level function is ``async``, so the value returned here is
        an un-awaited coroutine: the caller has to ``await`` it (or pass it to
        ``asyncio.run``) to obtain the list of voice names.
        """
        return get_voices()

    def __call__(self,
                 text,
                 pitch=0,
                 tts_rate=0,
                 tts_volume=0,
                 tts_pitch=0):
        """Synthesise ``text`` and return the path produced by ``speech``.

        Args:
            text: Text to speak.
            pitch: Forwarded to ``speech`` as ``pitch``.
            tts_rate: Forwarded to ``speech`` as ``tts_add_rate``.
            tts_volume: Forwarded to ``speech`` as ``tts_add_volume``.
            tts_pitch: Forwarded to ``speech`` as ``tts_add_pitch``.

        Returns:
            Whatever ``speech`` returns (the absolute path of the output
            file). The call blocks until the worker thread has finished.
        """
        job = speech(model_path=self.current_model,
                     rvc_path=self.rvc_path,
                     input_directory=self.input_directory,
                     text=text,
                     pitch=pitch,
                     voice=self.current_voice,
                     tts_add_rate=tts_rate,
                     tts_add_volume=tts_volume,
                     tts_add_pitch=tts_pitch)
        # asyncio.run executes on a pool thread so this method also works when
        # the calling thread already has a running event loop.
        future = self.pool.submit(asyncio.run, job)
        return future.result()

    def process_args(self, text):
        """Extract the inline ``--tts-*`` / ``--rvc-pitch`` options from ``text``.

        Returns:
            A pair ``([rate, volume, tts_pitch, rvc_pitch], remaining_text)``
            built from four successive ``process_text`` calls.
        """
        rate_param, text = process_text(text, param="--tts-rate")
        volume_param, text = process_text(text, param="--tts-volume")
        tts_pitch_param, text = process_text(text, param="--tts-pitch")
        rvc_pitch_param, text = process_text(text, param="--rvc-pitch")
        return [rate_param, volume_param, tts_pitch_param, rvc_pitch_param], text
58
59
def date_to_short_hash():
    """Return a short, practically unique token for naming files.

    The token is the first 8 hexadecimal characters of a SHA-256 digest taken
    over the current local time (with microseconds) and a few random bytes.

    Returns:
        An 8-character lowercase hexadecimal string.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    # A timestamp alone repeats for calls that land on the same clock tick,
    # which would make two files share a name; the random salt prevents that.
    digest = hashlib.sha256(timestamp.encode() + os.urandom(8))
    return digest.hexdigest()[:8]
66
67
async def get_voices():
    """Return the ``ShortName`` of every voice listed by ``VoicesManager``."""
    manager = await VoicesManager.create()
    short_names = []
    for entry in manager.voices:
        short_names.append(entry["ShortName"])
    return short_names
71
# Module-wide busy flag used by speech(): it is False while one call is
# synthesising/converting, and other calls poll it before starting their own
# work. NOTE(review): the check-then-set on a plain bool is not atomic across
# threads; confirm whether real mutual exclusion is required.
can_speak = True


async def speech(model_path,
                 input_directory,
                 rvc_path,
                 text,
                 pitch=0,
                 voice="ru-RU-DmitryNeural",
                 tts_add_rate=0,
                 tts_add_volume=0,
                 tts_add_pitch=0):
    """Synthesise ``text`` with edge-tts and run the result through RVC.

    Args:
        model_path: Passed to ``rvc_convert`` as ``model_path``.
        input_directory: Directory that receives the intermediate TTS file.
        rvc_path: Passed to ``rvc_convert`` as ``rvc_path``.
        text: Text to speak.
        pitch: Passed to ``rvc_convert`` as ``f0_up_key``.
        voice: edge-tts voice name.
        tts_add_rate: Signed rate adjustment, sent to edge-tts as ``"+N%"``.
        tts_add_volume: Signed volume adjustment, sent as ``"+N%"``.
        tts_add_pitch: Signed pitch adjustment, sent as ``"+NHz"``.

    Returns:
        Absolute path of the renamed ``.wav`` file in the ``output``
        directory.

    Raises:
        Any exception raised by edge-tts, ``rvc_convert`` or the file-system
        calls. The busy flag is released in every case.
    """
    global can_speak
    # The "+" format flag always emits an explicit sign ("+5", "-5", "+0").
    communicate = tts.Communicate(text=text,
                                  voice=voice,
                                  rate=f"{tts_add_rate:+}%",
                                  volume=f"{tts_add_volume:+}%",
                                  pitch=f"{tts_add_pitch:+}Hz")
    file_name = date_to_short_hash()
    input_path = os.path.join(input_directory, file_name)

    # Wait until no other call is using the converter.
    while not can_speak:
        await asyncio.sleep(1)
    can_speak = False
    try:
        await communicate.save(input_path)

        # The converter's return value is not used: as before, the result is
        # expected at output/out.wav -- TODO confirm against rvc_convert.
        rvc_convert(model_path=model_path,
                    input_path=input_path,
                    rvc_path=rvc_path,
                    f0_up_key=pitch)
        output_path = os.path.join("output", date_to_short_hash() + ".wav")
        os.rename(os.path.join("output", "out.wav"), output_path)
    finally:
        try:
            # Delete the file that was actually written (input_directory is
            # not necessarily "input"), also when a step above failed.
            os.remove(input_path)
        except FileNotFoundError:
            pass  # Nothing was written before the failure.
        finally:
            # Always release the flag; otherwise one failed call would make
            # every later call wait forever.
            can_speak = True

    return os.path.abspath(output_path)
107
108
def process_text(input_text, param, default_value=0):
    """Extract an integer option of the form ``<param> <int>`` from a text.

    The text is split on whitespace. Every occurrence of ``param`` followed
    by an optionally negative integer is removed from the text; when the
    option occurs several times, the last value wins.

    Args:
        input_text: Text that may contain the option.
        param: Option name to look for, e.g. ``"--tts-rate"``.
        default_value: Value returned when the option is absent or invalid.

    Returns:
        A tuple ``(value, text)``. On success ``text`` is the remaining words
        joined by single spaces. If the option has no value or a non-integer
        value, the error is printed and ``(default_value, input_text)`` is
        returned with the text untouched.
    """
    try:
        words = input_text.split()
        value = default_value
        remaining = []

        index = 0
        while index < len(words):
            word = words[index]
            if word != param:
                remaining.append(word)
                index += 1
                continue

            if index + 1 >= len(words):
                raise ValueError(f"There is no value for parameter \"{param}\"")

            candidate = words[index + 1]
            digits = candidate[1:] if candidate.startswith("-") else candidate
            if not digits.isdigit():
                raise ValueError(f"Invalid type of argument in \"{param}\"")

            value = int(candidate)
            # Skip exactly the flag and its value, so that the word following
            # them is still examined (a repeated flag is handled as well).
            index += 2

        return value, " ".join(remaining)

    except Exception as e:
        # Deliberate best effort: report the problem and hand the text back
        # unchanged together with the caller's default.
        print(f"Ошибка: {e}")
        return default_value, input_text