TTS-with-RVC-dev

inference.py
184 строки · 6.6 Кб
Перенос по словам
1
import os
2
import edge_tts as tts
3
from edge_tts import VoicesManager
4
import asyncio, concurrent.futures
5
import gradio as gr
6
from rvc_tts_pipeline.rvc_infer import rvc_convert
7
import hashlib
8
from datetime import datetime
9

10

11
class TTS_RVC:
    """Text-to-speech front end that runs edge-tts output through RVC conversion.

    Instances are callable: ``tts(text)`` synthesizes ``text`` with the current
    edge-tts voice, converts the result with the configured RVC model and
    returns the path of the produced audio file.
    """

    def __init__(self, rvc_path, input_directory, model_path, voice="ru-RU-DmitryNeural", output_directory=None):
        """Store the conversion settings and create the worker thread pool.

        Args:
            rvc_path: forwarded to ``rvc_convert`` as ``rvc_path``.
            input_directory: directory where the intermediate TTS audio is saved.
            model_path: forwarded to ``rvc_convert`` as ``model_path``.
            voice: edge-tts voice short name used for synthesis.
            output_directory: directory for converted audio, or ``None`` to let
                the conversion step use its own default.
        """
        self.pool = concurrent.futures.ThreadPoolExecutor()
        self.current_voice = voice
        self.input_directory = input_directory
        # NOTE(review): this attribute is not read anywhere in the class; the
        # busy flag that is actually consulted is the module-level `can_speak`.
        self.can_speak = True
        self.current_model = model_path
        self.rvc_path = rvc_path
        self.output_directory = output_directory

    def set_voice(self, voice):
        """Select the edge-tts voice used by subsequent calls."""
        self.current_voice = voice

    # NOTE(review): disabled synchronous wrapper around the module-level
    # get_voices() coroutine.
    #def get_voices(self):
    #    loop = asyncio.new_event_loop()
    #    voices = loop.run_until_complete(get_voices())
    #    loop.close()
    #    return voices

    def set_output_directory(self, directory_path):
        """Select the directory passed as ``output_directory`` on later calls."""
        # The original signature lacked `self`, so every call raised TypeError.
        self.output_directory = directory_path

    def __call__(self,
                 text,
                 pitch=0,
                 tts_rate=0,
                 tts_volume=0,
                 tts_pitch=0,
                 output_filename=None):
        """Synthesize ``text`` and convert it with RVC, blocking until done.

        Args:
            text: text to speak.
            pitch: forwarded to ``rvc_convert`` as ``f0_up_key``.
            tts_rate: signed rate adjustment for edge-tts, in percent.
            tts_volume: signed volume adjustment for edge-tts, in percent.
            tts_pitch: signed pitch adjustment for edge-tts, in Hz.
            output_filename: forwarded to ``rvc_convert`` as ``output_filename``;
                when ``None`` the result is renamed to a generated name.

        Returns:
            The value returned by the module-level ``speech`` coroutine (an
            absolute path to the converted audio file).
        """
        # The coroutine object is created here and driven by asyncio.run()
        # inside a pool thread, so this also works when the caller already has
        # a running event loop in its own thread.
        coroutine = speech(model_path=self.current_model,
                           rvc_path=self.rvc_path,
                           input_directory=self.input_directory,
                           text=text,
                           pitch=pitch,
                           voice=self.current_voice,
                           tts_add_rate=tts_rate,
                           tts_add_volume=tts_volume,
                           tts_add_pitch=tts_pitch,
                           output_directory=self.output_directory,
                           filename=output_filename)
        return self.pool.submit(asyncio.run, coroutine).result()

    def speech(self, input_path, pitch=0, output_directory=None, filename=None):
        """Convert an existing audio file with the current RVC model.

        Args:
            input_path: path of the audio file to convert.
            pitch: forwarded to ``rvc_convert`` as ``f0_up_key``.
            output_directory: forwarded to ``rvc_convert`` as ``output_dir_path``;
                also the rename target directory ("temp" when ``None``).
            filename: forwarded to ``rvc_convert`` as ``output_filename``; when
                ``None`` the result is renamed to a generated ``<hash>.wav``.

        Returns:
            Absolute path of the converted file, or ``None`` when the
            module-level ``can_speak`` flag is False.
        """
        # Reads the module-level busy flag (not self.can_speak).
        if not can_speak:
            print("Can't speak now")
            return
        output_path = rvc_convert(model_path=self.current_model,
                                  input_path=input_path,
                                  rvc_path=self.rvc_path,
                                  f0_up_key=pitch,
                                  output_filename=filename,
                                  output_dir_path=output_directory)
        if filename is None:
            if output_directory is None:
                output_directory = "temp"

            # No explicit name requested: give the result a generated name.
            new_path = os.path.join(output_directory, date_to_short_hash() + ".wav")
            os.rename(output_path, new_path)
            output_path = new_path

        return os.path.abspath(output_path)

    def process_args(self, text):
        """Extract the inline ``--tts-*`` / ``--rvc-pitch`` options from ``text``.

        Returns:
            A pair ``([rate, volume, tts_pitch, rvc_pitch], remaining_text)``
            built from successive ``process_text`` calls.
        """
        rate_param, text = process_text(text, param="--tts-rate")
        volume_param, text = process_text(text, param="--tts-volume")
        tts_pitch_param, text = process_text(text, param="--tts-pitch")
        rvc_pitch_param, text = process_text(text, param="--rvc-pitch")
        return [rate_param, volume_param, tts_pitch_param, rvc_pitch_param], text
82

83

84
def date_to_short_hash():
    """Return an 8-character hex identifier derived from the current local time.

    The timestamp is formatted with microsecond precision before hashing.
    With one-second precision, two calls made within the same second returned
    the same value, and the files named after it overwrote each other.

    Returns:
        str: the first 8 hex digits of the SHA-256 digest of the timestamp.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    return hashlib.sha256(timestamp.encode()).hexdigest()[:8]
90

91

92
async def get_voices():
    """Return the ``ShortName`` of every voice listed by edge-tts' VoicesManager."""
    manager = await VoicesManager.create()
    short_names = []
    for entry in manager.voices:
        short_names.append(entry["ShortName"])
    return short_names
95

96
# Module-wide busy flag: the `speech` coroutine sets it to False while an RVC
# conversion is running and back to True afterwards; other callers poll it.
can_speak = True
97

98
async def tts_comminicate(input_directory,
                          text,
                          voice="ru-RU-DmitryNeural",
                          tts_add_rate=0,
                          tts_add_volume=0,
                          tts_add_pitch=0):
    """Synthesize ``text`` with edge-tts and save the audio in ``input_directory``.

    Args:
        input_directory: directory in which the audio file is saved.
        text: text to synthesize.
        voice: edge-tts voice short name.
        tts_add_rate: signed rate adjustment, sent as a percentage.
        tts_add_volume: signed volume adjustment, sent as a percentage.
        tts_add_pitch: signed pitch adjustment, sent in Hz.

    Returns:
        A pair ``(input_path, file_name)``: the saved file's path and its
        generated name (the name carries no extension).
    """
    def signed(amount, unit):
        # edge-tts expects an explicit sign; negative numbers already carry one.
        sign = "+" if amount >= 0 else ""
        return f'{sign}{amount}{unit}'

    rate_spec = signed(tts_add_rate, "%")
    volume_spec = signed(tts_add_volume, "%")
    pitch_spec = signed(tts_add_pitch, "Hz")

    communicate = tts.Communicate(text=text,
                                  voice=voice,
                                  rate=rate_spec,
                                  volume=volume_spec,
                                  pitch=pitch_spec)

    file_name = date_to_short_hash()
    input_path = os.path.join(input_directory, file_name)
    await communicate.save(input_path)
    return input_path, file_name
113

114
async def speech(model_path,
                 input_directory,
                 rvc_path,
                 text,
                 pitch=0,
                 voice="ru-RU-DmitryNeural",
                 tts_add_rate=0,
                 tts_add_volume=0,
                 tts_add_pitch=0,
                 filename=None,
                 output_directory=None):
    """Synthesize ``text`` with edge-tts, convert it with RVC and return the path.

    Args:
        model_path: forwarded to ``rvc_convert`` as ``model_path``.
        input_directory: directory where the intermediate TTS audio is saved.
        rvc_path: forwarded to ``rvc_convert`` as ``rvc_path``.
        text: text to speak.
        pitch: forwarded to ``rvc_convert`` as ``f0_up_key``.
        voice: edge-tts voice short name.
        tts_add_rate: signed edge-tts rate adjustment, in percent.
        tts_add_volume: signed edge-tts volume adjustment, in percent.
        tts_add_pitch: signed edge-tts pitch adjustment, in Hz.
        filename: forwarded to ``rvc_convert`` as ``output_filename``; when
            ``None`` the result is renamed to a generated ``<hash>.wav``.
        output_directory: forwarded to ``rvc_convert`` as ``output_dir_path``;
            also the rename target directory ("temp" when ``None``).

    Returns:
        Absolute path of the converted audio file.

    Raises:
        Whatever ``tts_comminicate``, ``rvc_convert`` or ``os.rename`` raise.
        The busy flag is released and the intermediate TTS file is removed
        even when that happens.
    """
    global can_speak

    input_path, _ = await tts_comminicate(input_directory=input_directory,
                                          text=text,
                                          voice=voice,
                                          tts_add_rate=tts_add_rate,
                                          tts_add_volume=tts_add_volume,
                                          tts_add_pitch=tts_add_pitch)

    try:
        # Wait until no other conversion is marked as running.
        # NOTE(review): this check-then-set is not atomic across threads.
        while not can_speak:
            await asyncio.sleep(1)
        can_speak = False

        try:
            output_path = rvc_convert(model_path=model_path,
                                      input_path=input_path,
                                      rvc_path=rvc_path,
                                      f0_up_key=pitch,
                                      output_filename=filename,
                                      output_dir_path=output_directory)
            if filename is None:
                if output_directory is None:
                    output_directory = "temp"
                # No explicit name requested: give the result a generated name.
                new_path = os.path.join(output_directory, date_to_short_hash() + ".wav")
                os.rename(output_path, new_path)
                output_path = new_path
        finally:
            # Always release the flag; otherwise one failed conversion would
            # leave every later call spinning in the wait loop above.
            can_speak = True
    finally:
        # The intermediate TTS audio is no longer needed, success or not.
        if os.path.exists(input_path):
            os.remove(input_path)

    return os.path.abspath(output_path)
155

156

157
def process_text(input_text, param, default_value=0):
    """Extract an integer ``param <value>`` option from whitespace-separated text.

    Every occurrence of ``param`` followed by an integer (optionally prefixed
    with ``-``) is removed from the text; the last occurrence supplies the
    value. The remaining words are re-joined with single spaces.

    Args:
        input_text: text that may contain the option.
        param: option token to look for, e.g. ``"--tts-rate"``.
        default_value: value returned when the option is absent or invalid.

    Returns:
        A pair ``(value, text)``. On a malformed option (missing or
        non-integer value) the error is printed and
        ``(default_value, input_text)`` is returned with the text untouched.
    """
    try:
        words = input_text.split()
        value = default_value
        remaining = []

        i = 0
        while i < len(words):
            word = words[i]
            if word != param:
                remaining.append(word)
                i += 1
                continue

            if i + 1 >= len(words):
                raise ValueError(f"There is no value for parameter \"{param}\"")

            next_word = words[i + 1]
            digits = next_word[1:] if next_word.startswith("-") else next_word
            if not digits.isdigit():
                raise ValueError(f"Invalid type of argument in \"{param}\"")

            value = int(next_word)
            # Skip the option and its value together, so that an immediately
            # following repetition of the option is examined as well.
            i += 2

        return value, ' '.join(remaining)

    except Exception as e:
        # Deliberately best-effort: report the problem and hand the text back
        # unchanged, with the caller's default rather than a hard-coded 0.
        print(f"Ошибка: {e}")
        return default_value, input_text
185
TTS-with-RVC-dev

Использование cookies