TTS-with-RVC

inference.py
136 строк · 4.6 Кб
Перенос по словам
1
from rvc_infer import rvc_convert
2

3
import os
4
import edge_tts as tts
5
from edge_tts import VoicesManager
6
import asyncio, concurrent.futures
7
import gradio as gr
8
from rvc_infer import rvc_convert
9
import config
10
import hashlib
11
from datetime import datetime
12

13

14
class TTS_RVC:
    """Text-to-speech front end that pipes edge-tts output through RVC.

    An instance stores the RVC location, the model to use, the edge-tts
    voice and the directory for intermediate TTS files. Calling the
    instance delegates to the module-level ``speech`` coroutine and
    returns whatever path that coroutine returns.
    """

    def __init__(self, rvc_path, input_directory, model_path, voice="ru-RU-DmitryNeural"):
        """Create the working directories and remember the settings.

        Args:
            rvc_path: Forwarded to ``speech`` as ``rvc_path``.
            input_directory: Directory in which ``speech`` stores the
                intermediate TTS file.
            model_path: Forwarded to ``speech`` as ``model_path``.
            voice: edge-tts voice short name used for synthesis.
        """
        # exist_ok avoids the check-then-create race of
        # "if not exists: mkdir" when two instances start at once.
        for directory in ('input', 'output'):
            os.makedirs(directory, exist_ok=True)
        # The TTS file is written into input_directory, which is not
        # necessarily the hard-coded 'input' folder above.
        if input_directory:
            os.makedirs(input_directory, exist_ok=True)

        # Worker threads on which __call__ runs the speech coroutine.
        self.pool = concurrent.futures.ThreadPoolExecutor()
        self.current_voice = voice
        self.input_directory = input_directory
        # Not read by any method of this class; kept for compatibility.
        self.can_speak = True
        self.current_model = model_path
        self.rvc_path = rvc_path

    def set_voice(self, voice):
        """Select the edge-tts voice used by subsequent calls."""
        self.current_voice = voice

    def get_voices(self):
        """Return the result of calling the module-level ``get_voices``.

        That function is declared ``async``, so the value returned here is
        a coroutine that the caller has to await (or hand to
        ``asyncio.run``) to obtain the list of voice short names.
        """
        return get_voices()

    def __call__(self,
                 text,
                 pitch=0,
                 tts_rate=0,
                 tts_volume=0,
                 tts_pitch=0):
        """Synthesize ``text`` and return the path produced by ``speech``.

        Args:
            text: Text to speak.
            pitch: Forwarded to ``speech`` as ``pitch``.
            tts_rate: Forwarded as ``tts_add_rate``.
            tts_volume: Forwarded as ``tts_add_volume``.
            tts_pitch: Forwarded as ``tts_add_pitch``.

        Returns:
            The value returned by the ``speech`` coroutine.
        """
        coroutine = speech(model_path=self.current_model,
                           rvc_path=self.rvc_path,
                           input_directory=self.input_directory,
                           text=text,
                           pitch=pitch,
                           voice=self.current_voice,
                           tts_add_rate=tts_rate,
                           tts_add_volume=tts_volume,
                           tts_add_pitch=tts_pitch)
        # asyncio.run is executed on a pool thread (with its own event
        # loop) and this call blocks until it finishes - presumably so the
        # method also works when the calling thread already runs a loop.
        future = self.pool.submit(asyncio.run, coroutine)
        return future.result()

    def process_args(self, text):
        """Extract the inline option flags from ``text``.

        Each of ``--tts-rate``, ``--tts-volume``, ``--tts-pitch`` and
        ``--rvc-pitch`` is parsed with the module-level ``process_text``.

        Returns:
            A tuple ``([rate, volume, tts_pitch, rvc_pitch], text)`` where
            ``text`` is what the last ``process_text`` call returned.
        """
        values = []
        for flag in ("--tts-rate", "--tts-volume", "--tts-pitch", "--rvc-pitch"):
            value, text = process_text(text, param=flag)
            values.append(value)
        return values, text
58

59

60
def date_to_short_hash():
    """Return an 8-character hexadecimal tag derived from the current time.

    The local timestamp is rendered with microsecond precision before
    hashing, so calls made within the same second yield different tags
    (a one-second resolution makes back-to-back calls collide).

    Returns:
        The first eight hex digits of the SHA-256 of the timestamp string.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    digest = hashlib.sha256(timestamp.encode()).hexdigest()
    return digest[:8]
66

67

68
async def get_voices():
    """Return the ``ShortName`` of every voice known to ``VoicesManager``."""
    manager = await VoicesManager.create()
    short_names = []
    for entry in manager.voices:
        short_names.append(entry["ShortName"])
    return short_names
71

72
# Module-wide flag used by speech() as a simple gate: it is False while one
# synthesis/conversion is in progress and True otherwise.
can_speak = True


async def speech(model_path,
                 input_directory,
                 rvc_path,
                 text,
                 pitch=0,
                 voice="ru-RU-DmitryNeural",
                 tts_add_rate=0,
                 tts_add_volume=0,
                 tts_add_pitch=0):
    """Synthesize ``text`` with edge-tts, convert it with RVC, return the file.

    Args:
        model_path: Passed to ``rvc_convert`` as ``model_path``.
        input_directory: Directory where the intermediate TTS file is saved.
        rvc_path: Passed to ``rvc_convert`` as ``rvc_path``.
        text: Text to synthesize.
        pitch: Passed to ``rvc_convert`` as ``f0_up_key``.
        voice: edge-tts voice short name.
        tts_add_rate: Signed number sent to edge-tts as ``"<n>%"`` rate.
        tts_add_volume: Signed number sent to edge-tts as ``"<n>%"`` volume.
        tts_add_pitch: Signed number sent to edge-tts as ``"<n>Hz"`` pitch.

    Returns:
        Absolute path of the renamed ``.wav`` file in the ``output`` folder.

    Raises:
        Whatever ``communicate.save``, ``rvc_convert`` or the file operations
        raise; the ``can_speak`` gate is reopened in every case.
    """
    global can_speak

    def signed(value):
        # edge-tts receives an explicit sign: "+5", "+0", "-5".
        return f'{"+" if value >= 0 else ""}{value}'

    communicate = tts.Communicate(text=text,
                                  voice=voice,
                                  rate=f'{signed(tts_add_rate)}%',
                                  volume=f'{signed(tts_add_volume)}%',
                                  pitch=f'{signed(tts_add_pitch)}Hz')
    file_name = date_to_short_hash()
    input_path = os.path.join(input_directory, file_name)

    # Wait until no other call is between the gate and its release.
    # NOTE(review): the check-and-set below is not atomic, so callers on
    # different threads can in principle pass the gate together.
    while not can_speak:
        await asyncio.sleep(1)
    can_speak = False
    try:
        await communicate.save(input_path)

        # The return value is not used: the converted audio is expected at
        # output/out.wav (the file renamed right below).
        rvc_convert(model_path=model_path,
                    input_path=input_path,
                    rvc_path=rvc_path,
                    f0_up_key=pitch)

        name = date_to_short_hash()
        # os.path.join instead of hard-coded backslashes keeps the paths
        # valid on non-Windows systems (identical result on Windows).
        output_path = os.path.join("output", name + ".wav")
        os.rename(os.path.join("output", "out.wav"), output_path)
        # Remove the file that was actually written; it lives in
        # input_directory, which need not be the literal "input" folder.
        os.remove(input_path)
    finally:
        # Always reopen the gate, otherwise one failure would leave every
        # later call spinning in the wait loop forever.
        can_speak = True

    return os.path.abspath(output_path)
107

108

109
def process_text(input_text, param, default_value=0):
    """Pull an integer option such as ``--flag 5`` out of a text.

    The text is split on whitespace; every occurrence of ``param`` followed
    by an integer (optionally negative) is removed together with its value.
    When the option occurs several times the last value wins.

    Args:
        input_text: Text that may contain the option.
        param: Option name to look for, compared against whole words.
        default_value: Value reported when the option is absent or invalid.

    Returns:
        A tuple ``(value, text)``. On success ``text`` is the remaining
        words joined by single spaces. If the option has no value or a
        non-integer value, an error line is printed and
        ``(default_value, input_text)`` is returned with the text untouched.
    """
    try:
        words = input_text.split()
        value = default_value

        i = 0
        while i < len(words):
            if words[i] != param:
                i += 1
                continue

            if i + 1 >= len(words):
                raise ValueError(f"There is no value for parameter \"{param}\"")

            next_word = words[i + 1]
            digits = next_word[1:] if next_word.startswith('-') else next_word
            if not digits.isdigit():
                raise ValueError(f"Invalid type of argument in \"{param}\"")

            value = int(next_word)
            # Drop the flag and its value. The index is deliberately NOT
            # advanced: the word that followed the pair now sits at i and
            # must be examined too (it may be another occurrence of param).
            del words[i:i + 2]

        return value, ' '.join(words)

    except Exception as e:
        # Deliberate best-effort: any failure leaves the text untouched.
        print(f"Ошибка: {e}")
        return default_value, input_text
TTS-with-RVC

Использование cookies