ru_tts

Форк
0
/
numerics.c 
355 строк · 11.1 Кб
1
/* numerics.c -- Numbers phonetic transcription
2
 *
3
 * Copyright (C) 1990, 1991 Speech Research Laboratory, Minsk
4
 * Copyright (C) 2005 Igor Poretsky <poretsky@mlbox.ru>
5
 * Copyright (C) 2021 Boris Lobanov <lobbormef@gmail.com>
6
 * Copyright (C) 2021 Alexander Ivanov <ivalex01@gmail.com>
7
 *
8
 * SPDX-License-Identifier: MIT
9
 */
10

11
#include <stdint.h>
12
#include <stdlib.h>
13

14
#include "numerics.h"
15
#include "transcription.h"
16
#include "sink.h"
17
#include "synth.h"
18
#include "ru_tts.h"
19

20

21
/* Local constants */
22
/*
 * Bits of the local `flags` byte in process_number():
 *   NUMBER_FRACTION - set once a decimal separator followed by a digit
 *                     has been accepted; the next digit run is then
 *                     handled as the fractional part.
 *   NON_ZERO        - set when a digit other than '0' has been met in
 *                     the digit run being processed; cleared at the
 *                     start of every run.
 */
#define NUMBER_FRACTION (1 << 0)
#define NON_ZERO        (1 << 1)
24

25

26
/* Local data */
27

28
/* Predefined transcriptions */
29
/*
 * Transcriptions for the digits 0..9, indexed by digit value.
 * Every entry is a count byte followed by that many transcription codes
 * (put_transcription() passes the first byte of an entry as the count).
 */
static const uint8_t primary[] = {
  /* 0 */ 4,  16, 1, 53, 17,
  /* 1 */ 5,  2, 24, 5, 53, 16,
  /* 2 */ 4,  21, 6, 2, 53,
  /* 3 */ 4,  27, 13, 5, 53,
  /* 4 */ 7,  33, 3, 27, 4, 53, 13, 3,
  /* 5 */ 4,  29, 2, 53, 30,
  /* 6 */ 5,  36, 3, 53, 38, 30,
  /* 7 */ 4,  38, 3, 53, 15,
  /* 8 */ 6,  6, 1, 53, 38, 3, 15,
  /* 9 */ 6,  24, 3, 53, 11, 2, 30
};
42
/*
 * Transcriptions for the numbers 10..19, indexed by the units digit.
 * Every entry is a count byte followed by that many transcription codes.
 */
static const uint8_t secondary[] = {
  /* 10 */ 6,  24, 3, 53, 38, 2, 30,
  /* 11 */ 9,  2, 24, 5, 53, 16, 2, 32, 2, 30,
  /* 12 */ 9,  21, 11, 3, 16, 2, 53, 32, 2, 30,
  /* 13 */ 9,  27, 13, 5, 16, 2, 53, 32, 2, 30,
  /* 14 */ 11, 33, 3, 27, 4, 53, 8, 16, 2, 32, 2, 30,
  /* 15 */ 9,  29, 2, 27, 16, 2, 53, 32, 2, 30,
  /* 16 */ 9,  36, 3, 35, 16, 2, 53, 32, 2, 30,
  /* 17 */ 9,  38, 3, 15, 16, 2, 53, 32, 2, 30,
  /* 18 */ 11, 6, 2, 38, 3, 15, 16, 2, 53, 32, 2, 30,
  /* 19 */ 11, 24, 3, 11, 2, 27, 16, 2, 53, 32, 2, 30
};
55
/*
 * Transcriptions for the round tens 20..90, indexed by (tens digit - 2).
 * Every entry is a count byte followed by that many transcription codes.
 */
static const uint8_t tens[] = {
  /* 20 */ 7,  21, 6, 2, 53, 32, 2, 30,
  /* 30 */ 7,  27, 13, 5, 53, 32, 2, 30,
  /* 40 */ 6,  35, 1, 53, 8, 2, 28,
  /* 50 */ 8,  29, 2, 24, 3, 38, 2, 53, 27,
  /* 60 */ 9,  36, 3, 12, 24, 3, 38, 2, 53, 27,
  /* 70 */ 9,  38, 3, 53, 15, 24, 3, 38, 2, 27,
  /* 80 */ 11, 6, 1, 53, 38, 3, 15, 24, 3, 38, 2, 27,
  /* 90 */ 10, 24, 3, 11, 2, 16, 1, 53, 35, 27, 2
};
66
/*
 * Transcriptions for the round hundreds 100..900, indexed by
 * (hundreds digit - 1).
 * Every entry is a count byte followed by that many transcription codes.
 */
static const uint8_t hundreds[] = {
  /* 100 */ 4,  35, 27, 1, 53,
  /* 200 */ 7,  21, 11, 3, 53, 38, 30, 5,
  /* 300 */ 7,  27, 13, 5, 53, 35, 27, 2,
  /* 400 */ 10, 33, 3, 27, 4, 53, 13, 3, 35, 27, 2,
  /* 500 */ 7,  29, 2, 27, 35, 1, 53, 27,
  /* 600 */ 7,  36, 3, 35, 35, 1, 53, 27,
  /* 700 */ 7,  38, 3, 15, 35, 1, 53, 27,
  /* 800 */ 9,  6, 2, 38, 3, 15, 35, 1, 53, 27,
  /* 900 */ 9,  24, 3, 11, 2, 27, 35, 1, 53, 27
};
78
/*
 * Names of the three-digit periods above the units group.
 * process_number() selects the entry by (triplets - 1), where `triplets`
 * is the number of complete three-digit groups still to the right.
 * Every entry is a count byte followed by that many transcription codes.
 */
static const uint8_t periods[] = {
  /* triplets == 1 */ 6,  27, 4, 53, 38, 2, 33,
  /* triplets == 2 */ 7,  18, 5, 17, 5, 1, 53, 16,
  /* triplets == 3 */ 8,  18, 5, 17, 5, 2, 53, 8, 27,
  /* triplets == 4 */ 8,  27, 13, 5, 17, 5, 1, 53, 16
};
85
/*
 * Denominator words for the fractional part.
 * process_number() selects the entry by (n - 1), where n is the number of
 * fractional digits it consumed, so entry k serves a (k + 1)-digit fraction.
 * Every entry is a count byte followed by that many transcription codes;
 * the word ending is appended separately from `suffixes`.
 */
static const uint8_t fractions[] = {
  /*  1 digit  */ 6,  24, 3, 38, 2, 53, 27,
  /*  2 digits */ 4,  35, 1, 53, 27,
  /*  3 digits */ 7,  27, 4, 53, 38, 2, 33, 16,
  /*  4 digits */ 13, 24, 3, 38, 2, 30, 5, 27, 4, 53, 38, 2, 33, 16,
  /*  5 digits */ 10, 35, 27, 1, 27, 4, 53, 38, 2, 33, 16,
  /*  6 digits */ 8,  18, 5, 17, 5, 1, 53, 16, 16,
  /*  7 digits */ 14, 24, 3, 38, 2, 30, 5, 18, 5, 17, 5, 1, 53, 16, 16,
  /*  8 digits */ 11, 35, 27, 1, 18, 5, 17, 5, 1, 53, 16, 16,
  /*  9 digits */ 9,  18, 5, 17, 5, 2, 53, 8, 27, 16,
  /* 10 digits */ 15, 24, 3, 38, 2, 30, 5, 18, 5, 17, 5, 2, 53, 8, 27, 16,
  /* 11 digits */ 12, 35, 27, 1, 18, 5, 17, 5, 2, 53, 8, 27, 16,
  /* 12 digits */ 9,  27, 13, 5, 17, 5, 1, 53, 16, 16,
  /* 13 digits */ 15, 24, 3, 38, 2, 30, 5, 27, 13, 5, 17, 5, 1, 53, 16, 16,
  /* 14 digits */ 12, 35, 27, 1, 27, 13, 5, 17, 5, 1, 53, 16, 16
};
102
/*
 * Word endings appended by process_number():
 *   entry 0 - after a `fractions` word when the last digit was not a
 *             lone '1' (nc != 1);
 *   entry 1 - after a `fractions` word when it was (nc == 1);
 *   entry 2 - after a `periods` word above the first period when the
 *             group's units digit set no special form (nc == 0).
 * Every entry is a count byte followed by that many transcription codes.
 */
static const uint8_t suffixes[] = {
  /* 0 */ 2,  4, 40,
  /* 1 */ 3,  2, 10, 2,
  /* 2 */ 2,  2, 34
};
108
/*
 * Raw code sequence (no count byte) written with sink_write().
 * process_number() writes either a prefix of it -- the first 6 codes, or
 * the first 5 to omit the trailing 43 -- or all 14 codes when a lone '1'
 * is directly followed by a decimal separator and a digit.
 */
static const uint8_t one_int[] = {
  /* first word,  closed by 43 */ 2, 21, 16, 2, 53, 43,
  /* second word, closed by 43 */ 32, 3, 53, 14, 2, 10, 2, 43
};
113
/*
 * Raw code sequence (no count byte) written by process_number() when a
 * lone '1' is followed by the marker "O+"; 6 codes are written, or 5 to
 * omit the trailing 43.
 */
static const uint8_t one_o[] = {
  2, 21, 16, 1, 53,
  43
};
114
/*
 * Raw code sequence (no count byte) that process_number() writes for a
 * units digit '2' in its alternative form; 5 codes are written, or 4 to
 * omit the trailing 43.
 */
static const uint8_t two_e[] = {
  21, 11, 3, 53,
  43
};
115
/*
 * Raw code sequence (no count byte, 6 codes) that process_number() writes
 * between the integer and the fractional part unless the integer part
 * ended in a lone '1' (nc == 1).
 */
static const uint8_t n_ints[] = {
  32, 3, 53, 14, 4, 40
};
116

117

118
/* Local subroutines */
119

120
/* Pass specified list item to the consumer */
121
static void put_transcription(sink_t *consumer, const uint8_t *lst, uint8_t n)
122
{
123
  const uint8_t *item = list_item(lst, n);
124
  sink_write(consumer, item + 1, item[0]);
125
}
126

127
/* Output transcription for specified digit */
128
static void transcribe_digit(sink_t *consumer, char digit, char following)
129
{
130
  put_transcription(consumer, primary, digit - '0');
131
  if (following != ' ')
132
    sink_put(consumer, 43);
133
}
134

135
/* Return true if specified character may be treated as decimal point. */
136
static int check_dec_point(sink_t *consumer, char c)
137
{
138
  int flags = ((ttscb_t *) (consumer->user_data))->flags;
139
  return ((flags & DEC_SEP_POINT) && (c == '.')) ||
140
    ((flags & DEC_SEP_COMMA) && (c == ','));
141
}
142

143

144
/* Global functions */
145

146
/* Transcribe numeric string from input and pass result to the consumer */
147
void process_number(input_t *input, sink_t *consumer)
148
{
149
  unsigned char c;
150
  uint8_t flags = 0;
151

152
  for (c = input->start[0]; IS_DIGIT(c) && (input->start < input->end); c = *(++input->start))
153
    {
154
      char *s;
155
      uint8_t digits = 1;
156
      uint8_t triplets = 0;
157
      uint8_t lzn = 0;
158
      uint8_t nc = 0;
159
      uint8_t n;
160

161
      flags &= ~NON_ZERO;
162
      if (sink_last(consumer) != 43)
163
        sink_put(consumer, 43);
164
      for (s = input->start + 1; s < input->end; s++)
165
        if (IS_DIGIT(s[0]))
166
          {
167
            if (++digits > 3)
168
              {
169
                digits = 1;
170
                if (++triplets > 4)
171
                  {
172
                    digits = 3;
173
                    triplets = 4;
174
                    break;
175
                  }
176
              }
177
            else if ((flags &NUMBER_FRACTION) && (triplets > 3) && (digits > 1))
178
              break;
179
          }
180
        else break;
181
      n = triplets * 3 + digits;
182

183
      s = input->start;
184
      for (input->start += n; !consumer->status; s++)
185
        {
186
          c = s[0];
187
          nc = 0;
188
          if (c != '0')
189
            flags |= NON_ZERO;
190
          else if (IS_DIGIT(s[1]))
191
            lzn++;
192
          if ((c != '0') || !((flags & NON_ZERO) || IS_DIGIT(s[1])))
193
            {
194
              lzn = 0;
195
              switch (digits)
196
                {
197
                case 3:
198
                  put_transcription(consumer, hundreds, c - '1');
199
                  sink_put(consumer, 43);
200
                  break;
201
                case 1:
202
                  if (c == '1')
203
                    {
204
                      nc = 1;
205
                      switch (triplets)
206
                        {
207
                        case 1:
208
                          sink_write(consumer, one_int, 6);
209
                          break;
210
                        case 0:
211
                          if (s[2] == '+')
212
                            {
213
                              if (s[1] == 'A')
214
                                {
215
                                  input->start += 2;
216
                                  s = input->start;
217
                                  sink_write(consumer, one_int, (s[0] != ' ') ? 6 : 5);
218
                                  break;
219
                                }
220
                              else if (s[1] == 'O')
221
                                {
222
                                  input->start += 2;
223
                                  s = input->start;
224
                                  sink_write(consumer, one_o, (s[0] != ' ') ? 6 : 5);
225
                                  break;
226
                                }
227
                            }
228
                        default:
229
                          if (flags & NUMBER_FRACTION)
230
                            {
231
                              if ((s < input->end) && IS_DIGIT(s[2]))
232
                                transcribe_digit(consumer, c, s[0]);
233
                              else sink_write(consumer, one_int, 6);
234
                            }
235
                          else if ((s >= input->end) || !check_dec_point(consumer, s[1]) || !IS_DIGIT(s[2]))
236
                            transcribe_digit(consumer, c, s[0]);
237
                          else sink_write(consumer, one_int, 14);
238
                          break;
239
                        }
240
                      break;
241
                    }
242
                  else if (c < '5')
243
                    {
244
                      nc = 2;
245
                      if (c == '2')
246
                        {
247
                          if ((triplets == 0) && (s[2] == '+') && (s[1] == 'E'))
248
                            {
249
                              input->start += 2;
250
                              s = input->start;
251
                              sink_write(consumer, two_e, (s[0] != ' ') ? 5 : 4);
252
                              break;
253
                            }
254
                          else if ((triplets == 1) ||
255
                                   ((flags & NUMBER_FRACTION) && (s == (input->start - 1))) ||
256
                                   (check_dec_point(consumer, s[1]) && IS_DIGIT(s[2])))
257
                            {
258
                              sink_write(consumer, two_e, 5);
259
                              break;
260
                            }
261
                        }
262
                    }
263
                  transcribe_digit(consumer, c, s[0]);
264
                  break;
265
                default:
266
                  if (c == '1')
267
                    {
268
                      put_transcription(consumer, secondary, *(++s) - '0');
269
                      nc = 0;
270
                      digits--;
271
                    }
272
                  else put_transcription(consumer, tens, c - '2');
273
                  sink_put(consumer, 43);
274
                  break;
275
                }
276
            }
277
          else if (!flags)
278
            transcribe_digit(consumer, c, s[0]);
279
          if (!(--digits))
280
            {
281
              if (lzn == 3)
282
                {
283
                  lzn = 0;
284
                  if (triplets)
285
                    {
286
                      digits = 3;
287
                      triplets--;
288
                    }
289
                  else
290
                    {
291
                      sink_back(consumer);
292
                      break;
293
                    }
294
                }
295
              else
296
                {
297
                  lzn = 0;
298
                  if (triplets)
299
                    {
300
                      if (flags & NON_ZERO)
301
                        {
302
                          put_transcription(consumer, periods, triplets - 1);
303
                          if (triplets != 1)
304
                            {
305
                              if (nc != 1)
306
                                {
307
                                  if (sink_last(consumer) == 27)
308
                                    sink_replace(consumer, 21);
309
                                  if (nc > 1)
310
                                    sink_put(consumer, 2);
311
                                  else put_transcription(consumer, suffixes, 2);
312
                                }
313
                            }
314
                          else if (nc > 0)
315
                            sink_put(consumer, (nc > 1) ? 5 : 2);
316
                          sink_flush(consumer);
317
                        }
318
                      digits = 3;
319
                      triplets--;
320
                    }
321
                  else
322
                    {
323
                      sink_back(consumer);
324
                      break;
325
                    }
326
                }
327
            }
328
        }
329

330
      if (consumer->status)
331
        break;
332
      else if (flags & NUMBER_FRACTION)
333
        {
334
          sink_put(consumer, 43);
335
          put_transcription(consumer, fractions, n - 1);
336
          put_transcription(consumer, suffixes, (nc != 1) ? 0 : 1);
337
          break;
338
        }
339
      else if (((input->start + 1) < input->end) && check_dec_point(consumer, input->start[0]) && IS_DIGIT(input->start[1]))
340
        {
341
          flags |= NUMBER_FRACTION;
342
          sink_put(consumer, 43);
343
          if (nc != 1)
344
            {
345
              sink_write(consumer, n_ints, 6);
346
              sink_flush(consumer);
347
            }
348
        }
349
      else
350
        {
351
          sink_put(consumer, 43);
352
          break;
353
        }
354
    }
355
}
356

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.