ru_tts

intonator.c
521 строка · 11.8 Кб
Перенос по словам
1
/* intonator.c -- Calculate and apply intonation
 *
 * Copyright (C) 1990, 1991 Speech Research Laboratory, Minsk
 * Copyright (C) 2005 Igor Poretsky <poretsky@mlbox.ru>
 * Copyright (C) 2021 Boris Lobanov <lobbormef@gmail.com>
 * Copyright (C) 2021 Alexander Ivanov <ivalex01@gmail.com>
 *
 * SPDX-License-Identifier: MIT
 */
10

11
#include <stdint.h>
12
#include <stdlib.h>
13
#include <string.h>
14

15
#include "transcription.h"
16
#include "soundscript.h"
17
#include "modulation.h"
18

19

20
/* Static data */
21

22
/* Intonation plans for various clause types */
23
static const uint8_t intonations[][NSTAGES][2] =
24
  {
25
    {
26
      { 30, 40 },
27
      { 40, 50 },
28
      { 50, 50 },
29
      { 50, 40 },
30
      { 40, 50 },
31
      { 50, 60 },
32
      { 60, 50 },
33
      { 50, 40 },
34
      { 40, 40 },
35
      { 40, 60 },
36
      { 60, 70 },
37
      { 70, 70 }
38
    },
39
    {
40
      { 30, 40 },
41
      { 40, 50 },
42
      { 50, 50 },
43
      { 50, 40 },
44
      { 40, 45 },
45
      { 45, 50 },
46
      { 50, 60 },
47
      { 60, 40 },
48
      { 40, 25 },
49
      { 25, 15 },
50
      { 15, 30 },
51
      { 30, 60 }
52
    },
53
    {
54
      { 30, 60 },
55
      { 60, 70 },
56
      { 70, 60 },
57
      { 60, 40 },
58
      { 40, 50 },
59
      { 50, 60 },
60
      { 60, 50 },
61
      { 50, 40 },
62
      { 40, 40 },
63
      { 40, 60 },
64
      { 60, 70 },
65
      { 70, 70 }
66
    },
67
    {
68
      { 30, 60 },
69
      { 60, 70 },
70
      { 70, 60 },
71
      { 60, 40 },
72
      { 40, 50 },
73
      { 50, 60 },
74
      { 60, 50 },
75
      { 50, 40 },
76
      { 40, 40 },
77
      { 40, 60 },
78
      { 60, 70 },
79
      { 70, 70 }
80
    },
81
    {
82
      { 30, 60 },
83
      { 60, 70 },
84
      { 70, 60 },
85
      { 60, 40 },
86
      { 40, 50 },
87
      { 50, 60 },
88
      { 60, 50 },
89
      { 50, 40 },
90
      { 40, 40 },
91
      { 40, 60 },
92
      { 60, 70 },
93
      { 70, 70 }
94
    },
95
    {
96
      { 30, 40 },
97
      { 40, 50 },
98
      { 50, 60 },
99
      { 60, 70 },
100
      { 70, 20 },
101
      { 20, 40 },
102
      { 40, 60 },
103
      { 60, 40 },
104
      { 40, 50 },
105
      { 50, 20 },
106
      { 20, 0 },
107
      { 0, 0 }
108
    },
109
    {
110
      { 30, 30 },
111
      { 35, 40 },
112
      { 40, 40 },
113
      { 40, 40 },
114
      { 40, 20 },
115
      { 20, 30 },
116
      { 30, 40 },
117
      { 40, 40 },
118
      { 30, 20 },
119
      { 20, 0 },
120
      { 0, 0 },
121
      { 0, 0 }
122
    },
123
    {
124
      { 30, 40 },
125
      { 40, 50 },
126
      { 50, 60 },
127
      { 60, 60 },
128
      { 60, 20 },
129
      { 20, 40 },
130
      { 40, 60 },
131
      { 60, 60 },
132
      { 40, 60 },
133
      { 60, 35 },
134
      { 35, 20 },
135
      { 20, 0 }
136
    },
137
    {
138
      { 30, 45 },
139
      { 45, 65 },
140
      { 65, 80 },
141
      { 80, 80 },
142
      { 80, 40 },
143
      { 40, 60 },
144
      { 60, 80 },
145
      { 80, 80 },
146
      { 40, 80 },
147
      { 80, 50 },
148
      { 50, 20 },
149
      { 20, 0 }
150
    },
151
    {
152
      { 20, 30 },
153
      { 30, 25 },
154
      { 25, 25 },
155
      { 25, 20 },
156
      { 20, 30 },
157
      { 30, 25 },
158
      { 25, 25 },
159
      { 25, 20 },
160
      { 20, 30 },
161
      { 30, 25 },
162
      { 25, 20 },
163
      { 20, 10 }
164
    },
165
    {
166
      { 40, 80 },
167
      { 80, 80 },
168
      { 80, 80 },
169
      { 80, 70 },
170
      { 70, 70 },
171
      { 70, 65 },
172
      { 65, 60 },
173
      { 60, 60 },
174
      { 60, 80 },
175
      { 80, 100 },
176
      { 100, 100 },
177
      { 100, 10 }
178
    },
179
    {
180
      { 40, 80 },
181
      { 80, 90 },
182
      { 90, 90 },
183
      { 90, 80 },
184
      { 80, 80 },
185
      { 80, 80 },
186
      { 80, 80 },
187
      { 80, 80 },
188
      { 80, 60 },
189
      { 60, 40 },
190
      { 40, 20 },
191
      { 20, 0 }
192
    },
193
    {
194
      { 40, 80 },
195
      { 80, 80 },
196
      { 80, 80 },
197
      { 80, 70 },
198
      { 70, 70 },
199
      { 70, 65 },
200
      { 65, 60 },
201
      { 60, 60 },
202
      { 60, 80 },
203
      { 80, 100 },
204
      { 100, 100 },
205
      { 100, 10 }
206
    },
207
    {
208
      { 40, 100 },
209
      { 100, 100 },
210
      { 100, 100 },
211
      { 100, 50 },
212
      { 100, 100 },
213
      { 100, 100 },
214
      { 100, 100 },
215
      { 100, 100 },
216
      { 50, 80 },
217
      { 80, 100 },
218
      { 100, 40 },
219
      { 20, 0 }
220
    },
221
    {
222
      { 40, 100 },
223
      { 100, 100 },
224
      { 100, 100 },
225
      { 100, 100 },
226
      { 100, 100 },
227
      { 100, 100 },
228
      { 100, 100 },
229
      { 100, 100 },
230
      { 40, 80 },
231
      { 80, 100 },
232
      { 100, 20 },
233
      { 20, 0 }
234
    },
235
    {
236
      { 40, 100 },
237
      { 100, 100 },
238
      { 100, 100 },
239
      { 100, 100 },
240
      { 100, 100 },
241
      { 100, 100 },
242
      { 100, 100 },
243
      { 100, 100 },
244
      { 100, 70 },
245
      { 70, 50 },
246
      { 50, 20 },
247
      { 20, 0 }
248
    }
249
  };
250

251

252
/* Local routines */
253

254
/*
255
 * Search transcription buffer for a speech breakpoint
256
 * ahead from specified index.
257
 *
258
 * Returns the last checked code or -1 if nothing found.
259
 */
260
static int search_breakpoint(uint8_t *transcription, int start_index)
261
{
262
  int rc = -1;
263
  int i;
264

265
  for (i = start_index; i < TRANSCRIPTION_BUFFER_SIZE; i++)
266
    {
267
      rc = transcription[i];
268
      if (rc != 43)
269
        {
270
          if (rc > 43)
271
            return -1;
272
          break;
273
        }
274
    }
275
  for (i++; i < TRANSCRIPTION_BUFFER_SIZE; i++)
276
    {
277
      rc = transcription[i];
278
      if (rc > 42)
279
        return rc;
280
    }
281
  return -1;
282
}
283

284
/*
 * Convert a relative tone level into the value stored in the
 * intonation control blocks.
 *
 * x is a percentage of the span between modulation->mintone
 * and modulation->maxtone: 0 selects mintone, 100 selects maxtone.
 * The result is pitch_factor divided by the tone selected this way.
 */
static unsigned int eval_tone(unsigned int x, unsigned int pitch_factor, modulation_t *modulation)
{
  /* Tone that lies x percent of the way from mintone to maxtone */
  const unsigned int tone =
    (modulation->maxtone - modulation->mintone) * x / 100 + modulation->mintone;

  return pitch_factor / tone;
}
288

289
/* Returns index where process was stopped */
290
static uint16_t setstage(soundscript_t *script, uint16_t start_index, uint8_t value)
291
{
292
  uint16_t i = start_index;
293

294
  while (i < script->length)
295
    {
296
      uint8_t prev = script->sounds[i].stage;
297
      script->sounds[i++].stage = value;
298
      if (script->sounds[i].stage <= prev)
299
        break;
300
    }
301
  return i;
302
}
303

304

305
/* Global entry points */
306

307
/*
308
 * Setup modulation parameters according to specified voice pitch
309
 * and intonation level expressed as a percentage of the default values.
310
 */
311
void modulation_setup(modulation_t *modulation, int voice_pitch, int intonation)
312
{
313
  /* Adjust voice pitch */
314
  if (voice_pitch < 50)
315
    modulation->mintone = 50;
316
  else if (voice_pitch > 300)
317
    modulation->mintone = 300;
318
  else modulation->mintone = (uint16_t) voice_pitch;
319
  modulation->maxtone = modulation->mintone;
320

321
  /* Adjust intonation */
322
  if (intonation > 0)
323
    modulation->maxtone += (intonation < 140) ?
324
      (((modulation->mintone >> 1) + 25) * intonation / 100) :
325
      (modulation->mintone * 7 / 10 + 35);
326
}
327

328
/* Apply intonation parameters to the sound script */
329
void apply_intonation(uint8_t *transcription, soundscript_t *soundscript,
330
                      modulation_t *modulation, uint8_t clause_type)
331
{
332
  uint16_t i = TRANSCRIPTION_START;
333
  uint16_t nspeechmarks = 0;
334
  int bp;
335

336
  while (i < TRANSCRIPTION_BUFFER_SIZE)
337
    {
338
      bp = search_breakpoint(transcription, i);
339
      if (bp < 0)
340
        break;
341
      if (bp != 54)
342
        nspeechmarks++;
343
      while (((++i) < TRANSCRIPTION_BUFFER_SIZE) &&
344
             ((transcription[i] >= 53) || (transcription[i] < 43)));
345
    }
346

347
  for (i = 0; i < NSTAGES; i++)
348
    {
349
      soundscript->icb[i].count = 1;
350
      soundscript->icb[i].period = 1;
351
    }
352

353
  if (nspeechmarks)
354
    {
355
      uint16_t coef[NSTAGES];
356
      uint16_t prevk = 256;
357
      uint16_t j = 0;
358
      uint8_t m = 0;
359
      uint8_t st4 = 0;
360
      uint8_t stage = 0;
361

362
      for (i = 0; i < NSTAGES; i++)
363
        {
364
          soundscript->icb[i].stretch = 90;
365
          soundscript->icb[i].delta = 0;
366
        }
367

368
      for (i = TRANSCRIPTION_START; j < soundscript->length; i++)
369
        {
370
          if (!m)
371
            {
372
              if (nspeechmarks == 1)
373
                stage = 8;
374
              else if (st4)
375
                stage = 4;
376
              else
377
                {
378
                  stage = 0;
379
                  st4 = 1;
380
                }
381
              bp = search_breakpoint(transcription, i);
382
              m = ((bp != 53) && (bp != 54)) ? 1 : 2;
383
            }
384

385
          if (m < 3)
386
            {
387
              if ((m < 2) && (transcription[i] > 5))
388
                {
389
                  j = setstage(soundscript, j, stage);
390
                  continue;
391
                }
392
              else if ((m > 1) && ((transcription[i] > 5) || (transcription[i + 1] != 53)))
393
                {
394
                  if (transcription[i] != 54)
395
                    j = setstage(soundscript, j, stage);
396
                  continue;
397
                }
398

399
              m = soundscript->sounds[j].stage;
400
              while (j < soundscript->length)
401
                {
402
                  uint8_t l = m;
403
                  if (m == 4)
404
                    m = 3;
405
                  soundscript->sounds[j++].stage = m + stage;
406
                  m = soundscript->sounds[j].stage;
407
                  if (l >= m)
408
                    break;
409
                }
410

411
              m = 3;
412
              continue;
413
            }
414

415
          if (m > 2)
416
            {
417
              uint8_t l = transcription[i];
418
              if (l < 53)
419
                {
420
                  if (l < 43)
421
                    j = setstage(soundscript, j, stage + 3);
422
                  else if (l != 43)
423
                    break;
424
                  else
425
                    {
426
                      j++;
427
                      bp = search_breakpoint(transcription, i + 1);
428
                      if (bp < 0)
429
                        break;
430
                      else if (bp != 54)
431
                        {
432
                          nspeechmarks--;
433
                          m = 0;
434
                        }
435
                    }
436
                }
437
            }
438
        }
439

440
      memset(coef, 0, sizeof(coef));
441
      for (i = 0; i < soundscript->length; i++)
442
        {
443
          uint8_t j = soundscript->sounds[i].id;
444
          uint8_t k = soundscript->sounds[i].stage;
445
          if (soundscript->voice->sound_lengths[j] < VOICE_THRESHOLD)
446
            {
447
              uint16_t l = soundscript->sounds[i].duration;
448
              l /= 10;
449
              coef[k] += l;
450
            }
451
        }
452

453
      for (i = 0; i < soundscript->length; i++)
454
        {
455
          uint8_t j = soundscript->sounds[i].id;
456
          uint8_t k = soundscript->sounds[i].stage;
457
          if ((prevk != ((uint16_t)k)) && (soundscript->voice->sound_lengths[j] < VOICE_THRESHOLD))
458
            {
459
              int q = 0;
460
              int tone1 = eval_tone(intonations[clause_type][k][0], soundscript->voice->pitch_factor, modulation);
461
              int tone2 = eval_tone(intonations[clause_type][k][1], soundscript->voice->pitch_factor, modulation) - tone1;
462
              soundscript->icb[k].period = 1;
463
              soundscript->icb[k].count = 1;
464
              if (tone2)
465
                {
466
                  int t;
467
                  int r;
468
                  int tone3 = tone2;
469
                  tone2 = tone1;
470
                  tone1 += tone3 >> 1;
471
                  t = (coef[k] * 10 + (tone1 >> 1)) / tone1;
472
                  if (!t)
473
                    t++;
474
                  q = tone3 / t;
475
                  r = tone3 % t;
476
                  if (q)
477
                    {
478
                      if (r < 0)
479
                        {
480
                          r = (-r) << 1;
481
                          if (r >= t)
482
                            q--;
483
                        }
484
                      else
485
                        {
486
                          r <<= 1;
487
                          if (r >= t)
488
                            q++;
489
                        }
490
                    }
491
                  else
492
                    {
493
                      q = (r < 0) ? -1 : 1;
494
                      if (r < 0)
495
                        r = -r;
496
                      t = (t + (r >> 1)) / r;
497
                      t &= 0xFF;
498
                      soundscript->icb[k].period = t;
499
                      soundscript->icb[k].count = t;
500
                    }
501
                }
502
              else tone2 = tone1;
503
              soundscript->icb[k].stretch = tone2;
504
              soundscript->icb[k].delta = q;
505
              prevk = (uint16_t)k;
506
            }
507
          else if (prevk != ((uint16_t)k))
508
            prevk = 256;
509
        }
510
    }
511
  else
512
    {
513
      for (i = 0; i < NSTAGES; i++)
514
        {
515
          soundscript->icb[i].stretch = VOICE_THRESHOLD;
516
          soundscript->icb[i].delta = 0;
517
        }
518

519
      for (i = 0; i < soundscript->length; i = setstage(soundscript, i, 0));
520
    }
521
}
522
ru_tts

Использование cookies