ru_tts
1/* intonator.c -- Calculate and apply intonation
2*
3* Copyright (C) 1990, 1991 Speech Research Laboratory, Minsk
4* Copyright (C) 2005 Igor Poretsky <poretsky@mlbox.ru>
5* Copyright (C) 2021 Boris Lobanov <lobbormef@gmail.com>
6* Copyright (C) 2021 Alexander Ivanov <ivalex01@gmail.com>
7*
8* SPDX-License-Identifier: MIT
9*/
10
11#include <stdint.h>
12#include <stdlib.h>
13#include <string.h>
14
15#include "transcription.h"
16#include "soundscript.h"
17#include "modulation.h"
18
19
20/* Static data */
21
22/* Intonation plans for various clause types */
23static const uint8_t intonations[][NSTAGES][2] =
24{
25{
26{ 30, 40 },
27{ 40, 50 },
28{ 50, 50 },
29{ 50, 40 },
30{ 40, 50 },
31{ 50, 60 },
32{ 60, 50 },
33{ 50, 40 },
34{ 40, 40 },
35{ 40, 60 },
36{ 60, 70 },
37{ 70, 70 }
38},
39{
40{ 30, 40 },
41{ 40, 50 },
42{ 50, 50 },
43{ 50, 40 },
44{ 40, 45 },
45{ 45, 50 },
46{ 50, 60 },
47{ 60, 40 },
48{ 40, 25 },
49{ 25, 15 },
50{ 15, 30 },
51{ 30, 60 }
52},
53{
54{ 30, 60 },
55{ 60, 70 },
56{ 70, 60 },
57{ 60, 40 },
58{ 40, 50 },
59{ 50, 60 },
60{ 60, 50 },
61{ 50, 40 },
62{ 40, 40 },
63{ 40, 60 },
64{ 60, 70 },
65{ 70, 70 }
66},
67{
68{ 30, 60 },
69{ 60, 70 },
70{ 70, 60 },
71{ 60, 40 },
72{ 40, 50 },
73{ 50, 60 },
74{ 60, 50 },
75{ 50, 40 },
76{ 40, 40 },
77{ 40, 60 },
78{ 60, 70 },
79{ 70, 70 }
80},
81{
82{ 30, 60 },
83{ 60, 70 },
84{ 70, 60 },
85{ 60, 40 },
86{ 40, 50 },
87{ 50, 60 },
88{ 60, 50 },
89{ 50, 40 },
90{ 40, 40 },
91{ 40, 60 },
92{ 60, 70 },
93{ 70, 70 }
94},
95{
96{ 30, 40 },
97{ 40, 50 },
98{ 50, 60 },
99{ 60, 70 },
100{ 70, 20 },
101{ 20, 40 },
102{ 40, 60 },
103{ 60, 40 },
104{ 40, 50 },
105{ 50, 20 },
106{ 20, 0 },
107{ 0, 0 }
108},
109{
110{ 30, 30 },
111{ 35, 40 },
112{ 40, 40 },
113{ 40, 40 },
114{ 40, 20 },
115{ 20, 30 },
116{ 30, 40 },
117{ 40, 40 },
118{ 30, 20 },
119{ 20, 0 },
120{ 0, 0 },
121{ 0, 0 }
122},
123{
124{ 30, 40 },
125{ 40, 50 },
126{ 50, 60 },
127{ 60, 60 },
128{ 60, 20 },
129{ 20, 40 },
130{ 40, 60 },
131{ 60, 60 },
132{ 40, 60 },
133{ 60, 35 },
134{ 35, 20 },
135{ 20, 0 }
136},
137{
138{ 30, 45 },
139{ 45, 65 },
140{ 65, 80 },
141{ 80, 80 },
142{ 80, 40 },
143{ 40, 60 },
144{ 60, 80 },
145{ 80, 80 },
146{ 40, 80 },
147{ 80, 50 },
148{ 50, 20 },
149{ 20, 0 }
150},
151{
152{ 20, 30 },
153{ 30, 25 },
154{ 25, 25 },
155{ 25, 20 },
156{ 20, 30 },
157{ 30, 25 },
158{ 25, 25 },
159{ 25, 20 },
160{ 20, 30 },
161{ 30, 25 },
162{ 25, 20 },
163{ 20, 10 }
164},
165{
166{ 40, 80 },
167{ 80, 80 },
168{ 80, 80 },
169{ 80, 70 },
170{ 70, 70 },
171{ 70, 65 },
172{ 65, 60 },
173{ 60, 60 },
174{ 60, 80 },
175{ 80, 100 },
176{ 100, 100 },
177{ 100, 10 }
178},
179{
180{ 40, 80 },
181{ 80, 90 },
182{ 90, 90 },
183{ 90, 80 },
184{ 80, 80 },
185{ 80, 80 },
186{ 80, 80 },
187{ 80, 80 },
188{ 80, 60 },
189{ 60, 40 },
190{ 40, 20 },
191{ 20, 0 }
192},
193{
194{ 40, 80 },
195{ 80, 80 },
196{ 80, 80 },
197{ 80, 70 },
198{ 70, 70 },
199{ 70, 65 },
200{ 65, 60 },
201{ 60, 60 },
202{ 60, 80 },
203{ 80, 100 },
204{ 100, 100 },
205{ 100, 10 }
206},
207{
208{ 40, 100 },
209{ 100, 100 },
210{ 100, 100 },
211{ 100, 50 },
212{ 100, 100 },
213{ 100, 100 },
214{ 100, 100 },
215{ 100, 100 },
216{ 50, 80 },
217{ 80, 100 },
218{ 100, 40 },
219{ 20, 0 }
220},
221{
222{ 40, 100 },
223{ 100, 100 },
224{ 100, 100 },
225{ 100, 100 },
226{ 100, 100 },
227{ 100, 100 },
228{ 100, 100 },
229{ 100, 100 },
230{ 40, 80 },
231{ 80, 100 },
232{ 100, 20 },
233{ 20, 0 }
234},
235{
236{ 40, 100 },
237{ 100, 100 },
238{ 100, 100 },
239{ 100, 100 },
240{ 100, 100 },
241{ 100, 100 },
242{ 100, 100 },
243{ 100, 100 },
244{ 100, 70 },
245{ 70, 50 },
246{ 50, 20 },
247{ 20, 0 }
248}
249};
250
251
252/* Local routines */
253
254/*
255* Search transcription buffer for a speech breakpoint
256* ahead from specified index.
257*
258* Returns the last checked code or -1 if nothing found.
259*/
260static int search_breakpoint(uint8_t *transcription, int start_index)
261{
262int rc = -1;
263int i;
264
265for (i = start_index; i < TRANSCRIPTION_BUFFER_SIZE; i++)
266{
267rc = transcription[i];
268if (rc != 43)
269{
270if (rc > 43)
271return -1;
272break;
273}
274}
275for (i++; i < TRANSCRIPTION_BUFFER_SIZE; i++)
276{
277rc = transcription[i];
278if (rc > 42)
279return rc;
280}
281return -1;
282}
283
284static unsigned int eval_tone(unsigned int x, unsigned int pitch_factor, modulation_t *modulation)
285{
286return pitch_factor / ((modulation->maxtone - modulation->mintone) * x / 100 + modulation->mintone);
287}
288
289/* Returns index where process was stopped */
290static uint16_t setstage(soundscript_t *script, uint16_t start_index, uint8_t value)
291{
292uint16_t i = start_index;
293
294while (i < script->length)
295{
296uint8_t prev = script->sounds[i].stage;
297script->sounds[i++].stage = value;
298if (script->sounds[i].stage <= prev)
299break;
300}
301return i;
302}
303
304
305/* Global entry points */
306
307/*
308* Setup modulation parameters according to specified voice pitch
309* and intonation level expressed as a percentage of the default values.
310*/
311void modulation_setup(modulation_t *modulation, int voice_pitch, int intonation)
312{
313/* Adjust voice pitch */
314if (voice_pitch < 50)
315modulation->mintone = 50;
316else if (voice_pitch > 300)
317modulation->mintone = 300;
318else modulation->mintone = (uint16_t) voice_pitch;
319modulation->maxtone = modulation->mintone;
320
321/* Adjust intonation */
322if (intonation > 0)
323modulation->maxtone += (intonation < 140) ?
324(((modulation->mintone >> 1) + 25) * intonation / 100) :
325(modulation->mintone * 7 / 10 + 35);
326}
327
/*
 * Apply intonation parameters to the sound script.
 *
 * transcription: transcription buffer, scanned from TRANSCRIPTION_START.
 * soundscript:   sound script whose per-sound stages and per-stage
 *                icb[] entries (count, period, stretch, delta) are filled.
 * modulation:    tone range passed through to eval_tone().
 * clause_type:   first index into the intonations table.
 *
 * The work is done in these steps:
 *   1. Count the speech marks in the transcription.
 *   2. Reset count and period of every icb entry to 1.
 *   3. If there are speech marks: assign a stage to every sound by
 *      walking the transcription and the sound script in parallel,
 *      accumulate per-stage durations, then derive stretch, delta,
 *      period and count for each stage from the intonation plan.
 *   4. Otherwise: set every stretch to VOICE_THRESHOLD, every delta
 *      to 0 and every sound's stage to 0.
 *
 * NOTE(review): clause_type and the stage values used as indices are
 * not range-checked here; presumably callers guarantee valid values.
 */
void apply_intonation(uint8_t *transcription, soundscript_t *soundscript,
                      modulation_t *modulation, uint8_t clause_type)
{
  uint16_t i = TRANSCRIPTION_START;
  uint16_t nspeechmarks = 0;
  int bp;

  /* Step 1: count breakpoints whose code is not 54 */
  while (i < TRANSCRIPTION_BUFFER_SIZE)
    {
      bp = search_breakpoint(transcription, i);
      if (bp < 0)
        break;
      if (bp != 54)
        nspeechmarks++;
      /* Advance to the next code in the range 43..52 (empty loop body) */
      while (((++i) < TRANSCRIPTION_BUFFER_SIZE) &&
             ((transcription[i] >= 53) || (transcription[i] < 43)));
    }

  /* Step 2: default count and period for all stages */
  for (i = 0; i < NSTAGES; i++)
    {
      soundscript->icb[i].count = 1;
      soundscript->icb[i].period = 1;
    }

  if (nspeechmarks)
    {
      uint16_t coef[NSTAGES]; /* per-stage sum of duration / 10 */
      uint16_t prevk = 256;   /* last processed stage; 256 cannot match any uint8_t stage */
      uint16_t j = 0;         /* current index in the sound script */
      uint8_t m = 0;          /* scan state: 0 = start a new group, 1 or 2 = before the
                                 renumbered run, 3 = after it */
      uint8_t st4 = 0;        /* set once base stage 0 has been used */
      uint8_t stage = 0;      /* base stage of the current group: 0, 4 or 8 */

      for (i = 0; i < NSTAGES; i++)
        {
          soundscript->icb[i].stretch = 90;
          soundscript->icb[i].delta = 0;
        }

      /*
       * Step 3a: stage assignment.
       * NOTE(review): this loop is bounded only by j; i is never checked
       * against TRANSCRIPTION_BUFFER_SIZE, and transcription[i + 1] is
       * read below. Presumably the transcription always ends the walk
       * before the buffer end -- confirm.
       */
      for (i = TRANSCRIPTION_START; j < soundscript->length; i++)
        {
          if (!m)
            {
              /*
               * Choose the base stage: 8 when a single speech mark is
               * left, 0 for the first group otherwise, 4 for later ones.
               */
              if (nspeechmarks == 1)
                stage = 8;
              else if (st4)
                stage = 4;
              else
                {
                  stage = 0;
                  st4 = 1;
                }
              bp = search_breakpoint(transcription, i);
              /* State 2 when the upcoming breakpoint is 53 or 54, else state 1 */
              m = ((bp != 53) && (bp != 54)) ? 1 : 2;
            }

          if (m < 3)
            {
              if ((m < 2) && (transcription[i] > 5))
                {
                  /* State 1, code above 5: plain run at the base stage */
                  j = setstage(soundscript, j, stage);
                  continue;
                }
              else if ((m > 1) && ((transcription[i] > 5) || (transcription[i + 1] != 53)))
                {
                  /*
                   * State 2: same, until a code not above 5 that is
                   * directly followed by 53; code 54 consumes no sounds.
                   */
                  if (transcription[i] != 54)
                    j = setstage(soundscript, j, stage);
                  continue;
                }

              /*
               * Renumber the run starting at j: each sound keeps its own
               * stage (4 is mapped to 3) offset by the base stage.  m is
               * reused here as a scratch copy of the sound's stage.
               * NOTE(review): sounds[j] is read after j++ and may then be
               * at index length, one past the last script entry.
               */
              m = soundscript->sounds[j].stage;
              while (j < soundscript->length)
                {
                  uint8_t l = m;
                  if (m == 4)
                    m = 3;
                  soundscript->sounds[j++].stage = m + stage;
                  m = soundscript->sounds[j].stage;
                  if (l >= m)
                    break;
                }

              m = 3;
              continue;
            }

          if (m > 2)
            {
              /* State 3: codes of 53 and above are ignored */
              uint8_t l = transcription[i];
              if (l < 53)
                {
                  if (l < 43)
                    j = setstage(soundscript, j, stage + 3);
                  else if (l != 43)
                    break; /* codes 44..52 end the walk */
                  else
                    {
                      /* Code 43: skip one sound and look for the next group */
                      j++;
                      bp = search_breakpoint(transcription, i + 1);
                      if (bp < 0)
                        break;
                      else if (bp != 54)
                        {
                          nspeechmarks--;
                          m = 0;
                        }
                    }
                }
            }
        }

      /*
       * Step 3b: accumulate, per stage, the durations (in tenths) of the
       * sounds whose sound_lengths entry is below VOICE_THRESHOLD.
       * The local j below shadows the outer script index.
       */
      memset(coef, 0, sizeof(coef));
      for (i = 0; i < soundscript->length; i++)
        {
          uint8_t j = soundscript->sounds[i].id;
          uint8_t k = soundscript->sounds[i].stage;
          if (soundscript->voice->sound_lengths[j] < VOICE_THRESHOLD)
            {
              uint16_t l = soundscript->sounds[i].duration;
              l /= 10;
              coef[k] += l;
            }
        }

      /*
       * Step 3c: at each change of stage on a sound below VOICE_THRESHOLD,
       * compute that stage's icb entry from the intonation plan.
       */
      for (i = 0; i < soundscript->length; i++)
        {
          uint8_t j = soundscript->sounds[i].id;
          uint8_t k = soundscript->sounds[i].stage;
          if ((prevk != ((uint16_t)k)) && (soundscript->voice->sound_lengths[j] < VOICE_THRESHOLD))
            {
              int q = 0;
              /* tone1: value at the stage start; tone2: end value minus start value */
              int tone1 = eval_tone(intonations[clause_type][k][0], soundscript->voice->pitch_factor, modulation);
              int tone2 = eval_tone(intonations[clause_type][k][1], soundscript->voice->pitch_factor, modulation) - tone1;
              soundscript->icb[k].period = 1;
              soundscript->icb[k].count = 1;
              if (tone2)
                {
                  int t;
                  int r;
                  int tone3 = tone2; /* total change over the stage */
                  tone2 = tone1;     /* from here on tone2 holds the start value */
                  tone1 += tone3 >> 1; /* start value plus half of the change */
                  /* Accumulated stage duration divided by tone1, rounded to nearest; at least 1 */
                  t = (coef[k] * 10 + (tone1 >> 1)) / tone1;
                  if (!t)
                    t++;
                  q = tone3 / t;
                  r = tone3 % t;
                  if (q)
                    {
                      /* Round the quotient away from zero when the remainder is at least t / 2 */
                      if (r < 0)
                        {
                          r = (-r) << 1;
                          if (r >= t)
                            q--;
                        }
                      else
                        {
                          r <<= 1;
                          if (r >= t)
                            q++;
                        }
                    }
                  else
                    {
                      /*
                       * The change is smaller than t: use a unit delta of the
                       * proper sign and store t / |r| (rounded, low 8 bits)
                       * as period and count.  r is non-zero here because
                       * tone3 is non-zero and q is zero.
                       * NOTE(review): presumably the consumer applies delta
                       * once per `period` steps -- confirm in the synthesizer.
                       */
                      q = (r < 0) ? -1 : 1;
                      if (r < 0)
                        r = -r;
                      t = (t + (r >> 1)) / r;
                      t &= 0xFF;
                      soundscript->icb[k].period = t;
                      soundscript->icb[k].count = t;
                    }
                }
              else tone2 = tone1;
              soundscript->icb[k].stretch = tone2;
              soundscript->icb[k].delta = q;
              prevk = (uint16_t)k;
            }
          else if (prevk != ((uint16_t)k))
            prevk = 256; /* stage changed on a skipped sound: forget the previous stage */
        }
    }
  else
    {
      /* Step 4: no speech marks -- flat parameters, every sound in stage 0 */
      for (i = 0; i < NSTAGES; i++)
        {
          soundscript->icb[i].stretch = VOICE_THRESHOLD;
          soundscript->icb[i].delta = 0;
        }

      /* setstage() returns where it stopped; repeat until the script end */
      for (i = 0; i < soundscript->length; i = setstage(soundscript, i, 0));
    }
}
522