1
/* numerics.c -- Numbers phonetic transcription
3
* Copyright (C) 1990, 1991 Speech Research Laboratory, Minsk
4
* Copyright (C) 2005 Igor Poretsky <poretsky@mlbox.ru>
5
* Copyright (C) 2021 Boris Lobanov <lobbormef@gmail.com>
6
* Copyright (C) 2021 Alexander Ivanov <ivalex01@gmail.com>
8
* SPDX-License-Identifier: MIT
15
#include "transcription.h"
22
#define NUMBER_FRACTION 1
28
/* Predefined transcriptions */
29
/*
 * Transcriptions of the single digits.  Every row is one length-prefixed
 * record: its first value is the number of codes that follow in that row.
 * transcribe_digit() hands (digit - '0') to put_transcription() as the
 * item number for this list.
 * NOTE(review): the range comment promises ten records (0..9), but fewer
 * rows are present here and the initializer braces are absent -- confirm
 * this table against the upstream source before relying on it.
 */
static const uint8_t primary[] = /* 0..9 */
35
7, 33, 3, 27, 4, 53, 13, 3,
39
6, 6, 1, 53, 38, 3, 15,
40
6, 24, 3, 53, 11, 2, 30
42
/*
 * Transcriptions for the numbers 10..19, one length-prefixed record per
 * number: the first value of each row is the count of codes that follow it.
 * process_number() passes the character after the tens digit, minus '0',
 * as the item number for this list.
 */
static const uint8_t secondary[] = /* 10..19 */
{
  6, 24, 3, 53, 38, 2, 30,
  9, 2, 24, 5, 53, 16, 2, 32, 2, 30,
  9, 21, 11, 3, 16, 2, 53, 32, 2, 30,
  9, 27, 13, 5, 16, 2, 53, 32, 2, 30,
  11, 33, 3, 27, 4, 53, 8, 16, 2, 32, 2, 30,
  9, 29, 2, 27, 16, 2, 53, 32, 2, 30,
  9, 36, 3, 35, 16, 2, 53, 32, 2, 30,
  9, 38, 3, 15, 16, 2, 53, 32, 2, 30,
  11, 6, 2, 38, 3, 15, 16, 2, 53, 32, 2, 30,
  11, 24, 3, 11, 2, 27, 16, 2, 53, 32, 2, 30
};
55
/*
 * Transcriptions for the round tens 20..90, one length-prefixed record per
 * value: the first value of each row is the count of codes that follow it.
 * process_number() passes (c - '2') as the item number, so record 0
 * belongs to the tens digit '2'.
 */
static const uint8_t tens[] = /* 20..90 */
{
  7, 21, 6, 2, 53, 32, 2, 30,
  7, 27, 13, 5, 53, 32, 2, 30,
  6, 35, 1, 53, 8, 2, 28,
  8, 29, 2, 24, 3, 38, 2, 53, 27,
  9, 36, 3, 12, 24, 3, 38, 2, 53, 27,
  9, 38, 3, 53, 15, 24, 3, 38, 2, 27,
  11, 6, 1, 53, 38, 3, 15, 24, 3, 38, 2, 27,
  10, 24, 3, 11, 2, 16, 1, 53, 35, 27, 2
};
66
/*
 * Transcriptions of the round hundreds.  Every row is one length-prefixed
 * record: its first value is the number of codes that follow in that row.
 * process_number() passes (c - '1') as the item number for this list.
 * NOTE(review): the range comment promises nine records (100..900), but
 * only eight rows are present here and the initializer braces are absent
 * -- confirm this table against the upstream source.
 */
static const uint8_t hundreds[] = /* 100..900 */
69
7, 21, 11, 3, 53, 38, 30, 5,
70
7, 27, 13, 5, 53, 35, 27, 2,
71
10, 33, 3, 27, 4, 53, 13, 3, 35, 27, 2,
72
7, 29, 2, 27, 35, 1, 53, 27,
73
7, 36, 3, 35, 35, 1, 53, 27,
74
7, 38, 3, 15, 35, 1, 53, 27,
75
9, 6, 2, 38, 3, 15, 35, 1, 53, 27,
76
9, 24, 3, 11, 2, 27, 35, 1, 53, 27
78
/*
 * Transcriptions appended after a completed group of three digits, one
 * length-prefixed record per group rank: the first value of each row is
 * the count of codes that follow it.  process_number() passes
 * (triplets - 1) as the item number for this list.
 */
static const uint8_t periods[] =
{
  6, 27, 4, 53, 38, 2, 33,
  7, 18, 5, 17, 5, 1, 53, 16,
  8, 18, 5, 17, 5, 2, 53, 8, 27,
  8, 27, 13, 5, 17, 5, 1, 53, 16
};
85
/*
 * Transcriptions used for the fractional part of a number.  Every row is
 * one length-prefixed record: its first value is the number of codes that
 * follow in that row.  process_number() passes (n - 1) as the item number
 * for this list.
 * NOTE(review): the number of records cannot be checked against the range
 * of n from here, and the initializer braces are absent -- confirm this
 * table against the upstream source.
 */
static const uint8_t fractions[] =
87
6, 24, 3, 38, 2, 53, 27,
89
7, 27, 4, 53, 38, 2, 33, 16,
90
13, 24, 3, 38, 2, 30, 5, 27, 4, 53, 38, 2, 33, 16,
91
10, 35, 27, 1, 27, 4, 53, 38, 2, 33, 16,
92
8, 18, 5, 17, 5, 1, 53, 16, 16,
93
14, 24, 3, 38, 2, 30, 5, 18, 5, 17, 5, 1, 53, 16, 16,
94
11, 35, 27, 1, 18, 5, 17, 5, 1, 53, 16, 16,
95
9, 18, 5, 17, 5, 2, 53, 8, 27, 16,
96
15, 24, 3, 38, 2, 30, 5, 18, 5, 17, 5, 2, 53, 8, 27, 16,
97
12, 35, 27, 1, 18, 5, 17, 5, 2, 53, 8, 27, 16,
98
9, 27, 13, 5, 17, 5, 1, 53, 16, 16,
99
15, 24, 3, 38, 2, 30, 5, 27, 13, 5, 17, 5, 1, 53, 16, 16,
100
12, 35, 27, 1, 27, 13, 5, 17, 5, 1, 53, 16, 16
102
/*
 * List handed to put_transcription() by process_number() with the item
 * numbers 0, 1 and 2.
 * NOTE(review): the initializer of this table is not present here --
 * restore it from the upstream source.
 */
static const uint8_t suffixes[] =
108
/*
 * Raw code sequence (no length prefix) written directly with sink_write().
 * process_number() emits either the first 5 or 6 codes, or all 14; the
 * 5-code form merely drops the trailing 43 of the first row.
 */
static const uint8_t one_int[] =
{
  2, 21, 16, 2, 53, 43,
  32, 3, 53, 14, 2, 10, 2, 43
};
113
/*
 * Raw code sequence (no length prefix).  process_number() writes its first
 * 6 codes, or only 5 (dropping the trailing 43) when s[0] is a space.
 */
static const uint8_t one_o[] = {
  2, 21, 16, 1, 53,
  43
};
114
/*
 * Raw code sequence (no length prefix).  process_number() writes its first
 * 5 codes, or only 4 (dropping the trailing 43) when s[0] is a space.
 */
static const uint8_t two_e[] = {
  21, 11, 3, 53,
  43
};
115
/*
 * Raw code sequence (no length prefix).  process_number() writes all 6
 * codes once a decimal separator followed by a digit has been recognised.
 */
static const uint8_t n_ints[] = {
  32, 3, 53,
  14, 4, 40
};
118
/* Local subroutines */
120
/* Pass specified list item to the consumer */
121
static void put_transcription(sink_t *consumer, const uint8_t *lst, uint8_t n)
123
const uint8_t *item = list_item(lst, n);
124
sink_write(consumer, item + 1, item[0]);
127
/* Output transcription for specified digit */
128
static void transcribe_digit(sink_t *consumer, char digit, char following)
130
put_transcription(consumer, primary, digit - '0');
131
if (following != ' ')
132
sink_put(consumer, 43);
135
/* Return true if specified character may be treated as decimal point. */
136
static int check_dec_point(sink_t *consumer, char c)
138
int flags = ((ttscb_t *) (consumer->user_data))->flags;
139
return ((flags & DEC_SEP_POINT) && (c == '.')) ||
140
((flags & DEC_SEP_COMMA) && (c == ','));
144
/* Global functions */
146
/*
 * Transcribe a numeric string from the input and pass the result to the
 * consumer.
 *
 * input:    the text is read through input->start, which this function
 *           advances, and is bounded by input->end.
 * consumer: sink that receives the codes; its status field is polled and
 *           its user_data supplies the decimal separator flags through
 *           check_dec_point().
 *
 * The visible statements pick records from the hundreds, tens, secondary,
 * primary (via transcribe_digit), periods, fractions and suffixes lists
 * and write the one_int, one_o, two_e and n_ints sequences directly.
 *
 * NOTE(review): large parts of this body (braces, local declarations and
 * several branches) are not present in this copy, so the comments below
 * describe single statements only -- restore the function from the
 * upstream source before changing its logic.
 */
147
void process_number(input_t *input, sink_t *consumer)
152
/* The loop header itself moves input->start forward while IS_DIGIT(c)
   holds and the end of the input has not been reached. */
for (c = input->start[0]; IS_DIGIT(c) && (input->start < input->end); c = *(++input->start))
156
uint8_t triplets = 0;
162
/* Presumably guarantees that the output ends with code 43 before the
   number itself is emitted -- confirm against the full body. */
if (sink_last(consumer) != 43)
163
sink_put(consumer, 43);
164
for (s = input->start + 1; s < input->end; s++)
177
else if ((flags &NUMBER_FRACTION) && (triplets > 3) && (digits > 1))
181
/* Total length: whole groups of three plus the remaining digits. */
n = triplets * 3 + digits;
184
/* input->start is moved past those n characters up front; the loop then
   runs until the consumer reports a non-zero status. */
for (input->start += n; !consumer->status; s++)
190
else if (IS_DIGIT(s[1]))
192
if ((c != '0') || !((flags & NON_ZERO) || IS_DIGIT(s[1])))
198
put_transcription(consumer, hundreds, c - '1');
199
sink_put(consumer, 43);
208
sink_write(consumer, one_int, 6);
217
/* The trailing 43 of the sequence is left out when s[0] is a space. */
sink_write(consumer, one_int, (s[0] != ' ') ? 6 : 5);
220
else if (s[1] == 'O')
224
sink_write(consumer, one_o, (s[0] != ' ') ? 6 : 5);
229
if (flags & NUMBER_FRACTION)
231
if ((s < input->end) && IS_DIGIT(s[2]))
232
transcribe_digit(consumer, c, s[0]);
233
else sink_write(consumer, one_int, 6);
235
else if ((s >= input->end) || !check_dec_point(consumer, s[1]) || !IS_DIGIT(s[2]))
236
transcribe_digit(consumer, c, s[0]);
237
/* Writes both rows of one_int (all 14 codes). */
else sink_write(consumer, one_int, 14);
247
if ((triplets == 0) && (s[2] == '+') && (s[1] == 'E'))
251
sink_write(consumer, two_e, (s[0] != ' ') ? 5 : 4);
254
else if ((triplets == 1) ||
255
((flags & NUMBER_FRACTION) && (s == (input->start - 1))) ||
256
(check_dec_point(consumer, s[1]) && IS_DIGIT(s[2])))
258
sink_write(consumer, two_e, 5);
263
transcribe_digit(consumer, c, s[0]);
268
/* Consumes the next character as well: it selects the 10..19 record. */
put_transcription(consumer, secondary, *(++s) - '0');
272
else put_transcription(consumer, tens, c - '2');
273
sink_put(consumer, 43);
278
transcribe_digit(consumer, c, s[0]);
300
if (flags & NON_ZERO)
302
put_transcription(consumer, periods, triplets - 1);
307
/* A trailing code 27 is swapped for 21 before code 2 is appended. */
if (sink_last(consumer) == 27)
308
sink_replace(consumer, 21);
310
sink_put(consumer, 2);
311
else put_transcription(consumer, suffixes, 2);
315
sink_put(consumer, (nc > 1) ? 5 : 2);
316
sink_flush(consumer);
330
if (consumer->status)
332
else if (flags & NUMBER_FRACTION)
334
sink_put(consumer, 43);
335
put_transcription(consumer, fractions, n - 1);
336
/* Suffix record 1 is used only when nc equals 1, record 0 otherwise. */
put_transcription(consumer, suffixes, (nc != 1) ? 0 : 1);
339
/* A decimal separator that is directly followed by a digit marks the
   number as having a fractional part. */
else if (((input->start + 1) < input->end) && check_dec_point(consumer, input->start[0]) && IS_DIGIT(input->start[1]))
341
flags |= NUMBER_FRACTION;
342
sink_put(consumer, 43);
345
sink_write(consumer, n_ints, 6);
346
sink_flush(consumer);
351
sink_put(consumer, 43);