Celestia

Форк
0
/
utf8.cpp 
534 строки · 24.9 Кб
1
// utf8.cpp
2
//
3
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
4
//               2018-present, Celestia Development Team
5
//
6
// This program is free software; you can redistribute it and/or
7
// modify it under the terms of the GNU General Public License
8
// as published by the Free Software Foundation; either version 2
9
// of the License, or (at your option) any later version.
10

11
#include "utf8.h"
12

13
#include <cassert>
14
#include <cwctype>
15

16

17
namespace
{

//! One 256-entry normalization page. Entry i holds the normalized code
//! point for the character whose low byte is i within the page's block.
using WGL4Page = std::array<std::uint16_t, 256>;

// clang-format off

//! Page for U+0000..U+00FF. ASCII upper case letters map to lower case,
//! several Latin-1 accented letters map to their unaccented lower case
//! base letter, and a few spacing marks map to U+0020.
constexpr WGL4Page WGL4_Normalization_00{
    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
    0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
    0x0040, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    0x0078, 0x0079, 0x007a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020,
    0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7,
    0x0020, 0x0031, 0x006f, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00c6, 0x0063,
    0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    0x00d0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00d7,
    0x00d8, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00de, 0x00df,
    0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063,
    0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069,
    0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7,
    0x00f8, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079,
};

//! Page for U+0100..U+01FF. Entries below U+0180 mostly map accented
//! Latin letters to an unaccented lower case letter; U+0180..U+01F9 map
//! to themselves.
constexpr WGL4Page WGL4_Normalization_01{
    0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0063,
    0x0063, 0x0063, 0x0063, 0x0063, 0x0063, 0x0063, 0x0064, 0x0064,
    0x0111, 0x0111, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065, 0x0065,
    0x0065, 0x0065, 0x0065, 0x0065, 0x0067, 0x0067, 0x0067, 0x0067,
    0x0067, 0x0067, 0x0067, 0x0067, 0x0068, 0x0068, 0x0127, 0x0127,
    0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069, 0x0069,
    0x0069, 0x0131, 0x0069, 0x0069, 0x006a, 0x006a, 0x006b, 0x006b,
    0x0138, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c,
    0x006c, 0x0142, 0x0142, 0x006e, 0x006e, 0x006e, 0x006e, 0x006e,
    0x006e, 0x006e, 0x014a, 0x014b, 0x006f, 0x006f, 0x006f, 0x006f,
    0x006f, 0x006f, 0x0153, 0x0153, 0x0072, 0x0072, 0x0072, 0x0072,
    0x0072, 0x0072, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073, 0x0073,
    0x0073, 0x0073, 0x0074, 0x0074, 0x0074, 0x0074, 0x0167, 0x0167,
    0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075, 0x0075,
    0x0075, 0x0075, 0x0075, 0x0075, 0x0077, 0x0077, 0x0079, 0x0079,
    0x0079, 0x007a, 0x007a, 0x007a, 0x007a, 0x007a, 0x007a, 0x0073,
    0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187,
    0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f,
    0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197,
    0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f,
    0x01a0, 0x01a1, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7,
    0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x01af,
    0x01b0, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7,
    0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf,
    0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x01c4, 0x01c5, 0x01c6, 0x01c7,
    0x01c8, 0x01c9, 0x01ca, 0x01cb, 0x01cc, 0x01cd, 0x01ce, 0x01cf,
    0x01d0, 0x01d1, 0x01d2, 0x01d3, 0x01d4, 0x01d5, 0x01d6, 0x01d7,
    0x01d8, 0x01d9, 0x01da, 0x01db, 0x01dc, 0x01dd, 0x01de, 0x01df,
    0x01e0, 0x01e1, 0x01e2, 0x01e3, 0x01e4, 0x01e5, 0x01e6, 0x01e7,
    0x01e8, 0x01e9, 0x01ea, 0x01eb, 0x01ec, 0x01ed, 0x01ee, 0x01ef,
    0x01f0, 0x01f1, 0x01f2, 0x01f3, 0x01f4, 0x01f5, 0x01f6, 0x01f7,
    0x01f8, 0x01f9, 0x00e5, 0x00e5, 0x00e6, 0x00e6, 0x00f8, 0x00f8,
};

//! Page for U+0200..U+02FF. Identity except for U+02D8..U+02DB and
//! U+02DD, which map to U+0020.
constexpr WGL4Page WGL4_Normalization_02{
    0x0200, 0x0201, 0x0202, 0x0203, 0x0204, 0x0205, 0x0206, 0x0207,
    0x0208, 0x0209, 0x020a, 0x020b, 0x020c, 0x020d, 0x020e, 0x020f,
    0x0210, 0x0211, 0x0212, 0x0213, 0x0214, 0x0215, 0x0216, 0x0217,
    0x0218, 0x0219, 0x021a, 0x021b, 0x021c, 0x021d, 0x021e, 0x021f,
    0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0226, 0x0227,
    0x0228, 0x0229, 0x022a, 0x022b, 0x022c, 0x022d, 0x022e, 0x022f,
    0x0230, 0x0231, 0x0232, 0x0233, 0x0234, 0x0235, 0x0236, 0x0237,
    0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f,
    0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247,
    0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f,
    0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257,
    0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f,
    0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267,
    0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f,
    0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277,
    0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f,
    0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287,
    0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f,
    0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297,
    0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f,
    0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7,
    0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af,
    0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x02b5, 0x02b6, 0x02b7,
    0x02b8, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf,
    0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7,
    0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf,
    0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7,
    0x0020, 0x0020, 0x0020, 0x0020, 0x02dc, 0x0020, 0x02de, 0x02df,
    0x02e0, 0x02e1, 0x02e2, 0x02e3, 0x02e4, 0x02e5, 0x02e6, 0x02e7,
    0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef,
    0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7,
    0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff,
};

//! Page for U+0300..U+03FF. Only entries in U+0384..U+039F differ from
//! the identity mapping.
constexpr WGL4Page WGL4_Normalization_03{
    0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307,
    0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f,
    0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317,
    0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f,
    0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327,
    0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f,
    0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337,
    0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f,
    0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0345, 0x0346, 0x0347,
    0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f,
    0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357,
    0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f,
    0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367,
    0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f,
    0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377,
    0x0378, 0x0379, 0x037a, 0x037b, 0x037c, 0x037d, 0x037e, 0x037f,
    0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x03b1, 0x00b7,
    0x03b5, 0x03b7, 0x03b9, 0x038b, 0x03bf, 0x038d, 0x03c5, 0x03c9,
    0x03ca, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x03cf,
    0x03d0, 0x03d1, 0x03d2, 0x03d3, 0x03d4, 0x03d5, 0x03d6, 0x03d7,
    0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df,
    0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7,
    0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef,
    0x03f0, 0x03f1, 0x03f2, 0x03f3, 0x03f4, 0x03f5, 0x03f6, 0x03f7,
    0x03f8, 0x03f9, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff,
};

//! Page for U+0400..U+04FF. U+0410..U+042F map onto U+0430..U+044F;
//! several entries in U+0401..U+040E and U+0451..U+045E map to a base
//! letter in U+0430..U+044F; U+0490 maps to U+0491.
constexpr WGL4Page WGL4_Normalization_04{
    0x0400, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x040d, 0x0443, 0x045f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x0450, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456,
    0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x045d, 0x0443, 0x045f,
    0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467,
    0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f,
    0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0476, 0x0477,
    0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f,
    0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487,
    0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f,
    0x0491, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497,
    0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f,
    0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7,
    0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af,
    0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7,
    0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf,
    0x04c0, 0x04c1, 0x04c2, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7,
    0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf,
    0x04d0, 0x04d1, 0x04d2, 0x04d3, 0x04d4, 0x04d5, 0x04d6, 0x04d7,
    0x04d8, 0x04d9, 0x04da, 0x04db, 0x04dc, 0x04dd, 0x04de, 0x04df,
    0x04e0, 0x04e1, 0x04e2, 0x04e3, 0x04e4, 0x04e5, 0x04e6, 0x04e7,
    0x04e8, 0x04e9, 0x04ea, 0x04eb, 0x04ec, 0x04ed, 0x04ee, 0x04ef,
    0x04f0, 0x04f1, 0x04f2, 0x04f3, 0x04f4, 0x04f5, 0x04f6, 0x04f7,
    0x04f8, 0x04f9, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff,
};

//! Page for U+1E00..U+1EFF. Identity except U+1E80..U+1E85, which map
//! to 'w', and U+1EF2..U+1EF3, which map to 'y'.
constexpr WGL4Page WGL4_Normalization_1e{
    0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06, 0x1e07,
    0x1e08, 0x1e09, 0x1e0a, 0x1e0b, 0x1e0c, 0x1e0d, 0x1e0e, 0x1e0f,
    0x1e10, 0x1e11, 0x1e12, 0x1e13, 0x1e14, 0x1e15, 0x1e16, 0x1e17,
    0x1e18, 0x1e19, 0x1e1a, 0x1e1b, 0x1e1c, 0x1e1d, 0x1e1e, 0x1e1f,
    0x1e20, 0x1e21, 0x1e22, 0x1e23, 0x1e24, 0x1e25, 0x1e26, 0x1e27,
    0x1e28, 0x1e29, 0x1e2a, 0x1e2b, 0x1e2c, 0x1e2d, 0x1e2e, 0x1e2f,
    0x1e30, 0x1e31, 0x1e32, 0x1e33, 0x1e34, 0x1e35, 0x1e36, 0x1e37,
    0x1e38, 0x1e39, 0x1e3a, 0x1e3b, 0x1e3c, 0x1e3d, 0x1e3e, 0x1e3f,
    0x1e40, 0x1e41, 0x1e42, 0x1e43, 0x1e44, 0x1e45, 0x1e46, 0x1e47,
    0x1e48, 0x1e49, 0x1e4a, 0x1e4b, 0x1e4c, 0x1e4d, 0x1e4e, 0x1e4f,
    0x1e50, 0x1e51, 0x1e52, 0x1e53, 0x1e54, 0x1e55, 0x1e56, 0x1e57,
    0x1e58, 0x1e59, 0x1e5a, 0x1e5b, 0x1e5c, 0x1e5d, 0x1e5e, 0x1e5f,
    0x1e60, 0x1e61, 0x1e62, 0x1e63, 0x1e64, 0x1e65, 0x1e66, 0x1e67,
    0x1e68, 0x1e69, 0x1e6a, 0x1e6b, 0x1e6c, 0x1e6d, 0x1e6e, 0x1e6f,
    0x1e70, 0x1e71, 0x1e72, 0x1e73, 0x1e74, 0x1e75, 0x1e76, 0x1e77,
    0x1e78, 0x1e79, 0x1e7a, 0x1e7b, 0x1e7c, 0x1e7d, 0x1e7e, 0x1e7f,
    0x0077, 0x0077, 0x0077, 0x0077, 0x0077, 0x0077, 0x1e86, 0x1e87,
    0x1e88, 0x1e89, 0x1e8a, 0x1e8b, 0x1e8c, 0x1e8d, 0x1e8e, 0x1e8f,
    0x1e90, 0x1e91, 0x1e92, 0x1e93, 0x1e94, 0x1e95, 0x1e96, 0x1e97,
    0x1e98, 0x1e99, 0x1e9a, 0x1e9b, 0x1e9c, 0x1e9d, 0x1e9e, 0x1e9f,
    0x1ea0, 0x1ea1, 0x1ea2, 0x1ea3, 0x1ea4, 0x1ea5, 0x1ea6, 0x1ea7,
    0x1ea8, 0x1ea9, 0x1eaa, 0x1eab, 0x1eac, 0x1ead, 0x1eae, 0x1eaf,
    0x1eb0, 0x1eb1, 0x1eb2, 0x1eb3, 0x1eb4, 0x1eb5, 0x1eb6, 0x1eb7,
    0x1eb8, 0x1eb9, 0x1eba, 0x1ebb, 0x1ebc, 0x1ebd, 0x1ebe, 0x1ebf,
    0x1ec0, 0x1ec1, 0x1ec2, 0x1ec3, 0x1ec4, 0x1ec5, 0x1ec6, 0x1ec7,
    0x1ec8, 0x1ec9, 0x1eca, 0x1ecb, 0x1ecc, 0x1ecd, 0x1ece, 0x1ecf,
    0x1ed0, 0x1ed1, 0x1ed2, 0x1ed3, 0x1ed4, 0x1ed5, 0x1ed6, 0x1ed7,
    0x1ed8, 0x1ed9, 0x1eda, 0x1edb, 0x1edc, 0x1edd, 0x1ede, 0x1edf,
    0x1ee0, 0x1ee1, 0x1ee2, 0x1ee3, 0x1ee4, 0x1ee5, 0x1ee6, 0x1ee7,
    0x1ee8, 0x1ee9, 0x1eea, 0x1eeb, 0x1eec, 0x1eed, 0x1eee, 0x1eef,
    0x1ef0, 0x1ef1, 0x0079, 0x0079, 0x1ef4, 0x1ef5, 0x1ef6, 0x1ef7,
    0x1ef8, 0x1ef9, 0x1efa, 0x1efb, 0x1efc, 0x1efd, 0x1efe, 0x1eff,
};

//! Page for U+2100..U+21FF. Identity except U+2113, which maps to 'l',
//! and U+2126, which maps to U+03C9.
constexpr WGL4Page WGL4_Normalization_21{
    0x2100, 0x2101, 0x2102, 0x2103, 0x2104, 0x2105, 0x2106, 0x2107,
    0x2108, 0x2109, 0x210a, 0x210b, 0x210c, 0x210d, 0x210e, 0x210f,
    0x2110, 0x2111, 0x2112, 0x006c, 0x2114, 0x2115, 0x2116, 0x2117,
    0x2118, 0x2119, 0x211a, 0x211b, 0x211c, 0x211d, 0x211e, 0x211f,
    0x2120, 0x2121, 0x2122, 0x2123, 0x2124, 0x2125, 0x03c9, 0x2127,
    0x2128, 0x2129, 0x212a, 0x212b, 0x212c, 0x212d, 0x212e, 0x212f,
    0x2130, 0x2131, 0x2132, 0x2133, 0x2134, 0x2135, 0x2136, 0x2137,
    0x2138, 0x2139, 0x213a, 0x213b, 0x213c, 0x213d, 0x213e, 0x213f,
    0x2140, 0x2141, 0x2142, 0x2143, 0x2144, 0x2145, 0x2146, 0x2147,
    0x2148, 0x2149, 0x214a, 0x214b, 0x214c, 0x214d, 0x214e, 0x214f,
    0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157,
    0x2158, 0x2159, 0x215a, 0x215b, 0x215c, 0x215d, 0x215e, 0x215f,
    0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167,
    0x2168, 0x2169, 0x216a, 0x216b, 0x216c, 0x216d, 0x216e, 0x216f,
    0x2170, 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177,
    0x2178, 0x2179, 0x217a, 0x217b, 0x217c, 0x217d, 0x217e, 0x217f,
    0x2180, 0x2181, 0x2182, 0x2183, 0x2184, 0x2185, 0x2186, 0x2187,
    0x2188, 0x2189, 0x218a, 0x218b, 0x218c, 0x218d, 0x218e, 0x218f,
    0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197,
    0x2198, 0x2199, 0x219a, 0x219b, 0x219c, 0x219d, 0x219e, 0x219f,
    0x21a0, 0x21a1, 0x21a2, 0x21a3, 0x21a4, 0x21a5, 0x21a6, 0x21a7,
    0x21a8, 0x21a9, 0x21aa, 0x21ab, 0x21ac, 0x21ad, 0x21ae, 0x21af,
    0x21b0, 0x21b1, 0x21b2, 0x21b3, 0x21b4, 0x21b5, 0x21b6, 0x21b7,
    0x21b8, 0x21b9, 0x21ba, 0x21bb, 0x21bc, 0x21bd, 0x21be, 0x21bf,
    0x21c0, 0x21c1, 0x21c2, 0x21c3, 0x21c4, 0x21c5, 0x21c6, 0x21c7,
    0x21c8, 0x21c9, 0x21ca, 0x21cb, 0x21cc, 0x21cd, 0x21ce, 0x21cf,
    0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x21d5, 0x21d6, 0x21d7,
    0x21d8, 0x21d9, 0x21da, 0x21db, 0x21dc, 0x21dd, 0x21de, 0x21df,
    0x21e0, 0x21e1, 0x21e2, 0x21e3, 0x21e4, 0x21e5, 0x21e6, 0x21e7,
    0x21e8, 0x21e9, 0x21ea, 0x21eb, 0x21ec, 0x21ed, 0x21ee, 0x21ef,
    0x21f0, 0x21f1, 0x21f2, 0x21f3, 0x21f4, 0x21f5, 0x21f6, 0x21f7,
    0x21f8, 0x21f9, 0x21fa, 0x21fb, 0x21fc, 0x21fd, 0x21fe, 0x21ff,
};

//! Page directory indexed by the high byte of a BMP code point. A null
//! entry means no normalization is defined for that page. Only slots up
//! to 0x21 are listed: aggregate initialization value-initializes every
//! remaining element, so slots 0x22..0xff are nullptr.
constexpr std::array<const WGL4Page*, 256> WGL4NormalizationTables{
    &WGL4_Normalization_00,
    &WGL4_Normalization_01,
    &WGL4_Normalization_02,
    &WGL4_Normalization_03,
    &WGL4_Normalization_04,
    nullptr, nullptr, nullptr,
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &WGL4_Normalization_1e, nullptr,
    nullptr, &WGL4_Normalization_21,
};

// clang-format on

// The directory is filled positionally, so pin the slots that matter.
static_assert(WGL4NormalizationTables.size() == 0x100,
              "one directory slot is required per possible high byte");
static_assert(WGL4NormalizationTables[0x04] == &WGL4_Normalization_04,
              "page 04 is stored in the wrong directory slot");
static_assert(WGL4NormalizationTables[0x1e] == &WGL4_Normalization_1e,
              "page 1e is stored in the wrong directory slot");
static_assert(WGL4NormalizationTables[0x21] == &WGL4_Normalization_21,
              "page 21 is stored in the wrong directory slot");
static_assert(WGL4NormalizationTables[0xff] == nullptr,
              "unlisted directory slots must be null");

//! Map a code point to its normalized form using the page tables above.
//!
//! \param ch code point to normalize
//! \return the table entry for ch when ch lies in 0..0xffff and a page
//!         exists for its high byte; otherwise ch unchanged (this
//!         includes negative values and values above 0xffff).
inline std::int32_t UTF8Normalize(std::int32_t ch)
{
    if (ch < 0 || ch > 0xffff)
        return ch;

    // ch is now known to be in 0..0xffff, so the high byte is always a
    // valid index into the 256-slot directory; no further range check
    // is required.
    const auto codePoint = static_cast<std::uint32_t>(ch);
    const WGL4Page* normTable = WGL4NormalizationTables[codePoint >> 8];
    if (normTable == nullptr)
        return ch;

    return static_cast<std::int32_t>((*normTable)[codePoint & 0xff]);
}

} // namespace
329

330
//! Decode the UTF-8 character at the start of the string str. The decoded
331
//! character is returned in ch; the return value of the function is true if
332
//! a valid UTF-8 sequence was successfully decoded.
333
bool UTF8Decode(std::string_view str, std::int32_t& ch)
334
{
335
    std::int32_t offset = 0;
336
    return UTF8Decode(str, offset, ch);
337
}
338

339
//! Decode the UTF-8 characters in string str beginning at position pos.
340
//! The decoded character is returned in ch; the return value of the function
341
//! is true if a valid UTF-8 sequence was successfully decoded. The value of
342
//! pos is advanced to the next undecoded byte in the input string.
343
bool UTF8Decode(std::string_view str, std::int32_t& pos, std::int32_t& ch)
344
{
345
    UTF8Validator validator;
346
    auto length = static_cast<std::int32_t>(str.size());
347
    while (pos < length)
348
    {
349
        auto result = validator.check(str[pos]);
350
        ++pos;
351
        if (result >= 0)
352
        {
353
            ch = result;
354
            return true;
355
        }
356
        if (result != UTF8Validator::PartialSequence)
357
            return false;
358
    }
359

360
    return false;
361
}
362

363
//! Appends the UTF-8 encoded version of the code point ch to the
364
//! destination string
365
void UTF8Encode(std::uint32_t ch, std::string& dest)
366
{
367
    if (ch < 0x80)
368
    {
369
        dest.push_back(static_cast<char>(ch));
370
    }
371
    else if (ch < 0x800)
372
    {
373
        dest.push_back(static_cast<char>(0xc0 | (ch >> 6)));
374
        dest.push_back(static_cast<char>(0x80 | (ch & 0x3f)));
375
    }
376
    else if (ch < 0x10000)
377
    {
378
        if (ch < 0xd800 || ch >= 0xe000)
379
        {
380
            dest.push_back(static_cast<char>(0xe0 | (ch >> 12)));
381
            dest.push_back(static_cast<char>(0x80 | ((ch & 0xfff) >> 6)));
382
            dest.push_back(static_cast<char>(0x80 | (ch & 0x3f)));
383
        }
384
        else
385
        {
386
            // disallow surrogates
387
            dest.append(UTF8_REPLACEMENT_CHAR);
388
        }
389
    }
390
#if WCHAR_MAX > 0xFFFFu
391
    else if (ch < 0x110000)
392
    {
393
        dest.push_back(static_cast<char>(0xf0 | (ch >> 18)));
394
        dest.push_back(static_cast<char>(0x80 | ((ch & 0x3ffff) >> 12)));
395
        dest.push_back(static_cast<char>(0x80 | ((ch & 0xfff) >> 6)));
396
        dest.push_back(static_cast<char>(0x80 | (ch & 0x3f)));
397
    }
398
#endif
399
    else
400
    {
401
        // not a valid Unicode code point, or we only support BMP characters,
402
        // so fall back to U+FFFD REPLACEMENT CHARACTER
403
        dest.append(UTF8_REPLACEMENT_CHAR);
404
    }
405
}
406

407

408
//! Perform a normalized comparison of two UTF-8 strings.  The normalization
409
//! only works for characters in the WGL-4 subset, and no multicharacter
410
//! translations are performed.
411
int UTF8StringCompare(std::string_view s0, std::string_view s1)
412
{
413
    auto len0 = static_cast<std::int32_t>(s0.size());
414
    auto len1 = static_cast<std::int32_t>(s1.size());
415
    std::int32_t i0 = 0;
416
    std::int32_t i1 = 0;
417
    for (;;)
418
    {
419
        std::int32_t ch0;
420
        std::int32_t ch1;
421
        if (i0 >= len0 || !UTF8Decode(s0, i0, ch0))
422
            return (i1 >= len1 || !UTF8Decode(s1, i1, ch1)) ? 0 : -1;
423
        if (i1 >= len1 || !UTF8Decode(s1, i1, ch1))
424
            return 1;
425

426
        ch0 = UTF8Normalize(ch0);
427
        ch1 = UTF8Normalize(ch1);
428

429
        if (ch0 < ch1)
430
            return -1;
431
        if (ch0 > ch1)
432
            return 1;
433
    }
434
}
435

436
bool UTF8StartsWith(std::string_view str, std::string_view prefix, bool ignoreCase)
437
{
438
    auto len0 = static_cast<std::int32_t>(str.size());
439
    auto len1 = static_cast<std::int32_t>(prefix.size());
440
    std::int32_t i0 = 0;
441
    std::int32_t i1 = 0;
442
    for (;;)
443
    {
444
        std::int32_t ch0;
445
        std::int32_t ch1;
446
        if (i1 >= len1)
447
            return true;
448
        if (i0 >= len0 || !UTF8Decode(str, i0, ch0) || !UTF8Decode(prefix, i1, ch1))
449
            return false;
450

451
        ch0 = UTF8Normalize(ch0);
452
        ch1 = UTF8Normalize(ch1);
453

454
        if (ignoreCase)
455
        {
456
            if (ch0 >= 0 && ch0 <= WCHAR_MAX)
457
                ch0 = static_cast<std::int32_t>(std::towlower(static_cast<std::wint_t>(ch0)));
458
            if (ch1 >= 0 && ch1 <= WCHAR_MAX)
459
                ch1 = static_cast<std::int32_t>(std::towlower(static_cast<std::wint_t>(ch1)));
460
        }
461

462
        if (ch0 != ch1)
463
            return false;
464
    }
465
}
466

467
std::int32_t
468
UTF8Validator::check(unsigned char c)
469
{
470
    switch (state)
471
    {
472
    case State::Initial:
473
        if (c < 0x80)
474
            return static_cast<std::int32_t>(c);
475
        if (c >= 0xc2 && c <= 0xf4)
476
        {
477
            buffer[0] = c;
478
            state = State::Continuation1;
479
            return PartialSequence;
480
        }
481
        return InvalidStarter;
482

483
    case State::Continuation1:
484
        if (c < 0x80 || c > 0xbf ||
485
            (buffer[0] == 0xe0 && c < 0xa0) ||
486
            (buffer[0] == 0xed && c > 0x9f) ||
487
            (buffer[0] == 0xf0 && c < 0x90) ||
488
            (buffer[0] == 0xf4 && c > 0x8f))
489
        {
490
            state = State::Initial;
491
            return InvalidTrailing;
492
        }
493
        if (buffer[0] < 0xe0)
494
        {
495
            state = State::Initial;
496
            return ((static_cast<std::int32_t>(buffer[0]) & 0x1f) << 6) |
497
                (static_cast<std::int32_t>(c) & 0x3f);
498
        }
499
        buffer[1] = c;
500
        state = State::Continuation2;
501
        return PartialSequence;
502

503
    case State::Continuation2:
504
        if (c < 0x80 || c > 0xbf)
505
        {
506
            state = State::Initial;
507
            return InvalidTrailing;
508
        }
509
        if (buffer[0] < 0xf0)
510
        {
511
            state = State::Initial;
512
            return ((static_cast<std::int32_t>(buffer[0]) & 0x0f) << 12) |
513
                ((static_cast<std::int32_t>(buffer[1]) & 0x3f) << 6) |
514
                (static_cast<std::int32_t>(c) & 0x3f);
515
        }
516
        buffer[2] = c;
517
        state = State::Continuation3;
518
        return PartialSequence;
519

520
    case State::Continuation3:
521
        state = State::Initial;
522
        if (c < 0x80 || c > 0xbf)
523
            return InvalidTrailing;
524
        return ((static_cast<std::int32_t>(buffer[0]) & 0x07) << 18) |
525
            ((static_cast<std::int32_t>(buffer[1]) & 0x3f) << 12) |
526
            ((static_cast<std::int32_t>(buffer[2]) & 0x3f) << 6) |
527
            (static_cast<std::int32_t>(c) & 0x3f);
528

529
    default:
530
        // Should not be able to get here
531
        assert(false);
532
        return InvalidStarter;
533
    }
534
}
535

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.