jdk
258 строк · 7.4 Кб
1/*
2* Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
3*
4* Permission to use, copy, modify, and/or distribute this software for any
5* purpose with or without fee is hereby granted, provided that the above
6* copyright notice and this permission notice appear in all copies.
7*
8* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15*/
16
17#include "hb.hh"
18#include "hb-unicode.hh"
19#include "hb-machinery.hh"
20
21#include "hb-ucd-table.hh"
22
23static hb_unicode_combining_class_t
24hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED,
25hb_codepoint_t unicode,
26void *user_data HB_UNUSED)
27{
28return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode);
29}
30
31static hb_unicode_general_category_t
32hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED,
33hb_codepoint_t unicode,
34void *user_data HB_UNUSED)
35{
36return (hb_unicode_general_category_t) _hb_ucd_gc (unicode);
37}
38
39static hb_codepoint_t
40hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED,
41hb_codepoint_t unicode,
42void *user_data HB_UNUSED)
43{
44return unicode + _hb_ucd_bmg (unicode);
45}
46
47static hb_script_t
48hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED,
49hb_codepoint_t unicode,
50void *user_data HB_UNUSED)
51{
52return _hb_ucd_sc_map[_hb_ucd_sc (unicode)];
53}
54
55
/* Constants used by the algorithmic Hangul (de)composition below.
 * *BASE values are the range origins the code offsets from; *COUNT
 * values are the range sizes it tests against. */
#define SBASE  0xAC00u	/* origin of the precomposed syllable range */
#define LBASE  0x1100u	/* origin of the "L" range */
#define VBASE  0x1161u	/* origin of the "V" range */
#define TBASE  0x11A7u	/* origin of the "T" range; TBASE itself is never a valid T */
#define SCOUNT 11172u	/* number of precomposed syllables */
#define LCOUNT 19u
#define VCOUNT 21u
#define TCOUNT 28u
#define NCOUNT (VCOUNT * TCOUNT)	/* syllables per L value */
65
66static inline bool
67_hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
68{
69unsigned si = ab - SBASE;
70
71if (si >= SCOUNT)
72return false;
73
74if (si % TCOUNT)
75{
76/* LV,T */
77*a = SBASE + (si / TCOUNT) * TCOUNT;
78*b = TBASE + (si % TCOUNT);
79return true;
80} else {
81/* L,V */
82*a = LBASE + (si / NCOUNT);
83*b = VBASE + (si % NCOUNT) / TCOUNT;
84return true;
85}
86}
87
88static inline bool
89_hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
90{
91if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) &&
92!((a - SBASE) % TCOUNT))
93{
94/* LV,T */
95*ab = a + (b - TBASE);
96return true;
97}
98else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT))
99{
100/* L,V */
101int li = a - LBASE;
102int vi = b - VBASE;
103*ab = SBASE + li * NCOUNT + vi * TCOUNT;
104return true;
105}
106else
107return false;
108}
109
110static int
111_cmp_pair (const void *_key, const void *_item)
112{
113uint64_t& a = * (uint64_t*) _key;
114uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0);
115
116return a < b ? -1 : a > b ? +1 : 0;
117}
118static int
119_cmp_pair_11_7_14 (const void *_key, const void *_item)
120{
121uint32_t& a = * (uint32_t*) _key;
122uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0);
123
124return a < b ? -1 : a > b ? +1 : 0;
125}
126
127static hb_bool_t
128hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
129hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab,
130void *user_data HB_UNUSED)
131{
132// Hangul is handled algorithmically.
133if (_hb_ucd_compose_hangul (a, b, ab)) return true;
134
135hb_codepoint_t u = 0;
136
137if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u)
138{
139/* If "a" is small enough and "b" is in the U+0300 range,
140* the composition data is encoded in a 32bit array sorted
141* by "a,b" pair. */
142uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0);
143const uint32_t *v = hb_bsearch (k,
144_hb_ucd_dm2_u32_map,
145ARRAY_LENGTH (_hb_ucd_dm2_u32_map),
146sizeof (*_hb_ucd_dm2_u32_map),
147_cmp_pair_11_7_14);
148if (likely (!v)) return false;
149u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v);
150}
151else
152{
153/* Otherwise it is stored in a 64bit array sorted by
154* "a,b" pair. */
155uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0);
156const uint64_t *v = hb_bsearch (k,
157_hb_ucd_dm2_u64_map,
158ARRAY_LENGTH (_hb_ucd_dm2_u64_map),
159sizeof (*_hb_ucd_dm2_u64_map),
160_cmp_pair);
161if (likely (!v)) return false;
162u = HB_CODEPOINT_DECODE3_3 (*v);
163}
164
165if (unlikely (!u)) return false;
166*ab = u;
167return true;
168}
169
170static hb_bool_t
171hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED,
172hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b,
173void *user_data HB_UNUSED)
174{
175if (_hb_ucd_decompose_hangul (ab, a, b)) return true;
176
177unsigned i = _hb_ucd_dm (ab);
178
179/* If no data, there's no decomposition. */
180if (likely (!i)) return false;
181i--;
182
183/* Check if it's a single-character decomposition. */
184if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map))
185{
186/* Single-character decompositions currently are only in plane 0 or plane 2. */
187if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map))
188{
189/* Plane 0. */
190*a = _hb_ucd_dm1_p0_map[i];
191}
192else
193{
194/* Plane 2. */
195i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map);
196*a = 0x20000 | _hb_ucd_dm1_p2_map[i];
197}
198*b = 0;
199return true;
200}
201i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map);
202
203/* Otherwise they are encoded either in a 32bit array or a 64bit array. */
204if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map))
205{
206/* 32bit array. */
207uint32_t v = _hb_ucd_dm2_u32_map[i];
208*a = HB_CODEPOINT_DECODE3_11_7_14_1 (v);
209*b = HB_CODEPOINT_DECODE3_11_7_14_2 (v);
210return true;
211}
212i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map);
213
214/* 64bit array. */
215uint64_t v = _hb_ucd_dm2_u64_map[i];
216*a = HB_CODEPOINT_DECODE3_1 (v);
217*b = HB_CODEPOINT_DECODE3_2 (v);
218return true;
219}
220
221
222static void free_static_ucd_funcs ();
223
224static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t>
225{
226static hb_unicode_funcs_t *create ()
227{
228hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr);
229
230hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr);
231hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr);
232hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr);
233hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr);
234hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr);
235hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr);
236
237hb_unicode_funcs_make_immutable (funcs);
238
239hb_atexit (free_static_ucd_funcs);
240
241return funcs;
242}
243} static_ucd_funcs;
244
245static inline
246void free_static_ucd_funcs ()
247{
248static_ucd_funcs.free_instance ();
249}
250
251hb_unicode_funcs_t *
252hb_ucd_get_unicode_funcs ()
253{
254#ifdef HB_NO_UCD
255return hb_unicode_funcs_get_empty ();
256#endif
257return static_ucd_funcs.get_unconst ();
258}
259