jdk
625 строк · 19.7 Кб
1/*
2* Copyright © 2009 Red Hat, Inc.
3* Copyright © 2011 Codethink Limited
4* Copyright © 2010,2011,2012 Google, Inc.
5*
6* This is part of HarfBuzz, a text shaping library.
7*
8* Permission is hereby granted, without written agreement and without
9* license or royalty fees, to use, copy, modify, and distribute this
10* software and its documentation for any purpose, provided that the
11* above copyright notice and the following two paragraphs appear in
12* all copies of this software.
13*
14* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18* DAMAGE.
19*
20* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
23* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25*
26* Red Hat Author(s): Behdad Esfahbod
27* Codethink Author(s): Ryan Lortie
28* Google Author(s): Behdad Esfahbod
29*/
30
31#include "hb.hh"
32
33#include "hb-unicode.hh"
34
35
36/**
37* SECTION: hb-unicode
38* @title: hb-unicode
39* @short_description: Unicode character property access
40* @include: hb.h
41*
42* Unicode functions are used to access Unicode character properties.
43* With these functions, client programs can query various properties from
44* the Unicode Character Database for any code point, such as General
45* Category (gc), Script (sc), Canonical Combining Class (ccc), etc.
46*
47* Client programs can optionally pass in their own Unicode functions
48* that implement the same queries. The set of functions available is
49* defined by the virtual methods in #hb_unicode_funcs_t.
50*
51* HarfBuzz provides built-in default functions for each method in
52* #hb_unicode_funcs_t.
53**/
54
55
56/*
57* hb_unicode_funcs_t
58*/
59
60static hb_unicode_combining_class_t
61hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
62hb_codepoint_t unicode HB_UNUSED,
63void *user_data HB_UNUSED)
64{
65return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
66}
67
68#ifndef HB_DISABLE_DEPRECATED
69static unsigned int
70hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
71hb_codepoint_t unicode HB_UNUSED,
72void *user_data HB_UNUSED)
73{
74return 1;
75}
76#endif
77
78static hb_unicode_general_category_t
79hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
80hb_codepoint_t unicode HB_UNUSED,
81void *user_data HB_UNUSED)
82{
83return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
84}
85
86static hb_codepoint_t
87hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
88hb_codepoint_t unicode,
89void *user_data HB_UNUSED)
90{
91return unicode;
92}
93
94static hb_script_t
95hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
96hb_codepoint_t unicode HB_UNUSED,
97void *user_data HB_UNUSED)
98{
99return HB_SCRIPT_UNKNOWN;
100}
101
102static hb_bool_t
103hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
104hb_codepoint_t a HB_UNUSED,
105hb_codepoint_t b HB_UNUSED,
106hb_codepoint_t *ab HB_UNUSED,
107void *user_data HB_UNUSED)
108{
109return false;
110}
111
112static hb_bool_t
113hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
114hb_codepoint_t ab HB_UNUSED,
115hb_codepoint_t *a HB_UNUSED,
116hb_codepoint_t *b HB_UNUSED,
117void *user_data HB_UNUSED)
118{
119return false;
120}
121
122
123#ifndef HB_DISABLE_DEPRECATED
124static unsigned int
125hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
126hb_codepoint_t u HB_UNUSED,
127hb_codepoint_t *decomposed HB_UNUSED,
128void *user_data HB_UNUSED)
129{
130return 0;
131}
132#endif
133
134#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
135#include "hb-glib.h"
136#endif
137#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
138#include "hb-icu.h"
139#endif
140
141/**
142* hb_unicode_funcs_get_default:
143*
144* Fetches a pointer to the default Unicode-functions structure that is used
145* when no functions are explicitly set on #hb_buffer_t.
146*
147* Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
148*
149* Since: 0.9.2
150**/
151hb_unicode_funcs_t *
152hb_unicode_funcs_get_default ()
153{
154#if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
155return hb_ucd_get_unicode_funcs ();
156#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
157return hb_glib_get_unicode_funcs ();
158#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
159return hb_icu_get_unicode_funcs ();
160#else
161#define HB_UNICODE_FUNCS_NIL 1
162return hb_unicode_funcs_get_empty ();
163#endif
164}
165
166#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
167#error "Could not find any Unicode functions implementation, you have to provide your own"
168#error "Consider building hb-ucd.cc. If you absolutely want to build without any, define HB_NO_UNICODE_FUNCS."
169#endif
170
171/**
172* hb_unicode_funcs_create:
173* @parent: (nullable): Parent Unicode-functions structure
174*
175* Creates a new #hb_unicode_funcs_t structure of Unicode functions.
176*
177* Return value: (transfer full): The Unicode-functions structure
178*
179* Since: 0.9.2
180**/
181hb_unicode_funcs_t *
182hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
183{
184hb_unicode_funcs_t *ufuncs;
185
186if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
187return hb_unicode_funcs_get_empty ();
188
189if (!parent)
190parent = hb_unicode_funcs_get_empty ();
191
192hb_unicode_funcs_make_immutable (parent);
193ufuncs->parent = hb_unicode_funcs_reference (parent);
194
195ufuncs->func = parent->func;
196
197/* We can safely copy user_data from parent since we hold a reference
198* onto it and it's immutable. We should not copy the destroy notifiers
199* though. */
200ufuncs->user_data = parent->user_data;
201
202return ufuncs;
203}
204
205
206DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) =
207{
208HB_OBJECT_HEADER_STATIC,
209
210nullptr, /* parent */
211{
212#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
213HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
214#undef HB_UNICODE_FUNC_IMPLEMENT
215}
216};
217
218/**
219* hb_unicode_funcs_get_empty:
220*
221* Fetches the singleton empty Unicode-functions structure.
222*
223* Return value: (transfer full): The empty Unicode-functions structure
224*
225* Since: 0.9.2
226**/
227hb_unicode_funcs_t *
228hb_unicode_funcs_get_empty ()
229{
230return const_cast<hb_unicode_funcs_t *> (&Null (hb_unicode_funcs_t));
231}
232
233/**
234* hb_unicode_funcs_reference: (skip)
235* @ufuncs: The Unicode-functions structure
236*
237* Increases the reference count on a Unicode-functions structure.
238*
239* Return value: (transfer full): The Unicode-functions structure
240*
241* Since: 0.9.2
242**/
243hb_unicode_funcs_t *
244hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
245{
246return hb_object_reference (ufuncs);
247}
248
249/**
250* hb_unicode_funcs_destroy: (skip)
251* @ufuncs: The Unicode-functions structure
252*
253* Decreases the reference count on a Unicode-functions structure. When
254* the reference count reaches zero, the Unicode-functions structure is
255* destroyed, freeing all memory.
256*
257* Since: 0.9.2
258**/
259void
260hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
261{
262if (!hb_object_destroy (ufuncs)) return;
263
264#define HB_UNICODE_FUNC_IMPLEMENT(name) \
265if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
266HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
267#undef HB_UNICODE_FUNC_IMPLEMENT
268
269hb_unicode_funcs_destroy (ufuncs->parent);
270
271hb_free (ufuncs);
272}
273
274/**
275* hb_unicode_funcs_set_user_data: (skip)
276* @ufuncs: The Unicode-functions structure
277* @key: The user-data key
278* @data: A pointer to the user data
279* @destroy: (nullable): A callback to call when @data is not needed anymore
280* @replace: Whether to replace an existing data with the same key
281*
282* Attaches a user-data key/data pair to the specified Unicode-functions structure.
283*
284* Return value: `true` if success, `false` otherwise
285*
286* Since: 0.9.2
287**/
288hb_bool_t
289hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
290hb_user_data_key_t *key,
291void * data,
292hb_destroy_func_t destroy,
293hb_bool_t replace)
294{
295return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
296}
297
298/**
299* hb_unicode_funcs_get_user_data: (skip)
300* @ufuncs: The Unicode-functions structure
301* @key: The user-data key to query
302*
303* Fetches the user-data associated with the specified key,
304* attached to the specified Unicode-functions structure.
305*
306* Return value: (transfer none): A pointer to the user data
307*
308* Since: 0.9.2
309**/
310void *
311hb_unicode_funcs_get_user_data (const hb_unicode_funcs_t *ufuncs,
312hb_user_data_key_t *key)
313{
314return hb_object_get_user_data (ufuncs, key);
315}
316
317
318/**
319* hb_unicode_funcs_make_immutable:
320* @ufuncs: The Unicode-functions structure
321*
322* Makes the specified Unicode-functions structure
323* immutable.
324*
325* Since: 0.9.2
326**/
327void
328hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
329{
330if (hb_object_is_immutable (ufuncs))
331return;
332
333hb_object_make_immutable (ufuncs);
334}
335
336/**
337* hb_unicode_funcs_is_immutable:
338* @ufuncs: The Unicode-functions structure
339*
340* Tests whether the specified Unicode-functions structure
341* is immutable.
342*
343* Return value: `true` if @ufuncs is immutable, `false` otherwise
344*
345* Since: 0.9.2
346**/
347hb_bool_t
348hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
349{
350return hb_object_is_immutable (ufuncs);
351}
352
353/**
354* hb_unicode_funcs_get_parent:
355* @ufuncs: The Unicode-functions structure
356*
357* Fetches the parent of the Unicode-functions structure
358* @ufuncs.
359*
360* Return value: The parent Unicode-functions structure
361*
362* Since: 0.9.2
363**/
364hb_unicode_funcs_t *
365hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
366{
367return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
368}
369
370
371#define HB_UNICODE_FUNC_IMPLEMENT(name) \
372\
373void \
374hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
375hb_unicode_##name##_func_t func, \
376void *user_data, \
377hb_destroy_func_t destroy) \
378{ \
379if (hb_object_is_immutable (ufuncs)) \
380goto fail; \
381\
382if (!func) \
383{ \
384if (destroy) \
385destroy (user_data); \
386destroy = nullptr; \
387user_data = ufuncs->parent->user_data.name; \
388} \
389\
390if (ufuncs->destroy.name) \
391ufuncs->destroy.name (ufuncs->user_data.name); \
392\
393if (func) \
394ufuncs->func.name = func; \
395else \
396ufuncs->func.name = ufuncs->parent->func.name; \
397ufuncs->user_data.name = user_data; \
398ufuncs->destroy.name = destroy; \
399return; \
400\
401fail: \
402if (destroy) \
403destroy (user_data); \
404}
405
406HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
407#undef HB_UNICODE_FUNC_IMPLEMENT
408
409
410#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
411\
412return_type \
413hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
414hb_codepoint_t unicode) \
415{ \
416return ufuncs->name (unicode); \
417}
418HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
419#undef HB_UNICODE_FUNC_IMPLEMENT
420
421/**
422* hb_unicode_compose:
423* @ufuncs: The Unicode-functions structure
424* @a: The first Unicode code point to compose
425* @b: The second Unicode code point to compose
426* @ab: (out): The composition of @a, @b
427*
428* Fetches the composition of a sequence of two Unicode
429* code points.
430*
431* Calls the composition function of the specified
432* Unicode-functions structure @ufuncs.
433*
434* Return value: `true` if @a and @b composed, `false` otherwise
435*
436* Since: 0.9.2
437**/
438hb_bool_t
439hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
440hb_codepoint_t a,
441hb_codepoint_t b,
442hb_codepoint_t *ab)
443{
444return ufuncs->compose (a, b, ab);
445}
446
447/**
448* hb_unicode_decompose:
449* @ufuncs: The Unicode-functions structure
450* @ab: Unicode code point to decompose
451* @a: (out): The first code point of the decomposition of @ab
452* @b: (out): The second code point of the decomposition of @ab
453*
454* Fetches the decomposition of a Unicode code point.
455*
456* Calls the decomposition function of the specified
457* Unicode-functions structure @ufuncs.
458*
459* Return value: `true` if @ab was decomposed, `false` otherwise
460*
461* Since: 0.9.2
462**/
463hb_bool_t
464hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
465hb_codepoint_t ab,
466hb_codepoint_t *a,
467hb_codepoint_t *b)
468{
469return ufuncs->decompose (ab, a, b);
470}
471
472#ifndef HB_DISABLE_DEPRECATED
473/**
474* hb_unicode_decompose_compatibility:
475* @ufuncs: The Unicode-functions structure
476* @u: Code point to decompose
477* @decomposed: (out): Compatibility decomposition of @u
478*
479* Fetches the compatibility decomposition of a Unicode
480* code point. Deprecated.
481*
482* Return value: length of @decomposed.
483*
484* Since: 0.9.2
485* Deprecated: 2.0.0
486**/
487unsigned int
488hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
489hb_codepoint_t u,
490hb_codepoint_t *decomposed)
491{
492return ufuncs->decompose_compatibility (u, decomposed);
493}
494#endif
495
496
497#ifndef HB_NO_OT_SHAPE
498/* See hb-unicode.hh for details. */
499const uint8_t
500_hb_modified_combining_class[256] =
501{
5020, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
5031, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
5042, 3, 4, 5, 6,
5057, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
5068, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
5079, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
508
509/* Hebrew */
510HB_MODIFIED_COMBINING_CLASS_CCC10,
511HB_MODIFIED_COMBINING_CLASS_CCC11,
512HB_MODIFIED_COMBINING_CLASS_CCC12,
513HB_MODIFIED_COMBINING_CLASS_CCC13,
514HB_MODIFIED_COMBINING_CLASS_CCC14,
515HB_MODIFIED_COMBINING_CLASS_CCC15,
516HB_MODIFIED_COMBINING_CLASS_CCC16,
517HB_MODIFIED_COMBINING_CLASS_CCC17,
518HB_MODIFIED_COMBINING_CLASS_CCC18,
519HB_MODIFIED_COMBINING_CLASS_CCC19,
520HB_MODIFIED_COMBINING_CLASS_CCC20,
521HB_MODIFIED_COMBINING_CLASS_CCC21,
522HB_MODIFIED_COMBINING_CLASS_CCC22,
523HB_MODIFIED_COMBINING_CLASS_CCC23,
524HB_MODIFIED_COMBINING_CLASS_CCC24,
525HB_MODIFIED_COMBINING_CLASS_CCC25,
526HB_MODIFIED_COMBINING_CLASS_CCC26,
527
528/* Arabic */
529HB_MODIFIED_COMBINING_CLASS_CCC27,
530HB_MODIFIED_COMBINING_CLASS_CCC28,
531HB_MODIFIED_COMBINING_CLASS_CCC29,
532HB_MODIFIED_COMBINING_CLASS_CCC30,
533HB_MODIFIED_COMBINING_CLASS_CCC31,
534HB_MODIFIED_COMBINING_CLASS_CCC32,
535HB_MODIFIED_COMBINING_CLASS_CCC33,
536HB_MODIFIED_COMBINING_CLASS_CCC34,
537HB_MODIFIED_COMBINING_CLASS_CCC35,
538
539/* Syriac */
540HB_MODIFIED_COMBINING_CLASS_CCC36,
541
54237, 38, 39,
54340, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
54460, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
54580, 81, 82, 83,
546
547/* Telugu */
548HB_MODIFIED_COMBINING_CLASS_CCC84,
54985, 86, 87, 88, 89, 90,
550HB_MODIFIED_COMBINING_CLASS_CCC91,
55192, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
552
553/* Thai */
554HB_MODIFIED_COMBINING_CLASS_CCC103,
555104, 105, 106,
556HB_MODIFIED_COMBINING_CLASS_CCC107,
557108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
558
559/* Lao */
560HB_MODIFIED_COMBINING_CLASS_CCC118,
561119, 120, 121,
562HB_MODIFIED_COMBINING_CLASS_CCC122,
563123, 124, 125, 126, 127, 128,
564
565/* Tibetan */
566HB_MODIFIED_COMBINING_CLASS_CCC129,
567HB_MODIFIED_COMBINING_CLASS_CCC130,
568131,
569HB_MODIFIED_COMBINING_CLASS_CCC132,
570133, 134, 135, 136, 137, 138, 139,
571
572
573140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
574150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
575160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
576170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
577180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
578190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
579
580200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
581201,
582202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
583203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
584214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
585215,
586216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
587217,
588218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
589219,
590220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
591221,
592222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
593223,
594224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
595225,
596226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
597227,
598228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
599229,
600230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
601231,
602232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
603233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
604234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
605235, 236, 237, 238, 239,
606240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
607241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
608255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
609};
610#endif
611
612
613/*
614* Emoji
615*/
616#ifndef HB_NO_EMOJI_SEQUENCES
617
618#include "hb-unicode-emoji-table.hh"
619
620bool
621_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
622{
623return _hb_emoji_is_Extended_Pictographic (cp);
624}
625#endif
626