1 /*
2 	Copyright (C) 2014-2016 Quinten Lansu
3 
4 	Permission is hereby granted, free of charge, to any person
5 	obtaining a copy of this software and associated documentation
6 	files (the "Software"), to deal in the Software without
7 	restriction, including without limitation the rights to use,
8 	copy, modify, merge, publish, distribute, sublicense, and/or
9 	sell copies of the Software, and to permit persons to whom the
10 	Software is furnished to do so, subject to the following
11 	conditions:
12 
13 	The above copyright notice and this permission notice shall be
14 	included in all copies or substantial portions of the Software.
15 
16 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18 	OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 	NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 	HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21 	WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 	FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 	OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
26 /* This is the concatenation of
27 
28 base.h casemapping.h codepoint.h database.h streaming.h
29 composition.h decomposition.h unicodedatabase.h utf8rewind.h
30 (order is important)
31 
32 with some modifications to simplify
33 */
34 
35 #ifndef U8_H
36 #define U8_H 1
37 
38 #ifndef _UTF8REWIND_H_
39 #define _UTF8REWIND_H_
40 
41 
42 
43 
/*
	Library version.

	UTF8_VERSION_MAKE packs a (major, minor, bugfix) triple into a single
	integer: major * 10000 + minor * 100 + bugfix. The whole expansion is
	parenthesized so that the result can be used as an operand of any
	operator (multiplication, negation, comparison, ...) without being
	re-associated.

	UTF8_VERSION is the packed version of this copy of the library and
	UTF8_VERSION_STRING its textual form. UTF8_VERSION_GUARD(major, minor,
	bugfix) is non-zero when this copy is at least the requested version.
*/
#define UTF8_VERSION_MAKE(_major, _minor, _bugfix) \
	(((_major) * 10000) + ((_minor) * 100) + (_bugfix))

#define UTF8_VERSION_MAJOR   1

#define UTF8_VERSION_MINOR   5

#define UTF8_VERSION_BUGFIX  1

#define UTF8_VERSION \
	UTF8_VERSION_MAKE(UTF8_VERSION_MAJOR, UTF8_VERSION_MINOR, UTF8_VERSION_BUGFIX)

#define UTF8_VERSION_STRING  "1.5.1"

#define UTF8_VERSION_GUARD(_major, _minor, _bugfix) \
	(UTF8_VERSION >= UTF8_VERSION_MAKE(_major, _minor, _bugfix))
60 
61 
62 
/*
	Error codes.

	These are the values UTF8_SET_ERROR stores through the `errors`
	out-parameter: zero means no error, every failure is negative.
*/
#define UTF8_ERR_NONE                    (0)
#define UTF8_ERR_INVALID_DATA            (-1)
#define UTF8_ERR_INVALID_FLAG            (-2)
#define UTF8_ERR_NOT_ENOUGH_SPACE        (-3)
#define UTF8_ERR_OVERLAPPING_PARAMETERS  (-4)
#define UTF8_ERR_INVALID_LOCALE          (-5)
74 
75 
76 
/*
	Locale identifiers.

	UTF8_LOCALE_MAXIMUM is one greater than the last identifier listed here;
	presumably it is used as an exclusive upper bound when validating a
	locale argument (the validation code is not part of this header).
*/
#define UTF8_LOCALE_DEFAULT                  0
#define UTF8_LOCALE_LITHUANIAN               1
#define UTF8_LOCALE_TURKISH_AND_AZERI_LATIN  2
#define UTF8_LOCALE_MAXIMUM                  3
84 
85 
86 
/*
	Normalization flags. Each flag occupies its own bit so they can be
	combined with bitwise OR.
*/
#define UTF8_NORMALIZE_COMPOSE        0x00000001
#define UTF8_NORMALIZE_DECOMPOSE      0x00000002
#define UTF8_NORMALIZE_COMPATIBILITY  0x00000004

/*
	Normalization check results. These are plain ordinals (0, 1, 2), not
	bit flags.
*/
#define UTF8_NORMALIZATION_RESULT_YES    (0)
#define UTF8_NORMALIZATION_RESULT_MAYBE  (1)
#define UTF8_NORMALIZATION_RESULT_NO     (2)
98 
99 
100 
/*
	General category flags.

	Every individual category owns exactly one bit of a 32-bit mask; the
	group macros (LETTER, CASE_MAPPED, MARK, NUMBER, PUNCTUATION, SYMBOL,
	SEPARATOR) are the bitwise OR of their members.
*/

/* Letters: bits 0-4. */
#define UTF8_CATEGORY_LETTER_UPPERCASE  0x00000001
#define UTF8_CATEGORY_LETTER_LOWERCASE  0x00000002
#define UTF8_CATEGORY_LETTER_TITLECASE  0x00000004
#define UTF8_CATEGORY_LETTER_MODIFIER   0x00000008
#define UTF8_CATEGORY_LETTER_OTHER      0x00000010

#define UTF8_CATEGORY_LETTER \
	(UTF8_CATEGORY_LETTER_UPPERCASE | \
	UTF8_CATEGORY_LETTER_LOWERCASE | \
	UTF8_CATEGORY_LETTER_TITLECASE | \
	UTF8_CATEGORY_LETTER_MODIFIER | \
	UTF8_CATEGORY_LETTER_OTHER)

/* The three cased letter categories. */
#define UTF8_CATEGORY_CASE_MAPPED \
	(UTF8_CATEGORY_LETTER_UPPERCASE | \
	UTF8_CATEGORY_LETTER_LOWERCASE | \
	UTF8_CATEGORY_LETTER_TITLECASE)

/* Marks: bits 5-7. */
#define UTF8_CATEGORY_MARK_NON_SPACING  0x00000020
#define UTF8_CATEGORY_MARK_SPACING      0x00000040
#define UTF8_CATEGORY_MARK_ENCLOSING    0x00000080

#define UTF8_CATEGORY_MARK \
	(UTF8_CATEGORY_MARK_NON_SPACING | \
	UTF8_CATEGORY_MARK_SPACING | \
	UTF8_CATEGORY_MARK_ENCLOSING)

/* Numbers: bits 8-10. */
#define UTF8_CATEGORY_NUMBER_DECIMAL  0x00000100
#define UTF8_CATEGORY_NUMBER_LETTER   0x00000200
#define UTF8_CATEGORY_NUMBER_OTHER    0x00000400

#define UTF8_CATEGORY_NUMBER \
	(UTF8_CATEGORY_NUMBER_DECIMAL | \
	UTF8_CATEGORY_NUMBER_LETTER | \
	UTF8_CATEGORY_NUMBER_OTHER)

/* Punctuation: bits 11-17. */
#define UTF8_CATEGORY_PUNCTUATION_CONNECTOR  0x00000800
#define UTF8_CATEGORY_PUNCTUATION_DASH       0x00001000
#define UTF8_CATEGORY_PUNCTUATION_OPEN       0x00002000
#define UTF8_CATEGORY_PUNCTUATION_CLOSE      0x00004000
#define UTF8_CATEGORY_PUNCTUATION_INITIAL    0x00008000
#define UTF8_CATEGORY_PUNCTUATION_FINAL      0x00010000
#define UTF8_CATEGORY_PUNCTUATION_OTHER      0x00020000

#define UTF8_CATEGORY_PUNCTUATION \
	(UTF8_CATEGORY_PUNCTUATION_CONNECTOR | \
	UTF8_CATEGORY_PUNCTUATION_DASH | \
	UTF8_CATEGORY_PUNCTUATION_OPEN | \
	UTF8_CATEGORY_PUNCTUATION_CLOSE | \
	UTF8_CATEGORY_PUNCTUATION_INITIAL | \
	UTF8_CATEGORY_PUNCTUATION_FINAL | \
	UTF8_CATEGORY_PUNCTUATION_OTHER)

/* Symbols: bits 18-21. */
#define UTF8_CATEGORY_SYMBOL_MATH      0x00040000
#define UTF8_CATEGORY_SYMBOL_CURRENCY  0x00080000
#define UTF8_CATEGORY_SYMBOL_MODIFIER  0x00100000
#define UTF8_CATEGORY_SYMBOL_OTHER     0x00200000

#define UTF8_CATEGORY_SYMBOL \
	(UTF8_CATEGORY_SYMBOL_MATH | \
	UTF8_CATEGORY_SYMBOL_CURRENCY | \
	UTF8_CATEGORY_SYMBOL_MODIFIER | \
	UTF8_CATEGORY_SYMBOL_OTHER)

/* Separators: bits 22-24. */
#define UTF8_CATEGORY_SEPARATOR_SPACE      0x00400000
#define UTF8_CATEGORY_SEPARATOR_LINE       0x00800000
#define UTF8_CATEGORY_SEPARATOR_PARAGRAPH  0x01000000

#define UTF8_CATEGORY_SEPARATOR \
	(UTF8_CATEGORY_SEPARATOR_SPACE | \
	UTF8_CATEGORY_SEPARATOR_LINE | \
	UTF8_CATEGORY_SEPARATOR_PARAGRAPH)

/* Remaining single-bit categories and modifier bits: bits 25-31. */
#define UTF8_CATEGORY_CONTROL                  0x02000000
#define UTF8_CATEGORY_FORMAT                   0x04000000
#define UTF8_CATEGORY_SURROGATE                0x08000000
#define UTF8_CATEGORY_PRIVATE_USE              0x10000000
#define UTF8_CATEGORY_UNASSIGNED               0x20000000
#define UTF8_CATEGORY_COMPATIBILITY            0x40000000
#define UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER  0x80000000
195 
/*
	Masks named after the <ctype.h> classification functions.

	Each one is UTF8_CATEGORY_COMPATIBILITY combined with the general
	categories listed in its definition.

	NOTE(review): ISBLANK and ISXDIGIT also include
	UTF8_CATEGORY_PRIVATE_USE. How that bit is interpreted together with
	the COMPATIBILITY bit is decided by code outside this header.
*/
#define UTF8_CATEGORY_ISCNTRL \
	(UTF8_CATEGORY_COMPATIBILITY | UTF8_CATEGORY_CONTROL)

#define UTF8_CATEGORY_ISPRINT \
	(UTF8_CATEGORY_COMPATIBILITY | \
	UTF8_CATEGORY_LETTER | \
	UTF8_CATEGORY_NUMBER | \
	UTF8_CATEGORY_PUNCTUATION | \
	UTF8_CATEGORY_SYMBOL | \
	UTF8_CATEGORY_SEPARATOR)

#define UTF8_CATEGORY_ISSPACE \
	(UTF8_CATEGORY_COMPATIBILITY | UTF8_CATEGORY_SEPARATOR_SPACE)

#define UTF8_CATEGORY_ISBLANK \
	(UTF8_CATEGORY_COMPATIBILITY | \
	UTF8_CATEGORY_SEPARATOR_SPACE | \
	UTF8_CATEGORY_PRIVATE_USE)

#define UTF8_CATEGORY_ISGRAPH \
	(UTF8_CATEGORY_COMPATIBILITY | \
	UTF8_CATEGORY_LETTER | \
	UTF8_CATEGORY_NUMBER | \
	UTF8_CATEGORY_PUNCTUATION | \
	UTF8_CATEGORY_SYMBOL)

#define UTF8_CATEGORY_ISPUNCT \
	(UTF8_CATEGORY_COMPATIBILITY | \
	UTF8_CATEGORY_PUNCTUATION | \
	UTF8_CATEGORY_SYMBOL)

#define UTF8_CATEGORY_ISALNUM \
	(UTF8_CATEGORY_COMPATIBILITY | \
	UTF8_CATEGORY_LETTER | \
	UTF8_CATEGORY_NUMBER)

#define UTF8_CATEGORY_ISALPHA \
	(UTF8_CATEGORY_COMPATIBILITY | UTF8_CATEGORY_LETTER)

#define UTF8_CATEGORY_ISUPPER \
	(UTF8_CATEGORY_COMPATIBILITY | UTF8_CATEGORY_LETTER_UPPERCASE)

#define UTF8_CATEGORY_ISLOWER \
	(UTF8_CATEGORY_COMPATIBILITY | UTF8_CATEGORY_LETTER_LOWERCASE)

#define UTF8_CATEGORY_ISDIGIT \
	(UTF8_CATEGORY_COMPATIBILITY | UTF8_CATEGORY_NUMBER)

#define UTF8_CATEGORY_ISXDIGIT \
	(UTF8_CATEGORY_COMPATIBILITY | \
	UTF8_CATEGORY_NUMBER | \
	UTF8_CATEGORY_PRIVATE_USE)
246 
247 
248 
249 
/*
	Size of wchar_t in bytes, and the encoding that follows from it.

	UTF8_WCHAR_SIZE may be predefined by the build (it must then be 2 or 4).
	Otherwise it is detected from the compiler's predefined macros and from
	WCHAR_MAX when <wchar.h> has been included. The limit is spelled 0xFFFF
	rather than UINT16_MAX: an identifier that is not defined evaluates to 0
	inside #if, so without <stdint.h> a comparison against UINT16_MAX would
	classify a 16-bit wchar_t as 4 bytes wide.

	Exactly one of UTF8_WCHAR_UTF32 / UTF8_WCHAR_UTF16 is defined afterwards.
*/
#ifndef UTF8_WCHAR_SIZE
	#if (__SIZEOF_WCHAR_T__ == 4) || (WCHAR_MAX > 0xFFFF) || (__WCHAR_MAX__ > 0xFFFF)
		#define UTF8_WCHAR_SIZE (4)
	#else
		#define UTF8_WCHAR_SIZE (2)
	#endif
#endif

#if (UTF8_WCHAR_SIZE == 4)
	/* wchar_t holds a full code point. */
	#define UTF8_WCHAR_UTF32 (1)
#elif (UTF8_WCHAR_SIZE == 2)
	/* wchar_t holds UTF-16 code units. */
	#define UTF8_WCHAR_UTF16 (1)
#else
	#error Invalid size for wchar_t type.
#endif
267 
/*
	Linkage specifier for public declarations. It may be predefined by the
	build; by default it expands to `extern "C"` under a C++ compiler and
	to nothing otherwise.
*/
#if !defined(UTF8_API)
	#if defined(__cplusplus)
		#define UTF8_API extern "C"
	#else
		#define UTF8_API
	#endif
#endif
275 
/*
	Fixed-width integer types and size_t come from the standard headers.
	Hand-written typedefs of int32_t, uint64_t, ... clash with <stdint.h>
	whenever the platform picks a different underlying type (for example
	uint64_t is `unsigned long` on LP64 glibc), and `int` is not guaranteed
	to be 32 bits wide. <stddef.h> supplies the size_t used by the state
	structures and validation macros in this header.
*/
#include <stddef.h>
#include <stdint.h>

/* One UTF-16 code unit. */
typedef uint16_t utf16_t;

/* One Unicode code point. */
typedef uint32_t unicode_t;
284 
285 #endif /* _UTF8REWIND_H_ */
286 
287 
288 #ifndef _UTF8REWIND_INTERNAL_BASE_H_
289 #define _UTF8REWIND_INTERNAL_BASE_H_
290 
291 
292 
293 
/*
	Wraps the name of a parameter that is intentionally unused. With
	GNU-compatible compilers (unless COMPILER_ICC is defined) the name is
	followed by __attribute__((unused)); everywhere else the macro expands
	to the bare name.
*/
#if !defined(__GNUC__) || defined(COMPILER_ICC)
	#define UTF8_UNUSED(_parameter) _parameter
#else
	#define UTF8_UNUSED(_parameter) _parameter __attribute__ ((unused))
#endif
299 
/*
	Stores UTF8_ERR_<_error> through the `errors` pointer of the enclosing
	function, unless that pointer is null.

	The expansion is a single do { ... } while (0) statement, so
	`UTF8_SET_ERROR(X);` is safe as the unbraced body of an if/else. Callers
	must terminate it with a semicolon.
*/
#define UTF8_SET_ERROR(_error) \
	do { \
		if (errors != 0) { *errors = UTF8_ERR_ ## _error; } \
	} while (0)
302 
/*
	Validates the parameters of a conversion before any transformation.

	UTF8_VALIDATE_PARAMETERS_CHAR is for functions whose output is UTF-8
	(`char* target`). It expands inside a function that has `input`,
	`inputSize`, `target`, `targetSize` and `errors` in scope and returns
	from that function as soon as a check fails:

	* `input` is null: sets INVALID_DATA, returns `_result`.
	* `inputSize` is smaller than one `_inputType`: when `target` is
	  non-null the replacement character string is copied into it (or
	  NOT_ENOUGH_SPACE is set and `_result` returned if it does not fit);
	  then sets INVALID_DATA and returns `_result` plus the length of the
	  replacement character string.
	* `target` is non-null and `targetSize` is zero: sets NOT_ENOUGH_SPACE,
	  returns `_result`.
	* the input and target byte ranges share at least one byte: sets
	  OVERLAPPING_PARAMETERS, returns `_result`.

	The overlap check compares the half-open ranges
	[input, input + inputSize) and [target, target + targetSize) directly.
	A distance-between-centers test computed with integer division rounds
	the centers down and misses some partial overlaps (e.g. a 3-byte input
	whose last byte is the first byte of a 2-byte target). A null target
	cannot overlap anything and is skipped, which also avoids pointer
	arithmetic on a null pointer. Identical pointers are covered by the
	range test because both sizes are non-zero at that point.
*/
#define UTF8_VALIDATE_PARAMETERS_CHAR(_inputType, _result) \
	if (input == 0) { \
		UTF8_SET_ERROR(INVALID_DATA); \
		return _result; \
	} \
	else if (inputSize < sizeof(_inputType)) { \
		if (target != 0) { \
			if (targetSize < REPLACEMENT_CHARACTER_STRING_LENGTH) { \
				UTF8_SET_ERROR(NOT_ENOUGH_SPACE); \
				return _result; \
			} \
			memcpy(target, REPLACEMENT_CHARACTER_STRING, REPLACEMENT_CHARACTER_STRING_LENGTH); \
		} \
		UTF8_SET_ERROR(INVALID_DATA); \
		return _result + REPLACEMENT_CHARACTER_STRING_LENGTH; \
	} \
	if (target != 0 && targetSize == 0) { \
		UTF8_SET_ERROR(NOT_ENOUGH_SPACE); \
		return _result; \
	} \
	if (target != 0) { \
		const char* input_first = (const char*)input; \
		const char* target_first = (const char*)target; \
		if (input_first < target_first + targetSize && \
			target_first < input_first + inputSize) { \
			UTF8_SET_ERROR(OVERLAPPING_PARAMETERS); \
			return _result; \
		} \
	}
339 
/*
	Validates the parameters of a conversion whose output elements have
	type `_outputType`.

	Expands inside a function that has `input`, `inputSize`, `target`,
	`targetSize` and `errors` in scope and returns from that function as
	soon as a check fails:

	* `input` is null: sets INVALID_DATA, returns `_result`.
	* `inputSize` is smaller than one `_inputType`: when `target` is
	  non-null, REPLACEMENT_CHARACTER is stored in its first element (or
	  NOT_ENOUGH_SPACE is set and `_result` returned if one element does not
	  fit); then sets INVALID_DATA and returns
	  `_result + sizeof(_outputType)`.
	* `target` is non-null and `targetSize` cannot hold one `_outputType`:
	  sets NOT_ENOUGH_SPACE, returns `_result`.
	* the input and target byte ranges share at least one byte: sets
	  OVERLAPPING_PARAMETERS, returns `_result`.

	The overlap check compares the half-open byte ranges directly instead of
	the distance between their integer-rounded centers, which misses some
	partial overlaps when a size is odd. A null target is skipped: nothing
	is written through it, and skipping avoids pointer arithmetic on a null
	pointer. Identical pointers are covered by the range test because both
	sizes are non-zero at that point.
*/
#define UTF8_VALIDATE_PARAMETERS(_inputType, _outputType, _result) \
	if (input == 0) { \
		UTF8_SET_ERROR(INVALID_DATA); \
		return _result; \
	} \
	else if (inputSize < sizeof(_inputType)) { \
		if (target != 0) { \
			if (targetSize < sizeof(_outputType)) { \
				UTF8_SET_ERROR(NOT_ENOUGH_SPACE); \
				return _result; \
			} \
			*target = REPLACEMENT_CHARACTER; \
		} \
		UTF8_SET_ERROR(INVALID_DATA); \
		return _result + sizeof(_outputType); \
	} \
	if (target != 0 && targetSize < sizeof(_outputType)) { \
		UTF8_SET_ERROR(NOT_ENOUGH_SPACE); \
		return _result; \
	} \
	if (target != 0) { \
		const char* input_first = (const char*)input; \
		const char* target_first = (const char*)target; \
		if (input_first < target_first + targetSize && \
			target_first < input_first + inputSize) { \
			UTF8_SET_ERROR(OVERLAPPING_PARAMETERS); \
			return _result; \
		} \
	}
373 
374 
375 
376 #endif /* _UTF8REWIND_INTERNAL_BASE_H_ */
377 
378 #ifndef _UTF8REWIND_INTERNAL_CASEMAPPING_H_
379 #define _UTF8REWIND_INTERNAL_CASEMAPPING_H_
380 
381 
382 
383 
384 typedef struct {
385 	const char* src;
386 	char* dst;
387 	size_t src_size;
388 	size_t dst_size;
389 	size_t total_bytes_needed;
390 	unicode_t last_code_point;
391 	size_t locale;
392 	const uint32_t* property_index1;
393 	const uint32_t* property_index2;
394 	const uint32_t* property_data;
395 	uint32_t last_general_category;
396 	uint8_t last_code_point_size;
397 	uint8_t last_canonical_combining_class;
398 	uint8_t quickcheck_flags;
399 } CaseMappingState;
400 
401 uint8_t casemapping_initialize(
402 	CaseMappingState* state,
403 	const char* input, size_t inputSize,
404 	char* target, size_t targetSize,
405 	const uint32_t* propertyIndex1, const uint32_t* propertyIndex2, const uint32_t* propertyData,
406 	uint8_t quickCheck, size_t locale,
407 	int32_t* errors);
408 
409 size_t casemapping_execute(CaseMappingState* state, int32_t* errors);
410 
411 
412 
413 #endif /* _UTF8REWIND_INTERNAL_CASEMAPPING_H_ */
414 #ifndef _UTF8REWIND_INTERNAL_CODEPOINT_H_
415 #define _UTF8REWIND_INTERNAL_CODEPOINT_H_
416 
417 
418 
419 
420 
421 
422 
/* Highest code point of selected Unicode ranges. */
#define MAX_BASIC_LATIN               0x007F
#define MAX_LATIN_1                   0x00FF
#define MAX_BASIC_MULTILINGUAL_PLANE  0xFFFF
#define MAX_LEGAL_UNICODE             0x10FFFF

/*
	U+FFFD REPLACEMENT CHARACTER: as a code point, as its UTF-8 encoding,
	and the length of that encoding in bytes (terminator not counted).
*/
#define REPLACEMENT_CHARACTER                0xFFFD
#define REPLACEMENT_CHARACTER_STRING         "\xEF\xBF\xBD"
#define REPLACEMENT_CHARACTER_STRING_LENGTH  3

/* UTF-16 surrogate ranges, bounds inclusive. */
#define SURROGATE_HIGH_START  0xD800
#define SURROGATE_HIGH_END    0xDBFF
#define SURROGATE_LOW_START   0xDC00
#define SURROGATE_LOW_END     0xDFFF
454 
455 
/*
	Hangul constants. FIRST/LAST bounds are inclusive.

	The derived counts are written as products of the counts they are built
	from: N = V * T (588) and S = L * N (11172).
*/

/* Whole Hangul Jamo block. */
#define HANGUL_JAMO_FIRST  0x1100
#define HANGUL_JAMO_LAST   0x11FF

/* L range. */
#define HANGUL_L_FIRST     0x1100
#define HANGUL_L_LAST      0x1112
#define HANGUL_L_COUNT     19

/* V range. */
#define HANGUL_V_FIRST     0x1161
#define HANGUL_V_LAST      0x1175
#define HANGUL_V_COUNT     21

/* T range. */
#define HANGUL_T_FIRST     0x11A7
#define HANGUL_T_LAST      0x11C2
#define HANGUL_T_COUNT     28

/* VCount * TCount */
#define HANGUL_N_COUNT     (HANGUL_V_COUNT * HANGUL_T_COUNT)

/* S range. */
#define HANGUL_S_FIRST     0xAC00
#define HANGUL_S_LAST      0xD7A3

/* LCount * NCount */
#define HANGUL_S_COUNT     (HANGUL_L_COUNT * HANGUL_N_COUNT)
499 
/* Named code points, listed in ascending order. */

/* Latin letters I and J and their accented / dotted variants. */
#define CP_LATIN_CAPITAL_LETTER_I                 0x0049
#define CP_LATIN_CAPITAL_LETTER_J                 0x004A
#define CP_LATIN_SMALL_LETTER_I                   0x0069
#define CP_LATIN_SMALL_LETTER_J                   0x006A
#define CP_LATIN_CAPITAL_LETTER_I_WITH_GRAVE      0x00CC
#define CP_LATIN_CAPITAL_LETTER_I_WITH_ACUTE      0x00CD
#define CP_LATIN_CAPITAL_LETTER_I_WITH_TILDE      0x0128
#define CP_LATIN_CAPITAL_LETTER_I_WITH_OGONEK     0x012E
#define CP_LATIN_SMALL_LETTER_I_WITH_OGONEK       0x012F
#define CP_LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
#define CP_LATIN_SMALL_LETTER_DOTLESS_I           0x0131

/* Combining marks. */
#define CP_COMBINING_GRAVE_ACCENT                 0x0300
#define CP_COMBINING_ACUTE_ACCENT                 0x0301
#define CP_COMBINING_TILDE_ACCENT                 0x0303
#define CP_COMBINING_DOT_ABOVE                    0x0307
#define CP_COMBINING_GREEK_YPOGEGRAMMENI          0x0345
#define CP_COMBINING_GRAPHEME_JOINER              0x034F

/* Greek. */
#define CP_GREEK_CAPITAL_LETTER_SIGMA             0x03A3
518 
/*
	Canonical combining class values. Every value fits in a uint8_t;
	CCC_INVALID (255) is the largest.
*/
#define CCC_NOT_REORDERED          0
#define CCC_OVERLAY                1
#define CCC_NUKTA                  7
#define CCC_KANA_VOICING           8
#define CCC_VIRAMA                 9

/* Bounds of the fixed-position classes. */
#define CCC_FIXED_POSITION_START   10
#define CCC_FIXED_POSITION_END     199

/* Attached classes. */
#define CCC_ATTACHED_BELOW_LEFT    200
#define CCC_ATTACHED_BELOW         202
#define CCC_ATTACHED_BOTTOM_RIGHT  204
#define CCC_ATTACHED_LEFT          208
#define CCC_ATTACHED_RIGHT         210
#define CCC_ATTACHED_TOP_LEFT      212
#define CCC_ATTACHED_ABOVE         214
#define CCC_ATTACHED_ABOVE_RIGHT   216

/* Positional classes. */
#define CCC_BELOW_LEFT             218
#define CCC_BELOW                  220
#define CCC_BELOW_RIGHT            222
#define CCC_LEFT                   224
#define CCC_RIGHT                  226
#define CCC_ABOVE_LEFT             228
#define CCC_ABOVE                  230
#define CCC_ABOVE_RIGHT            232
#define CCC_DOUBLE_BELOW           233
#define CCC_DOUBLE_ABOVE           234
#define CCC_IOTA_SUBSCRIPT         240

#define CCC_INVALID                255
546 
547 
548 
549 
550 
551 #endif /* _UTF8REWIND_INTERNAL_CODEPOINT_H_ */
552 
553 #ifndef _UTF8REWIND_INTERNAL_DATABASE_H_
554 #define _UTF8REWIND_INTERNAL_DATABASE_H_
555 
556 
557 
558 
/*
	Case mapping quick-check flags. Each enumerator is a distinct bit, so
	several can be combined in one byte.
*/
typedef enum QuickCheckCaseMapped
{
	QuickCheckCaseMapped_Uppercase  = 1 << 0,
	QuickCheckCaseMapped_Lowercase  = 1 << 1,
	QuickCheckCaseMapped_Titlecase  = 1 << 2,
	QuickCheckCaseMapped_Casefolded = 1 << 3
} QuickCheckCaseMapped;
566 
/*
	Outcome of a quick check. The enumerators are consecutive ordinals
	starting at zero: Yes = 0, Maybe = 1, No = 2.
*/
typedef enum QuickCheckResult
{
	QuickCheckResult_Yes   = 0,
	QuickCheckResult_Maybe = 1,
	QuickCheckResult_No    = 2
} QuickCheckResult;
573 
574 #define PROPERTY_INDEX_SHIFT (5)
575 
576 static const unicode_t PROPERTY_DATA_MASK = (1 << PROPERTY_INDEX_SHIFT) - 1;
577 
/*
	Two-stage property lookup.

	The code point's high bits (_cp >> PROPERTY_INDEX_SHIFT) pick a base
	offset from `_indexArray`; its low bits (_cp & PROPERTY_DATA_MASK) are
	added to that base to address `_dataArray`. `_cp` is evaluated twice,
	so it must not have side effects.
*/
#define PROPERTY_GET(_indexArray, _dataArray, _cp) \
	((_dataArray)[ \
		(_indexArray)[(_cp) >> PROPERTY_INDEX_SHIFT] + \
		((_cp) & PROPERTY_DATA_MASK) \
	])

/*
	Per-property shorthands. Each one reads the pair of <Property>IndexPtr /
	<Property>DataPtr tables, which must be declared wherever the macro is
	used.
*/

/* General category. */
#define PROPERTY_GET_GC(_cp) \
	PROPERTY_GET(GeneralCategoryIndexPtr, GeneralCategoryDataPtr, _cp)

/* Canonical combining class. */
#define PROPERTY_GET_CCC(_cp) \
	PROPERTY_GET(CanonicalCombiningClassIndexPtr, CanonicalCombiningClassDataPtr, _cp)

/* Case mapping quick check. */
#define PROPERTY_GET_CM(_cp) \
	PROPERTY_GET(QuickCheckCaseMappedIndexPtr, QuickCheckCaseMappedDataPtr, _cp)

/* Normalization form quick checks. */
#define PROPERTY_GET_NFC(_cp) \
	PROPERTY_GET(QuickCheckNFCIndexPtr, QuickCheckNFCDataPtr, _cp)

#define PROPERTY_GET_NFD(_cp) \
	PROPERTY_GET(QuickCheckNFDIndexPtr, QuickCheckNFDDataPtr, _cp)

#define PROPERTY_GET_NFKC(_cp) \
	PROPERTY_GET(QuickCheckNFKCIndexPtr, QuickCheckNFKCDataPtr, _cp)

#define PROPERTY_GET_NFKD(_cp) \
	PROPERTY_GET(QuickCheckNFKDIndexPtr, QuickCheckNFKDDataPtr, _cp)
603 
604 
605 
606 #endif /* _UTF8REWIND_INTERNAL_DATABASE_H_ */
607 
608 
609 #ifndef _UTF8REWIND_INTERNAL_STREAMING_H_
610 #define _UTF8REWIND_INTERNAL_STREAMING_H_
611 
612 
613 
614 
615 /*
616 	UAX15-D4. Stream-Safe Text Process
617 
618 	This is the process of producing a Unicode string in Stream-Safe Text Format by processing that string
619 	from start to finish, inserting U+034F COMBINING GRAPHEME JOINER (CGJ) within long sequences of
620 	non-starters. The exact position of the inserted CGJs are determined according to the following algorithm,
621 	which describes the generation of an output string from an input string:
622 
623 	* If the input string is empty, return an empty output string.
624 	* Set nonStarterCount to zero.
625 	* For each code point C in the input string:
626 		* Produce the NFKD decomposition S.
627 		* If nonStarterCount plus the number of initial non-starters in S is greater than 30, append a CGJ to
628 			the output string and set the nonStarterCount to zero.
629 		* Append C to the output string.
630 		* If there are no starters in S, increment nonStarterCount by the number of code points in S; otherwise,
631 			set nonStarterCount to the number of trailing non-starters in S (which may be zero).
632 	* Return the output string.
633 */
634 
/* Non-starter limit from the Stream-Safe Text Process quoted above. */
#define STREAM_SAFE_MAX    30

/* Number of entries in each per-code-point array of a stream buffer. */
#define STREAM_BUFFER_MAX  32
637 
638 typedef struct {
639 	const char* src;
640 	size_t src_size;
641 	uint8_t index;
642 	uint8_t current;
643 	uint8_t filled;
644 	uint8_t stable;
645 	uint8_t last_length;
646 	unicode_t codepoint[STREAM_BUFFER_MAX];
647 	uint8_t quick_check[STREAM_BUFFER_MAX];
648 	uint8_t canonical_combining_class[STREAM_BUFFER_MAX];
649 } StreamState;
650 
651 
652 
653 
654 #endif /* _UTF8REWIND_INTERNAL_STREAMING_H_ */
655 
656 #ifndef _UTF8REWIND_INTERNAL_COMPOSITION_H_
657 #define _UTF8REWIND_INTERNAL_COMPOSITION_H_
658 
659 
660 
661 
662 typedef struct {
663 	StreamState* input;
664 	StreamState* output;
665 	const size_t* qc_index;
666 	const uint8_t* qc_data;
667 } ComposeState;
668 
669 
670 
671 #endif /* _UTF8REWIND_INTERNAL_COMPOSITION_H_ */
672 
673 #ifndef _UTF8REWIND_INTERNAL_DECOMPOSITION_H_
674 #define _UTF8REWIND_INTERNAL_DECOMPOSITION_H_
675 
676 
677 
678 
679 typedef struct {
680 	StreamState* input;
681 	StreamState* output;
682 	const size_t* qc_index;
683 	const uint8_t* qc_data;
684 	const uint32_t* property_index1;
685 	const uint32_t* property_index2;
686 	const uint32_t* property_data;
687 	unicode_t cache_codepoint[STREAM_BUFFER_MAX];
688 	uint8_t cache_canonical_combining_class[STREAM_BUFFER_MAX];
689 	uint8_t cache_current;
690 	uint8_t cache_filled;
691 } DecomposeState;
692 
693 
694 
695 #endif /* _UTF8REWIND_INTERNAL_DECOMPOSITION_H_ */
696 
697 #ifndef _UTF8REWIND_UNICODEDATABASE_H_
698 #define _UTF8REWIND_UNICODEDATABASE_H_
699 
700 
701 
702 
703 typedef struct {
704 	unicode_t codepoint;
705 	uint32_t length_and_offset;
706 } DecompositionRecord;
707 
708 typedef struct {
709 	uint64_t key;
710 	unicode_t value;
711 } CompositionRecord;
712 
713 
714 
715 #endif /* _UTF8REWIND_UNICODEDATABASE_H_ */
716 
717 
718 #endif /*U8_H*/
719