1 /**********************************************************************
2 unicode.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2019 K.Kosako
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
/* Record type returned by unicode_lookup_property_name(): callers in this
   file read `ctype` as the character-type id of the property that was
   looked up. */
struct PoolPropertyNameCtype {
  short name;   /* presumably an offset into a generated name pool -- confirm
                   against the included property data file */
  short ctype;  /* character-type id associated with the name */
};
36
/* Evaluates to 1 when the bit selected by CTYPE_TO_BIT(ctype) is set in the
   EncUNICODE_ISO_8859_1_CtypeTable entry for `code`, otherwise to 0.
   `code` is used directly as an index into the 256-entry table, so the
   caller must guarantee code < 256. */
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
  (0 != (EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)))
39
/* Character-type bit masks for code points 0x00-0xFF, one entry per code
   point.  Entries are tested with ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(),
   i.e. masked with CTYPE_TO_BIT(ctype).  The comment in front of each row
   is the code point of the row's first entry. */
static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xa8 */ 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
  /* 0xb0 */ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
  /* 0xb8 */ 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  /* 0xc0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  /* 0xd8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  /* 0xf8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
74
75 #include "st.h"
76
77 #include "unicode_fold_data.c"
78
79 extern int
onigenc_unicode_mbc_case_fold(OnigEncoding enc,OnigCaseFoldType flag ARG_UNUSED,const UChar ** pp,const UChar * end,UChar * fold)80 onigenc_unicode_mbc_case_fold(OnigEncoding enc,
81 OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
82 UChar* fold)
83 {
84 const struct ByUnfoldKey* buk;
85
86 OnigCodePoint code;
87 int i, len, rlen;
88 const UChar *p = *pp;
89
90 code = ONIGENC_MBC_TO_CODE(enc, p, end);
91 len = enclen(enc, p);
92 *pp += len;
93
94 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
95 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
96 if (code == 0x0130) {
97 return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
98 }
99 #if 0
100 if (code == 0x0049) {
101 return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
102 }
103 #endif
104 }
105 #endif
106
107 buk = onigenc_unicode_unfold_key(code);
108 if (buk != 0) {
109 if (buk->fold_len == 1) {
110 return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
111 }
112 else {
113 OnigCodePoint* addr;
114
115 FOLDS_FOLD_ADDR_BUK(buk, addr);
116 rlen = 0;
117 for (i = 0; i < buk->fold_len; i++) {
118 OnigCodePoint c = addr[i];
119 len = ONIGENC_CODE_TO_MBC(enc, c, fold);
120 fold += len;
121 rlen += len;
122 }
123 return rlen;
124 }
125 }
126
127 for (i = 0; i < len; i++) {
128 *fold++ = *p++;
129 }
130 return len;
131 }
132
133 static int
apply_case_fold1(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)134 apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
135 {
136 int i, j, k, n, r;
137
138 for (i = from; i < to; ) {
139 OnigCodePoint fold = *FOLDS1_FOLD(i);
140 n = FOLDS1_UNFOLDS_NUM(i);
141 for (j = 0; j < n; j++) {
142 OnigCodePoint unfold = FOLDS1_UNFOLDS(i)[j];
143
144 r = (*f)(fold, &unfold, 1, arg);
145 if (r != 0) return r;
146 r = (*f)(unfold, &fold, 1, arg);
147 if (r != 0) return r;
148
149 for (k = 0; k < j; k++) {
150 OnigCodePoint unfold2 = FOLDS1_UNFOLDS(i)[k];
151 r = (*f)(unfold, &unfold2, 1, arg);
152 if (r != 0) return r;
153 r = (*f)(unfold2, &unfold, 1, arg);
154 if (r != 0) return r;
155 }
156 }
157
158 i = FOLDS1_NEXT_INDEX(i);
159 }
160
161 return 0;
162 }
163
164 static int
apply_case_fold2(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)165 apply_case_fold2(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
166 {
167 int i, j, k, n, r;
168
169 for (i = from; i < to; ) {
170 OnigCodePoint* fold = FOLDS2_FOLD(i);
171 n = FOLDS2_UNFOLDS_NUM(i);
172 for (j = 0; j < n; j++) {
173 OnigCodePoint unfold = FOLDS2_UNFOLDS(i)[j];
174
175 r = (*f)(unfold, fold, 2, arg);
176 if (r != 0) return r;
177
178 for (k = 0; k < j; k++) {
179 OnigCodePoint unfold2 = FOLDS2_UNFOLDS(i)[k];
180 r = (*f)(unfold, &unfold2, 1, arg);
181 if (r != 0) return r;
182 r = (*f)(unfold2, &unfold, 1, arg);
183 if (r != 0) return r;
184 }
185 }
186
187 i = FOLDS2_NEXT_INDEX(i);
188 }
189
190 return 0;
191 }
192
193 static int
apply_case_fold3(int from,int to,OnigApplyAllCaseFoldFunc f,void * arg)194 apply_case_fold3(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
195 {
196 int i, j, k, n, r;
197
198 for (i = from; i < to; ) {
199 OnigCodePoint* fold = FOLDS3_FOLD(i);
200 n = FOLDS3_UNFOLDS_NUM(i);
201 for (j = 0; j < n; j++) {
202 OnigCodePoint unfold = FOLDS3_UNFOLDS(i)[j];
203
204 r = (*f)(unfold, fold, 3, arg);
205 if (r != 0) return r;
206
207 for (k = 0; k < j; k++) {
208 OnigCodePoint unfold2 = FOLDS3_UNFOLDS(i)[k];
209 r = (*f)(unfold, &unfold2, 1, arg);
210 if (r != 0) return r;
211 r = (*f)(unfold2, &unfold, 1, arg);
212 if (r != 0) return r;
213 }
214 }
215
216 i = FOLDS3_NEXT_INDEX(i);
217 }
218
219 return 0;
220 }
221
222 extern int
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,OnigApplyAllCaseFoldFunc f,void * arg)223 onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
224 OnigApplyAllCaseFoldFunc f, void* arg)
225 {
226 int r;
227
228 r = apply_case_fold1(0, FOLDS1_NORMAL_END_INDEX, f, arg);
229 if (r != 0) return r;
230
231 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
232 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
233 code = 0x0131;
234 r = (*f)(0x0049, &code, 1, arg);
235 if (r != 0) return r;
236 code = 0x0049;
237 r = (*f)(0x0131, &code, 1, arg);
238 if (r != 0) return r;
239
240 code = 0x0130;
241 r = (*f)(0x0069, &code, 1, arg);
242 if (r != 0) return r;
243 code = 0x0069;
244 r = (*f)(0x0130, &code, 1, arg);
245 if (r != 0) return r;
246 }
247 else {
248 #endif
249 r = apply_case_fold1(FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);
250 if (r != 0) return r;
251 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
252 }
253 #endif
254
255 if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
256 return 0;
257
258 r = apply_case_fold2(0, FOLDS2_NORMAL_END_INDEX, f, arg);
259 if (r != 0) return r;
260
261 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
262 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
263 #endif
264 r = apply_case_fold2(FOLDS2_NORMAL_END_INDEX, FOLDS2_END_INDEX, f, arg);
265 if (r != 0) return r;
266 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
267 }
268 #endif
269
270 r = apply_case_fold3(0, FOLDS3_NORMAL_END_INDEX, f, arg);
271 if (r != 0) return r;
272
273 return 0;
274 }
275
276 extern int
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,OnigCaseFoldType flag,const OnigUChar * p,const OnigUChar * end,OnigCaseFoldCodeItem items[])277 onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
278 OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
279 OnigCaseFoldCodeItem items[])
280 {
281 int n, m, i, j, k, len, lens[3];
282 int index;
283 int fn, ncs[3];
284 OnigCodePoint cs[3][4];
285 OnigCodePoint code, codes[3], orig_codes[3];
286 const struct ByUnfoldKey* buk1;
287
288 n = 0;
289
290 code = ONIGENC_MBC_TO_CODE(enc, p, end);
291 len = enclen(enc, p);
292
293 #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
294 if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
295 if (code == 0x0049) {
296 items[0].byte_len = len;
297 items[0].code_len = 1;
298 items[0].code[0] = 0x0131;
299 return 1;
300 }
301 else if (code == 0x0130) {
302 items[0].byte_len = len;
303 items[0].code_len = 1;
304 items[0].code[0] = 0x0069;
305 return 1;
306 }
307 else if (code == 0x0131) {
308 items[0].byte_len = len;
309 items[0].code_len = 1;
310 items[0].code[0] = 0x0049;
311 return 1;
312 }
313 else if (code == 0x0069) {
314 items[0].byte_len = len;
315 items[0].code_len = 1;
316 items[0].code[0] = 0x0130;
317 return 1;
318 }
319 }
320 #endif
321
322 orig_codes[0] = code;
323 lens[0] = len;
324 p += len;
325
326 buk1 = onigenc_unicode_unfold_key(orig_codes[0]);
327 if (buk1 != 0 && buk1->fold_len == 1) {
328 codes[0] = *FOLDS1_FOLD(buk1->index);
329 }
330 else
331 codes[0] = orig_codes[0];
332
333 if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
334 goto fold1;
335
336 if (p < end) {
337 const struct ByUnfoldKey* buk;
338
339 code = ONIGENC_MBC_TO_CODE(enc, p, end);
340 orig_codes[1] = code;
341 len = enclen(enc, p);
342 lens[1] = lens[0] + len;
343 buk = onigenc_unicode_unfold_key(orig_codes[1]);
344 if (buk != 0 && buk->fold_len == 1) {
345 codes[1] = *FOLDS1_FOLD(buk->index);
346 }
347 else
348 codes[1] = orig_codes[1];
349
350 p += len;
351 if (p < end) {
352 code = ONIGENC_MBC_TO_CODE(enc, p, end);
353 orig_codes[2] = code;
354 len = enclen(enc, p);
355 lens[2] = lens[1] + len;
356 buk = onigenc_unicode_unfold_key(orig_codes[2]);
357 if (buk != 0 && buk->fold_len == 1) {
358 codes[2] = *FOLDS1_FOLD(buk->index);
359 }
360 else
361 codes[2] = orig_codes[2];
362
363 index = onigenc_unicode_fold3_key(codes);
364 if (index >= 0) {
365 m = FOLDS3_UNFOLDS_NUM(index);
366 for (i = 0; i < m; i++) {
367 items[n].byte_len = lens[2];
368 items[n].code_len = 1;
369 items[n].code[0] = FOLDS3_UNFOLDS(index)[i];
370 n++;
371 }
372
373 for (fn = 0; fn < 3; fn++) {
374 int sindex;
375 cs[fn][0] = FOLDS3_FOLD(index)[fn];
376 ncs[fn] = 1;
377 sindex = onigenc_unicode_fold1_key(&cs[fn][0]);
378 if (sindex >= 0) {
379 int m = FOLDS1_UNFOLDS_NUM(sindex);
380 for (i = 0; i < m; i++) {
381 cs[fn][i+1] = FOLDS1_UNFOLDS(sindex)[i];
382 }
383 ncs[fn] += m;
384 }
385 }
386
387 for (i = 0; i < ncs[0]; i++) {
388 for (j = 0; j < ncs[1]; j++) {
389 for (k = 0; k < ncs[2]; k++) {
390 items[n].byte_len = lens[2];
391 items[n].code_len = 3;
392 items[n].code[0] = cs[0][i];
393 items[n].code[1] = cs[1][j];
394 items[n].code[2] = cs[2][k];
395 if (items[n].code[0] == orig_codes[0] &&
396 items[n].code[1] == orig_codes[1] &&
397 items[n].code[2] == orig_codes[2])
398 continue;
399 n++;
400 }
401 }
402 }
403
404 return n;
405 }
406 }
407
408 index = onigenc_unicode_fold2_key(codes);
409 if (index >= 0) {
410 m = FOLDS2_UNFOLDS_NUM(index);
411 for (i = 0; i < m; i++) {
412 items[n].byte_len = lens[1];
413 items[n].code_len = 1;
414 items[n].code[0] = FOLDS2_UNFOLDS(index)[i];
415 n++;
416 }
417
418 for (fn = 0; fn < 2; fn++) {
419 int sindex;
420 cs[fn][0] = FOLDS2_FOLD(index)[fn];
421 ncs[fn] = 1;
422 sindex = onigenc_unicode_fold1_key(&cs[fn][0]);
423 if (sindex >= 0) {
424 int m = FOLDS1_UNFOLDS_NUM(sindex);
425 for (i = 0; i < m; i++) {
426 cs[fn][i+1] = FOLDS1_UNFOLDS(sindex)[i];
427 }
428 ncs[fn] += m;
429 }
430 }
431
432 for (i = 0; i < ncs[0]; i++) {
433 for (j = 0; j < ncs[1]; j++) {
434 items[n].byte_len = lens[1];
435 items[n].code_len = 2;
436 items[n].code[0] = cs[0][i];
437 items[n].code[1] = cs[1][j];
438 if (items[n].code[0] == orig_codes[0] &&
439 items[n].code[1] == orig_codes[1])
440 continue;
441 n++;
442 }
443 }
444
445 return n;
446 }
447 }
448
449 fold1:
450 if (buk1 != 0) {
451 if (buk1->fold_len == 1) {
452 int un;
453 items[0].byte_len = lens[0];
454 items[0].code_len = 1;
455 items[0].code[0] = *FOLDS1_FOLD(buk1->index);
456 n++;
457
458 un = FOLDS1_UNFOLDS_NUM(buk1->index);
459 for (i = 0; i < un; i++) {
460 OnigCodePoint unfold = FOLDS1_UNFOLDS(buk1->index)[i];
461 if (unfold != orig_codes[0]) {
462 items[n].byte_len = lens[0];
463 items[n].code_len = 1;
464 items[n].code[0] = unfold;
465 n++;
466 }
467 }
468 }
469 else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
470 if (buk1->fold_len == 2) {
471 m = FOLDS2_UNFOLDS_NUM(buk1->index);
472 for (i = 0; i < m; i++) {
473 OnigCodePoint unfold = FOLDS2_UNFOLDS(buk1->index)[i];
474 if (unfold == orig_codes[0]) continue;
475
476 items[n].byte_len = lens[0];
477 items[n].code_len = 1;
478 items[n].code[0] = unfold;
479 n++;
480 }
481
482 for (fn = 0; fn < 2; fn++) {
483 int index;
484 cs[fn][0] = FOLDS2_FOLD(buk1->index)[fn];
485 ncs[fn] = 1;
486 index = onigenc_unicode_fold1_key(&cs[fn][0]);
487 if (index >= 0) {
488 int m = FOLDS1_UNFOLDS_NUM(index);
489 for (i = 0; i < m; i++) {
490 cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
491 }
492 ncs[fn] += m;
493 }
494 }
495
496 for (i = 0; i < ncs[0]; i++) {
497 for (j = 0; j < ncs[1]; j++) {
498 items[n].byte_len = lens[0];
499 items[n].code_len = 2;
500 items[n].code[0] = cs[0][i];
501 items[n].code[1] = cs[1][j];
502 n++;
503 }
504 }
505 }
506 else { /* fold_len == 3 */
507 m = FOLDS3_UNFOLDS_NUM(buk1->index);
508 for (i = 0; i < m; i++) {
509 OnigCodePoint unfold = FOLDS3_UNFOLDS(buk1->index)[i];
510 if (unfold == orig_codes[0]) continue;
511
512 items[n].byte_len = lens[0];
513 items[n].code_len = 1;
514 items[n].code[0] = unfold;
515 n++;
516 }
517
518 for (fn = 0; fn < 3; fn++) {
519 int index;
520 cs[fn][0] = FOLDS3_FOLD(buk1->index)[fn];
521 ncs[fn] = 1;
522 index = onigenc_unicode_fold1_key(&cs[fn][0]);
523 if (index >= 0) {
524 int m = FOLDS1_UNFOLDS_NUM(index);
525 for (i = 0; i < m; i++) {
526 cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
527 }
528 ncs[fn] += m;
529 }
530 }
531
532 for (i = 0; i < ncs[0]; i++) {
533 for (j = 0; j < ncs[1]; j++) {
534 for (k = 0; k < ncs[2]; k++) {
535 items[n].byte_len = lens[0];
536 items[n].code_len = 3;
537 items[n].code[0] = cs[0][i];
538 items[n].code[1] = cs[1][j];
539 items[n].code[2] = cs[2][k];
540 n++;
541 }
542 }
543 }
544 }
545 }
546 }
547 else {
548 int index = onigenc_unicode_fold1_key(orig_codes);
549 if (index >= 0) {
550 int m = FOLDS1_UNFOLDS_NUM(index);
551 for (i = 0; i < m; i++) {
552 items[n].byte_len = lens[0];
553 items[n].code_len = 1;
554 items[n].code[0] = FOLDS1_UNFOLDS(index)[i];
555 n++;
556 }
557 }
558 }
559
560 return n;
561 }
562
563 #ifdef USE_UNICODE_PROPERTIES
564 #include "unicode_property_data.c"
565 #else
566 #include "unicode_property_data_posix.c"
567 #endif
568
569
570 #ifdef USE_UNICODE_WORD_BREAK
571
/* Word-break classes returned by wb_get_type().  WB_Any (0) is the value
   used for code points that no WB_RANGES entry covers. */
enum WB_TYPE {
  WB_Any                = 0,
  WB_ALetter            = 1,
  WB_CR                 = 2,
  WB_Double_Quote       = 3,
  WB_Extend             = 4,
  WB_ExtendNumLet       = 5,
  WB_Format             = 6,
  WB_Hebrew_Letter      = 7,
  WB_Katakana           = 8,
  WB_LF                 = 9,
  WB_MidLetter          = 10,
  WB_MidNum             = 11,
  WB_MidNumLet          = 12,
  WB_Newline            = 13,
  WB_Numeric            = 14,
  WB_Regional_Indicator = 15,
  WB_Single_Quote       = 16,
  WB_WSegSpace          = 17,
  WB_ZWJ                = 18
};
593
594 typedef struct {
595 OnigCodePoint start;
596 OnigCodePoint end;
597 enum WB_TYPE type;
598 } WB_RANGE_TYPE;
599
600 #include "unicode_wb_data.c"
601
602 static enum WB_TYPE
wb_get_type(OnigCodePoint code)603 wb_get_type(OnigCodePoint code)
604 {
605 OnigCodePoint low, high, x;
606 enum WB_TYPE type;
607
608 for (low = 0, high = (OnigCodePoint )WB_RANGE_NUM; low < high; ) {
609 x = (low + high) >> 1;
610 if (code > WB_RANGES[x].end)
611 low = x + 1;
612 else
613 high = x;
614 }
615
616 type = (low < (OnigCodePoint )WB_RANGE_NUM &&
617 code >= WB_RANGES[low].start) ?
618 WB_RANGES[low].type : WB_Any;
619
620 return type;
621 }
622
/* Classes that are skipped when looking past a character (used for WB4):
   Extend, Format and ZWJ. */
#define IS_WB_IGNORE_TAIL(t) \
  ((t) == WB_Extend || (t) == WB_Format || (t) == WB_ZWJ)

/* "AHLetter": ALetter or Hebrew_Letter. */
#define IS_WB_AHLetter(t) \
  ((t) == WB_ALetter || (t) == WB_Hebrew_Letter)

/* "MidNumLetQ": MidNumLet or Single_Quote. */
#define IS_WB_MidNumLetQ(t) \
  ((t) == WB_MidNumLet || (t) == WB_Single_Quote)
626
627 static int
wb_get_next_main_code(OnigEncoding enc,UChar * p,const UChar * end,OnigCodePoint * rcode,enum WB_TYPE * rtype)628 wb_get_next_main_code(OnigEncoding enc, UChar* p, const UChar* end,
629 OnigCodePoint* rcode, enum WB_TYPE* rtype)
630 {
631 OnigCodePoint code;
632 enum WB_TYPE type;
633
634 while (TRUE) {
635 p += enclen(enc, p);
636 if (p >= end) break;
637
638 code = ONIGENC_MBC_TO_CODE(enc, p, end);
639 type = wb_get_type(code);
640 if (! IS_WB_IGNORE_TAIL(type)) {
641 *rcode = code;
642 *rtype = type;
643 return 1;
644 }
645 }
646
647 return 0;
648 }
649
650 extern int
onigenc_wb_is_break_position(OnigEncoding enc,UChar * p,UChar * prev,const UChar * start,const UChar * end)651 onigenc_wb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
652 const UChar* start, const UChar* end)
653 {
654 int r;
655 UChar* pp;
656 OnigCodePoint cfrom;
657 OnigCodePoint cfrom2;
658 OnigCodePoint cto;
659 OnigCodePoint cto2;
660 enum WB_TYPE from;
661 enum WB_TYPE from2;
662 enum WB_TYPE to;
663 enum WB_TYPE to2;
664
665 /* WB1: sot / Any */
666 if (p == start) return TRUE;
667 /* WB2: Any / eot */
668 if (p == end) return TRUE;
669
670 if (IS_NULL(prev)) {
671 prev = onigenc_get_prev_char_head(enc, start, p);
672 if (IS_NULL(prev)) return TRUE;
673 }
674
675 cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);
676 cto = ONIGENC_MBC_TO_CODE(enc, p, end);
677
678 from = wb_get_type(cfrom);
679 to = wb_get_type(cto);
680
681 /* short cut */
682 if (from == 0 && to == 0) goto WB999;
683
684 /* WB3: CR + LF */
685 if (from == WB_CR && to == WB_LF) return FALSE;
686
687 /* WB3a: (Newline|CR|LF) / */
688 if (from == WB_Newline || from == WB_CR || from == WB_LF) return TRUE;
689 /* WB3b: / (Newline|CR|LF) */
690 if (to == WB_Newline || to == WB_CR || to == WB_LF) return TRUE;
691
692 /* WB3c: ZWJ + {Extended_Pictographic} */
693 if (from == WB_ZWJ) {
694 if (onigenc_unicode_is_code_ctype(cto, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
695 return FALSE;
696 }
697
698 /* WB3d: WSegSpace + WSegSpace */
699 if (from == WB_WSegSpace && to == WB_WSegSpace) return FALSE;
700
701 /* WB4: X (Extend|Format|ZWJ)* -> X */
702 if (IS_WB_IGNORE_TAIL(to)) return FALSE;
703 if (IS_WB_IGNORE_TAIL(from)) {
704 while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
705 prev = pp;
706 cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);
707 from = wb_get_type(cfrom);
708 if (! IS_WB_IGNORE_TAIL(from))
709 break;
710 }
711 }
712
713 if (IS_WB_AHLetter(from)) {
714 /* WB5: AHLetter + AHLetter */
715 if (IS_WB_AHLetter(to)) return FALSE;
716
717 /* WB6: AHLetter + (MidLetter | MidNumLetQ) AHLetter */
718 if (to == WB_MidLetter || IS_WB_MidNumLetQ(to)) {
719 r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
720 if (r == 1) {
721 if (IS_WB_AHLetter(to2)) return FALSE;
722 }
723 }
724 }
725
726 /* WB7: AHLetter (MidLetter | MidNumLetQ) + AHLetter */
727 if (from == WB_MidLetter || IS_WB_MidNumLetQ(from)) {
728 if (IS_WB_AHLetter(to)) {
729 from2 = WB_Any;
730 while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
731 prev = pp;
732 cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
733 from2 = wb_get_type(cfrom2);
734 if (! IS_WB_IGNORE_TAIL(from2))
735 break;
736 }
737
738 if (IS_WB_AHLetter(from2)) return FALSE;
739 }
740 }
741
742 if (from == WB_Hebrew_Letter) {
743 /* WB7a: Hebrew_Letter + Single_Quote */
744 if (to == WB_Single_Quote) return FALSE;
745
746 /* WB7b: Hebrew_Letter + Double_Quote Hebrew_Letter */
747 if (to == WB_Double_Quote) {
748 r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
749 if (r == 1) {
750 if (to2 == WB_Hebrew_Letter) return FALSE;
751 }
752 }
753 }
754
755 /* WB7c: Hebrew_Letter Double_Quote + Hebrew_Letter */
756 if (from == WB_Double_Quote) {
757 if (to == WB_Hebrew_Letter) {
758 from2 = WB_Any;
759 while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
760 prev = pp;
761 cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
762 from2 = wb_get_type(cfrom2);
763 if (! IS_WB_IGNORE_TAIL(from2))
764 break;
765 }
766
767 if (from2 == WB_Hebrew_Letter) return FALSE;
768 }
769 }
770
771 if (to == WB_Numeric) {
772 /* WB8: Numeric + Numeric */
773 if (from == WB_Numeric) return FALSE;
774
775 /* WB9: AHLetter + Numeric */
776 if (IS_WB_AHLetter(from)) return FALSE;
777
778 /* WB11: Numeric (MidNum | MidNumLetQ) + Numeric */
779 if (from == WB_MidNum || IS_WB_MidNumLetQ(from)) {
780 from2 = WB_Any;
781 while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
782 prev = pp;
783 cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
784 from2 = wb_get_type(cfrom2);
785 if (! IS_WB_IGNORE_TAIL(from2))
786 break;
787 }
788
789 if (from2 == WB_Numeric) return FALSE;
790 }
791 }
792
793 if (from == WB_Numeric) {
794 /* WB10: Numeric + AHLetter */
795 if (IS_WB_AHLetter(to)) return FALSE;
796
797 /* WB12: Numeric + (MidNum | MidNumLetQ) Numeric */
798 if (to == WB_MidNum || IS_WB_MidNumLetQ(to)) {
799 r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
800 if (r == 1) {
801 if (to2 == WB_Numeric) return FALSE;
802 }
803 }
804 }
805
806 /* WB13: Katakana + Katakana */
807 if (from == WB_Katakana && to == WB_Katakana) return FALSE;
808
809 /* WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) + ExtendNumLet */
810 if (IS_WB_AHLetter(from) || from == WB_Numeric || from == WB_Katakana
811 || from == WB_ExtendNumLet) {
812 if (to == WB_ExtendNumLet) return FALSE;
813 }
814
815 /* WB13b: ExtendNumLet + (AHLetter | Numeric | Katakana) */
816 if (from == WB_ExtendNumLet) {
817 if (IS_WB_AHLetter(to) || to == WB_Numeric || to == WB_Katakana)
818 return FALSE;
819 }
820
821
822 /* WB15: sot (RI RI)* RI + RI */
823 /* WB16: [^RI] (RI RI)* RI + RI */
824 if (from == WB_Regional_Indicator && to == WB_Regional_Indicator) {
825 int n = 0;
826 while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
827 cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
828 from2 = wb_get_type(cfrom2);
829 if (from2 != WB_Regional_Indicator)
830 break;
831
832 n++;
833 }
834 if ((n % 2) == 0) return FALSE;
835 }
836
837 WB999:
838 /* WB999: Any / Any */
839 return TRUE;
840 }
841
842 #endif /* USE_UNICODE_WORD_BREAK */
843
844
845 #ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
846
/* Verdict of unicode_egcb_is_break_2code() for a pair of adjacent code
   points.  The two UNDEF values mean that the pair alone is not enough and
   the caller has to inspect the preceding text. */
enum EGCB_BREAK_TYPE {
  EGCB_NOT_BREAK         = 0,  /* no boundary between the two */
  EGCB_BREAK             = 1,  /* boundary between the two */
  EGCB_BREAK_UNDEF_GB11  = 2,  /* undecided: ZWJ followed by pictographic */
  EGCB_BREAK_UNDEF_RI_RI = 3   /* undecided: two regional indicators */
};
853
/* Grapheme-cluster-break classes returned by egcb_get_type().  The numeric
   order matters: IS_CONTROL_CR_LF() and IS_HANGUL() test value ranges. */
enum EGCB_TYPE {
  EGCB_Other              = 0,
  EGCB_CR                 = 1,
  EGCB_LF                 = 2,
  EGCB_Control            = 3,
  EGCB_Extend             = 4,
  EGCB_Prepend            = 5,
  EGCB_Regional_Indicator = 6,
  EGCB_SpacingMark        = 7,
  EGCB_ZWJ                = 8,
#if 0
  /* obsoleted */
  EGCB_E_Base             = 9,
  EGCB_E_Base_GAZ         = 10,
  EGCB_E_Modifier         = 11,
  EGCB_Glue_After_Zwj     = 12,
#endif
  /* Hangul syllable classes: everything from EGCB_L upwards. */
  EGCB_L                  = 13,
  EGCB_LV                 = 14,
  EGCB_LVT                = 15,
  EGCB_T                  = 16,
  EGCB_V                  = 17
};
877
878 typedef struct {
879 OnigCodePoint start;
880 OnigCodePoint end;
881 enum EGCB_TYPE type;
882 } EGCB_RANGE_TYPE;
883
884 #include "unicode_egcb_data.c"
885
886 static enum EGCB_TYPE
egcb_get_type(OnigCodePoint code)887 egcb_get_type(OnigCodePoint code)
888 {
889 OnigCodePoint low, high, x;
890 enum EGCB_TYPE type;
891
892 for (low = 0, high = (OnigCodePoint )EGCB_RANGE_NUM; low < high; ) {
893 x = (low + high) >> 1;
894 if (code > EGCB_RANGES[x].end)
895 low = x + 1;
896 else
897 high = x;
898 }
899
900 type = (low < (OnigCodePoint )EGCB_RANGE_NUM &&
901 code >= EGCB_RANGES[low].start) ?
902 EGCB_RANGES[low].type : EGCB_Other;
903
904 return type;
905 }
906
/* Both macros take an enum EGCB_TYPE value (not a code point) and rely on
   the numeric layout of that enum. */

/* True for EGCB_CR, EGCB_LF and EGCB_Control (values 1..3). */
#define IS_CONTROL_CR_LF(type) \
  ((type) <= EGCB_Control && (type) >= EGCB_CR)

/* True for the Hangul classes, i.e. every value from EGCB_L upwards. */
#define IS_HANGUL(type) \
  ((type) >= EGCB_L)
909
910 /* GB1 and GB2 are outside of this function. */
911 static enum EGCB_BREAK_TYPE
unicode_egcb_is_break_2code(OnigCodePoint from_code,OnigCodePoint to_code)912 unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)
913 {
914 enum EGCB_TYPE from;
915 enum EGCB_TYPE to;
916
917 from = egcb_get_type(from_code);
918 to = egcb_get_type(to_code);
919
920 /* short cut */
921 if (from == 0 && to == 0) goto GB999;
922
923 /* GB3 */
924 if (from == EGCB_CR && to == EGCB_LF) return EGCB_NOT_BREAK;
925 /* GB4 */
926 if (IS_CONTROL_CR_LF(from)) return EGCB_BREAK;
927 /* GB5 */
928 if (IS_CONTROL_CR_LF(to)) return EGCB_BREAK;
929
930 if (IS_HANGUL(from) && IS_HANGUL(to)) {
931 /* GB6 */
932 if (from == EGCB_L && to != EGCB_T) return EGCB_NOT_BREAK;
933 /* GB7 */
934 if ((from == EGCB_LV || from == EGCB_V)
935 && (to == EGCB_V || to == EGCB_T)) return EGCB_NOT_BREAK;
936
937 /* GB8 */
938 if ((to == EGCB_T) && (from == EGCB_LVT || from == EGCB_T))
939 return EGCB_NOT_BREAK;
940
941 goto GB999;
942 }
943
944 /* GB9 */
945 if (to == EGCB_Extend || to == EGCB_ZWJ) return EGCB_NOT_BREAK;
946
947 /* GB9a */
948 if (to == EGCB_SpacingMark) return EGCB_NOT_BREAK;
949 /* GB9b */
950 if (from == EGCB_Prepend) return EGCB_NOT_BREAK;
951
952 /* GB10 removed */
953
954 /* GB11 */
955 if (from == EGCB_ZWJ) {
956 if (onigenc_unicode_is_code_ctype(to_code, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
957 return EGCB_BREAK_UNDEF_GB11;
958
959 goto GB999;
960 }
961
962 /* GB12, GB13 */
963 if (from == EGCB_Regional_Indicator && to == EGCB_Regional_Indicator) {
964 return EGCB_BREAK_UNDEF_RI_RI;
965 }
966
967 GB999:
968 return EGCB_BREAK;
969 }
970
971 #endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
972
973 extern int
onigenc_egcb_is_break_position(OnigEncoding enc,UChar * p,UChar * prev,const UChar * start,const UChar * end)974 onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
975 const UChar* start, const UChar* end)
976 {
977 OnigCodePoint from;
978 OnigCodePoint to;
979 #ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
980 enum EGCB_BREAK_TYPE btype;
981 enum EGCB_TYPE type;
982 #endif
983
984 /* GB1 and GB2 */
985 if (p == start) return 1;
986 if (p == end) return 1;
987
988 if (IS_NULL(prev)) {
989 prev = onigenc_get_prev_char_head(enc, start, p);
990 if (IS_NULL(prev)) return 1;
991 }
992
993 from = ONIGENC_MBC_TO_CODE(enc, prev, end);
994 to = ONIGENC_MBC_TO_CODE(enc, p, end);
995
996 #ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
997 if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
998 return from != 0x000d || to != NEWLINE_CODE;
999 }
1000
1001 btype = unicode_egcb_is_break_2code(from, to);
1002 switch (btype) {
1003 case EGCB_NOT_BREAK:
1004 return 0;
1005 break;
1006 case EGCB_BREAK:
1007 return 1;
1008 break;
1009
1010 case EGCB_BREAK_UNDEF_GB11:
1011 while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
1012 from = ONIGENC_MBC_TO_CODE(enc, prev, end);
1013 if (onigenc_unicode_is_code_ctype(from, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
1014 return 0;
1015
1016 type = egcb_get_type(from);
1017 if (type != EGCB_Extend)
1018 break;
1019 }
1020 break;
1021
1022 case EGCB_BREAK_UNDEF_RI_RI:
1023 {
1024 int n = 0;
1025 while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
1026 from = ONIGENC_MBC_TO_CODE(enc, prev, end);
1027 type = egcb_get_type(from);
1028 if (type != EGCB_Regional_Indicator)
1029 break;
1030
1031 n++;
1032 }
1033 if ((n % 2) == 0) return 0;
1034 }
1035 break;
1036 }
1037
1038 return 1;
1039
1040 #else
1041 return from != 0x000d || to != NEWLINE_CODE;
1042 #endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
1043 }
1044
1045
1046 #define USER_DEFINED_PROPERTY_MAX_NUM 20
1047
1048 typedef struct {
1049 int ctype;
1050 OnigCodePoint* ranges;
1051 } UserDefinedPropertyValue;
1052
1053 static int UserDefinedPropertyNum;
1054 static UserDefinedPropertyValue
1055 UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM];
1056 static st_table* UserDefinedPropertyTable;
1057
1058 extern int
onig_unicode_define_user_property(const char * name,OnigCodePoint * ranges)1059 onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
1060 {
1061 UserDefinedPropertyValue* e;
1062 int r;
1063 int i;
1064 int n;
1065 int len;
1066 int c;
1067 char* s;
1068 UChar* uname;
1069
1070 if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
1071 return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
1072
1073 len = (int )strlen(name);
1074 if (len >= PROPERTY_NAME_MAX_SIZE)
1075 return ONIGERR_TOO_LONG_PROPERTY_NAME;
1076
1077 s = (char* )xmalloc(len + 1);
1078 if (s == 0)
1079 return ONIGERR_MEMORY;
1080
1081 uname = (UChar* )name;
1082 n = 0;
1083 for (i = 0; i < len; i++) {
1084 c = uname[i];
1085 if (c < 0x20 || c >= 0x80) {
1086 xfree(s);
1087 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
1088 }
1089
1090 if (c != ' ' && c != '-' && c != '_') {
1091 s[n] = c;
1092 n++;
1093 }
1094 }
1095 s[n] = '\0';
1096
1097 if (UserDefinedPropertyTable == 0) {
1098 UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10);
1099 if (IS_NULL(UserDefinedPropertyTable)) {
1100 xfree(s);
1101 return ONIGERR_MEMORY;
1102 }
1103 }
1104
1105 e = UserDefinedPropertyRanges + UserDefinedPropertyNum;
1106 e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum;
1107 e->ranges = ranges;
1108 r = onig_st_insert_strend(UserDefinedPropertyTable,
1109 (const UChar* )s, (const UChar* )s + n,
1110 (hash_data_type )((void* )e));
1111 if (r < 0) return r;
1112
1113 UserDefinedPropertyNum++;
1114 return 0;
1115 }
1116
1117 extern int
onigenc_unicode_is_code_ctype(OnigCodePoint code,unsigned int ctype)1118 onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
1119 {
1120 if (
1121 #ifdef USE_UNICODE_PROPERTIES
1122 ctype <= ONIGENC_MAX_STD_CTYPE &&
1123 #endif
1124 code < 256) {
1125 return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
1126 }
1127
1128 if (ctype >= CODE_RANGES_NUM) {
1129 int index = ctype - CODE_RANGES_NUM;
1130 if (index < UserDefinedPropertyNum)
1131 return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code);
1132 else
1133 return ONIGERR_TYPE_BUG;
1134 }
1135
1136 return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
1137 }
1138
1139
1140 extern int
onigenc_unicode_ctype_code_range(OnigCtype ctype,const OnigCodePoint * ranges[])1141 onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[])
1142 {
1143 if (ctype >= CODE_RANGES_NUM) {
1144 int index = ctype - CODE_RANGES_NUM;
1145 if (index < UserDefinedPropertyNum) {
1146 *ranges = UserDefinedPropertyRanges[index].ranges;
1147 return 0;
1148 }
1149 else
1150 return ONIGERR_TYPE_BUG;
1151 }
1152
1153 *ranges = CodeRanges[ctype];
1154 return 0;
1155 }
1156
1157 extern int
onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype,OnigCodePoint * sb_out,const OnigCodePoint * ranges[])1158 onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
1159 const OnigCodePoint* ranges[])
1160 {
1161 *sb_out = 0x00;
1162 return onigenc_unicode_ctype_code_range(ctype, ranges);
1163 }
1164
1165 extern int
onigenc_unicode_property_name_to_ctype(OnigEncoding enc,UChar * name,UChar * end)1166 onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
1167 {
1168 int len;
1169 UChar *p;
1170 OnigCodePoint code;
1171 const struct PoolPropertyNameCtype* pc;
1172 char buf[PROPERTY_NAME_MAX_SIZE];
1173
1174 p = name;
1175 len = 0;
1176 while (p < end) {
1177 code = ONIGENC_MBC_TO_CODE(enc, p, end);
1178 if (code >= 0x80)
1179 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
1180
1181 if (code != ' ' && code != '-' && code != '_') {
1182 buf[len++] = (char )code;
1183 if (len >= PROPERTY_NAME_MAX_SIZE)
1184 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
1185 }
1186
1187 p += enclen(enc, p);
1188 }
1189
1190 buf[len] = 0;
1191
1192 if (UserDefinedPropertyTable != 0) {
1193 UserDefinedPropertyValue* e;
1194 e = (UserDefinedPropertyValue* )NULL;
1195 onig_st_lookup_strend(UserDefinedPropertyTable,
1196 (const UChar* )buf, (const UChar* )buf + len,
1197 (hash_data_type* )((void* )(&e)));
1198 if (e != 0) {
1199 return e->ctype;
1200 }
1201 }
1202
1203 pc = unicode_lookup_property_name(buf, len);
1204 if (pc != 0) {
1205 /* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */
1206 #ifndef USE_UNICODE_PROPERTIES
1207 if (pc->ctype > ONIGENC_MAX_STD_CTYPE)
1208 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
1209 #endif
1210
1211 return (int )pc->ctype;
1212 }
1213
1214 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
1215 }
1216