1 /**********************************************************************
2   regenc.c -  Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2016  K.Takata  <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regint.h"
32 
33 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
34 
/* Encoding-layer initialisation hook.  There is nothing to set up here,
 * so the call always reports success by returning 0. */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
40 
41 extern OnigEncoding
onigenc_get_default_encoding(void)42 onigenc_get_default_encoding(void)
43 {
44   return OnigEncDefaultCharEncoding;
45 }
46 
47 extern int
onigenc_set_default_encoding(OnigEncoding enc)48 onigenc_set_default_encoding(OnigEncoding enc)
49 {
50   OnigEncDefaultCharEncoding = enc;
51   return 0;
52 }
53 
54 extern int
onigenc_mbclen_approximate(const OnigUChar * p,const OnigUChar * e,OnigEncoding enc)55 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
56 {
57   int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
58   if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
59     return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
60   else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
61     return (int )(e - p) + ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
62   return 1;
63 }
64 
65 extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s,const UChar * end)66 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
67 {
68   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
69   if (p < s) {
70     p += enclen(enc, p, end);
71   }
72   return p;
73 }
74 
75 extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,const UChar * start,const UChar * s,const UChar * end,const UChar ** prev)76 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
77 				   const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
78 {
79   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
80 
81   if (p < s) {
82     if (prev) *prev = (const UChar* )p;
83     p += enclen(enc, p, end);
84   }
85   else {
86     if (prev) *prev = (const UChar* )NULL; /* Sorry */
87   }
88   return p;
89 }
90 
91 extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc,const UChar * start,const UChar * s,const UChar * end)92 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
93 {
94   if (s <= start)
95     return (UChar* )NULL;
96 
97   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
98 }
99 
100 extern UChar*
onigenc_step_back(OnigEncoding enc,const UChar * start,const UChar * s,const UChar * end,int n)101 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
102 {
103   while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
104     if (s <= start)
105       return (UChar* )NULL;
106 
107     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
108   }
109   return (UChar* )s;
110 }
111 
112 extern UChar*
onigenc_step(OnigEncoding enc,const UChar * p,const UChar * end,int n)113 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
114 {
115   UChar* q = (UChar* )p;
116   while (n-- > 0) {
117     q += ONIGENC_MBC_ENC_LEN(enc, q, end);
118   }
119   return (q <= end ? q : NULL);
120 }
121 
122 extern int
onigenc_strlen(OnigEncoding enc,const UChar * p,const UChar * end)123 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
124 {
125   int n = 0;
126   UChar* q = (UChar* )p;
127 
128   while (q < end) {
129     q += ONIGENC_MBC_ENC_LEN(enc, q, end);
130     n++;
131   }
132   return n;
133 }
134 
135 extern int
onigenc_strlen_null(OnigEncoding enc,const UChar * s)136 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
137 {
138   int n = 0;
139   UChar* p = (UChar* )s;
140   UChar* e;
141 
142   while (1) {
143     if (*p == '\0') {
144       UChar* q;
145       int len = ONIGENC_MBC_MINLEN(enc);
146 
147       if (len == 1) return n;
148       q = p + 1;
149       while (len > 1) {
150         if (*q != '\0') break;
151         q++;
152         len--;
153       }
154       if (len == 1) return n;
155     }
156     e = p + ONIGENC_MBC_MAXLEN(enc);
157     p += ONIGENC_MBC_ENC_LEN(enc, p, e);
158     n++;
159   }
160 }
161 
162 extern int
onigenc_str_bytelen_null(OnigEncoding enc,const UChar * s)163 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
164 {
165   UChar* start = (UChar* )s;
166   UChar* p = (UChar* )s;
167   UChar* e;
168 
169   while (1) {
170     if (*p == '\0') {
171       UChar* q;
172       int len = ONIGENC_MBC_MINLEN(enc);
173 
174       if (len == 1) return (int )(p - start);
175       q = p + 1;
176       while (len > 1) {
177         if (*q != '\0') break;
178         q++;
179         len--;
180       }
181       if (len == 1) return (int )(p - start);
182     }
183     e = p + ONIGENC_MBC_MAXLEN(enc);
184     p += ONIGENC_MBC_ENC_LEN(enc, p, e);
185   }
186 }
187 
188 const UChar OnigEncAsciiToLowerCaseTable[] = {
189   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
190   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
191   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
192   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
193   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
194   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
195   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
196   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
197   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
198   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
199   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
200   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
201   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
202   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
203   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
204   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
205   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
206   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
207   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
208   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
209   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
210   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
211   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
212   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
213   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
214   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
215   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
216   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
217   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
218   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
219   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
220   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
221 };
222 
#ifdef USE_UPPER_CASE_TABLE
/* Byte-indexed ASCII upper-casing table (256 entries), built only when
 * USE_UPPER_CASE_TABLE is defined.
 * Entries 0141-0172 ('a'-'z') hold 0101-0132 ('A'-'Z'); every other entry
 * holds its own index. */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  /* 0000-0137: unchanged */
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  /* 0140-0177: 'a'-'z' folded to 'A'-'Z' */
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0200-0377: unchanged */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
259 
/* Per-byte character-class flags for ASCII (256 entries).
 * Entries 0x80-0xff are all zero.  The meaning of the individual bits is
 * defined outside this file (presumably one bit per ONIGENC_CTYPE_* class
 * -- confirm against the header). */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00-0x1f */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20-0x3f: space, punctuation, digits */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40-0x5f: '@', 'A'-'Z', punctuation, '_' */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60-0x7f: '`', 'a'-'z', punctuation, DEL */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80-0xff: no flags */
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
294 
295 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
296   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
297   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
298   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
299   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
300   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
301   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
302   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
303   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
304   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
305   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
306   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
307   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
308   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
309   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
310   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
311   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
312   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
313   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
314   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
315   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
316   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
317   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
318   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
319   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
320   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
321   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
322   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
323   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
324   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
325   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
326   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
327   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
328 };
329 
#ifdef USE_UPPER_CASE_TABLE
/* Byte-indexed ISO-8859-1 upper-casing table (256 entries), built only when
 * USE_UPPER_CASE_TABLE is defined.
 * Entries 0141-0172 hold 0101-0132, and entries 0340-0376 hold 0300-0336
 * except 0367, which holds itself.  Every other entry (including 0377)
 * holds its own index. */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  /* 0000-0137: unchanged */
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  /* 0140-0177: 'a'-'z' folded to 'A'-'Z' */
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0200-0337: unchanged */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  /* 0340-0377: folded to 0300-0336, except 0367 and 0377 */
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
366 
/* Compiled out by the #if 0 guard below: an empty setter that ignores its
 * table argument, marked obsolete by its own body comment. */
367 #if 0
368 extern void
369 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
370 {
371   /* nothing */
372   /* obsoleted. */
373 }
374 #endif
375 
376 extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s,const UChar * end)377 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
378 {
379   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
380 }
381 
382 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
383   { 0x41, 0x61 },
384   { 0x42, 0x62 },
385   { 0x43, 0x63 },
386   { 0x44, 0x64 },
387   { 0x45, 0x65 },
388   { 0x46, 0x66 },
389   { 0x47, 0x67 },
390   { 0x48, 0x68 },
391   { 0x49, 0x69 },
392   { 0x4a, 0x6a },
393   { 0x4b, 0x6b },
394   { 0x4c, 0x6c },
395   { 0x4d, 0x6d },
396   { 0x4e, 0x6e },
397   { 0x4f, 0x6f },
398   { 0x50, 0x70 },
399   { 0x51, 0x71 },
400   { 0x52, 0x72 },
401   { 0x53, 0x73 },
402   { 0x54, 0x74 },
403   { 0x55, 0x75 },
404   { 0x56, 0x76 },
405   { 0x57, 0x77 },
406   { 0x58, 0x78 },
407   { 0x59, 0x79 },
408   { 0x5a, 0x7a }
409 };
410 
411 extern int
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,OnigApplyAllCaseFoldFunc f,void * arg,OnigEncoding enc ARG_UNUSED)412 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
413 				  OnigApplyAllCaseFoldFunc f, void* arg,
414 				  OnigEncoding enc ARG_UNUSED)
415 {
416   OnigCodePoint code;
417   int i, r;
418 
419   for (i = 0; i < numberof(OnigAsciiLowerMap); i++) {
420     code = OnigAsciiLowerMap[i].to;
421     r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
422     if (r != 0) return r;
423 
424     code = OnigAsciiLowerMap[i].from;
425     r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
426     if (r != 0) return r;
427   }
428 
429   return 0;
430 }
431 
432 extern int
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,const OnigUChar * p,const OnigUChar * end ARG_UNUSED,OnigCaseFoldCodeItem items[],OnigEncoding enc ARG_UNUSED)433 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
434 	 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
435 	 OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
436 {
437   if (0x41 <= *p && *p <= 0x5a) {
438     items[0].byte_len = 1;
439     items[0].code_len = 1;
440     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
441     return 1;
442   }
443   else if (0x61 <= *p && *p <= 0x7a) {
444     items[0].byte_len = 1;
445     items[0].code_len = 1;
446     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
447     return 1;
448   }
449   else
450     return 0;
451 }
452 
453 static int
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,OnigApplyAllCaseFoldFunc f,void * arg)454 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
455 		       OnigApplyAllCaseFoldFunc f, void* arg)
456 {
457   OnigCodePoint ss[] = { 0x73, 0x73 };
458 
459   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
460 }
461 
462 extern int
onigenc_apply_all_case_fold_with_map(int map_size,const OnigPairCaseFoldCodes map[],int ess_tsett_flag,OnigCaseFoldType flag,OnigApplyAllCaseFoldFunc f,void * arg)463 onigenc_apply_all_case_fold_with_map(int map_size,
464     const OnigPairCaseFoldCodes map[],
465     int ess_tsett_flag, OnigCaseFoldType flag,
466     OnigApplyAllCaseFoldFunc f, void* arg)
467 {
468   OnigCodePoint code;
469   int i, r;
470 
471   r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
472   if (r != 0) return r;
473 
474   for (i = 0; i < map_size; i++) {
475     code = map[i].to;
476     r = (*f)(map[i].from, &code, 1, arg);
477     if (r != 0) return r;
478 
479     code = map[i].from;
480     r = (*f)(map[i].to, &code, 1, arg);
481     if (r != 0) return r;
482   }
483 
484   if (ess_tsett_flag != 0)
485     return ss_apply_all_case_fold(flag, f, arg);
486 
487   return 0;
488 }
489 
490 extern int
onigenc_get_case_fold_codes_by_str_with_map(int map_size,const OnigPairCaseFoldCodes map[],int ess_tsett_flag,OnigCaseFoldType flag ARG_UNUSED,const OnigUChar * p,const OnigUChar * end,OnigCaseFoldCodeItem items[])491 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
492     const OnigPairCaseFoldCodes map[],
493     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
494     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
495 {
496   if (0x41 <= *p && *p <= 0x5a) {
497     items[0].byte_len = 1;
498     items[0].code_len = 1;
499     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
500     if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
501 	&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
502       /* SS */
503       items[1].byte_len = 2;
504       items[1].code_len = 1;
505       items[1].code[0] = (OnigCodePoint )0xdf;
506       return 2;
507     }
508     else
509       return 1;
510   }
511   else if (0x61 <= *p && *p <= 0x7a) {
512     items[0].byte_len = 1;
513     items[0].code_len = 1;
514     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
515     if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
516 	&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
517       /* ss */
518       items[1].byte_len = 2;
519       items[1].code_len = 1;
520       items[1].code[0] = (OnigCodePoint )0xdf;
521       return 2;
522     }
523     else
524       return 1;
525   }
526   else if (*p == 0xdf && ess_tsett_flag != 0) {
527     items[0].byte_len = 1;
528     items[0].code_len = 2;
529     items[0].code[0] = (OnigCodePoint )'s';
530     items[0].code[1] = (OnigCodePoint )'s';
531 
532     items[1].byte_len = 1;
533     items[1].code_len = 2;
534     items[1].code[0] = (OnigCodePoint )'S';
535     items[1].code[1] = (OnigCodePoint )'S';
536 
537     items[2].byte_len = 1;
538     items[2].code_len = 2;
539     items[2].code[0] = (OnigCodePoint )'s';
540     items[2].code[1] = (OnigCodePoint )'S';
541 
542     items[3].byte_len = 1;
543     items[3].code_len = 2;
544     items[3].code[0] = (OnigCodePoint )'S';
545     items[3].code[1] = (OnigCodePoint )'s';
546 
547     return 4;
548   }
549   else {
550     int i;
551 
552     for (i = 0; i < map_size; i++) {
553       if (*p == map[i].from) {
554 	items[0].byte_len = 1;
555 	items[0].code_len = 1;
556 	items[0].code[0] = map[i].to;
557 	return 1;
558       }
559       else if (*p == map[i].to) {
560 	items[0].byte_len = 1;
561 	items[0].code_len = 1;
562 	items[0].code[0] = map[i].from;
563 	return 1;
564       }
565     }
566   }
567 
568   return 0;
569 }
570 
571 
572 extern int
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,OnigCodePoint * sb_out ARG_UNUSED,const OnigCodePoint * ranges[]ARG_UNUSED,OnigEncoding enc)573 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
574 	 OnigCodePoint* sb_out ARG_UNUSED,
575 	 const OnigCodePoint* ranges[] ARG_UNUSED,
576 	 OnigEncoding enc)
577 {
578   return ONIG_NO_SUPPORT_CONFIG;
579 }
580 
581 extern int
onigenc_is_mbc_newline_0x0a(const UChar * p,const UChar * end,OnigEncoding enc ARG_UNUSED)582 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
583 {
584   if (p < end) {
585     if (*p == 0x0a) return 1;
586   }
587   return 0;
588 }
589 
590 /* for single byte encodings */
591 extern int
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,const UChar ** p,const UChar * end,UChar * lower,OnigEncoding enc ARG_UNUSED)592 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
593 			    const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED)
594 {
595   *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
596 
597   (*p)++;
598   return 1; /* return byte length of converted char to lower */
599 }
600 
/* Compiled out by the #if 0 guard below.  The disabled function advances
 * *pp by one byte and returns ONIGENC_IS_ASCII_CODE_CASE_AMBIG() for the
 * byte it started on. */
601 #if 0
602 extern int
603 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
604 			       const UChar** pp, const UChar* end ARG_UNUSED)
605 {
606   const UChar* p = *pp;
607 
608   (*pp)++;
609   return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
610 }
611 #endif
612 
613 extern int
onigenc_single_byte_mbc_enc_len(const UChar * p ARG_UNUSED,const UChar * e ARG_UNUSED,OnigEncoding enc ARG_UNUSED)614 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
615 				OnigEncoding enc ARG_UNUSED)
616 {
617   return 1;
618 }
619 
620 extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar * p,const UChar * end ARG_UNUSED,OnigEncoding enc ARG_UNUSED)621 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
622 				OnigEncoding enc ARG_UNUSED)
623 {
624   return (OnigCodePoint )(*p);
625 }
626 
627 extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED,OnigEncoding enc ARG_UNUSED)628 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
629 {
630   return 1;
631 }
632 
633 extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code,UChar * buf,OnigEncoding enc ARG_UNUSED)634 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
635 {
636 #ifdef RUBY
637   if (code > 0xff)
638     rb_raise(rb_eRangeError, "%u out of char range", code);
639 #endif
640   *buf = (UChar )(code & 0xff);
641   return 1;
642 }
643 
644 extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar * start ARG_UNUSED,const UChar * s,const UChar * end ARG_UNUSED,OnigEncoding enc ARG_UNUSED)645 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
646 					  const UChar* s,
647 					  const UChar* end ARG_UNUSED,
648 					  OnigEncoding enc ARG_UNUSED)
649 {
650   return (UChar* )s;
651 }
652 
653 extern int
onigenc_always_true_is_allowed_reverse_match(const UChar * s ARG_UNUSED,const UChar * end ARG_UNUSED,OnigEncoding enc ARG_UNUSED)654 onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
655 					     const UChar* end ARG_UNUSED,
656 					     OnigEncoding enc ARG_UNUSED)
657 {
658   return TRUE;
659 }
660 
661 extern int
onigenc_always_false_is_allowed_reverse_match(const UChar * s ARG_UNUSED,const UChar * end ARG_UNUSED,OnigEncoding enc ARG_UNUSED)662 onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
663 					      const UChar* end ARG_UNUSED,
664 					      OnigEncoding enc ARG_UNUSED)
665 {
666   return FALSE;
667 }
668 
669 extern int
onigenc_ascii_is_code_ctype(OnigCodePoint code,unsigned int ctype,OnigEncoding enc ARG_UNUSED)670 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
671                             OnigEncoding enc ARG_UNUSED)
672 {
673   if (code < 128)
674     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
675   else
676     return FALSE;
677 }
678 
679 extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc,const UChar * p,const UChar * end)680 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
681 {
682   int c, i, len;
683   OnigCodePoint n;
684 
685   len = enclen(enc, p, end);
686   n = (OnigCodePoint )(*p++);
687   if (len == 1) return n;
688 
689   for (i = 1; i < len; i++) {
690     if (p >= end) break;
691     c = *p++;
692     n <<= 8;  n += c;
693   }
694   return n;
695 }
696 
697 extern int
onigenc_mbn_mbc_case_fold(OnigEncoding enc,OnigCaseFoldType flag ARG_UNUSED,const UChar ** pp,const UChar * end ARG_UNUSED,UChar * lower)698 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
699                           const UChar** pp, const UChar* end ARG_UNUSED,
700 			  UChar* lower)
701 {
702   int len;
703   const UChar *p = *pp;
704 
705   if (ONIGENC_IS_MBC_ASCII(p)) {
706     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
707     (*pp)++;
708     return 1;
709   }
710   else {
711     int i;
712 
713     len = enclen(enc, p, end);
714     for (i = 0; i < len; i++) {
715       *lower++ = *p++;
716     }
717     (*pp) += len;
718     return len; /* return byte length of converted to lower char */
719   }
720 }
721 
/* Compiled out by the #if 0 guard below.  NOTE(review): the disabled body
 * calls enclen() with two arguments, whereas the live code in this file
 * passes three (enc, p, end); it would need updating before re-enabling. */
722 #if 0
723 extern int
724 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
725                              const UChar** pp, const UChar* end ARG_UNUSED)
726 {
727   const UChar* p = *pp;
728 
729   if (ONIGENC_IS_MBC_ASCII(p)) {
730     (*pp)++;
731     return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
732   }
733 
734   (*pp) += enclen(enc, p);
735   return FALSE;
736 }
737 #endif
738 
739 extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code,OnigEncoding enc ARG_UNUSED)740 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
741 {
742   if (code <= 0xff) return 1;
743   if (code <= 0xffff) return 2;
744   return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
745 }
746 
747 extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code,OnigEncoding enc ARG_UNUSED)748 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
749 {
750        if ((code & 0xff000000) != 0) return 4;
751   else if ((code & 0xff0000) != 0) return 3;
752   else if ((code & 0xff00) != 0) return 2;
753   else return 1;
754 }
755 
756 extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)757 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
758 {
759   UChar *p = buf;
760 
761   if ((code & 0xff00) != 0) {
762     *p++ = (UChar )((code >>  8) & 0xff);
763   }
764   *p++ = (UChar )(code & 0xff);
765 
766 #if 1
767   if (enclen(enc, buf, p) != (p - buf))
768     return ONIGERR_INVALID_CODE_POINT_VALUE;
769 #endif
770   return (int )(p - buf);
771 }
772 
773 extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)774 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
775 {
776   UChar *p = buf;
777 
778   if ((code & 0xff000000) != 0) {
779     *p++ = (UChar )((code >> 24) & 0xff);
780   }
781   if ((code & 0xff0000) != 0 || p != buf) {
782     *p++ = (UChar )((code >> 16) & 0xff);
783   }
784   if ((code & 0xff00) != 0 || p != buf) {
785     *p++ = (UChar )((code >> 8) & 0xff);
786   }
787   *p++ = (UChar )(code & 0xff);
788 
789 #if 1
790   if (enclen(enc, buf, p) != (p - buf))
791     return ONIGERR_INVALID_CODE_POINT_VALUE;
792 #endif
793   return (int )(p - buf);
794 }
795 
796 extern int
onigenc_minimum_property_name_to_ctype(OnigEncoding enc,const UChar * p,const UChar * end)797 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end)
798 {
799   static const PosixBracketEntryType PBS[] = {
800     POSIX_BRACKET_ENTRY_INIT("Alnum",  ONIGENC_CTYPE_ALNUM),
801     POSIX_BRACKET_ENTRY_INIT("Alpha",  ONIGENC_CTYPE_ALPHA),
802     POSIX_BRACKET_ENTRY_INIT("Blank",  ONIGENC_CTYPE_BLANK),
803     POSIX_BRACKET_ENTRY_INIT("Cntrl",  ONIGENC_CTYPE_CNTRL),
804     POSIX_BRACKET_ENTRY_INIT("Digit",  ONIGENC_CTYPE_DIGIT),
805     POSIX_BRACKET_ENTRY_INIT("Graph",  ONIGENC_CTYPE_GRAPH),
806     POSIX_BRACKET_ENTRY_INIT("Lower",  ONIGENC_CTYPE_LOWER),
807     POSIX_BRACKET_ENTRY_INIT("Print",  ONIGENC_CTYPE_PRINT),
808     POSIX_BRACKET_ENTRY_INIT("Punct",  ONIGENC_CTYPE_PUNCT),
809     POSIX_BRACKET_ENTRY_INIT("Space",  ONIGENC_CTYPE_SPACE),
810     POSIX_BRACKET_ENTRY_INIT("Upper",  ONIGENC_CTYPE_UPPER),
811     POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT),
812     POSIX_BRACKET_ENTRY_INIT("ASCII",  ONIGENC_CTYPE_ASCII),
813     POSIX_BRACKET_ENTRY_INIT("Word",   ONIGENC_CTYPE_WORD),
814   };
815 
816   const PosixBracketEntryType *pb;
817   int len;
818 
819   len = onigenc_strlen(enc, p, end);
820   for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
821     if (len == pb->len &&
822         onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
823       return pb->ctype;
824   }
825 
826   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
827 }
828 
829 extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)830 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
831 			  unsigned int ctype)
832 {
833   if (code < 128)
834     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
835   else {
836     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
837       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
838     }
839   }
840 
841   return FALSE;
842 }
843 
844 extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)845 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
846 			  unsigned int ctype)
847 {
848   if (code < 128)
849     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
850   else {
851     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
852       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
853     }
854   }
855 
856   return FALSE;
857 }
858 
859 extern int
onigenc_with_ascii_strncmp(OnigEncoding enc,const UChar * p,const UChar * end,const UChar * sascii,int n)860 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
861                            const UChar* sascii /* ascii */, int n)
862 {
863   int x, c;
864 
865   while (n-- > 0) {
866     if (p >= end) return (int )(*sascii);
867 
868     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
869     x = *sascii - c;
870     if (x) return x;
871 
872     sascii++;
873     p += enclen(enc, p, end);
874   }
875   return 0;
876 }
877 
878 extern int
onigenc_with_ascii_strnicmp(OnigEncoding enc,const UChar * p,const UChar * end,const UChar * sascii,int n)879 onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
880                             const UChar* sascii /* ascii */, int n)
881 {
882   int x, c;
883 
884   while (n-- > 0) {
885     if (p >= end) return (int )(*sascii);
886 
887     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
888     if (ONIGENC_IS_ASCII_CODE(c))
889       c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
890     x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;
891     if (x) return x;
892 
893     sascii++;
894     p += enclen(enc, p, end);
895   }
896   return 0;
897 }
898 
899 #if 0
900 /* Property management */
901 static int
902 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
903 {
904   size_t size;
905   const OnigCodePoint **list = *plist;
906 
907   size = sizeof(OnigCodePoint*) * new_size;
908   if (IS_NULL(list)) {
909     list = (const OnigCodePoint** )xmalloc(size);
910     if (IS_NULL(list)) return ONIGERR_MEMORY;
911   }
912   else {
913     const OnigCodePoint **tmp;
914     tmp = (const OnigCodePoint** )xrealloc((void* )list, size);
915     if (IS_NULL(tmp)) return ONIGERR_MEMORY;
916     list = tmp;
917   }
918 
919   *plist = list;
920   *psize = new_size;
921 
922   return 0;
923 }
924 
925 extern int
926 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
927      hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
928      int *psize)
929 {
930 #define PROP_INIT_SIZE     16
931 
932   int r;
933 
934   if (*psize <= *pnum) {
935     int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
936     r = resize_property_list(new_size, plist, psize);
937     if (r != 0) return r;
938   }
939 
940   (*plist)[*pnum] = prop;
941 
942   if (ONIG_IS_NULL(*table)) {
943     *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
944     if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
945   }
946 
947   *pnum = *pnum + 1;
948   onig_st_insert_strend(*table, name, name + strlen((char* )name),
949 			(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
950   return 0;
951 }
952 #endif
953 
954 extern int
onigenc_ascii_only_case_map(OnigCaseFoldType * flagP,const OnigUChar ** pp,const OnigUChar * end,OnigUChar * to,OnigUChar * to_end,const struct OnigEncodingTypeST * enc)955 onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end,
956 			    OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)
957 {
958   OnigCodePoint code;
959   OnigUChar *to_start = to;
960   OnigCaseFoldType flags = *flagP;
961   int codepoint_length;
962 
963   while (*pp < end && to < to_end) {
964     codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
965     if (codepoint_length < 0)
966       return codepoint_length; /* encoding invalid */
967     code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
968     *pp += codepoint_length;
969 
970     if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
971       flags |= ONIGENC_CASE_MODIFIED;
972       code += 'A' - 'a';
973     } else if (code >= 'A' && code <= 'Z' &&
974 	(flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
975       flags |= ONIGENC_CASE_MODIFIED;
976       code += 'a' - 'A';
977     }
978     to += ONIGENC_CODE_TO_MBC(enc, code, to);
979     if (flags & ONIGENC_CASE_TITLECASE)  /* switch from titlecase to lowercase for capitalize */
980       flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
981   }
982   *flagP = flags;
983   return (int )(to - to_start);
984 }
985 
986 extern int
onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType * flagP,const OnigUChar ** pp,const OnigUChar * end,OnigUChar * to,OnigUChar * to_end,const struct OnigEncodingTypeST * enc)987 onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
988 					const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
989 					const struct OnigEncodingTypeST* enc)
990 {
991   OnigCodePoint code;
992   OnigUChar *to_start = to;
993   OnigCaseFoldType flags = *flagP;
994 
995   while (*pp < end && to < to_end) {
996     code = *(*pp)++;
997 
998     if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
999       flags |= ONIGENC_CASE_MODIFIED;
1000       code += 'A' - 'a';
1001     } else if (code >= 'A' && code <= 'Z' &&
1002 	(flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
1003       flags |= ONIGENC_CASE_MODIFIED;
1004       code += 'a' - 'A';
1005     }
1006     *to++ = code;
1007     if (flags & ONIGENC_CASE_TITLECASE)  /* switch from titlecase to lowercase for capitalize */
1008       flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
1009   }
1010   *flagP = flags;
1011   return (int )(to - to_start);
1012 }
1013