1 /**********************************************************************
2   regext.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2019  K.Kosako
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regint.h"
31 
32 #if 0
33 static void
34 conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
35 {
36   while (s < end) {
37     *conv++ = '\0';
38     *conv++ = '\0';
39     *conv++ = '\0';
40     *conv++ = *s++;
41   }
42 }
43 
44 static void
45 conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
46 {
47   while (s < end) {
48     *conv++ = *s++;
49     *conv++ = '\0';
50     *conv++ = '\0';
51     *conv++ = '\0';
52   }
53 }
54 
55 static void
56 conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
57 {
58   while (s < end) {
59     *conv++ = '\0';
60     *conv++ = *s++;
61   }
62 }
63 
64 static void
65 conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
66 {
67   while (s < end) {
68     *conv++ = *s++;
69     *conv++ = '\0';
70   }
71 }
72 
73 static void
74 conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
75 {
76   while (s < end) {
77     *conv++ = s[3];
78     *conv++ = s[2];
79     *conv++ = s[1];
80     *conv++ = s[0];
81     s += 4;
82   }
83 }
84 
85 static void
86 conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
87 {
88   while (s < end) {
89     *conv++ = s[1];
90     *conv++ = s[0];
91     s += 2;
92   }
93 }
94 
95 static int
96 conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
97               UChar** conv, UChar** conv_end)
98 {
99   int len = (int )(end - s);
100 
101   if (to == ONIG_ENCODING_UTF16_BE) {
102     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
103       *conv = (UChar* )xmalloc(len * 2);
104       CHECK_NULL_RETURN_MEMERR(*conv);
105       *conv_end = *conv + (len * 2);
106       conv_ext0be(s, end, *conv);
107       return 0;
108     }
109     else if (from == ONIG_ENCODING_UTF16_LE) {
110     swap16:
111       *conv = (UChar* )xmalloc(len);
112       CHECK_NULL_RETURN_MEMERR(*conv);
113       *conv_end = *conv + len;
114       conv_swap2bytes(s, end, *conv);
115       return 0;
116     }
117   }
118   else if (to == ONIG_ENCODING_UTF16_LE) {
119     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
120       *conv = (UChar* )xmalloc(len * 2);
121       CHECK_NULL_RETURN_MEMERR(*conv);
122       *conv_end = *conv + (len * 2);
123       conv_ext0le(s, end, *conv);
124       return 0;
125     }
126     else if (from == ONIG_ENCODING_UTF16_BE) {
127       goto swap16;
128     }
129   }
130   if (to == ONIG_ENCODING_UTF32_BE) {
131     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
132       *conv = (UChar* )xmalloc(len * 4);
133       CHECK_NULL_RETURN_MEMERR(*conv);
134       *conv_end = *conv + (len * 4);
135       conv_ext0be32(s, end, *conv);
136       return 0;
137     }
138     else if (from == ONIG_ENCODING_UTF32_LE) {
139     swap32:
140       *conv = (UChar* )xmalloc(len);
141       CHECK_NULL_RETURN_MEMERR(*conv);
142       *conv_end = *conv + len;
143       conv_swap4bytes(s, end, *conv);
144       return 0;
145     }
146   }
147   else if (to == ONIG_ENCODING_UTF32_LE) {
148     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
149       *conv = (UChar* )xmalloc(len * 4);
150       CHECK_NULL_RETURN_MEMERR(*conv);
151       *conv_end = *conv + (len * 4);
152       conv_ext0le32(s, end, *conv);
153       return 0;
154     }
155     else if (from == ONIG_ENCODING_UTF32_BE) {
156       goto swap32;
157     }
158   }
159 
160   return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
161 }
162 #endif
163 
164 extern int
onig_new_deluxe(regex_t ** reg,const UChar * pattern,const UChar * pattern_end,OnigCompileInfo * ci,OnigErrorInfo * einfo)165 onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
166                 OnigCompileInfo* ci, OnigErrorInfo* einfo)
167 {
168   int r;
169   UChar *cpat, *cpat_end;
170 
171   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
172 
173   if (ci->pattern_enc != ci->target_enc) {
174     return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
175   }
176   else {
177     cpat     = (UChar* )pattern;
178     cpat_end = (UChar* )pattern_end;
179   }
180 
181   *reg = (regex_t* )xmalloc(sizeof(regex_t));
182   if (IS_NULL(*reg)) {
183     r = ONIGERR_MEMORY;
184     goto err2;
185   }
186 
187   r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
188                     ci->syntax);
189   if (r != 0) goto err;
190 
191   r = onig_compile(*reg, cpat, cpat_end, einfo);
192   if (r != 0) {
193   err:
194     onig_free(*reg);
195     *reg = NULL;
196   }
197 
198  err2:
199   if (cpat != pattern) xfree(cpat);
200 
201   return r;
202 }
203