1 /**********************************************************************
2 regext.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2019 K.Kosako
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
32 #if 0
33 static void
34 conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
35 {
36 while (s < end) {
37 *conv++ = '\0';
38 *conv++ = '\0';
39 *conv++ = '\0';
40 *conv++ = *s++;
41 }
42 }
43
44 static void
45 conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
46 {
47 while (s < end) {
48 *conv++ = *s++;
49 *conv++ = '\0';
50 *conv++ = '\0';
51 *conv++ = '\0';
52 }
53 }
54
55 static void
56 conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
57 {
58 while (s < end) {
59 *conv++ = '\0';
60 *conv++ = *s++;
61 }
62 }
63
64 static void
65 conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
66 {
67 while (s < end) {
68 *conv++ = *s++;
69 *conv++ = '\0';
70 }
71 }
72
73 static void
74 conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
75 {
76 while (s < end) {
77 *conv++ = s[3];
78 *conv++ = s[2];
79 *conv++ = s[1];
80 *conv++ = s[0];
81 s += 4;
82 }
83 }
84
85 static void
86 conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
87 {
88 while (s < end) {
89 *conv++ = s[1];
90 *conv++ = s[0];
91 s += 2;
92 }
93 }
94
95 static int
96 conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
97 UChar** conv, UChar** conv_end)
98 {
99 int len = (int )(end - s);
100
101 if (to == ONIG_ENCODING_UTF16_BE) {
102 if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
103 *conv = (UChar* )xmalloc(len * 2);
104 CHECK_NULL_RETURN_MEMERR(*conv);
105 *conv_end = *conv + (len * 2);
106 conv_ext0be(s, end, *conv);
107 return 0;
108 }
109 else if (from == ONIG_ENCODING_UTF16_LE) {
110 swap16:
111 *conv = (UChar* )xmalloc(len);
112 CHECK_NULL_RETURN_MEMERR(*conv);
113 *conv_end = *conv + len;
114 conv_swap2bytes(s, end, *conv);
115 return 0;
116 }
117 }
118 else if (to == ONIG_ENCODING_UTF16_LE) {
119 if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
120 *conv = (UChar* )xmalloc(len * 2);
121 CHECK_NULL_RETURN_MEMERR(*conv);
122 *conv_end = *conv + (len * 2);
123 conv_ext0le(s, end, *conv);
124 return 0;
125 }
126 else if (from == ONIG_ENCODING_UTF16_BE) {
127 goto swap16;
128 }
129 }
130 if (to == ONIG_ENCODING_UTF32_BE) {
131 if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
132 *conv = (UChar* )xmalloc(len * 4);
133 CHECK_NULL_RETURN_MEMERR(*conv);
134 *conv_end = *conv + (len * 4);
135 conv_ext0be32(s, end, *conv);
136 return 0;
137 }
138 else if (from == ONIG_ENCODING_UTF32_LE) {
139 swap32:
140 *conv = (UChar* )xmalloc(len);
141 CHECK_NULL_RETURN_MEMERR(*conv);
142 *conv_end = *conv + len;
143 conv_swap4bytes(s, end, *conv);
144 return 0;
145 }
146 }
147 else if (to == ONIG_ENCODING_UTF32_LE) {
148 if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
149 *conv = (UChar* )xmalloc(len * 4);
150 CHECK_NULL_RETURN_MEMERR(*conv);
151 *conv_end = *conv + (len * 4);
152 conv_ext0le32(s, end, *conv);
153 return 0;
154 }
155 else if (from == ONIG_ENCODING_UTF32_BE) {
156 goto swap32;
157 }
158 }
159
160 return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
161 }
162 #endif
163
164 extern int
onig_new_deluxe(regex_t ** reg,const UChar * pattern,const UChar * pattern_end,OnigCompileInfo * ci,OnigErrorInfo * einfo)165 onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
166 OnigCompileInfo* ci, OnigErrorInfo* einfo)
167 {
168 int r;
169 UChar *cpat, *cpat_end;
170
171 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
172
173 if (ci->pattern_enc != ci->target_enc) {
174 return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
175 }
176 else {
177 cpat = (UChar* )pattern;
178 cpat_end = (UChar* )pattern_end;
179 }
180
181 *reg = (regex_t* )xmalloc(sizeof(regex_t));
182 if (IS_NULL(*reg)) {
183 r = ONIGERR_MEMORY;
184 goto err2;
185 }
186
187 r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
188 ci->syntax);
189 if (r != 0) goto err;
190
191 r = onig_compile(*reg, cpat, cpat_end, einfo);
192 if (r != 0) {
193 err:
194 onig_free(*reg);
195 *reg = NULL;
196 }
197
198 err2:
199 if (cpat != pattern) xfree(cpat);
200
201 return r;
202 }
203