1 /* rgxg - ReGular eXpression Generator
2 *
3 * Copyright (c) 2010-2013,2020 Hannes von Haugwitz
4 *
5 * This software is provided 'as-is', without any express or implied
6 * warranty. In no event will the authors be held liable for any damages
7 * arising from the use of this software.
8 *
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute
11 * it freely, subject to the following restrictions:
12 *
13 * 1. The origin of this software must not be misrepresented; you
14 * must not claim that you wrote the original software. If you use
15 * this software in a product, an acknowledgment in the product
16 * documentation would be appreciated but is not required.
17 *
18 * 2. Altered source versions must be plainly marked as such, and
19 * must not be misrepresented as being the original software.
20 *
21 * 3. This notice may not be removed or altered from any source
22 * distribution.
23 */
24
25 /* needed for rgxg_number_* */
26 #include "rgxg/number.h"
27
28 /* needed for EASY_CHAR */
29 #include "common_macros.h"
30
31 /* needed for internal_plain_number_base10 */
32 #include "internal_utils.h"
33
34 /* needed for LLONG_MAX */
35 #include <limits.h>
36
37 /* needed for NULL */
38 #include <stdlib.h>
39
40 /* pre-compile limits */
41 static long long limits[] = {
42 0L, LLONG_MAX, LLONG_MAX/2, LLONG_MAX/3, LLONG_MAX/4, LLONG_MAX/5,
43 LLONG_MAX/6, LLONG_MAX/7, LLONG_MAX/8, LLONG_MAX/9, LLONG_MAX/10,
44 LLONG_MAX/11, LLONG_MAX/12, LLONG_MAX/13, LLONG_MAX/14, LLONG_MAX/15,
45 LLONG_MAX/16, LLONG_MAX/17, LLONG_MAX/18, LLONG_MAX/19, LLONG_MAX/20,
46 LLONG_MAX/21, LLONG_MAX/22, LLONG_MAX/23, LLONG_MAX/24, LLONG_MAX/25,
47 LLONG_MAX/26, LLONG_MAX/27, LLONG_MAX/28, LLONG_MAX/29, LLONG_MAX/30,
48 LLONG_MAX/31, LLONG_MAX/32
49 };
50
rgxg_number_of_digits_long_long(long long number,int base)51 static size_t rgxg_number_of_digits_long_long(long long number, int base) {
52 long long power = base;
53 long long limit = limits[base];
54 size_t n = 1;
55
56 while(number >= power) {
57 n++;
58 if (power > limit) break;
59 power *= base;
60 }
61
62 return n;
63 }
64
rgxg_power(int base,int exp)65 static long long rgxg_power(int base, int exp) {
66 long long r = 1;
67 while (exp--) { r *=base; }
68 return r;
69 }
70
rgxg_base32_char(long long number,int upper)71 static char rgxg_base32_char(long long number, int upper) {
72 /* 'A'-10 = '7', 'a'-10='W' */
73 return number+(number < 10 ? '0' : (upper ? '7' : 'W'));
74 }
75
rgxg_base32_range(long long first,long long last,char * regex,rgxg_options_t options)76 static int rgxg_base32_range(long long first, long long
77 last, char* regex, rgxg_options_t options) {
78 /* size 4 is enough as long base <=36 */
79 long long stk[4];
80 int top, brackets;
81 int n = 0;
82
83 top = -1;
84 brackets=first-last || (last > 9 && !(RGXG_NOUPPERCASE&options || RGXG_NOLOWERCASE&options));
85 if (brackets) { EASY_CHAR('[') }
86 stk[++top] = first;
87 stk[++top] = last;
88 do {
89 last = stk[top--];
90 first = stk[top--];
91 switch (last-first) {
92 case 0:
93 if (first <= 9 || !(RGXG_NOUPPERCASE&options)) {
94 EASY_CHAR(rgxg_base32_char(first, 1))
95 }
96 if (first > 9 && !(RGXG_NOLOWERCASE&options)) {
97 EASY_CHAR(rgxg_base32_char(first, 0))
98 }
99 break;
100 case 1:
101 if (first <= 9 || !(RGXG_NOUPPERCASE&options)) {
102 EASY_CHAR(rgxg_base32_char(first, 1))
103 }
104 if (first > 9 && !(RGXG_NOLOWERCASE&options)) {
105 EASY_CHAR(rgxg_base32_char(first, 0))
106 }
107
108 if (last <= 9 || !(RGXG_NOUPPERCASE&options)) {
109 EASY_CHAR(rgxg_base32_char(last, 1))
110 }
111 if (last > 9 && !(RGXG_NOLOWERCASE&options)) {
112 EASY_CHAR(rgxg_base32_char(last, 0))
113 }
114 break;
115 default:
116 if (first <= 9 && last >= 10) {
117 stk[++top] = 10;
118 stk[++top] = last;
119 stk[++top] = first;
120 stk[++top] = 9;
121 } else {
122 if (first <= 9 || !(RGXG_NOUPPERCASE&options)) {
123 EASY_CHAR(rgxg_base32_char(first, 1))
124 EASY_CHAR('-')
125 EASY_CHAR(rgxg_base32_char(last, 1))
126 }
127 if (first > 9 && !(RGXG_NOLOWERCASE&options)) {
128 EASY_CHAR(rgxg_base32_char(first, 0))
129 EASY_CHAR('-')
130 EASY_CHAR(rgxg_base32_char(last, 0))
131 }
132 }
133 break;
134 }
135 } while (top > 0);
136
137 if (brackets) { EASY_CHAR(']') }
138
139 return n;
140 }
141
142 #define EASY_IF_ERROR_ELSE(condition, error) \
143 if (condition) { \
144 n = error; \
145 } else
146
rgxg_number_single(long long number,int base,char * regex,rgxg_options_t options)147 static int rgxg_number_single(long long number, int base, char* regex, rgxg_options_t
148 options) {
149 int n;
150 long long m;
151
152 n = 0;
153 m = number;
154 do {
155 n += (m%base <= 9 || RGXG_NOUPPERCASE&options || RGXG_NOLOWERCASE&options) ? 1 : 4;
156 m /= base;
157 } while (m);
158
159 if (regex) {
160 int pos = n;
161 do {
162 m = number%base;
163 if (m <= 9 || RGXG_NOUPPERCASE&options || RGXG_NOLOWERCASE&options) {
164 regex[--pos] = rgxg_base32_char(m, RGXG_NOLOWERCASE&options);
165 } else {
166 regex[--pos] = ']';
167 regex[--pos] = rgxg_base32_char(m, 0);
168 regex[--pos] = rgxg_base32_char(m, 1);
169 regex[--pos] = '[';
170 }
171 number /= base;
172 } while (number);
173 }
174
175 return n;
176 }
177
178 #define EASY_NUMBER_ASSIGNMENT(number) \
179 n += rgxg_number_single(number, base, (regex ? regex+n : NULL), options);
180
181 #define EASY_BASE32_RANGE_ASSIGNMENT(first, last) \
182 n += rgxg_base32_range(first, last, (regex ? regex+n : NULL), options);
183
184 #define EASY_LEADING_ZEROS \
185 if (number_of_leading_zeros < ((RGXG_LEADINGZERO&options) ? 5 : 4)) { \
186 /* 0000000000 -> 0{10} (0?0?0?0? -> 0{0,4}) but 0000 -> 0000 (0?0?0? -> 0?0?0?) */ \
187 while (number_of_leading_zeros-- > 0) { \
188 EASY_CHAR('0') \
189 if (RGXG_VARLEADINGZERO&options) { EASY_CHAR('?') } \
190 } \
191 } else { \
192 EASY_CHAR('0') \
193 EASY_CHAR('{') \
194 if (RGXG_VARLEADINGZERO&options) { \
195 EASY_CHAR('0') \
196 EASY_CHAR(',') \
197 } \
198 n += internal_plain_number_base10(number_of_leading_zeros, (regex ? regex+n : NULL)); \
199 EASY_CHAR('}') \
200 }
201
rgxg_number_range(long long first,long long last,int base,size_t min_length,char * regex,rgxg_options_t options)202 int rgxg_number_range (long long first, long long last, int base,
203 size_t min_length, char *regex, rgxg_options_t options) {
204 long long prefix, prefix_range_first, prefix_range_last,
205 current_last, power_max, power_max_plus_one;
206 int n, min, max, number_of_leading_zeros, parenthesis;
207 size_t max_num_of_digits;
208
209 EASY_VALIDATE_MUTEXOPTIONS(RGXG_LEADINGZERO, RGXG_VARLEADINGZERO)
210 EASY_VALIDATE_MUTEXOPTIONS(RGXG_NOUPPERCASE, RGXG_NOLOWERCASE)
211 EASY_IF_ERROR_ELSE(base < 2 || base > 32, RGXG_ERROR_BASE)
212 EASY_IF_ERROR_ELSE(first < 0 || last < 0, RGXG_ERROR_NEGARG)
213 EASY_IF_ERROR_ELSE(first > last , RGXG_ERROR_RANGE) {
214 n = 0;
215 parenthesis = 0;
216 max_num_of_digits = 0;
217 if ((RGXG_LEADINGZERO|RGXG_VARLEADINGZERO)&options) {
218 max_num_of_digits = rgxg_number_of_digits_long_long(last, base);
219 if (max_num_of_digits < min_length) {
220 max_num_of_digits = min_length;
221 }
222 }
223 current_last = last;
224
225 do {
226 max = 0;
227 prefix = current_last/base;
228 prefix_range_first = first/base != current_last/base ? 0 : first%base;
229 prefix_range_last = current_last%base;
230
231 power_max = rgxg_power(base, max);
232 power_max_plus_one = power_max * base;
233
234 while (prefix_range_last == base-1 && (prefix*power_max_plus_one-1 > first) ) {
235 ++max;
236 current_last /= base;
237 prefix = current_last/base;
238 power_max = power_max_plus_one;
239 power_max_plus_one *= base;
240 prefix_range_first =
241 prefix*power_max_plus_one+!prefix*power_max > first
242 ? ((((RGXG_LEADINGZERO|RGXG_VARLEADINGZERO)&options) && first == 0) ? 0 : !prefix)
243 : first/power_max%base+(prefix*power_max_plus_one+(first/power_max%base)*power_max != first);
244 prefix_range_last = current_last%base;
245 }
246 current_last = prefix*power_max_plus_one + prefix_range_first*power_max;
247 min = max;
248 if (max && prefix_range_first == 1 && prefix_range_last == base-1 && !((RGXG_LEADINGZERO|RGXG_VARLEADINGZERO)&options)) {
249 while (current_last/base && current_last/base >= first) {
250 min--;
251 current_last /= base;
252 }
253 }
254 current_last--;
255
256 if (!parenthesis && !(RGXG_NOOUTERPARENS&options) && first <= current_last ) {
257 if (!(max == 1 && first ==0 && ((current_last == base -1
258 && min == 1) || (current_last == 0 && min == 0)))
259 ) { /* no parenthesis around 1?[0-9] and [1-9]?[0-9] */
260 EASY_CHAR('(')
261 parenthesis = 1;
262 }
263 }
264
265 if ((RGXG_LEADINGZERO|RGXG_VARLEADINGZERO)&options) {
266 number_of_leading_zeros = max_num_of_digits-(prefix ? rgxg_number_of_digits_long_long(prefix, base): 0)-max-1;
267 EASY_LEADING_ZEROS
268 }
269
270 if (prefix) {
271 EASY_NUMBER_ASSIGNMENT(prefix)
272 }
273 if (prefix_range_first == 0 && prefix_range_last == base-1) { /* 55[0-9][0-9] => 55[0-9]{2} */
274 max++; min++;
275 } else {
276 EASY_BASE32_RANGE_ASSIGNMENT(prefix_range_first, prefix_range_last)
277 }
278 if (max) { /* handle multiple [0-9] */
279 if (max == 1 && first ==0 && ((current_last == base -1
280 && min == 1) || (current_last == 0 && min == 0))
281 ) { /* 1[0-9]|[0-9] => 1?[0-9] and [1-9][0-9]?|0 -> [1-9]?[0-9] */
282 EASY_CHAR('?')
283 EASY_BASE32_RANGE_ASSIGNMENT(0, base-1)
284 current_last = -1;
285 } else {
286 if ((RGXG_VARLEADINGZERO&options) && first == 0 &&
287 prefix == 0 && prefix_range_first == 0) {
288 if (prefix_range_last != base-1) {
289 EASY_CHAR('?')
290 }
291 min = 1;
292 }
293 EASY_BASE32_RANGE_ASSIGNMENT(0, base-1)
294 if (max == 1 && min == 0) { /* 5[0-9]{0,1} => 5[0-9]? */
295 EASY_CHAR('?')
296 } else if (max > 1) {
297 EASY_CHAR('{')
298 if (min != max) {
299 n += internal_plain_number_base10(min, (regex ? regex+n : NULL));
300 EASY_CHAR(',')
301 }
302 n += internal_plain_number_base10(max, (regex ? regex+n : NULL));
303 EASY_CHAR('}')
304 }
305 }
306 }
307 if (first <= current_last) {
308 EASY_CHAR('|')
309 }
310 } while (first <= current_last);
311 if (parenthesis) {
312 EASY_CHAR(')')
313 }
314 if (!(RGXG_NONULLBYTE&options) && regex) { regex[n] = '\0'; }
315 }
316 return n;
317 }
318
rgxg_number_greaterequal(long long number,int base,size_t min_length,char * regex,rgxg_options_t options)319 int rgxg_number_greaterequal (long long number, int base,
320 size_t min_length, char *regex, rgxg_options_t options) {
321 int n, no_power_of_base, min, max, number_of_leading_zeros;
322 size_t count;
323 long long boundary;
324
325 EASY_VALIDATE_MUTEXOPTIONS(RGXG_LEADINGZERO, RGXG_VARLEADINGZERO)
326 EASY_VALIDATE_MUTEXOPTIONS(RGXG_NOUPPERCASE, RGXG_NOLOWERCASE)
327 EASY_IF_ERROR_ELSE(base < 2 || base > 32, RGXG_ERROR_BASE)
328 EASY_IF_ERROR_ELSE(number < 0, RGXG_ERROR_NEGARG)
329 EASY_IF_ERROR_ELSE(number > rgxg_power(base, rgxg_number_of_digits_long_long(limits[base], base)), RGXG_ERROR_ARG2BIG) {
330 n = 0;
331 count = rgxg_number_of_digits_long_long(number ,base);
332
333 boundary = rgxg_power(base,count-1);
334 no_power_of_base = (number != boundary) || (RGXG_LEADINGZERO|RGXG_VARLEADINGZERO)&options; /* ([1-9][0-9]{3,}|[1-9][0-9]{2}) => [1-9][0-9]{2,} but ([1-9][0-9]+|0[1-9]) => ([1-9][0-9]+|0[1-9]) */
335
336 if (!(RGXG_NOOUTERPARENS&options) && no_power_of_base) {
337 EASY_CHAR('(')
338 }
339 count -= (!no_power_of_base);
340
341 max = (RGXG_LEADINGZERO|RGXG_VARLEADINGZERO)&options && min_length > count+1 ? min_length-1 : count;
342 if (number) { min = count; } else { min = max; };
343 for (int i = max ; i >= min; --i) {
344 if (i < max) { EASY_CHAR('|') }
345 number_of_leading_zeros = max-i;
346 EASY_LEADING_ZEROS
347 EASY_BASE32_RANGE_ASSIGNMENT(1, base-1)
348 EASY_BASE32_RANGE_ASSIGNMENT(0, base-1)
349 if (i == 0 && i == max) { /* [1-9][0-9]{0,} => [1-9][0-9]* */
350 EASY_CHAR('*')
351 } else if (i == 1 && i == max) { /* [1-9][0-9]{1,} => [1-9][0-9]+ */
352 EASY_CHAR('+')
353 } else if (i > 1) {
354 EASY_CHAR('{')
355 n += internal_plain_number_base10(i, (regex ? regex+n : NULL));
356 if (i == max) { EASY_CHAR(',') }
357 EASY_CHAR('}')
358 }
359 }
360 if (no_power_of_base) {
361 boundary *= base;
362 EASY_CHAR('|')
363 if (number > 0 || !((RGXG_LEADINGZERO|RGXG_VARLEADINGZERO)&options)) { count = max; }
364 n += rgxg_number_range(number, boundary-1, base, count+1,
365 (regex ? regex+n : NULL), RGXG_NONULLBYTE|RGXG_NOOUTERPARENS|options);
366 if (number == 0 && (RGXG_LEADINGZERO|RGXG_VARLEADINGZERO)&options) {
367 if (max > 1) {
368 EASY_CHAR('{')
369 if ((RGXG_VARLEADINGZERO)&options) {
370 EASY_CHAR('1')
371 EASY_CHAR(',')
372 }
373 n += internal_plain_number_base10(max, (regex ? regex+n : NULL));
374 EASY_CHAR('}')
375 }
376 }
377 if (!(RGXG_NOOUTERPARENS&options)) {
378 EASY_CHAR(')')
379 }
380 }
381 if (!(RGXG_NONULLBYTE&options) && regex) { regex[n] = '\0'; }
382 }
383
384 return n;
385 }
386