1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Derick Rethans <derick@php.net> |
14 | Pierre-A. Joye <pierre@php.net> |
15 | Kévin Dunglas <dunglas@gmail.com> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include "php_filter.h"
20 #include "filter_private.h"
21 #include "ext/standard/url.h"
22 #include "ext/pcre/php_pcre.h"
23
24 #include "zend_multiply.h"
25
26 #ifdef HAVE_ARPA_INET_H
27 # include <arpa/inet.h>
28 #endif
29
/* Fallback for platforms whose headers do not define INADDR_NONE. */
#ifndef INADDR_NONE
# define INADDR_NONE ((unsigned long int) -1)
#endif
33
34
35 /* {{{ FETCH_DOUBLE_OPTION(var_name, option_name) */
/*
 * Looks up `option_name` (a string literal) in `option_array` and, when the
 * key exists, stores its value converted to double in `var_name` and sets
 * `var_name##_set` to 1. Otherwise `var_name` is 0 and `var_name##_set` is 0.
 *
 * Requires `option_array` and a scratch `zval *option_val` in the calling
 * scope. Wrapped in do { } while (0) so the expansion is one statement and
 * stays correct under an unbraced if/else; invoke it with a trailing ';'.
 */
#define FETCH_DOUBLE_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_double(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
45 /* }}} */
46
47 /* {{{ FETCH_LONG_OPTION(var_name, option_name) */
/*
 * Looks up `option_name` (a string literal) in `option_array` and, when the
 * key exists, stores its value converted to zend_long in `var_name` and sets
 * `var_name##_set` to 1. Otherwise `var_name` is 0 and `var_name##_set` is 0.
 *
 * Requires `option_array` and a scratch `zval *option_val` in the calling
 * scope. Wrapped in do { } while (0) so the expansion is one statement and
 * stays correct under an unbraced if/else; invoke it with a trailing ';'.
 */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
57 /* }}} */
58
59 /* {{{ FETCH_STRING_OPTION(var_name, option_name) */
/*
 * Looks up `option_name` (a string literal) in `option_array`, following
 * references. When the entry exists and is a string, `var_name` points at its
 * bytes, `var_name##_len` holds its length and `var_name##_set` is 1. In every
 * other case `var_name` is NULL and both companions are 0.
 *
 * The pointer is borrowed from the option array; nothing is copied.
 * Requires `option_array` and a scratch `zval *option_val` in the calling
 * scope. Wrapped in do { } while (0) so the expansion is one statement and
 * stays correct under an unbraced if/else; invoke it with a trailing ';'.
 */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STRVAL_P(option_val); \
					var_name##_len = Z_STRLEN_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
73 /* }}} */
74
75 /* {{{ FETCH_STR_OPTION(var_name, option_name) */
/*
 * Looks up `option_name` (a string literal) in `option_array`, following
 * references. When the entry exists and is a string, `var_name` receives the
 * underlying zend_string pointer (borrowed, not copied) and `var_name##_set`
 * is 1. In every other case `var_name` is NULL and `var_name##_set` is 0.
 *
 * Requires `option_array` and a scratch `zval *option_val` in the calling
 * scope. Wrapped in do { } while (0) so the expansion is one statement and
 * stays correct under an unbraced if/else; invoke it with a trailing ';'.
 */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STR_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
87 /* }}} */
88
/* Address family detected by php_filter_validate_ip(). */
enum {
	FORMAT_IPV4 = 4,
	FORMAT_IPV6 = 6
};

/* Defined further down; declared here because the URL validator uses it first. */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]);
93
/*
 * Parses a base-10 integer with an optional single leading '+' or '-'.
 *
 * str, str_len: the characters to parse; only str[0..str_len) is examined.
 * ret:          receives the parsed value on success (including 0 for the
 *               inputs "0", "+0" and "-0").
 *
 * Returns 1 on success. Returns -1 when the input is empty, is only a sign,
 * starts with a zero that is followed by more characters, contains a
 * non-digit, or does not fit into a zend_long.
 */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Consume at most one sign character, never looking past the input. */
	if (str < end && (*str == '-' || *str == '+')) {
		sign = (*str == '-');
		str++;
	}

	if (str + 1 == end && *str == '0') {
		/* Special cases: 0, +0 and -0. Store the result explicitly so the
		 * caller does not have to pre-initialise *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9*/
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str >= '0' && *str <= '9') {
			digit = (*(str++) - '0');
			/* Check against the limit before multiplying so the signed
			 * arithmetic below can never overflow. */
			if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
				ctx_value = (ctx_value * 10) + digit;
			} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
				ctx_value = (ctx_value * 10) - digit;
			} else {
				return -1;
			}
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
145 /* }}} */
146
/*
 * Parses str[0..str_len) as an unsigned octal number.
 *
 * The digits are accumulated in a zend_ulong and the result is stored in *ret
 * after a cast to zend_long. An empty input yields 0.
 *
 * Returns 1 on success, -1 on a non-octal character or when the value does
 * not fit into a zend_ulong.
 */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *const stop = str + str_len;
	const char *cursor;
	zend_ulong acc = 0;

	for (cursor = str; cursor < stop; cursor++) {
		zend_ulong digit;

		if (*cursor < '0' || *cursor > '7') {
			return -1;
		}
		digit = (*cursor - '0');

		/* acc * 8 would wrap */
		if (acc > limit / 8) {
			return -1;
		}
		acc *= 8;

		/* acc + digit would wrap */
		if (acc > limit - digit) {
			return -1;
		}
		acc += digit;
	}

	*ret = (zend_long)acc;
	return 1;
}
168 /* }}} */
169
/*
 * Parses str[0..str_len) as an unsigned hexadecimal number (no "0x" prefix;
 * both upper- and lower-case digits are accepted).
 *
 * The digits are accumulated in a zend_ulong and the result is stored in *ret
 * after a cast to zend_long. An empty input yields 0.
 *
 * Returns 1 on success, -1 on a non-hex character or when the value does not
 * fit into a zend_ulong.
 */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *const stop = str + str_len;
	const char *cursor;
	zend_ulong acc = 0;

	for (cursor = str; cursor < stop; cursor++) {
		const char c = *cursor;
		zend_ulong nibble;

		if (c >= '0' && c <= '9') {
			nibble = (c - '0');
		} else if (c >= 'a' && c <= 'f') {
			nibble = (c - ('a' - 10));
		} else if (c >= 'A' && c <= 'F') {
			nibble = (c - ('A' - 10));
		} else {
			return -1;
		}

		/* acc * 16 would wrap */
		if (acc > limit / 16) {
			return -1;
		}
		acc *= 16;

		/* acc + nibble would wrap */
		if (acc > limit - nibble) {
			return -1;
		}
		acc += nibble;
	}

	*ret = (zend_long)acc;
	return 1;
}
195 /* }}} */
196
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)197 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
198 {
199 zval *option_val;
200 zend_long min_range, max_range, option_flags;
201 int min_range_set, max_range_set;
202 int allow_octal = 0, allow_hex = 0;
203 size_t len;
204 int error = 0;
205 zend_long ctx_value;
206 char *p;
207
208 /* Parse options */
209 FETCH_LONG_OPTION(min_range, "min_range");
210 FETCH_LONG_OPTION(max_range, "max_range");
211 option_flags = flags;
212
213 len = Z_STRLEN_P(value);
214
215 if (len == 0) {
216 RETURN_VALIDATION_FAILED
217 }
218
219 if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
220 allow_octal = 1;
221 }
222
223 if (option_flags & FILTER_FLAG_ALLOW_HEX) {
224 allow_hex = 1;
225 }
226
227 /* Start the validating loop */
228 p = Z_STRVAL_P(value);
229 ctx_value = 0;
230
231 PHP_FILTER_TRIM_DEFAULT(p, len);
232
233 if (*p == '0') {
234 p++; len--;
235 if (allow_hex && (*p == 'x' || *p == 'X')) {
236 p++; len--;
237 if (len == 0) {
238 RETURN_VALIDATION_FAILED
239 }
240 if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
241 error = 1;
242 }
243 } else if (allow_octal) {
244 /* Support explicit octal prefix notation */
245 if (*p == 'o' || *p == 'O') {
246 p++; len--;
247 if (len == 0) {
248 RETURN_VALIDATION_FAILED
249 }
250 }
251 if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
252 error = 1;
253 }
254 } else if (len != 0) {
255 error = 1;
256 }
257 } else {
258 if (php_filter_parse_int(p, len, &ctx_value) < 0) {
259 error = 1;
260 }
261 }
262
263 if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
264 RETURN_VALIDATION_FAILED
265 } else {
266 zval_ptr_dtor(value);
267 ZVAL_LONG(value, ctx_value);
268 return;
269 }
270 }
271 /* }}} */
272
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)273 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
274 {
275 char *str = Z_STRVAL_P(value);
276 size_t len = Z_STRLEN_P(value);
277 int ret;
278
279 PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
280
281 /* returns true for "1", "true", "on" and "yes"
282 * returns false for "0", "false", "off", "no", and ""
283 * null otherwise. */
284 switch (len) {
285 case 0:
286 ret = 0;
287 break;
288 case 1:
289 if (*str == '1') {
290 ret = 1;
291 } else if (*str == '0') {
292 ret = 0;
293 } else {
294 ret = -1;
295 }
296 break;
297 case 2:
298 if (strncasecmp(str, "on", 2) == 0) {
299 ret = 1;
300 } else if (strncasecmp(str, "no", 2) == 0) {
301 ret = 0;
302 } else {
303 ret = -1;
304 }
305 break;
306 case 3:
307 if (strncasecmp(str, "yes", 3) == 0) {
308 ret = 1;
309 } else if (strncasecmp(str, "off", 3) == 0) {
310 ret = 0;
311 } else {
312 ret = -1;
313 }
314 break;
315 case 4:
316 if (strncasecmp(str, "true", 4) == 0) {
317 ret = 1;
318 } else {
319 ret = -1;
320 }
321 break;
322 case 5:
323 if (strncasecmp(str, "false", 5) == 0) {
324 ret = 0;
325 } else {
326 ret = -1;
327 }
328 break;
329 default:
330 ret = -1;
331 }
332
333 if (ret == -1) {
334 RETURN_VALIDATION_FAILED
335 } else {
336 zval_ptr_dtor(value);
337 ZVAL_BOOL(value, ret);
338 }
339 }
340 /* }}} */
341
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)342 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
343 {
344 size_t len;
345 char *str, *end;
346 char *num, *p;
347 zval *option_val;
348 char *decimal;
349 int decimal_set;
350 size_t decimal_len;
351 char dec_sep = '.';
352 char *thousand;
353 int thousand_set;
354 size_t thousand_len;
355 char *tsd_sep;
356
357 zend_long lval;
358 double dval;
359 double min_range, max_range;
360 int min_range_set, max_range_set;
361
362 int first, n;
363
364 len = Z_STRLEN_P(value);
365 str = Z_STRVAL_P(value);
366
367 PHP_FILTER_TRIM_DEFAULT(str, len);
368 end = str + len;
369
370 FETCH_STRING_OPTION(decimal, "decimal");
371
372 if (decimal_set) {
373 if (decimal_len != 1) {
374 zend_value_error("%s(): \"decimal\" option must be one character long", get_active_function_name());
375 RETURN_VALIDATION_FAILED
376 } else {
377 dec_sep = *decimal;
378 }
379 }
380
381 FETCH_STRING_OPTION(thousand, "thousand");
382
383 if (thousand_set) {
384 if (thousand_len < 1) {
385 zend_value_error("%s(): \"thousand\" option cannot be empty", get_active_function_name());
386 RETURN_VALIDATION_FAILED
387 } else {
388 tsd_sep = thousand;
389 }
390 } else {
391 tsd_sep = "',.";
392 }
393
394 FETCH_DOUBLE_OPTION(min_range, "min_range");
395 FETCH_DOUBLE_OPTION(max_range, "max_range");
396
397 num = p = emalloc(len+1);
398 if (str < end && (*str == '+' || *str == '-')) {
399 *p++ = *str++;
400 }
401 first = 1;
402 while (1) {
403 n = 0;
404 while (str < end && *str >= '0' && *str <= '9') {
405 ++n;
406 *p++ = *str++;
407 }
408 if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
409 if (!first && n != 3) {
410 goto error;
411 }
412 if (*str == dec_sep) {
413 *p++ = '.';
414 str++;
415 while (str < end && *str >= '0' && *str <= '9') {
416 *p++ = *str++;
417 }
418 }
419 if (*str == 'e' || *str == 'E') {
420 *p++ = *str++;
421 if (str < end && (*str == '+' || *str == '-')) {
422 *p++ = *str++;
423 }
424 while (str < end && *str >= '0' && *str <= '9') {
425 *p++ = *str++;
426 }
427 }
428 break;
429 }
430 if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
431 if (first?(n < 1 || n > 3):(n != 3)) {
432 goto error;
433 }
434 first = 0;
435 str++;
436 } else {
437 goto error;
438 }
439 }
440 if (str != end) {
441 goto error;
442 }
443 *p = 0;
444
445 switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
446 case IS_LONG:
447 zval_ptr_dtor(value);
448 if ((min_range_set && (lval < min_range)) || (max_range_set && (lval > max_range))) {
449 goto error;
450 }
451 ZVAL_DOUBLE(value, (double)lval);
452 break;
453 case IS_DOUBLE:
454 if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
455 goto error;
456 }
457 if ((min_range_set && (dval < min_range)) || (max_range_set && (dval > max_range))) {
458 goto error;
459 }
460 zval_ptr_dtor(value);
461 ZVAL_DOUBLE(value, dval);
462 break;
463 default:
464 error:
465 efree(num);
466 RETURN_VALIDATION_FAILED
467 }
468 efree(num);
469 }
470 /* }}} */
471
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)472 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
473 {
474 zval *option_val;
475 zend_string *regexp;
476 int regexp_set;
477 pcre2_code *re = NULL;
478 pcre2_match_data *match_data = NULL;
479 uint32_t capture_count;
480 int rc;
481
482 /* Parse options */
483 FETCH_STR_OPTION(regexp, "regexp");
484
485 if (!regexp_set) {
486 zend_value_error("%s(): \"regexp\" option is missing", get_active_function_name());
487 RETURN_VALIDATION_FAILED
488 }
489
490 re = pcre_get_compiled_regex(regexp, &capture_count);
491 if (!re) {
492 RETURN_VALIDATION_FAILED
493 }
494 match_data = php_pcre_create_match_data(capture_count, re);
495 if (!match_data) {
496 RETURN_VALIDATION_FAILED
497 }
498 rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
499 php_pcre_free_match_data(match_data);
500
501 /* 0 means that the vector is too small to hold all the captured substring offsets */
502 if (rc < 0) {
503 RETURN_VALIDATION_FAILED
504 }
505 }
506
/*
 * Checks the syntax of a domain name.
 *
 * domain, len: the name to check; the length is taken as size_t so that the
 *              size_t lengths passed by the callers are not truncated.
 * flags:       with FILTER_FLAG_HOSTNAME every label must additionally start
 *              and end with an alphanumeric character and may otherwise only
 *              contain alphanumerics and '-'.
 *
 * One trailing dot is ignored. The name without it may be at most 253
 * characters long, each label at most 63, and labels may not be empty.
 *
 * Returns 1 when the name is acceptable, 0 otherwise.
 */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	char *e, *s;
	size_t l;
	int hostname = (flags & FILTER_FLAG_HOSTNAME) != 0;
	unsigned char i = 1;

	if (len == 0) {
		/* There is no character to inspect (in particular no "last
		 * character" at domain[-1]). An empty name cannot satisfy the
		 * hostname rule that the first character be alphanumeric; without
		 * that flag nothing is violated. */
		return hostname ? 0 : 1;
	}

	s = domain;
	l = len;
	e = domain + l;

	/* Ignore trailing dot */
	if (*(e - 1) == '.') {
		e--;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			/* i is the 1-based position of this character in its label */
			if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
557 /* }}} */
558
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)559 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
560 {
561 if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
562 RETURN_VALIDATION_FAILED
563 }
564 }
565 /* }}} */
566
/*
 * Checks that a URL user or password component only consists of characters
 * allowed in the RFC 3986 "userinfo" production: unreserved characters,
 * sub-delimiters, ':' and percent-encoded octets ("%" HEXDIG HEXDIG).
 *
 * Returns 1 when every character is acceptable, 0 otherwise.
 */
static int is_userinfo_valid(zend_string *str)
{
	const char *valid = "-._~!$&'()*+,;=:";
	/* <ctype.h> functions require values in the unsigned char range. */
	const unsigned char *p = (const unsigned char *) ZSTR_VAL(str);
	const unsigned char *end = p + ZSTR_LEN(str);

	while (p < end) {
		/* strchr() also matches the terminating NUL of `valid`, so a NUL
		 * byte has to be excluded explicitly. */
		if (*p != '\0' && (isalnum(*p) || strchr(valid, *p))) {
			p++;
		} else if (*p == '%' && end - p >= 3 && isxdigit(p[1]) && isxdigit(p[2])) {
			/* both escape digits are hexadecimal, e.g. "%C3" */
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
582
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)583 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
584 {
585 php_url *url;
586 size_t old_len = Z_STRLEN_P(value);
587
588 php_filter_url(value, flags, option_array, charset);
589
590 if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
591 RETURN_VALIDATION_FAILED
592 }
593
594 /* Use parse_url - if it returns false, we return NULL */
595 url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
596
597 if (url == NULL) {
598 RETURN_VALIDATION_FAILED
599 }
600
601 if (url->scheme != NULL &&
602 (zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
603 char *e, *s, *t;
604 size_t l;
605
606 if (url->host == NULL) {
607 goto bad_url;
608 }
609
610 s = ZSTR_VAL(url->host);
611 l = ZSTR_LEN(url->host);
612 e = s + l;
613 t = e - 1;
614
615 /* An IPv6 enclosed by square brackets is a valid hostname */
616 if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2, NULL)) {
617 php_url_free(url);
618 return;
619 }
620
621 // Validate domain
622 if (!_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)) {
623 php_url_free(url);
624 RETURN_VALIDATION_FAILED
625 }
626 }
627
628 if (
629 url->scheme == NULL ||
630 /* some schemas allow the host to be empty */
631 (url->host == NULL && (!zend_string_equals_literal(url->scheme, "mailto") && !zend_string_equals_literal(url->scheme, "news") && !zend_string_equals_literal(url->scheme, "file"))) ||
632 ((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
633 ) {
634 bad_url:
635 php_url_free(url);
636 RETURN_VALIDATION_FAILED
637 }
638
639 if ((url->user != NULL && !is_userinfo_valid(url->user))
640 || (url->pass != NULL && !is_userinfo_valid(url->pass))
641 ) {
642 php_url_free(url);
643 RETURN_VALIDATION_FAILED
644
645 }
646
647 php_url_free(url);
648 }
649 /* }}} */
650
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)651 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
652 {
653 /*
654 * The regex below is based on a regex by Michael Rushton.
655 * However, it is not identical. I changed it to only consider routeable
656 * addresses as valid. Michael's regex considers a@b a valid address
657 * which conflicts with section 2.3.5 of RFC 5321 which states that:
658 *
659 * Only resolvable, fully-qualified domain names (FQDNs) are permitted
660 * when domain names are used in SMTP. In other words, names that can
661 * be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
662 * in Section 5) are permitted, as are CNAME RRs whose targets can be
663 * resolved, in turn, to MX or address RRs. Local nicknames or
664 * unqualified names MUST NOT be used.
665 *
666 * This regex does not handle comments and folding whitespace. While
667 * this is technically valid in an email address, these parts aren't
668 * actually part of the address itself.
669 *
670 * Michael's regex carries this copyright:
671 *
672 * Copyright © Michael Rushton 2009-10
673 * http://squiloople.com/
674 * Feel free to use and redistribute this code. But please keep this copyright notice.
675 *
676 */
677 pcre2_code *re = NULL;
678 pcre2_match_data *match_data = NULL;
679 uint32_t capture_count;
680 zend_string *sregexp;
681 int rc;
682 const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
683 const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
684 const char *regexp;
685 size_t regexp_len;
686
687 if (flags & FILTER_FLAG_EMAIL_UNICODE) {
688 regexp = regexp0;
689 regexp_len = sizeof(regexp0) - 1;
690 } else {
691 regexp = regexp1;
692 regexp_len = sizeof(regexp1) - 1;
693 }
694
695 /* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
696 if (Z_STRLEN_P(value) > 320) {
697 RETURN_VALIDATION_FAILED
698 }
699
700 sregexp = zend_string_init(regexp, regexp_len, 0);
701 re = pcre_get_compiled_regex(sregexp, &capture_count);
702 zend_string_release_ex(sregexp, 0);
703 if (!re) {
704 RETURN_VALIDATION_FAILED
705 }
706 match_data = php_pcre_create_match_data(capture_count, re);
707 if (!match_data) {
708 RETURN_VALIDATION_FAILED
709 }
710 rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
711 php_pcre_free_match_data(match_data);
712
713 /* 0 means that the vector is too small to hold all the captured substring offsets */
714 if (rc < 0) {
715 RETURN_VALIDATION_FAILED
716 }
717
718 }
719 /* }}} */
720
/*
 * Validates a dotted-quad IPv4 address in str[0..str_len).
 *
 * Exactly four decimal fields separated by '.' are required, each in the
 * range 0..255 and without leading zeros (a leading 0 would suggest octal,
 * which is not supported). Each field is stored in ip[] as soon as it has
 * been parsed, so ip[] may be partially written when validation fails.
 *
 * Returns 1 when the whole input is a valid address, 0 otherwise.
 */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cur = str;
	const char *const stop = str + str_len;
	int field;

	for (field = 0; field < 4; field++) {
		const char *field_start = cur;
		int octet = 0;

		/* every field needs at least one digit */
		if (cur >= stop || *cur < '0' || *cur > '9') {
			return 0;
		}

		do {
			octet = octet * 10 + (*cur++ - '0');
			if (octet > 255 || cur - field_start > 3) {
				return 0;
			}
		} while (cur < stop && *cur >= '0' && *cur <= '9');

		/* "0" is fine, "01" or "00" is not */
		if (*field_start == '0' && cur - field_start > 1) {
			return 0;
		}

		ip[field] = octet;

		if (field == 3) {
			/* nothing may follow the fourth field */
			return cur == stop;
		}
		if (cur >= stop || *cur++ != '.') {
			return 0;
		}
	}

	return 0;
}
754 /* }}} */
755
/*
 * Validates a textual IPv6 address in str[0..str_len).
 *
 * Accepts up to eight groups of 1-4 hex digits separated by ':', at most one
 * "::" compression, and an optional dotted-quad IPv4 tail (which counts as
 * two groups).
 *
 * ip: optional output (may be NULL). When given, it receives the eight
 *     16-bit groups with the "::" gap expanded to zeros. When the address
 *     carries an IPv4 tail, the output is always written as
 *     0:0:0:0:0:ffff:<a.b>:<c.d>, regardless of the hex groups in front of it.
 *     The contents are only meaningful when the function returns 1.
 *
 * Returns 1 when the address is valid, 0 otherwise.
 */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]) /* {{{ */
{
	int compressed_pos = -1; /* group index of "::", or -1 if not seen */
	int blocks = 0;          /* number of 16-bit groups accounted for so far */
	int num, n, i;
	char *ipv4;
	char *end;
	int ip4elm[4];
	char *s = str;           /* start of the input, for the leading-':' check */

	/* an IPv6 address contains at least one colon */
	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* walk back from the first '.' to the start of the dotted quad */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* the IPv4 tail occupies the last two groups */
		blocks = 2;
	}

	end = str + str_len;

	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				/* only one "::" is allowed */
				if (compressed_pos >= 0) {
					return 0;
				}
				/* placeholder; overwritten with zeros in fixup_ip */
				if (ip && blocks < 8) {
					ip[blocks] = -1;
				}
				compressed_pos = blocks++; /* :: means 1 or more 16-bit 0 blocks */
				if (++str == end) {
					if (blocks > 8) {
						return 0;
					}
					goto fixup_ip;
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}
		/* parse one group of hex digits; n counts the digits */
		num = n = 0;
		while (str < end) {
			if (*str >= '0' && *str <= '9') {
				num = 16 * num + (*str - '0');
			} else if (*str >= 'a' && *str <= 'f') {
				num = 16 * num + (*str - 'a') + 10;
			} else if (*str >= 'A' && *str <= 'F') {
				num = 16 * num + (*str - 'A') + 10;
			} else {
				break;
			}
			n++;
			str++;
		}
		if (ip && blocks < 8) {
			ip[blocks] = num;
		}
		/* a group has between one and four hex digits */
		if (n < 1 || n > 4) {
			return 0;
		}
		if (++blocks > 8)
			return 0;
	}

fixup_ip:
	if (ip && ipv4) {
		/* emit 0:0:0:0:0:ffff followed by the IPv4 address in two groups */
		for (i = 0; i < 5; i++) {
			ip[i] = 0;
		}
		ip[i++] = 0xffff;
		ip[i++] = 256 * ip4elm[0] + ip4elm[1];
		ip[i++] = 256 * ip4elm[2] + ip4elm[3];
	} else if (ip && compressed_pos >= 0 && blocks <= 8) {
		/* expand "::": shift the groups behind it to the end of the array,
		 * then zero-fill the gap (including the placeholder slot) */
		int offset = 8 - blocks;
		for (i = 7; i > compressed_pos + offset; i--) {
			ip[i] = ip[i - offset];
		}
		for (i = compressed_pos + offset; i >= compressed_pos; i--) {
			ip[i] = 0;
		}
	}

	/* valid with a "::" and at most 8 groups, or with exactly 8 groups */
	return (compressed_pos >= 0 && blocks <= 8) || blocks == 8;
}
865 /* }}} */
866
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)867 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
868 {
869 /* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
870 * flag to throw out reserved ranges; multicast ranges... etc. If both
871 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
872 * colon determine the format */
873
874 int ip[8];
875 int mode;
876
877 if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
878 mode = FORMAT_IPV6;
879 } else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
880 mode = FORMAT_IPV4;
881 } else {
882 RETURN_VALIDATION_FAILED
883 }
884
885 if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
886 /* Both formats are cool */
887 } else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
888 RETURN_VALIDATION_FAILED
889 } else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
890 RETURN_VALIDATION_FAILED
891 }
892
893 switch (mode) {
894 case FORMAT_IPV4:
895 if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
896 RETURN_VALIDATION_FAILED
897 }
898
899 /* Check flags */
900 if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
901 if (
902 (ip[0] == 10) ||
903 (ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
904 (ip[0] == 192 && ip[1] == 168)
905 ) {
906 RETURN_VALIDATION_FAILED
907 }
908 }
909
910 if (flags & FILTER_FLAG_NO_RES_RANGE) {
911 if (
912 (ip[0] == 0) ||
913 (ip[0] >= 240) ||
914 (ip[0] == 127) ||
915 (ip[0] == 169 && ip[1] == 254)
916 ) {
917 RETURN_VALIDATION_FAILED
918 }
919 }
920 break;
921
922 case FORMAT_IPV6:
923 {
924 int res = 0;
925 res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value), ip);
926 if (res < 1) {
927 RETURN_VALIDATION_FAILED
928 }
929 /* Check flags */
930 if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
931 if (ip[0] >= 0xfc00 && ip[0] <= 0xfdff) {
932 RETURN_VALIDATION_FAILED
933 }
934 }
935 if (flags & FILTER_FLAG_NO_RES_RANGE) {
936 if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0
937 && ip[4] == 0 && ip[5] == 0 && ip[6] == 0 && (ip[7] == 0 || ip[7] == 1))
938 || (ip[0] == 0x5f)
939 || (ip[0] >= 0xfe80 && ip[0] <= 0xfebf)
940 || ((ip[0] == 0x2001 && ip[1] == 0x0db8) || (ip[1] >= 0x0010 && ip[1] <= 0x001f))
941 || (ip[0] == 0x3ff3)
942 ) {
943 RETURN_VALIDATION_FAILED
944 }
945 }
946 }
947 break;
948 }
949 }
950 /* }}} */
951
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)952 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
953 {
954 char *input = Z_STRVAL_P(value);
955 size_t input_len = Z_STRLEN_P(value);
956 int tokens, length, i, offset, exp_separator_set;
957 size_t exp_separator_len;
958 char separator;
959 char *exp_separator;
960 zend_long ret = 0;
961 zval *option_val;
962
963 FETCH_STRING_OPTION(exp_separator, "separator");
964
965 if (exp_separator_set && exp_separator_len != 1) {
966 zend_value_error("%s(): \"separator\" option must be one character long", get_active_function_name());
967 RETURN_VALIDATION_FAILED;
968 }
969
970 if (14 == input_len) {
971 /* EUI-64 format: Four hexadecimal digits separated by dots. Less
972 * commonly used but valid nonetheless.
973 */
974 tokens = 3;
975 length = 4;
976 separator = '.';
977 } else if (17 == input_len && input[2] == '-') {
978 /* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
979 tokens = 6;
980 length = 2;
981 separator = '-';
982 } else if (17 == input_len && input[2] == ':') {
983 /* IEEE 802 format: Six hexadecimal digits separated by colons. */
984 tokens = 6;
985 length = 2;
986 separator = ':';
987 } else {
988 RETURN_VALIDATION_FAILED;
989 }
990
991 if (exp_separator_set && separator != exp_separator[0]) {
992 RETURN_VALIDATION_FAILED;
993 }
994
995 /* Essentially what we now have is a set of tokens each consisting of
996 * a hexadecimal number followed by a separator character. (With the
997 * exception of the last token which does not have the separator.)
998 */
999 for (i = 0; i < tokens; i++) {
1000 offset = i * (length + 1);
1001
1002 if (i < tokens - 1 && input[offset + length] != separator) {
1003 /* The current token did not end with e.g. a "." */
1004 RETURN_VALIDATION_FAILED
1005 }
1006 if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
1007 /* The current token is no valid hexadecimal digit */
1008 RETURN_VALIDATION_FAILED
1009 }
1010 }
1011 }
1012 /* }}} */
1013