1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Clayton Collie <clcollie@mindspring.com> |
14 +----------------------------------------------------------------------+
15 */
16
17 /*
18 scanf.c --
19
20 This file contains the base code which implements sscanf and by extension
21 fscanf. Original code is from TCL8.3.0 and bears the following copyright:
22
23 This software is copyrighted by the Regents of the University of
24 California, Sun Microsystems, Inc., Scriptics Corporation,
25 and other parties. The following terms apply to all files associated
26 with the software unless explicitly disclaimed in individual files.
27
28 The authors hereby grant permission to use, copy, modify, distribute,
29 and license this software and its documentation for any purpose, provided
30 that existing copyright notices are retained in all copies and that this
31 notice is included verbatim in any distributions. No written agreement,
32 license, or royalty fee is required for any of the authorized uses.
33 Modifications to this software may be copyrighted by their authors
34 and need not follow the licensing terms described here, provided that
35 the new terms are clearly indicated on the first page of each file where
36 they apply.
37
38 IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
39 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
40 ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
41 DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43
44 THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
45 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
46 FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
47 IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
48 NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
49 MODIFICATIONS.
50
51 GOVERNMENT USE: If you are acquiring this software on behalf of the
52 U.S. government, the Government shall have only "Restricted Rights"
53 in the software and related documentation as defined in the Federal
54 Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
55 are acquiring the software on behalf of the Department of Defense, the
56 software shall be classified as "Commercial Computer Software" and the
57 Government shall have only "Restricted Rights" as defined in Clause
58 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
59 authors grant the U.S. Government and others acting in its behalf
60 permission to use and distribute the software in accordance with the
61 terms specified in this license.
62 */
63
64 #include <stdio.h>
65 #include <limits.h>
66 #include <ctype.h>
67 #include "php.h"
68 #include "php_variables.h"
69 #include <locale.h>
70 #include "zend_execute.h"
71 #include "zend_operators.h"
72 #include "zend_strtod.h"
73 #include "php_globals.h"
74 #include "basic_functions.h"
75 #include "scanf.h"
76
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion specifier itself; the second
 * group is scratch state used while accumulating the characters of a
 * number.  Both groups share a single "flags" word, so every value must
 * stay a distinct bit.
 */
#define SCAN_NOSKIP	0x1	/* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2	/* Suppress assignment. */
#define SCAN_UNSIGNED	0x4	/* Read an unsigned value. */
#define SCAN_WIDTH	0x8	/* A width value was supplied. */

#define SCAN_SIGNOK	0x10	/* A +/- character is allowed. */
#define SCAN_NODIGITS	0x20	/* No digits have been scanned. */
#define SCAN_NOZERO	0x40	/* No zero digits have been scanned. */
#define SCAN_XOK	0x80	/* An 'x' is allowed. */
#define SCAN_PTOK	0x100	/* Decimal point is allowed. */
#define SCAN_EXPOK	0x200	/* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to an unsigned char value so that it can
 * be handed to the <ctype.h> classification functions.  The whole expansion
 * is parenthesized so the macro behaves as a single operand wherever it is
 * used.
 */
#define UCHAR(x) ((zend_uchar)(x))
93
/*
 * The following structure contains the information associated with
 * a character set, i.e. the parsed form of a "%[...]" conversion.
 * It is filled in by BuildCharSet(), queried by CharInSet() and its
 * heap members are released by ReleaseCharSet().
 */
typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set. */
	int nchars;		/* Number of entries currently stored in chars. */
	char *chars;		/* Individual member characters (heap buffer, not NUL-terminated). */
	int nranges;		/* Number of entries currently stored in ranges. */
	struct Range {
		char start;	/* Lower bound of the range (inclusive). */
		char end;	/* Upper bound of the range (inclusive). */
	} *ranges;		/* Character ranges; NULL when the set contains no '-'. */
} CharSet;
108
/*
 * Common signature for the integer parsers selected by php_sscanf_internal():
 * (text to parse, optional out-pointer to the first unparsed character,
 * numeric base).  ZEND_STRTOL_PTR and ZEND_STRTOUL_PTR are cast to this type.
 */
typedef zend_long (*int_string_formater)(const char*, char**, int);
110
111 /*
112 * Declarations for functions used only in this file.
113 */
114 static char *BuildCharSet(CharSet *cset, char *format);
115 static int CharInSet(CharSet *cset, int ch);
116 static void ReleaseCharSet(CharSet *cset);
117 static inline void scan_set_error_return(int numVars, zval *return_value);
118
119
120 /* {{{ BuildCharSet
121 *----------------------------------------------------------------------
122 *
123 * BuildCharSet --
124 *
125 * This function examines a character set format specification
126 * and builds a CharSet containing the individual characters and
127 * character ranges specified.
128 *
129 * Results:
130 * Returns the next format position.
131 *
132 * Side effects:
133 * Initializes the charset.
134 *
135 *----------------------------------------------------------------------
136 */
BuildCharSet(CharSet * cset,char * format)137 static char * BuildCharSet(CharSet *cset, char *format)
138 {
139 char *ch, start;
140 int nranges;
141 char *end;
142
143 memset(cset, 0, sizeof(CharSet));
144
145 ch = format;
146 if (*ch == '^') {
147 cset->exclude = 1;
148 ch = ++format;
149 }
150 end = format + 1; /* verify this - cc */
151
152 /*
153 * Find the close bracket so we can overallocate the set.
154 */
155 if (*ch == ']') {
156 ch = end++;
157 }
158 nranges = 0;
159 while (*ch != ']') {
160 if (*ch == '-') {
161 nranges++;
162 }
163 ch = end++;
164 }
165
166 cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
167 if (nranges > 0) {
168 cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
169 } else {
170 cset->ranges = NULL;
171 }
172
173 /*
174 * Now build the character set.
175 */
176 cset->nchars = cset->nranges = 0;
177 ch = format++;
178 start = *ch;
179 if (*ch == ']' || *ch == '-') {
180 cset->chars[cset->nchars++] = *ch;
181 ch = format++;
182 }
183 while (*ch != ']') {
184 if (*format == '-') {
185 /*
186 * This may be the first character of a range, so don't add
187 * it yet.
188 */
189 start = *ch;
190 } else if (*ch == '-') {
191 /*
192 * Check to see if this is the last character in the set, in which
193 * case it is not a range and we should add the previous character
194 * as well as the dash.
195 */
196 if (*format == ']') {
197 cset->chars[cset->nchars++] = start;
198 cset->chars[cset->nchars++] = *ch;
199 } else {
200 ch = format++;
201
202 /*
203 * Check to see if the range is in reverse order.
204 */
205 if (start < *ch) {
206 cset->ranges[cset->nranges].start = start;
207 cset->ranges[cset->nranges].end = *ch;
208 } else {
209 cset->ranges[cset->nranges].start = *ch;
210 cset->ranges[cset->nranges].end = start;
211 }
212 cset->nranges++;
213 }
214 } else {
215 cset->chars[cset->nchars++] = *ch;
216 }
217 ch = format++;
218 }
219 return format;
220 }
221 /* }}} */
222
223 /* {{{ CharInSet
224 *----------------------------------------------------------------------
225 *
226 * CharInSet --
227 *
228 * Check to see if a character matches the given set.
229 *
230 * Results:
231 * Returns non-zero if the character matches the given set.
232 *
233 * Side effects:
234 * None.
235 *
236 *----------------------------------------------------------------------
237 */
CharInSet(CharSet * cset,int c)238 static int CharInSet(CharSet *cset, int c)
239 {
240 char ch = (char) c;
241 int i, match = 0;
242
243 for (i = 0; i < cset->nchars; i++) {
244 if (cset->chars[i] == ch) {
245 match = 1;
246 break;
247 }
248 }
249 if (!match) {
250 for (i = 0; i < cset->nranges; i++) {
251 if ((cset->ranges[i].start <= ch)
252 && (ch <= cset->ranges[i].end)) {
253 match = 1;
254 break;
255 }
256 }
257 }
258 return (cset->exclude ? !match : match);
259 }
260 /* }}} */
261
262 /* {{{ ReleaseCharSet
263 *----------------------------------------------------------------------
264 *
265 * ReleaseCharSet --
266 *
267 * Free the storage associated with a character set.
268 *
269 * Results:
270 * None.
271 *
272 * Side effects:
273 * None.
274 *
275 *----------------------------------------------------------------------
276 */
ReleaseCharSet(CharSet * cset)277 static void ReleaseCharSet(CharSet *cset)
278 {
279 efree((char *)cset->chars);
280 if (cset->ranges) {
281 efree((char *)cset->ranges);
282 }
283 }
284 /* }}} */
285
286 /* {{{ ValidateFormat
287 *----------------------------------------------------------------------
288 *
289 * ValidateFormat --
290 *
291 * Parse the format string and verify that it is properly formed
292 * and that there are exactly enough variables on the command line.
293 *
294 * Results:
295 * FAILURE or SUCCESS.
296 *
297 * Side effects:
298 * May set php_error based on abnormal conditions.
299 *
300 * Parameters :
301 * format The format string.
302 * numVars The number of variables passed to the scan command.
303 * totalSubs The number of variables that will be required.
304 *
305 *----------------------------------------------------------------------
306 */
ValidateFormat(char * format,int numVars,int * totalSubs)307 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
308 {
309 #define STATIC_LIST_SIZE 16
310 int gotXpg, gotSequential, value, i, flags;
311 char *end, *ch = NULL;
312 int staticAssign[STATIC_LIST_SIZE];
313 int *nassign = staticAssign;
314 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
315
316 /*
317 * Initialize an array that records the number of times a variable
318 * is assigned to by the format string. We use this to detect if
319 * a variable is multiply assigned or left unassigned.
320 */
321 if (numVars > nspace) {
322 nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
323 nspace = numVars;
324 }
325 for (i = 0; i < nspace; i++) {
326 nassign[i] = 0;
327 }
328
329 xpgSize = objIndex = gotXpg = gotSequential = 0;
330
331 while (*format != '\0') {
332 ch = format++;
333 flags = 0;
334
335 if (*ch != '%') {
336 continue;
337 }
338 ch = format++;
339 if (*ch == '%') {
340 continue;
341 }
342 if (*ch == '*') {
343 flags |= SCAN_SUPPRESS;
344 ch = format++;
345 goto xpgCheckDone;
346 }
347
348 if ( isdigit( (int)*ch ) ) {
349 /*
350 * Check for an XPG3-style %n$ specification. Note: there
351 * must not be a mixture of XPG3 specs and non-XPG3 specs
352 * in the same format string.
353 */
354 value = ZEND_STRTOUL(format-1, &end, 10);
355 if (*end != '$') {
356 goto notXpg;
357 }
358 format = end+1;
359 ch = format++;
360 gotXpg = 1;
361 if (gotSequential) {
362 goto mixedXPG;
363 }
364 objIndex = value - 1;
365 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
366 goto badIndex;
367 } else if (numVars == 0) {
368 /*
369 * In the case where no vars are specified, the user can
370 * specify %9999$ legally, so we have to consider special
371 * rules for growing the assign array. 'value' is
372 * guaranteed to be > 0.
373 */
374
375 /* set a lower artificial limit on this
376 * in the interest of security and resource friendliness
377 * 255 arguments should be more than enough. - cc
378 */
379 if (value > SCAN_MAX_ARGS) {
380 goto badIndex;
381 }
382
383 xpgSize = (xpgSize > value) ? xpgSize : value;
384 }
385 goto xpgCheckDone;
386 }
387
388 notXpg:
389 gotSequential = 1;
390 if (gotXpg) {
391 mixedXPG:
392 zend_value_error("%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
393 goto error;
394 }
395
396 xpgCheckDone:
397 /*
398 * Parse any width specifier.
399 */
400 if (isdigit(UCHAR(*ch))) {
401 value = ZEND_STRTOUL(format-1, &format, 10);
402 flags |= SCAN_WIDTH;
403 ch = format++;
404 }
405
406 /*
407 * Ignore size specifier.
408 */
409 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
410 ch = format++;
411 }
412
413 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
414 goto badIndex;
415 }
416
417 /*
418 * Handle the various field types.
419 */
420 switch (*ch) {
421 case 'n':
422 case 'd':
423 case 'D':
424 case 'i':
425 case 'o':
426 case 'x':
427 case 'X':
428 case 'u':
429 case 'f':
430 case 'e':
431 case 'E':
432 case 'g':
433 case 's':
434 break;
435
436 case 'c':
437 /* we differ here with the TCL implementation in allowing for */
438 /* a character width specification, to be more consistent with */
439 /* ANSI. since Zend auto allocates space for vars, this is no */
440 /* problem - cc */
441 /*
442 if (flags & SCAN_WIDTH) {
443 php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
444 goto error;
445 }
446 */
447 break;
448
449 case '[':
450 if (*format == '\0') {
451 goto badSet;
452 }
453 ch = format++;
454 if (*ch == '^') {
455 if (*format == '\0') {
456 goto badSet;
457 }
458 ch = format++;
459 }
460 if (*ch == ']') {
461 if (*format == '\0') {
462 goto badSet;
463 }
464 ch = format++;
465 }
466 while (*ch != ']') {
467 if (*format == '\0') {
468 goto badSet;
469 }
470 ch = format++;
471 }
472 break;
473 badSet:
474 zend_value_error("Unmatched [ in format string");
475 goto error;
476
477 default: {
478 zend_value_error("Bad scan conversion character \"%c\"", *ch);
479 goto error;
480 }
481 }
482
483 if (!(flags & SCAN_SUPPRESS)) {
484 if (objIndex >= nspace) {
485 /*
486 * Expand the nassign buffer. If we are using XPG specifiers,
487 * make sure that we grow to a large enough size. xpgSize is
488 * guaranteed to be at least one larger than objIndex.
489 */
490 value = nspace;
491 if (xpgSize) {
492 nspace = xpgSize;
493 } else {
494 nspace += STATIC_LIST_SIZE;
495 }
496 if (nassign == staticAssign) {
497 nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
498 for (i = 0; i < STATIC_LIST_SIZE; ++i) {
499 nassign[i] = staticAssign[i];
500 }
501 } else {
502 nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
503 }
504 for (i = value; i < nspace; i++) {
505 nassign[i] = 0;
506 }
507 }
508 nassign[objIndex]++;
509 objIndex++;
510 }
511 } /* while (*format != '\0') */
512
513 /*
514 * Verify that all of the variable were assigned exactly once.
515 */
516 if (numVars == 0) {
517 if (xpgSize) {
518 numVars = xpgSize;
519 } else {
520 numVars = objIndex;
521 }
522 }
523 if (totalSubs) {
524 *totalSubs = numVars;
525 }
526 for (i = 0; i < numVars; i++) {
527 if (nassign[i] > 1) {
528 zend_value_error("%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
529 goto error;
530 } else if (!xpgSize && (nassign[i] == 0)) {
531 /*
532 * If the space is empty, and xpgSize is 0 (means XPG wasn't
533 * used, and/or numVars != 0), then too many vars were given
534 */
535 zend_value_error("Variable is not assigned by any conversion specifiers");
536 goto error;
537 }
538 }
539
540 if (nassign != staticAssign) {
541 efree((char *)nassign);
542 }
543 return SCAN_SUCCESS;
544
545 badIndex:
546 if (gotXpg) {
547 zend_value_error("%s", "\"%n$\" argument index out of range");
548 } else {
549 zend_value_error("Different numbers of variable names and field specifiers");
550 }
551
552 error:
553 if (nassign != staticAssign) {
554 efree((char *)nassign);
555 }
556 return SCAN_ERROR_INVALID_FORMAT;
557 #undef STATIC_LIST_SIZE
558 }
559 /* }}} */
560
561 /* {{{ php_sscanf_internal
562 * This is the internal function which does processing on behalf of
563 * both sscanf() and fscanf()
564 *
565 * parameters :
566 * string literal string to be processed
567 * format format string
568 * argCount total number of elements in the args array
569 * args arguments passed in from user function (f|s)scanf
570 * varStart offset (in args) of 1st variable passed in to (f|s)scanf
571 * return_value set with the results of the scan
572 */
573
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)574 PHPAPI int php_sscanf_internal( char *string, char *format,
575 int argCount, zval *args,
576 int varStart, zval *return_value)
577 {
578 int numVars, nconversions, totalVars = -1;
579 int i, result;
580 zend_long value;
581 int objIndex;
582 char *end, *baseString;
583 zval *current;
584 char op = 0;
585 int base = 0;
586 int underflow = 0;
587 size_t width;
588 int_string_formater fn = NULL;
589 char *ch, sch;
590 int flags;
591 char buf[64]; /* Temporary buffer to hold scanned number
592 * strings before they are passed to strtoul() */
593
594 /* do some sanity checking */
595 if ((varStart > argCount) || (varStart < 0)){
596 varStart = SCAN_MAX_ARGS + 1;
597 }
598 numVars = argCount - varStart;
599 if (numVars < 0) {
600 numVars = 0;
601 }
602
603 /*
604 * Check for errors in the format string.
605 */
606 if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
607 scan_set_error_return( numVars, return_value );
608 return SCAN_ERROR_INVALID_FORMAT;
609 }
610
611 objIndex = numVars ? varStart : 0;
612
613 /*
614 * If any variables are passed, make sure they are all passed by reference
615 */
616 if (numVars) {
617 for (i = varStart;i < argCount;i++){
618 ZEND_ASSERT(Z_ISREF(args[i]) && "Parameter must be passed by reference");
619 }
620 }
621
622 /*
623 * Allocate space for the result objects. Only happens when no variables
624 * are specified
625 */
626 if (!numVars) {
627 zval tmp;
628
629 /* allocate an array for return */
630 array_init(return_value);
631
632 for (i = 0; i < totalVars; i++) {
633 ZVAL_NULL(&tmp);
634 if (add_next_index_zval(return_value, &tmp) == FAILURE) {
635 scan_set_error_return(0, return_value);
636 return FAILURE;
637 }
638 }
639 varStart = 0; /* Array index starts from 0 */
640 }
641
642 baseString = string;
643
644 /*
645 * Iterate over the format string filling in the result objects until
646 * we reach the end of input, the end of the format string, or there
647 * is a mismatch.
648 */
649 nconversions = 0;
650 /* note ! - we need to limit the loop for objIndex to keep it in bounds */
651
652 while (*format != '\0') {
653 ch = format++;
654 flags = 0;
655
656 /*
657 * If we see whitespace in the format, skip whitespace in the string.
658 */
659 if ( isspace( (int)*ch ) ) {
660 sch = *string;
661 while ( isspace( (int)sch ) ) {
662 if (*string == '\0') {
663 goto done;
664 }
665 string++;
666 sch = *string;
667 }
668 continue;
669 }
670
671 if (*ch != '%') {
672 literal:
673 if (*string == '\0') {
674 underflow = 1;
675 goto done;
676 }
677 sch = *string;
678 string++;
679 if (*ch != sch) {
680 goto done;
681 }
682 continue;
683 }
684
685 ch = format++;
686 if (*ch == '%') {
687 goto literal;
688 }
689
690 /*
691 * Check for assignment suppression ('*') or an XPG3-style
692 * assignment ('%n$').
693 */
694 if (*ch == '*') {
695 flags |= SCAN_SUPPRESS;
696 ch = format++;
697 } else if ( isdigit(UCHAR(*ch))) {
698 value = ZEND_STRTOUL(format-1, &end, 10);
699 if (*end == '$') {
700 format = end+1;
701 ch = format++;
702 objIndex = varStart + value - 1;
703 }
704 }
705
706 /*
707 * Parse any width specifier.
708 */
709 if ( isdigit(UCHAR(*ch))) {
710 width = ZEND_STRTOUL(format-1, &format, 10);
711 ch = format++;
712 } else {
713 width = 0;
714 }
715
716 /*
717 * Ignore size specifier.
718 */
719 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
720 ch = format++;
721 }
722
723 /*
724 * Handle the various field types.
725 */
726 switch (*ch) {
727 case 'n':
728 if (!(flags & SCAN_SUPPRESS)) {
729 if (numVars && objIndex >= argCount) {
730 break;
731 } else if (numVars) {
732 current = args + objIndex++;
733 ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
734 } else {
735 add_index_long(return_value, objIndex++, string - baseString);
736 }
737 }
738 nconversions++;
739 continue;
740
741 case 'd':
742 case 'D':
743 op = 'i';
744 base = 10;
745 fn = (int_string_formater)ZEND_STRTOL_PTR;
746 break;
747 case 'i':
748 op = 'i';
749 base = 0;
750 fn = (int_string_formater)ZEND_STRTOL_PTR;
751 break;
752 case 'o':
753 op = 'i';
754 base = 8;
755 fn = (int_string_formater)ZEND_STRTOL_PTR;
756 break;
757 case 'x':
758 case 'X':
759 op = 'i';
760 base = 16;
761 fn = (int_string_formater)ZEND_STRTOL_PTR;
762 break;
763 case 'u':
764 op = 'i';
765 base = 10;
766 flags |= SCAN_UNSIGNED;
767 fn = (int_string_formater)ZEND_STRTOUL_PTR;
768 break;
769
770 case 'f':
771 case 'e':
772 case 'E':
773 case 'g':
774 op = 'f';
775 break;
776
777 case 's':
778 op = 's';
779 break;
780
781 case 'c':
782 op = 's';
783 flags |= SCAN_NOSKIP;
784 /*-cc-*/
785 if (0 == width) {
786 width = 1;
787 }
788 /*-cc-*/
789 break;
790 case '[':
791 op = '[';
792 flags |= SCAN_NOSKIP;
793 break;
794 } /* switch */
795
796 /*
797 * At this point, we will need additional characters from the
798 * string to proceed.
799 */
800 if (*string == '\0') {
801 underflow = 1;
802 goto done;
803 }
804
805 /*
806 * Skip any leading whitespace at the beginning of a field unless
807 * the format suppresses this behavior.
808 */
809 if (!(flags & SCAN_NOSKIP)) {
810 while (*string != '\0') {
811 sch = *string;
812 if (! isspace((int)sch) ) {
813 break;
814 }
815 string++;
816 }
817 if (*string == '\0') {
818 underflow = 1;
819 goto done;
820 }
821 }
822
823 /*
824 * Perform the requested scanning operation.
825 */
826 switch (op) {
827 case 'c':
828 case 's':
829 /*
830 * Scan a string up to width characters or whitespace.
831 */
832 if (width == 0) {
833 width = (size_t) ~0;
834 }
835 end = string;
836 while (*end != '\0') {
837 sch = *end;
838 if ( isspace( (int)sch ) ) {
839 break;
840 }
841 end++;
842 if (--width == 0) {
843 break;
844 }
845 }
846 if (!(flags & SCAN_SUPPRESS)) {
847 if (numVars && objIndex >= argCount) {
848 break;
849 } else if (numVars) {
850 current = args + objIndex++;
851 ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
852 } else {
853 add_index_stringl(return_value, objIndex++, string, end-string);
854 }
855 }
856 string = end;
857 break;
858
859 case '[': {
860 CharSet cset;
861
862 if (width == 0) {
863 width = (size_t) ~0;
864 }
865 end = string;
866
867 format = BuildCharSet(&cset, format);
868 while (*end != '\0') {
869 sch = *end;
870 if (!CharInSet(&cset, (int)sch)) {
871 break;
872 }
873 end++;
874 if (--width == 0) {
875 break;
876 }
877 }
878 ReleaseCharSet(&cset);
879
880 if (string == end) {
881 /*
882 * Nothing matched the range, stop processing
883 */
884 goto done;
885 }
886 if (!(flags & SCAN_SUPPRESS)) {
887 if (numVars && objIndex >= argCount) {
888 break;
889 } else if (numVars) {
890 current = args + objIndex++;
891 ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
892 } else {
893 add_index_stringl(return_value, objIndex++, string, end-string);
894 }
895 }
896 string = end;
897 break;
898 }
899 /*
900 case 'c':
901 / Scan a single character./
902
903 sch = *string;
904 string++;
905 if (!(flags & SCAN_SUPPRESS)) {
906 if (numVars) {
907 char __buf[2];
908 __buf[0] = sch;
909 __buf[1] = '\0';
910 current = args[objIndex++];
911 zval_ptr_dtor_nogc(*current);
912 ZVAL_STRINGL( *current, __buf, 1);
913 } else {
914 add_index_stringl(return_value, objIndex++, &sch, 1);
915 }
916 }
917 break;
918 */
919 case 'i':
920 /*
921 * Scan an unsigned or signed integer.
922 */
923 /*-cc-*/
924 buf[0] = '\0';
925 /*-cc-*/
926 if ((width == 0) || (width > sizeof(buf) - 1)) {
927 width = sizeof(buf) - 1;
928 }
929
930 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
931 for (end = buf; width > 0; width--) {
932 switch (*string) {
933 /*
934 * The 0 digit has special meaning at the beginning of
935 * a number. If we are unsure of the base, it
936 * indicates that we are in base 8 or base 16 (if it is
937 * followed by an 'x').
938 */
939 case '0':
940 /*-cc-*/
941 if (base == 16) {
942 flags |= SCAN_XOK;
943 }
944 /*-cc-*/
945 if (base == 0) {
946 base = 8;
947 flags |= SCAN_XOK;
948 }
949 if (flags & SCAN_NOZERO) {
950 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
951 } else {
952 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
953 }
954 goto addToInt;
955
956 case '1': case '2': case '3': case '4':
957 case '5': case '6': case '7':
958 if (base == 0) {
959 base = 10;
960 }
961 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
962 goto addToInt;
963
964 case '8': case '9':
965 if (base == 0) {
966 base = 10;
967 }
968 if (base <= 8) {
969 break;
970 }
971 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
972 goto addToInt;
973
974 case 'A': case 'B': case 'C':
975 case 'D': case 'E': case 'F':
976 case 'a': case 'b': case 'c':
977 case 'd': case 'e': case 'f':
978 if (base <= 10) {
979 break;
980 }
981 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
982 goto addToInt;
983
984 case '+': case '-':
985 if (flags & SCAN_SIGNOK) {
986 flags &= ~SCAN_SIGNOK;
987 goto addToInt;
988 }
989 break;
990
991 case 'x': case 'X':
992 if ((flags & SCAN_XOK) && (end == buf+1)) {
993 base = 16;
994 flags &= ~SCAN_XOK;
995 goto addToInt;
996 }
997 break;
998 }
999
1000 /*
1001 * We got an illegal character so we are done accumulating.
1002 */
1003 break;
1004
1005 addToInt:
1006 /*
1007 * Add the character to the temporary buffer.
1008 */
1009 *end++ = *string++;
1010 if (*string == '\0') {
1011 break;
1012 }
1013 }
1014
1015 /*
1016 * Check to see if we need to back up because we only got a
1017 * sign or a trailing x after a 0.
1018 */
1019 if (flags & SCAN_NODIGITS) {
1020 if (*string == '\0') {
1021 underflow = 1;
1022 }
1023 goto done;
1024 } else if (end[-1] == 'x' || end[-1] == 'X') {
1025 end--;
1026 string--;
1027 }
1028
1029 /*
1030 * Scan the value from the temporary buffer. If we are
1031 * returning a large unsigned value, we have to convert it back
1032 * to a string since PHP only supports signed values.
1033 */
1034 if (!(flags & SCAN_SUPPRESS)) {
1035 *end = '\0';
1036 value = (zend_long) (*fn)(buf, NULL, base);
1037 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1038 snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1039 if (numVars && objIndex >= argCount) {
1040 break;
1041 } else if (numVars) {
1042 /* change passed value type to string */
1043 current = args + objIndex++;
1044 ZEND_TRY_ASSIGN_REF_STRING(current, buf);
1045 } else {
1046 add_index_string(return_value, objIndex++, buf);
1047 }
1048 } else {
1049 if (numVars && objIndex >= argCount) {
1050 break;
1051 } else if (numVars) {
1052 current = args + objIndex++;
1053 ZEND_TRY_ASSIGN_REF_LONG(current, value);
1054 } else {
1055 add_index_long(return_value, objIndex++, value);
1056 }
1057 }
1058 }
1059 break;
1060
1061 case 'f':
1062 /*
1063 * Scan a floating point number
1064 */
1065 buf[0] = '\0'; /* call me pedantic */
1066 if ((width == 0) || (width > sizeof(buf) - 1)) {
1067 width = sizeof(buf) - 1;
1068 }
1069 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1070 for (end = buf; width > 0; width--) {
1071 switch (*string) {
1072 case '0': case '1': case '2': case '3':
1073 case '4': case '5': case '6': case '7':
1074 case '8': case '9':
1075 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1076 goto addToFloat;
1077 case '+':
1078 case '-':
1079 if (flags & SCAN_SIGNOK) {
1080 flags &= ~SCAN_SIGNOK;
1081 goto addToFloat;
1082 }
1083 break;
1084 case '.':
1085 if (flags & SCAN_PTOK) {
1086 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1087 goto addToFloat;
1088 }
1089 break;
1090 case 'e':
1091 case 'E':
1092 /*
1093 * An exponent is not allowed until there has
1094 * been at least one digit.
1095 */
1096 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1097 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1098 | SCAN_SIGNOK | SCAN_NODIGITS;
1099 goto addToFloat;
1100 }
1101 break;
1102 }
1103
1104 /*
1105 * We got an illegal character so we are done accumulating.
1106 */
1107 break;
1108
1109 addToFloat:
1110 /*
1111 * Add the character to the temporary buffer.
1112 */
1113 *end++ = *string++;
1114 if (*string == '\0') {
1115 break;
1116 }
1117 }
1118
1119 /*
1120 * Check to see if we need to back up because we saw a
1121 * trailing 'e' or sign.
1122 */
1123 if (flags & SCAN_NODIGITS) {
1124 if (flags & SCAN_EXPOK) {
1125 /*
1126 * There were no digits at all so scanning has
1127 * failed and we are done.
1128 */
1129 if (*string == '\0') {
1130 underflow = 1;
1131 }
1132 goto done;
1133 }
1134
1135 /*
1136 * We got a bad exponent ('e' and maybe a sign).
1137 */
1138 end--;
1139 string--;
1140 if (*end != 'e' && *end != 'E') {
1141 end--;
1142 string--;
1143 }
1144 }
1145
1146 /*
1147 * Scan the value from the temporary buffer.
1148 */
1149 if (!(flags & SCAN_SUPPRESS)) {
1150 double dvalue;
1151 *end = '\0';
1152 dvalue = zend_strtod(buf, NULL);
1153 if (numVars && objIndex >= argCount) {
1154 break;
1155 } else if (numVars) {
1156 current = args + objIndex++;
1157 ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
1158 } else {
1159 add_index_double(return_value, objIndex++, dvalue );
1160 }
1161 }
1162 break;
1163 } /* switch (op) */
1164 nconversions++;
1165 } /* while (*format != '\0') */
1166
1167 done:
1168 result = SCAN_SUCCESS;
1169
1170 if (underflow && (0==nconversions)) {
1171 scan_set_error_return( numVars, return_value );
1172 result = SCAN_ERROR_EOF;
1173 } else if (numVars) {
1174 zval_ptr_dtor(return_value );
1175 ZVAL_LONG(return_value, nconversions);
1176 } else if (nconversions < totalVars) {
1177 /* TODO: not all elements converted. we need to prune the list - cc */
1178 }
1179 return result;
1180 }
1181 /* }}} */
1182
1183 /* the compiler choked when i tried to make this a macro */
scan_set_error_return(int numVars,zval * return_value)1184 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1185 {
1186 if (numVars) {
1187 ZVAL_LONG(return_value, SCAN_ERROR_EOF); /* EOF marker */
1188 } else {
1189 /* convert_to_null calls destructor */
1190 convert_to_null(return_value);
1191 }
1192 }
1193 /* }}} */
1194