1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Clayton Collie <clcollie@mindspring.com> |
16 +----------------------------------------------------------------------+
17 */
18
19 /*
20 scanf.c --
21
22 This file contains the base code which implements sscanf and by extension
23 fscanf. Original code is from TCL8.3.0 and bears the following copyright:
24
25 This software is copyrighted by the Regents of the University of
26 California, Sun Microsystems, Inc., Scriptics Corporation,
27 and other parties. The following terms apply to all files associated
28 with the software unless explicitly disclaimed in individual files.
29
30 The authors hereby grant permission to use, copy, modify, distribute,
31 and license this software and its documentation for any purpose, provided
32 that existing copyright notices are retained in all copies and that this
33 notice is included verbatim in any distributions. No written agreement,
34 license, or royalty fee is required for any of the authorized uses.
35 Modifications to this software may be copyrighted by their authors
36 and need not follow the licensing terms described here, provided that
37 the new terms are clearly indicated on the first page of each file where
38 they apply.
39
40 IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
41 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
42 ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
43 DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
44 POSSIBILITY OF SUCH DAMAGE.
45
46 THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
47 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
48 FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
49 IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
50 NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
51 MODIFICATIONS.
52
53 GOVERNMENT USE: If you are acquiring this software on behalf of the
54 U.S. government, the Government shall have only "Restricted Rights"
55 in the software and related documentation as defined in the Federal
56 Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
57 are acquiring the software on behalf of the Department of Defense, the
58 software shall be classified as "Commercial Computer Software" and the
59 Government shall have only "Restricted Rights" as defined in Clause
60 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
61 authors grant the U.S. Government and others acting in its behalf
62 permission to use and distribute the software in accordance with the
63 terms specified in this license.
64 */
65
66 #include <stdio.h>
67 #include <limits.h>
68 #include <ctype.h>
69 #include "php.h"
70 #include "php_variables.h"
71 #include <locale.h>
72 #include "zend_execute.h"
73 #include "zend_operators.h"
74 #include "zend_strtod.h"
75 #include "php_globals.h"
76 #include "basic_functions.h"
77 #include "scanf.h"
78
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion being processed; the second
 * group tracks the state of the integer / floating point scanners.
 */
#define SCAN_NOSKIP		0x1		/* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2		/* Suppress assignment. */
#define SCAN_UNSIGNED	0x4		/* Read an unsigned value. */
#define SCAN_WIDTH		0x8		/* A width value was supplied. */

#define SCAN_SIGNOK		0x10	/* A +/- character is allowed. */
#define SCAN_NODIGITS	0x20	/* No digits have been scanned. */
#define SCAN_NOZERO		0x40	/* No zero digits have been scanned. */
#define SCAN_XOK		0x80	/* An 'x' is allowed. */
#define SCAN_PTOK		0x100	/* Decimal point is allowed. */
#define SCAN_EXPOK		0x200	/* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to zend_uchar before handing it to a
 * <ctype.h> classifier.  The whole expansion is parenthesized so the macro
 * behaves as a single primary expression wherever it is used.
 */
#define UCHAR(x)		((zend_uchar)(x))
95
/*
 * An inclusive character interval [start, end] belonging to a set.
 */
struct Range {
	char start;
	char end;
};

/*
 * Description of one "%[...]" character set: a list of individual
 * characters plus a list of ranges, optionally negated.
 */
typedef struct CharSet {
	int exclude;			/* 1 if this is an exclusion set. */
	int nchars;				/* Number of entries used in chars. */
	char *chars;			/* Individual member characters. */
	int nranges;			/* Number of entries used in ranges. */
	struct Range *ranges;	/* Member ranges; may be NULL. */
} CharSet;
110
111 /*
112 * Declarations for functions used only in this file.
113 */
114 static char *BuildCharSet(CharSet *cset, char *format);
115 static int CharInSet(CharSet *cset, int ch);
116 static void ReleaseCharSet(CharSet *cset);
117 static inline void scan_set_error_return(int numVars, zval *return_value);
118
119
120 /* {{{ BuildCharSet
121 *----------------------------------------------------------------------
122 *
123 * BuildCharSet --
124 *
125 * This function examines a character set format specification
126 * and builds a CharSet containing the individual characters and
127 * character ranges specified.
128 *
129 * Results:
130 * Returns the next format position.
131 *
132 * Side effects:
133 * Initializes the charset.
134 *
135 *----------------------------------------------------------------------
136 */
BuildCharSet(CharSet * cset,char * format)137 static char * BuildCharSet(CharSet *cset, char *format)
138 {
139 char *ch, start;
140 int nranges;
141 char *end;
142
143 memset(cset, 0, sizeof(CharSet));
144
145 ch = format;
146 if (*ch == '^') {
147 cset->exclude = 1;
148 ch = ++format;
149 }
150 end = format + 1; /* verify this - cc */
151
152 /*
153 * Find the close bracket so we can overallocate the set.
154 */
155 if (*ch == ']') {
156 ch = end++;
157 }
158 nranges = 0;
159 while (*ch != ']') {
160 if (*ch == '-') {
161 nranges++;
162 }
163 ch = end++;
164 }
165
166 cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
167 if (nranges > 0) {
168 cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
169 } else {
170 cset->ranges = NULL;
171 }
172
173 /*
174 * Now build the character set.
175 */
176 cset->nchars = cset->nranges = 0;
177 ch = format++;
178 start = *ch;
179 if (*ch == ']' || *ch == '-') {
180 cset->chars[cset->nchars++] = *ch;
181 ch = format++;
182 }
183 while (*ch != ']') {
184 if (*format == '-') {
185 /*
186 * This may be the first character of a range, so don't add
187 * it yet.
188 */
189 start = *ch;
190 } else if (*ch == '-') {
191 /*
192 * Check to see if this is the last character in the set, in which
193 * case it is not a range and we should add the previous character
194 * as well as the dash.
195 */
196 if (*format == ']') {
197 cset->chars[cset->nchars++] = start;
198 cset->chars[cset->nchars++] = *ch;
199 } else {
200 ch = format++;
201
202 /*
203 * Check to see if the range is in reverse order.
204 */
205 if (start < *ch) {
206 cset->ranges[cset->nranges].start = start;
207 cset->ranges[cset->nranges].end = *ch;
208 } else {
209 cset->ranges[cset->nranges].start = *ch;
210 cset->ranges[cset->nranges].end = start;
211 }
212 cset->nranges++;
213 }
214 } else {
215 cset->chars[cset->nchars++] = *ch;
216 }
217 ch = format++;
218 }
219 return format;
220 }
221 /* }}} */
222
223 /* {{{ CharInSet
224 *----------------------------------------------------------------------
225 *
226 * CharInSet --
227 *
228 * Check to see if a character matches the given set.
229 *
230 * Results:
231 * Returns non-zero if the character matches the given set.
232 *
233 * Side effects:
234 * None.
235 *
236 *----------------------------------------------------------------------
237 */
CharInSet(CharSet * cset,int c)238 static int CharInSet(CharSet *cset, int c)
239 {
240 char ch = (char) c;
241 int i, match = 0;
242
243 for (i = 0; i < cset->nchars; i++) {
244 if (cset->chars[i] == ch) {
245 match = 1;
246 break;
247 }
248 }
249 if (!match) {
250 for (i = 0; i < cset->nranges; i++) {
251 if ((cset->ranges[i].start <= ch)
252 && (ch <= cset->ranges[i].end)) {
253 match = 1;
254 break;
255 }
256 }
257 }
258 return (cset->exclude ? !match : match);
259 }
260 /* }}} */
261
262 /* {{{ ReleaseCharSet
263 *----------------------------------------------------------------------
264 *
265 * ReleaseCharSet --
266 *
267 * Free the storage associated with a character set.
268 *
269 * Results:
270 * None.
271 *
272 * Side effects:
273 * None.
274 *
275 *----------------------------------------------------------------------
276 */
ReleaseCharSet(CharSet * cset)277 static void ReleaseCharSet(CharSet *cset)
278 {
279 efree((char *)cset->chars);
280 if (cset->ranges) {
281 efree((char *)cset->ranges);
282 }
283 }
284 /* }}} */
285
286 /* {{{ ValidateFormat
287 *----------------------------------------------------------------------
288 *
289 * ValidateFormat --
290 *
291 * Parse the format string and verify that it is properly formed
292 * and that there are exactly enough variables on the command line.
293 *
294 * Results:
295 * FAILURE or SUCCESS.
296 *
297 * Side effects:
298 * May set php_error based on abnormal conditions.
299 *
300 * Parameters :
301 * format The format string.
302 * numVars The number of variables passed to the scan command.
303 * totalSubs The number of variables that will be required.
304 *
305 *----------------------------------------------------------------------
306 */
ValidateFormat(char * format,int numVars,int * totalSubs)307 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
308 {
309 #define STATIC_LIST_SIZE 16
310 int gotXpg, gotSequential, value, i, flags;
311 char *end, *ch = NULL;
312 int staticAssign[STATIC_LIST_SIZE];
313 int *nassign = staticAssign;
314 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
315
316 /*
317 * Initialize an array that records the number of times a variable
318 * is assigned to by the format string. We use this to detect if
319 * a variable is multiply assigned or left unassigned.
320 */
321 if (numVars > nspace) {
322 nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
323 nspace = numVars;
324 }
325 for (i = 0; i < nspace; i++) {
326 nassign[i] = 0;
327 }
328
329 xpgSize = objIndex = gotXpg = gotSequential = 0;
330
331 while (*format != '\0') {
332 ch = format++;
333 flags = 0;
334
335 if (*ch != '%') {
336 continue;
337 }
338 ch = format++;
339 if (*ch == '%') {
340 continue;
341 }
342 if (*ch == '*') {
343 flags |= SCAN_SUPPRESS;
344 ch = format++;
345 goto xpgCheckDone;
346 }
347
348 if ( isdigit( (int)*ch ) ) {
349 /*
350 * Check for an XPG3-style %n$ specification. Note: there
351 * must not be a mixture of XPG3 specs and non-XPG3 specs
352 * in the same format string.
353 */
354 value = ZEND_STRTOUL(format-1, &end, 10);
355 if (*end != '$') {
356 goto notXpg;
357 }
358 format = end+1;
359 ch = format++;
360 gotXpg = 1;
361 if (gotSequential) {
362 goto mixedXPG;
363 }
364 objIndex = value - 1;
365 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
366 goto badIndex;
367 } else if (numVars == 0) {
368 /*
369 * In the case where no vars are specified, the user can
370 * specify %9999$ legally, so we have to consider special
371 * rules for growing the assign array. 'value' is
372 * guaranteed to be > 0.
373 */
374
375 /* set a lower artificial limit on this
376 * in the interest of security and resource friendliness
377 * 255 arguments should be more than enough. - cc
378 */
379 if (value > SCAN_MAX_ARGS) {
380 goto badIndex;
381 }
382
383 xpgSize = (xpgSize > value) ? xpgSize : value;
384 }
385 goto xpgCheckDone;
386 }
387
388 notXpg:
389 gotSequential = 1;
390 if (gotXpg) {
391 mixedXPG:
392 php_error_docref(NULL, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
393 goto error;
394 }
395
396 xpgCheckDone:
397 /*
398 * Parse any width specifier.
399 */
400 if (isdigit(UCHAR(*ch))) {
401 value = ZEND_STRTOUL(format-1, &format, 10);
402 flags |= SCAN_WIDTH;
403 ch = format++;
404 }
405
406 /*
407 * Ignore size specifier.
408 */
409 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
410 ch = format++;
411 }
412
413 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
414 goto badIndex;
415 }
416
417 /*
418 * Handle the various field types.
419 */
420 switch (*ch) {
421 case 'n':
422 case 'd':
423 case 'D':
424 case 'i':
425 case 'o':
426 case 'x':
427 case 'X':
428 case 'u':
429 case 'f':
430 case 'e':
431 case 'E':
432 case 'g':
433 case 's':
434 break;
435
436 case 'c':
437 /* we differ here with the TCL implementation in allowing for */
438 /* a character width specification, to be more consistent with */
439 /* ANSI. since Zend auto allocates space for vars, this is no */
440 /* problem - cc */
441 /*
442 if (flags & SCAN_WIDTH) {
443 php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
444 goto error;
445 }
446 */
447 break;
448
449 case '[':
450 if (*format == '\0') {
451 goto badSet;
452 }
453 ch = format++;
454 if (*ch == '^') {
455 if (*format == '\0') {
456 goto badSet;
457 }
458 ch = format++;
459 }
460 if (*ch == ']') {
461 if (*format == '\0') {
462 goto badSet;
463 }
464 ch = format++;
465 }
466 while (*ch != ']') {
467 if (*format == '\0') {
468 goto badSet;
469 }
470 ch = format++;
471 }
472 break;
473 badSet:
474 php_error_docref(NULL, E_WARNING, "Unmatched [ in format string");
475 goto error;
476
477 default: {
478 php_error_docref(NULL, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
479 goto error;
480 }
481 }
482
483 if (!(flags & SCAN_SUPPRESS)) {
484 if (objIndex >= nspace) {
485 /*
486 * Expand the nassign buffer. If we are using XPG specifiers,
487 * make sure that we grow to a large enough size. xpgSize is
488 * guaranteed to be at least one larger than objIndex.
489 */
490 value = nspace;
491 if (xpgSize) {
492 nspace = xpgSize;
493 } else {
494 nspace += STATIC_LIST_SIZE;
495 }
496 if (nassign == staticAssign) {
497 nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
498 for (i = 0; i < STATIC_LIST_SIZE; ++i) {
499 nassign[i] = staticAssign[i];
500 }
501 } else {
502 nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
503 }
504 for (i = value; i < nspace; i++) {
505 nassign[i] = 0;
506 }
507 }
508 nassign[objIndex]++;
509 objIndex++;
510 }
511 } /* while (*format != '\0') */
512
513 /*
514 * Verify that all of the variable were assigned exactly once.
515 */
516 if (numVars == 0) {
517 if (xpgSize) {
518 numVars = xpgSize;
519 } else {
520 numVars = objIndex;
521 }
522 }
523 if (totalSubs) {
524 *totalSubs = numVars;
525 }
526 for (i = 0; i < numVars; i++) {
527 if (nassign[i] > 1) {
528 php_error_docref(NULL, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
529 goto error;
530 } else if (!xpgSize && (nassign[i] == 0)) {
531 /*
532 * If the space is empty, and xpgSize is 0 (means XPG wasn't
533 * used, and/or numVars != 0), then too many vars were given
534 */
535 php_error_docref(NULL, E_WARNING, "Variable is not assigned by any conversion specifiers");
536 goto error;
537 }
538 }
539
540 if (nassign != staticAssign) {
541 efree((char *)nassign);
542 }
543 return SCAN_SUCCESS;
544
545 badIndex:
546 if (gotXpg) {
547 php_error_docref(NULL, E_WARNING, "%s", "\"%n$\" argument index out of range");
548 } else {
549 php_error_docref(NULL, E_WARNING, "Different numbers of variable names and field specifiers");
550 }
551
552 error:
553 if (nassign != staticAssign) {
554 efree((char *)nassign);
555 }
556 return SCAN_ERROR_INVALID_FORMAT;
557 #undef STATIC_LIST_SIZE
558 }
559 /* }}} */
560
561 /* {{{ php_sscanf_internal
562 * This is the internal function which does processing on behalf of
563 * both sscanf() and fscanf()
564 *
565 * parameters :
566 * string literal string to be processed
567 * format format string
568 * argCount total number of elements in the args array
569 * args arguments passed in from user function (f|s)scanf
570 * varStart offset (in args) of 1st variable passed in to (f|s)scanf
571 * return_value set with the results of the scan
572 */
573
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)574 PHPAPI int php_sscanf_internal( char *string, char *format,
575 int argCount, zval *args,
576 int varStart, zval *return_value)
577 {
578 int numVars, nconversions, totalVars = -1;
579 int i, result;
580 zend_long value;
581 int objIndex;
582 char *end, *baseString;
583 zval *current;
584 char op = 0;
585 int base = 0;
586 int underflow = 0;
587 size_t width;
588 zend_long (*fn)() = NULL;
589 char *ch, sch;
590 int flags;
591 char buf[64]; /* Temporary buffer to hold scanned number
592 * strings before they are passed to strtoul() */
593
594 /* do some sanity checking */
595 if ((varStart > argCount) || (varStart < 0)){
596 varStart = SCAN_MAX_ARGS + 1;
597 }
598 numVars = argCount - varStart;
599 if (numVars < 0) {
600 numVars = 0;
601 }
602
603 #if 0
604 zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
605 string, format, numVars, varStart);
606 #endif
607 /*
608 * Check for errors in the format string.
609 */
610 if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
611 scan_set_error_return( numVars, return_value );
612 return SCAN_ERROR_INVALID_FORMAT;
613 }
614
615 objIndex = numVars ? varStart : 0;
616
617 /*
618 * If any variables are passed, make sure they are all passed by reference
619 */
620 if (numVars) {
621 for (i = varStart;i < argCount;i++){
622 if ( ! Z_ISREF(args[ i ] ) ) {
623 php_error_docref(NULL, E_WARNING, "Parameter %d must be passed by reference", i);
624 scan_set_error_return(numVars, return_value);
625 return SCAN_ERROR_VAR_PASSED_BYVAL;
626 }
627 }
628 }
629
630 /*
631 * Allocate space for the result objects. Only happens when no variables
632 * are specified
633 */
634 if (!numVars) {
635 zval tmp;
636
637 /* allocate an array for return */
638 array_init(return_value);
639
640 for (i = 0; i < totalVars; i++) {
641 ZVAL_NULL(&tmp);
642 if (add_next_index_zval(return_value, &tmp) == FAILURE) {
643 scan_set_error_return(0, return_value);
644 return FAILURE;
645 }
646 }
647 varStart = 0; /* Array index starts from 0 */
648 }
649
650 baseString = string;
651
652 /*
653 * Iterate over the format string filling in the result objects until
654 * we reach the end of input, the end of the format string, or there
655 * is a mismatch.
656 */
657 nconversions = 0;
658 /* note ! - we need to limit the loop for objIndex to keep it in bounds */
659
660 while (*format != '\0') {
661 ch = format++;
662 flags = 0;
663
664 /*
665 * If we see whitespace in the format, skip whitespace in the string.
666 */
667 if ( isspace( (int)*ch ) ) {
668 sch = *string;
669 while ( isspace( (int)sch ) ) {
670 if (*string == '\0') {
671 goto done;
672 }
673 string++;
674 sch = *string;
675 }
676 continue;
677 }
678
679 if (*ch != '%') {
680 literal:
681 if (*string == '\0') {
682 underflow = 1;
683 goto done;
684 }
685 sch = *string;
686 string++;
687 if (*ch != sch) {
688 goto done;
689 }
690 continue;
691 }
692
693 ch = format++;
694 if (*ch == '%') {
695 goto literal;
696 }
697
698 /*
699 * Check for assignment suppression ('*') or an XPG3-style
700 * assignment ('%n$').
701 */
702 if (*ch == '*') {
703 flags |= SCAN_SUPPRESS;
704 ch = format++;
705 } else if ( isdigit(UCHAR(*ch))) {
706 value = ZEND_STRTOUL(format-1, &end, 10);
707 if (*end == '$') {
708 format = end+1;
709 ch = format++;
710 objIndex = varStart + value - 1;
711 }
712 }
713
714 /*
715 * Parse any width specifier.
716 */
717 if ( isdigit(UCHAR(*ch))) {
718 width = ZEND_STRTOUL(format-1, &format, 10);
719 ch = format++;
720 } else {
721 width = 0;
722 }
723
724 /*
725 * Ignore size specifier.
726 */
727 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
728 ch = format++;
729 }
730
731 /*
732 * Handle the various field types.
733 */
734 switch (*ch) {
735 case 'n':
736 if (!(flags & SCAN_SUPPRESS)) {
737 if (numVars && objIndex >= argCount) {
738 break;
739 } else if (numVars) {
740 current = args + objIndex++;
741 ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
742 } else {
743 add_index_long(return_value, objIndex++, string - baseString);
744 }
745 }
746 nconversions++;
747 continue;
748
749 case 'd':
750 case 'D':
751 op = 'i';
752 base = 10;
753 fn = (zend_long (*)())ZEND_STRTOL_PTR;
754 break;
755 case 'i':
756 op = 'i';
757 base = 0;
758 fn = (zend_long (*)())ZEND_STRTOL_PTR;
759 break;
760 case 'o':
761 op = 'i';
762 base = 8;
763 fn = (zend_long (*)())ZEND_STRTOL_PTR;
764 break;
765 case 'x':
766 case 'X':
767 op = 'i';
768 base = 16;
769 fn = (zend_long (*)())ZEND_STRTOL_PTR;
770 break;
771 case 'u':
772 op = 'i';
773 base = 10;
774 flags |= SCAN_UNSIGNED;
775 fn = (zend_long (*)())ZEND_STRTOUL_PTR;
776 break;
777
778 case 'f':
779 case 'e':
780 case 'E':
781 case 'g':
782 op = 'f';
783 break;
784
785 case 's':
786 op = 's';
787 break;
788
789 case 'c':
790 op = 's';
791 flags |= SCAN_NOSKIP;
792 /*-cc-*/
793 if (0 == width) {
794 width = 1;
795 }
796 /*-cc-*/
797 break;
798 case '[':
799 op = '[';
800 flags |= SCAN_NOSKIP;
801 break;
802 } /* switch */
803
804 /*
805 * At this point, we will need additional characters from the
806 * string to proceed.
807 */
808 if (*string == '\0') {
809 underflow = 1;
810 goto done;
811 }
812
813 /*
814 * Skip any leading whitespace at the beginning of a field unless
815 * the format suppresses this behavior.
816 */
817 if (!(flags & SCAN_NOSKIP)) {
818 while (*string != '\0') {
819 sch = *string;
820 if (! isspace((int)sch) ) {
821 break;
822 }
823 string++;
824 }
825 if (*string == '\0') {
826 underflow = 1;
827 goto done;
828 }
829 }
830
831 /*
832 * Perform the requested scanning operation.
833 */
834 switch (op) {
835 case 'c':
836 case 's':
837 /*
838 * Scan a string up to width characters or whitespace.
839 */
840 if (width == 0) {
841 width = (size_t) ~0;
842 }
843 end = string;
844 while (*end != '\0') {
845 sch = *end;
846 if ( isspace( (int)sch ) ) {
847 break;
848 }
849 end++;
850 if (--width == 0) {
851 break;
852 }
853 }
854 if (!(flags & SCAN_SUPPRESS)) {
855 if (numVars && objIndex >= argCount) {
856 break;
857 } else if (numVars) {
858 current = args + objIndex++;
859 ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
860 } else {
861 add_index_stringl(return_value, objIndex++, string, end-string);
862 }
863 }
864 string = end;
865 break;
866
867 case '[': {
868 CharSet cset;
869
870 if (width == 0) {
871 width = (size_t) ~0;
872 }
873 end = string;
874
875 format = BuildCharSet(&cset, format);
876 while (*end != '\0') {
877 sch = *end;
878 if (!CharInSet(&cset, (int)sch)) {
879 break;
880 }
881 end++;
882 if (--width == 0) {
883 break;
884 }
885 }
886 ReleaseCharSet(&cset);
887
888 if (string == end) {
889 /*
890 * Nothing matched the range, stop processing
891 */
892 goto done;
893 }
894 if (!(flags & SCAN_SUPPRESS)) {
895 if (numVars && objIndex >= argCount) {
896 break;
897 } else if (numVars) {
898 current = args + objIndex++;
899 ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
900 } else {
901 add_index_stringl(return_value, objIndex++, string, end-string);
902 }
903 }
904 string = end;
905 break;
906 }
907 /*
908 case 'c':
909 / Scan a single character./
910
911 sch = *string;
912 string++;
913 if (!(flags & SCAN_SUPPRESS)) {
914 if (numVars) {
915 char __buf[2];
916 __buf[0] = sch;
917 __buf[1] = '\0';
918 current = args[objIndex++];
919 zval_dtor(*current);
920 ZVAL_STRINGL( *current, __buf, 1);
921 } else {
922 add_index_stringl(return_value, objIndex++, &sch, 1);
923 }
924 }
925 break;
926 */
927 case 'i':
928 /*
929 * Scan an unsigned or signed integer.
930 */
931 /*-cc-*/
932 buf[0] = '\0';
933 /*-cc-*/
934 if ((width == 0) || (width > sizeof(buf) - 1)) {
935 width = sizeof(buf) - 1;
936 }
937
938 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
939 for (end = buf; width > 0; width--) {
940 switch (*string) {
941 /*
942 * The 0 digit has special meaning at the beginning of
943 * a number. If we are unsure of the base, it
944 * indicates that we are in base 8 or base 16 (if it is
945 * followed by an 'x').
946 */
947 case '0':
948 /*-cc-*/
949 if (base == 16) {
950 flags |= SCAN_XOK;
951 }
952 /*-cc-*/
953 if (base == 0) {
954 base = 8;
955 flags |= SCAN_XOK;
956 }
957 if (flags & SCAN_NOZERO) {
958 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
959 } else {
960 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
961 }
962 goto addToInt;
963
964 case '1': case '2': case '3': case '4':
965 case '5': case '6': case '7':
966 if (base == 0) {
967 base = 10;
968 }
969 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
970 goto addToInt;
971
972 case '8': case '9':
973 if (base == 0) {
974 base = 10;
975 }
976 if (base <= 8) {
977 break;
978 }
979 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
980 goto addToInt;
981
982 case 'A': case 'B': case 'C':
983 case 'D': case 'E': case 'F':
984 case 'a': case 'b': case 'c':
985 case 'd': case 'e': case 'f':
986 if (base <= 10) {
987 break;
988 }
989 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
990 goto addToInt;
991
992 case '+': case '-':
993 if (flags & SCAN_SIGNOK) {
994 flags &= ~SCAN_SIGNOK;
995 goto addToInt;
996 }
997 break;
998
999 case 'x': case 'X':
1000 if ((flags & SCAN_XOK) && (end == buf+1)) {
1001 base = 16;
1002 flags &= ~SCAN_XOK;
1003 goto addToInt;
1004 }
1005 break;
1006 }
1007
1008 /*
1009 * We got an illegal character so we are done accumulating.
1010 */
1011 break;
1012
1013 addToInt:
1014 /*
1015 * Add the character to the temporary buffer.
1016 */
1017 *end++ = *string++;
1018 if (*string == '\0') {
1019 break;
1020 }
1021 }
1022
1023 /*
1024 * Check to see if we need to back up because we only got a
1025 * sign or a trailing x after a 0.
1026 */
1027 if (flags & SCAN_NODIGITS) {
1028 if (*string == '\0') {
1029 underflow = 1;
1030 }
1031 goto done;
1032 } else if (end[-1] == 'x' || end[-1] == 'X') {
1033 end--;
1034 string--;
1035 }
1036
1037 /*
1038 * Scan the value from the temporary buffer. If we are
1039 * returning a large unsigned value, we have to convert it back
1040 * to a string since PHP only supports signed values.
1041 */
1042 if (!(flags & SCAN_SUPPRESS)) {
1043 *end = '\0';
1044 value = (zend_long) (*fn)(buf, NULL, base);
1045 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1046 snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1047 if (numVars && objIndex >= argCount) {
1048 break;
1049 } else if (numVars) {
1050 /* change passed value type to string */
1051 current = args + objIndex++;
1052 ZEND_TRY_ASSIGN_REF_STRING(current, buf);
1053 } else {
1054 add_index_string(return_value, objIndex++, buf);
1055 }
1056 } else {
1057 if (numVars && objIndex >= argCount) {
1058 break;
1059 } else if (numVars) {
1060 current = args + objIndex++;
1061 ZEND_TRY_ASSIGN_REF_LONG(current, value);
1062 } else {
1063 add_index_long(return_value, objIndex++, value);
1064 }
1065 }
1066 }
1067 break;
1068
1069 case 'f':
1070 /*
1071 * Scan a floating point number
1072 */
1073 buf[0] = '\0'; /* call me pedantic */
1074 if ((width == 0) || (width > sizeof(buf) - 1)) {
1075 width = sizeof(buf) - 1;
1076 }
1077 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1078 for (end = buf; width > 0; width--) {
1079 switch (*string) {
1080 case '0': case '1': case '2': case '3':
1081 case '4': case '5': case '6': case '7':
1082 case '8': case '9':
1083 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1084 goto addToFloat;
1085 case '+':
1086 case '-':
1087 if (flags & SCAN_SIGNOK) {
1088 flags &= ~SCAN_SIGNOK;
1089 goto addToFloat;
1090 }
1091 break;
1092 case '.':
1093 if (flags & SCAN_PTOK) {
1094 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1095 goto addToFloat;
1096 }
1097 break;
1098 case 'e':
1099 case 'E':
1100 /*
1101 * An exponent is not allowed until there has
1102 * been at least one digit.
1103 */
1104 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1105 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1106 | SCAN_SIGNOK | SCAN_NODIGITS;
1107 goto addToFloat;
1108 }
1109 break;
1110 }
1111
1112 /*
1113 * We got an illegal character so we are done accumulating.
1114 */
1115 break;
1116
1117 addToFloat:
1118 /*
1119 * Add the character to the temporary buffer.
1120 */
1121 *end++ = *string++;
1122 if (*string == '\0') {
1123 break;
1124 }
1125 }
1126
1127 /*
1128 * Check to see if we need to back up because we saw a
1129 * trailing 'e' or sign.
1130 */
1131 if (flags & SCAN_NODIGITS) {
1132 if (flags & SCAN_EXPOK) {
1133 /*
1134 * There were no digits at all so scanning has
1135 * failed and we are done.
1136 */
1137 if (*string == '\0') {
1138 underflow = 1;
1139 }
1140 goto done;
1141 }
1142
1143 /*
1144 * We got a bad exponent ('e' and maybe a sign).
1145 */
1146 end--;
1147 string--;
1148 if (*end != 'e' && *end != 'E') {
1149 end--;
1150 string--;
1151 }
1152 }
1153
1154 /*
1155 * Scan the value from the temporary buffer.
1156 */
1157 if (!(flags & SCAN_SUPPRESS)) {
1158 double dvalue;
1159 *end = '\0';
1160 dvalue = zend_strtod(buf, NULL);
1161 if (numVars && objIndex >= argCount) {
1162 break;
1163 } else if (numVars) {
1164 current = args + objIndex++;
1165 ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
1166 } else {
1167 add_index_double(return_value, objIndex++, dvalue );
1168 }
1169 }
1170 break;
1171 } /* switch (op) */
1172 nconversions++;
1173 } /* while (*format != '\0') */
1174
1175 done:
1176 result = SCAN_SUCCESS;
1177
1178 if (underflow && (0==nconversions)) {
1179 scan_set_error_return( numVars, return_value );
1180 result = SCAN_ERROR_EOF;
1181 } else if (numVars) {
1182 zval_ptr_dtor(return_value );
1183 ZVAL_LONG(return_value, nconversions);
1184 } else if (nconversions < totalVars) {
1185 /* TODO: not all elements converted. we need to prune the list - cc */
1186 }
1187 return result;
1188 }
1189 /* }}} */
1190
1191 /* the compiler choked when i tried to make this a macro */
scan_set_error_return(int numVars,zval * return_value)1192 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1193 {
1194 if (numVars) {
1195 ZVAL_LONG(return_value, SCAN_ERROR_EOF); /* EOF marker */
1196 } else {
1197 /* convert_to_null calls destructor */
1198 convert_to_null(return_value);
1199 }
1200 }
1201 /* }}} */
1202