1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 /*
7  * Scan functions for NSPR types
8  *
9  * Author: Wan-Teh Chang
10  *
11  * Acknowledgment: The implementation is inspired by the source code
12  * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992.
13  */
14 
15 #include <limits.h>
16 #include <ctype.h>
17 #include <string.h>
18 #include <stdlib.h>
19 #include "prprf.h"
20 #include "prdtoa.h"
21 #include "prlog.h"
22 #include "prerror.h"
23 
/*
 * _PRGetCharFN:
 *     A function that reads the next character from 'stream'.
 *     Returns the character read, or EOF if end of stream is reached.
 *     'stream' is an opaque argument; the scanner only passes it
 *     through (see the 'stream' field of ScanfState).
 */
typedef int (*_PRGetCharFN)(void *stream);

/*
 * _PRUngetCharFN:
 *     A function that pushes the character 'ch' back to 'stream'.
 *     The scanner in this file may call it with ch == EOF after a
 *     read that hit end of stream, so an implementation has to cope
 *     with that value.
 */
typedef void (*_PRUngetCharFN)(void *stream, int ch);
34 
/*
 * _PRSizeSpec:
 *     The size specifier for the integer and floating point number
 *     conversions in format control strings.  It is parsed from the
 *     format by DoScanf and selects the type of the pointer argument
 *     that a conversion stores its result through.
 */
typedef enum {
    _PR_size_none,  /* No size specifier is given */
    _PR_size_h,     /* The 'h' specifier, suggesting "short" */
    _PR_size_l,     /* The 'l' specifier, suggesting "long" */
    _PR_size_L,     /* The 'L' specifier, meaning a 'long double' */
    _PR_size_ll     /* The 'll' specifier, suggesting "long long" */
} _PRSizeSpec;
46 
/*
 * ScanfState:
 *     The collection of data that is passed between the scan function
 *     and its subordinate functions.  The fields of this structure
 *     serve as the input or output arguments for these functions.
 *
 *     The first group of fields describes the input and lives for the
 *     whole scan.  The second group is filled in by DoScanf for each
 *     conversion specification before Convert is called.  'converted'
 *     is the result reported back by the conversion.
 */
typedef struct {
    _PRGetCharFN get;        /* get a character from input stream */
    _PRUngetCharFN unget;    /* unget (push back) a character */
    void *stream;            /* argument for get and unget */
    va_list ap;              /* the variable argument list */
    int nChar;               /* number of characters read from 'stream';
                              * maintained by the GET and UNGET macros
                              * and stored by the %n conversion */

    PRBool assign;           /* assign, or suppress assignment ('*')? */
    int width;               /* field width; 0 if the format gave none */
    _PRSizeSpec sizeSpec;    /* 'h', 'l', 'L', or 'll' */

    PRBool converted;        /* is the value actually converted?
                              * DoScanf counts the conversions for
                              * which this is true */
} ScanfState;
65 
/*
 * GET reads the next character through the 'get' callback of 'state'
 * and counts it in state->nChar.  UNGET pushes 'ch' back through the
 * 'unget' callback and takes it out of the count again.  The count is
 * adjusted unconditionally, so a GET that returns EOF must be paired
 * with an UNGET of that EOF to keep nChar accurate.
 */
#define GET(state) ((state)->nChar++, (state)->get((state)->stream))
#define UNGET(state, ch) \
        ((state)->nChar--, (state)->unget((state)->stream, (ch)))

/*
 * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH,
 * are always used together.
 *
 * GET_IF_WITHIN_WIDTH decrements the field width of 'state', then
 * calls the GET macro and assigns its return value to 'ch' only if
 * the field width has not been exceeded.  Therefore, after
 * GET_IF_WITHIN_WIDTH, the value of 'ch' is valid only if the macro
 * WITHIN_WIDTH evaluates to true.
 *
 * GET_IF_WITHIN_WIDTH is wrapped in do { } while (0) so that it
 * behaves as a single statement: it can be followed by a semicolon
 * and used as the unbraced body of an if/else without capturing the
 * 'else' or causing a syntax error.
 */

#define GET_IF_WITHIN_WIDTH(state, ch) \
        do { \
            if (--(state)->width >= 0) { \
                (ch) = GET(state); \
            } \
        } while (0)
#define WITHIN_WIDTH(state) ((state)->width >= 0)
85 
86 /*
87  * _pr_strtoull:
88  *     Convert a string to an unsigned 64-bit integer.  The string
89  *     'str' is assumed to be a representation of the integer in
90  *     base 'base'.
91  *
92  * Warning:
93  *     - Only handle base 8, 10, and 16.
94  *     - No overflow checking.
95  */
96 
97 static PRUint64
_pr_strtoull(const char * str,char ** endptr,int base)98 _pr_strtoull(const char *str, char **endptr, int base)
99 {
100     static const int BASE_MAX = 16;
101     static const char digits[] = "0123456789abcdef";
102     char *digitPtr;
103     PRUint64 x;    /* return value */
104     PRInt64 base64;
105     const char *cPtr;
106     PRBool negative;
107     const char *digitStart;
108 
109     PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16);
110     if (base < 0 || base == 1 || base > BASE_MAX) {
111         if (endptr) {
112             *endptr = (char *) str;
113             return LL_ZERO;
114         }
115     }
116 
117     cPtr = str;
118     while (isspace(*cPtr)) {
119         ++cPtr;
120     }
121 
122     negative = PR_FALSE;
123     if (*cPtr == '-') {
124         negative = PR_TRUE;
125         cPtr++;
126     } else if (*cPtr == '+') {
127         cPtr++;
128     }
129 
130     if (base == 16) {
131         if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) {
132             cPtr += 2;
133         }
134     } else if (base == 0) {
135         if (*cPtr != '0') {
136             base = 10;
137         } else if (cPtr[1] == 'x' || cPtr[1] == 'X') {
138             base = 16;
139             cPtr += 2;
140         } else {
141             base = 8;
142         }
143     }
144     PR_ASSERT(base != 0);
145     LL_I2L(base64, base);
146     digitStart = cPtr;
147 
148     /* Skip leading zeros */
149     while (*cPtr == '0') {
150         cPtr++;
151     }
152 
153     LL_I2L(x, 0);
154     while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) {
155         PRUint64 d;
156 
157         LL_I2L(d, (digitPtr - digits));
158         LL_MUL(x, x, base64);
159         LL_ADD(x, x, d);
160         cPtr++;
161     }
162 
163     if (cPtr == digitStart) {
164         if (endptr) {
165             *endptr = (char *) str;
166         }
167         return LL_ZERO;
168     }
169 
170     if (negative) {
171 #ifdef HAVE_LONG_LONG
172         /* The cast to a signed type is to avoid a compiler warning */
173         x = -(PRInt64)x;
174 #else
175         LL_NEG(x, x);
176 #endif
177     }
178 
179     if (endptr) {
180         *endptr = (char *) cPtr;
181     }
182     return x;
183 }
184 
/*
 * The maximum field width (in number of characters) that is enough
 * (may be more than necessary) to represent a 64-bit integer or
 * floating point number.
 *
 * GetInt and GetFloat collect the field in a buffer of FMAX + 1
 * characters and use FMAX in place of a field width that is missing
 * or larger than FMAX.
 */
#define FMAX 31
/* The character GetFloat accepts between the integer and fraction */
#define DECIMAL_POINT '.'
192 
193 static PRStatus
GetInt(ScanfState * state,int code)194 GetInt(ScanfState *state, int code)
195 {
196     char buf[FMAX + 1], *p;
197     int ch = 0;
198     static const char digits[] = "0123456789abcdefABCDEF";
199     PRBool seenDigit = PR_FALSE;
200     int base;
201     int dlen;
202 
203     switch (code) {
204         case 'd': case 'u':
205             base = 10;
206             break;
207         case 'i':
208             base = 0;
209             break;
210         case 'x': case 'X': case 'p':
211             base = 16;
212             break;
213         case 'o':
214             base = 8;
215             break;
216         default:
217             return PR_FAILURE;
218     }
219     if (state->width == 0 || state->width > FMAX) {
220         state->width = FMAX;
221     }
222     p = buf;
223     GET_IF_WITHIN_WIDTH(state, ch);
224     if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
225         *p++ = ch;
226         GET_IF_WITHIN_WIDTH(state, ch);
227     }
228     if (WITHIN_WIDTH(state) && ch == '0') {
229         seenDigit = PR_TRUE;
230         *p++ = ch;
231         GET_IF_WITHIN_WIDTH(state, ch);
232         if (WITHIN_WIDTH(state)
233             && (ch == 'x' || ch == 'X')
234             && (base == 0 || base == 16)) {
235             base = 16;
236             *p++ = ch;
237             GET_IF_WITHIN_WIDTH(state, ch);
238         } else if (base == 0) {
239             base = 8;
240         }
241     }
242     if (base == 0 || base == 10) {
243         dlen = 10;
244     } else if (base == 8) {
245         dlen = 8;
246     } else {
247         PR_ASSERT(base == 16);
248         dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */
249     }
250     while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) {
251         *p++ = ch;
252         GET_IF_WITHIN_WIDTH(state, ch);
253         seenDigit = PR_TRUE;
254     }
255     if (WITHIN_WIDTH(state)) {
256         UNGET(state, ch);
257     }
258     if (!seenDigit) {
259         return PR_FAILURE;
260     }
261     *p = '\0';
262     if (state->assign) {
263         if (code == 'd' || code == 'i') {
264             if (state->sizeSpec == _PR_size_ll) {
265                 PRInt64 llval = _pr_strtoull(buf, NULL, base);
266                 *va_arg(state->ap, PRInt64 *) = llval;
267             } else {
268                 long lval = strtol(buf, NULL, base);
269 
270                 if (state->sizeSpec == _PR_size_none) {
271                     *va_arg(state->ap, PRIntn *) = lval;
272                 } else if (state->sizeSpec == _PR_size_h) {
273                     *va_arg(state->ap, PRInt16 *) = (PRInt16)lval;
274                 } else if (state->sizeSpec == _PR_size_l) {
275                     *va_arg(state->ap, PRInt32 *) = lval;
276                 } else {
277                     return PR_FAILURE;
278                 }
279             }
280         } else {
281             if (state->sizeSpec == _PR_size_ll) {
282                 PRUint64 llval = _pr_strtoull(buf, NULL, base);
283                 *va_arg(state->ap, PRUint64 *) = llval;
284             } else {
285                 unsigned long lval = strtoul(buf, NULL, base);
286 
287                 if (state->sizeSpec == _PR_size_none) {
288                     *va_arg(state->ap, PRUintn *) = lval;
289                 } else if (state->sizeSpec == _PR_size_h) {
290                     *va_arg(state->ap, PRUint16 *) = (PRUint16)lval;
291                 } else if (state->sizeSpec == _PR_size_l) {
292                     *va_arg(state->ap, PRUint32 *) = lval;
293                 } else {
294                     return PR_FAILURE;
295                 }
296             }
297         }
298         state->converted = PR_TRUE;
299     }
300     return PR_SUCCESS;
301 }
302 
303 static PRStatus
GetFloat(ScanfState * state)304 GetFloat(ScanfState *state)
305 {
306     char buf[FMAX + 1], *p;
307     int ch = 0;
308     PRBool seenDigit = PR_FALSE;
309 
310     if (state->width == 0 || state->width > FMAX) {
311         state->width = FMAX;
312     }
313     p = buf;
314     GET_IF_WITHIN_WIDTH(state, ch);
315     if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
316         *p++ = ch;
317         GET_IF_WITHIN_WIDTH(state, ch);
318     }
319     while (WITHIN_WIDTH(state) && isdigit(ch)) {
320         *p++ = ch;
321         GET_IF_WITHIN_WIDTH(state, ch);
322         seenDigit = PR_TRUE;
323     }
324     if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) {
325         *p++ = ch;
326         GET_IF_WITHIN_WIDTH(state, ch);
327         while (WITHIN_WIDTH(state) && isdigit(ch)) {
328             *p++ = ch;
329             GET_IF_WITHIN_WIDTH(state, ch);
330             seenDigit = PR_TRUE;
331         }
332     }
333 
334     /*
335      * This is not robust.  For example, "1.2e+" would confuse
336      * the code below to read 'e' and '+', only to realize that
337      * it should have stopped at "1.2".  But we can't push back
338      * more than one character, so there is nothing I can do.
339      */
340 
341     /* Parse exponent */
342     if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) {
343         *p++ = ch;
344         GET_IF_WITHIN_WIDTH(state, ch);
345         if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
346             *p++ = ch;
347             GET_IF_WITHIN_WIDTH(state, ch);
348         }
349         while (WITHIN_WIDTH(state) && isdigit(ch)) {
350             *p++ = ch;
351             GET_IF_WITHIN_WIDTH(state, ch);
352         }
353     }
354     if (WITHIN_WIDTH(state)) {
355         UNGET(state, ch);
356     }
357     if (!seenDigit) {
358         return PR_FAILURE;
359     }
360     *p = '\0';
361     if (state->assign) {
362         PRFloat64 dval = PR_strtod(buf, NULL);
363 
364         state->converted = PR_TRUE;
365         if (state->sizeSpec == _PR_size_l) {
366             *va_arg(state->ap, PRFloat64 *) = dval;
367         } else if (state->sizeSpec == _PR_size_L) {
368             *va_arg(state->ap, long double *) = dval;
369         } else {
370             *va_arg(state->ap, float *) = (float) dval;
371         }
372     }
373     return PR_SUCCESS;
374 }
375 
376 /*
377  * Convert, and return the end of the conversion spec.
378  * Return NULL on error.
379  */
380 
381 static const char *
Convert(ScanfState * state,const char * fmt)382 Convert(ScanfState *state, const char *fmt)
383 {
384     const char *cPtr;
385     int ch;
386     char *cArg = NULL;
387 
388     state->converted = PR_FALSE;
389     cPtr = fmt;
390     if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') {
391         do {
392             ch = GET(state);
393         } while (isspace(ch));
394         UNGET(state, ch);
395     }
396     switch (*cPtr) {
397         case 'c':
398             if (state->assign) {
399                 cArg = va_arg(state->ap, char *);
400             }
401             if (state->width == 0) {
402                 state->width = 1;
403             }
404             for (; state->width > 0; state->width--) {
405                 ch = GET(state);
406                 if (ch == EOF) {
407                     return NULL;
408                 }
409                 if (state->assign) {
410                     *cArg++ = ch;
411                 }
412             }
413             if (state->assign) {
414                 state->converted = PR_TRUE;
415             }
416             break;
417         case 'p':
418         case 'd': case 'i': case 'o':
419         case 'u': case 'x': case 'X':
420             if (GetInt(state, *cPtr) == PR_FAILURE) {
421                 return NULL;
422             }
423             break;
424         case 'e': case 'E': case 'f':
425         case 'g': case 'G':
426             if (GetFloat(state) == PR_FAILURE) {
427                 return NULL;
428             }
429             break;
430         case 'n':
431             /* do not consume any input */
432             if (state->assign) {
433                 switch (state->sizeSpec) {
434                     case _PR_size_none:
435                         *va_arg(state->ap, PRIntn *) = state->nChar;
436                         break;
437                     case _PR_size_h:
438                         *va_arg(state->ap, PRInt16 *) = state->nChar;
439                         break;
440                     case _PR_size_l:
441                         *va_arg(state->ap, PRInt32 *) = state->nChar;
442                         break;
443                     case _PR_size_ll:
444                         LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar);
445                         break;
446                     default:
447                         PR_ASSERT(0);
448                 }
449             }
450             break;
451         case 's':
452             if (state->width == 0) {
453                 state->width = INT_MAX;
454             }
455             if (state->assign) {
456                 cArg = va_arg(state->ap, char *);
457             }
458             for (; state->width > 0; state->width--) {
459                 ch = GET(state);
460                 if ((ch == EOF) || isspace(ch)) {
461                     UNGET(state, ch);
462                     break;
463                 }
464                 if (state->assign) {
465                     *cArg++ = ch;
466                 }
467             }
468             if (state->assign) {
469                 *cArg = '\0';
470                 state->converted = PR_TRUE;
471             }
472             break;
473         case '%':
474             ch = GET(state);
475             if (ch != '%') {
476                 UNGET(state, ch);
477                 return NULL;
478             }
479             break;
480         case '[':
481         {
482             PRBool complement = PR_FALSE;
483             const char *closeBracket;
484             size_t n;
485 
486             if (*++cPtr == '^') {
487                 complement = PR_TRUE;
488                 cPtr++;
489             }
490             closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']');
491             if (closeBracket == NULL) {
492                 return NULL;
493             }
494             n = closeBracket - cPtr;
495             if (state->width == 0) {
496                 state->width = INT_MAX;
497             }
498             if (state->assign) {
499                 cArg = va_arg(state->ap, char *);
500             }
501             for (; state->width > 0; state->width--) {
502                 ch = GET(state);
503                 if ((ch == EOF)
504                     || (!complement && !memchr(cPtr, ch, n))
505                     || (complement && memchr(cPtr, ch, n))) {
506                     UNGET(state, ch);
507                     break;
508                 }
509                 if (state->assign) {
510                     *cArg++ = ch;
511                 }
512             }
513             if (state->assign) {
514                 *cArg = '\0';
515                 state->converted = PR_TRUE;
516             }
517             cPtr = closeBracket;
518         }
519         break;
520         default:
521             return NULL;
522     }
523     return cPtr;
524 }
525 
526 static PRInt32
DoScanf(ScanfState * state,const char * fmt)527 DoScanf(ScanfState *state, const char *fmt)
528 {
529     PRInt32 nConverted = 0;
530     const char *cPtr;
531     int ch;
532 
533     state->nChar = 0;
534     cPtr = fmt;
535     while (1) {
536         if (isspace(*cPtr)) {
537             /* white space: skip */
538             do {
539                 cPtr++;
540             } while (isspace(*cPtr));
541             do {
542                 ch = GET(state);
543             } while (isspace(ch));
544             UNGET(state, ch);
545         } else if (*cPtr == '%') {
546             /* format spec: convert */
547             cPtr++;
548             state->assign = PR_TRUE;
549             if (*cPtr == '*') {
550                 cPtr++;
551                 state->assign = PR_FALSE;
552             }
553             for (state->width = 0; isdigit(*cPtr); cPtr++) {
554                 state->width = state->width * 10 + *cPtr - '0';
555             }
556             state->sizeSpec = _PR_size_none;
557             if (*cPtr == 'h') {
558                 cPtr++;
559                 state->sizeSpec = _PR_size_h;
560             } else if (*cPtr == 'l') {
561                 cPtr++;
562                 if (*cPtr == 'l') {
563                     cPtr++;
564                     state->sizeSpec = _PR_size_ll;
565                 } else {
566                     state->sizeSpec = _PR_size_l;
567                 }
568             } else if (*cPtr == 'L') {
569                 cPtr++;
570                 state->sizeSpec = _PR_size_L;
571             }
572             cPtr = Convert(state, cPtr);
573             if (cPtr == NULL) {
574                 return (nConverted > 0 ? nConverted : EOF);
575             }
576             if (state->converted) {
577                 nConverted++;
578             }
579             cPtr++;
580         } else {
581             /* others: must match */
582             if (*cPtr == '\0') {
583                 return nConverted;
584             }
585             ch = GET(state);
586             if (ch != *cPtr) {
587                 UNGET(state, ch);
588                 return nConverted;
589             }
590             cPtr++;
591         }
592     }
593 }
594 
/*
 * StringGetChar:
 *     The get function for scanning a NUL-terminated string.
 *     'stream' points to a 'char *' cursor into the string.  Returns
 *     the character at the cursor as an unsigned char value and
 *     advances the cursor, or returns EOF and leaves the cursor in
 *     place when it is at the terminating NUL.
 */
static int
StringGetChar(void *stream)
{
    char **cursor = (char **) stream;
    unsigned char c = (unsigned char) **cursor;

    if (c == '\0') {
        return EOF;
    }
    ++*cursor;
    return c;
}
606 
/*
 * StringUngetChar:
 *     The unget function for scanning a NUL-terminated string.
 *     'stream' points to a 'char *' cursor into the string.  Moves
 *     the cursor back by one character, unless 'ch' is EOF, in which
 *     case nothing was consumed and the cursor is left alone.  The
 *     value of 'ch' is otherwise not used.
 */
static void
StringUngetChar(void *stream, int ch)
{
    char **cursor = (char **) stream;

    if (ch == EOF) {
        return;
    }
    --*cursor;
}
616 
617 PR_IMPLEMENT(PRInt32)
PR_sscanf(const char * buf,const char * fmt,...)618 PR_sscanf(const char *buf, const char *fmt, ...)
619 {
620     PRInt32 rv;
621     ScanfState state;
622 
623     state.get = &StringGetChar;
624     state.unget = &StringUngetChar;
625     state.stream = (void *) &buf;
626     va_start(state.ap, fmt);
627     rv = DoScanf(&state, fmt);
628     va_end(state.ap);
629     return rv;
630 }
631