1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 /*
7 * Scan functions for NSPR types
8 *
9 * Author: Wan-Teh Chang
10 *
11 * Acknowledgment: The implementation is inspired by the source code
12 * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992.
13 */
14
15 #include <limits.h>
16 #include <ctype.h>
17 #include <string.h>
18 #include <stdlib.h>
19 #include "prprf.h"
20 #include "prdtoa.h"
21 #include "prlog.h"
22 #include "prerror.h"
23
/*
 * _PRGetCharFN: a callback that reads one character from 'stream'.
 * Returns the character read, or EOF if end of stream is reached.
 *
 * 'stream' is passed through untouched by the scanner (see the GET
 * macro); only the callback itself interprets it.
 */
typedef int (*_PRGetCharFN)(void *stream);
29
/*
 * _PRUngetCharFN: a callback that pushes the character 'ch' back to
 * 'stream'.
 *
 * The scanner may hand it EOF (whatever the get callback last returned);
 * the string implementation in this file, StringUngetChar, treats EOF
 * as a no-op.
 */
typedef void (*_PRUngetCharFN)(void *stream, int ch);
34
/*
 * The size specifier for the integer and floating point number
 * conversions in format control strings.
 *
 * How the specifier selects the destination type is decided by the
 * conversion routines in this file: for integers 'h' stores through a
 * PRInt16/PRUint16 pointer, 'l' through PRInt32/PRUint32 and 'll'
 * through PRInt64/PRUint64; for floating point 'l' stores a PRFloat64
 * and 'L' a long double.
 */
typedef enum {
    _PR_size_none,  /* No size specifier is given */
    _PR_size_h,     /* The 'h' specifier, suggesting "short" */
    _PR_size_l,     /* The 'l' specifier, suggesting "long" */
    _PR_size_L,     /* The 'L' specifier, meaning a 'long double' */
    _PR_size_ll     /* The 'll' specifier, suggesting "long long" */
} _PRSizeSpec;
46
/*
 * The collection of data that is passed between the scan function
 * and its subordinate functions.  The fields of this structure
 * serve as the input or output arguments for these functions.
 *
 * The first group (get .. nChar) describes the input source and lives
 * for the whole scan.  The second group (assign, width, sizeSpec) is
 * rewritten by DoScanf for every conversion specification it parses.
 * 'converted' is reset by Convert and reports back whether a value was
 * stored.
 */
typedef struct {
    _PRGetCharFN get;        /* get a character from input stream */
    _PRUngetCharFN unget;    /* unget (push back) a character */
    void *stream;            /* argument for get and unget */
    va_list ap;              /* the variable argument list */
    int nChar;               /* number of characters read from 'stream';
                              * maintained by the GET and UNGET macros */

    PRBool assign;           /* assign, or suppress assignment ('*')? */
    int width;               /* field width; 0 means none was given */
    _PRSizeSpec sizeSpec;    /* 'h', 'l', 'L', or 'll' */

    PRBool converted;        /* is the value actually converted? */
} ScanfState;
65
/*
 * GET(state) reads one character through the state's 'get' callback and
 * counts it in nChar; its value is whatever the callback returned
 * (a character or EOF).  Note that nChar is bumped even when the
 * callback returns EOF.
 *
 * UNGET(state, ch) pushes 'ch' back through the 'unget' callback and
 * undoes the count.
 *
 * Both are comma expressions that evaluate 'state' more than once, so
 * the argument must not have side effects.
 */
#define GET(state) ((state)->nChar++, (state)->get((state)->stream))
#define UNGET(state, ch) \
    ((state)->nChar--, (state)->unget((state)->stream, ch))
69
/*
 * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH,
 * are always used together.
 *
 * GET_IF_WITHIN_WIDTH decrements the field width of 'state' and, only
 * if the width has not been exceeded, calls the GET macro and assigns
 * its return value to 'ch'.  Therefore, after GET_IF_WITHIN_WIDTH, the
 * value of 'ch' is valid only if the macro WITHIN_WIDTH evaluates to
 * true.
 *
 * GET_IF_WITHIN_WIDTH is a statement, not an expression.  It is wrapped
 * in do { ... } while (0) so that "GET_IF_WITHIN_WIDTH(s, c);" behaves
 * as exactly one statement everywhere, including as the unbraced body
 * of an if that is followed by an else.
 *
 * Both macros evaluate 'state' more than once; pass an expression
 * without side effects.
 */

#define GET_IF_WITHIN_WIDTH(state, ch) \
    do {                               \
        if (--(state)->width >= 0) {   \
            (ch) = GET(state);         \
        }                              \
    } while (0)
#define WITHIN_WIDTH(state) ((state)->width >= 0)
85
86 /*
87 * _pr_strtoull:
88 * Convert a string to an unsigned 64-bit integer. The string
89 * 'str' is assumed to be a representation of the integer in
90 * base 'base'.
91 *
92 * Warning:
93 * - Only handle base 8, 10, and 16.
94 * - No overflow checking.
95 */
96
97 static PRUint64
_pr_strtoull(const char * str,char ** endptr,int base)98 _pr_strtoull(const char *str, char **endptr, int base)
99 {
100 static const int BASE_MAX = 16;
101 static const char digits[] = "0123456789abcdef";
102 char *digitPtr;
103 PRUint64 x; /* return value */
104 PRInt64 base64;
105 const char *cPtr;
106 PRBool negative;
107 const char *digitStart;
108
109 PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16);
110 if (base < 0 || base == 1 || base > BASE_MAX) {
111 if (endptr) {
112 *endptr = (char *) str;
113 return LL_ZERO;
114 }
115 }
116
117 cPtr = str;
118 while (isspace(*cPtr)) {
119 ++cPtr;
120 }
121
122 negative = PR_FALSE;
123 if (*cPtr == '-') {
124 negative = PR_TRUE;
125 cPtr++;
126 } else if (*cPtr == '+') {
127 cPtr++;
128 }
129
130 if (base == 16) {
131 if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) {
132 cPtr += 2;
133 }
134 } else if (base == 0) {
135 if (*cPtr != '0') {
136 base = 10;
137 } else if (cPtr[1] == 'x' || cPtr[1] == 'X') {
138 base = 16;
139 cPtr += 2;
140 } else {
141 base = 8;
142 }
143 }
144 PR_ASSERT(base != 0);
145 LL_I2L(base64, base);
146 digitStart = cPtr;
147
148 /* Skip leading zeros */
149 while (*cPtr == '0') {
150 cPtr++;
151 }
152
153 LL_I2L(x, 0);
154 while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) {
155 PRUint64 d;
156
157 LL_I2L(d, (digitPtr - digits));
158 LL_MUL(x, x, base64);
159 LL_ADD(x, x, d);
160 cPtr++;
161 }
162
163 if (cPtr == digitStart) {
164 if (endptr) {
165 *endptr = (char *) str;
166 }
167 return LL_ZERO;
168 }
169
170 if (negative) {
171 #ifdef HAVE_LONG_LONG
172 /* The cast to a signed type is to avoid a compiler warning */
173 x = -(PRInt64)x;
174 #else
175 LL_NEG(x, x);
176 #endif
177 }
178
179 if (endptr) {
180 *endptr = (char *) cPtr;
181 }
182 return x;
183 }
184
/*
 * The maximum field width (in number of characters) that is enough
 * (may be more than necessary) to represent a 64-bit integer or
 * floating point number.
 *
 * GetInt and GetFloat size their scratch buffers as FMAX + 1 and clamp
 * the requested field width to FMAX.
 */
#define FMAX 31
/* The character GetFloat accepts between the integer and fraction digits. */
#define DECIMAL_POINT '.'
192
193 static PRStatus
GetInt(ScanfState * state,int code)194 GetInt(ScanfState *state, int code)
195 {
196 char buf[FMAX + 1], *p;
197 int ch = 0;
198 static const char digits[] = "0123456789abcdefABCDEF";
199 PRBool seenDigit = PR_FALSE;
200 int base;
201 int dlen;
202
203 switch (code) {
204 case 'd': case 'u':
205 base = 10;
206 break;
207 case 'i':
208 base = 0;
209 break;
210 case 'x': case 'X': case 'p':
211 base = 16;
212 break;
213 case 'o':
214 base = 8;
215 break;
216 default:
217 return PR_FAILURE;
218 }
219 if (state->width == 0 || state->width > FMAX) {
220 state->width = FMAX;
221 }
222 p = buf;
223 GET_IF_WITHIN_WIDTH(state, ch);
224 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
225 *p++ = ch;
226 GET_IF_WITHIN_WIDTH(state, ch);
227 }
228 if (WITHIN_WIDTH(state) && ch == '0') {
229 seenDigit = PR_TRUE;
230 *p++ = ch;
231 GET_IF_WITHIN_WIDTH(state, ch);
232 if (WITHIN_WIDTH(state)
233 && (ch == 'x' || ch == 'X')
234 && (base == 0 || base == 16)) {
235 base = 16;
236 *p++ = ch;
237 GET_IF_WITHIN_WIDTH(state, ch);
238 } else if (base == 0) {
239 base = 8;
240 }
241 }
242 if (base == 0 || base == 10) {
243 dlen = 10;
244 } else if (base == 8) {
245 dlen = 8;
246 } else {
247 PR_ASSERT(base == 16);
248 dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */
249 }
250 while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) {
251 *p++ = ch;
252 GET_IF_WITHIN_WIDTH(state, ch);
253 seenDigit = PR_TRUE;
254 }
255 if (WITHIN_WIDTH(state)) {
256 UNGET(state, ch);
257 }
258 if (!seenDigit) {
259 return PR_FAILURE;
260 }
261 *p = '\0';
262 if (state->assign) {
263 if (code == 'd' || code == 'i') {
264 if (state->sizeSpec == _PR_size_ll) {
265 PRInt64 llval = _pr_strtoull(buf, NULL, base);
266 *va_arg(state->ap, PRInt64 *) = llval;
267 } else {
268 long lval = strtol(buf, NULL, base);
269
270 if (state->sizeSpec == _PR_size_none) {
271 *va_arg(state->ap, PRIntn *) = lval;
272 } else if (state->sizeSpec == _PR_size_h) {
273 *va_arg(state->ap, PRInt16 *) = (PRInt16)lval;
274 } else if (state->sizeSpec == _PR_size_l) {
275 *va_arg(state->ap, PRInt32 *) = lval;
276 } else {
277 return PR_FAILURE;
278 }
279 }
280 } else {
281 if (state->sizeSpec == _PR_size_ll) {
282 PRUint64 llval = _pr_strtoull(buf, NULL, base);
283 *va_arg(state->ap, PRUint64 *) = llval;
284 } else {
285 unsigned long lval = strtoul(buf, NULL, base);
286
287 if (state->sizeSpec == _PR_size_none) {
288 *va_arg(state->ap, PRUintn *) = lval;
289 } else if (state->sizeSpec == _PR_size_h) {
290 *va_arg(state->ap, PRUint16 *) = (PRUint16)lval;
291 } else if (state->sizeSpec == _PR_size_l) {
292 *va_arg(state->ap, PRUint32 *) = lval;
293 } else {
294 return PR_FAILURE;
295 }
296 }
297 }
298 state->converted = PR_TRUE;
299 }
300 return PR_SUCCESS;
301 }
302
303 static PRStatus
GetFloat(ScanfState * state)304 GetFloat(ScanfState *state)
305 {
306 char buf[FMAX + 1], *p;
307 int ch = 0;
308 PRBool seenDigit = PR_FALSE;
309
310 if (state->width == 0 || state->width > FMAX) {
311 state->width = FMAX;
312 }
313 p = buf;
314 GET_IF_WITHIN_WIDTH(state, ch);
315 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
316 *p++ = ch;
317 GET_IF_WITHIN_WIDTH(state, ch);
318 }
319 while (WITHIN_WIDTH(state) && isdigit(ch)) {
320 *p++ = ch;
321 GET_IF_WITHIN_WIDTH(state, ch);
322 seenDigit = PR_TRUE;
323 }
324 if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) {
325 *p++ = ch;
326 GET_IF_WITHIN_WIDTH(state, ch);
327 while (WITHIN_WIDTH(state) && isdigit(ch)) {
328 *p++ = ch;
329 GET_IF_WITHIN_WIDTH(state, ch);
330 seenDigit = PR_TRUE;
331 }
332 }
333
334 /*
335 * This is not robust. For example, "1.2e+" would confuse
336 * the code below to read 'e' and '+', only to realize that
337 * it should have stopped at "1.2". But we can't push back
338 * more than one character, so there is nothing I can do.
339 */
340
341 /* Parse exponent */
342 if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) {
343 *p++ = ch;
344 GET_IF_WITHIN_WIDTH(state, ch);
345 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
346 *p++ = ch;
347 GET_IF_WITHIN_WIDTH(state, ch);
348 }
349 while (WITHIN_WIDTH(state) && isdigit(ch)) {
350 *p++ = ch;
351 GET_IF_WITHIN_WIDTH(state, ch);
352 }
353 }
354 if (WITHIN_WIDTH(state)) {
355 UNGET(state, ch);
356 }
357 if (!seenDigit) {
358 return PR_FAILURE;
359 }
360 *p = '\0';
361 if (state->assign) {
362 PRFloat64 dval = PR_strtod(buf, NULL);
363
364 state->converted = PR_TRUE;
365 if (state->sizeSpec == _PR_size_l) {
366 *va_arg(state->ap, PRFloat64 *) = dval;
367 } else if (state->sizeSpec == _PR_size_L) {
368 *va_arg(state->ap, long double *) = dval;
369 } else {
370 *va_arg(state->ap, float *) = (float) dval;
371 }
372 }
373 return PR_SUCCESS;
374 }
375
376 /*
377 * Convert, and return the end of the conversion spec.
378 * Return NULL on error.
379 */
380
381 static const char *
Convert(ScanfState * state,const char * fmt)382 Convert(ScanfState *state, const char *fmt)
383 {
384 const char *cPtr;
385 int ch;
386 char *cArg = NULL;
387
388 state->converted = PR_FALSE;
389 cPtr = fmt;
390 if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') {
391 do {
392 ch = GET(state);
393 } while (isspace(ch));
394 UNGET(state, ch);
395 }
396 switch (*cPtr) {
397 case 'c':
398 if (state->assign) {
399 cArg = va_arg(state->ap, char *);
400 }
401 if (state->width == 0) {
402 state->width = 1;
403 }
404 for (; state->width > 0; state->width--) {
405 ch = GET(state);
406 if (ch == EOF) {
407 return NULL;
408 }
409 if (state->assign) {
410 *cArg++ = ch;
411 }
412 }
413 if (state->assign) {
414 state->converted = PR_TRUE;
415 }
416 break;
417 case 'p':
418 case 'd': case 'i': case 'o':
419 case 'u': case 'x': case 'X':
420 if (GetInt(state, *cPtr) == PR_FAILURE) {
421 return NULL;
422 }
423 break;
424 case 'e': case 'E': case 'f':
425 case 'g': case 'G':
426 if (GetFloat(state) == PR_FAILURE) {
427 return NULL;
428 }
429 break;
430 case 'n':
431 /* do not consume any input */
432 if (state->assign) {
433 switch (state->sizeSpec) {
434 case _PR_size_none:
435 *va_arg(state->ap, PRIntn *) = state->nChar;
436 break;
437 case _PR_size_h:
438 *va_arg(state->ap, PRInt16 *) = state->nChar;
439 break;
440 case _PR_size_l:
441 *va_arg(state->ap, PRInt32 *) = state->nChar;
442 break;
443 case _PR_size_ll:
444 LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar);
445 break;
446 default:
447 PR_ASSERT(0);
448 }
449 }
450 break;
451 case 's':
452 if (state->width == 0) {
453 state->width = INT_MAX;
454 }
455 if (state->assign) {
456 cArg = va_arg(state->ap, char *);
457 }
458 for (; state->width > 0; state->width--) {
459 ch = GET(state);
460 if ((ch == EOF) || isspace(ch)) {
461 UNGET(state, ch);
462 break;
463 }
464 if (state->assign) {
465 *cArg++ = ch;
466 }
467 }
468 if (state->assign) {
469 *cArg = '\0';
470 state->converted = PR_TRUE;
471 }
472 break;
473 case '%':
474 ch = GET(state);
475 if (ch != '%') {
476 UNGET(state, ch);
477 return NULL;
478 }
479 break;
480 case '[':
481 {
482 PRBool complement = PR_FALSE;
483 const char *closeBracket;
484 size_t n;
485
486 if (*++cPtr == '^') {
487 complement = PR_TRUE;
488 cPtr++;
489 }
490 closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']');
491 if (closeBracket == NULL) {
492 return NULL;
493 }
494 n = closeBracket - cPtr;
495 if (state->width == 0) {
496 state->width = INT_MAX;
497 }
498 if (state->assign) {
499 cArg = va_arg(state->ap, char *);
500 }
501 for (; state->width > 0; state->width--) {
502 ch = GET(state);
503 if ((ch == EOF)
504 || (!complement && !memchr(cPtr, ch, n))
505 || (complement && memchr(cPtr, ch, n))) {
506 UNGET(state, ch);
507 break;
508 }
509 if (state->assign) {
510 *cArg++ = ch;
511 }
512 }
513 if (state->assign) {
514 *cArg = '\0';
515 state->converted = PR_TRUE;
516 }
517 cPtr = closeBracket;
518 }
519 break;
520 default:
521 return NULL;
522 }
523 return cPtr;
524 }
525
526 static PRInt32
DoScanf(ScanfState * state,const char * fmt)527 DoScanf(ScanfState *state, const char *fmt)
528 {
529 PRInt32 nConverted = 0;
530 const char *cPtr;
531 int ch;
532
533 state->nChar = 0;
534 cPtr = fmt;
535 while (1) {
536 if (isspace(*cPtr)) {
537 /* white space: skip */
538 do {
539 cPtr++;
540 } while (isspace(*cPtr));
541 do {
542 ch = GET(state);
543 } while (isspace(ch));
544 UNGET(state, ch);
545 } else if (*cPtr == '%') {
546 /* format spec: convert */
547 cPtr++;
548 state->assign = PR_TRUE;
549 if (*cPtr == '*') {
550 cPtr++;
551 state->assign = PR_FALSE;
552 }
553 for (state->width = 0; isdigit(*cPtr); cPtr++) {
554 state->width = state->width * 10 + *cPtr - '0';
555 }
556 state->sizeSpec = _PR_size_none;
557 if (*cPtr == 'h') {
558 cPtr++;
559 state->sizeSpec = _PR_size_h;
560 } else if (*cPtr == 'l') {
561 cPtr++;
562 if (*cPtr == 'l') {
563 cPtr++;
564 state->sizeSpec = _PR_size_ll;
565 } else {
566 state->sizeSpec = _PR_size_l;
567 }
568 } else if (*cPtr == 'L') {
569 cPtr++;
570 state->sizeSpec = _PR_size_L;
571 }
572 cPtr = Convert(state, cPtr);
573 if (cPtr == NULL) {
574 return (nConverted > 0 ? nConverted : EOF);
575 }
576 if (state->converted) {
577 nConverted++;
578 }
579 cPtr++;
580 } else {
581 /* others: must match */
582 if (*cPtr == '\0') {
583 return nConverted;
584 }
585 ch = GET(state);
586 if (ch != *cPtr) {
587 UNGET(state, ch);
588 return nConverted;
589 }
590 cPtr++;
591 }
592 }
593 }
594
/*
 * StringGetChar:
 *     The 'get' callback for scanning a NUL-terminated string.
 *     'stream' points to a char * cursor into the string.
 *
 *     Returns the character under the cursor as an unsigned char value
 *     and advances the cursor, or returns EOF without moving the
 *     cursor when it is at the terminating NUL.
 */
static int
StringGetChar(void *stream)
{
    char **cursor = (char **) stream;
    unsigned char c = (unsigned char) **cursor;

    if (c == '\0') {
        return EOF;
    }
    ++*cursor;
    return c;
}
606
/*
 * StringUngetChar:
 *     The 'unget' callback for scanning a NUL-terminated string.
 *     'stream' points to a char * cursor into the string.
 *
 *     Moves the cursor back by one character.  The value of 'ch' is
 *     not stored anywhere; it only matters whether it is EOF, in which
 *     case nothing happens (StringGetChar did not advance for EOF).
 */
static void
StringUngetChar(void *stream, int ch)
{
    char **cursor = (char **) stream;

    if (ch == EOF) {
        return;
    }
    --*cursor;
}
616
617 PR_IMPLEMENT(PRInt32)
PR_sscanf(const char * buf,const char * fmt,...)618 PR_sscanf(const char *buf, const char *fmt, ...)
619 {
620 PRInt32 rv;
621 ScanfState state;
622
623 state.get = &StringGetChar;
624 state.unget = &StringUngetChar;
625 state.stream = (void *) &buf;
626 va_start(state.ap, fmt);
627 rv = DoScanf(&state, fmt);
628 va_end(state.ap);
629 return rv;
630 }
631