1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * $FreeBSD: src/sys/kern/subr_scanf.c,v 1.13 1999/11/24 01:03:01 archie Exp $ 37 * $DragonFly: src/sys/kern/subr_scanf.c,v 1.3 2003/11/09 02:22:36 dillon Exp $ 38 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 39 * From: static char sccsid[] = "@(#)strtol.c 8.1 (Berkeley) 6/4/93"; 40 * From: static char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93"; 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/ctype.h> 46 #include <machine/limits.h> 47 48 /* 49 * Note that stdarg.h and the ANSI style va_start macro is used for both 50 * ANSI and traditional C compilers. 51 */ 52 #include <machine/stdarg.h> 53 54 #define BUF 32 /* Maximum length of numeric string. */ 55 56 /* 57 * Flags used during conversion. 58 */ 59 #define LONG 0x01 /* l: long or double */ 60 #define SHORT 0x04 /* h: short */ 61 #define SUPPRESS 0x08 /* suppress assignment */ 62 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 63 #define NOSKIP 0x20 /* do not skip blanks */ 64 #define QUAD 0x400 65 66 /* 67 * The following are used in numeric conversions only: 68 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 69 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 70 */ 71 #define SIGNOK 0x40 /* +/- is (still) legal */ 72 #define NDIGITS 0x80 /* no digits detected */ 73 74 #define DPTOK 0x100 /* (float) decimal point is still legal */ 75 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 76 77 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 78 #define NZDIGITS 0x200 /* no zero digits detected */ 79 80 /* 81 * Conversion types. 82 */ 83 #define CT_CHAR 0 /* %c conversion */ 84 #define CT_CCL 1 /* %[...] conversion */ 85 #define CT_STRING 2 /* %s conversion */ 86 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 87 typedef u_quad_t (*ccfntype)(const char *, char **, int); 88 89 static const u_char *__sccl(char *, const u_char *); 90 91 int 92 sscanf(const char *ibuf, const char *fmt, ...) 93 { 94 __va_list ap; 95 int ret; 96 97 __va_start(ap, fmt); 98 ret = vsscanf(ibuf, fmt, ap); 99 __va_end(ap); 100 return(ret); 101 } 102 103 int 104 vsscanf(const char *inp, char const *fmt0, __va_list ap) 105 { 106 int inr; 107 const u_char *fmt = (const u_char *)fmt0; 108 int c; /* character from format, or conversion */ 109 size_t width; /* field width, or 0 */ 110 char *p; /* points into all kinds of strings */ 111 int n; /* handy integer */ 112 int flags; /* flags as defined above */ 113 char *p0; /* saves original value of p when necessary */ 114 int nassigned; /* number of fields assigned */ 115 int nconversions; /* number of conversions */ 116 int nread; /* number of characters consumed from fp */ 117 int base; /* base argument to strtoq/strtouq */ 118 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 119 char ccltab[256]; /* character class table for %[...] */ 120 char buf[BUF]; /* buffer for numeric conversions */ 121 122 /* `basefix' is used to avoid `if' tests in the integer scanner */ 123 static short basefix[17] = 124 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 125 126 inr = strlen(inp); 127 128 nassigned = 0; 129 nconversions = 0; 130 nread = 0; 131 base = 0; /* XXX just to keep gcc happy */ 132 ccfn = NULL; /* XXX just to keep gcc happy */ 133 for (;;) { 134 c = *fmt++; 135 if (c == 0) 136 return (nassigned); 137 if (isspace(c)) { 138 while (inr > 0 && isspace(*inp)) 139 nread++, inr--, inp++; 140 continue; 141 } 142 if (c != '%') 143 goto literal; 144 width = 0; 145 flags = 0; 146 /* 147 * switch on the format. continue if done; 148 * break once format type is derived. 149 */ 150 again: c = *fmt++; 151 switch (c) { 152 case '%': 153 literal: 154 if (inr <= 0) 155 goto input_failure; 156 if (*inp != c) 157 goto match_failure; 158 inr--, inp++; 159 nread++; 160 continue; 161 162 case '*': 163 flags |= SUPPRESS; 164 goto again; 165 case 'l': 166 flags |= LONG; 167 goto again; 168 case 'q': 169 flags |= QUAD; 170 goto again; 171 case 'h': 172 flags |= SHORT; 173 goto again; 174 175 case '0': case '1': case '2': case '3': case '4': 176 case '5': case '6': case '7': case '8': case '9': 177 width = width * 10 + c - '0'; 178 goto again; 179 180 /* 181 * Conversions. 182 * 183 */ 184 case 'd': 185 c = CT_INT; 186 ccfn = (ccfntype)strtoq; 187 base = 10; 188 break; 189 190 case 'i': 191 c = CT_INT; 192 ccfn = (ccfntype)strtoq; 193 base = 0; 194 break; 195 196 case 'o': 197 c = CT_INT; 198 ccfn = strtouq; 199 base = 8; 200 break; 201 202 case 'u': 203 c = CT_INT; 204 ccfn = strtouq; 205 base = 10; 206 break; 207 208 case 'x': 209 flags |= PFXOK; /* enable 0x prefixing */ 210 c = CT_INT; 211 ccfn = strtouq; 212 base = 16; 213 break; 214 215 case 's': 216 c = CT_STRING; 217 break; 218 219 case '[': 220 fmt = __sccl(ccltab, fmt); 221 flags |= NOSKIP; 222 c = CT_CCL; 223 break; 224 225 case 'c': 226 flags |= NOSKIP; 227 c = CT_CHAR; 228 break; 229 230 case 'p': /* pointer format is like hex */ 231 flags |= POINTER | PFXOK; 232 c = CT_INT; 233 ccfn = strtouq; 234 base = 16; 235 break; 236 237 case 'n': 238 nconversions++; 239 if (flags & SUPPRESS) /* ??? */ 240 continue; 241 if (flags & SHORT) 242 *__va_arg(ap, short *) = nread; 243 else if (flags & LONG) 244 *__va_arg(ap, long *) = nread; 245 else if (flags & QUAD) 246 *__va_arg(ap, quad_t *) = nread; 247 else 248 *__va_arg(ap, int *) = nread; 249 continue; 250 } 251 252 /* 253 * We have a conversion that requires input. 254 */ 255 if (inr <= 0) 256 goto input_failure; 257 258 /* 259 * Consume leading white space, except for formats 260 * that suppress this. 261 */ 262 if ((flags & NOSKIP) == 0) { 263 while (isspace(*inp)) { 264 nread++; 265 if (--inr > 0) 266 inp++; 267 else 268 goto input_failure; 269 } 270 /* 271 * Note that there is at least one character in 272 * the buffer, so conversions that do not set NOSKIP 273 * can no longer result in an input failure. 274 */ 275 } 276 277 /* 278 * Do the conversion. 279 */ 280 switch (c) { 281 282 case CT_CHAR: 283 /* scan arbitrary characters (sets NOSKIP) */ 284 if (width == 0) 285 width = 1; 286 if (flags & SUPPRESS) { 287 size_t sum = 0; 288 for (;;) { 289 if ((n = inr) < width) { 290 sum += n; 291 width -= n; 292 inp += n; 293 if (sum == 0) 294 goto input_failure; 295 break; 296 } else { 297 sum += width; 298 inr -= width; 299 inp += width; 300 break; 301 } 302 } 303 nread += sum; 304 } else { 305 bcopy(inp, __va_arg(ap, char *), width); 306 inr -= width; 307 inp += width; 308 nread += width; 309 nassigned++; 310 } 311 nconversions++; 312 break; 313 314 case CT_CCL: 315 /* scan a (nonempty) character class (sets NOSKIP) */ 316 if (width == 0) 317 width = (size_t)~0; /* `infinity' */ 318 /* take only those things in the class */ 319 if (flags & SUPPRESS) { 320 n = 0; 321 while (ccltab[(unsigned char)*inp]) { 322 n++, inr--, inp++; 323 if (--width == 0) 324 break; 325 if (inr <= 0) { 326 if (n == 0) 327 goto input_failure; 328 break; 329 } 330 } 331 if (n == 0) 332 goto match_failure; 333 } else { 334 p0 = p = __va_arg(ap, char *); 335 while (ccltab[(unsigned char)*inp]) { 336 inr--; 337 *p++ = *inp++; 338 if (--width == 0) 339 break; 340 if (inr <= 0) { 341 if (p == p0) 342 goto input_failure; 343 break; 344 } 345 } 346 n = p - p0; 347 if (n == 0) 348 goto match_failure; 349 *p = 0; 350 nassigned++; 351 } 352 nread += n; 353 nconversions++; 354 break; 355 356 case CT_STRING: 357 /* like CCL, but zero-length string OK, & no NOSKIP */ 358 if (width == 0) 359 width = (size_t)~0; 360 if (flags & SUPPRESS) { 361 n = 0; 362 while (!isspace(*inp)) { 363 n++, inr--, inp++; 364 if (--width == 0) 365 break; 366 if (inr <= 0) 367 break; 368 } 369 nread += n; 370 } else { 371 p0 = p = __va_arg(ap, char *); 372 while (!isspace(*inp)) { 373 inr--; 374 *p++ = *inp++; 375 if (--width == 0) 376 break; 377 if (inr <= 0) 378 break; 379 } 380 *p = 0; 381 nread += p - p0; 382 nassigned++; 383 } 384 nconversions++; 385 continue; 386 387 case CT_INT: 388 /* scan an integer as if by strtoq/strtouq */ 389 #ifdef hardway 390 if (width == 0 || width > sizeof(buf) - 1) 391 width = sizeof(buf) - 1; 392 #else 393 /* size_t is unsigned, hence this optimisation */ 394 if (--width > sizeof(buf) - 2) 395 width = sizeof(buf) - 2; 396 width++; 397 #endif 398 flags |= SIGNOK | NDIGITS | NZDIGITS; 399 for (p = buf; width; width--) { 400 c = *inp; 401 /* 402 * Switch on the character; `goto ok' 403 * if we accept it as a part of number. 404 */ 405 switch (c) { 406 407 /* 408 * The digit 0 is always legal, but is 409 * special. For %i conversions, if no 410 * digits (zero or nonzero) have been 411 * scanned (only signs), we will have 412 * base==0. In that case, we should set 413 * it to 8 and enable 0x prefixing. 414 * Also, if we have not scanned zero digits 415 * before this, do not turn off prefixing 416 * (someone else will turn it off if we 417 * have scanned any nonzero digits). 418 */ 419 case '0': 420 if (base == 0) { 421 base = 8; 422 flags |= PFXOK; 423 } 424 if (flags & NZDIGITS) 425 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 426 else 427 flags &= ~(SIGNOK|PFXOK|NDIGITS); 428 goto ok; 429 430 /* 1 through 7 always legal */ 431 case '1': case '2': case '3': 432 case '4': case '5': case '6': case '7': 433 base = basefix[base]; 434 flags &= ~(SIGNOK | PFXOK | NDIGITS); 435 goto ok; 436 437 /* digits 8 and 9 ok iff decimal or hex */ 438 case '8': case '9': 439 base = basefix[base]; 440 if (base <= 8) 441 break; /* not legal here */ 442 flags &= ~(SIGNOK | PFXOK | NDIGITS); 443 goto ok; 444 445 /* letters ok iff hex */ 446 case 'A': case 'B': case 'C': 447 case 'D': case 'E': case 'F': 448 case 'a': case 'b': case 'c': 449 case 'd': case 'e': case 'f': 450 /* no need to fix base here */ 451 if (base <= 10) 452 break; /* not legal here */ 453 flags &= ~(SIGNOK | PFXOK | NDIGITS); 454 goto ok; 455 456 /* sign ok only as first character */ 457 case '+': case '-': 458 if (flags & SIGNOK) { 459 flags &= ~SIGNOK; 460 goto ok; 461 } 462 break; 463 464 /* x ok iff flag still set & 2nd char */ 465 case 'x': case 'X': 466 if (flags & PFXOK && p == buf + 1) { 467 base = 16; /* if %i */ 468 flags &= ~PFXOK; 469 goto ok; 470 } 471 break; 472 } 473 474 /* 475 * If we got here, c is not a legal character 476 * for a number. Stop accumulating digits. 477 */ 478 break; 479 ok: 480 /* 481 * c is legal: store it and look at the next. 482 */ 483 *p++ = c; 484 if (--inr > 0) 485 inp++; 486 else 487 break; /* end of input */ 488 } 489 /* 490 * If we had only a sign, it is no good; push 491 * back the sign. If the number ends in `x', 492 * it was [sign] '0' 'x', so push back the x 493 * and treat it as [sign] '0'. 494 */ 495 if (flags & NDIGITS) { 496 if (p > buf) { 497 inp--; 498 inr++; 499 } 500 goto match_failure; 501 } 502 c = ((u_char *)p)[-1]; 503 if (c == 'x' || c == 'X') { 504 --p; 505 inp--; 506 inr++; 507 } 508 if ((flags & SUPPRESS) == 0) { 509 u_quad_t res; 510 511 *p = 0; 512 res = (*ccfn)(buf, (char **)NULL, base); 513 if (flags & POINTER) 514 *__va_arg(ap, void **) = 515 (void *)(uintptr_t)res; 516 else if (flags & SHORT) 517 *__va_arg(ap, short *) = res; 518 else if (flags & LONG) 519 *__va_arg(ap, long *) = res; 520 else if (flags & QUAD) 521 *__va_arg(ap, quad_t *) = res; 522 else 523 *__va_arg(ap, int *) = res; 524 nassigned++; 525 } 526 nread += p - buf; 527 nconversions++; 528 break; 529 530 } 531 } 532 input_failure: 533 return (nconversions != 0 ? nassigned : -1); 534 match_failure: 535 return (nassigned); 536 } 537 538 /* 539 * Fill in the given table from the scanset at the given format 540 * (just after `['). Return a pointer to the character past the 541 * closing `]'. The table has a 1 wherever characters should be 542 * considered part of the scanset. 543 */ 544 static const u_char * 545 __sccl(char *tab, const u_char *fmt) 546 { 547 int c, n, v; 548 549 /* first `clear' the whole table */ 550 c = *fmt++; /* first char hat => negated scanset */ 551 if (c == '^') { 552 v = 1; /* default => accept */ 553 c = *fmt++; /* get new first char */ 554 } else 555 v = 0; /* default => reject */ 556 557 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 558 for (n = 0; n < 256; n++) 559 tab[n] = v; /* memset(tab, v, 256) */ 560 561 if (c == 0) 562 return (fmt - 1);/* format ended before closing ] */ 563 564 /* 565 * Now set the entries corresponding to the actual scanset 566 * to the opposite of the above. 567 * 568 * The first character may be ']' (or '-') without being special; 569 * the last character may be '-'. 570 */ 571 v = 1 - v; 572 for (;;) { 573 tab[c] = v; /* take character c */ 574 doswitch: 575 n = *fmt++; /* and examine the next */ 576 switch (n) { 577 578 case 0: /* format ended too soon */ 579 return (fmt - 1); 580 581 case '-': 582 /* 583 * A scanset of the form 584 * [01+-] 585 * is defined as `the digit 0, the digit 1, 586 * the character +, the character -', but 587 * the effect of a scanset such as 588 * [a-zA-Z0-9] 589 * is implementation defined. The V7 Unix 590 * scanf treats `a-z' as `the letters a through 591 * z', but treats `a-a' as `the letter a, the 592 * character -, and the letter a'. 593 * 594 * For compatibility, the `-' is not considerd 595 * to define a range if the character following 596 * it is either a close bracket (required by ANSI) 597 * or is not numerically greater than the character 598 * we just stored in the table (c). 599 */ 600 n = *fmt; 601 if (n == ']' || n < c) { 602 c = '-'; 603 break; /* resume the for(;;) */ 604 } 605 fmt++; 606 /* fill in the range */ 607 do { 608 tab[++c] = v; 609 } while (c < n); 610 c = n; 611 /* 612 * Alas, the V7 Unix scanf also treats formats 613 * such as [a-c-e] as `the letters a through e'. 614 * This too is permitted by the standard.... 615 */ 616 goto doswitch; 617 break; 618 619 case ']': /* end of scanset */ 620 return (fmt); 621 622 default: /* just another character */ 623 c = n; 624 break; 625 } 626 } 627 /* NOTREACHED */ 628 } 629 630