1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * $FreeBSD: src/sys/kern/subr_scanf.c,v 1.13 1999/11/24 01:03:01 archie Exp $ 33 * $DragonFly: src/sys/kern/subr_scanf.c,v 1.4 2006/12/13 21:58:50 dillon Exp $ 34 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 35 * From: static char sccsid[] = "@(#)strtol.c 8.1 (Berkeley) 6/4/93"; 36 * From: static char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93"; 37 */ 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/ctype.h> 42 #include <machine/limits.h> 43 44 /* 45 * Note that stdarg.h and the ANSI style va_start macro is used for both 46 * ANSI and traditional C compilers. 47 */ 48 #include <machine/stdarg.h> 49 50 #define BUF 32 /* Maximum length of numeric string. */ 51 52 /* 53 * Flags used during conversion. 54 */ 55 #define LONG 0x01 /* l: long or double */ 56 #define SHORT 0x04 /* h: short */ 57 #define SUPPRESS 0x08 /* suppress assignment */ 58 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 59 #define NOSKIP 0x20 /* do not skip blanks */ 60 #define QUAD 0x400 61 62 /* 63 * The following are used in numeric conversions only: 64 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 65 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 66 */ 67 #define SIGNOK 0x40 /* +/- is (still) legal */ 68 #define NDIGITS 0x80 /* no digits detected */ 69 70 #define DPTOK 0x100 /* (float) decimal point is still legal */ 71 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 72 73 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 74 #define NZDIGITS 0x200 /* no zero digits detected */ 75 76 /* 77 * Conversion types. 78 */ 79 #define CT_CHAR 0 /* %c conversion */ 80 #define CT_CCL 1 /* %[...] conversion */ 81 #define CT_STRING 2 /* %s conversion */ 82 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 83 typedef u_quad_t (*ccfntype)(const char *, char **, int); 84 85 static const u_char *__sccl(char *, const u_char *); 86 87 int 88 ksscanf(const char *ibuf, const char *fmt, ...) 89 { 90 __va_list ap; 91 int ret; 92 93 __va_start(ap, fmt); 94 ret = kvsscanf(ibuf, fmt, ap); 95 __va_end(ap); 96 return(ret); 97 } 98 99 int 100 kvsscanf(const char *inp, char const *fmt0, __va_list ap) 101 { 102 int inr; 103 const u_char *fmt = (const u_char *)fmt0; 104 int c; /* character from format, or conversion */ 105 size_t width; /* field width, or 0 */ 106 char *p; /* points into all kinds of strings */ 107 int n; /* handy integer */ 108 int flags; /* flags as defined above */ 109 char *p0; /* saves original value of p when necessary */ 110 int nassigned; /* number of fields assigned */ 111 int nconversions; /* number of conversions */ 112 int nread; /* number of characters consumed from fp */ 113 int base; /* base argument to strtoq/strtouq */ 114 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 115 char ccltab[256]; /* character class table for %[...] */ 116 char buf[BUF]; /* buffer for numeric conversions */ 117 118 /* `basefix' is used to avoid `if' tests in the integer scanner */ 119 static short basefix[17] = 120 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 121 122 inr = strlen(inp); 123 124 nassigned = 0; 125 nconversions = 0; 126 nread = 0; 127 base = 0; /* XXX just to keep gcc happy */ 128 ccfn = NULL; /* XXX just to keep gcc happy */ 129 for (;;) { 130 c = *fmt++; 131 if (c == 0) 132 return (nassigned); 133 if (isspace(c)) { 134 while (inr > 0 && isspace(*inp)) 135 nread++, inr--, inp++; 136 continue; 137 } 138 if (c != '%') 139 goto literal; 140 width = 0; 141 flags = 0; 142 /* 143 * switch on the format. continue if done; 144 * break once format type is derived. 145 */ 146 again: c = *fmt++; 147 switch (c) { 148 case '%': 149 literal: 150 if (inr <= 0) 151 goto input_failure; 152 if (*inp != c) 153 goto match_failure; 154 inr--, inp++; 155 nread++; 156 continue; 157 158 case '*': 159 flags |= SUPPRESS; 160 goto again; 161 case 'l': 162 flags |= LONG; 163 goto again; 164 case 'q': 165 flags |= QUAD; 166 goto again; 167 case 'h': 168 flags |= SHORT; 169 goto again; 170 171 case '0': case '1': case '2': case '3': case '4': 172 case '5': case '6': case '7': case '8': case '9': 173 width = width * 10 + c - '0'; 174 goto again; 175 176 /* 177 * Conversions. 178 * 179 */ 180 case 'd': 181 c = CT_INT; 182 ccfn = (ccfntype)strtoq; 183 base = 10; 184 break; 185 186 case 'i': 187 c = CT_INT; 188 ccfn = (ccfntype)strtoq; 189 base = 0; 190 break; 191 192 case 'o': 193 c = CT_INT; 194 ccfn = strtouq; 195 base = 8; 196 break; 197 198 case 'u': 199 c = CT_INT; 200 ccfn = strtouq; 201 base = 10; 202 break; 203 204 case 'x': 205 flags |= PFXOK; /* enable 0x prefixing */ 206 c = CT_INT; 207 ccfn = strtouq; 208 base = 16; 209 break; 210 211 case 's': 212 c = CT_STRING; 213 break; 214 215 case '[': 216 fmt = __sccl(ccltab, fmt); 217 flags |= NOSKIP; 218 c = CT_CCL; 219 break; 220 221 case 'c': 222 flags |= NOSKIP; 223 c = CT_CHAR; 224 break; 225 226 case 'p': /* pointer format is like hex */ 227 flags |= POINTER | PFXOK; 228 c = CT_INT; 229 ccfn = strtouq; 230 base = 16; 231 break; 232 233 case 'n': 234 nconversions++; 235 if (flags & SUPPRESS) /* ??? */ 236 continue; 237 if (flags & SHORT) 238 *__va_arg(ap, short *) = nread; 239 else if (flags & LONG) 240 *__va_arg(ap, long *) = nread; 241 else if (flags & QUAD) 242 *__va_arg(ap, quad_t *) = nread; 243 else 244 *__va_arg(ap, int *) = nread; 245 continue; 246 } 247 248 /* 249 * We have a conversion that requires input. 250 */ 251 if (inr <= 0) 252 goto input_failure; 253 254 /* 255 * Consume leading white space, except for formats 256 * that suppress this. 257 */ 258 if ((flags & NOSKIP) == 0) { 259 while (isspace(*inp)) { 260 nread++; 261 if (--inr > 0) 262 inp++; 263 else 264 goto input_failure; 265 } 266 /* 267 * Note that there is at least one character in 268 * the buffer, so conversions that do not set NOSKIP 269 * can no longer result in an input failure. 270 */ 271 } 272 273 /* 274 * Do the conversion. 275 */ 276 switch (c) { 277 278 case CT_CHAR: 279 /* scan arbitrary characters (sets NOSKIP) */ 280 if (width == 0) 281 width = 1; 282 if (flags & SUPPRESS) { 283 size_t sum = 0; 284 for (;;) { 285 if ((n = inr) < width) { 286 sum += n; 287 width -= n; 288 inp += n; 289 if (sum == 0) 290 goto input_failure; 291 break; 292 } else { 293 sum += width; 294 inr -= width; 295 inp += width; 296 break; 297 } 298 } 299 nread += sum; 300 } else { 301 bcopy(inp, __va_arg(ap, char *), width); 302 inr -= width; 303 inp += width; 304 nread += width; 305 nassigned++; 306 } 307 nconversions++; 308 break; 309 310 case CT_CCL: 311 /* scan a (nonempty) character class (sets NOSKIP) */ 312 if (width == 0) 313 width = (size_t)~0; /* `infinity' */ 314 /* take only those things in the class */ 315 if (flags & SUPPRESS) { 316 n = 0; 317 while (ccltab[(unsigned char)*inp]) { 318 n++, inr--, inp++; 319 if (--width == 0) 320 break; 321 if (inr <= 0) { 322 if (n == 0) 323 goto input_failure; 324 break; 325 } 326 } 327 if (n == 0) 328 goto match_failure; 329 } else { 330 p0 = p = __va_arg(ap, char *); 331 while (ccltab[(unsigned char)*inp]) { 332 inr--; 333 *p++ = *inp++; 334 if (--width == 0) 335 break; 336 if (inr <= 0) { 337 if (p == p0) 338 goto input_failure; 339 break; 340 } 341 } 342 n = p - p0; 343 if (n == 0) 344 goto match_failure; 345 *p = 0; 346 nassigned++; 347 } 348 nread += n; 349 nconversions++; 350 break; 351 352 case CT_STRING: 353 /* like CCL, but zero-length string OK, & no NOSKIP */ 354 if (width == 0) 355 width = (size_t)~0; 356 if (flags & SUPPRESS) { 357 n = 0; 358 while (!isspace(*inp)) { 359 n++, inr--, inp++; 360 if (--width == 0) 361 break; 362 if (inr <= 0) 363 break; 364 } 365 nread += n; 366 } else { 367 p0 = p = __va_arg(ap, char *); 368 while (!isspace(*inp)) { 369 inr--; 370 *p++ = *inp++; 371 if (--width == 0) 372 break; 373 if (inr <= 0) 374 break; 375 } 376 *p = 0; 377 nread += p - p0; 378 nassigned++; 379 } 380 nconversions++; 381 continue; 382 383 case CT_INT: 384 /* scan an integer as if by strtoq/strtouq */ 385 #ifdef hardway 386 if (width == 0 || width > sizeof(buf) - 1) 387 width = sizeof(buf) - 1; 388 #else 389 /* size_t is unsigned, hence this optimisation */ 390 if (--width > sizeof(buf) - 2) 391 width = sizeof(buf) - 2; 392 width++; 393 #endif 394 flags |= SIGNOK | NDIGITS | NZDIGITS; 395 for (p = buf; width; width--) { 396 c = *inp; 397 /* 398 * Switch on the character; `goto ok' 399 * if we accept it as a part of number. 400 */ 401 switch (c) { 402 403 /* 404 * The digit 0 is always legal, but is 405 * special. For %i conversions, if no 406 * digits (zero or nonzero) have been 407 * scanned (only signs), we will have 408 * base==0. In that case, we should set 409 * it to 8 and enable 0x prefixing. 410 * Also, if we have not scanned zero digits 411 * before this, do not turn off prefixing 412 * (someone else will turn it off if we 413 * have scanned any nonzero digits). 414 */ 415 case '0': 416 if (base == 0) { 417 base = 8; 418 flags |= PFXOK; 419 } 420 if (flags & NZDIGITS) 421 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 422 else 423 flags &= ~(SIGNOK|PFXOK|NDIGITS); 424 goto ok; 425 426 /* 1 through 7 always legal */ 427 case '1': case '2': case '3': 428 case '4': case '5': case '6': case '7': 429 base = basefix[base]; 430 flags &= ~(SIGNOK | PFXOK | NDIGITS); 431 goto ok; 432 433 /* digits 8 and 9 ok iff decimal or hex */ 434 case '8': case '9': 435 base = basefix[base]; 436 if (base <= 8) 437 break; /* not legal here */ 438 flags &= ~(SIGNOK | PFXOK | NDIGITS); 439 goto ok; 440 441 /* letters ok iff hex */ 442 case 'A': case 'B': case 'C': 443 case 'D': case 'E': case 'F': 444 case 'a': case 'b': case 'c': 445 case 'd': case 'e': case 'f': 446 /* no need to fix base here */ 447 if (base <= 10) 448 break; /* not legal here */ 449 flags &= ~(SIGNOK | PFXOK | NDIGITS); 450 goto ok; 451 452 /* sign ok only as first character */ 453 case '+': case '-': 454 if (flags & SIGNOK) { 455 flags &= ~SIGNOK; 456 goto ok; 457 } 458 break; 459 460 /* x ok iff flag still set & 2nd char */ 461 case 'x': case 'X': 462 if (flags & PFXOK && p == buf + 1) { 463 base = 16; /* if %i */ 464 flags &= ~PFXOK; 465 goto ok; 466 } 467 break; 468 } 469 470 /* 471 * If we got here, c is not a legal character 472 * for a number. Stop accumulating digits. 473 */ 474 break; 475 ok: 476 /* 477 * c is legal: store it and look at the next. 478 */ 479 *p++ = c; 480 if (--inr > 0) 481 inp++; 482 else 483 break; /* end of input */ 484 } 485 /* 486 * If we had only a sign, it is no good; push 487 * back the sign. If the number ends in `x', 488 * it was [sign] '0' 'x', so push back the x 489 * and treat it as [sign] '0'. 490 */ 491 if (flags & NDIGITS) { 492 if (p > buf) { 493 inp--; 494 inr++; 495 } 496 goto match_failure; 497 } 498 c = ((u_char *)p)[-1]; 499 if (c == 'x' || c == 'X') { 500 --p; 501 inp--; 502 inr++; 503 } 504 if ((flags & SUPPRESS) == 0) { 505 u_quad_t res; 506 507 *p = 0; 508 res = (*ccfn)(buf, NULL, base); 509 if (flags & POINTER) 510 *__va_arg(ap, void **) = 511 (void *)(uintptr_t)res; 512 else if (flags & SHORT) 513 *__va_arg(ap, short *) = res; 514 else if (flags & LONG) 515 *__va_arg(ap, long *) = res; 516 else if (flags & QUAD) 517 *__va_arg(ap, quad_t *) = res; 518 else 519 *__va_arg(ap, int *) = res; 520 nassigned++; 521 } 522 nread += p - buf; 523 nconversions++; 524 break; 525 526 } 527 } 528 input_failure: 529 return (nconversions != 0 ? nassigned : -1); 530 match_failure: 531 return (nassigned); 532 } 533 534 /* 535 * Fill in the given table from the scanset at the given format 536 * (just after `['). Return a pointer to the character past the 537 * closing `]'. The table has a 1 wherever characters should be 538 * considered part of the scanset. 539 */ 540 static const u_char * 541 __sccl(char *tab, const u_char *fmt) 542 { 543 int c, n, v; 544 545 /* first `clear' the whole table */ 546 c = *fmt++; /* first char hat => negated scanset */ 547 if (c == '^') { 548 v = 1; /* default => accept */ 549 c = *fmt++; /* get new first char */ 550 } else 551 v = 0; /* default => reject */ 552 553 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 554 for (n = 0; n < 256; n++) 555 tab[n] = v; /* memset(tab, v, 256) */ 556 557 if (c == 0) 558 return (fmt - 1);/* format ended before closing ] */ 559 560 /* 561 * Now set the entries corresponding to the actual scanset 562 * to the opposite of the above. 563 * 564 * The first character may be ']' (or '-') without being special; 565 * the last character may be '-'. 566 */ 567 v = 1 - v; 568 for (;;) { 569 tab[c] = v; /* take character c */ 570 doswitch: 571 n = *fmt++; /* and examine the next */ 572 switch (n) { 573 574 case 0: /* format ended too soon */ 575 return (fmt - 1); 576 577 case '-': 578 /* 579 * A scanset of the form 580 * [01+-] 581 * is defined as `the digit 0, the digit 1, 582 * the character +, the character -', but 583 * the effect of a scanset such as 584 * [a-zA-Z0-9] 585 * is implementation defined. The V7 Unix 586 * scanf treats `a-z' as `the letters a through 587 * z', but treats `a-a' as `the letter a, the 588 * character -, and the letter a'. 589 * 590 * For compatibility, the `-' is not considerd 591 * to define a range if the character following 592 * it is either a close bracket (required by ANSI) 593 * or is not numerically greater than the character 594 * we just stored in the table (c). 595 */ 596 n = *fmt; 597 if (n == ']' || n < c) { 598 c = '-'; 599 break; /* resume the for(;;) */ 600 } 601 fmt++; 602 /* fill in the range */ 603 do { 604 tab[++c] = v; 605 } while (c < n); 606 c = n; 607 /* 608 * Alas, the V7 Unix scanf also treats formats 609 * such as [a-c-e] as `the letters a through e'. 610 * This too is permitted by the standard.... 611 */ 612 goto doswitch; 613 break; 614 615 case ']': /* end of scanset */ 616 return (fmt); 617 618 default: /* just another character */ 619 c = n; 620 break; 621 } 622 } 623 /* NOTREACHED */ 624 } 625 626