xref: /minix/lib/libc/stdio/vfscanf.c (revision ebfedea0)
1 /*	$NetBSD: vfscanf.c,v 1.45 2013/05/17 12:55:57 joerg Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Torek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #if defined(LIBC_SCCS) && !defined(lint)
37 #if 0
38 static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
39 __FBSDID("$FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.41 2007/01/09 00:28:07 imp Exp $");
40 #else
41 __RCSID("$NetBSD: vfscanf.c,v 1.45 2013/05/17 12:55:57 joerg Exp $");
42 #endif
43 #endif /* LIBC_SCCS and not lint */
44 
45 #include "namespace.h"
46 #include <assert.h>
47 #include <ctype.h>
48 #include <inttypes.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <stddef.h>
52 #include <stdarg.h>
53 #include <string.h>
54 #include <wchar.h>
55 #include <wctype.h>
56 
57 #include "reentrant.h"
58 #include "local.h"
59 
60 #include <locale.h>
61 #include "setlocale_local.h"
62 
63 /*
64  * Provide an external name for vfscanf.  Note, we don't use the normal
65  * namespace.h method; stdio routines explicitly use the internal name
66  * __svfscanf.
67  */
68 #ifdef __weak_alias
69 __weak_alias(vfscanf,__svfscanf)
70 __weak_alias(vfscanf_l,__svfscanf_l)
71 #endif
72 
73 #define	BUF		513	/* Maximum length of numeric string. */
74 
75 /*
76  * Flags used during conversion.
77  */
78 #define	LONG		0x0001	/* l: long or double */
79 #define	LONGDBL		0x0002	/* L: long double */
80 #define	SHORT		0x0004	/* h: short */
81 #define	SUPPRESS	0x0008	/* *: suppress assignment */
82 #define	POINTER		0x0010	/* p: void * (as hex) */
83 #define	NOSKIP		0x0020	/* [ or c: do not skip blanks */
84 #define	LONGLONG	0x0400	/* ll: long long (+ deprecated q: quad) */
85 #define	INTMAXT		0x0800	/* j: intmax_t */
86 #define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
87 #define	SIZET		0x2000	/* z: size_t */
88 #define	SHORTSHORT	0x4000	/* hh: char */
89 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
90 
/*
 * The following are used in integral conversions only:
 * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
 */
95 #define	SIGNOK		0x00040	/* +/- is (still) legal */
96 #define	NDIGITS		0x00080	/* no digits detected */
97 #define	PFXOK		0x00100	/* 0x prefix is (still) legal */
98 #define	NZDIGITS	0x00200	/* no zero digits detected */
99 #define	HAVESIGN	0x10000	/* sign detected */
100 
101 /*
102  * Conversion types.
103  */
104 #define	CT_CHAR		0	/* %c conversion */
105 #define	CT_CCL		1	/* %[...] conversion */
106 #define	CT_STRING	2	/* %s conversion */
107 #define	CT_INT		3	/* %[dioupxX] conversion */
108 #define	CT_FLOAT	4	/* %[efgEFG] conversion */
109 
110 static const u_char *__sccl(char *, const u_char *, locale_t);
111 #ifndef NO_FLOATING_POINT
112 static size_t parsefloat(FILE *, char *, char *, locale_t);
113 #endif
114 
/*
 * Debugging aid, off by default.  When nonzero, a floating-point
 * conversion in __svfscanf_unlocked_l() calls abort() if the strto*_l()
 * routine stopped at a different offset than the length of the text
 * accepted by parsefloat().
 */
int __scanfdebug = 0;
116 
117 #define __collate_load_error /*CONSTCOND*/0
118 static int
119 __collate_range_cmp(int c1, int c2, locale_t loc)
120 {
121 	static char s1[2], s2[2];
122 
123 	s1[0] = c1;
124 	s2[0] = c2;
125 	return strcoll_l(s1, s2, loc);
126 }
127 
128 
129 /*
130  * __svfscanf - MT-safe version
131  */
132 int
133 __svfscanf(FILE *fp, char const *fmt0, va_list ap)
134 {
135 	return __svfscanf_l(fp, _current_locale(), fmt0, ap);
136 }
137 
138 int
139 __svfscanf_l(FILE *fp, locale_t loc, char const *fmt0, va_list ap)
140 {
141 	int ret;
142 
143 	FLOCKFILE(fp);
144 	ret = __svfscanf_unlocked_l(fp, loc, fmt0, ap);
145 	FUNLOCKFILE(fp);
146 	return ret;
147 }
148 
/*
 * SCANF_SKIP_SPACE - discard white space from the input stream.
 *
 * Must be expanded where `fp', `loc' and `nread' are in scope.  Bytes
 * are dropped from the stream buffer, which is refilled through
 * __srefill() whenever it is empty, for as long as isspace_l() accepts
 * them; each dropped byte is added to nread.  The loop ends at the first
 * byte that is not white space or when the buffer cannot be refilled.
 */
#define SCANF_SKIP_SPACE() \
do { \
	for (;;) { \
		if (fp->_r <= 0 && __srefill(fp) != 0) \
			break; \
		if (!isspace_l(*fp->_p, loc)) \
			break; \
		nread++; \
		fp->_r--; \
		fp->_p++; \
	} \
} while (/*CONSTCOND*/ 0)
154 
155 /*
156  * __svfscanf_unlocked - non-MT-safe version of __svfscanf
157  */
158 int
159 __svfscanf_unlocked_l(FILE *fp, locale_t loc, const char *fmt0, va_list ap)
160 {
161 	const u_char *fmt = (const u_char *)fmt0;
162 	int c;			/* character from format, or conversion */
163 	size_t width;		/* field width, or 0 */
164 	char *p;		/* points into all kinds of strings */
165 	size_t n;		/* handy size_t */
166 	int flags;		/* flags as defined above */
167 	char *p0;		/* saves original value of p when necessary */
168 	int nassigned;		/* number of fields assigned */
169 	int nconversions;	/* number of conversions */
170 	size_t nread;		/* number of characters consumed from fp */
171 	int base;		/* base argument to conversion function */
172 	char ccltab[256];	/* character class table for %[...] */
173 	char buf[BUF];		/* buffer for numeric and mb conversions */
174 	wchar_t *wcp;		/* handy wide-character pointer */
175 	size_t nconv;		/* length of multibyte sequence converted */
176 	static const mbstate_t initial;
177 	mbstate_t mbs;
178 
179 	/* `basefix' is used to avoid `if' tests in the integer scanner */
180 	static const short basefix[17] =
181 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
182 
183 	_DIAGASSERT(fp != NULL);
184 	_DIAGASSERT(fmt0 != NULL);
185 
186 	_SET_ORIENTATION(fp, -1);
187 
188 	nassigned = 0;
189 	nconversions = 0;
190 	nread = 0;
191 	base = 0;
192 	for (;;) {
193 		c = (unsigned char)*fmt++;
194 		if (c == 0)
195 			return nassigned;
196 		if (isspace_l(c, loc)) {
197 			while ((fp->_r > 0 || __srefill(fp) == 0) &&
198 			    isspace_l(*fp->_p, loc))
199 				nread++, fp->_r--, fp->_p++;
200 			continue;
201 		}
202 		if (c != '%')
203 			goto literal;
204 		width = 0;
205 		flags = 0;
206 		/*
207 		 * switch on the format.  continue if done;
208 		 * break once format type is derived.
209 		 */
210 again:		c = *fmt++;
211 		switch (c) {
212 		case '%':
213 			SCANF_SKIP_SPACE();
214 literal:
215 			if (fp->_r <= 0 && __srefill(fp))
216 				goto input_failure;
217 			if (*fp->_p != c)
218 				goto match_failure;
219 			fp->_r--, fp->_p++;
220 			nread++;
221 			continue;
222 
223 		case '*':
224 			flags |= SUPPRESS;
225 			goto again;
226 		case 'j':
227 			flags |= INTMAXT;
228 			goto again;
229 		case 'l':
230 			if (flags & LONG) {
231 				flags &= ~LONG;
232 				flags |= LONGLONG;
233 			} else
234 				flags |= LONG;
235 			goto again;
236 		case 'q':
237 			flags |= LONGLONG;	/* not quite */
238 			goto again;
239 		case 't':
240 			flags |= PTRDIFFT;
241 			goto again;
242 		case 'z':
243 			flags |= SIZET;
244 			goto again;
245 		case 'L':
246 			flags |= LONGDBL;
247 			goto again;
248 		case 'h':
249 			if (flags & SHORT) {
250 				flags &= ~SHORT;
251 				flags |= SHORTSHORT;
252 			} else
253 				flags |= SHORT;
254 			goto again;
255 
256 		case '0': case '1': case '2': case '3': case '4':
257 		case '5': case '6': case '7': case '8': case '9':
258 			width = width * 10 + c - '0';
259 			goto again;
260 
261 		/*
262 		 * Conversions.
263 		 */
264 		case 'd':
265 			c = CT_INT;
266 			base = 10;
267 			break;
268 
269 		case 'i':
270 			c = CT_INT;
271 			base = 0;
272 			break;
273 
274 		case 'o':
275 			c = CT_INT;
276 			flags |= UNSIGNED;
277 			base = 8;
278 			break;
279 
280 		case 'u':
281 			c = CT_INT;
282 			flags |= UNSIGNED;
283 			base = 10;
284 			break;
285 
286 		case 'X':
287 		case 'x':
288 			flags |= PFXOK;	/* enable 0x prefixing */
289 			c = CT_INT;
290 			flags |= UNSIGNED;
291 			base = 16;
292 			break;
293 
294 #ifndef NO_FLOATING_POINT
295 		case 'A': case 'E': case 'F': case 'G':
296 		case 'a': case 'e': case 'f': case 'g':
297 			c = CT_FLOAT;
298 			break;
299 #endif
300 
301 		case 'S':
302 			flags |= LONG;
303 			/* FALLTHROUGH */
304 		case 's':
305 			c = CT_STRING;
306 			break;
307 
308 		case '[':
309 			fmt = __sccl(ccltab, fmt, loc);
310 			flags |= NOSKIP;
311 			c = CT_CCL;
312 			break;
313 
314 		case 'C':
315 			flags |= LONG;
316 			/* FALLTHROUGH */
317 		case 'c':
318 			flags |= NOSKIP;
319 			c = CT_CHAR;
320 			break;
321 
322 		case 'p':	/* pointer format is like hex */
323 			flags |= POINTER | PFXOK;
324 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
325 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
326 			base = 16;
327 			break;
328 
329 		case 'n':
330 			nconversions++;
331 			if (flags & SUPPRESS)	/* ??? */
332 				continue;
333 			if (flags & SHORTSHORT)
334 				*va_arg(ap, char *) = (char)nread;
335 			else if (flags & SHORT)
336 				*va_arg(ap, short *) = (short)nread;
337 			else if (flags & LONG)
338 				*va_arg(ap, long *) = nread;
339 			else if (flags & LONGLONG)
340 				*va_arg(ap, long long *) = nread;
341 			else if (flags & INTMAXT)
342 				*va_arg(ap, intmax_t *) = nread;
343 			else if (flags & SIZET)
344 				*va_arg(ap, size_t *) = nread;
345 			else if (flags & PTRDIFFT)
346 				*va_arg(ap, ptrdiff_t *) = nread;
347 			else
348 				*va_arg(ap, int *) = (int)nread;
349 			continue;
350 
351 		default:
352 			goto match_failure;
353 
354 		/*
355 		 * Disgusting backwards compatibility hack.	XXX
356 		 */
357 		case '\0':	/* compat */
358 			return EOF;
359 		}
360 
361 		/*
362 		 * We have a conversion that requires input.
363 		 */
364 		if (fp->_r <= 0 && __srefill(fp))
365 			goto input_failure;
366 
367 		/*
368 		 * Consume leading white space, except for formats
369 		 * that suppress this.
370 		 */
371 		if ((flags & NOSKIP) == 0) {
372 			while (isspace_l(*fp->_p, loc)) {
373 				nread++;
374 				if (--fp->_r > 0)
375 					fp->_p++;
376 				else if (__srefill(fp))
377 					goto input_failure;
378 			}
379 			/*
380 			 * Note that there is at least one character in
381 			 * the buffer, so conversions that do not set NOSKIP
382 			 * ca no longer result in an input failure.
383 			 */
384 		}
385 
386 		/*
387 		 * Do the conversion.
388 		 */
389 		switch (c) {
390 
391 		case CT_CHAR:
392 			/* scan arbitrary characters (sets NOSKIP) */
393 			if (width == 0)
394 				width = 1;
395 			if (flags & LONG) {
396 				if ((flags & SUPPRESS) == 0)
397 					wcp = va_arg(ap, wchar_t *);
398 				else
399 					wcp = NULL;
400 				n = 0;
401 				while (width != 0) {
402 					if (n == MB_CUR_MAX_L(loc)) {
403 						fp->_flags |= __SERR;
404 						goto input_failure;
405 					}
406 					buf[n++] = *fp->_p;
407 					fp->_p++;
408 					fp->_r--;
409 					mbs = initial;
410 					nconv = mbrtowc_l(wcp, buf, n, &mbs,
411 					    loc);
412 					if (nconv == (size_t)-1) {
413 						fp->_flags |= __SERR;
414 						goto input_failure;
415 					}
416 					if (nconv == 0 && !(flags & SUPPRESS))
417 						*wcp = L'\0';
418 					if (nconv != (size_t)-2) {
419 						nread += n;
420 						width--;
421 						if (!(flags & SUPPRESS))
422 							wcp++;
423 						n = 0;
424 					}
425 					if (fp->_r <= 0 && __srefill(fp)) {
426 						if (n != 0) {
427 							fp->_flags |= __SERR;
428 							goto input_failure;
429 						}
430 						break;
431 					}
432 				}
433 				if (!(flags & SUPPRESS))
434 					nassigned++;
435 			} else if (flags & SUPPRESS) {
436 				size_t sum = 0;
437 				for (;;) {
438 					if ((n = fp->_r) < width) {
439 						sum += n;
440 						width -= n;
441 						fp->_p += n;
442 						if (__srefill(fp)) {
443 							if (sum == 0)
444 							    goto input_failure;
445 							break;
446 						}
447 					} else {
448 						sum += width;
449 						_DIAGASSERT(__type_fit(int,
450 						    fp->_r - width));
451 						fp->_r -= (int)width;
452 						fp->_p += width;
453 						break;
454 					}
455 				}
456 				nread += sum;
457 			} else {
458 				size_t r = fread(va_arg(ap, char *), 1,
459 				    width, fp);
460 
461 				if (r == 0)
462 					goto input_failure;
463 				nread += r;
464 				nassigned++;
465 			}
466 			nconversions++;
467 			break;
468 
469 		case CT_CCL:
470 			/* scan a (nonempty) character class (sets NOSKIP) */
471 			if (width == 0)
472 				width = (size_t)~0;	/* `infinity' */
473 			/* take only those things in the class */
474 			if (flags & LONG) {
475 				wchar_t twc;
476 				int nchars;
477 
478 				if ((flags & SUPPRESS) == 0)
479 					wcp = va_arg(ap, wchar_t *);
480 				else
481 					wcp = &twc;
482 				n = 0;
483 				nchars = 0;
484 				while (width != 0) {
485 					if (n == MB_CUR_MAX_L(loc)) {
486 						fp->_flags |= __SERR;
487 						goto input_failure;
488 					}
489 					buf[n++] = *fp->_p;
490 					fp->_p++;
491 					fp->_r--;
492 					mbs = initial;
493 					nconv = mbrtowc_l(wcp, buf, n, &mbs,
494 					    loc);
495 					if (nconv == (size_t)-1) {
496 						fp->_flags |= __SERR;
497 						goto input_failure;
498 					}
499 					if (nconv == 0)
500 						*wcp = L'\0';
501 					if (nconv != (size_t)-2) {
502 						if (wctob_l(*wcp, loc) != EOF &&
503 						    !ccltab[wctob_l(*wcp, loc)]) {
504 							while (n != 0) {
505 								n--;
506 								(void)ungetc(buf[n],
507 								    fp);
508 							}
509 							break;
510 						}
511 						nread += n;
512 						width--;
513 						if (!(flags & SUPPRESS))
514 							wcp++;
515 						nchars++;
516 						n = 0;
517 					}
518 					if (fp->_r <= 0 && __srefill(fp)) {
519 						if (n != 0) {
520 							fp->_flags |= __SERR;
521 							goto input_failure;
522 						}
523 						break;
524 					}
525 				}
526 				if (n != 0) {
527 					fp->_flags |= __SERR;
528 					goto input_failure;
529 				}
530 				n = nchars;
531 				if (n == 0)
532 					goto match_failure;
533 				if (!(flags & SUPPRESS)) {
534 					*wcp = L'\0';
535 					nassigned++;
536 				}
537 			} else if (flags & SUPPRESS) {
538 				n = 0;
539 				while (ccltab[*fp->_p]) {
540 					n++, fp->_r--, fp->_p++;
541 					if (--width == 0)
542 						break;
543 					if (fp->_r <= 0 && __srefill(fp)) {
544 						if (n == 0)
545 							goto input_failure;
546 						break;
547 					}
548 				}
549 				if (n == 0)
550 					goto match_failure;
551 			} else {
552 				p0 = p = va_arg(ap, char *);
553 				while (ccltab[*fp->_p]) {
554 					fp->_r--;
555 					*p++ = *fp->_p++;
556 					if (--width == 0)
557 						break;
558 					if (fp->_r <= 0 && __srefill(fp)) {
559 						if (p == p0)
560 							goto input_failure;
561 						break;
562 					}
563 				}
564 				n = p - p0;
565 				if (n == 0)
566 					goto match_failure;
567 				*p = 0;
568 				nassigned++;
569 			}
570 			nread += n;
571 			nconversions++;
572 			break;
573 
574 		case CT_STRING:
575 			/* like CCL, but zero-length string OK, & no NOSKIP */
576 			if (width == 0)
577 				width = (size_t)~0;
578 			if (flags & LONG) {
579 				wchar_t twc;
580 
581 				if ((flags & SUPPRESS) == 0)
582 					wcp = va_arg(ap, wchar_t *);
583 				else
584 					wcp = &twc;
585 				n = 0;
586 				while (!isspace_l(*fp->_p, loc) && width != 0) {
587 					if (n == MB_CUR_MAX_L(loc)) {
588 						fp->_flags |= __SERR;
589 						goto input_failure;
590 					}
591 					buf[n++] = *fp->_p;
592 					fp->_p++;
593 					fp->_r--;
594 					mbs = initial;
595 					nconv = mbrtowc_l(wcp, buf, n, &mbs,
596 					    loc);
597 					if (nconv == (size_t)-1) {
598 						fp->_flags |= __SERR;
599 						goto input_failure;
600 					}
601 					if (nconv == 0)
602 						*wcp = L'\0';
603 					if (nconv != (size_t)-2) {
604 						if (iswspace_l(*wcp, loc)) {
605 							while (n != 0) {
606 								n--;
607 								(void)ungetc(buf[n],
608 								    fp);
609 							}
610 							break;
611 						}
612 						nread += n;
613 						width--;
614 						if (!(flags & SUPPRESS))
615 							wcp++;
616 						n = 0;
617 					}
618 					if (fp->_r <= 0 && __srefill(fp)) {
619 						if (n != 0) {
620 							fp->_flags |= __SERR;
621 							goto input_failure;
622 						}
623 						break;
624 					}
625 				}
626 				if (!(flags & SUPPRESS)) {
627 					*wcp = L'\0';
628 					nassigned++;
629 				}
630 			} else if (flags & SUPPRESS) {
631 				n = 0;
632 				while (!isspace_l(*fp->_p, loc)) {
633 					n++, fp->_r--, fp->_p++;
634 					if (--width == 0)
635 						break;
636 					if (fp->_r <= 0 && __srefill(fp))
637 						break;
638 				}
639 				nread += n;
640 			} else {
641 				p0 = p = va_arg(ap, char *);
642 				while (!isspace_l(*fp->_p, loc)) {
643 					fp->_r--;
644 					*p++ = *fp->_p++;
645 					if (--width == 0)
646 						break;
647 					if (fp->_r <= 0 && __srefill(fp))
648 						break;
649 				}
650 				*p = 0;
651 				nread += p - p0;
652 				nassigned++;
653 			}
654 			nconversions++;
655 			continue;
656 
657 		case CT_INT:
658 			/* scan an integer as if by the conversion function */
659 #ifdef hardway
660 			if (width == 0 || width > sizeof(buf) - 1)
661 				width = sizeof(buf) - 1;
662 #else
663 			/* size_t is unsigned, hence this optimisation */
664 			if (--width > sizeof(buf) - 2)
665 				width = sizeof(buf) - 2;
666 			width++;
667 #endif
668 			flags |= SIGNOK | NDIGITS | NZDIGITS;
669 			for (p = buf; width; width--) {
670 				c = *fp->_p;
671 				/*
672 				 * Switch on the character; `goto ok'
673 				 * if we accept it as a part of number.
674 				 */
675 				switch (c) {
676 
677 				/*
678 				 * The digit 0 is always legal, but is
679 				 * special.  For %i conversions, if no
680 				 * digits (zero or nonzero) have been
681 				 * scanned (only signs), we will have
682 				 * base==0.  In that case, we should set
683 				 * it to 8 and enable 0x prefixing.
684 				 * Also, if we have not scanned zero digits
685 				 * before this, do not turn off prefixing
686 				 * (someone else will turn it off if we
687 				 * have scanned any nonzero digits).
688 				 */
689 				case '0':
690 					if (base == 0) {
691 						base = 8;
692 						flags |= PFXOK;
693 					}
694 					if (flags & NZDIGITS)
695 					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
696 					else
697 					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
698 					goto ok;
699 
700 				/* 1 through 7 always legal */
701 				case '1': case '2': case '3':
702 				case '4': case '5': case '6': case '7':
703 					base = basefix[base];
704 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
705 					goto ok;
706 
707 				/* digits 8 and 9 ok iff decimal or hex */
708 				case '8': case '9':
709 					base = basefix[base];
710 					if (base <= 8)
711 						break;	/* not legal here */
712 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
713 					goto ok;
714 
715 				/* letters ok iff hex */
716 				case 'A': case 'B': case 'C':
717 				case 'D': case 'E': case 'F':
718 				case 'a': case 'b': case 'c':
719 				case 'd': case 'e': case 'f':
720 					/* no need to fix base here */
721 					if (base <= 10)
722 						break;	/* not legal here */
723 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
724 					goto ok;
725 
726 				/* sign ok only as first character */
727 				case '+': case '-':
728 					if (flags & SIGNOK) {
729 						flags &= ~SIGNOK;
730 						flags |= HAVESIGN;
731 						goto ok;
732 					}
733 					break;
734 
735 				/*
736 				 * x ok iff flag still set & 2nd char (or
737 				 * 3rd char if we have a sign).
738 				 */
739 				case 'x': case 'X':
740 					if (flags & PFXOK && p ==
741 					    buf + 1 + !!(flags & HAVESIGN)) {
742 						base = 16;	/* if %i */
743 						flags &= ~PFXOK;
744 						goto ok;
745 					}
746 					break;
747 				}
748 
749 				/*
750 				 * If we got here, c is not a legal character
751 				 * for a number.  Stop accumulating digits.
752 				 */
753 				break;
754 		ok:
755 				/*
756 				 * c is legal: store it and look at the next.
757 				 */
758 				*p++ = c;
759 				if (--fp->_r > 0)
760 					fp->_p++;
761 				else if (__srefill(fp))
762 					break;		/* EOF */
763 			}
764 			/*
765 			 * If we had only a sign, it is no good; push
766 			 * back the sign.  If the number ends in `x',
767 			 * it was [sign] '0' 'x', so push back the x
768 			 * and treat it as [sign] '0'.
769 			 */
770 			if (flags & NDIGITS) {
771 				if (p > buf)
772 					(void)ungetc(*(u_char *)--p, fp);
773 				goto match_failure;
774 			}
775 			c = ((u_char *)p)[-1];
776 			if (c == 'x' || c == 'X') {
777 				--p;
778 				(void)ungetc(c, fp);
779 			}
780 			if ((flags & SUPPRESS) == 0) {
781 				uintmax_t res;
782 
783 				*p = 0;
784 				if ((flags & UNSIGNED) == 0)
785 				    res = strtoimax_l(buf, (char **)NULL, base,
786 				        loc);
787 				else
788 				    res = strtoumax_l(buf, (char **)NULL, base,
789 				        loc);
790 				if (flags & POINTER)
791 					*va_arg(ap, void **) =
792 							(void *)(uintptr_t)res;
793 				else if (flags & SHORTSHORT)
794 					*va_arg(ap, char *) = (char)res;
795 				else if (flags & SHORT)
796 					*va_arg(ap, short *) = (short)res;
797 				else if (flags & LONG)
798 					*va_arg(ap, long *) = (long)res;
799 				else if (flags & LONGLONG)
800 					*va_arg(ap, long long *) = res;
801 				else if (flags & INTMAXT)
802 					*va_arg(ap, intmax_t *) = res;
803 				else if (flags & PTRDIFFT)
804 					*va_arg(ap, ptrdiff_t *) =
805 					    (ptrdiff_t)res;
806 				else if (flags & SIZET)
807 					*va_arg(ap, size_t *) = (size_t)res;
808 				else
809 					*va_arg(ap, int *) = (int)res;
810 				nassigned++;
811 			}
812 			nread += p - buf;
813 			nconversions++;
814 			break;
815 
816 #ifndef NO_FLOATING_POINT
817 		case CT_FLOAT:
818 			/* scan a floating point number as if by strtod */
819 			if (width == 0 || width > sizeof(buf) - 1)
820 				width = sizeof(buf) - 1;
821 			if ((width = parsefloat(fp, buf, buf + width, loc)) == 0)
822 				goto match_failure;
823 			if ((flags & SUPPRESS) == 0) {
824 				if (flags & LONGDBL) {
825 					long double res = strtold_l(buf, &p,
826 					    loc);
827 					*va_arg(ap, long double *) = res;
828 				} else if (flags & LONG) {
829 					double res = strtod_l(buf, &p, loc);
830 					*va_arg(ap, double *) = res;
831 				} else {
832 					float res = strtof_l(buf, &p, loc);
833 					*va_arg(ap, float *) = res;
834 				}
835 				if (__scanfdebug && (size_t)(p - buf) != width)
836 					abort();
837 				nassigned++;
838 			}
839 			nread += width;
840 			nconversions++;
841 			break;
842 #endif /* !NO_FLOATING_POINT */
843 		}
844 	}
845 input_failure:
846 	return nconversions != 0 ? nassigned : EOF;
847 match_failure:
848 	return nassigned;
849 }
850 
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 * tab: 256-entry table, indexed by byte value, that is overwritten.
 * fmt: points just past the `[' of the directive.
 * loc: locale handed to __collate_range_cmp() when ranges are compared.
 *
 * If the format ends before a closing `]' is seen, the returned pointer
 * addresses the terminating NUL instead.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt, locale_t loc)
{
	int c, n, v, i;

	_DIAGASSERT(tab != NULL);
	_DIAGASSERT(fmt != NULL);
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* first `clear' the whole table (to the default value v) */
	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
	(void)memset(tab, v, 256);

	if (c == 0)
		return fmt - 1;/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return fmt - 1;

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 */
			n = *fmt;
			if (n == ']' || (__collate_load_error ? n < c :
			    __collate_range_cmp(n, c, loc) < 0)) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range */
			if (__collate_load_error) {
				do
					tab[++c] = v;
				while (c < n);
			} else {
				/*
				 * Mark every byte value that collates
				 * after c and not after n.
				 */
				for (i = 0; i < 256; i ++)
					if (__collate_range_cmp(c, i, loc) < 0 &&
					    __collate_range_cmp(i, n, loc) <= 0)
						tab[i] = v;
			}
#if 1	/* XXX another disgusting compatibility hack */
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;
#else
			c = *fmt++;
			if (c == 0)
				return fmt - 1;
			if (c == ']')
				return fmt;
#endif

		case ']':		/* end of scanset */
			return fmt;

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
958 
959 #ifndef NO_FLOATING_POINT
960 static size_t
961 parsefloat(FILE *fp, char *buf, char *end, locale_t loc)
962 {
963 	char *commit, *p;
964 	int infnanpos = 0;
965 	enum {
966 		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
967 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
968 	} state = S_START;
969 	unsigned char c;
970 	char decpt = *localeconv_l(loc)->decimal_point;
971 	_Bool gotmantdig = 0, ishex = 0;
972 
973 	/*
974 	 * We set commit = p whenever the string we have read so far
975 	 * constitutes a valid representation of a floating point
976 	 * number by itself.  At some point, the parse will complete
977 	 * or fail, and we will ungetc() back to the last commit point.
978 	 * To ensure that the file offset gets updated properly, it is
979 	 * always necessary to read at least one character that doesn't
980 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
981 	 */
982 	commit = buf - 1;
983 	for (p = buf; p < end; ) {
984 		c = *fp->_p;
985 reswitch:
986 		switch (state) {
987 		case S_START:
988 			state = S_GOTSIGN;
989 			if (c == '-' || c == '+')
990 				break;
991 			else
992 				goto reswitch;
993 		case S_GOTSIGN:
994 			switch (c) {
995 			case '0':
996 				state = S_MAYBEHEX;
997 				commit = p;
998 				break;
999 			case 'I':
1000 			case 'i':
1001 				state = S_INF;
1002 				break;
1003 			case 'N':
1004 			case 'n':
1005 				state = S_NAN;
1006 				break;
1007 			default:
1008 				state = S_DIGITS;
1009 				goto reswitch;
1010 			}
1011 			break;
1012 		case S_INF:
1013 			if (infnanpos > 6 ||
1014 			    (c != "nfinity"[infnanpos] &&
1015 			     c != "NFINITY"[infnanpos]))
1016 				goto parsedone;
1017 			if (infnanpos == 1 || infnanpos == 6)
1018 				commit = p;	/* inf or infinity */
1019 			infnanpos++;
1020 			break;
1021 		case S_NAN:
1022 			switch (infnanpos) {
1023 			case -1:	/* XXX kludge to deal with nan(...) */
1024 				goto parsedone;
1025 			case 0:
1026 				if (c != 'A' && c != 'a')
1027 					goto parsedone;
1028 				break;
1029 			case 1:
1030 				if (c != 'N' && c != 'n')
1031 					goto parsedone;
1032 				else
1033 					commit = p;
1034 				break;
1035 			case 2:
1036 				if (c != '(')
1037 					goto parsedone;
1038 				break;
1039 			default:
1040 				if (c == ')') {
1041 					commit = p;
1042 					infnanpos = -2;
1043 				} else if (!isalnum_l(c, loc) && c != '_')
1044 					goto parsedone;
1045 				break;
1046 			}
1047 			infnanpos++;
1048 			break;
1049 		case S_MAYBEHEX:
1050 			state = S_DIGITS;
1051 			if (c == 'X' || c == 'x') {
1052 				ishex = 1;
1053 				break;
1054 			} else {	/* we saw a '0', but no 'x' */
1055 				gotmantdig = 1;
1056 				goto reswitch;
1057 			}
1058 		case S_DIGITS:
1059 			if ((ishex && isxdigit_l(c, loc)) || isdigit_l(c, loc))
1060 				gotmantdig = 1;
1061 			else {
1062 				state = S_FRAC;
1063 				if (c != decpt)
1064 					goto reswitch;
1065 			}
1066 			if (gotmantdig)
1067 				commit = p;
1068 			break;
1069 		case S_FRAC:
1070 			if (((c == 'E' || c == 'e') && !ishex) ||
1071 			    ((c == 'P' || c == 'p') && ishex)) {
1072 				if (!gotmantdig)
1073 					goto parsedone;
1074 				else
1075 					state = S_EXP;
1076 			} else if ((ishex && isxdigit_l(c, loc)) || isdigit_l(c, loc)) {
1077 				commit = p;
1078 				gotmantdig = 1;
1079 			} else
1080 				goto parsedone;
1081 			break;
1082 		case S_EXP:
1083 			state = S_EXPDIGITS;
1084 			if (c == '-' || c == '+')
1085 				break;
1086 			else
1087 				goto reswitch;
1088 		case S_EXPDIGITS:
1089 			if (isdigit_l(c, loc))
1090 				commit = p;
1091 			else
1092 				goto parsedone;
1093 			break;
1094 		default:
1095 			abort();
1096 		}
1097 		*p++ = c;
1098 		if (--fp->_r > 0)
1099 			fp->_p++;
1100 		else if (__srefill(fp))
1101 			break;	/* EOF */
1102 	}
1103 
1104 parsedone:
1105 	while (commit < --p)
1106 		(void)ungetc(*(u_char *)p, fp);
1107 	*++commit = '\0';
1108 	return commit - buf;
1109 }
1110 #endif
1111