1 // Copyright 2006 Google Inc.
2 // All Rights Reserved.
3 // Author: renn
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifdef HAVE_CONFIG_H
16 # include "config_auto.h"
17 #endif
18
19 #include <cctype>
20 #include <climits> // for CHAR_BIT
21 #include <cmath>
22 #include <cstdarg>
23 #include <cstddef>
24 #include <cstdint>
25 #include <cstdio>
26 #include <cstring>
27 #include <limits> // for std::numeric_limits
28
29 #include "scanutils.h"
30
// Bit flags collected while a single conversion specification is parsed.
enum Flags {
  FL_SPLAT = 1 << 0, // '*' given: parse the field but do not assign it
  FL_INV = 1 << 1,   // Character set is inverted
  FL_WIDTH = 1 << 2, // An explicit field width was given
  FL_MINUS = 1 << 3, // Number is negative
};
37
// Integer "rank" of a conversion target. Each 'h' length modifier lowers the
// rank by one and each 'l' raises it by one, starting from RANK_INT.
enum Ranks {
  RANK_CHAR = -2,     // "hh"
  RANK_SHORT = -1,    // "h"
  RANK_INT = 0,       // No length modifier
  RANK_LONG = 1,      // "l"
  RANK_LONGLONG = 2,  // "ll"
  RANK_PTR = INT_MAX  // Sentinel outside the normal range, used for pointers
};
46
47 const enum Ranks kMinRank = RANK_CHAR;
48 const enum Ranks kMaxRank = RANK_LONGLONG;
49
50 const enum Ranks kIntMaxRank = RANK_LONGLONG;
51 const enum Ranks kSizeTRank = RANK_LONG;
52 const enum Ranks kPtrDiffRank = RANK_LONG;
53
// Reason why format processing stopped early.
enum Bail {
  BAIL_NONE = 0, // Nothing went wrong
  BAIL_EOF = 1,  // Input ended
  BAIL_ERR = 2   // Input did not match the conversion
};
59
60 // Helper functions ------------------------------------------------------------
// Returns the number of bits in a `long`, i.e. the number of bits held by one
// word of the bitmaps handled by SetBit() and TestBit().
inline size_t LongBit() {
  const size_t bits_per_long = sizeof(long) * CHAR_BIT;
  return bits_per_long;
}
64
// Consumes ASCII whitespace from `s`. The first character that is not
// whitespace is pushed back onto the stream and returned (EOF at end of input),
// so the next read from `s` sees that character again.
static inline int SkipSpace(FILE *s) {
  int next = fgetc(s);
  while (isascii(next) && isspace(next)) {
    next = fgetc(s);
  }
  ungetc(next, s); // Keep the terminating character available for reading
  return next;
}
73
SetBit(unsigned long * bitmap,unsigned int bit)74 static inline void SetBit(unsigned long *bitmap, unsigned int bit) {
75 bitmap[bit / LongBit()] |= 1UL << (bit % LongBit());
76 }
77
TestBit(unsigned long * bitmap,unsigned int bit)78 static inline int TestBit(unsigned long *bitmap, unsigned int bit) {
79 return static_cast<int>(bitmap[bit / LongBit()] >> (bit % LongBit())) & 1;
80 }
81
// Returns the numeric value of the digit character `ch` in `base`, or -1 if
// `ch` is not a valid digit in that base.
//
// Decimal digits are accepted as-is when `base` is at least 10; for any
// smaller base only '0'..'7' are accepted. The letters 'A'..'F' and 'a'..'f'
// are accepted only when `base` is exactly 16.
static inline int DigitValue(int ch, int base) {
  if (ch >= '0' && ch <= '9') {
    if (base >= 10 || ch <= '7') {
      return ch - '0';
    }
  } else if (base == 16) {
    // Only A-F / a-f are hexadecimal digits. Accepting the whole alphabet
    // would yield digit values of 16 and above.
    if (ch >= 'A' && ch <= 'F') {
      return ch - 'A' + 10;
    }
    if (ch >= 'a' && ch <= 'f') {
      return ch - 'a' + 10;
    }
  }
  return -1;
}
94
95 // IO (re-)implementations -----------------------------------------------------
streamtoumax(FILE * s,int base)96 static uintmax_t streamtoumax(FILE *s, int base) {
97 int minus = 0;
98 uintmax_t v = 0;
99 int d, c = 0;
100
101 for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s)) {
102 ;
103 }
104
105 // Single optional + or -
106 if (c == '-' || c == '+') {
107 minus = (c == '-');
108 c = fgetc(s);
109 }
110
111 // Assign correct base
112 if (base == 0) {
113 if (c == '0') {
114 c = fgetc(s);
115 if (c == 'x' || c == 'X') {
116 base = 16;
117 c = fgetc(s);
118 } else {
119 base = 8;
120 }
121 }
122 } else if (base == 16) {
123 if (c == '0') {
124 c = fgetc(s);
125 if (c == 'x' || c == 'X') {
126 c = fgetc(s);
127 }
128 }
129 }
130
131 // Actual number parsing
132 for (; (c != EOF) && (d = DigitValue(c, base)) >= 0; c = fgetc(s)) {
133 v = v * base + d;
134 }
135
136 ungetc(c, s);
137 return minus ? -v : v;
138 }
139
streamtofloat(FILE * s)140 static double streamtofloat(FILE *s) {
141 bool minus = false;
142 uint64_t v = 0;
143 int d, c;
144 uint64_t k = 1;
145 uint64_t w = 0;
146
147 for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s)) {
148 ;
149 }
150
151 // Single optional + or -
152 if (c == '-' || c == '+') {
153 minus = (c == '-');
154 c = fgetc(s);
155 }
156
157 // Actual number parsing
158 for (; c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
159 v = v * 10 + d;
160 }
161 if (c == '.') {
162 for (c = fgetc(s); c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
163 w = w * 10 + d;
164 k *= 10;
165 }
166 }
167 double f = v + static_cast<double>(w) / k;
168 if (c == 'e' || c == 'E') {
169 c = fgetc(s);
170 int expsign = 1;
171 if (c == '-' || c == '+') {
172 expsign = (c == '-') ? -1 : 1;
173 c = fgetc(s);
174 }
175 int exponent = 0;
176 for (; (c != EOF) && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
177 exponent = exponent * 10 + d;
178 }
179 exponent *= expsign;
180 f *= pow(10.0, static_cast<double>(exponent));
181 }
182 ungetc(c, s);
183
184 return minus ? -f : f;
185 }
186
static int tvfscanf(FILE *stream, const char *format, va_list ap);

// Variadic front end: packs the arguments following `format` into a va_list,
// hands them to tvfscanf() and returns its result unchanged.
int tfscanf(FILE *stream, const char *format, ...) {
  va_list args;
  va_start(args, format);
  const int result = tvfscanf(stream, format, args);
  va_end(args);
  return result;
}
199
tvfscanf(FILE * stream,const char * format,va_list ap)200 static int tvfscanf(FILE *stream, const char *format, va_list ap) {
201 const char *p = format;
202 char ch;
203 int q = 0;
204 uintmax_t val = 0;
205 int rank = RANK_INT; // Default rank
206 unsigned int width = UINT_MAX;
207 int base;
208 int flags = 0;
209 enum {
210 ST_NORMAL, // Ground state
211 ST_FLAGS, // Special flags
212 ST_WIDTH, // Field width
213 ST_MODIFIERS, // Length or conversion modifiers
214 ST_MATCH_INIT, // Initial state of %[ sequence
215 ST_MATCH, // Main state of %[ sequence
216 ST_MATCH_RANGE, // After - in a %[ sequence
217 } state = ST_NORMAL;
218 char *sarg = nullptr; // %s %c or %[ string argument
219 enum Bail bail = BAIL_NONE;
220 int converted = 0; // Successful conversions
221 unsigned long
222 matchmap[((1 << CHAR_BIT) + (CHAR_BIT * sizeof(long) - 1)) / (CHAR_BIT * sizeof(long))];
223 int matchinv = 0; // Is match map inverted?
224 unsigned char range_start = 0;
225 auto start_off = std::ftell(stream);
226
227 // Skip leading spaces
228 SkipSpace(stream);
229
230 while ((ch = *p++) && !bail) {
231 switch (state) {
232 case ST_NORMAL:
233 if (ch == '%') {
234 state = ST_FLAGS;
235 flags = 0;
236 rank = RANK_INT;
237 width = UINT_MAX;
238 } else if (isascii(ch) && isspace(ch)) {
239 SkipSpace(stream);
240 } else {
241 if (fgetc(stream) != ch) {
242 bail = BAIL_ERR; // Match failure
243 }
244 }
245 break;
246
247 case ST_FLAGS:
248 if (ch == '*') {
249 flags |= FL_SPLAT;
250 } else if ('0' <= ch && ch <= '9') {
251 width = (ch - '0');
252 state = ST_WIDTH;
253 flags |= FL_WIDTH;
254 } else {
255 state = ST_MODIFIERS;
256 p--; // Process this character again
257 }
258 break;
259
260 case ST_WIDTH:
261 if (ch >= '0' && ch <= '9') {
262 width = width * 10 + (ch - '0');
263 } else {
264 state = ST_MODIFIERS;
265 p--; // Process this character again
266 }
267 break;
268
269 case ST_MODIFIERS:
270 switch (ch) {
271 // Length modifiers - nonterminal sequences
272 case 'h':
273 rank--; // Shorter rank
274 break;
275 case 'l':
276 rank++; // Longer rank
277 break;
278 case 'j':
279 rank = kIntMaxRank;
280 break;
281 case 'z':
282 rank = kSizeTRank;
283 break;
284 case 't':
285 rank = kPtrDiffRank;
286 break;
287 case 'L':
288 case 'q':
289 rank = RANK_LONGLONG; // long double/long long
290 break;
291
292 default:
293 // Output modifiers - terminal sequences
294 state = ST_NORMAL; // Next state will be normal
295 if (rank < kMinRank) { // Canonicalize rank
296 rank = kMinRank;
297 } else if (rank > kMaxRank) {
298 rank = kMaxRank;
299 }
300
301 switch (ch) {
302 case 'P': // Upper case pointer
303 case 'p': // Pointer
304 rank = RANK_PTR;
305 base = 0;
306 goto scan_int;
307
308 case 'i': // Base-independent integer
309 base = 0;
310 goto scan_int;
311
312 case 'd': // Decimal integer
313 base = 10;
314 goto scan_int;
315
316 case 'o': // Octal integer
317 base = 8;
318 goto scan_int;
319
320 case 'u': // Unsigned decimal integer
321 base = 10;
322 goto scan_int;
323
324 case 'x': // Hexadecimal integer
325 case 'X':
326 base = 16;
327 goto scan_int;
328
329 case 'n': // Number of characters consumed
330 val = std::ftell(stream) - start_off;
331 goto set_integer;
332
333 scan_int:
334 q = SkipSpace(stream);
335 if (q <= 0) {
336 bail = BAIL_EOF;
337 break;
338 }
339 val = streamtoumax(stream, base);
340 // fall through
341
342 set_integer:
343 if (!(flags & FL_SPLAT)) {
344 converted++;
345 switch (rank) {
346 case RANK_CHAR:
347 *va_arg(ap, unsigned char *) = static_cast<unsigned char>(val);
348 break;
349 case RANK_SHORT:
350 *va_arg(ap, unsigned short *) = static_cast<unsigned short>(val);
351 break;
352 case RANK_INT:
353 *va_arg(ap, unsigned int *) = static_cast<unsigned int>(val);
354 break;
355 case RANK_LONG:
356 *va_arg(ap, unsigned long *) = static_cast<unsigned long>(val);
357 break;
358 case RANK_LONGLONG:
359 *va_arg(ap, unsigned long long *) = static_cast<unsigned long long>(val);
360 break;
361 case RANK_PTR:
362 *va_arg(ap, void **) = reinterpret_cast<void *>(static_cast<uintptr_t>(val));
363 break;
364 }
365 }
366 break;
367
368 case 'f': // Preliminary float value parsing
369 case 'g':
370 case 'G':
371 case 'e':
372 case 'E':
373 q = SkipSpace(stream);
374 if (q <= 0) {
375 bail = BAIL_EOF;
376 break;
377 }
378
379 {
380 double fval = streamtofloat(stream);
381 if (!(flags & FL_SPLAT)) {
382 if (rank == RANK_INT) {
383 *va_arg(ap, float *) = static_cast<float>(fval);
384 } else if (rank == RANK_LONG) {
385 *va_arg(ap, double *) = static_cast<double>(fval);
386 }
387 converted++;
388 }
389 }
390 break;
391
392 case 'c': // Character
393 width = (flags & FL_WIDTH) ? width : 1; // Default width == 1
394 sarg = va_arg(ap, char *);
395 while (width--) {
396 if ((q = fgetc(stream)) <= 0) {
397 bail = BAIL_EOF;
398 break;
399 }
400 if (!(flags & FL_SPLAT)) {
401 *sarg++ = q;
402 converted++;
403 }
404 }
405 break;
406
407 case 's': // String
408 {
409 if (!(flags & FL_SPLAT)) {
410 sarg = va_arg(ap, char *);
411 }
412 unsigned length = 0;
413 while (width--) {
414 q = fgetc(stream);
415 if ((isascii(q) && isspace(q)) || (q <= 0)) {
416 ungetc(q, stream);
417 break;
418 }
419 if (!(flags & FL_SPLAT)) {
420 sarg[length] = q;
421 }
422 length++;
423 }
424 if (length == 0) {
425 bail = BAIL_EOF;
426 } else if (!(flags & FL_SPLAT)) {
427 sarg[length] = '\0'; // Terminate output
428 converted++;
429 }
430 } break;
431
432 case '[': // Character range
433 sarg = va_arg(ap, char *);
434 state = ST_MATCH_INIT;
435 matchinv = 0;
436 memset(matchmap, 0, sizeof matchmap);
437 break;
438
439 case '%': // %% sequence
440 if (fgetc(stream) != '%') {
441 bail = BAIL_ERR;
442 }
443 break;
444
445 default: // Anything else
446 bail = BAIL_ERR; // Unknown sequence
447 break;
448 }
449 }
450 break;
451
452 case ST_MATCH_INIT: // Initial state for %[ match
453 if (ch == '^' && !(flags & FL_INV)) {
454 matchinv = 1;
455 } else {
456 SetBit(matchmap, static_cast<unsigned char>(ch));
457 state = ST_MATCH;
458 }
459 break;
460
461 case ST_MATCH: // Main state for %[ match
462 if (ch == ']') {
463 goto match_run;
464 } else if (ch == '-') {
465 range_start = static_cast<unsigned char>(ch);
466 state = ST_MATCH_RANGE;
467 } else {
468 SetBit(matchmap, static_cast<unsigned char>(ch));
469 }
470 break;
471
472 case ST_MATCH_RANGE: // %[ match after -
473 if (ch == ']') {
474 SetBit(matchmap, static_cast<unsigned char>('-'));
475 goto match_run;
476 } else {
477 int i;
478 for (i = range_start; i < (static_cast<unsigned char>(ch)); i++) {
479 SetBit(matchmap, i);
480 }
481 state = ST_MATCH;
482 }
483 break;
484
485 match_run: // Match expression finished
486 char *oarg = sarg;
487 while (width) {
488 q = fgetc(stream);
489 auto qc = static_cast<unsigned char>(q);
490 if (q <= 0 || !(TestBit(matchmap, qc) ^ matchinv)) {
491 ungetc(q, stream);
492 break;
493 }
494 if (!(flags & FL_SPLAT)) {
495 *sarg = q;
496 }
497 sarg++;
498 }
499 if (oarg == sarg) {
500 bail = (q <= 0) ? BAIL_EOF : BAIL_ERR;
501 } else if (!(flags & FL_SPLAT)) {
502 *sarg = '\0';
503 converted++;
504 }
505 break;
506 }
507 }
508
509 if (bail == BAIL_EOF && !converted) {
510 converted = -1; // Return EOF (-1)
511 }
512
513 return converted;
514 }
515