1 // Copyright 2006 Google Inc.
2 // All Rights Reserved.
3 // Author: renn
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifdef HAVE_CONFIG_H
16 #  include "config_auto.h"
17 #endif
18 
19 #include <cctype>
20 #include <climits> // for CHAR_BIT
21 #include <cmath>
22 #include <cstdarg>
23 #include <cstddef>
24 #include <cstdint>
25 #include <cstdio>
26 #include <cstring>
27 #include <limits> // for std::numeric_limits
28 
29 #include "scanutils.h"
30 
// Bit flags collected while parsing a single conversion directive.
enum Flags {
  FL_SPLAT = 1 << 0, // '*': drop the value, do not assign it
  FL_INV = 1 << 1,   // Character set is inverted
  FL_WIDTH = 1 << 2, // An explicit field width was given
  FL_MINUS = 1 << 3, // Negative number
};
37 
// Size "rank" of a conversion target. Length modifiers in the format string
// move the rank up or down relative to RANK_INT.
enum Ranks {
  RANK_CHAR = -2,
  RANK_SHORT = -1,
  RANK_INT = 0,
  RANK_LONG = 1,
  RANK_LONGLONG = 2,
  RANK_PTR = INT_MAX // Special value used for pointers
};
46 
47 const enum Ranks kMinRank = RANK_CHAR;
48 const enum Ranks kMaxRank = RANK_LONGLONG;
49 
50 const enum Ranks kIntMaxRank = RANK_LONGLONG;
51 const enum Ranks kSizeTRank = RANK_LONG;
52 const enum Ranks kPtrDiffRank = RANK_LONG;
53 
// Reason for abandoning the scan early.
enum Bail {
  BAIL_NONE = 0, // No error condition
  BAIL_EOF = 1,  // Hit EOF
  BAIL_ERR = 2   // Conversion mismatch
};
59 
60 // Helper functions ------------------------------------------------------------
// Number of bits in a `long`.
inline size_t LongBit() {
  return sizeof(long) * CHAR_BIT;
}
64 
// Consumes ASCII whitespace from `s` and returns the first character that is
// not whitespace (or EOF). That character is pushed back, so the next read
// from the stream sees it again.
static inline int SkipSpace(FILE *s) {
  int next = fgetc(s);
  while (isascii(next) && isspace(next)) {
    next = fgetc(s);
  }
  ungetc(next, s); // Make sure next char is available for reading
  return next;
}
73 
SetBit(unsigned long * bitmap,unsigned int bit)74 static inline void SetBit(unsigned long *bitmap, unsigned int bit) {
75   bitmap[bit / LongBit()] |= 1UL << (bit % LongBit());
76 }
77 
TestBit(unsigned long * bitmap,unsigned int bit)78 static inline int TestBit(unsigned long *bitmap, unsigned int bit) {
79   return static_cast<int>(bitmap[bit / LongBit()] >> (bit % LongBit())) & 1;
80 }
81 
// Returns the numeric value of digit character `ch` in the given `base`, or
// -1 if `ch` is not a valid digit for that base.
//
// Decimal digits are accepted in full for bases >= 10; for any smaller base
// only '0'..'7' are accepted. The letters 'A'..'F' / 'a'..'f' are accepted for
// base 16 only. Other letters are never digits: accepting 'G'..'Z' would yield
// values >= 16 and corrupt the parsed hexadecimal number.
static inline int DigitValue(int ch, int base) {
  if (ch >= '0' && ch <= '9') {
    if (base >= 10 || ch <= '7') {
      return ch - '0';
    }
  } else if (base == 16) {
    if (ch >= 'A' && ch <= 'F') {
      return ch - 'A' + 10;
    }
    if (ch >= 'a' && ch <= 'f') {
      return ch - 'a' + 10;
    }
  }
  return -1;
}
94 
95 // IO (re-)implementations -----------------------------------------------------
streamtoumax(FILE * s,int base)96 static uintmax_t streamtoumax(FILE *s, int base) {
97   int minus = 0;
98   uintmax_t v = 0;
99   int d, c = 0;
100 
101   for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s)) {
102     ;
103   }
104 
105   // Single optional + or -
106   if (c == '-' || c == '+') {
107     minus = (c == '-');
108     c = fgetc(s);
109   }
110 
111   // Assign correct base
112   if (base == 0) {
113     if (c == '0') {
114       c = fgetc(s);
115       if (c == 'x' || c == 'X') {
116         base = 16;
117         c = fgetc(s);
118       } else {
119         base = 8;
120       }
121     }
122   } else if (base == 16) {
123     if (c == '0') {
124       c = fgetc(s);
125       if (c == 'x' || c == 'X') {
126         c = fgetc(s);
127       }
128     }
129   }
130 
131   // Actual number parsing
132   for (; (c != EOF) && (d = DigitValue(c, base)) >= 0; c = fgetc(s)) {
133     v = v * base + d;
134   }
135 
136   ungetc(c, s);
137   return minus ? -v : v;
138 }
139 
streamtofloat(FILE * s)140 static double streamtofloat(FILE *s) {
141   bool minus = false;
142   uint64_t v = 0;
143   int d, c;
144   uint64_t k = 1;
145   uint64_t w = 0;
146 
147   for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s)) {
148     ;
149   }
150 
151   // Single optional + or -
152   if (c == '-' || c == '+') {
153     minus = (c == '-');
154     c = fgetc(s);
155   }
156 
157   // Actual number parsing
158   for (; c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
159     v = v * 10 + d;
160   }
161   if (c == '.') {
162     for (c = fgetc(s); c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
163       w = w * 10 + d;
164       k *= 10;
165     }
166   }
167   double f = v + static_cast<double>(w) / k;
168   if (c == 'e' || c == 'E') {
169     c = fgetc(s);
170     int expsign = 1;
171     if (c == '-' || c == '+') {
172       expsign = (c == '-') ? -1 : 1;
173       c = fgetc(s);
174     }
175     int exponent = 0;
176     for (; (c != EOF) && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
177       exponent = exponent * 10 + d;
178     }
179     exponent *= expsign;
180     f *= pow(10.0, static_cast<double>(exponent));
181   }
182   ungetc(c, s);
183 
184   return minus ? -f : f;
185 }
186 
static int tvfscanf(FILE *stream, const char *format, va_list ap);

// Variadic front end: packs the trailing arguments into a va_list, hands them
// to tvfscanf() and returns its result unchanged.
int tfscanf(FILE *stream, const char *format, ...) {
  va_list args;
  va_start(args, format);
  const int result = tvfscanf(stream, format, args);
  va_end(args);
  return result;
}
199 
tvfscanf(FILE * stream,const char * format,va_list ap)200 static int tvfscanf(FILE *stream, const char *format, va_list ap) {
201   const char *p = format;
202   char ch;
203   int q = 0;
204   uintmax_t val = 0;
205   int rank = RANK_INT; // Default rank
206   unsigned int width = UINT_MAX;
207   int base;
208   int flags = 0;
209   enum {
210     ST_NORMAL,      // Ground state
211     ST_FLAGS,       // Special flags
212     ST_WIDTH,       // Field width
213     ST_MODIFIERS,   // Length or conversion modifiers
214     ST_MATCH_INIT,  // Initial state of %[ sequence
215     ST_MATCH,       // Main state of %[ sequence
216     ST_MATCH_RANGE, // After - in a %[ sequence
217   } state = ST_NORMAL;
218   char *sarg = nullptr; // %s %c or %[ string argument
219   enum Bail bail = BAIL_NONE;
220   int converted = 0; // Successful conversions
221   unsigned long
222       matchmap[((1 << CHAR_BIT) + (CHAR_BIT * sizeof(long) - 1)) / (CHAR_BIT * sizeof(long))];
223   int matchinv = 0; // Is match map inverted?
224   unsigned char range_start = 0;
225   auto start_off = std::ftell(stream);
226 
227   // Skip leading spaces
228   SkipSpace(stream);
229 
230   while ((ch = *p++) && !bail) {
231     switch (state) {
232       case ST_NORMAL:
233         if (ch == '%') {
234           state = ST_FLAGS;
235           flags = 0;
236           rank = RANK_INT;
237           width = UINT_MAX;
238         } else if (isascii(ch) && isspace(ch)) {
239           SkipSpace(stream);
240         } else {
241           if (fgetc(stream) != ch) {
242             bail = BAIL_ERR; // Match failure
243           }
244         }
245         break;
246 
247       case ST_FLAGS:
248         if (ch == '*') {
249           flags |= FL_SPLAT;
250         } else if ('0' <= ch && ch <= '9') {
251           width = (ch - '0');
252           state = ST_WIDTH;
253           flags |= FL_WIDTH;
254         } else {
255           state = ST_MODIFIERS;
256           p--; // Process this character again
257         }
258         break;
259 
260       case ST_WIDTH:
261         if (ch >= '0' && ch <= '9') {
262           width = width * 10 + (ch - '0');
263         } else {
264           state = ST_MODIFIERS;
265           p--; // Process this character again
266         }
267         break;
268 
269       case ST_MODIFIERS:
270         switch (ch) {
271           // Length modifiers - nonterminal sequences
272           case 'h':
273             rank--; // Shorter rank
274             break;
275           case 'l':
276             rank++; // Longer rank
277             break;
278           case 'j':
279             rank = kIntMaxRank;
280             break;
281           case 'z':
282             rank = kSizeTRank;
283             break;
284           case 't':
285             rank = kPtrDiffRank;
286             break;
287           case 'L':
288           case 'q':
289             rank = RANK_LONGLONG; // long double/long long
290             break;
291 
292           default:
293             // Output modifiers - terminal sequences
294             state = ST_NORMAL;   // Next state will be normal
295             if (rank < kMinRank) { // Canonicalize rank
296               rank = kMinRank;
297             } else if (rank > kMaxRank) {
298               rank = kMaxRank;
299             }
300 
301             switch (ch) {
302               case 'P': // Upper case pointer
303               case 'p': // Pointer
304                 rank = RANK_PTR;
305                 base = 0;
306                 goto scan_int;
307 
308               case 'i': // Base-independent integer
309                 base = 0;
310                 goto scan_int;
311 
312               case 'd': // Decimal integer
313                 base = 10;
314                 goto scan_int;
315 
316               case 'o': // Octal integer
317                 base = 8;
318                 goto scan_int;
319 
320               case 'u': // Unsigned decimal integer
321                 base = 10;
322                 goto scan_int;
323 
324               case 'x': // Hexadecimal integer
325               case 'X':
326                 base = 16;
327                 goto scan_int;
328 
329               case 'n': // Number of characters consumed
330                 val = std::ftell(stream) - start_off;
331                 goto set_integer;
332 
333               scan_int:
334                 q = SkipSpace(stream);
335                 if (q <= 0) {
336                   bail = BAIL_EOF;
337                   break;
338                 }
339                 val = streamtoumax(stream, base);
340                 // fall through
341 
342               set_integer:
343                 if (!(flags & FL_SPLAT)) {
344                   converted++;
345                   switch (rank) {
346                     case RANK_CHAR:
347                       *va_arg(ap, unsigned char *) = static_cast<unsigned char>(val);
348                       break;
349                     case RANK_SHORT:
350                       *va_arg(ap, unsigned short *) = static_cast<unsigned short>(val);
351                       break;
352                     case RANK_INT:
353                       *va_arg(ap, unsigned int *) = static_cast<unsigned int>(val);
354                       break;
355                     case RANK_LONG:
356                       *va_arg(ap, unsigned long *) = static_cast<unsigned long>(val);
357                       break;
358                     case RANK_LONGLONG:
359                       *va_arg(ap, unsigned long long *) = static_cast<unsigned long long>(val);
360                       break;
361                     case RANK_PTR:
362                       *va_arg(ap, void **) = reinterpret_cast<void *>(static_cast<uintptr_t>(val));
363                       break;
364                   }
365                 }
366                 break;
367 
368               case 'f': // Preliminary float value parsing
369               case 'g':
370               case 'G':
371               case 'e':
372               case 'E':
373                 q = SkipSpace(stream);
374                 if (q <= 0) {
375                   bail = BAIL_EOF;
376                   break;
377                 }
378 
379                 {
380                   double fval = streamtofloat(stream);
381                   if (!(flags & FL_SPLAT)) {
382                     if (rank == RANK_INT) {
383                       *va_arg(ap, float *) = static_cast<float>(fval);
384                     } else if (rank == RANK_LONG) {
385                       *va_arg(ap, double *) = static_cast<double>(fval);
386                     }
387                     converted++;
388                   }
389                 }
390                 break;
391 
392               case 'c':                                 // Character
393                 width = (flags & FL_WIDTH) ? width : 1; // Default width == 1
394                 sarg = va_arg(ap, char *);
395                 while (width--) {
396                   if ((q = fgetc(stream)) <= 0) {
397                     bail = BAIL_EOF;
398                     break;
399                   }
400                   if (!(flags & FL_SPLAT)) {
401                     *sarg++ = q;
402                     converted++;
403                   }
404                 }
405                 break;
406 
407               case 's': // String
408               {
409                 if (!(flags & FL_SPLAT)) {
410                   sarg = va_arg(ap, char *);
411                 }
412                 unsigned length = 0;
413                 while (width--) {
414                   q = fgetc(stream);
415                   if ((isascii(q) && isspace(q)) || (q <= 0)) {
416                     ungetc(q, stream);
417                     break;
418                   }
419                   if (!(flags & FL_SPLAT)) {
420                     sarg[length] = q;
421                   }
422                   length++;
423                 }
424                 if (length == 0) {
425                   bail = BAIL_EOF;
426                 } else if (!(flags & FL_SPLAT)) {
427                   sarg[length] = '\0'; // Terminate output
428                   converted++;
429                 }
430               } break;
431 
432               case '[': // Character range
433                 sarg = va_arg(ap, char *);
434                 state = ST_MATCH_INIT;
435                 matchinv = 0;
436                 memset(matchmap, 0, sizeof matchmap);
437                 break;
438 
439               case '%': // %% sequence
440                 if (fgetc(stream) != '%') {
441                   bail = BAIL_ERR;
442                 }
443                 break;
444 
445               default:           // Anything else
446                 bail = BAIL_ERR; // Unknown sequence
447                 break;
448             }
449         }
450         break;
451 
452       case ST_MATCH_INIT: // Initial state for %[ match
453         if (ch == '^' && !(flags & FL_INV)) {
454           matchinv = 1;
455         } else {
456           SetBit(matchmap, static_cast<unsigned char>(ch));
457           state = ST_MATCH;
458         }
459         break;
460 
461       case ST_MATCH: // Main state for %[ match
462         if (ch == ']') {
463           goto match_run;
464         } else if (ch == '-') {
465           range_start = static_cast<unsigned char>(ch);
466           state = ST_MATCH_RANGE;
467         } else {
468           SetBit(matchmap, static_cast<unsigned char>(ch));
469         }
470         break;
471 
472       case ST_MATCH_RANGE: // %[ match after -
473         if (ch == ']') {
474           SetBit(matchmap, static_cast<unsigned char>('-'));
475           goto match_run;
476         } else {
477           int i;
478           for (i = range_start; i < (static_cast<unsigned char>(ch)); i++) {
479             SetBit(matchmap, i);
480           }
481           state = ST_MATCH;
482         }
483         break;
484 
485       match_run: // Match expression finished
486         char *oarg = sarg;
487         while (width) {
488           q = fgetc(stream);
489           auto qc = static_cast<unsigned char>(q);
490           if (q <= 0 || !(TestBit(matchmap, qc) ^ matchinv)) {
491             ungetc(q, stream);
492             break;
493           }
494           if (!(flags & FL_SPLAT)) {
495             *sarg = q;
496           }
497           sarg++;
498         }
499         if (oarg == sarg) {
500           bail = (q <= 0) ? BAIL_EOF : BAIL_ERR;
501         } else if (!(flags & FL_SPLAT)) {
502           *sarg = '\0';
503           converted++;
504         }
505         break;
506     }
507   }
508 
509   if (bail == BAIL_EOF && !converted) {
510     converted = -1; // Return EOF (-1)
511   }
512 
513   return converted;
514 }
515