1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
11 // and friends.
12 //
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14 //
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #ifndef LLVM_CLANG_FORMAT_H
20 #define LLVM_CLANG_FORMAT_H
21 
22 #include "clang/AST/CanonicalType.h"
23 
24 namespace clang {
25 
26 class TargetInfo;
27 
28 //===----------------------------------------------------------------------===//
29 /// Common components of both fprintf and fscanf format strings.
30 namespace analyze_format_string {
31 
/// Class representing optional flags with location and representation
/// information.
///
/// A flag is created cleared and without a position. The position is only
/// meaningful after setPosition() has been called.
class OptionalFlag {
public:
  /// Creates a cleared flag whose textual form is \p Representation.
  ///
  /// The position starts out null so that getPosition() asserts reliably,
  /// rather than inspecting an indeterminate pointer, when no position was
  /// ever recorded.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set. Const so that it can be
  /// queried through the const references handed out by the specifiers.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag was found; \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string given at construction.
  const char *toString() const { return representation; }

  // Overloaded operators giving the flag bool-like qualities.
  LLVM_EXPLICIT operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Allow chained assignment.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
62 
/// Represents the length modifier in a format string in scanf/printf.
///
/// Stores the modifier kind together with a pointer to where the modifier
/// starts; getLength() gives the number of characters it occupies.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsInt32,      // 'I32' (MSVCRT, like __int32)
    AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
    AsInt64,      // 'I64' (MSVCRT, like __int64)
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds an absent modifier: kind None and a null start position.
  LengthModifier() : Position(0), kind(None) {}

  /// Builds a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position supplied at construction (null by default).
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier is spelled with:
  /// 0 for None, 2 for 'hh' and 'll', 3 for 'I32' and 'I64', otherwise 1.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    if (kind == AsInt32 || kind == AsInt64)
      return 3;
    // Every remaining kind is a single character.
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
118 
119 class ConversionSpecifier {
120 public:
121   enum Kind {
122     InvalidSpecifier = 0,
123       // C99 conversion specifiers.
124     cArg,
125     dArg,
126     DArg, // Apple extension
127     iArg,
128     IntArgBeg = dArg, IntArgEnd = iArg,
129 
130     oArg,
131     OArg, // Apple extension
132     uArg,
133     UArg, // Apple extension
134     xArg,
135     XArg,
136     UIntArgBeg = oArg, UIntArgEnd = XArg,
137 
138     fArg,
139     FArg,
140     eArg,
141     EArg,
142     gArg,
143     GArg,
144     aArg,
145     AArg,
146     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
147 
148     sArg,
149     pArg,
150     nArg,
151     PercentArg,
152     CArg,
153     SArg,
154 
155     // ** Printf-specific **
156 
157     // Objective-C specific specifiers.
158     ObjCObjArg,  // '@'
159     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
160 
161     // GlibC specific specifiers.
162     PrintErrno,   // 'm'
163 
164     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
165 
166     // ** Scanf-specific **
167     ScanListArg, // '['
168     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
169   };
170 
171   ConversionSpecifier(bool isPrintf = true)
172     : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
173 
174   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
175     : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
176 
177   const char *getStart() const {
178     return Position;
179   }
180 
181   StringRef getCharacters() const {
182     return StringRef(getStart(), getLength());
183   }
184 
185   bool consumesDataArgument() const {
186     switch (kind) {
187       case PrintErrno:
188         assert(IsPrintf);
189         return false;
190       case PercentArg:
191         return false;
192       default:
193         return true;
194     }
195   }
196 
197   Kind getKind() const { return kind; }
198   void setKind(Kind k) { kind = k; }
199   unsigned getLength() const {
200     return EndScanList ? EndScanList - Position : 1;
201   }
202 
203   bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
204   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
205   bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
206   const char *toString() const;
207 
208   bool isPrintfKind() const { return IsPrintf; }
209 
210   Optional<ConversionSpecifier> getStandardSpecifier() const;
211 
212 protected:
213   bool IsPrintf;
214   const char *Position;
215   const char *EndScanList;
216   Kind kind;
217 };
218 
219 class ArgType {
220 public:
221   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
222               AnyCharTy, CStrTy, WCStrTy, WIntTy };
223 private:
224   const Kind K;
225   QualType T;
226   const char *Name;
227   bool Ptr;
228 public:
229   ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
230   ArgType(QualType t, const char *n = 0)
231       : K(SpecificTy), T(t), Name(n), Ptr(false) {}
232   ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
233 
234   static ArgType Invalid() { return ArgType(InvalidTy); }
235   bool isValid() const { return K != InvalidTy; }
236 
237   /// Create an ArgType which corresponds to the type pointer to A.
238   static ArgType PtrTo(const ArgType& A) {
239     assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
240     ArgType Res = A;
241     Res.Ptr = true;
242     return Res;
243   }
244 
245   bool matchesType(ASTContext &C, QualType argTy) const;
246 
247   QualType getRepresentativeType(ASTContext &C) const;
248 
249   std::string getRepresentativeTypeName(ASTContext &C) const;
250 };
251 
252 class OptionalAmount {
253 public:
254   enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
255 
256   OptionalAmount(HowSpecified howSpecified,
257                  unsigned amount,
258                  const char *amountStart,
259                  unsigned amountLength,
260                  bool usesPositionalArg)
261   : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
262   UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
263 
264   OptionalAmount(bool valid = true)
265   : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
266   UsesPositionalArg(0), UsesDotPrefix(0) {}
267 
268   bool isInvalid() const {
269     return hs == Invalid;
270   }
271 
272   HowSpecified getHowSpecified() const { return hs; }
273   void setHowSpecified(HowSpecified h) { hs = h; }
274 
275   bool hasDataArgument() const { return hs == Arg; }
276 
277   unsigned getArgIndex() const {
278     assert(hasDataArgument());
279     return amt;
280   }
281 
282   unsigned getConstantAmount() const {
283     assert(hs == Constant);
284     return amt;
285   }
286 
287   const char *getStart() const {
288       // We include the . character if it is given.
289     return start - UsesDotPrefix;
290   }
291 
292   unsigned getConstantLength() const {
293     assert(hs == Constant);
294     return length + UsesDotPrefix;
295   }
296 
297   ArgType getArgType(ASTContext &Ctx) const;
298 
299   void toString(raw_ostream &os) const;
300 
301   bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
302   unsigned getPositionalArgIndex() const {
303     assert(hasDataArgument());
304     return amt + 1;
305   }
306 
307   bool usesDotPrefix() const { return UsesDotPrefix; }
308   void setUsesDotPrefix() { UsesDotPrefix = true; }
309 
310 private:
311   const char *start;
312   unsigned length;
313   HowSpecified hs;
314   unsigned amt;
315   bool UsesPositionalArg : 1;
316   bool UsesDotPrefix;
317 };
318 
319 
320 class FormatSpecifier {
321 protected:
322   LengthModifier LM;
323   OptionalAmount FieldWidth;
324   ConversionSpecifier CS;
325   /// Positional arguments, an IEEE extension:
326   ///  IEEE Std 1003.1, 2004 Edition
327   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
328   bool UsesPositionalArg;
329   unsigned argIndex;
330 public:
331   FormatSpecifier(bool isPrintf)
332     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
333 
334   void setLengthModifier(LengthModifier lm) {
335     LM = lm;
336   }
337 
338   void setUsesPositionalArg() { UsesPositionalArg = true; }
339 
340   void setArgIndex(unsigned i) {
341     argIndex = i;
342   }
343 
344   unsigned getArgIndex() const {
345     return argIndex;
346   }
347 
348   unsigned getPositionalArgIndex() const {
349     return argIndex + 1;
350   }
351 
352   const LengthModifier &getLengthModifier() const {
353     return LM;
354   }
355 
356   const OptionalAmount &getFieldWidth() const {
357     return FieldWidth;
358   }
359 
360   void setFieldWidth(const OptionalAmount &Amt) {
361     FieldWidth = Amt;
362   }
363 
364   bool usesPositionalArg() const { return UsesPositionalArg; }
365 
366   bool hasValidLengthModifier(const TargetInfo &Target) const;
367 
368   bool hasStandardLengthModifier() const;
369 
370   Optional<LengthModifier> getCorrectedLengthModifier() const;
371 
372   bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
373 
374   bool hasStandardLengthConversionCombination() const;
375 
376   /// For a TypedefType QT, if it is a named integer type such as size_t,
377   /// assign the appropriate value to LM and return true.
378   static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
379 };
380 
381 } // end analyze_format_string namespace
382 
383 //===----------------------------------------------------------------------===//
384 /// Pieces specific to fprintf format strings.
385 
386 namespace analyze_printf {
387 
388 class PrintfConversionSpecifier :
389   public analyze_format_string::ConversionSpecifier  {
390 public:
391   PrintfConversionSpecifier()
392     : ConversionSpecifier(true, 0, InvalidSpecifier) {}
393 
394   PrintfConversionSpecifier(const char *pos, Kind k)
395     : ConversionSpecifier(true, pos, k) {}
396 
397   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
398   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
399                                     kind <= DoubleArgEnd; }
400   unsigned getLength() const {
401       // Conversion specifiers currently only are represented by
402       // single characters, but we be flexible.
403     return 1;
404   }
405 
406   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
407     return CS->isPrintfKind();
408   }
409 };
410 
411 using analyze_format_string::ArgType;
412 using analyze_format_string::LengthModifier;
413 using analyze_format_string::OptionalAmount;
414 using analyze_format_string::OptionalFlag;
415 
416 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
417   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
418   OptionalFlag IsLeftJustified; // '-'
419   OptionalFlag HasPlusPrefix; // '+'
420   OptionalFlag HasSpacePrefix; // ' '
421   OptionalFlag HasAlternativeForm; // '#'
422   OptionalFlag HasLeadingZeroes; // '0'
423   OptionalAmount Precision;
424 public:
425   PrintfSpecifier() :
426     FormatSpecifier(/* isPrintf = */ true),
427     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
428     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
429 
430   static PrintfSpecifier Parse(const char *beg, const char *end);
431 
432     // Methods for incrementally constructing the PrintfSpecifier.
433   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
434     CS = cs;
435   }
436   void setHasThousandsGrouping(const char *position) {
437     HasThousandsGrouping = true;
438     HasThousandsGrouping.setPosition(position);
439   }
440   void setIsLeftJustified(const char *position) {
441     IsLeftJustified = true;
442     IsLeftJustified.setPosition(position);
443   }
444   void setHasPlusPrefix(const char *position) {
445     HasPlusPrefix = true;
446     HasPlusPrefix.setPosition(position);
447   }
448   void setHasSpacePrefix(const char *position) {
449     HasSpacePrefix = true;
450     HasSpacePrefix.setPosition(position);
451   }
452   void setHasAlternativeForm(const char *position) {
453     HasAlternativeForm = true;
454     HasAlternativeForm.setPosition(position);
455   }
456   void setHasLeadingZeros(const char *position) {
457     HasLeadingZeroes = true;
458     HasLeadingZeroes.setPosition(position);
459   }
460   void setUsesPositionalArg() { UsesPositionalArg = true; }
461 
462     // Methods for querying the format specifier.
463 
464   const PrintfConversionSpecifier &getConversionSpecifier() const {
465     return cast<PrintfConversionSpecifier>(CS);
466   }
467 
468   void setPrecision(const OptionalAmount &Amt) {
469     Precision = Amt;
470     Precision.setUsesDotPrefix();
471   }
472 
473   const OptionalAmount &getPrecision() const {
474     return Precision;
475   }
476 
477   bool consumesDataArgument() const {
478     return getConversionSpecifier().consumesDataArgument();
479   }
480 
481   /// \brief Returns the builtin type that a data argument
482   /// paired with this format specifier should have.  This method
483   /// will return null if the format specifier does not have
484   /// a matching data argument or the matching argument matches
485   /// more than one type.
486   ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
487 
488   const OptionalFlag &hasThousandsGrouping() const {
489       return HasThousandsGrouping;
490   }
491   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
492   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
493   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
494   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
495   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
496   bool usesPositionalArg() const { return UsesPositionalArg; }
497 
498   /// Changes the specifier and length according to a QualType, retaining any
499   /// flags or options. Returns true on success, or false when a conversion
500   /// was not successful.
501   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
502                bool IsObjCLiteral);
503 
504   void toString(raw_ostream &os) const;
505 
506   // Validation methods - to check if any element results in undefined behavior
507   bool hasValidPlusPrefix() const;
508   bool hasValidAlternativeForm() const;
509   bool hasValidLeadingZeros() const;
510   bool hasValidSpacePrefix() const;
511   bool hasValidLeftJustified() const;
512   bool hasValidThousandsGroupingPrefix() const;
513 
514   bool hasValidPrecision() const;
515   bool hasValidFieldWidth() const;
516 };
517 }  // end analyze_printf namespace
518 
519 //===----------------------------------------------------------------------===//
520 /// Pieces specific to fscanf format strings.
521 
522 namespace analyze_scanf {
523 
524 class ScanfConversionSpecifier :
525     public analyze_format_string::ConversionSpecifier  {
526 public:
527   ScanfConversionSpecifier()
528     : ConversionSpecifier(false, 0, InvalidSpecifier) {}
529 
530   ScanfConversionSpecifier(const char *pos, Kind k)
531     : ConversionSpecifier(false, pos, k) {}
532 
533   void setEndScanList(const char *pos) { EndScanList = pos; }
534 
535   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
536     return !CS->isPrintfKind();
537   }
538 };
539 
540 using analyze_format_string::ArgType;
541 using analyze_format_string::LengthModifier;
542 using analyze_format_string::OptionalAmount;
543 using analyze_format_string::OptionalFlag;
544 
545 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
546   OptionalFlag SuppressAssignment; // '*'
547 public:
548   ScanfSpecifier() :
549     FormatSpecifier(/* isPrintf = */ false),
550     SuppressAssignment("*") {}
551 
552   void setSuppressAssignment(const char *position) {
553     SuppressAssignment = true;
554     SuppressAssignment.setPosition(position);
555   }
556 
557   const OptionalFlag &getSuppressAssignment() const {
558     return SuppressAssignment;
559   }
560 
561   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
562     CS = cs;
563   }
564 
565   const ScanfConversionSpecifier &getConversionSpecifier() const {
566     return cast<ScanfConversionSpecifier>(CS);
567   }
568 
569   bool consumesDataArgument() const {
570     return CS.consumesDataArgument() && !SuppressAssignment;
571   }
572 
573   ArgType getArgType(ASTContext &Ctx) const;
574 
575   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
576 
577   void toString(raw_ostream &os) const;
578 
579   static ScanfSpecifier Parse(const char *beg, const char *end);
580 };
581 
582 } // end analyze_scanf namespace
583 
584 //===----------------------------------------------------------------------===//
585 // Parsing and processing of format strings (both fprintf and fscanf).
586 
587 namespace analyze_format_string {
588 
/// Says which amount a reported position belongs to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
590 
591 class FormatStringHandler {
592 public:
593   FormatStringHandler() {}
594   virtual ~FormatStringHandler();
595 
596   virtual void HandleNullChar(const char *nullCharacter) {}
597 
598   virtual void HandlePosition(const char *startPos, unsigned posLen) {}
599 
600   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
601                                      PositionContext p) {}
602 
603   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
604 
605   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
606                                          unsigned specifierLen) {}
607 
608   // Printf-specific handlers.
609 
610   virtual bool HandleInvalidPrintfConversionSpecifier(
611                                       const analyze_printf::PrintfSpecifier &FS,
612                                       const char *startSpecifier,
613                                       unsigned specifierLen) {
614     return true;
615   }
616 
617   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
618                                      const char *startSpecifier,
619                                      unsigned specifierLen) {
620     return true;
621   }
622 
623     // Scanf-specific handlers.
624 
625   virtual bool HandleInvalidScanfConversionSpecifier(
626                                         const analyze_scanf::ScanfSpecifier &FS,
627                                         const char *startSpecifier,
628                                         unsigned specifierLen) {
629     return true;
630   }
631 
632   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
633                                     const char *startSpecifier,
634                                     unsigned specifierLen) {
635     return true;
636   }
637 
638   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
639 };
640 
641 bool ParsePrintfString(FormatStringHandler &H,
642                        const char *beg, const char *end, const LangOptions &LO,
643                        const TargetInfo &Target);
644 
645 bool ParseScanfString(FormatStringHandler &H,
646                       const char *beg, const char *end, const LangOptions &LO,
647                       const TargetInfo &Target);
648 
649 } // end analyze_format_string namespace
650 } // end clang namespace
651 #endif
652