1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/FormatString.h"
15 #include "clang/AST/OSLog.h"
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
18 
19 using clang::analyze_format_string::ArgType;
20 using clang::analyze_format_string::FormatStringHandler;
21 using clang::analyze_format_string::LengthModifier;
22 using clang::analyze_format_string::OptionalAmount;
23 using clang::analyze_format_string::ConversionSpecifier;
24 using clang::analyze_printf::PrintfSpecifier;
25 
26 using namespace clang;
27 
28 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
29         PrintfSpecifierResult;
30 
31 //===----------------------------------------------------------------------===//
32 // Methods for parsing format strings.
33 //===----------------------------------------------------------------------===//
34 
35 using analyze_format_string::ParseNonPositionAmount;
36 
37 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
38                            const char *Start, const char *&Beg, const char *E,
39                            unsigned *argIndex) {
40   if (argIndex) {
41     FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
42   } else {
43     const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44                                            analyze_format_string::PrecisionPos);
45     if (Amt.isInvalid())
46       return true;
47     FS.setPrecision(Amt);
48   }
49   return false;
50 }
51 
52 static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
53                            const char *FlagBeg, const char *E, bool Warn) {
54    StringRef Flag(FlagBeg, E - FlagBeg);
55    // Currently there is only one flag.
56    if (Flag == "tt") {
57      FS.setHasObjCTechnicalTerm(FlagBeg);
58      return false;
59    }
60    // Handle either the case of no flag or an invalid flag.
61    if (Warn) {
62      if (Flag == "")
63        H.HandleEmptyObjCModifierFlag(FlagBeg, E  - FlagBeg);
64      else
65        H.HandleInvalidObjCModifierFlag(FlagBeg, E  - FlagBeg);
66    }
67    return true;
68 }
69 
70 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
71                                                   const char *&Beg,
72                                                   const char *E,
73                                                   unsigned &argIndex,
74                                                   const LangOptions &LO,
75                                                   const TargetInfo &Target,
76                                                   bool Warn,
77                                                   bool isFreeBSDKPrintf) {
78 
79   using namespace clang::analyze_format_string;
80   using namespace clang::analyze_printf;
81 
82   const char *I = Beg;
83   const char *Start = nullptr;
84   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
85 
86   // Look for a '%' character that indicates the start of a format specifier.
87   for ( ; I != E ; ++I) {
88     char c = *I;
89     if (c == '\0') {
90       // Detect spurious null characters, which are likely errors.
91       H.HandleNullChar(I);
92       return true;
93     }
94     if (c == '%') {
95       Start = I++;  // Record the start of the format specifier.
96       break;
97     }
98   }
99 
100   // No format specifier found?
101   if (!Start)
102     return false;
103 
104   if (I == E) {
105     // No more characters left?
106     if (Warn)
107       H.HandleIncompleteSpecifier(Start, E - Start);
108     return true;
109   }
110 
111   PrintfSpecifier FS;
112   if (ParseArgPosition(H, FS, Start, I, E))
113     return true;
114 
115   if (I == E) {
116     // No more characters left?
117     if (Warn)
118       H.HandleIncompleteSpecifier(Start, E - Start);
119     return true;
120   }
121 
122   if (*I == '{') {
123     ++I;
124     unsigned char PrivacyFlags = 0;
125     StringRef MatchedStr;
126 
127     do {
128       StringRef Str(I, E - I);
129       std::string Match = "^[[:space:]]*"
130                           "(private|public|sensitive|mask\\.[^[:space:],}]*)"
131                           "[[:space:]]*(,|})";
132       llvm::Regex R(Match);
133       SmallVector<StringRef, 2> Matches;
134 
135       if (R.match(Str, &Matches)) {
136         MatchedStr = Matches[1];
137         I += Matches[0].size();
138 
139         // Set the privacy flag if the privacy annotation in the
140         // comma-delimited segment is at least as strict as the privacy
141         // annotations in previous comma-delimited segments.
142         if (MatchedStr.startswith("mask")) {
143           StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1);
144           unsigned Size = MaskType.size();
145           if (Warn && (Size == 0 || Size > 8))
146             H.handleInvalidMaskType(MaskType);
147           FS.setMaskType(MaskType);
148         } else if (MatchedStr.equals("sensitive"))
149           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive;
150         else if (PrivacyFlags !=
151                  clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
152                  MatchedStr.equals("private"))
153           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate;
154         else if (PrivacyFlags == 0 && MatchedStr.equals("public"))
155           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic;
156       } else {
157         size_t CommaOrBracePos =
158             Str.find_if([](char c) { return c == ',' || c == '}'; });
159 
160         if (CommaOrBracePos == StringRef::npos) {
161           // Neither a comma nor the closing brace was found.
162           if (Warn)
163             H.HandleIncompleteSpecifier(Start, E - Start);
164           return true;
165         }
166 
167         I += CommaOrBracePos + 1;
168       }
169       // Continue until the closing brace is found.
170     } while (*(I - 1) == ',');
171 
172     // Set the privacy flag.
173     switch (PrivacyFlags) {
174     case 0:
175       break;
176     case clang::analyze_os_log::OSLogBufferItem::IsPrivate:
177       FS.setIsPrivate(MatchedStr.data());
178       break;
179     case clang::analyze_os_log::OSLogBufferItem::IsPublic:
180       FS.setIsPublic(MatchedStr.data());
181       break;
182     case clang::analyze_os_log::OSLogBufferItem::IsSensitive:
183       FS.setIsSensitive(MatchedStr.data());
184       break;
185     default:
186       llvm_unreachable("Unexpected privacy flag value");
187     }
188   }
189 
190   // Look for flags (if any).
191   bool hasMore = true;
192   for ( ; I != E; ++I) {
193     switch (*I) {
194       default: hasMore = false; break;
195       case '\'':
196         // FIXME: POSIX specific.  Always accept?
197         FS.setHasThousandsGrouping(I);
198         break;
199       case '-': FS.setIsLeftJustified(I); break;
200       case '+': FS.setHasPlusPrefix(I); break;
201       case ' ': FS.setHasSpacePrefix(I); break;
202       case '#': FS.setHasAlternativeForm(I); break;
203       case '0': FS.setHasLeadingZeros(I); break;
204     }
205     if (!hasMore)
206       break;
207   }
208 
209   if (I == E) {
210     // No more characters left?
211     if (Warn)
212       H.HandleIncompleteSpecifier(Start, E - Start);
213     return true;
214   }
215 
216   // Look for the field width (if any).
217   if (ParseFieldWidth(H, FS, Start, I, E,
218                       FS.usesPositionalArg() ? nullptr : &argIndex))
219     return true;
220 
221   if (I == E) {
222     // No more characters left?
223     if (Warn)
224       H.HandleIncompleteSpecifier(Start, E - Start);
225     return true;
226   }
227 
228   // Look for the precision (if any).
229   if (*I == '.') {
230     ++I;
231     if (I == E) {
232       if (Warn)
233         H.HandleIncompleteSpecifier(Start, E - Start);
234       return true;
235     }
236 
237     if (ParsePrecision(H, FS, Start, I, E,
238                        FS.usesPositionalArg() ? nullptr : &argIndex))
239       return true;
240 
241     if (I == E) {
242       // No more characters left?
243       if (Warn)
244         H.HandleIncompleteSpecifier(Start, E - Start);
245       return true;
246     }
247   }
248 
249   if (ParseVectorModifier(H, FS, I, E, LO))
250     return true;
251 
252   // Look for the length modifier.
253   if (ParseLengthModifier(FS, I, E, LO) && I == E) {
254     // No more characters left?
255     if (Warn)
256       H.HandleIncompleteSpecifier(Start, E - Start);
257     return true;
258   }
259 
260   // Look for the Objective-C modifier flags, if any.
261   // We parse these here, even if they don't apply to
262   // the conversion specifier, and then emit an error
263   // later if the conversion specifier isn't '@'.  This
264   // enables better recovery, and we don't know if
265   // these flags are applicable until later.
266   const char *ObjCModifierFlagsStart = nullptr,
267              *ObjCModifierFlagsEnd = nullptr;
268   if (*I == '[') {
269     ObjCModifierFlagsStart = I;
270     ++I;
271     auto flagStart = I;
272     for (;; ++I) {
273       ObjCModifierFlagsEnd = I;
274       if (I == E) {
275         if (Warn)
276           H.HandleIncompleteSpecifier(Start, E - Start);
277         return true;
278       }
279       // Did we find the closing ']'?
280       if (*I == ']') {
281         if (ParseObjCFlags(H, FS, flagStart, I, Warn))
282           return true;
283         ++I;
284         break;
285       }
286       // There are no separators defined yet for multiple
287       // Objective-C modifier flags.  When those are
288       // defined, this is the place to check.
289     }
290   }
291 
292   if (*I == '\0') {
293     // Detect spurious null characters, which are likely errors.
294     H.HandleNullChar(I);
295     return true;
296   }
297 
298   // Finally, look for the conversion specifier.
299   const char *conversionPosition = I++;
300   ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
301   switch (*conversionPosition) {
302     default:
303       break;
304     // C99: 7.19.6.1 (section 8).
305     case '%': k = ConversionSpecifier::PercentArg;   break;
306     case 'A': k = ConversionSpecifier::AArg; break;
307     case 'E': k = ConversionSpecifier::EArg; break;
308     case 'F': k = ConversionSpecifier::FArg; break;
309     case 'G': k = ConversionSpecifier::GArg; break;
310     case 'X': k = ConversionSpecifier::XArg; break;
311     case 'a': k = ConversionSpecifier::aArg; break;
312     case 'c': k = ConversionSpecifier::cArg; break;
313     case 'd': k = ConversionSpecifier::dArg; break;
314     case 'e': k = ConversionSpecifier::eArg; break;
315     case 'f': k = ConversionSpecifier::fArg; break;
316     case 'g': k = ConversionSpecifier::gArg; break;
317     case 'i': k = ConversionSpecifier::iArg; break;
318     case 'n':
319       // Not handled, but reserved in OpenCL and FreeBSD kernel.
320       if (!LO.OpenCL && !isFreeBSDKPrintf)
321         k = ConversionSpecifier::nArg;
322       break;
323     case 'o': k = ConversionSpecifier::oArg; break;
324     case 'p': k = ConversionSpecifier::pArg; break;
325     case 's': k = ConversionSpecifier::sArg; break;
326     case 'u': k = ConversionSpecifier::uArg; break;
327     case 'x': k = ConversionSpecifier::xArg; break;
328     // POSIX specific.
329     case 'C': k = ConversionSpecifier::CArg; break;
330     case 'S': k = ConversionSpecifier::SArg; break;
331     // Apple extension for os_log
332     case 'P':
333       k = ConversionSpecifier::PArg;
334       break;
335     // Objective-C.
336     case '@': k = ConversionSpecifier::ObjCObjArg; break;
337     // Glibc specific.
338     case 'm': k = ConversionSpecifier::PrintErrno; break;
339     // FreeBSD kernel specific.
340     case 'b':
341       if (isFreeBSDKPrintf)
342         k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
343       break;
344     case 'r':
345       if (isFreeBSDKPrintf)
346         k = ConversionSpecifier::FreeBSDrArg; // int
347       break;
348     case 'y':
349       if (isFreeBSDKPrintf)
350         k = ConversionSpecifier::FreeBSDyArg; // int
351       break;
352     // Apple-specific.
353     case 'D':
354       if (isFreeBSDKPrintf)
355         k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
356       else if (Target.getTriple().isOSDarwin())
357         k = ConversionSpecifier::DArg;
358       break;
359     case 'O':
360       if (Target.getTriple().isOSDarwin())
361         k = ConversionSpecifier::OArg;
362       break;
363     case 'U':
364       if (Target.getTriple().isOSDarwin())
365         k = ConversionSpecifier::UArg;
366       break;
367     // MS specific.
368     case 'Z':
369       if (Target.getTriple().isOSMSVCRT())
370         k = ConversionSpecifier::ZArg;
371       break;
372   }
373 
374   // Check to see if we used the Objective-C modifier flags with
375   // a conversion specifier other than '@'.
376   if (k != ConversionSpecifier::ObjCObjArg &&
377       k != ConversionSpecifier::InvalidSpecifier &&
378       ObjCModifierFlagsStart) {
379     H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart,
380                                            ObjCModifierFlagsEnd + 1,
381                                            conversionPosition);
382     return true;
383   }
384 
385   PrintfConversionSpecifier CS(conversionPosition, k);
386   FS.setConversionSpecifier(CS);
387   if (CS.consumesDataArgument() && !FS.usesPositionalArg())
388     FS.setArgIndex(argIndex++);
389   // FreeBSD kernel specific.
390   if (k == ConversionSpecifier::FreeBSDbArg ||
391       k == ConversionSpecifier::FreeBSDDArg)
392     argIndex++;
393 
394   if (k == ConversionSpecifier::InvalidSpecifier) {
395     unsigned Len = I - Start;
396     if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
397       CS.setEndScanList(Start + Len);
398       FS.setConversionSpecifier(CS);
399     }
400     // Assume the conversion takes one argument.
401     return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
402   }
403   return PrintfSpecifierResult(Start, FS);
404 }
405 
406 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
407                                                      const char *I,
408                                                      const char *E,
409                                                      const LangOptions &LO,
410                                                      const TargetInfo &Target,
411                                                      bool isFreeBSDKPrintf) {
412 
413   unsigned argIndex = 0;
414 
415   // Keep looking for a format specifier until we have exhausted the string.
416   while (I != E) {
417     const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
418                                                             LO, Target, true,
419                                                             isFreeBSDKPrintf);
420     // Did a fail-stop error of any kind occur when parsing the specifier?
421     // If so, don't do any more processing.
422     if (FSR.shouldStop())
423       return true;
424     // Did we exhaust the string or encounter an error that
425     // we can recover from?
426     if (!FSR.hasValue())
427       continue;
428     // We have a format specifier.  Pass it to the callback.
429     if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
430                                  I - FSR.getStart()))
431       return true;
432   }
433   assert(I == E && "Format string not exhausted");
434   return false;
435 }
436 
437 bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
438                                                             const char *E,
439                                                             const LangOptions &LO,
440                                                             const TargetInfo &Target) {
441 
442   unsigned argIndex = 0;
443 
444   // Keep looking for a %s format specifier until we have exhausted the string.
445   FormatStringHandler H;
446   while (I != E) {
447     const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
448                                                             LO, Target, false,
449                                                             false);
450     // Did a fail-stop error of any kind occur when parsing the specifier?
451     // If so, don't do any more processing.
452     if (FSR.shouldStop())
453       return false;
454     // Did we exhaust the string or encounter an error that
455     // we can recover from?
456     if (!FSR.hasValue())
457       continue;
458     const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
459     // Return true if this a %s format specifier.
460     if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg)
461       return true;
462   }
463   return false;
464 }
465 
466 bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
467     const char *Begin, const char *End, const LangOptions &LO,
468     const TargetInfo &Target) {
469   unsigned ArgIndex = 0;
470   // Keep looking for a formatting specifier until we have exhausted the string.
471   FormatStringHandler H;
472   while (Begin != End) {
473     const PrintfSpecifierResult &FSR =
474         ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false);
475     if (FSR.shouldStop())
476       break;
477     if (FSR.hasValue())
478       return true;
479   }
480   return false;
481 }
482 
483 //===----------------------------------------------------------------------===//
484 // Methods on PrintfSpecifier.
485 //===----------------------------------------------------------------------===//
486 
487 ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
488                                           bool IsObjCLiteral) const {
489   if (CS.getKind() == ConversionSpecifier::cArg)
490     switch (LM.getKind()) {
491       case LengthModifier::None:
492         return Ctx.IntTy;
493       case LengthModifier::AsLong:
494       case LengthModifier::AsWide:
495         return ArgType(ArgType::WIntTy, "wint_t");
496       case LengthModifier::AsShort:
497         if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
498           return Ctx.IntTy;
499         LLVM_FALLTHROUGH;
500       default:
501         return ArgType::Invalid();
502     }
503 
504   if (CS.isIntArg())
505     switch (LM.getKind()) {
506       case LengthModifier::AsLongDouble:
507         // GNU extension.
508         return Ctx.LongLongTy;
509       case LengthModifier::None:
510       case LengthModifier::AsShortLong:
511         return Ctx.IntTy;
512       case LengthModifier::AsInt32:
513         return ArgType(Ctx.IntTy, "__int32");
514       case LengthModifier::AsChar:
515         return ArgType::AnyCharTy;
516       case LengthModifier::AsShort: return Ctx.ShortTy;
517       case LengthModifier::AsLong: return Ctx.LongTy;
518       case LengthModifier::AsLongLong:
519       case LengthModifier::AsQuad:
520         return Ctx.LongLongTy;
521       case LengthModifier::AsInt64:
522         return ArgType(Ctx.LongLongTy, "__int64");
523       case LengthModifier::AsIntMax:
524         return ArgType(Ctx.getIntMaxType(), "intmax_t");
525       case LengthModifier::AsSizeT:
526         return ArgType::makeSizeT(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
527       case LengthModifier::AsInt3264:
528         return Ctx.getTargetInfo().getTriple().isArch64Bit()
529                    ? ArgType(Ctx.LongLongTy, "__int64")
530                    : ArgType(Ctx.IntTy, "__int32");
531       case LengthModifier::AsPtrDiff:
532         return ArgType::makePtrdiffT(
533             ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
534       case LengthModifier::AsAllocate:
535       case LengthModifier::AsMAllocate:
536       case LengthModifier::AsWide:
537         return ArgType::Invalid();
538     }
539 
540   if (CS.isUIntArg())
541     switch (LM.getKind()) {
542       case LengthModifier::AsLongDouble:
543         // GNU extension.
544         return Ctx.UnsignedLongLongTy;
545       case LengthModifier::None:
546       case LengthModifier::AsShortLong:
547         return Ctx.UnsignedIntTy;
548       case LengthModifier::AsInt32:
549         return ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
550       case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
551       case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
552       case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
553       case LengthModifier::AsLongLong:
554       case LengthModifier::AsQuad:
555         return Ctx.UnsignedLongLongTy;
556       case LengthModifier::AsInt64:
557         return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64");
558       case LengthModifier::AsIntMax:
559         return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
560       case LengthModifier::AsSizeT:
561         return ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t"));
562       case LengthModifier::AsInt3264:
563         return Ctx.getTargetInfo().getTriple().isArch64Bit()
564                    ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")
565                    : ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
566       case LengthModifier::AsPtrDiff:
567         return ArgType::makePtrdiffT(
568             ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
569       case LengthModifier::AsAllocate:
570       case LengthModifier::AsMAllocate:
571       case LengthModifier::AsWide:
572         return ArgType::Invalid();
573     }
574 
575   if (CS.isDoubleArg()) {
576     if (!VectorNumElts.isInvalid()) {
577       switch (LM.getKind()) {
578       case LengthModifier::AsShort:
579         return Ctx.HalfTy;
580       case LengthModifier::AsShortLong:
581         return Ctx.FloatTy;
582       case LengthModifier::AsLong:
583       default:
584         return Ctx.DoubleTy;
585       }
586     }
587 
588     if (LM.getKind() == LengthModifier::AsLongDouble)
589       return Ctx.LongDoubleTy;
590     return Ctx.DoubleTy;
591   }
592 
593   if (CS.getKind() == ConversionSpecifier::nArg) {
594     switch (LM.getKind()) {
595       case LengthModifier::None:
596         return ArgType::PtrTo(Ctx.IntTy);
597       case LengthModifier::AsChar:
598         return ArgType::PtrTo(Ctx.SignedCharTy);
599       case LengthModifier::AsShort:
600         return ArgType::PtrTo(Ctx.ShortTy);
601       case LengthModifier::AsLong:
602         return ArgType::PtrTo(Ctx.LongTy);
603       case LengthModifier::AsLongLong:
604       case LengthModifier::AsQuad:
605         return ArgType::PtrTo(Ctx.LongLongTy);
606       case LengthModifier::AsIntMax:
607         return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
608       case LengthModifier::AsSizeT:
609         return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
610       case LengthModifier::AsPtrDiff:
611         return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
612       case LengthModifier::AsLongDouble:
613         return ArgType(); // FIXME: Is this a known extension?
614       case LengthModifier::AsAllocate:
615       case LengthModifier::AsMAllocate:
616       case LengthModifier::AsInt32:
617       case LengthModifier::AsInt3264:
618       case LengthModifier::AsInt64:
619       case LengthModifier::AsWide:
620         return ArgType::Invalid();
621       case LengthModifier::AsShortLong:
622         llvm_unreachable("only used for OpenCL which doesn not handle nArg");
623     }
624   }
625 
626   switch (CS.getKind()) {
627     case ConversionSpecifier::sArg:
628       if (LM.getKind() == LengthModifier::AsWideChar) {
629         if (IsObjCLiteral)
630           return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
631                          "const unichar *");
632         return ArgType(ArgType::WCStrTy, "wchar_t *");
633       }
634       if (LM.getKind() == LengthModifier::AsWide)
635         return ArgType(ArgType::WCStrTy, "wchar_t *");
636       return ArgType::CStrTy;
637     case ConversionSpecifier::SArg:
638       if (IsObjCLiteral)
639         return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
640                        "const unichar *");
641       if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
642           LM.getKind() == LengthModifier::AsShort)
643         return ArgType::CStrTy;
644       return ArgType(ArgType::WCStrTy, "wchar_t *");
645     case ConversionSpecifier::CArg:
646       if (IsObjCLiteral)
647         return ArgType(Ctx.UnsignedShortTy, "unichar");
648       if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
649           LM.getKind() == LengthModifier::AsShort)
650         return Ctx.IntTy;
651       return ArgType(Ctx.WideCharTy, "wchar_t");
652     case ConversionSpecifier::pArg:
653     case ConversionSpecifier::PArg:
654       return ArgType::CPointerTy;
655     case ConversionSpecifier::ObjCObjArg:
656       return ArgType::ObjCPointerTy;
657     default:
658       break;
659   }
660 
661   // FIXME: Handle other cases.
662   return ArgType();
663 }
664 
665 
666 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
667                                     bool IsObjCLiteral) const {
668   const PrintfConversionSpecifier &CS = getConversionSpecifier();
669 
670   if (!CS.consumesDataArgument())
671     return ArgType::Invalid();
672 
673   ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral);
674   if (!ScalarTy.isValid() || VectorNumElts.isInvalid())
675     return ScalarTy;
676 
677   return ScalarTy.makeVectorType(Ctx, VectorNumElts.getConstantAmount());
678 }
679 
680 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
681                               ASTContext &Ctx, bool IsObjCLiteral) {
682   // %n is different from other conversion specifiers; don't try to fix it.
683   if (CS.getKind() == ConversionSpecifier::nArg)
684     return false;
685 
686   // Handle Objective-C objects first. Note that while the '%@' specifier will
687   // not warn for structure pointer or void pointer arguments (because that's
688   // how CoreFoundation objects are implemented), we only show a fixit for '%@'
689   // if we know it's an object (block, id, class, or __attribute__((NSObject))).
690   if (QT->isObjCRetainableType()) {
691     if (!IsObjCLiteral)
692       return false;
693 
694     CS.setKind(ConversionSpecifier::ObjCObjArg);
695 
696     // Disable irrelevant flags
697     HasThousandsGrouping = false;
698     HasPlusPrefix = false;
699     HasSpacePrefix = false;
700     HasAlternativeForm = false;
701     HasLeadingZeroes = false;
702     Precision.setHowSpecified(OptionalAmount::NotSpecified);
703     LM.setKind(LengthModifier::None);
704 
705     return true;
706   }
707 
708   // Handle strings next (char *, wchar_t *)
709   if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
710     CS.setKind(ConversionSpecifier::sArg);
711 
712     // Disable irrelevant flags
713     HasAlternativeForm = 0;
714     HasLeadingZeroes = 0;
715 
716     // Set the long length modifier for wide characters
717     if (QT->getPointeeType()->isWideCharType())
718       LM.setKind(LengthModifier::AsWideChar);
719     else
720       LM.setKind(LengthModifier::None);
721 
722     return true;
723   }
724 
725   // If it's an enum, get its underlying type.
726   if (const EnumType *ETy = QT->getAs<EnumType>())
727     QT = ETy->getDecl()->getIntegerType();
728 
729   const BuiltinType *BT = QT->getAs<BuiltinType>();
730   if (!BT) {
731     const VectorType *VT = QT->getAs<VectorType>();
732     if (VT) {
733       QT = VT->getElementType();
734       BT = QT->getAs<BuiltinType>();
735       VectorNumElts = OptionalAmount(VT->getNumElements());
736     }
737   }
738 
739   // We can only work with builtin types.
740   if (!BT)
741     return false;
742 
743   // Set length modifier
744   switch (BT->getKind()) {
745   case BuiltinType::Bool:
746   case BuiltinType::WChar_U:
747   case BuiltinType::WChar_S:
748   case BuiltinType::Char8: // FIXME: Treat like 'char'?
749   case BuiltinType::Char16:
750   case BuiltinType::Char32:
751   case BuiltinType::UInt128:
752   case BuiltinType::Int128:
753   case BuiltinType::Half:
754   case BuiltinType::Float16:
755   case BuiltinType::Float128:
756   case BuiltinType::ShortAccum:
757   case BuiltinType::Accum:
758   case BuiltinType::LongAccum:
759   case BuiltinType::UShortAccum:
760   case BuiltinType::UAccum:
761   case BuiltinType::ULongAccum:
762   case BuiltinType::ShortFract:
763   case BuiltinType::Fract:
764   case BuiltinType::LongFract:
765   case BuiltinType::UShortFract:
766   case BuiltinType::UFract:
767   case BuiltinType::ULongFract:
768   case BuiltinType::SatShortAccum:
769   case BuiltinType::SatAccum:
770   case BuiltinType::SatLongAccum:
771   case BuiltinType::SatUShortAccum:
772   case BuiltinType::SatUAccum:
773   case BuiltinType::SatULongAccum:
774   case BuiltinType::SatShortFract:
775   case BuiltinType::SatFract:
776   case BuiltinType::SatLongFract:
777   case BuiltinType::SatUShortFract:
778   case BuiltinType::SatUFract:
779   case BuiltinType::SatULongFract:
780     // Various types which are non-trivial to correct.
781     return false;
782 
783 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
784   case BuiltinType::Id:
785 #include "clang/Basic/OpenCLImageTypes.def"
786 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
787   case BuiltinType::Id:
788 #include "clang/Basic/OpenCLExtensionTypes.def"
789 #define SVE_TYPE(Name, Id, SingletonId) \
790   case BuiltinType::Id:
791 #include "clang/Basic/AArch64SVEACLETypes.def"
792 #define SIGNED_TYPE(Id, SingletonId)
793 #define UNSIGNED_TYPE(Id, SingletonId)
794 #define FLOATING_TYPE(Id, SingletonId)
795 #define BUILTIN_TYPE(Id, SingletonId) \
796   case BuiltinType::Id:
797 #include "clang/AST/BuiltinTypes.def"
798     // Misc other stuff which doesn't make sense here.
799     return false;
800 
801   case BuiltinType::UInt:
802   case BuiltinType::Int:
803   case BuiltinType::Float:
804     LM.setKind(VectorNumElts.isInvalid() ?
805                LengthModifier::None : LengthModifier::AsShortLong);
806     break;
807   case BuiltinType::Double:
808     LM.setKind(VectorNumElts.isInvalid() ?
809                LengthModifier::None : LengthModifier::AsLong);
810     break;
811   case BuiltinType::Char_U:
812   case BuiltinType::UChar:
813   case BuiltinType::Char_S:
814   case BuiltinType::SChar:
815     LM.setKind(LengthModifier::AsChar);
816     break;
817 
818   case BuiltinType::Short:
819   case BuiltinType::UShort:
820     LM.setKind(LengthModifier::AsShort);
821     break;
822 
823   case BuiltinType::Long:
824   case BuiltinType::ULong:
825     LM.setKind(LengthModifier::AsLong);
826     break;
827 
828   case BuiltinType::LongLong:
829   case BuiltinType::ULongLong:
830     LM.setKind(LengthModifier::AsLongLong);
831     break;
832 
833   case BuiltinType::LongDouble:
834     LM.setKind(LengthModifier::AsLongDouble);
835     break;
836   }
837 
838   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
839   if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
840     namedTypeToLengthModifier(QT, LM);
841 
842   // If fixing the length modifier was enough, we might be done.
843   if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
844     // If we're going to offer a fix anyway, make sure the sign matches.
845     switch (CS.getKind()) {
846     case ConversionSpecifier::uArg:
847     case ConversionSpecifier::UArg:
848       if (QT->isSignedIntegerType())
849         CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg);
850       break;
851     case ConversionSpecifier::dArg:
852     case ConversionSpecifier::DArg:
853     case ConversionSpecifier::iArg:
854       if (QT->isUnsignedIntegerType() && !HasPlusPrefix)
855         CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg);
856       break;
857     default:
858       // Other specifiers do not have signed/unsigned variants.
859       break;
860     }
861 
862     const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
863     if (ATR.isValid() && ATR.matchesType(Ctx, QT))
864       return true;
865   }
866 
867   // Set conversion specifier and disable any flags which do not apply to it.
868   // Let typedefs to char fall through to int, as %c is silly for uint8_t.
869   if (!isa<TypedefType>(QT) && QT->isCharType()) {
870     CS.setKind(ConversionSpecifier::cArg);
871     LM.setKind(LengthModifier::None);
872     Precision.setHowSpecified(OptionalAmount::NotSpecified);
873     HasAlternativeForm = 0;
874     HasLeadingZeroes = 0;
875     HasPlusPrefix = 0;
876   }
877   // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
878   else if (QT->isRealFloatingType()) {
879     CS.setKind(ConversionSpecifier::fArg);
880   }
881   else if (QT->isSignedIntegerType()) {
882     CS.setKind(ConversionSpecifier::dArg);
883     HasAlternativeForm = 0;
884   }
885   else if (QT->isUnsignedIntegerType()) {
886     CS.setKind(ConversionSpecifier::uArg);
887     HasAlternativeForm = 0;
888     HasPlusPrefix = 0;
889   } else {
890     llvm_unreachable("Unexpected type");
891   }
892 
893   return true;
894 }
895 
896 void PrintfSpecifier::toString(raw_ostream &os) const {
897   // Whilst some features have no defined order, we are using the order
898   // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
899   os << "%";
900 
901   // Positional args
902   if (usesPositionalArg()) {
903     os << getPositionalArgIndex() << "$";
904   }
905 
906   // Conversion flags
907   if (IsLeftJustified)    os << "-";
908   if (HasPlusPrefix)      os << "+";
909   if (HasSpacePrefix)     os << " ";
910   if (HasAlternativeForm) os << "#";
911   if (HasLeadingZeroes)   os << "0";
912 
913   // Minimum field width
914   FieldWidth.toString(os);
915   // Precision
916   Precision.toString(os);
917 
918   // Vector modifier
919   if (!VectorNumElts.isInvalid())
920     os << 'v' << VectorNumElts.getConstantAmount();
921 
922   // Length modifier
923   os << LM.toString();
924   // Conversion specifier
925   os << CS.toString();
926 }
927 
928 bool PrintfSpecifier::hasValidPlusPrefix() const {
929   if (!HasPlusPrefix)
930     return true;
931 
932   // The plus prefix only makes sense for signed conversions
933   switch (CS.getKind()) {
934   case ConversionSpecifier::dArg:
935   case ConversionSpecifier::DArg:
936   case ConversionSpecifier::iArg:
937   case ConversionSpecifier::fArg:
938   case ConversionSpecifier::FArg:
939   case ConversionSpecifier::eArg:
940   case ConversionSpecifier::EArg:
941   case ConversionSpecifier::gArg:
942   case ConversionSpecifier::GArg:
943   case ConversionSpecifier::aArg:
944   case ConversionSpecifier::AArg:
945   case ConversionSpecifier::FreeBSDrArg:
946   case ConversionSpecifier::FreeBSDyArg:
947     return true;
948 
949   default:
950     return false;
951   }
952 }
953 
954 bool PrintfSpecifier::hasValidAlternativeForm() const {
955   if (!HasAlternativeForm)
956     return true;
957 
958   // Alternate form flag only valid with the oxXaAeEfFgG conversions
959   switch (CS.getKind()) {
960   case ConversionSpecifier::oArg:
961   case ConversionSpecifier::OArg:
962   case ConversionSpecifier::xArg:
963   case ConversionSpecifier::XArg:
964   case ConversionSpecifier::aArg:
965   case ConversionSpecifier::AArg:
966   case ConversionSpecifier::eArg:
967   case ConversionSpecifier::EArg:
968   case ConversionSpecifier::fArg:
969   case ConversionSpecifier::FArg:
970   case ConversionSpecifier::gArg:
971   case ConversionSpecifier::GArg:
972   case ConversionSpecifier::FreeBSDrArg:
973   case ConversionSpecifier::FreeBSDyArg:
974     return true;
975 
976   default:
977     return false;
978   }
979 }
980 
981 bool PrintfSpecifier::hasValidLeadingZeros() const {
982   if (!HasLeadingZeroes)
983     return true;
984 
985   // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
986   switch (CS.getKind()) {
987   case ConversionSpecifier::dArg:
988   case ConversionSpecifier::DArg:
989   case ConversionSpecifier::iArg:
990   case ConversionSpecifier::oArg:
991   case ConversionSpecifier::OArg:
992   case ConversionSpecifier::uArg:
993   case ConversionSpecifier::UArg:
994   case ConversionSpecifier::xArg:
995   case ConversionSpecifier::XArg:
996   case ConversionSpecifier::aArg:
997   case ConversionSpecifier::AArg:
998   case ConversionSpecifier::eArg:
999   case ConversionSpecifier::EArg:
1000   case ConversionSpecifier::fArg:
1001   case ConversionSpecifier::FArg:
1002   case ConversionSpecifier::gArg:
1003   case ConversionSpecifier::GArg:
1004   case ConversionSpecifier::FreeBSDrArg:
1005   case ConversionSpecifier::FreeBSDyArg:
1006     return true;
1007 
1008   default:
1009     return false;
1010   }
1011 }
1012 
1013 bool PrintfSpecifier::hasValidSpacePrefix() const {
1014   if (!HasSpacePrefix)
1015     return true;
1016 
1017   // The space prefix only makes sense for signed conversions
1018   switch (CS.getKind()) {
1019   case ConversionSpecifier::dArg:
1020   case ConversionSpecifier::DArg:
1021   case ConversionSpecifier::iArg:
1022   case ConversionSpecifier::fArg:
1023   case ConversionSpecifier::FArg:
1024   case ConversionSpecifier::eArg:
1025   case ConversionSpecifier::EArg:
1026   case ConversionSpecifier::gArg:
1027   case ConversionSpecifier::GArg:
1028   case ConversionSpecifier::aArg:
1029   case ConversionSpecifier::AArg:
1030   case ConversionSpecifier::FreeBSDrArg:
1031   case ConversionSpecifier::FreeBSDyArg:
1032     return true;
1033 
1034   default:
1035     return false;
1036   }
1037 }
1038 
1039 bool PrintfSpecifier::hasValidLeftJustified() const {
1040   if (!IsLeftJustified)
1041     return true;
1042 
1043   // The left justified flag is valid for all conversions except n
1044   switch (CS.getKind()) {
1045   case ConversionSpecifier::nArg:
1046     return false;
1047 
1048   default:
1049     return true;
1050   }
1051 }
1052 
1053 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
1054   if (!HasThousandsGrouping)
1055     return true;
1056 
1057   switch (CS.getKind()) {
1058     case ConversionSpecifier::dArg:
1059     case ConversionSpecifier::DArg:
1060     case ConversionSpecifier::iArg:
1061     case ConversionSpecifier::uArg:
1062     case ConversionSpecifier::UArg:
1063     case ConversionSpecifier::fArg:
1064     case ConversionSpecifier::FArg:
1065     case ConversionSpecifier::gArg:
1066     case ConversionSpecifier::GArg:
1067       return true;
1068     default:
1069       return false;
1070   }
1071 }
1072 
1073 bool PrintfSpecifier::hasValidPrecision() const {
1074   if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
1075     return true;
1076 
1077   // Precision is only valid with the diouxXaAeEfFgGsP conversions
1078   switch (CS.getKind()) {
1079   case ConversionSpecifier::dArg:
1080   case ConversionSpecifier::DArg:
1081   case ConversionSpecifier::iArg:
1082   case ConversionSpecifier::oArg:
1083   case ConversionSpecifier::OArg:
1084   case ConversionSpecifier::uArg:
1085   case ConversionSpecifier::UArg:
1086   case ConversionSpecifier::xArg:
1087   case ConversionSpecifier::XArg:
1088   case ConversionSpecifier::aArg:
1089   case ConversionSpecifier::AArg:
1090   case ConversionSpecifier::eArg:
1091   case ConversionSpecifier::EArg:
1092   case ConversionSpecifier::fArg:
1093   case ConversionSpecifier::FArg:
1094   case ConversionSpecifier::gArg:
1095   case ConversionSpecifier::GArg:
1096   case ConversionSpecifier::sArg:
1097   case ConversionSpecifier::FreeBSDrArg:
1098   case ConversionSpecifier::FreeBSDyArg:
1099   case ConversionSpecifier::PArg:
1100     return true;
1101 
1102   default:
1103     return false;
1104   }
1105 }
1106 bool PrintfSpecifier::hasValidFieldWidth() const {
1107   if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
1108       return true;
1109 
1110   // The field width is valid for all conversions except n
1111   switch (CS.getKind()) {
1112   case ConversionSpecifier::nArg:
1113     return false;
1114 
1115   default:
1116     return true;
1117   }
1118 }
1119