1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "FormatStringParsing.h"
15 #include "clang/AST/FormatString.h"
16 #include "clang/AST/OSLog.h"
17 #include "clang/Basic/TargetInfo.h"
18 #include "llvm/Support/Regex.h"
19 
20 using clang::analyze_format_string::ArgType;
21 using clang::analyze_format_string::FormatStringHandler;
22 using clang::analyze_format_string::LengthModifier;
23 using clang::analyze_format_string::OptionalAmount;
24 using clang::analyze_format_string::ConversionSpecifier;
25 using clang::analyze_printf::PrintfSpecifier;
26 
27 using namespace clang;
28 
29 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
30         PrintfSpecifierResult;
31 
32 //===----------------------------------------------------------------------===//
33 // Methods for parsing format strings.
34 //===----------------------------------------------------------------------===//
35 
36 using analyze_format_string::ParseNonPositionAmount;
37 
ParsePrecision(FormatStringHandler & H,PrintfSpecifier & FS,const char * Start,const char * & Beg,const char * E,unsigned * argIndex)38 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
39                            const char *Start, const char *&Beg, const char *E,
40                            unsigned *argIndex) {
41   if (argIndex) {
42     FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
43   } else {
44     const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
45                                            analyze_format_string::PrecisionPos);
46     if (Amt.isInvalid())
47       return true;
48     FS.setPrecision(Amt);
49   }
50   return false;
51 }
52 
ParseObjCFlags(FormatStringHandler & H,PrintfSpecifier & FS,const char * FlagBeg,const char * E,bool Warn)53 static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
54                            const char *FlagBeg, const char *E, bool Warn) {
55    StringRef Flag(FlagBeg, E - FlagBeg);
56    // Currently there is only one flag.
57    if (Flag == "tt") {
58      FS.setHasObjCTechnicalTerm(FlagBeg);
59      return false;
60    }
61    // Handle either the case of no flag or an invalid flag.
62    if (Warn) {
63      if (Flag == "")
64        H.HandleEmptyObjCModifierFlag(FlagBeg, E  - FlagBeg);
65      else
66        H.HandleInvalidObjCModifierFlag(FlagBeg, E  - FlagBeg);
67    }
68    return true;
69 }
70 
ParsePrintfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target,bool Warn,bool isFreeBSDKPrintf)71 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
72                                                   const char *&Beg,
73                                                   const char *E,
74                                                   unsigned &argIndex,
75                                                   const LangOptions &LO,
76                                                   const TargetInfo &Target,
77                                                   bool Warn,
78                                                   bool isFreeBSDKPrintf) {
79 
80   using namespace clang::analyze_format_string;
81   using namespace clang::analyze_printf;
82 
83   const char *I = Beg;
84   const char *Start = nullptr;
85   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86 
87   // Look for a '%' character that indicates the start of a format specifier.
88   for ( ; I != E ; ++I) {
89     char c = *I;
90     if (c == '\0') {
91       // Detect spurious null characters, which are likely errors.
92       H.HandleNullChar(I);
93       return true;
94     }
95     if (c == '%') {
96       Start = I++;  // Record the start of the format specifier.
97       break;
98     }
99   }
100 
101   // No format specifier found?
102   if (!Start)
103     return false;
104 
105   if (I == E) {
106     // No more characters left?
107     if (Warn)
108       H.HandleIncompleteSpecifier(Start, E - Start);
109     return true;
110   }
111 
112   PrintfSpecifier FS;
113   if (ParseArgPosition(H, FS, Start, I, E))
114     return true;
115 
116   if (I == E) {
117     // No more characters left?
118     if (Warn)
119       H.HandleIncompleteSpecifier(Start, E - Start);
120     return true;
121   }
122 
123   if (*I == '{') {
124     ++I;
125     unsigned char PrivacyFlags = 0;
126     StringRef MatchedStr;
127 
128     do {
129       StringRef Str(I, E - I);
130       std::string Match = "^[[:space:]]*"
131                           "(private|public|sensitive|mask\\.[^[:space:],}]*)"
132                           "[[:space:]]*(,|})";
133       llvm::Regex R(Match);
134       SmallVector<StringRef, 2> Matches;
135 
136       if (R.match(Str, &Matches)) {
137         MatchedStr = Matches[1];
138         I += Matches[0].size();
139 
140         // Set the privacy flag if the privacy annotation in the
141         // comma-delimited segment is at least as strict as the privacy
142         // annotations in previous comma-delimited segments.
143         if (MatchedStr.starts_with("mask")) {
144           StringRef MaskType = MatchedStr.substr(sizeof("mask.") - 1);
145           unsigned Size = MaskType.size();
146           if (Warn && (Size == 0 || Size > 8))
147             H.handleInvalidMaskType(MaskType);
148           FS.setMaskType(MaskType);
149         } else if (MatchedStr.equals("sensitive"))
150           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsSensitive;
151         else if (PrivacyFlags !=
152                  clang::analyze_os_log::OSLogBufferItem::IsSensitive &&
153                  MatchedStr.equals("private"))
154           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPrivate;
155         else if (PrivacyFlags == 0 && MatchedStr.equals("public"))
156           PrivacyFlags = clang::analyze_os_log::OSLogBufferItem::IsPublic;
157       } else {
158         size_t CommaOrBracePos =
159             Str.find_if([](char c) { return c == ',' || c == '}'; });
160 
161         if (CommaOrBracePos == StringRef::npos) {
162           // Neither a comma nor the closing brace was found.
163           if (Warn)
164             H.HandleIncompleteSpecifier(Start, E - Start);
165           return true;
166         }
167 
168         I += CommaOrBracePos + 1;
169       }
170       // Continue until the closing brace is found.
171     } while (*(I - 1) == ',');
172 
173     // Set the privacy flag.
174     switch (PrivacyFlags) {
175     case 0:
176       break;
177     case clang::analyze_os_log::OSLogBufferItem::IsPrivate:
178       FS.setIsPrivate(MatchedStr.data());
179       break;
180     case clang::analyze_os_log::OSLogBufferItem::IsPublic:
181       FS.setIsPublic(MatchedStr.data());
182       break;
183     case clang::analyze_os_log::OSLogBufferItem::IsSensitive:
184       FS.setIsSensitive(MatchedStr.data());
185       break;
186     default:
187       llvm_unreachable("Unexpected privacy flag value");
188     }
189   }
190 
191   // Look for flags (if any).
192   bool hasMore = true;
193   for ( ; I != E; ++I) {
194     switch (*I) {
195       default: hasMore = false; break;
196       case '\'':
197         // FIXME: POSIX specific.  Always accept?
198         FS.setHasThousandsGrouping(I);
199         break;
200       case '-': FS.setIsLeftJustified(I); break;
201       case '+': FS.setHasPlusPrefix(I); break;
202       case ' ': FS.setHasSpacePrefix(I); break;
203       case '#': FS.setHasAlternativeForm(I); break;
204       case '0': FS.setHasLeadingZeros(I); break;
205     }
206     if (!hasMore)
207       break;
208   }
209 
210   if (I == E) {
211     // No more characters left?
212     if (Warn)
213       H.HandleIncompleteSpecifier(Start, E - Start);
214     return true;
215   }
216 
217   // Look for the field width (if any).
218   if (ParseFieldWidth(H, FS, Start, I, E,
219                       FS.usesPositionalArg() ? nullptr : &argIndex))
220     return true;
221 
222   if (I == E) {
223     // No more characters left?
224     if (Warn)
225       H.HandleIncompleteSpecifier(Start, E - Start);
226     return true;
227   }
228 
229   // Look for the precision (if any).
230   if (*I == '.') {
231     ++I;
232     if (I == E) {
233       if (Warn)
234         H.HandleIncompleteSpecifier(Start, E - Start);
235       return true;
236     }
237 
238     if (ParsePrecision(H, FS, Start, I, E,
239                        FS.usesPositionalArg() ? nullptr : &argIndex))
240       return true;
241 
242     if (I == E) {
243       // No more characters left?
244       if (Warn)
245         H.HandleIncompleteSpecifier(Start, E - Start);
246       return true;
247     }
248   }
249 
250   if (ParseVectorModifier(H, FS, I, E, LO))
251     return true;
252 
253   // Look for the length modifier.
254   if (ParseLengthModifier(FS, I, E, LO) && I == E) {
255     // No more characters left?
256     if (Warn)
257       H.HandleIncompleteSpecifier(Start, E - Start);
258     return true;
259   }
260 
261   // Look for the Objective-C modifier flags, if any.
262   // We parse these here, even if they don't apply to
263   // the conversion specifier, and then emit an error
264   // later if the conversion specifier isn't '@'.  This
265   // enables better recovery, and we don't know if
266   // these flags are applicable until later.
267   const char *ObjCModifierFlagsStart = nullptr,
268              *ObjCModifierFlagsEnd = nullptr;
269   if (*I == '[') {
270     ObjCModifierFlagsStart = I;
271     ++I;
272     auto flagStart = I;
273     for (;; ++I) {
274       ObjCModifierFlagsEnd = I;
275       if (I == E) {
276         if (Warn)
277           H.HandleIncompleteSpecifier(Start, E - Start);
278         return true;
279       }
280       // Did we find the closing ']'?
281       if (*I == ']') {
282         if (ParseObjCFlags(H, FS, flagStart, I, Warn))
283           return true;
284         ++I;
285         break;
286       }
287       // There are no separators defined yet for multiple
288       // Objective-C modifier flags.  When those are
289       // defined, this is the place to check.
290     }
291   }
292 
293   if (*I == '\0') {
294     // Detect spurious null characters, which are likely errors.
295     H.HandleNullChar(I);
296     return true;
297   }
298 
299   // Finally, look for the conversion specifier.
300   const char *conversionPosition = I++;
301   ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
302   switch (*conversionPosition) {
303     default:
304       break;
305     // C99: 7.19.6.1 (section 8).
306     case '%': k = ConversionSpecifier::PercentArg;   break;
307     case 'A': k = ConversionSpecifier::AArg; break;
308     case 'E': k = ConversionSpecifier::EArg; break;
309     case 'F': k = ConversionSpecifier::FArg; break;
310     case 'G': k = ConversionSpecifier::GArg; break;
311     case 'X': k = ConversionSpecifier::XArg; break;
312     case 'a': k = ConversionSpecifier::aArg; break;
313     case 'c': k = ConversionSpecifier::cArg; break;
314     case 'd': k = ConversionSpecifier::dArg; break;
315     case 'e': k = ConversionSpecifier::eArg; break;
316     case 'f': k = ConversionSpecifier::fArg; break;
317     case 'g': k = ConversionSpecifier::gArg; break;
318     case 'i': k = ConversionSpecifier::iArg; break;
319     case 'n':
320       // Not handled, but reserved in OpenCL and FreeBSD kernel.
321       if (!LO.OpenCL && !isFreeBSDKPrintf)
322         k = ConversionSpecifier::nArg;
323       break;
324     case 'o': k = ConversionSpecifier::oArg; break;
325     case 'p': k = ConversionSpecifier::pArg; break;
326     case 's': k = ConversionSpecifier::sArg; break;
327     case 'u': k = ConversionSpecifier::uArg; break;
328     case 'x': k = ConversionSpecifier::xArg; break;
329     // C23.
330     case 'b':
331       if (isFreeBSDKPrintf)
332         k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
333       else
334         k = ConversionSpecifier::bArg;
335       break;
336     case 'B': k = ConversionSpecifier::BArg; break;
337     // POSIX specific.
338     case 'C': k = ConversionSpecifier::CArg; break;
339     case 'S': k = ConversionSpecifier::SArg; break;
340     // Apple extension for os_log
341     case 'P':
342       k = ConversionSpecifier::PArg;
343       break;
344     // Objective-C.
345     case '@': k = ConversionSpecifier::ObjCObjArg; break;
346     // Glibc specific.
347     case 'm': k = ConversionSpecifier::PrintErrno; break;
348     case 'r':
349       if (isFreeBSDKPrintf)
350         k = ConversionSpecifier::FreeBSDrArg; // int
351       break;
352     case 'y':
353       if (isFreeBSDKPrintf)
354         k = ConversionSpecifier::FreeBSDyArg; // int
355       break;
356     // Apple-specific.
357     case 'D':
358       if (isFreeBSDKPrintf)
359         k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
360       else if (Target.getTriple().isOSDarwin())
361         k = ConversionSpecifier::DArg;
362       break;
363     case 'O':
364       if (Target.getTriple().isOSDarwin())
365         k = ConversionSpecifier::OArg;
366       break;
367     case 'U':
368       if (Target.getTriple().isOSDarwin())
369         k = ConversionSpecifier::UArg;
370       break;
371     // MS specific.
372     case 'Z':
373       if (Target.getTriple().isOSMSVCRT())
374         k = ConversionSpecifier::ZArg;
375       break;
376   }
377 
378   // Check to see if we used the Objective-C modifier flags with
379   // a conversion specifier other than '@'.
380   if (k != ConversionSpecifier::ObjCObjArg &&
381       k != ConversionSpecifier::InvalidSpecifier &&
382       ObjCModifierFlagsStart) {
383     H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart,
384                                            ObjCModifierFlagsEnd + 1,
385                                            conversionPosition);
386     return true;
387   }
388 
389   PrintfConversionSpecifier CS(conversionPosition, k);
390   FS.setConversionSpecifier(CS);
391   if (CS.consumesDataArgument() && !FS.usesPositionalArg())
392     FS.setArgIndex(argIndex++);
393   // FreeBSD kernel specific.
394   if (k == ConversionSpecifier::FreeBSDbArg ||
395       k == ConversionSpecifier::FreeBSDDArg)
396     argIndex++;
397 
398   if (k == ConversionSpecifier::InvalidSpecifier) {
399     unsigned Len = I - Start;
400     if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
401       CS.setEndScanList(Start + Len);
402       FS.setConversionSpecifier(CS);
403     }
404     // Assume the conversion takes one argument.
405     return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
406   }
407   return PrintfSpecifierResult(Start, FS);
408 }
409 
ParsePrintfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target,bool isFreeBSDKPrintf)410 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
411                                                      const char *I,
412                                                      const char *E,
413                                                      const LangOptions &LO,
414                                                      const TargetInfo &Target,
415                                                      bool isFreeBSDKPrintf) {
416 
417   unsigned argIndex = 0;
418 
419   // Keep looking for a format specifier until we have exhausted the string.
420   while (I != E) {
421     const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
422                                                             LO, Target, true,
423                                                             isFreeBSDKPrintf);
424     // Did a fail-stop error of any kind occur when parsing the specifier?
425     // If so, don't do any more processing.
426     if (FSR.shouldStop())
427       return true;
428     // Did we exhaust the string or encounter an error that
429     // we can recover from?
430     if (!FSR.hasValue())
431       continue;
432     // We have a format specifier.  Pass it to the callback.
433     if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
434                                  I - FSR.getStart(), Target))
435       return true;
436   }
437   assert(I == E && "Format string not exhausted");
438   return false;
439 }
440 
ParseFormatStringHasSArg(const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)441 bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
442                                                             const char *E,
443                                                             const LangOptions &LO,
444                                                             const TargetInfo &Target) {
445 
446   unsigned argIndex = 0;
447 
448   // Keep looking for a %s format specifier until we have exhausted the string.
449   FormatStringHandler H;
450   while (I != E) {
451     const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
452                                                             LO, Target, false,
453                                                             false);
454     // Did a fail-stop error of any kind occur when parsing the specifier?
455     // If so, don't do any more processing.
456     if (FSR.shouldStop())
457       return false;
458     // Did we exhaust the string or encounter an error that
459     // we can recover from?
460     if (!FSR.hasValue())
461       continue;
462     const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
463     // Return true if this a %s format specifier.
464     if (FS.getConversionSpecifier().getKind() == ConversionSpecifier::Kind::sArg)
465       return true;
466   }
467   return false;
468 }
469 
parseFormatStringHasFormattingSpecifiers(const char * Begin,const char * End,const LangOptions & LO,const TargetInfo & Target)470 bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
471     const char *Begin, const char *End, const LangOptions &LO,
472     const TargetInfo &Target) {
473   unsigned ArgIndex = 0;
474   // Keep looking for a formatting specifier until we have exhausted the string.
475   FormatStringHandler H;
476   while (Begin != End) {
477     const PrintfSpecifierResult &FSR =
478         ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false);
479     if (FSR.shouldStop())
480       break;
481     if (FSR.hasValue())
482       return true;
483   }
484   return false;
485 }
486 
487 //===----------------------------------------------------------------------===//
488 // Methods on PrintfSpecifier.
489 //===----------------------------------------------------------------------===//
490 
getScalarArgType(ASTContext & Ctx,bool IsObjCLiteral) const491 ArgType PrintfSpecifier::getScalarArgType(ASTContext &Ctx,
492                                           bool IsObjCLiteral) const {
493   if (CS.getKind() == ConversionSpecifier::cArg)
494     switch (LM.getKind()) {
495       case LengthModifier::None:
496         return Ctx.IntTy;
497       case LengthModifier::AsLong:
498       case LengthModifier::AsWide:
499         return ArgType(ArgType::WIntTy, "wint_t");
500       case LengthModifier::AsShort:
501         if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
502           return Ctx.IntTy;
503         [[fallthrough]];
504       default:
505         return ArgType::Invalid();
506     }
507 
508   if (CS.isIntArg())
509     switch (LM.getKind()) {
510       case LengthModifier::AsLongDouble:
511         // GNU extension.
512         return Ctx.LongLongTy;
513       case LengthModifier::None:
514       case LengthModifier::AsShortLong:
515         return Ctx.IntTy;
516       case LengthModifier::AsInt32:
517         return ArgType(Ctx.IntTy, "__int32");
518       case LengthModifier::AsChar:
519         return ArgType::AnyCharTy;
520       case LengthModifier::AsShort: return Ctx.ShortTy;
521       case LengthModifier::AsLong: return Ctx.LongTy;
522       case LengthModifier::AsLongLong:
523       case LengthModifier::AsQuad:
524         return Ctx.LongLongTy;
525       case LengthModifier::AsInt64:
526         return ArgType(Ctx.LongLongTy, "__int64");
527       case LengthModifier::AsIntMax:
528         return ArgType(Ctx.getIntMaxType(), "intmax_t");
529       case LengthModifier::AsSizeT:
530         return ArgType::makeSizeT(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
531       case LengthModifier::AsInt3264:
532         return Ctx.getTargetInfo().getTriple().isArch64Bit()
533                    ? ArgType(Ctx.LongLongTy, "__int64")
534                    : ArgType(Ctx.IntTy, "__int32");
535       case LengthModifier::AsPtrDiff:
536         return ArgType::makePtrdiffT(
537             ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
538       case LengthModifier::AsAllocate:
539       case LengthModifier::AsMAllocate:
540       case LengthModifier::AsWide:
541         return ArgType::Invalid();
542     }
543 
544   if (CS.isUIntArg())
545     switch (LM.getKind()) {
546       case LengthModifier::AsLongDouble:
547         // GNU extension.
548         return Ctx.UnsignedLongLongTy;
549       case LengthModifier::None:
550       case LengthModifier::AsShortLong:
551         return Ctx.UnsignedIntTy;
552       case LengthModifier::AsInt32:
553         return ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
554       case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
555       case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
556       case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
557       case LengthModifier::AsLongLong:
558       case LengthModifier::AsQuad:
559         return Ctx.UnsignedLongLongTy;
560       case LengthModifier::AsInt64:
561         return ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64");
562       case LengthModifier::AsIntMax:
563         return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
564       case LengthModifier::AsSizeT:
565         return ArgType::makeSizeT(ArgType(Ctx.getSizeType(), "size_t"));
566       case LengthModifier::AsInt3264:
567         return Ctx.getTargetInfo().getTriple().isArch64Bit()
568                    ? ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")
569                    : ArgType(Ctx.UnsignedIntTy, "unsigned __int32");
570       case LengthModifier::AsPtrDiff:
571         return ArgType::makePtrdiffT(
572             ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
573       case LengthModifier::AsAllocate:
574       case LengthModifier::AsMAllocate:
575       case LengthModifier::AsWide:
576         return ArgType::Invalid();
577     }
578 
579   if (CS.isDoubleArg()) {
580     if (!VectorNumElts.isInvalid()) {
581       switch (LM.getKind()) {
582       case LengthModifier::AsShort:
583         return Ctx.HalfTy;
584       case LengthModifier::AsShortLong:
585         return Ctx.FloatTy;
586       case LengthModifier::AsLong:
587       default:
588         return Ctx.DoubleTy;
589       }
590     }
591 
592     if (LM.getKind() == LengthModifier::AsLongDouble)
593       return Ctx.LongDoubleTy;
594     return Ctx.DoubleTy;
595   }
596 
597   if (CS.getKind() == ConversionSpecifier::nArg) {
598     switch (LM.getKind()) {
599       case LengthModifier::None:
600         return ArgType::PtrTo(Ctx.IntTy);
601       case LengthModifier::AsChar:
602         return ArgType::PtrTo(Ctx.SignedCharTy);
603       case LengthModifier::AsShort:
604         return ArgType::PtrTo(Ctx.ShortTy);
605       case LengthModifier::AsLong:
606         return ArgType::PtrTo(Ctx.LongTy);
607       case LengthModifier::AsLongLong:
608       case LengthModifier::AsQuad:
609         return ArgType::PtrTo(Ctx.LongLongTy);
610       case LengthModifier::AsIntMax:
611         return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
612       case LengthModifier::AsSizeT:
613         return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
614       case LengthModifier::AsPtrDiff:
615         return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
616       case LengthModifier::AsLongDouble:
617         return ArgType(); // FIXME: Is this a known extension?
618       case LengthModifier::AsAllocate:
619       case LengthModifier::AsMAllocate:
620       case LengthModifier::AsInt32:
621       case LengthModifier::AsInt3264:
622       case LengthModifier::AsInt64:
623       case LengthModifier::AsWide:
624         return ArgType::Invalid();
625       case LengthModifier::AsShortLong:
626         llvm_unreachable("only used for OpenCL which doesn not handle nArg");
627     }
628   }
629 
630   switch (CS.getKind()) {
631     case ConversionSpecifier::sArg:
632       if (LM.getKind() == LengthModifier::AsWideChar) {
633         if (IsObjCLiteral)
634           return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
635                          "const unichar *");
636         return ArgType(ArgType::WCStrTy, "wchar_t *");
637       }
638       if (LM.getKind() == LengthModifier::AsWide)
639         return ArgType(ArgType::WCStrTy, "wchar_t *");
640       return ArgType::CStrTy;
641     case ConversionSpecifier::SArg:
642       if (IsObjCLiteral)
643         return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
644                        "const unichar *");
645       if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
646           LM.getKind() == LengthModifier::AsShort)
647         return ArgType::CStrTy;
648       return ArgType(ArgType::WCStrTy, "wchar_t *");
649     case ConversionSpecifier::CArg:
650       if (IsObjCLiteral)
651         return ArgType(Ctx.UnsignedShortTy, "unichar");
652       if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() &&
653           LM.getKind() == LengthModifier::AsShort)
654         return Ctx.IntTy;
655       return ArgType(Ctx.WideCharTy, "wchar_t");
656     case ConversionSpecifier::pArg:
657     case ConversionSpecifier::PArg:
658       return ArgType::CPointerTy;
659     case ConversionSpecifier::ObjCObjArg:
660       return ArgType::ObjCPointerTy;
661     default:
662       break;
663   }
664 
665   // FIXME: Handle other cases.
666   return ArgType();
667 }
668 
669 
getArgType(ASTContext & Ctx,bool IsObjCLiteral) const670 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
671                                     bool IsObjCLiteral) const {
672   const PrintfConversionSpecifier &CS = getConversionSpecifier();
673 
674   if (!CS.consumesDataArgument())
675     return ArgType::Invalid();
676 
677   ArgType ScalarTy = getScalarArgType(Ctx, IsObjCLiteral);
678   if (!ScalarTy.isValid() || VectorNumElts.isInvalid())
679     return ScalarTy;
680 
681   return ScalarTy.makeVectorType(Ctx, VectorNumElts.getConstantAmount());
682 }
683 
fixType(QualType QT,const LangOptions & LangOpt,ASTContext & Ctx,bool IsObjCLiteral)684 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
685                               ASTContext &Ctx, bool IsObjCLiteral) {
686   // %n is different from other conversion specifiers; don't try to fix it.
687   if (CS.getKind() == ConversionSpecifier::nArg)
688     return false;
689 
690   // Handle Objective-C objects first. Note that while the '%@' specifier will
691   // not warn for structure pointer or void pointer arguments (because that's
692   // how CoreFoundation objects are implemented), we only show a fixit for '%@'
693   // if we know it's an object (block, id, class, or __attribute__((NSObject))).
694   if (QT->isObjCRetainableType()) {
695     if (!IsObjCLiteral)
696       return false;
697 
698     CS.setKind(ConversionSpecifier::ObjCObjArg);
699 
700     // Disable irrelevant flags
701     HasThousandsGrouping = false;
702     HasPlusPrefix = false;
703     HasSpacePrefix = false;
704     HasAlternativeForm = false;
705     HasLeadingZeroes = false;
706     Precision.setHowSpecified(OptionalAmount::NotSpecified);
707     LM.setKind(LengthModifier::None);
708 
709     return true;
710   }
711 
712   // Handle strings next (char *, wchar_t *)
713   if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
714     CS.setKind(ConversionSpecifier::sArg);
715 
716     // Disable irrelevant flags
717     HasAlternativeForm = false;
718     HasLeadingZeroes = false;
719 
720     // Set the long length modifier for wide characters
721     if (QT->getPointeeType()->isWideCharType())
722       LM.setKind(LengthModifier::AsWideChar);
723     else
724       LM.setKind(LengthModifier::None);
725 
726     return true;
727   }
728 
729   // If it's an enum, get its underlying type.
730   if (const EnumType *ETy = QT->getAs<EnumType>())
731     QT = ETy->getDecl()->getIntegerType();
732 
733   const BuiltinType *BT = QT->getAs<BuiltinType>();
734   if (!BT) {
735     const VectorType *VT = QT->getAs<VectorType>();
736     if (VT) {
737       QT = VT->getElementType();
738       BT = QT->getAs<BuiltinType>();
739       VectorNumElts = OptionalAmount(VT->getNumElements());
740     }
741   }
742 
743   // We can only work with builtin types.
744   if (!BT)
745     return false;
746 
747   // Set length modifier
748   switch (BT->getKind()) {
749   case BuiltinType::Bool:
750   case BuiltinType::WChar_U:
751   case BuiltinType::WChar_S:
752   case BuiltinType::Char8: // FIXME: Treat like 'char'?
753   case BuiltinType::Char16:
754   case BuiltinType::Char32:
755   case BuiltinType::UInt128:
756   case BuiltinType::Int128:
757   case BuiltinType::Half:
758   case BuiltinType::BFloat16:
759   case BuiltinType::Float16:
760   case BuiltinType::Float128:
761   case BuiltinType::Ibm128:
762   case BuiltinType::ShortAccum:
763   case BuiltinType::Accum:
764   case BuiltinType::LongAccum:
765   case BuiltinType::UShortAccum:
766   case BuiltinType::UAccum:
767   case BuiltinType::ULongAccum:
768   case BuiltinType::ShortFract:
769   case BuiltinType::Fract:
770   case BuiltinType::LongFract:
771   case BuiltinType::UShortFract:
772   case BuiltinType::UFract:
773   case BuiltinType::ULongFract:
774   case BuiltinType::SatShortAccum:
775   case BuiltinType::SatAccum:
776   case BuiltinType::SatLongAccum:
777   case BuiltinType::SatUShortAccum:
778   case BuiltinType::SatUAccum:
779   case BuiltinType::SatULongAccum:
780   case BuiltinType::SatShortFract:
781   case BuiltinType::SatFract:
782   case BuiltinType::SatLongFract:
783   case BuiltinType::SatUShortFract:
784   case BuiltinType::SatUFract:
785   case BuiltinType::SatULongFract:
786     // Various types which are non-trivial to correct.
787     return false;
788 
789 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
790   case BuiltinType::Id:
791 #include "clang/Basic/OpenCLImageTypes.def"
792 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
793   case BuiltinType::Id:
794 #include "clang/Basic/OpenCLExtensionTypes.def"
795 #define SVE_TYPE(Name, Id, SingletonId) \
796   case BuiltinType::Id:
797 #include "clang/Basic/AArch64SVEACLETypes.def"
798 #define PPC_VECTOR_TYPE(Name, Id, Size) \
799   case BuiltinType::Id:
800 #include "clang/Basic/PPCTypes.def"
801 #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
802 #include "clang/Basic/RISCVVTypes.def"
803 #define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
804 #include "clang/Basic/WebAssemblyReferenceTypes.def"
805 #define SIGNED_TYPE(Id, SingletonId)
806 #define UNSIGNED_TYPE(Id, SingletonId)
807 #define FLOATING_TYPE(Id, SingletonId)
808 #define BUILTIN_TYPE(Id, SingletonId) \
809   case BuiltinType::Id:
810 #include "clang/AST/BuiltinTypes.def"
811     // Misc other stuff which doesn't make sense here.
812     return false;
813 
814   case BuiltinType::UInt:
815   case BuiltinType::Int:
816   case BuiltinType::Float:
817     LM.setKind(VectorNumElts.isInvalid() ?
818                LengthModifier::None : LengthModifier::AsShortLong);
819     break;
820   case BuiltinType::Double:
821     LM.setKind(VectorNumElts.isInvalid() ?
822                LengthModifier::None : LengthModifier::AsLong);
823     break;
824   case BuiltinType::Char_U:
825   case BuiltinType::UChar:
826   case BuiltinType::Char_S:
827   case BuiltinType::SChar:
828     LM.setKind(LengthModifier::AsChar);
829     break;
830 
831   case BuiltinType::Short:
832   case BuiltinType::UShort:
833     LM.setKind(LengthModifier::AsShort);
834     break;
835 
836   case BuiltinType::Long:
837   case BuiltinType::ULong:
838     LM.setKind(LengthModifier::AsLong);
839     break;
840 
841   case BuiltinType::LongLong:
842   case BuiltinType::ULongLong:
843     LM.setKind(LengthModifier::AsLongLong);
844     break;
845 
846   case BuiltinType::LongDouble:
847     LM.setKind(LengthModifier::AsLongDouble);
848     break;
849   }
850 
851   // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
852   if (LangOpt.C99 || LangOpt.CPlusPlus11)
853     namedTypeToLengthModifier(QT, LM);
854 
855   // If fixing the length modifier was enough, we might be done.
856   if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
857     // If we're going to offer a fix anyway, make sure the sign matches.
858     switch (CS.getKind()) {
859     case ConversionSpecifier::uArg:
860     case ConversionSpecifier::UArg:
861       if (QT->isSignedIntegerType())
862         CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg);
863       break;
864     case ConversionSpecifier::dArg:
865     case ConversionSpecifier::DArg:
866     case ConversionSpecifier::iArg:
867       if (QT->isUnsignedIntegerType() && !HasPlusPrefix)
868         CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg);
869       break;
870     default:
871       // Other specifiers do not have signed/unsigned variants.
872       break;
873     }
874 
875     const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
876     if (ATR.isValid() && ATR.matchesType(Ctx, QT))
877       return true;
878   }
879 
880   // Set conversion specifier and disable any flags which do not apply to it.
881   // Let typedefs to char fall through to int, as %c is silly for uint8_t.
882   if (!QT->getAs<TypedefType>() && QT->isCharType()) {
883     CS.setKind(ConversionSpecifier::cArg);
884     LM.setKind(LengthModifier::None);
885     Precision.setHowSpecified(OptionalAmount::NotSpecified);
886     HasAlternativeForm = false;
887     HasLeadingZeroes = false;
888     HasPlusPrefix = false;
889   }
890   // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
891   else if (QT->isRealFloatingType()) {
892     CS.setKind(ConversionSpecifier::fArg);
893   } else if (QT->isSignedIntegerType()) {
894     CS.setKind(ConversionSpecifier::dArg);
895     HasAlternativeForm = false;
896   } else if (QT->isUnsignedIntegerType()) {
897     CS.setKind(ConversionSpecifier::uArg);
898     HasAlternativeForm = false;
899     HasPlusPrefix = false;
900   } else {
901     llvm_unreachable("Unexpected type");
902   }
903 
904   return true;
905 }
906 
toString(raw_ostream & os) const907 void PrintfSpecifier::toString(raw_ostream &os) const {
908   // Whilst some features have no defined order, we are using the order
909   // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
910   os << "%";
911 
912   // Positional args
913   if (usesPositionalArg()) {
914     os << getPositionalArgIndex() << "$";
915   }
916 
917   // Conversion flags
918   if (IsLeftJustified)    os << "-";
919   if (HasPlusPrefix)      os << "+";
920   if (HasSpacePrefix)     os << " ";
921   if (HasAlternativeForm) os << "#";
922   if (HasLeadingZeroes)   os << "0";
923 
924   // Minimum field width
925   FieldWidth.toString(os);
926   // Precision
927   Precision.toString(os);
928 
929   // Vector modifier
930   if (!VectorNumElts.isInvalid())
931     os << 'v' << VectorNumElts.getConstantAmount();
932 
933   // Length modifier
934   os << LM.toString();
935   // Conversion specifier
936   os << CS.toString();
937 }
938 
hasValidPlusPrefix() const939 bool PrintfSpecifier::hasValidPlusPrefix() const {
940   if (!HasPlusPrefix)
941     return true;
942 
943   // The plus prefix only makes sense for signed conversions
944   switch (CS.getKind()) {
945   case ConversionSpecifier::dArg:
946   case ConversionSpecifier::DArg:
947   case ConversionSpecifier::iArg:
948   case ConversionSpecifier::fArg:
949   case ConversionSpecifier::FArg:
950   case ConversionSpecifier::eArg:
951   case ConversionSpecifier::EArg:
952   case ConversionSpecifier::gArg:
953   case ConversionSpecifier::GArg:
954   case ConversionSpecifier::aArg:
955   case ConversionSpecifier::AArg:
956   case ConversionSpecifier::FreeBSDrArg:
957   case ConversionSpecifier::FreeBSDyArg:
958     return true;
959 
960   default:
961     return false;
962   }
963 }
964 
hasValidAlternativeForm() const965 bool PrintfSpecifier::hasValidAlternativeForm() const {
966   if (!HasAlternativeForm)
967     return true;
968 
969   // Alternate form flag only valid with the bBoxXaAeEfFgG conversions
970   switch (CS.getKind()) {
971   case ConversionSpecifier::bArg:
972   case ConversionSpecifier::BArg:
973   case ConversionSpecifier::oArg:
974   case ConversionSpecifier::OArg:
975   case ConversionSpecifier::xArg:
976   case ConversionSpecifier::XArg:
977   case ConversionSpecifier::aArg:
978   case ConversionSpecifier::AArg:
979   case ConversionSpecifier::eArg:
980   case ConversionSpecifier::EArg:
981   case ConversionSpecifier::fArg:
982   case ConversionSpecifier::FArg:
983   case ConversionSpecifier::gArg:
984   case ConversionSpecifier::GArg:
985   case ConversionSpecifier::FreeBSDrArg:
986   case ConversionSpecifier::FreeBSDyArg:
987     return true;
988 
989   default:
990     return false;
991   }
992 }
993 
hasValidLeadingZeros() const994 bool PrintfSpecifier::hasValidLeadingZeros() const {
995   if (!HasLeadingZeroes)
996     return true;
997 
998   // Leading zeroes flag only valid with the bBdiouxXaAeEfFgG conversions
999   switch (CS.getKind()) {
1000   case ConversionSpecifier::bArg:
1001   case ConversionSpecifier::BArg:
1002   case ConversionSpecifier::dArg:
1003   case ConversionSpecifier::DArg:
1004   case ConversionSpecifier::iArg:
1005   case ConversionSpecifier::oArg:
1006   case ConversionSpecifier::OArg:
1007   case ConversionSpecifier::uArg:
1008   case ConversionSpecifier::UArg:
1009   case ConversionSpecifier::xArg:
1010   case ConversionSpecifier::XArg:
1011   case ConversionSpecifier::aArg:
1012   case ConversionSpecifier::AArg:
1013   case ConversionSpecifier::eArg:
1014   case ConversionSpecifier::EArg:
1015   case ConversionSpecifier::fArg:
1016   case ConversionSpecifier::FArg:
1017   case ConversionSpecifier::gArg:
1018   case ConversionSpecifier::GArg:
1019   case ConversionSpecifier::FreeBSDrArg:
1020   case ConversionSpecifier::FreeBSDyArg:
1021     return true;
1022 
1023   default:
1024     return false;
1025   }
1026 }
1027 
hasValidSpacePrefix() const1028 bool PrintfSpecifier::hasValidSpacePrefix() const {
1029   if (!HasSpacePrefix)
1030     return true;
1031 
1032   // The space prefix only makes sense for signed conversions
1033   switch (CS.getKind()) {
1034   case ConversionSpecifier::dArg:
1035   case ConversionSpecifier::DArg:
1036   case ConversionSpecifier::iArg:
1037   case ConversionSpecifier::fArg:
1038   case ConversionSpecifier::FArg:
1039   case ConversionSpecifier::eArg:
1040   case ConversionSpecifier::EArg:
1041   case ConversionSpecifier::gArg:
1042   case ConversionSpecifier::GArg:
1043   case ConversionSpecifier::aArg:
1044   case ConversionSpecifier::AArg:
1045   case ConversionSpecifier::FreeBSDrArg:
1046   case ConversionSpecifier::FreeBSDyArg:
1047     return true;
1048 
1049   default:
1050     return false;
1051   }
1052 }
1053 
hasValidLeftJustified() const1054 bool PrintfSpecifier::hasValidLeftJustified() const {
1055   if (!IsLeftJustified)
1056     return true;
1057 
1058   // The left justified flag is valid for all conversions except n
1059   switch (CS.getKind()) {
1060   case ConversionSpecifier::nArg:
1061     return false;
1062 
1063   default:
1064     return true;
1065   }
1066 }
1067 
hasValidThousandsGroupingPrefix() const1068 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
1069   if (!HasThousandsGrouping)
1070     return true;
1071 
1072   switch (CS.getKind()) {
1073     case ConversionSpecifier::dArg:
1074     case ConversionSpecifier::DArg:
1075     case ConversionSpecifier::iArg:
1076     case ConversionSpecifier::uArg:
1077     case ConversionSpecifier::UArg:
1078     case ConversionSpecifier::fArg:
1079     case ConversionSpecifier::FArg:
1080     case ConversionSpecifier::gArg:
1081     case ConversionSpecifier::GArg:
1082       return true;
1083     default:
1084       return false;
1085   }
1086 }
1087 
hasValidPrecision() const1088 bool PrintfSpecifier::hasValidPrecision() const {
1089   if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
1090     return true;
1091 
1092   // Precision is only valid with the bBdiouxXaAeEfFgGsP conversions
1093   switch (CS.getKind()) {
1094   case ConversionSpecifier::bArg:
1095   case ConversionSpecifier::BArg:
1096   case ConversionSpecifier::dArg:
1097   case ConversionSpecifier::DArg:
1098   case ConversionSpecifier::iArg:
1099   case ConversionSpecifier::oArg:
1100   case ConversionSpecifier::OArg:
1101   case ConversionSpecifier::uArg:
1102   case ConversionSpecifier::UArg:
1103   case ConversionSpecifier::xArg:
1104   case ConversionSpecifier::XArg:
1105   case ConversionSpecifier::aArg:
1106   case ConversionSpecifier::AArg:
1107   case ConversionSpecifier::eArg:
1108   case ConversionSpecifier::EArg:
1109   case ConversionSpecifier::fArg:
1110   case ConversionSpecifier::FArg:
1111   case ConversionSpecifier::gArg:
1112   case ConversionSpecifier::GArg:
1113   case ConversionSpecifier::sArg:
1114   case ConversionSpecifier::FreeBSDrArg:
1115   case ConversionSpecifier::FreeBSDyArg:
1116   case ConversionSpecifier::PArg:
1117     return true;
1118 
1119   default:
1120     return false;
1121   }
1122 }
hasValidFieldWidth() const1123 bool PrintfSpecifier::hasValidFieldWidth() const {
1124   if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
1125       return true;
1126 
1127   // The field width is valid for all conversions except n
1128   switch (CS.getKind()) {
1129   case ConversionSpecifier::nArg:
1130     return false;
1131 
1132   default:
1133     return true;
1134   }
1135 }
1136