1 //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a utility class used to parse user-provided text files with
10 // "special case lists" for code sanitizers. Such files are used to
11 // define an "ABI list" for DataFlowSanitizer and blacklists for sanitizers
12 // like AddressSanitizer or UndefinedBehaviorSanitizer.
13 //
14 // Empty lines and lines starting with "#" are ignored. Sections are defined
15 // using a '[section_name]' header and can be used to specify sanitizers the
16 // entries below it apply to. Section names are regular expressions, and
// entries without a section header match all sections (i.e. a '[*]' header
// is assumed).
19 // The remaining lines should have the form:
20 //   prefix:wildcard_expression[=category]
21 // If category is not specified, it is assumed to be empty string.
22 // Definitions of "prefix" and "category" are sanitizer-specific. For example,
23 // sanitizer blacklists support prefixes "src", "fun" and "global".
24 // Wildcard expressions define, respectively, source files, functions or
25 // globals which shouldn't be instrumented.
26 // Examples of categories:
27 //   "functional": used in DFSan to list functions with pure functional
28 //                 semantics.
29 //   "init": used in ASan blacklist to disable initialization-order bugs
30 //           detection for certain globals or source files.
31 // Full special case list file example:
32 // ---
33 // [address]
34 // # Blacklisted items:
35 // fun:*_ZN4base6subtle*
36 // global:*global_with_bad_access_or_initialization*
37 // global:*global_with_initialization_issues*=init
38 // type:*Namespace::ClassName*=init
39 // src:file_with_tricky_code.cc
40 // src:ignore-global-initializers-issues.cc=init
41 //
42 // [dataflow]
43 // # Functions with pure functional semantics:
44 // fun:cos=functional
45 // fun:sin=functional
46 // ---
47 // Note that the wild card is in fact an llvm::Regex, but * is automatically
48 // replaced with .*
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #ifndef LLVM_SUPPORT_SPECIALCASELIST_H
53 #define LLVM_SUPPORT_SPECIALCASELIST_H
54 
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/TrigramIndex.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
61 
62 namespace llvm {
63 class MemoryBuffer;
64 class Regex;
65 class StringRef;
66 
67 class SpecialCaseList {
68 public:
69   /// Parses the special case list entries from files. On failure, returns
70   /// 0 and writes an error message to string.
71   static std::unique_ptr<SpecialCaseList>
72   create(const std::vector<std::string> &Paths, std::string &Error);
73   /// Parses the special case list from a memory buffer. On failure, returns
74   /// 0 and writes an error message to string.
75   static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB,
76                                                  std::string &Error);
77   /// Parses the special case list entries from files. On failure, reports a
78   /// fatal error.
79   static std::unique_ptr<SpecialCaseList>
80   createOrDie(const std::vector<std::string> &Paths);
81 
82   ~SpecialCaseList();
83 
84   /// Returns true, if special case list contains a line
85   /// \code
86   ///   @Prefix:<E>=@Category
87   /// \endcode
88   /// where @Query satisfies wildcard expression <E> in a given @Section.
89   bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
90                  StringRef Category = StringRef()) const;
91 
92   /// Returns the line number corresponding to the special case list entry if
93   /// the special case list contains a line
94   /// \code
95   ///   @Prefix:<E>=@Category
96   /// \endcode
97   /// where @Query satisfies wildcard expression <E> in a given @Section.
98   /// Returns zero if there is no blacklist entry corresponding to this
99   /// expression.
100   unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
101                           StringRef Category = StringRef()) const;
102 
103 protected:
104   // Implementations of the create*() functions that can also be used by derived
105   // classes.
106   bool createInternal(const std::vector<std::string> &Paths,
107                       std::string &Error);
108   bool createInternal(const MemoryBuffer *MB, std::string &Error);
109 
110   SpecialCaseList() = default;
111   SpecialCaseList(SpecialCaseList const &) = delete;
112   SpecialCaseList &operator=(SpecialCaseList const &) = delete;
113 
114   /// Represents a set of regular expressions.  Regular expressions which are
115   /// "literal" (i.e. no regex metacharacters) are stored in Strings.  The
116   /// reason for doing so is efficiency; StringMap is much faster at matching
117   /// literal strings than Regex.
118   class Matcher {
119   public:
120     bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
121     // Returns the line number in the source file that this query matches to.
122     // Returns zero if no match is found.
123     unsigned match(StringRef Query) const;
124 
125   private:
126     StringMap<unsigned> Strings;
127     TrigramIndex Trigrams;
128     std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
129   };
130 
131   using SectionEntries = StringMap<StringMap<Matcher>>;
132 
133   struct Section {
SectionSection134     Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){};
135 
136     std::unique_ptr<Matcher> SectionMatcher;
137     SectionEntries Entries;
138   };
139 
140   std::vector<Section> Sections;
141 
142   /// Parses just-constructed SpecialCaseList entries from a memory buffer.
143   bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
144              std::string &Error);
145 
146   // Helper method for derived classes to search by Prefix, Query, and Category
147   // once they have already resolved a section entry.
148   unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix,
149                           StringRef Query, StringRef Category) const;
150 };
151 
152 }  // namespace llvm
153 
154 #endif  // LLVM_SUPPORT_SPECIALCASELIST_H
155 
156