1 //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 //
8 // This is a utility class used to parse user-provided text files with
9 // "special case lists" for code sanitizers. Such files are used to
10 // define an "ABI list" for DataFlowSanitizer and allow/exclusion lists for
11 // sanitizers like AddressSanitizer or UndefinedBehaviorSanitizer.
12 //
13 // Empty lines and lines starting with "#" are ignored. Sections are defined
14 // using a '[section_name]' header and can be used to specify sanitizers the
15 // entries below it apply to. Section names are regular expressions, and
// entries without a section header match all sections (i.e. a '[*]' header
// is assumed).
18 // The remaining lines should have the form:
19 //   prefix:wildcard_expression[=category]
20 // If category is not specified, it is assumed to be empty string.
// Definitions of "prefix" and "category" are sanitizer-specific. For example,
// sanitizer exclusion lists support the prefixes "src", "mainfile", "fun" and
// "global". Wildcard expressions define, respectively, source files, main
// files, functions or globals which shouldn't be instrumented.
25 // Examples of categories:
26 //   "functional": used in DFSan to list functions with pure functional
27 //                 semantics.
28 //   "init": used in ASan exclusion list to disable initialization-order bugs
29 //           detection for certain globals or source files.
30 // Full special case list file example:
31 // ---
32 // [address]
33 // # Excluded items:
34 // fun:*_ZN4base6subtle*
35 // global:*global_with_bad_access_or_initialization*
36 // global:*global_with_initialization_issues*=init
37 // type:*Namespace::ClassName*=init
38 // src:file_with_tricky_code.cc
39 // src:ignore-global-initializers-issues.cc=init
40 // mainfile:main_file.cc
41 //
42 // [dataflow]
43 // # Functions with pure functional semantics:
44 // fun:cos=functional
45 // fun:sin=functional
46 // ---
// Note that the wildcard is in fact an llvm::Regex, but "*" is automatically
// replaced with ".*".
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #ifndef LLVM_SUPPORT_SPECIALCASELIST_H
53 #define LLVM_SUPPORT_SPECIALCASELIST_H
54 
55 #include "llvm/ADT/StringMap.h"
56 #include "llvm/Support/Regex.h"
57 #include "llvm/Support/TrigramIndex.h"
58 #include <memory>
59 #include <string>
60 #include <vector>
61 
62 namespace llvm {
// Forward declarations of types named in the declarations below.
// MemoryBuffer is only referred to through pointers and vfs::FileSystem only
// through references in this header, so incomplete types suffice for them.
// StringRef is also passed by value and default-constructed below, so its
// full definition must be supplied by one of the included headers.
class StringRef;
class MemoryBuffer;

namespace vfs {
class FileSystem;
} // namespace vfs
69 
70 class SpecialCaseList {
71 public:
72   /// Parses the special case list entries from files. On failure, returns
73   /// 0 and writes an error message to string.
74   static std::unique_ptr<SpecialCaseList>
75   create(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS,
76          std::string &Error);
77   /// Parses the special case list from a memory buffer. On failure, returns
78   /// 0 and writes an error message to string.
79   static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB,
80                                                  std::string &Error);
81   /// Parses the special case list entries from files. On failure, reports a
82   /// fatal error.
83   static std::unique_ptr<SpecialCaseList>
84   createOrDie(const std::vector<std::string> &Paths, llvm::vfs::FileSystem &FS);
85 
86   ~SpecialCaseList();
87 
88   /// Returns true, if special case list contains a line
89   /// \code
90   ///   @Prefix:<E>=@Category
91   /// \endcode
92   /// where @Query satisfies wildcard expression <E> in a given @Section.
93   bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
94                  StringRef Category = StringRef()) const;
95 
96   /// Returns the line number corresponding to the special case list entry if
97   /// the special case list contains a line
98   /// \code
99   ///   @Prefix:<E>=@Category
100   /// \endcode
101   /// where @Query satisfies wildcard expression <E> in a given @Section.
102   /// Returns zero if there is no exclusion entry corresponding to this
103   /// expression.
104   unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
105                           StringRef Category = StringRef()) const;
106 
107 protected:
108   // Implementations of the create*() functions that can also be used by derived
109   // classes.
110   bool createInternal(const std::vector<std::string> &Paths,
111                       vfs::FileSystem &VFS, std::string &Error);
112   bool createInternal(const MemoryBuffer *MB, std::string &Error);
113 
114   SpecialCaseList() = default;
115   SpecialCaseList(SpecialCaseList const &) = delete;
116   SpecialCaseList &operator=(SpecialCaseList const &) = delete;
117 
118   /// Represents a set of regular expressions.  Regular expressions which are
119   /// "literal" (i.e. no regex metacharacters) are stored in Strings.  The
120   /// reason for doing so is efficiency; StringMap is much faster at matching
121   /// literal strings than Regex.
122   class Matcher {
123   public:
124     bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
125     // Returns the line number in the source file that this query matches to.
126     // Returns zero if no match is found.
127     unsigned match(StringRef Query) const;
128 
129   private:
130     StringMap<unsigned> Strings;
131     TrigramIndex Trigrams;
132     std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
133   };
134 
135   using SectionEntries = StringMap<StringMap<Matcher>>;
136 
137   struct Section {
138     Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){};
139 
140     std::unique_ptr<Matcher> SectionMatcher;
141     SectionEntries Entries;
142   };
143 
144   std::vector<Section> Sections;
145 
146   /// Parses just-constructed SpecialCaseList entries from a memory buffer.
147   bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
148              std::string &Error);
149 
150   // Helper method for derived classes to search by Prefix, Query, and Category
151   // once they have already resolved a section entry.
152   unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix,
153                           StringRef Query, StringRef Category) const;
154 };
155 
156 }  // namespace llvm
157 
158 #endif  // LLVM_SUPPORT_SPECIALCASELIST_H
159 
160