1 // Copyright 2009 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_PREFILTER_H_
6 #define RE2_PREFILTER_H_
7 
8 // Prefilter is the class used to extract string guards from regexps.
9 // Rather than using Prefilter class directly, use FilteredRE2.
10 // See filtered_re2.h
11 
12 #include <set>
13 #include <string>
14 #include <vector>
15 
16 #include "util/util.h"
17 #include "util/logging.h"
18 
19 namespace re2 {
20 
21 class RE2;
22 
23 class Regexp;
24 
25 class Prefilter {
26   // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
27  public:
28   enum Op {
29     ALL = 0,  // Everything matches
30     NONE,  // Nothing matches
31     ATOM,  // The string atom() must match
32     AND,   // All in subs() must match
33     OR,   // One of subs() must match
34   };
35 
36   explicit Prefilter(Op op);
37   ~Prefilter();
38 
op()39   Op op() { return op_; }
atom()40   const std::string& atom() const { return atom_; }
set_unique_id(int id)41   void set_unique_id(int id) { unique_id_ = id; }
unique_id()42   int unique_id() const { return unique_id_; }
43 
44   // The children of the Prefilter node.
subs()45   std::vector<Prefilter*>* subs() {
46     DCHECK(op_ == AND || op_ == OR);
47     return subs_;
48   }
49 
50   // Set the children vector. Prefilter takes ownership of subs and
51   // subs_ will be deleted when Prefilter is deleted.
set_subs(std::vector<Prefilter * > * subs)52   void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; }
53 
54   // Given a RE2, return a Prefilter. The caller takes ownership of
55   // the Prefilter and should deallocate it. Returns NULL if Prefilter
56   // cannot be formed.
57   static Prefilter* FromRE2(const RE2* re2);
58 
59   // Returns a readable debug string of the prefilter.
60   std::string DebugString() const;
61 
62  private:
63   class Info;
64 
65   // Combines two prefilters together to create an AND. The passed
66   // Prefilters will be part of the returned Prefilter or deleted.
67   static Prefilter* And(Prefilter* a, Prefilter* b);
68 
69   // Combines two prefilters together to create an OR. The passed
70   // Prefilters will be part of the returned Prefilter or deleted.
71   static Prefilter* Or(Prefilter* a, Prefilter* b);
72 
73   // Generalized And/Or
74   static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
75 
76   static Prefilter* FromRegexp(Regexp* a);
77 
78   static Prefilter* FromString(const std::string& str);
79 
80   static Prefilter* OrStrings(std::set<std::string>* ss);
81 
82   static Info* BuildInfo(Regexp* re);
83 
84   Prefilter* Simplify();
85 
86   // Kind of Prefilter.
87   Op op_;
88 
89   // Sub-matches for AND or OR Prefilter.
90   std::vector<Prefilter*>* subs_;
91 
92   // Actual string to match in leaf node.
93   std::string atom_;
94 
95   // If different prefilters have the same string atom, or if they are
96   // structurally the same (e.g., OR of same atom strings) they are
97   // considered the same unique nodes. This is the id for each unique
98   // node. This field is populated with a unique id for every node,
99   // and -1 for duplicate nodes.
100   int unique_id_;
101 
102   Prefilter(const Prefilter&) = delete;
103   Prefilter& operator=(const Prefilter&) = delete;
104 };
105 
106 }  // namespace re2
107 
108 #endif  // RE2_PREFILTER_H_
109