1 // Copyright 2010 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include "re2/set.h"
6 
7 #include <stddef.h>
8 #include <algorithm>
9 #include <memory>
10 #include <utility>
11 
12 #include "util/util.h"
13 #include "util/logging.h"
14 #include "re2/pod_array.h"
15 #include "re2/prog.h"
16 #include "re2/re2.h"
17 #include "re2/regexp.h"
18 #include "re2/stringpiece.h"
19 
20 namespace re2 {
21 
Set(const RE2::Options & options,RE2::Anchor anchor)22 RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
23     : options_(options),
24       anchor_(anchor),
25       compiled_(false),
26       size_(0) {
27   options_.set_never_capture(true);  // might unblock some optimisations
28 }
29 
~Set()30 RE2::Set::~Set() {
31   for (size_t i = 0; i < elem_.size(); i++)
32     elem_[i].second->Decref();
33 }
34 
Set(Set && other)35 RE2::Set::Set(Set&& other)
36     : options_(other.options_),
37       anchor_(other.anchor_),
38       elem_(std::move(other.elem_)),
39       compiled_(other.compiled_),
40       size_(other.size_),
41       prog_(std::move(other.prog_)) {
42   other.elem_.clear();
43   other.elem_.shrink_to_fit();
44   other.compiled_ = false;
45   other.size_ = 0;
46   other.prog_.reset();
47 }
48 
operator =(Set && other)49 RE2::Set& RE2::Set::operator=(Set&& other) {
50   this->~Set();
51   (void) new (this) Set(std::move(other));
52   return *this;
53 }
54 
Add(const StringPiece & pattern,std::string * error)55 int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
56   if (compiled_) {
57     LOG(DFATAL) << "RE2::Set::Add() called after compiling";
58     return -1;
59   }
60 
61   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
62     options_.ParseFlags());
63   RegexpStatus status;
64   re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
65   if (re == NULL) {
66     if (error != NULL)
67       *error = status.Text();
68     if (options_.log_errors())
69       LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
70     return -1;
71   }
72 
73   // Concatenate with match index and push on vector.
74   int n = static_cast<int>(elem_.size());
75   re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
76   if (re->op() == kRegexpConcat) {
77     int nsub = re->nsub();
78     PODArray<re2::Regexp*> sub(nsub + 1);
79     for (int i = 0; i < nsub; i++)
80       sub[i] = re->sub()[i]->Incref();
81     sub[nsub] = m;
82     re->Decref();
83     re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
84   } else {
85     re2::Regexp* sub[2];
86     sub[0] = re;
87     sub[1] = m;
88     re = re2::Regexp::Concat(sub, 2, pf);
89   }
90   elem_.emplace_back(std::string(pattern), re);
91   return n;
92 }
93 
Compile()94 bool RE2::Set::Compile() {
95   if (compiled_) {
96     LOG(DFATAL) << "RE2::Set::Compile() called more than once";
97     return false;
98   }
99   compiled_ = true;
100   size_ = static_cast<int>(elem_.size());
101 
102   // Sort the elements by their patterns. This is good enough for now
103   // until we have a Regexp comparison function. (Maybe someday...)
104   std::sort(elem_.begin(), elem_.end(),
105             [](const Elem& a, const Elem& b) -> bool {
106               return a.first < b.first;
107             });
108 
109   PODArray<re2::Regexp*> sub(size_);
110   for (int i = 0; i < size_; i++)
111     sub[i] = elem_[i].second;
112   elem_.clear();
113   elem_.shrink_to_fit();
114 
115   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
116     options_.ParseFlags());
117   re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
118 
119   prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
120   re->Decref();
121   return prog_ != nullptr;
122 }
123 
Match(const StringPiece & text,std::vector<int> * v) const124 bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
125   return Match(text, v, NULL);
126 }
127 
Match(const StringPiece & text,std::vector<int> * v,ErrorInfo * error_info) const128 bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
129                      ErrorInfo* error_info) const {
130   if (!compiled_) {
131     LOG(DFATAL) << "RE2::Set::Match() called before compiling";
132     if (error_info != NULL)
133       error_info->kind = kNotCompiled;
134     return false;
135   }
136 #ifdef RE2_HAVE_THREAD_LOCAL
137   hooks::context = NULL;
138 #endif
139   bool dfa_failed = false;
140   std::unique_ptr<SparseSet> matches;
141   if (v != NULL) {
142     matches.reset(new SparseSet(size_));
143     v->clear();
144   }
145   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
146                               NULL, &dfa_failed, matches.get());
147   if (dfa_failed) {
148     if (options_.log_errors())
149       LOG(ERROR) << "DFA out of memory: "
150                  << "program size " << prog_->size() << ", "
151                  << "list count " << prog_->list_count() << ", "
152                  << "bytemap range " << prog_->bytemap_range();
153     if (error_info != NULL)
154       error_info->kind = kOutOfMemory;
155     return false;
156   }
157   if (ret == false) {
158     if (error_info != NULL)
159       error_info->kind = kNoError;
160     return false;
161   }
162   if (v != NULL) {
163     if (matches->empty()) {
164       LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
165       if (error_info != NULL)
166         error_info->kind = kInconsistent;
167       return false;
168     }
169     v->assign(matches->begin(), matches->end());
170   }
171   if (error_info != NULL)
172     error_info->kind = kNoError;
173   return true;
174 }
175 
176 }  // namespace re2
177