1 // Copyright 2010 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "re2/set.h"
6
7 #include <stddef.h>
8 #include <algorithm>
9 #include <memory>
10 #include <utility>
11
12 #include "util/util.h"
13 #include "util/logging.h"
14 #include "re2/pod_array.h"
15 #include "re2/prog.h"
16 #include "re2/re2.h"
17 #include "re2/regexp.h"
18 #include "re2/stringpiece.h"
19
20 namespace re2 {
21
Set(const RE2::Options & options,RE2::Anchor anchor)22 RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
23 : options_(options),
24 anchor_(anchor),
25 compiled_(false),
26 size_(0) {
27 options_.set_never_capture(true); // might unblock some optimisations
28 }
29
~Set()30 RE2::Set::~Set() {
31 for (size_t i = 0; i < elem_.size(); i++)
32 elem_[i].second->Decref();
33 }
34
Set(Set && other)35 RE2::Set::Set(Set&& other)
36 : options_(other.options_),
37 anchor_(other.anchor_),
38 elem_(std::move(other.elem_)),
39 compiled_(other.compiled_),
40 size_(other.size_),
41 prog_(std::move(other.prog_)) {
42 other.elem_.clear();
43 other.elem_.shrink_to_fit();
44 other.compiled_ = false;
45 other.size_ = 0;
46 other.prog_.reset();
47 }
48
operator =(Set && other)49 RE2::Set& RE2::Set::operator=(Set&& other) {
50 this->~Set();
51 (void) new (this) Set(std::move(other));
52 return *this;
53 }
54
Add(const StringPiece & pattern,std::string * error)55 int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
56 if (compiled_) {
57 LOG(DFATAL) << "RE2::Set::Add() called after compiling";
58 return -1;
59 }
60
61 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
62 options_.ParseFlags());
63 RegexpStatus status;
64 re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
65 if (re == NULL) {
66 if (error != NULL)
67 *error = status.Text();
68 if (options_.log_errors())
69 LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
70 return -1;
71 }
72
73 // Concatenate with match index and push on vector.
74 int n = static_cast<int>(elem_.size());
75 re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
76 if (re->op() == kRegexpConcat) {
77 int nsub = re->nsub();
78 PODArray<re2::Regexp*> sub(nsub + 1);
79 for (int i = 0; i < nsub; i++)
80 sub[i] = re->sub()[i]->Incref();
81 sub[nsub] = m;
82 re->Decref();
83 re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
84 } else {
85 re2::Regexp* sub[2];
86 sub[0] = re;
87 sub[1] = m;
88 re = re2::Regexp::Concat(sub, 2, pf);
89 }
90 elem_.emplace_back(std::string(pattern), re);
91 return n;
92 }
93
Compile()94 bool RE2::Set::Compile() {
95 if (compiled_) {
96 LOG(DFATAL) << "RE2::Set::Compile() called more than once";
97 return false;
98 }
99 compiled_ = true;
100 size_ = static_cast<int>(elem_.size());
101
102 // Sort the elements by their patterns. This is good enough for now
103 // until we have a Regexp comparison function. (Maybe someday...)
104 std::sort(elem_.begin(), elem_.end(),
105 [](const Elem& a, const Elem& b) -> bool {
106 return a.first < b.first;
107 });
108
109 PODArray<re2::Regexp*> sub(size_);
110 for (int i = 0; i < size_; i++)
111 sub[i] = elem_[i].second;
112 elem_.clear();
113 elem_.shrink_to_fit();
114
115 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
116 options_.ParseFlags());
117 re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
118
119 prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
120 re->Decref();
121 return prog_ != nullptr;
122 }
123
Match(const StringPiece & text,std::vector<int> * v) const124 bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
125 return Match(text, v, NULL);
126 }
127
Match(const StringPiece & text,std::vector<int> * v,ErrorInfo * error_info) const128 bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
129 ErrorInfo* error_info) const {
130 if (!compiled_) {
131 LOG(DFATAL) << "RE2::Set::Match() called before compiling";
132 if (error_info != NULL)
133 error_info->kind = kNotCompiled;
134 return false;
135 }
136 #ifdef RE2_HAVE_THREAD_LOCAL
137 hooks::context = NULL;
138 #endif
139 bool dfa_failed = false;
140 std::unique_ptr<SparseSet> matches;
141 if (v != NULL) {
142 matches.reset(new SparseSet(size_));
143 v->clear();
144 }
145 bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
146 NULL, &dfa_failed, matches.get());
147 if (dfa_failed) {
148 if (options_.log_errors())
149 LOG(ERROR) << "DFA out of memory: "
150 << "program size " << prog_->size() << ", "
151 << "list count " << prog_->list_count() << ", "
152 << "bytemap range " << prog_->bytemap_range();
153 if (error_info != NULL)
154 error_info->kind = kOutOfMemory;
155 return false;
156 }
157 if (ret == false) {
158 if (error_info != NULL)
159 error_info->kind = kNoError;
160 return false;
161 }
162 if (v != NULL) {
163 if (matches->empty()) {
164 LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
165 if (error_info != NULL)
166 error_info->kind = kInconsistent;
167 return false;
168 }
169 v->assign(matches->begin(), matches->end());
170 }
171 if (error_info != NULL)
172 error_info->kind = kNoError;
173 return true;
174 }
175
176 } // namespace re2
177