1 /*
2 Copyright (c) 2009 Andrew Caudwell (acaudwell@gmail.com)
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "regex.h"
29
Regex(std::string regex,bool test)30 Regex::Regex(std::string regex, bool test) {
31
32 re = pcre_compile(
33 regex.c_str(),
34 0,
35 &error,
36 &erroffset,
37 0
38 );
39
40 if(!re) {
41 valid = false;
42
43 if(!test) {
44 throw RegexCompilationException(regex);
45 }
46
47 } else {
48 valid = true;
49 }
50
51 }
52
~Regex()53 Regex::~Regex() {
54 if(re != 0) pcre_free(re);
55 }
56
isValid()57 bool Regex::isValid() {
58 return valid;
59 }
60
replace(std::string & str,const std::string & replacement_str)61 bool Regex::replace(std::string& str, const std::string& replacement_str) {
62
63 int offset = replaceOffset(str, replacement_str, 0);
64
65 return (offset != -1);
66 }
67
replaceAll(std::string & str,const std::string & replacement_str)68 bool Regex::replaceAll(std::string& str, const std::string& replacement_str) {
69
70
71 int offset = -1;
72
73 while((offset = replaceOffset(str, replacement_str, offset+1)) != -1 && offset < str.size());
74
75 return (offset != -1);
76 }
77
replaceOffset(std::string & str,const std::string & replacement_str,int offset)78 int Regex::replaceOffset(std::string& str, const std::string& replacement_str, int offset) {
79
80 int ovector[REGEX_MAX_MATCHES];
81
82 int rc = pcre_exec(
83 re,
84 0,
85 str.c_str(),
86 str.size(),
87 offset,
88 0,
89 ovector,
90 REGEX_MAX_MATCHES
91 );
92
93 //failed match
94 if(rc<1) {
95 return -1;
96 }
97
98 // replace matched section of string
99 std::string new_str = str;
100 new_str.replace(ovector[0], ovector[1]-ovector[0], replacement_str);
101
102 size_t end_offset = ovector[0] + replacement_str.size();
103
104 for (int i = 1; i < rc; i++) {
105 int match_start = ovector[2*i];
106 int match_end = ovector[2*i+1];
107
108 std::string matched_str;
109
110 if(match_start != -1) {
111 matched_str = std::string(str, match_start, match_end-match_start);
112 }
113
114 // check if 'str' contains $i, if it does, replace with match string
115 size_t string_size = new_str.size();
116
117 for(size_t j=0; j<string_size-1; j++) {
118 if(new_str[j] == '$' && atoi(&(new_str[j+1])) == i) {
119 new_str.replace(j, 2, matched_str);
120 size_t new_string_size = new_str.size();
121 end_offset += (new_string_size-string_size);
122 string_size = new_string_size;
123 }
124 }
125 }
126
127 str = new_str;
128
129 return end_offset;
130 }
131
match(const std::string & str,std::vector<std::string> * results)132 bool Regex::match(const std::string& str, std::vector<std::string>* results) {
133
134 if(results != 0) results->clear();
135 int offset = matchOffset(str, results, 0);
136 return offset != -1;
137 }
138
matchAll(const std::string & str,std::vector<std::string> * results)139 bool Regex::matchAll(const std::string& str, std::vector<std::string>* results) {
140
141 int offset = 0;
142 int match_count = 0;
143 if(results != 0) results->clear();
144
145 int str_size = str.size();
146
147 while((offset = matchOffset(str, results, offset)) != -1) {
148 match_count++;
149 if(offset >= str_size) break;
150 }
151
152 return match_count>0;
153 }
154
matchOffset(const std::string & str,std::vector<std::string> * results,int offset)155 int Regex::matchOffset(const std::string& str, std::vector<std::string>* results, int offset) {
156
157 int ovector[REGEX_MAX_MATCHES];
158
159 if(offset >= str.size()) return -1;
160
161 // To allow ^ to match the start of the remaining string
162 // we offset the string before passing it to pcre_exec
163
164 int rc = pcre_exec(
165 re,
166 0,
167 str.c_str() + offset,
168 str.size()-offset,
169 0,
170 0,
171 ovector,
172 REGEX_MAX_MATCHES
173 );
174
175 //failed match
176 if(rc<1) {
177 return -1;
178 }
179
180 if(results!=0) {
181 for (int i = 1; i < rc; i++) {
182 int match_start = ovector[2*i];
183 int match_end = ovector[2*i+1];
184
185 // insert a empty string for non-matching optional regex
186 if(match_start == -1) {
187 results->push_back(std::string(""));
188 } else {
189 std::string match(str, match_start+offset, match_end-match_start);
190 results->push_back(match);
191 }
192 }
193 }
194
195 return ovector[1]+offset;
196 }
197