1 /*
2     Copyright (c) 2009 Andrew Caudwell (acaudwell@gmail.com)
3     All rights reserved.
4 
5     Redistribution and use in source and binary forms, with or without
6     modification, are permitted provided that the following conditions
7     are met:
8     1. Redistributions of source code must retain the above copyright
9        notice, this list of conditions and the following disclaimer.
10     2. Redistributions in binary form must reproduce the above copyright
11        notice, this list of conditions and the following disclaimer in the
12        documentation and/or other materials provided with the distribution.
13     3. The name of the author may not be used to endorse or promote products
14        derived from this software without specific prior written permission.
15 
16     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #include "regex.h"
29 
Regex(std::string regex,bool test)30 Regex::Regex(std::string regex, bool test) {
31 
32     re = pcre_compile(
33         regex.c_str(),
34         0,
35         &error,
36         &erroffset,
37         0
38     );
39 
40     if(!re) {
41         valid = false;
42 
43         if(!test) {
44             throw RegexCompilationException(regex);
45         }
46 
47     } else {
48         valid = true;
49     }
50 
51 }
52 
~Regex()53 Regex::~Regex() {
54     if(re != 0) pcre_free(re);
55 }
56 
isValid()57 bool Regex::isValid() {
58     return valid;
59 }
60 
replace(std::string & str,const std::string & replacement_str)61 bool Regex::replace(std::string& str, const std::string& replacement_str) {
62 
63     int offset = replaceOffset(str, replacement_str, 0);
64 
65     return (offset != -1);
66 }
67 
replaceAll(std::string & str,const std::string & replacement_str)68 bool Regex::replaceAll(std::string& str, const std::string& replacement_str) {
69 
70 
71     int offset = -1;
72 
73     while((offset = replaceOffset(str, replacement_str, offset+1)) != -1 && offset < str.size());
74 
75     return (offset != -1);
76 }
77 
replaceOffset(std::string & str,const std::string & replacement_str,int offset)78 int Regex::replaceOffset(std::string& str, const std::string& replacement_str, int offset) {
79 
80     int ovector[REGEX_MAX_MATCHES];
81 
82     int rc = pcre_exec(
83         re,
84         0,
85         str.c_str(),
86         str.size(),
87         offset,
88         0,
89         ovector,
90         REGEX_MAX_MATCHES
91     );
92 
93     //failed match
94     if(rc<1) {
95         return -1;
96     }
97 
98     // replace matched section of string
99     std::string new_str = str;
100     new_str.replace(ovector[0], ovector[1]-ovector[0], replacement_str);
101 
102     size_t end_offset = ovector[0] + replacement_str.size();
103 
104     for (int i = 1; i < rc; i++) {
105         int match_start = ovector[2*i];
106         int match_end   = ovector[2*i+1];
107 
108         std::string matched_str;
109 
110         if(match_start != -1) {
111             matched_str = std::string(str, match_start, match_end-match_start);
112         }
113 
114         // check if 'str' contains $i, if it does, replace with match string
115         size_t string_size = new_str.size();
116 
117         for(size_t j=0; j<string_size-1; j++) {
118             if(new_str[j] == '$' && atoi(&(new_str[j+1])) == i) {
119                 new_str.replace(j, 2, matched_str);
120                 size_t new_string_size = new_str.size();
121                 end_offset += (new_string_size-string_size);
122                 string_size = new_string_size;
123             }
124         }
125     }
126 
127     str = new_str;
128 
129     return end_offset;
130 }
131 
match(const std::string & str,std::vector<std::string> * results)132 bool Regex::match(const std::string& str, std::vector<std::string>* results) {
133 
134     if(results != 0) results->clear();
135     int offset = matchOffset(str, results, 0);
136     return offset != -1;
137 }
138 
matchAll(const std::string & str,std::vector<std::string> * results)139 bool Regex::matchAll(const std::string& str, std::vector<std::string>* results) {
140 
141     int offset = 0;
142     int match_count = 0;
143     if(results != 0) results->clear();
144 
145     int str_size = str.size();
146 
147     while((offset = matchOffset(str, results, offset)) != -1) {
148         match_count++;
149         if(offset >= str_size) break;
150     }
151 
152     return match_count>0;
153 }
154 
matchOffset(const std::string & str,std::vector<std::string> * results,int offset)155 int Regex::matchOffset(const std::string& str, std::vector<std::string>* results, int offset) {
156 
157     int ovector[REGEX_MAX_MATCHES];
158 
159     if(offset >= str.size()) return -1;
160 
161     // To allow ^ to match the start of the remaining string
162     // we offset the string before passing it to pcre_exec
163 
164     int rc = pcre_exec(
165         re,
166         0,
167         str.c_str() + offset,
168         str.size()-offset,
169         0,
170         0,
171         ovector,
172         REGEX_MAX_MATCHES
173     );
174 
175     //failed match
176     if(rc<1) {
177         return -1;
178     }
179 
180     if(results!=0) {
181         for (int i = 1; i < rc; i++) {
182             int match_start = ovector[2*i];
183             int match_end   = ovector[2*i+1];
184 
185             // insert a empty string for non-matching optional regex
186             if(match_start == -1) {
187                 results->push_back(std::string(""));
188             } else {
189                 std::string match(str, match_start+offset, match_end-match_start);
190                 results->push_back(match);
191             }
192         }
193     }
194 
195     return ovector[1]+offset;
196 }
197