1 /**************************************************************************
2 Copyright:
3 (C) 2008 - 2012 Alexander Shaduri <ashaduri 'at' gmail.com>
4 License: See LICENSE_gsmartcontrol.txt
5 ***************************************************************************/
6 /// \file
7 /// \author Alexander Shaduri
8 /// \ingroup applib
9 /// \weakgroup applib
10 /// @{
11
12 #ifndef APP_PCRECPP_H
13 #define APP_PCRECPP_H
14
15 // A wrapper header for pcrecpp
16
17 #include <pcrecpp.h>
18 #include <string>
19
20 #include "hz/debug.h"
21
22
23
24 /// Take a string of characters where each character represents a modifier
25 /// and return the appropriate pcre options.
26 /// - i - case insensitive match.
27 /// - m - multiline, read past the first line.
28 /// - s - dot matches newlines.
29 /// - E - $ matches only the end of the string (D in php, not available in perl).
30 /// - X - strict escape parsing (not available in perl).
31 /// - x - ignore whitespaces.
32 /// - 8 - handles UTF8 characters in pattern (u in php, not available in perl).
33 /// - U - ungreedy, reverses * and *? (not available in perl).
34 /// - N - disables matching parentheses (not available in perl or php).
app_pcre_get_options(const char * modifiers)35 inline pcrecpp::RE_Options app_pcre_get_options(const char* modifiers)
36 {
37 // ANYCRLF means any of crlf, cr, lf. Used in ^ and $.
38 // This overrides the build-time newline setting of pcre.
39 #ifdef PCRE_NEWLINE_ANYCRLF
40 pcrecpp::RE_Options options(PCRE_NEWLINE_ANYCRLF);
41 #else
42 pcrecpp::RE_Options options;
43 #endif
44
45 if (modifiers) {
46 char c = '\0';
47 while ((c = *modifiers++) != '\0') {
48 switch (c) {
49 // Note: Most of these are from pcretest man page.
50 // Perl lacks some of them.
51 case 'i': options.set_caseless(true); break; // case insensitive match.
52 case 'm': options.set_multiline(true); break; // read past the first line too.
53 case 's': options.set_dotall(true); break; // dot matches newlines.
54 case 'E': options.set_dollar_endonly(true); break; // not in perl. php has D. $ matches only at end.
55 case 'X': options.set_extra(true); break; // not in perl. strict escape parsing
56 case 'x': options.set_extended(true); break; // ignore whitespaces
57 case '8': options.set_utf8(true); break; // not in perl. php has u. handles UTF8 chars in pattern.
58 case 'U': options.set_ungreedy(true); break; // not in perl. reverses * and *?
59 case 'N': options.set_no_auto_capture(true); break; // not in perl or php. disables matching parentheses.
60 default: debug_out_error("app", DBG_FUNC_MSG << "Unknown modifier \'" << c << "\'\n"); break;
61 }
62 }
63 }
64
65 return options;
66 }
67
68
69
70 /// Accept pattern in form of "/pattern/modifiers".
71 /// Note: Slashes should be escaped within the pattern.
72 /// If the string doesn't start with a slash, it is treated as an ordinary pattern
73 /// without modifiers.
74 /// This function will make a RE object with ANYCRLF option set for portability
75 /// across various pcre builds.
app_pcre_re(const std::string & perl_pattern)76 inline pcrecpp::RE app_pcre_re(const std::string& perl_pattern)
77 {
78 if (perl_pattern.size() >= 2 && perl_pattern[0] == '/') {
79
80 // find the separator
81 std::string::size_type endpos = perl_pattern.rfind('/');
82 DBG_ASSERT(endpos != std::string::npos); // shouldn't happen
83
84 // no need to unescape slashes in pattern - pcre seems to not mind.
85 return pcrecpp::RE(perl_pattern.substr(1, endpos - 1),
86 app_pcre_get_options(perl_pattern.substr(endpos + 1).c_str()));
87 }
88
89 return pcrecpp::RE(perl_pattern, app_pcre_get_options(0));
90 }
91
92
93
94 /// Match a string against a pattern in "/pattern/modifiers" format.
95 inline bool app_pcre_match(const std::string& perl_pattern, const std::string& str,
96 const pcrecpp::Arg& ptr1 = pcrecpp::RE::no_arg,
97 const pcrecpp::Arg& ptr2 = pcrecpp::RE::no_arg,
98 const pcrecpp::Arg& ptr3 = pcrecpp::RE::no_arg,
99 const pcrecpp::Arg& ptr4 = pcrecpp::RE::no_arg,
100 const pcrecpp::Arg& ptr5 = pcrecpp::RE::no_arg,
101 const pcrecpp::Arg& ptr6 = pcrecpp::RE::no_arg,
102 const pcrecpp::Arg& ptr7 = pcrecpp::RE::no_arg,
103 const pcrecpp::Arg& ptr8 = pcrecpp::RE::no_arg,
104 const pcrecpp::Arg& ptr9 = pcrecpp::RE::no_arg,
105 const pcrecpp::Arg& ptr10 = pcrecpp::RE::no_arg,
106 const pcrecpp::Arg& ptr11 = pcrecpp::RE::no_arg,
107 const pcrecpp::Arg& ptr12 = pcrecpp::RE::no_arg,
108 const pcrecpp::Arg& ptr13 = pcrecpp::RE::no_arg,
109 const pcrecpp::Arg& ptr14 = pcrecpp::RE::no_arg,
110 const pcrecpp::Arg& ptr15 = pcrecpp::RE::no_arg,
111 const pcrecpp::Arg& ptr16 = pcrecpp::RE::no_arg)
112 {
113 return app_pcre_re(perl_pattern).PartialMatch(str,
114 ptr1, ptr2, ptr3, ptr4, ptr5, ptr6, ptr7, ptr8, ptr9, ptr10, ptr11, ptr12, ptr13, ptr14, ptr15, ptr16);
115 }
116
117
118
119 /// Match a string against a pattern in "/pattern/modifiers" format.
120 /// This overload is needed to avoid confusion with RE.
121 inline bool app_pcre_match(const char* perl_pattern, const std::string& str,
122 const pcrecpp::Arg& ptr1 = pcrecpp::RE::no_arg,
123 const pcrecpp::Arg& ptr2 = pcrecpp::RE::no_arg,
124 const pcrecpp::Arg& ptr3 = pcrecpp::RE::no_arg,
125 const pcrecpp::Arg& ptr4 = pcrecpp::RE::no_arg,
126 const pcrecpp::Arg& ptr5 = pcrecpp::RE::no_arg,
127 const pcrecpp::Arg& ptr6 = pcrecpp::RE::no_arg,
128 const pcrecpp::Arg& ptr7 = pcrecpp::RE::no_arg,
129 const pcrecpp::Arg& ptr8 = pcrecpp::RE::no_arg,
130 const pcrecpp::Arg& ptr9 = pcrecpp::RE::no_arg,
131 const pcrecpp::Arg& ptr10 = pcrecpp::RE::no_arg,
132 const pcrecpp::Arg& ptr11 = pcrecpp::RE::no_arg,
133 const pcrecpp::Arg& ptr12 = pcrecpp::RE::no_arg,
134 const pcrecpp::Arg& ptr13 = pcrecpp::RE::no_arg,
135 const pcrecpp::Arg& ptr14 = pcrecpp::RE::no_arg,
136 const pcrecpp::Arg& ptr15 = pcrecpp::RE::no_arg,
137 const pcrecpp::Arg& ptr16 = pcrecpp::RE::no_arg)
138 {
139 return app_pcre_match(std::string(perl_pattern), str,
140 ptr1, ptr2, ptr3, ptr4, ptr5, ptr6, ptr7, ptr8, ptr9, ptr10, ptr11, ptr12, ptr13, ptr14, ptr15, ptr16);
141 }
142
143
144
145 /// Match a string against a pattern in "/pattern/modifiers" format.
146 inline bool app_pcre_match(const pcrecpp::RE& re, const std::string& str,
147 const pcrecpp::Arg& ptr1 = pcrecpp::RE::no_arg,
148 const pcrecpp::Arg& ptr2 = pcrecpp::RE::no_arg,
149 const pcrecpp::Arg& ptr3 = pcrecpp::RE::no_arg,
150 const pcrecpp::Arg& ptr4 = pcrecpp::RE::no_arg,
151 const pcrecpp::Arg& ptr5 = pcrecpp::RE::no_arg,
152 const pcrecpp::Arg& ptr6 = pcrecpp::RE::no_arg,
153 const pcrecpp::Arg& ptr7 = pcrecpp::RE::no_arg,
154 const pcrecpp::Arg& ptr8 = pcrecpp::RE::no_arg,
155 const pcrecpp::Arg& ptr9 = pcrecpp::RE::no_arg,
156 const pcrecpp::Arg& ptr10 = pcrecpp::RE::no_arg,
157 const pcrecpp::Arg& ptr11 = pcrecpp::RE::no_arg,
158 const pcrecpp::Arg& ptr12 = pcrecpp::RE::no_arg,
159 const pcrecpp::Arg& ptr13 = pcrecpp::RE::no_arg,
160 const pcrecpp::Arg& ptr14 = pcrecpp::RE::no_arg,
161 const pcrecpp::Arg& ptr15 = pcrecpp::RE::no_arg,
162 const pcrecpp::Arg& ptr16 = pcrecpp::RE::no_arg)
163 {
164 return re.PartialMatch(str,
165 ptr1, ptr2, ptr3, ptr4, ptr5, ptr6, ptr7, ptr8, ptr9, ptr10, ptr11, ptr12, ptr13, ptr14, ptr15, ptr16);
166 }
167
168
169
170 /// Replace every occurence of pattern with replacement string in \c subject.
171 /// The pattern is in "/pattern/modifiers" format.
app_pcre_replace(const std::string & perl_pattern,const std::string & replacement,std::string & subject)172 inline int app_pcre_replace(const std::string& perl_pattern, const std::string& replacement, std::string& subject)
173 {
174 return app_pcre_re(perl_pattern).GlobalReplace(replacement, &subject);
175 }
176
177
178 /// Replace every occurence of pattern with replacement string in \c subject.
179 /// The pattern is in "/pattern/modifiers" format.
app_pcre_replace(const char * perl_pattern,const std::string & replacement,std::string & subject)180 inline int app_pcre_replace(const char* perl_pattern, const std::string& replacement, std::string& subject)
181 {
182 return app_pcre_replace(std::string(perl_pattern), replacement, subject);
183 }
184
185
186 /// Replace every occurence of pattern with replacement string in \c subject.
app_pcre_replace(const pcrecpp::RE & re,const std::string & replacement,std::string & subject)187 inline int app_pcre_replace(const pcrecpp::RE& re, const std::string& replacement, std::string& subject)
188 {
189 return re.GlobalReplace(replacement, &subject);
190 }
191
192
193
194 /// Replace the first occurence of pattern with replacement string in \c subject.
195 /// The pattern is in "/pattern/modifiers" format.
app_pcre_replace_once(const std::string & perl_pattern,const std::string & replacement,std::string & subject)196 inline bool app_pcre_replace_once(const std::string& perl_pattern, const std::string& replacement, std::string& subject)
197 {
198 return app_pcre_re(perl_pattern).Replace(replacement, &subject);
199 }
200
201
202
203 /// Replace the first occurence of pattern with replacement string in \c subject.
204 /// The pattern is in "/pattern/modifiers" format.
app_pcre_replace_once(const char * perl_pattern,const std::string & replacement,std::string & subject)205 inline bool app_pcre_replace_once(const char* perl_pattern, const std::string& replacement, std::string& subject)
206 {
207 return app_pcre_replace_once(std::string(perl_pattern), replacement, subject);
208 }
209
210
211
212 /// Replace first occurence of pattern with replacement string in \c subject.
app_pcre_replace_once(const pcrecpp::RE & re,const std::string & replacement,std::string & subject)213 inline bool app_pcre_replace_once(const pcrecpp::RE& re, const std::string& replacement, std::string& subject)
214 {
215 return re.Replace(replacement, &subject);
216 }
217
218
219
220 /// Escape a string to be used inside a regular expression. The result
221 /// won't contain any special expression characters.
app_pcre_escape(const std::string & str)222 inline std::string app_pcre_escape(const std::string& str)
223 {
224 return pcrecpp::RE::QuoteMeta(str);
225 }
226
227
228
229
230
231 #endif
232
233 /// @}
234