1 /**************************************************************************
2  Copyright:
3       (C) 2008 - 2012  Alexander Shaduri <ashaduri 'at' gmail.com>
4  License: See LICENSE_gsmartcontrol.txt
5 ***************************************************************************/
6 /// \file
7 /// \author Alexander Shaduri
8 /// \ingroup applib
9 /// \weakgroup applib
10 /// @{
11 
12 #ifndef APP_PCRECPP_H
13 #define APP_PCRECPP_H
14 
15 // A wrapper header for pcrecpp
16 
17 #include <pcrecpp.h>
18 #include <string>
19 
20 #include "hz/debug.h"
21 
22 
23 
24 /// Take a string of characters where each character represents a modifier
25 /// and return the appropriate pcre options.
26 /// - i - case insensitive match.
27 /// - m - multiline, read past the first line.
28 /// - s - dot matches newlines.
29 /// - E - $ matches only the end of the string (D in php, not available in perl).
30 /// - X - strict escape parsing (not available in perl).
31 /// - x - ignore whitespaces.
32 /// - 8 - handles UTF8 characters in pattern (u in php, not available in perl).
33 /// - U - ungreedy, reverses * and *? (not available in perl).
34 /// - N - disables matching parentheses (not available in perl or php).
app_pcre_get_options(const char * modifiers)35 inline pcrecpp::RE_Options app_pcre_get_options(const char* modifiers)
36 {
37 	// ANYCRLF means any of crlf, cr, lf. Used in ^ and $.
38 	// This overrides the build-time newline setting of pcre.
39 #ifdef PCRE_NEWLINE_ANYCRLF
40 	pcrecpp::RE_Options options(PCRE_NEWLINE_ANYCRLF);
41 #else
42 	pcrecpp::RE_Options options;
43 #endif
44 
45 	if (modifiers) {
46 		char c = '\0';
47 		while ((c = *modifiers++) != '\0') {
48 			switch (c) {
49 				// Note: Most of these are from pcretest man page.
50 				// Perl lacks some of them.
51 				case 'i': options.set_caseless(true); break;  // case insensitive match.
52 				case 'm': options.set_multiline(true); break;  // read past the first line too.
53 				case 's': options.set_dotall(true); break;  // dot matches newlines.
54 				case 'E': options.set_dollar_endonly(true); break;  // not in perl. php has D. $ matches only at end.
55 				case 'X': options.set_extra(true); break;  // not in perl. strict escape parsing
56 				case 'x': options.set_extended(true); break;  // ignore whitespaces
57 				case '8': options.set_utf8(true); break;  // not in perl. php has u. handles UTF8 chars in pattern.
58 				case 'U': options.set_ungreedy(true); break;  // not in perl. reverses * and *?
59 				case 'N': options.set_no_auto_capture(true); break;  // not in perl or php. disables matching parentheses.
60 				default: debug_out_error("app", DBG_FUNC_MSG << "Unknown modifier \'" << c << "\'\n"); break;
61 			}
62 		}
63 	}
64 
65 	return options;
66 }
67 
68 
69 
70 /// Accept pattern in form of "/pattern/modifiers".
71 /// Note: Slashes should be escaped within the pattern.
72 /// If the string doesn't start with a slash, it is treated as an ordinary pattern
73 /// without modifiers.
74 /// This function will make a RE object with ANYCRLF option set for portability
75 /// across various pcre builds.
app_pcre_re(const std::string & perl_pattern)76 inline pcrecpp::RE app_pcre_re(const std::string& perl_pattern)
77 {
78 	if (perl_pattern.size() >= 2 && perl_pattern[0] == '/') {
79 
80 		// find the separator
81 		std::string::size_type endpos = perl_pattern.rfind('/');
82 		DBG_ASSERT(endpos != std::string::npos);  // shouldn't happen
83 
84 		// no need to unescape slashes in pattern - pcre seems to not mind.
85 		return pcrecpp::RE(perl_pattern.substr(1, endpos - 1),
86 				app_pcre_get_options(perl_pattern.substr(endpos + 1).c_str()));
87 	}
88 
89 	return pcrecpp::RE(perl_pattern, app_pcre_get_options(0));
90 }
91 
92 
93 
94 /// Match a string against a pattern in "/pattern/modifiers" format.
95 inline bool app_pcre_match(const std::string& perl_pattern, const std::string& str,
96 		const pcrecpp::Arg& ptr1 = pcrecpp::RE::no_arg,
97 		const pcrecpp::Arg& ptr2 = pcrecpp::RE::no_arg,
98 		const pcrecpp::Arg& ptr3 = pcrecpp::RE::no_arg,
99 		const pcrecpp::Arg& ptr4 = pcrecpp::RE::no_arg,
100 		const pcrecpp::Arg& ptr5 = pcrecpp::RE::no_arg,
101 		const pcrecpp::Arg& ptr6 = pcrecpp::RE::no_arg,
102 		const pcrecpp::Arg& ptr7 = pcrecpp::RE::no_arg,
103 		const pcrecpp::Arg& ptr8 = pcrecpp::RE::no_arg,
104 		const pcrecpp::Arg& ptr9 = pcrecpp::RE::no_arg,
105 		const pcrecpp::Arg& ptr10 = pcrecpp::RE::no_arg,
106 		const pcrecpp::Arg& ptr11 = pcrecpp::RE::no_arg,
107 		const pcrecpp::Arg& ptr12 = pcrecpp::RE::no_arg,
108 		const pcrecpp::Arg& ptr13 = pcrecpp::RE::no_arg,
109 		const pcrecpp::Arg& ptr14 = pcrecpp::RE::no_arg,
110 		const pcrecpp::Arg& ptr15 = pcrecpp::RE::no_arg,
111 		const pcrecpp::Arg& ptr16 = pcrecpp::RE::no_arg)
112 {
113 	return app_pcre_re(perl_pattern).PartialMatch(str,
114 			ptr1, ptr2, ptr3, ptr4, ptr5, ptr6, ptr7, ptr8, ptr9, ptr10, ptr11, ptr12, ptr13, ptr14, ptr15, ptr16);
115 }
116 
117 
118 
119 /// Match a string against a pattern in "/pattern/modifiers" format.
120 /// This overload is needed to avoid confusion with RE.
121 inline bool app_pcre_match(const char* perl_pattern, const std::string& str,
122 		const pcrecpp::Arg& ptr1 = pcrecpp::RE::no_arg,
123 		const pcrecpp::Arg& ptr2 = pcrecpp::RE::no_arg,
124 		const pcrecpp::Arg& ptr3 = pcrecpp::RE::no_arg,
125 		const pcrecpp::Arg& ptr4 = pcrecpp::RE::no_arg,
126 		const pcrecpp::Arg& ptr5 = pcrecpp::RE::no_arg,
127 		const pcrecpp::Arg& ptr6 = pcrecpp::RE::no_arg,
128 		const pcrecpp::Arg& ptr7 = pcrecpp::RE::no_arg,
129 		const pcrecpp::Arg& ptr8 = pcrecpp::RE::no_arg,
130 		const pcrecpp::Arg& ptr9 = pcrecpp::RE::no_arg,
131 		const pcrecpp::Arg& ptr10 = pcrecpp::RE::no_arg,
132 		const pcrecpp::Arg& ptr11 = pcrecpp::RE::no_arg,
133 		const pcrecpp::Arg& ptr12 = pcrecpp::RE::no_arg,
134 		const pcrecpp::Arg& ptr13 = pcrecpp::RE::no_arg,
135 		const pcrecpp::Arg& ptr14 = pcrecpp::RE::no_arg,
136 		const pcrecpp::Arg& ptr15 = pcrecpp::RE::no_arg,
137 		const pcrecpp::Arg& ptr16 = pcrecpp::RE::no_arg)
138 {
139 	return app_pcre_match(std::string(perl_pattern), str,
140 			ptr1, ptr2, ptr3, ptr4, ptr5, ptr6, ptr7, ptr8, ptr9, ptr10, ptr11, ptr12, ptr13, ptr14, ptr15, ptr16);
141 }
142 
143 
144 
145 /// Match a string against a pattern in "/pattern/modifiers" format.
146 inline bool app_pcre_match(const pcrecpp::RE& re, const std::string& str,
147 		const pcrecpp::Arg& ptr1 = pcrecpp::RE::no_arg,
148 		const pcrecpp::Arg& ptr2 = pcrecpp::RE::no_arg,
149 		const pcrecpp::Arg& ptr3 = pcrecpp::RE::no_arg,
150 		const pcrecpp::Arg& ptr4 = pcrecpp::RE::no_arg,
151 		const pcrecpp::Arg& ptr5 = pcrecpp::RE::no_arg,
152 		const pcrecpp::Arg& ptr6 = pcrecpp::RE::no_arg,
153 		const pcrecpp::Arg& ptr7 = pcrecpp::RE::no_arg,
154 		const pcrecpp::Arg& ptr8 = pcrecpp::RE::no_arg,
155 		const pcrecpp::Arg& ptr9 = pcrecpp::RE::no_arg,
156 		const pcrecpp::Arg& ptr10 = pcrecpp::RE::no_arg,
157 		const pcrecpp::Arg& ptr11 = pcrecpp::RE::no_arg,
158 		const pcrecpp::Arg& ptr12 = pcrecpp::RE::no_arg,
159 		const pcrecpp::Arg& ptr13 = pcrecpp::RE::no_arg,
160 		const pcrecpp::Arg& ptr14 = pcrecpp::RE::no_arg,
161 		const pcrecpp::Arg& ptr15 = pcrecpp::RE::no_arg,
162 		const pcrecpp::Arg& ptr16 = pcrecpp::RE::no_arg)
163 {
164 	return re.PartialMatch(str,
165 			ptr1, ptr2, ptr3, ptr4, ptr5, ptr6, ptr7, ptr8, ptr9, ptr10, ptr11, ptr12, ptr13, ptr14, ptr15, ptr16);
166 }
167 
168 
169 
170 /// Replace every occurence of pattern with replacement string in \c subject.
171 /// The pattern is in "/pattern/modifiers" format.
app_pcre_replace(const std::string & perl_pattern,const std::string & replacement,std::string & subject)172 inline int app_pcre_replace(const std::string& perl_pattern, const std::string& replacement, std::string& subject)
173 {
174 	return app_pcre_re(perl_pattern).GlobalReplace(replacement, &subject);
175 }
176 
177 
178 /// Replace every occurence of pattern with replacement string in \c subject.
179 /// The pattern is in "/pattern/modifiers" format.
app_pcre_replace(const char * perl_pattern,const std::string & replacement,std::string & subject)180 inline int app_pcre_replace(const char* perl_pattern, const std::string& replacement, std::string& subject)
181 {
182 	return app_pcre_replace(std::string(perl_pattern), replacement, subject);
183 }
184 
185 
186 /// Replace every occurence of pattern with replacement string in \c subject.
app_pcre_replace(const pcrecpp::RE & re,const std::string & replacement,std::string & subject)187 inline int app_pcre_replace(const pcrecpp::RE& re, const std::string& replacement, std::string& subject)
188 {
189 	return re.GlobalReplace(replacement, &subject);
190 }
191 
192 
193 
194 /// Replace the first occurence of pattern with replacement string in \c subject.
195 /// The pattern is in "/pattern/modifiers" format.
app_pcre_replace_once(const std::string & perl_pattern,const std::string & replacement,std::string & subject)196 inline bool app_pcre_replace_once(const std::string& perl_pattern, const std::string& replacement, std::string& subject)
197 {
198 	return app_pcre_re(perl_pattern).Replace(replacement, &subject);
199 }
200 
201 
202 
203 /// Replace the first occurence of pattern with replacement string in \c subject.
204 /// The pattern is in "/pattern/modifiers" format.
app_pcre_replace_once(const char * perl_pattern,const std::string & replacement,std::string & subject)205 inline bool app_pcre_replace_once(const char* perl_pattern, const std::string& replacement, std::string& subject)
206 {
207 	return app_pcre_replace_once(std::string(perl_pattern), replacement, subject);
208 }
209 
210 
211 
212 /// Replace first occurence of pattern with replacement string in \c subject.
app_pcre_replace_once(const pcrecpp::RE & re,const std::string & replacement,std::string & subject)213 inline bool app_pcre_replace_once(const pcrecpp::RE& re, const std::string& replacement, std::string& subject)
214 {
215 	return re.Replace(replacement, &subject);
216 }
217 
218 
219 
220 /// Escape a string to be used inside a regular expression. The result
221 /// won't contain any special expression characters.
app_pcre_escape(const std::string & str)222 inline std::string app_pcre_escape(const std::string& str)
223 {
224 	return pcrecpp::RE::QuoteMeta(str);
225 }
226 
227 
228 
229 
230 
231 #endif
232 
233 /// @}
234