1 /* cclive
2  * Copyright (C) 2010-2011  Toni Gundogdu <legatvs@gmail.com>
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <ccinternal>
19 
20 #include <sstream>
21 #include <boost/format.hpp>
22 #include <pcrecpp.h>
23 
24 #include <ccre>
25 
26 namespace cc
27 {
28 namespace re
29 {
30 
_check_re_error(const pcrecpp::RE & re)31 static void _check_re_error(const pcrecpp::RE& re)
32 {
33   if (re.error().length() >0)
34     {
35       std::stringstream b;
36       b << "bad regexp `" << re.pattern() << "': " << re.error();
37       throw std::runtime_error(b.str());
38     }
39 }
40 
_init_re_opts(const std::string & flags)41 static pcrecpp::RE_Options _init_re_opts(const std::string& flags)
42 {
43   pcrecpp::RE_Options opts;
44   opts.set_caseless(strstr(flags.c_str(), "i") != 0);
45   opts.set_utf8(true);
46   return opts;
47 }
48 
subst(const std::string & re,std::string & src)49 bool subst(const std::string& re, std::string& src)
50 {
51   std::string pat, sub, flags;
52 
53   static const char delims_b[] = "\\{\\<\\[\\(\\/";
54   static const char delims_c[] = "\\}\\>\\]\\)\\/";
55 
56   boost::format fmt =
57     boost::format("^s[%1%](.*)[%2%][%3%](.*)[%4%](.*)$")
58     % delims_b % delims_c % delims_b % delims_c;
59 
60   pcrecpp::RE rx(fmt.str(), pcrecpp::UTF8());
61 
62   if (rx.PartialMatch(re, &pat, &sub, &flags))
63     {
64       if (src.empty()) // Verify regexp only.
65         return true;
66 
67       pcrecpp::RE_Options opts = _init_re_opts(flags);
68       pcrecpp::RE subs(pat, opts);
69       _check_re_error(subs);
70 
71       (strstr(flags.c_str(), "g"))
72       ? subs.GlobalReplace(sub, &src)
73       : subs.Replace(sub, &src);
74 
75       return true;
76     }
77   return false;
78 }
79 
capture(const std::string & re,std::string & src)80 bool capture(const std::string& re, std::string& src)
81 {
82   std::string pat, flags;
83   pcrecpp::RE rx("^\\/(.*)\\/(.*)$", pcrecpp::UTF8());
84 
85   if (rx.PartialMatch(re, &pat, &flags))
86     {
87       if (src.empty()) // Check regexp
88         return true;
89 
90       pcrecpp::RE_Options opts = _init_re_opts(flags);
91       if (strstr(flags.c_str(), "g") != 0)
92         {
93           std::string orig(src);
94           pcrecpp::StringPiece sp(orig);
95           pcrecpp::RE re(pat, opts);
96           _check_re_error(re);
97           src.clear();
98 
99           std::string s;
100           while (re.FindAndConsume(&sp, &s))
101             src += s;
102         }
103       else
104         {
105           std::string tmp = src;
106           src.clear();
107           pcrecpp::RE re(pat, opts);
108           _check_re_error(re);
109           re.PartialMatch(tmp, &src);
110         }
111       return true;
112     }
113   return false;
114 }
115 
grep(const std::string & r,const std::string & s)116 bool grep(const std::string& r, const std::string& s)
117 {
118   pcrecpp::RE re(r, pcrecpp::UTF8());
119   _check_re_error(re);
120   return re.PartialMatch(s);
121 }
122 
tr_subst(const std::string & r,std::string & s)123 static void tr_subst(const std::string& r, std::string& s)
124 {
125   pcrecpp::RE rx("^s\\/(.*)\\/(.*)\\/(.*)$", pcrecpp::UTF8());
126   std::string pat, sub, flags;
127 
128   if (!rx.PartialMatch(r, &pat, &sub, &flags))
129     {
130       std::stringstream b;
131       b << "--tr: " << "no idea what to do with `" << r << "'";
132       throw std::runtime_error(b.str());
133     }
134 
135   if (s.empty()) // Validate regexp only.
136     return;
137 
138   pcrecpp::RE_Options o = _init_re_opts(flags);
139   pcrecpp::RE subs(pat, o);
140   _check_re_error(subs);
141 
142   (strstr(flags.c_str(), "g"))
143   ? subs.GlobalReplace(sub, &s)
144   : subs.Replace(sub, &s);
145 }
146 
tr_filter(const std::string & r,std::string & s)147 static void tr_filter(const std::string& r, std::string& s)
148 {
149   pcrecpp::RE rx("^\\/(.*)\\/(.*)$", pcrecpp::UTF8());
150   std::string pat, flags;
151 
152   if (!rx.PartialMatch(r, &pat, &flags))
153     {
154       std::stringstream b;
155       b << "--tr: " << "no idea what to do with `" << r << "'";
156       throw std::runtime_error(b.str());
157     }
158 
159   if (s.empty())  // Validate regexp only.
160     return;
161 
162   pcrecpp::RE_Options o = _init_re_opts(flags);
163 
164   if (strstr(flags.c_str(), "g") != 0)
165     {
166       std::string orig(s);
167       pcrecpp::StringPiece sp(orig);
168       s.clear();
169 
170       pcrecpp::RE re(pat, o);
171       _check_re_error(re);
172       std::string tmp;
173 
174       while (re.FindAndConsume(&sp, &tmp))
175         s += tmp;
176     }
177   else
178     {
179       std::string tmp = s;
180       s.clear();
181       pcrecpp::RE re(pat, o);
182       _check_re_error(re);
183       re.PartialMatch(tmp, &s);
184     }
185 }
186 
tr(const std::string & r,std::string & s)187 void tr(const std::string& r, std::string& s)
188 {
189   if (pcrecpp::RE("^s\\/", pcrecpp::UTF8()).PartialMatch(r))
190     tr_subst(r, s);
191   else
192     tr_filter(r, s);
193 }
194 
trim(std::string & s)195 void trim(std::string& s)
196 {
197   tr_subst("s/^[\\s]+//",   s);
198   tr_subst("s/\\s+$//",     s);
199   tr_subst("s/\\s\\s+/ /g", s);
200 }
201 
202 } // namespace re
203 
204 } // namespace cc
205 
206 // vim: set ts=2 sw=2 tw=72 expandtab:
207