1 /*
2  *
3  * Copyright (c) 1998-2002
4  * John Maddock
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
12  /*
13   *   LOCATION:    see http://www.boost.org for most recent version.
14   *   FILE:        cregex.cpp
15   *   VERSION:     see <boost/version.hpp>
  *   DESCRIPTION: Implements the high-level class boost::RegEx
17   */
18 
19 
20 #define BOOST_REGEX_SOURCE
21 
22 #include <boost/regex.hpp>
23 #include <boost/cregex.hpp>
24 #if !defined(BOOST_NO_STD_STRING)
25 #include <map>
26 #include <list>
27 #include <boost/regex/v4/fileiter.hpp>
28 typedef boost::match_flag_type match_flag_type;
29 #include <cstdio>
30 
31 #ifdef BOOST_MSVC
32 #pragma warning(disable:4309)
33 #endif
34 #ifdef BOOST_INTEL
35 #pragma warning(disable:981 383)
36 #endif
37 
38 namespace boost{
39 
40 #ifdef BOOST_BORLANDC
41 #if BOOST_BORLANDC < 0x530
42 //
43 // we need to instantiate the vector classes we use
44 // since declaring a reference to type doesn't seem to
45 // do the job...
46 std::vector<std::size_t> inst1;
47 std::vector<std::string> inst2;
48 #endif
49 #endif
50 
51 namespace{
52 
//
// Builds a std::string from the characters in [i, j).
// Works with any iterator that supports !=, ++ and dereferences
// to something convertible to char.
//
template <class iterator>
std::string to_string(iterator i, iterator j)
{
   std::string text;
   for(; i != j; ++i)
      text.push_back(*i);
   return text;
}
64 
//
// Overload for plain character pointers: copies [i, j) into a
// std::string in one step.
//
inline std::string to_string(const char* i, const char* j)
{
   return std::string(i, static_cast<std::string::size_type>(j - i));
}
69 
70 }
71 namespace BOOST_REGEX_DETAIL_NS{
72 
73 #ifdef BOOST_MSVC
74 #  pragma warning(push)
75 #pragma warning(disable:26812)
76 #endif
77 class RegExData
78 {
79 public:
80    enum type
81    {
82       type_pc,
83       type_pf,
84       type_copy
85    };
86    regex e;
87    cmatch m;
88 #ifndef BOOST_REGEX_NO_FILEITER
89    match_results<mapfile::iterator> fm;
90 #endif
91    type t;
92    const char* pbase;
93 #ifndef BOOST_REGEX_NO_FILEITER
94    mapfile::iterator fbase;
95 #endif
96    std::map<int, std::string, std::less<int> > strings;
97    std::map<int, std::ptrdiff_t, std::less<int> > positions;
98    void update();
99    void clean();
RegExData()100    RegExData() : e(), m(),
101 #ifndef BOOST_REGEX_NO_FILEITER
102    fm(),
103 #endif
104    t(type_copy), pbase(0),
105 #ifndef BOOST_REGEX_NO_FILEITER
106    fbase(),
107 #endif
108    strings(), positions() {}
109 };
110 #ifdef BOOST_MSVC
111 #  pragma warning(pop)
112 #endif
113 
update()114 void RegExData::update()
115 {
116    strings.erase(strings.begin(), strings.end());
117    positions.erase(positions.begin(), positions.end());
118    if(t == type_pc)
119    {
120       for(unsigned int i = 0; i < m.size(); ++i)
121       {
122          if(m[i].matched) strings[i] = std::string(m[i].first, m[i].second);
123          positions[i] = m[i].matched ? m[i].first - pbase : -1;
124       }
125    }
126 #ifndef BOOST_REGEX_NO_FILEITER
127    else
128    {
129       for(unsigned int i = 0; i < fm.size(); ++i)
130       {
131          if(fm[i].matched) strings[i] = to_string(fm[i].first, fm[i].second);
132          positions[i] = fm[i].matched ? fm[i].first - fbase : -1;
133       }
134    }
135 #endif
136    t = type_copy;
137 }
138 
clean()139 void RegExData::clean()
140 {
141 #ifndef BOOST_REGEX_NO_FILEITER
142    fbase = mapfile::iterator();
143    fm = match_results<mapfile::iterator>();
144 #endif
145 }
146 
147 } // namespace
148 
RegEx()149 RegEx::RegEx()
150 {
151    pdata = new BOOST_REGEX_DETAIL_NS::RegExData();
152 }
153 
//
// Copy constructor: takes a separate copy of the other object's
// private data.
//
RegEx::RegEx(const RegEx& o)
   : pdata(new BOOST_REGEX_DETAIL_NS::RegExData(*o.pdata))
{
}
158 
~RegEx()159 RegEx::~RegEx()
160 {
161    delete pdata;
162 }
163 
RegEx(const char * c,bool icase)164 RegEx::RegEx(const char* c, bool icase)
165 {
166    pdata = new BOOST_REGEX_DETAIL_NS::RegExData();
167    SetExpression(c, icase);
168 }
169 
RegEx(const std::string & s,bool icase)170 RegEx::RegEx(const std::string& s, bool icase)
171 {
172    pdata = new BOOST_REGEX_DETAIL_NS::RegExData();
173    SetExpression(s.c_str(), icase);
174 }
175 
//
// Copy assignment: overwrites our private data with a copy of the
// other object's private data (the existing allocation is reused).
//
RegEx& RegEx::operator=(const RegEx& o)
{
   BOOST_REGEX_DETAIL_NS::RegExData& mine = *pdata;
   mine = *o.pdata;
   return *this;
}
181 
operator =(const char * p)182 RegEx& RegEx::operator=(const char* p)
183 {
184    SetExpression(p, false);
185    return *this;
186 }
187 
SetExpression(const char * p,bool icase)188 unsigned int RegEx::SetExpression(const char* p, bool icase)
189 {
190    boost::uint_fast32_t f = icase ? regex::normal | regex::icase : regex::normal;
191    return pdata->e.set_expression(p, f);
192 }
193 
error_code() const194 unsigned int RegEx::error_code()const
195 {
196    return pdata->e.error_code();
197 }
198 
199 
Expression() const200 std::string RegEx::Expression()const
201 {
202    return pdata->e.expression();
203 }
204 
205 //
206 // now matching operators:
207 //
Match(const char * p,match_flag_type flags)208 bool RegEx::Match(const char* p, match_flag_type flags)
209 {
210    pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
211    pdata->pbase = p;
212    const char* end = p;
213    while(*end)++end;
214 
215    if(regex_match(p, end, pdata->m, pdata->e, flags))
216    {
217       pdata->update();
218       return true;
219    }
220    return false;
221 }
222 
Search(const char * p,match_flag_type flags)223 bool RegEx::Search(const char* p, match_flag_type flags)
224 {
225    pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
226    pdata->pbase = p;
227    const char* end = p;
228    while(*end)++end;
229 
230    if(regex_search(p, end, pdata->m, pdata->e, flags))
231    {
232       pdata->update();
233       return true;
234    }
235    return false;
236 }
237 namespace BOOST_REGEX_DETAIL_NS{
238 struct pred1
239 {
240    GrepCallback cb;
241    RegEx* pe;
pred1boost::BOOST_REGEX_DETAIL_NS::pred1242    pred1(GrepCallback c, RegEx* i) : cb(c), pe(i) {}
operator ()boost::BOOST_REGEX_DETAIL_NS::pred1243    bool operator()(const cmatch& m)
244    {
245       pe->pdata->m = m;
246       return cb(*pe);
247    }
248 };
249 }
Grep(GrepCallback cb,const char * p,match_flag_type flags)250 unsigned int RegEx::Grep(GrepCallback cb, const char* p, match_flag_type flags)
251 {
252    pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
253    pdata->pbase = p;
254    const char* end = p;
255    while(*end)++end;
256 
257    unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred1(cb, this), p, end, pdata->e, flags);
258    if(result)
259       pdata->update();
260    return result;
261 }
262 namespace BOOST_REGEX_DETAIL_NS{
263 struct pred2
264 {
265    std::vector<std::string>& v;
266    RegEx* pe;
pred2boost::BOOST_REGEX_DETAIL_NS::pred2267    pred2(std::vector<std::string>& o, RegEx* e) : v(o), pe(e) {}
operator ()boost::BOOST_REGEX_DETAIL_NS::pred2268    bool operator()(const cmatch& m)
269    {
270       pe->pdata->m = m;
271       v.push_back(std::string(m[0].first, m[0].second));
272       return true;
273    }
274 private:
275    pred2& operator=(const pred2&);
276 };
277 }
278 
Grep(std::vector<std::string> & v,const char * p,match_flag_type flags)279 unsigned int RegEx::Grep(std::vector<std::string>& v, const char* p, match_flag_type flags)
280 {
281    pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
282    pdata->pbase = p;
283    const char* end = p;
284    while(*end)++end;
285 
286    unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred2(v, this), p, end, pdata->e, flags);
287    if(result)
288       pdata->update();
289    return result;
290 }
291 namespace BOOST_REGEX_DETAIL_NS{
292 struct pred3
293 {
294    std::vector<std::size_t>& v;
295    const char* base;
296    RegEx* pe;
pred3boost::BOOST_REGEX_DETAIL_NS::pred3297    pred3(std::vector<std::size_t>& o, const char* pb, RegEx* p) : v(o), base(pb), pe(p) {}
operator ()boost::BOOST_REGEX_DETAIL_NS::pred3298    bool operator()(const cmatch& m)
299    {
300       pe->pdata->m = m;
301       v.push_back(static_cast<std::size_t>(m[0].first - base));
302       return true;
303    }
304 private:
305    pred3& operator=(const pred3&);
306 };
307 }
Grep(std::vector<std::size_t> & v,const char * p,match_flag_type flags)308 unsigned int RegEx::Grep(std::vector<std::size_t>& v, const char* p, match_flag_type flags)
309 {
310    pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pc;
311    pdata->pbase = p;
312    const char* end = p;
313    while(*end)++end;
314 
315    unsigned int result = regex_grep(BOOST_REGEX_DETAIL_NS::pred3(v, p, this), p, end, pdata->e, flags);
316    if(result)
317       pdata->update();
318    return result;
319 }
320 #ifndef BOOST_REGEX_NO_FILEITER
321 namespace BOOST_REGEX_DETAIL_NS{
322 struct pred4
323 {
324    GrepFileCallback cb;
325    RegEx* pe;
326    const char* file;
327    bool ok;
pred4boost::BOOST_REGEX_DETAIL_NS::pred4328    pred4(GrepFileCallback c, RegEx* i, const char* f) : cb(c), pe(i), file(f), ok(true) {}
operator ()boost::BOOST_REGEX_DETAIL_NS::pred4329    bool operator()(const match_results<mapfile::iterator>& m)
330    {
331       pe->pdata->t = RegExData::type_pf;
332       pe->pdata->fm = m;
333       pe->pdata->update();
334       ok = cb(file, *pe);
335       return ok;
336    }
337 };
338 }
339 namespace{
BuildFileList(std::list<std::string> * pl,const char * files,bool recurse)340 void BuildFileList(std::list<std::string>* pl, const char* files, bool recurse)
341 {
342    file_iterator start(files);
343    file_iterator end;
344    if(recurse)
345    {
346       // go through sub directories:
347       char buf[MAX_PATH];
348       BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(buf, MAX_PATH, start.root()));
349       if(*buf == 0)
350       {
351          BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcpy_s(buf, MAX_PATH, "."));
352          BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, directory_iterator::separator()));
353          BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, "*"));
354       }
355       else
356       {
357          BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, directory_iterator::separator()));
358          BOOST_REGEX_DETAIL_NS::overflow_error_if_not_zero(BOOST_REGEX_DETAIL_NS::strcat_s(buf, MAX_PATH, "*"));
359       }
360       directory_iterator dstart(buf);
361       directory_iterator dend;
362 
363       // now get the file mask bit of "files":
364       const char* ptr = files;
365       while(*ptr) ++ptr;
366       while((ptr != files) && (*ptr != *directory_iterator::separator()) && (*ptr != '/'))--ptr;
367       if(ptr != files) ++ptr;
368 
369       while(dstart != dend)
370       {
371          // Verify that sprintf will not overflow:
372          if(std::strlen(dstart.path()) + std::strlen(directory_iterator::separator()) + std::strlen(ptr) >= MAX_PATH)
373          {
374             // Oops overflow, skip this item:
375             ++dstart;
376             continue;
377          }
378 #if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
379          int r = (::sprintf_s)(buf, sizeof(buf), "%s%s%s", dstart.path(), directory_iterator::separator(), ptr);
380 #else
381          int r = (std::sprintf)(buf, "%s%s%s", dstart.path(), directory_iterator::separator(), ptr);
382 #endif
383          if(r < 0)
384          {
385             // sprintf failed, skip this item:
386             ++dstart;
387             continue;
388          }
389          BuildFileList(pl, buf, recurse);
390          ++dstart;
391       }
392    }
393    while(start != end)
394    {
395       pl->push_back(*start);
396       ++start;
397    }
398 }
399 }
400 
GrepFiles(GrepFileCallback cb,const char * files,bool recurse,match_flag_type flags)401 unsigned int RegEx::GrepFiles(GrepFileCallback cb, const char* files, bool recurse, match_flag_type flags)
402 {
403    unsigned int result = 0;
404    std::list<std::string> file_list;
405    BuildFileList(&file_list, files, recurse);
406    std::list<std::string>::iterator start, end;
407    start = file_list.begin();
408    end = file_list.end();
409 
410    while(start != end)
411    {
412       mapfile map((*start).c_str());
413       pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pf;
414       pdata->fbase = map.begin();
415       BOOST_REGEX_DETAIL_NS::pred4 pred(cb, this, (*start).c_str());
416       int r = regex_grep(pred, map.begin(), map.end(), pdata->e, flags);
417       result += r;
418       ++start;
419       pdata->clean();
420       if(pred.ok == false)
421          return result;
422    }
423 
424    return result;
425 }
426 
427 
FindFiles(FindFilesCallback cb,const char * files,bool recurse,match_flag_type flags)428 unsigned int RegEx::FindFiles(FindFilesCallback cb, const char* files, bool recurse, match_flag_type flags)
429 {
430    unsigned int result = 0;
431    std::list<std::string> file_list;
432    BuildFileList(&file_list, files, recurse);
433    std::list<std::string>::iterator start, end;
434    start = file_list.begin();
435    end = file_list.end();
436 
437    while(start != end)
438    {
439       mapfile map((*start).c_str());
440       pdata->t = BOOST_REGEX_DETAIL_NS::RegExData::type_pf;
441       pdata->fbase = map.begin();
442 
443       if(regex_search(map.begin(), map.end(), pdata->fm, pdata->e, flags))
444       {
445          ++result;
446          if(false == cb((*start).c_str()))
447             return result;
448       }
449       //pdata->update();
450       ++start;
451       //pdata->clean();
452    }
453 
454    return result;
455 }
456 #endif
457 
458 #ifdef BOOST_REGEX_V3
459 #define regex_replace regex_merge
460 #endif
461 
Merge(const std::string & in,const std::string & fmt,bool copy,match_flag_type flags)462 std::string RegEx::Merge(const std::string& in, const std::string& fmt,
463                     bool copy, match_flag_type flags)
464 {
465    std::string result;
466    BOOST_REGEX_DETAIL_NS::string_out_iterator<std::string> i(result);
467    if(!copy) flags |= format_no_copy;
468    regex_replace(i, in.begin(), in.end(), pdata->e, fmt.c_str(), flags);
469    return result;
470 }
471 
Merge(const char * in,const char * fmt,bool copy,match_flag_type flags)472 std::string RegEx::Merge(const char* in, const char* fmt,
473                     bool copy, match_flag_type flags)
474 {
475    std::string result;
476    if(!copy) flags |= format_no_copy;
477    BOOST_REGEX_DETAIL_NS::string_out_iterator<std::string> i(result);
478    regex_replace(i, in, in + std::strlen(in), pdata->e, fmt, flags);
479    return result;
480 }
481 
Split(std::vector<std::string> & v,std::string & s,match_flag_type flags,unsigned max_count)482 std::size_t RegEx::Split(std::vector<std::string>& v,
483                       std::string& s,
484                       match_flag_type flags,
485                       unsigned max_count)
486 {
487    return regex_split(std::back_inserter(v), s, pdata->e, flags, max_count);
488 }
489 
490 
491 
492 //
493 // now operators for returning what matched in more detail:
494 //
Position(int i) const495 std::size_t RegEx::Position(int i)const
496 {
497    switch(pdata->t)
498    {
499    case BOOST_REGEX_DETAIL_NS::RegExData::type_pc:
500       return pdata->m[i].matched ? pdata->m[i].first - pdata->pbase : RegEx::npos;
501    case BOOST_REGEX_DETAIL_NS::RegExData::type_pf:
502 #ifndef BOOST_REGEX_NO_FILEITER
503       return pdata->fm[i].matched ? pdata->fm[i].first - pdata->fbase : RegEx::npos;
504 #endif
505    case BOOST_REGEX_DETAIL_NS::RegExData::type_copy:
506       {
507       std::map<int, std::ptrdiff_t, std::less<int> >::iterator pos = pdata->positions.find(i);
508       if(pos == pdata->positions.end())
509          return RegEx::npos;
510       return (*pos).second;
511       }
512    }
513    return RegEx::npos;
514 }
515 
Marks() const516 std::size_t RegEx::Marks()const
517 {
518    return pdata->e.mark_count();
519 }
520 
521 
Length(int i) const522 std::size_t RegEx::Length(int i)const
523 {
524    switch(pdata->t)
525    {
526    case BOOST_REGEX_DETAIL_NS::RegExData::type_pc:
527       return pdata->m[i].matched ? pdata->m[i].second - pdata->m[i].first : RegEx::npos;
528    case BOOST_REGEX_DETAIL_NS::RegExData::type_pf:
529 #ifndef BOOST_REGEX_NO_FILEITER
530       return pdata->fm[i].matched ? pdata->fm[i].second - pdata->fm[i].first : RegEx::npos;
531 #endif
532    case BOOST_REGEX_DETAIL_NS::RegExData::type_copy:
533       {
534       std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
535       if(pos == pdata->strings.end())
536          return RegEx::npos;
537       return (*pos).second.size();
538       }
539    }
540    return RegEx::npos;
541 }
542 
Matched(int i) const543 bool RegEx::Matched(int i)const
544 {
545    switch(pdata->t)
546    {
547    case BOOST_REGEX_DETAIL_NS::RegExData::type_pc:
548       return pdata->m[i].matched;
549    case BOOST_REGEX_DETAIL_NS::RegExData::type_pf:
550 #ifndef BOOST_REGEX_NO_FILEITER
551       return pdata->fm[i].matched;
552 #endif
553    case BOOST_REGEX_DETAIL_NS::RegExData::type_copy:
554       {
555       std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
556       if(pos == pdata->strings.end())
557          return false;
558       return true;
559       }
560    }
561    return false;
562 }
563 
564 
What(int i) const565 std::string RegEx::What(int i)const
566 {
567    std::string result;
568    switch(pdata->t)
569    {
570    case BOOST_REGEX_DETAIL_NS::RegExData::type_pc:
571       if(pdata->m[i].matched)
572          result.assign(pdata->m[i].first, pdata->m[i].second);
573       break;
574    case BOOST_REGEX_DETAIL_NS::RegExData::type_pf:
575       if(pdata->m[i].matched)
576          result.assign(to_string(pdata->m[i].first, pdata->m[i].second));
577       break;
578    case BOOST_REGEX_DETAIL_NS::RegExData::type_copy:
579       {
580       std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
581       if(pos != pdata->strings.end())
582          result = (*pos).second;
583       break;
584       }
585    }
586    return result;
587 }
588 
589 const std::size_t RegEx::npos = ~static_cast<std::size_t>(0);
590 
591 } // namespace boost
592 
593 #if defined(BOOST_BORLANDC) && (BOOST_BORLANDC >= 0x550) && (BOOST_BORLANDC <= 0x551) && !defined(_RWSTD_COMPILE_INSTANTIATE)
594 //
595 // this is an ugly hack to work around an ugly problem:
596 // by default this file will produce unresolved externals during
597 // linking unless _RWSTD_COMPILE_INSTANTIATE is defined (Borland bug).
598 // However if _RWSTD_COMPILE_INSTANTIATE is defined then we get separate
599 // copies of basic_string's static data in the RTL and this DLL, this messes
600 // with basic_string's memory management and results in run-time crashes,
601 // Oh sweet joy of Catch 22....
602 //
603 namespace std{
604 template<> template<>
605 basic_string<char>& BOOST_REGEX_DECL
replace(char * f1,char * f2,const char * i1,const char * i2)606 basic_string<char>::replace<const char*>(char* f1, char* f2, const char* i1, const char* i2)
607 {
608    unsigned insert_pos = f1 - begin();
609    unsigned remove_len = f2 - f1;
610    unsigned insert_len = i2 - i1;
611    unsigned org_size = size();
612    if(insert_len > remove_len)
613    {
614       append(insert_len-remove_len, ' ');
615       std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end());
616       std::copy(i1, i2, begin() + insert_pos);
617    }
618    else
619    {
620       std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len);
621       std::copy(i1, i2, begin() + insert_pos);
622       erase(size() + insert_len - remove_len);
623    }
624    return *this;
625 }
626 template<> template<>
627 basic_string<wchar_t>& BOOST_REGEX_DECL
replace(wchar_t * f1,wchar_t * f2,const wchar_t * i1,const wchar_t * i2)628 basic_string<wchar_t>::replace<const wchar_t*>(wchar_t* f1, wchar_t* f2, const wchar_t* i1, const wchar_t* i2)
629 {
630    unsigned insert_pos = f1 - begin();
631    unsigned remove_len = f2 - f1;
632    unsigned insert_len = i2 - i1;
633    unsigned org_size = size();
634    if(insert_len > remove_len)
635    {
636       append(insert_len-remove_len, ' ');
637       std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end());
638       std::copy(i1, i2, begin() + insert_pos);
639    }
640    else
641    {
642       std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len);
643       std::copy(i1, i2, begin() + insert_pos);
644       erase(size() + insert_len - remove_len);
645    }
646    return *this;
647 }
648 } // namespace std
649 #endif
650 
651 #endif
652 
653 
654 
655 
656 
657 
658 
659 
660 
661 
662 
663 
664 
665 
666 
667 
668