1 /*
2  *
3  * Copyright (c) 2004 John Maddock
4  * Copyright 2011 Garmin Ltd. or its subsidiaries
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
12  /*
13   *   LOCATION:    see http://www.boost.org for most recent version.
14   *   FILE         cpp_regex_traits.hpp
15   *   VERSION      see <boost/version.hpp>
16   *   DESCRIPTION: Declares regular expression traits class cpp_regex_traits.
17   */
18 
19 #ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
20 #define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
21 
22 #include <boost/config.hpp>
23 #include <boost/integer.hpp>
24 #include <boost/type_traits/make_unsigned.hpp>
25 
26 #ifndef BOOST_NO_STD_LOCALE
27 
28 #ifndef BOOST_RE_PAT_EXCEPT_HPP
29 #include <boost/regex/pattern_except.hpp>
30 #endif
31 #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
32 #include <boost/regex/v4/regex_traits_defaults.hpp>
33 #endif
34 #ifdef BOOST_HAS_THREADS
35 #include <boost/regex/pending/static_mutex.hpp>
36 #endif
37 #ifndef BOOST_REGEX_PRIMARY_TRANSFORM
38 #include <boost/regex/v4/primary_transform.hpp>
39 #endif
40 #ifndef BOOST_REGEX_OBJECT_CACHE_HPP
41 #include <boost/regex/pending/object_cache.hpp>
42 #endif
43 
44 #include <istream>
45 #include <ios>
46 #include <climits>
47 
48 #ifdef BOOST_MSVC
49 #pragma warning(push)
50 #pragma warning(disable: 4103)
51 #endif
52 #ifdef BOOST_HAS_ABI_HEADERS
53 #  include BOOST_ABI_PREFIX
54 #endif
55 #ifdef BOOST_MSVC
56 #pragma warning(pop)
57 #endif
58 
59 #ifdef BOOST_MSVC
60 #pragma warning(push)
61 #pragma warning(disable:4786 4251)
62 #endif
63 
64 namespace boost{
65 
66 //
67 // forward declaration is needed by some compilers:
68 //
69 template <class charT>
70 class cpp_regex_traits;
71 
72 namespace re_detail{
73 
74 //
75 // class parser_buf:
76 // acts as a stream buffer which wraps around a pair of pointers:
77 //
template <class charT,
          class traits = ::std::char_traits<charT> >
class parser_buf : public ::std::basic_streambuf<charT, traits>
{
   // shorthand for the stream buffer we derive from, and its member types:
   typedef ::std::basic_streambuf<charT, traits> base_type;
   typedef typename base_type::char_type char_type;
   typedef typename base_type::int_type int_type;
   typedef typename base_type::pos_type pos_type;
   typedef typename base_type::off_type off_type;
   typedef ::std::streamsize streamsize;

public:
   // starts out with an empty (null) get area:
   parser_buf()
      : base_type()
   {
      setbuf(0, 0);
   }

   // returns the current read position inside the wrapped range:
   const charT* getnext()
   {
      return this->gptr();
   }

protected:
   // overrides of the basic_streambuf positioning hooks, defined below:
   std::basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n);
   typename parser_buf<charT, traits>::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which);
   typename parser_buf<charT, traits>::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which);

private:
   // not copyable: declared but never defined.
   parser_buf(const parser_buf&);
   parser_buf& operator=(const parser_buf&);
};
99 
100 template<class charT, class traits>
101 std::basic_streambuf<charT, traits>*
setbuf(char_type * s,streamsize n)102 parser_buf<charT, traits>::setbuf(char_type* s, streamsize n)
103 {
104    this->setg(s, s, s + n);
105    return this;
106 }
107 
108 template<class charT, class traits>
109 typename parser_buf<charT, traits>::pos_type
seekoff(off_type off,::std::ios_base::seekdir way,::std::ios_base::openmode which)110 parser_buf<charT, traits>::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which)
111 {
112    typedef typename boost::int_t<sizeof(way) * CHAR_BIT>::least cast_type;
113 
114    if(which & ::std::ios_base::out)
115       return pos_type(off_type(-1));
116    std::ptrdiff_t size = this->egptr() - this->eback();
117    std::ptrdiff_t pos = this->gptr() - this->eback();
118    charT* g = this->eback();
119    switch(static_cast<cast_type>(way))
120    {
121    case ::std::ios_base::beg:
122       if((off < 0) || (off > size))
123          return pos_type(off_type(-1));
124       else
125          this->setg(g, g + off, g + size);
126       break;
127    case ::std::ios_base::end:
128       if((off < 0) || (off > size))
129          return pos_type(off_type(-1));
130       else
131          this->setg(g, g + size - off, g + size);
132       break;
133    case ::std::ios_base::cur:
134    {
135       std::ptrdiff_t newpos = static_cast<std::ptrdiff_t>(pos + off);
136       if((newpos < 0) || (newpos > size))
137          return pos_type(off_type(-1));
138       else
139          this->setg(g, g + newpos, g + size);
140       break;
141    }
142    default: ;
143    }
144 #ifdef BOOST_MSVC
145 #pragma warning(push)
146 #pragma warning(disable:4244)
147 #endif
148    return static_cast<pos_type>(this->gptr() - this->eback());
149 #ifdef BOOST_MSVC
150 #pragma warning(pop)
151 #endif
152 }
153 
154 template<class charT, class traits>
155 typename parser_buf<charT, traits>::pos_type
seekpos(pos_type sp,::std::ios_base::openmode which)156 parser_buf<charT, traits>::seekpos(pos_type sp, ::std::ios_base::openmode which)
157 {
158    if(which & ::std::ios_base::out)
159       return pos_type(off_type(-1));
160    off_type size = static_cast<off_type>(this->egptr() - this->eback());
161    charT* g = this->eback();
162    if(off_type(sp) <= size)
163    {
164       this->setg(g, g + off_type(sp), g + size);
165    }
166    return pos_type(off_type(-1));
167 }
168 
169 //
170 // class cpp_regex_traits_base:
171 // acts as a container for locale and the facets we are using.
172 //
template <class charT>
struct cpp_regex_traits_base
{
   // sets the locale and every facet pointer by way of imbue():
   cpp_regex_traits_base(const std::locale& l)
   {
      imbue(l);
   }
   // switches to locale l, returns the locale held before the call:
   std::locale imbue(const std::locale& l);

   std::locale m_locale;                    // the locale the facets below were taken from
   std::ctype<charT> const* m_pctype;       // character classification facet
#ifndef BOOST_NO_STD_MESSAGES
   std::messages<charT> const* m_pmessages; // message catalog facet, null when the locale has none
#endif
   std::collate<charT> const* m_pcollate;   // collation facet

   //
   // ordering over the facet pointers: ctype first, then messages
   // (when available), then collate.
   //
   bool operator<(const cpp_regex_traits_base& b)const
   {
      if(m_pctype != b.m_pctype)
         return m_pctype < b.m_pctype;
#ifndef BOOST_NO_STD_MESSAGES
      if(m_pmessages != b.m_pmessages)
         return m_pmessages < b.m_pmessages;
#endif
      return m_pcollate < b.m_pcollate;
   }
   //
   // two objects are equal when all of their facet pointers are equal,
   // the locale object itself is not compared.
   //
   bool operator==(const cpp_regex_traits_base& b)const
   {
      if(m_pctype != b.m_pctype)
         return false;
#ifndef BOOST_NO_STD_MESSAGES
      if(m_pmessages != b.m_pmessages)
         return false;
#endif
      return m_pcollate == b.m_pcollate;
   }
};
212 
213 template <class charT>
imbue(const std::locale & l)214 std::locale cpp_regex_traits_base<charT>::imbue(const std::locale& l)
215 {
216    std::locale result(m_locale);
217    m_locale = l;
218    m_pctype = &BOOST_USE_FACET(std::ctype<charT>, l);
219 #ifndef BOOST_NO_STD_MESSAGES
220    m_pmessages = BOOST_HAS_FACET(std::messages<charT>, l) ? &BOOST_USE_FACET(std::messages<charT>, l) : 0;
221 #endif
222    m_pcollate = &BOOST_USE_FACET(std::collate<charT>, l);
223    return result;
224 }
225 
226 //
227 // class cpp_regex_traits_char_layer:
228 // implements methods that require specialisation for narrow characters:
229 //
230 template <class charT>
231 class cpp_regex_traits_char_layer : public cpp_regex_traits_base<charT>
232 {
233    typedef std::basic_string<charT> string_type;
234    typedef std::map<charT, regex_constants::syntax_type> map_type;
235    typedef typename map_type::const_iterator map_iterator_type;
236 public:
cpp_regex_traits_char_layer(const std::locale & l)237    cpp_regex_traits_char_layer(const std::locale& l)
238       : cpp_regex_traits_base<charT>(l)
239    {
240       init();
241    }
cpp_regex_traits_char_layer(const cpp_regex_traits_base<charT> & b)242    cpp_regex_traits_char_layer(const cpp_regex_traits_base<charT>& b)
243       : cpp_regex_traits_base<charT>(b)
244    {
245       init();
246    }
247    void init();
248 
syntax_type(charT c) const249    regex_constants::syntax_type syntax_type(charT c)const
250    {
251       map_iterator_type i = m_char_map.find(c);
252       return ((i == m_char_map.end()) ? 0 : i->second);
253    }
escape_syntax_type(charT c) const254    regex_constants::escape_syntax_type escape_syntax_type(charT c) const
255    {
256       map_iterator_type i = m_char_map.find(c);
257       if(i == m_char_map.end())
258       {
259          if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class;
260          if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class;
261          return 0;
262       }
263       return i->second;
264    }
265 
266 private:
267    string_type get_default_message(regex_constants::syntax_type);
268    // TODO: use a hash table when available!
269    map_type m_char_map;
270 };
271 
272 template <class charT>
init()273 void cpp_regex_traits_char_layer<charT>::init()
274 {
275    // we need to start by initialising our syntax map so we know which
276    // character is used for which purpose:
277 #ifndef BOOST_NO_STD_MESSAGES
278 #ifndef __IBMCPP__
279    typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
280 #else
281    typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
282 #endif
283    std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
284    if(cat_name.size() && (this->m_pmessages != 0))
285    {
286       cat = this->m_pmessages->open(
287          cat_name,
288          this->m_locale);
289       if((int)cat < 0)
290       {
291          std::string m("Unable to open message catalog: ");
292          std::runtime_error err(m + cat_name);
293          boost::re_detail::raise_runtime_error(err);
294       }
295    }
296    //
297    // if we have a valid catalog then load our messages:
298    //
299    if((int)cat >= 0)
300    {
301 #ifndef BOOST_NO_EXCEPTIONS
302       try{
303 #endif
304          for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
305          {
306             string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i));
307             for(typename string_type::size_type j = 0; j < mss.size(); ++j)
308             {
309                m_char_map[mss[j]] = i;
310             }
311          }
312          this->m_pmessages->close(cat);
313 #ifndef BOOST_NO_EXCEPTIONS
314       }
315       catch(...)
316       {
317          if(this->m_pmessages)
318             this->m_pmessages->close(cat);
319          throw;
320       }
321 #endif
322    }
323    else
324    {
325 #endif
326       for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
327       {
328          const char* ptr = get_default_syntax(i);
329          while(ptr && *ptr)
330          {
331             m_char_map[this->m_pctype->widen(*ptr)] = i;
332             ++ptr;
333          }
334       }
335 #ifndef BOOST_NO_STD_MESSAGES
336    }
337 #endif
338 }
339 
340 template <class charT>
341 typename cpp_regex_traits_char_layer<charT>::string_type
get_default_message(regex_constants::syntax_type i)342    cpp_regex_traits_char_layer<charT>::get_default_message(regex_constants::syntax_type i)
343 {
344    const char* ptr = get_default_syntax(i);
345    string_type result;
346    while(ptr && *ptr)
347    {
348       result.append(1, this->m_pctype->widen(*ptr));
349       ++ptr;
350    }
351    return result;
352 }
353 
354 //
355 // specialised version for narrow characters:
356 //
357 template <>
358 class BOOST_REGEX_DECL cpp_regex_traits_char_layer<char> : public cpp_regex_traits_base<char>
359 {
360    typedef std::string string_type;
361 public:
cpp_regex_traits_char_layer(const std::locale & l)362    cpp_regex_traits_char_layer(const std::locale& l)
363    : cpp_regex_traits_base<char>(l)
364    {
365       init();
366    }
cpp_regex_traits_char_layer(const cpp_regex_traits_base<char> & l)367    cpp_regex_traits_char_layer(const cpp_regex_traits_base<char>& l)
368    : cpp_regex_traits_base<char>(l)
369    {
370       init();
371    }
372 
syntax_type(char c) const373    regex_constants::syntax_type syntax_type(char c)const
374    {
375       return m_char_map[static_cast<unsigned char>(c)];
376    }
escape_syntax_type(char c) const377    regex_constants::escape_syntax_type escape_syntax_type(char c) const
378    {
379       return m_char_map[static_cast<unsigned char>(c)];
380    }
381 
382 private:
383    regex_constants::syntax_type m_char_map[1u << CHAR_BIT];
384    void init();
385 };
386 
387 #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
//
// character class bit flags, one bit per basic class; alnum and graph
// are combinations of the basic bits.
//
enum
{
   char_class_space            = 0x0001,
   char_class_print            = 0x0002,
   char_class_cntrl            = 0x0004,
   char_class_upper            = 0x0008,
   char_class_lower            = 0x0010,
   char_class_alpha            = 0x0020,
   char_class_digit            = 0x0040,
   char_class_punct            = 0x0080,
   char_class_xdigit           = 0x0100,
   char_class_alnum            = char_class_alpha | char_class_digit,
   char_class_graph            = char_class_alnum | char_class_punct,
   char_class_blank            = 0x0200,
   char_class_word             = 0x0400,
   char_class_unicode          = 0x0800,
   char_class_horizontal_space = 0x1000,
   char_class_vertical_space   = 0x2000
};
407 
408 #endif
409 
410 //
411 // class cpp_regex_traits_implementation:
412 // provides pimpl implementation for cpp_regex_traits.
413 //
414 template <class charT>
415 class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT>
416 {
417 public:
418    typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
419    typedef typename std::ctype<charT>::mask                  native_mask_type;
420 #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
421    BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 24);
422    BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 25);
423    BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 26);
424    BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 1u << 27);
425    BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 1u << 28);
426 #endif
427 
428    typedef std::basic_string<charT> string_type;
429    typedef charT char_type;
430    //cpp_regex_traits_implementation();
cpp_regex_traits_implementation(const std::locale & l)431    cpp_regex_traits_implementation(const std::locale& l)
432       : cpp_regex_traits_char_layer<charT>(l)
433    {
434       init();
435    }
cpp_regex_traits_implementation(const cpp_regex_traits_base<charT> & l)436    cpp_regex_traits_implementation(const cpp_regex_traits_base<charT>& l)
437       : cpp_regex_traits_char_layer<charT>(l)
438    {
439       init();
440    }
error_string(regex_constants::error_type n) const441    std::string error_string(regex_constants::error_type n) const
442    {
443       if(!m_error_strings.empty())
444       {
445          std::map<int, std::string>::const_iterator p = m_error_strings.find(n);
446          return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second;
447       }
448       return get_default_error_string(n);
449    }
lookup_classname(const charT * p1,const charT * p2) const450    char_class_type lookup_classname(const charT* p1, const charT* p2) const
451    {
452       char_class_type result = lookup_classname_imp(p1, p2);
453       if(result == 0)
454       {
455          string_type temp(p1, p2);
456          this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size());
457          result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size());
458       }
459       return result;
460    }
461    string_type lookup_collatename(const charT* p1, const charT* p2) const;
462    string_type transform_primary(const charT* p1, const charT* p2) const;
463    string_type transform(const charT* p1, const charT* p2) const;
464 private:
465    std::map<int, std::string>     m_error_strings;   // error messages indexed by numberic ID
466    std::map<string_type, char_class_type>  m_custom_class_names; // character class names
467    std::map<string_type, string_type>      m_custom_collate_names; // collating element names
468    unsigned                       m_collate_type;    // the form of the collation string
469    charT                          m_collate_delim;   // the collation group delimiter
470    //
471    // helpers:
472    //
473    char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
474    void init();
475 #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
476 public:
477    bool isctype(charT c, char_class_type m)const;
478 #endif
479 };
480 
481 #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
482 #if !defined(BOOST_NO_INCLASS_MEMBER_INITIALIZATION)
483 
484 template <class charT>
485 typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_blank;
486 template <class charT>
487 typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_word;
488 template <class charT>
489 typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_unicode;
490 template <class charT>
491 typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_vertical;
492 template <class charT>
493 typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_horizontal;
494 
495 #endif
496 #endif
497 
498 template <class charT>
499 typename cpp_regex_traits_implementation<charT>::string_type
transform_primary(const charT * p1,const charT * p2) const500    cpp_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const
501 {
502    //
503    // PRECONDITIONS:
504    //
505    // A bug in gcc 3.2 (and maybe other versions as well) treats
506    // p1 as a null terminated string, for efficiency reasons
507    // we work around this elsewhere, but just assert here that
508    // we adhere to gcc's (buggy) preconditions...
509    //
510    BOOST_ASSERT(*p2 == 0);
511    string_type result;
512 #if defined(_CPPLIB_VER)
513    //
514    // A bug in VC11 and 12 causes the program to hang if we pass a null-string
515    // to std::collate::transform, but only for certain locales :-(
516    // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
517    //
518    if(*p1 == 0)
519    {
520       return string_type(1, charT(0));
521    }
522 #endif
523    //
524    // swallowing all exceptions here is a bad idea
525    // however at least one std lib will always throw
526    // std::bad_alloc for certain arguments...
527    //
528 #ifndef BOOST_NO_EXCEPTIONS
529    try{
530 #endif
531       //
532       // What we do here depends upon the format of the sort key returned by
533       // sort key returned by this->transform:
534       //
535       switch(m_collate_type)
536       {
537       case sort_C:
538       case sort_unknown:
539          // the best we can do is translate to lower case, then get a regular sort key:
540          {
541             result.assign(p1, p2);
542             this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size());
543             result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size());
544             break;
545          }
546       case sort_fixed:
547          {
548             // get a regular sort key, and then truncate it:
549             result.assign(this->m_pcollate->transform(p1, p2));
550             result.erase(this->m_collate_delim);
551             break;
552          }
553       case sort_delim:
554             // get a regular sort key, and then truncate everything after the delim:
555             result.assign(this->m_pcollate->transform(p1, p2));
556             std::size_t i;
557             for(i = 0; i < result.size(); ++i)
558             {
559                if(result[i] == m_collate_delim)
560                   break;
561             }
562             result.erase(i);
563             break;
564       }
565 #ifndef BOOST_NO_EXCEPTIONS
566    }catch(...){}
567 #endif
568    while(result.size() && (charT(0) == *result.rbegin()))
569       result.erase(result.size() - 1);
570    if(result.empty())
571    {
572       // character is ignorable at the primary level:
573       result = string_type(1, charT(0));
574    }
575    return result;
576 }
577 
578 template <class charT>
579 typename cpp_regex_traits_implementation<charT>::string_type
transform(const charT * p1,const charT * p2) const580    cpp_regex_traits_implementation<charT>::transform(const charT* p1, const charT* p2) const
581 {
582    //
583    // PRECONDITIONS:
584    //
585    // A bug in gcc 3.2 (and maybe other versions as well) treats
586    // p1 as a null terminated string, for efficiency reasons
587    // we work around this elsewhere, but just assert here that
588    // we adhere to gcc's (buggy) preconditions...
589    //
590    BOOST_ASSERT(*p2 == 0);
591    //
592    // swallowing all exceptions here is a bad idea
593    // however at least one std lib will always throw
594    // std::bad_alloc for certain arguments...
595    //
596    string_type result, result2;
597 #if defined(_CPPLIB_VER)
598    //
599    // A bug in VC11 and 12 causes the program to hang if we pass a null-string
600    // to std::collate::transform, but only for certain locales :-(
601    // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
602    //
603    if(*p1 == 0)
604    {
605       return result;
606    }
607 #endif
608 #ifndef BOOST_NO_EXCEPTIONS
609    try{
610 #endif
611       result = this->m_pcollate->transform(p1, p2);
612       //
613       // Borland's STLPort version returns a NULL-terminated
614       // string that has garbage at the end - each call to
615       // std::collate<wchar_t>::transform returns a different string!
616       // So as a workaround, we'll truncate the string at the first NULL
617       // which _seems_ to work....
618 #if BOOST_WORKAROUND(__BORLANDC__, < 0x580)
619       result.erase(result.find(charT(0)));
620 #else
621       //
622       // some implementations (Dinkumware) append unnecessary trailing \0's:
623       while(result.size() && (charT(0) == *result.rbegin()))
624          result.erase(result.size() - 1);
625 #endif
626       //
627       // We may have NULL's used as separators between sections of the collate string,
628       // an example would be Boost.Locale.  We have no way to detect this case via
629       // #defines since this can be used with any compiler/platform combination.
630       // Unfortunately our state machine (which was devised when all implementations
631       // used underlying C language API's) can't cope with that case.  One workaround
632       // is to replace each character with 2, fortunately this code isn't used that
633       // much as this is now slower than before :-(
634       //
635       typedef typename make_unsigned<charT>::type uchar_type;
636       result2.reserve(result.size() * 2 + 2);
637       for(unsigned i = 0; i < result.size(); ++i)
638       {
639          if(static_cast<uchar_type>(result[i]) == (std::numeric_limits<uchar_type>::max)())
640          {
641             result2.append(1, charT((std::numeric_limits<uchar_type>::max)())).append(1, charT('b'));
642          }
643          else
644          {
645             result2.append(1, static_cast<charT>(1 + static_cast<uchar_type>(result[i]))).append(1, charT('b') - 1);
646          }
647       }
648       BOOST_ASSERT(std::find(result2.begin(), result2.end(), charT(0)) == result2.end());
649 #ifndef BOOST_NO_EXCEPTIONS
650    }
651    catch(...)
652    {
653    }
654 #endif
655    return result2;
656 }
657 
658 
659 template <class charT>
660 typename cpp_regex_traits_implementation<charT>::string_type
lookup_collatename(const charT * p1,const charT * p2) const661    cpp_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const
662 {
663    typedef typename std::map<string_type, string_type>::const_iterator iter_type;
664    if(m_custom_collate_names.size())
665    {
666       iter_type pos = m_custom_collate_names.find(string_type(p1, p2));
667       if(pos != m_custom_collate_names.end())
668          return pos->second;
669    }
670 #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
671                && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
672    std::string name(p1, p2);
673 #else
674    std::string name;
675    const charT* p0 = p1;
676    while(p0 != p2)
677       name.append(1, char(*p0++));
678 #endif
679    name = lookup_default_collate_name(name);
680 #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
681                && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
682    if(name.size())
683       return string_type(name.begin(), name.end());
684 #else
685    if(name.size())
686    {
687       string_type result;
688       typedef std::string::const_iterator iter;
689       iter b = name.begin();
690       iter e = name.end();
691       while(b != e)
692          result.append(1, charT(*b++));
693       return result;
694    }
695 #endif
696    if(p2 - p1 == 1)
697       return string_type(1, *p1);
698    return string_type();
699 }
700 
701 template <class charT>
init()702 void cpp_regex_traits_implementation<charT>::init()
703 {
704 #ifndef BOOST_NO_STD_MESSAGES
705 #ifndef __IBMCPP__
706    typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
707 #else
708    typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
709 #endif
710    std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
711    if(cat_name.size() && (this->m_pmessages != 0))
712    {
713       cat = this->m_pmessages->open(
714          cat_name,
715          this->m_locale);
716       if((int)cat < 0)
717       {
718          std::string m("Unable to open message catalog: ");
719          std::runtime_error err(m + cat_name);
720          boost::re_detail::raise_runtime_error(err);
721       }
722    }
723    //
724    // if we have a valid catalog then load our messages:
725    //
726    if((int)cat >= 0)
727    {
728       //
729       // Error messages:
730       //
731       for(boost::regex_constants::error_type i = static_cast<boost::regex_constants::error_type>(0);
732          i <= boost::regex_constants::error_unknown;
733          i = static_cast<boost::regex_constants::error_type>(i + 1))
734       {
735          const char* p = get_default_error_string(i);
736          string_type default_message;
737          while(*p)
738          {
739             default_message.append(1, this->m_pctype->widen(*p));
740             ++p;
741          }
742          string_type s = this->m_pmessages->get(cat, 0, i+200, default_message);
743          std::string result;
744          for(std::string::size_type j = 0; j < s.size(); ++j)
745          {
746             result.append(1, this->m_pctype->narrow(s[j], 0));
747          }
748          m_error_strings[i] = result;
749       }
750       //
751       // Custom class names:
752       //
753 #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
754       static const char_class_type masks[16] =
755       {
756          std::ctype<charT>::alnum,
757          std::ctype<charT>::alpha,
758          std::ctype<charT>::cntrl,
759          std::ctype<charT>::digit,
760          std::ctype<charT>::graph,
761          cpp_regex_traits_implementation<charT>::mask_horizontal,
762          std::ctype<charT>::lower,
763          std::ctype<charT>::print,
764          std::ctype<charT>::punct,
765          std::ctype<charT>::space,
766          std::ctype<charT>::upper,
767          cpp_regex_traits_implementation<charT>::mask_vertical,
768          std::ctype<charT>::xdigit,
769          cpp_regex_traits_implementation<charT>::mask_blank,
770          cpp_regex_traits_implementation<charT>::mask_word,
771          cpp_regex_traits_implementation<charT>::mask_unicode,
772       };
773 #else
774       static const char_class_type masks[16] =
775       {
776          ::boost::re_detail::char_class_alnum,
777          ::boost::re_detail::char_class_alpha,
778          ::boost::re_detail::char_class_cntrl,
779          ::boost::re_detail::char_class_digit,
780          ::boost::re_detail::char_class_graph,
781          ::boost::re_detail::char_class_horizontal_space,
782          ::boost::re_detail::char_class_lower,
783          ::boost::re_detail::char_class_print,
784          ::boost::re_detail::char_class_punct,
785          ::boost::re_detail::char_class_space,
786          ::boost::re_detail::char_class_upper,
787          ::boost::re_detail::char_class_vertical_space,
788          ::boost::re_detail::char_class_xdigit,
789          ::boost::re_detail::char_class_blank,
790          ::boost::re_detail::char_class_word,
791          ::boost::re_detail::char_class_unicode,
792       };
793 #endif
794       static const string_type null_string;
795       for(unsigned int j = 0; j <= 13; ++j)
796       {
797          string_type s(this->m_pmessages->get(cat, 0, j+300, null_string));
798          if(s.size())
799             this->m_custom_class_names[s] = masks[j];
800       }
801    }
802 #endif
803    //
804    // get the collation format used by m_pcollate:
805    //
806    m_collate_type = re_detail::find_sort_syntax(this, &m_collate_delim);
807 }
808 
809 template <class charT>
810 typename cpp_regex_traits_implementation<charT>::char_class_type
lookup_classname_imp(const charT * p1,const charT * p2) const811    cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
812 {
813 #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
814    static const char_class_type masks[22] =
815    {
816       0,
817       std::ctype<char>::alnum,
818       std::ctype<char>::alpha,
819       cpp_regex_traits_implementation<charT>::mask_blank,
820       std::ctype<char>::cntrl,
821       std::ctype<char>::digit,
822       std::ctype<char>::digit,
823       std::ctype<char>::graph,
824       cpp_regex_traits_implementation<charT>::mask_horizontal,
825       std::ctype<char>::lower,
826       std::ctype<char>::lower,
827       std::ctype<char>::print,
828       std::ctype<char>::punct,
829       std::ctype<char>::space,
830       std::ctype<char>::space,
831       std::ctype<char>::upper,
832       cpp_regex_traits_implementation<charT>::mask_unicode,
833       std::ctype<char>::upper,
834       cpp_regex_traits_implementation<charT>::mask_vertical,
835       std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
836       std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
837       std::ctype<char>::xdigit,
838    };
839 #else
840    static const char_class_type masks[22] =
841    {
842       0,
843       ::boost::re_detail::char_class_alnum,
844       ::boost::re_detail::char_class_alpha,
845       ::boost::re_detail::char_class_blank,
846       ::boost::re_detail::char_class_cntrl,
847       ::boost::re_detail::char_class_digit,
848       ::boost::re_detail::char_class_digit,
849       ::boost::re_detail::char_class_graph,
850       ::boost::re_detail::char_class_horizontal_space,
851       ::boost::re_detail::char_class_lower,
852       ::boost::re_detail::char_class_lower,
853       ::boost::re_detail::char_class_print,
854       ::boost::re_detail::char_class_punct,
855       ::boost::re_detail::char_class_space,
856       ::boost::re_detail::char_class_space,
857       ::boost::re_detail::char_class_upper,
858       ::boost::re_detail::char_class_unicode,
859       ::boost::re_detail::char_class_upper,
860       ::boost::re_detail::char_class_vertical_space,
861       ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word,
862       ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word,
863       ::boost::re_detail::char_class_xdigit,
864    };
865 #endif
866    if(m_custom_class_names.size())
867    {
868       typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter;
869       map_iter pos = m_custom_class_names.find(string_type(p1, p2));
870       if(pos != m_custom_class_names.end())
871          return pos->second;
872    }
873    std::size_t state_id = 1 + re_detail::get_default_class_id(p1, p2);
874    BOOST_ASSERT(state_id < sizeof(masks) / sizeof(masks[0]));
875    return masks[state_id];
876 }
877 
878 #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
879 template <class charT>
isctype(const charT c,char_class_type mask) const880 bool cpp_regex_traits_implementation<charT>::isctype(const charT c, char_class_type mask) const
881 {
882    return
883       ((mask & ::boost::re_detail::char_class_space) && (this->m_pctype->is(std::ctype<charT>::space, c)))
884       || ((mask & ::boost::re_detail::char_class_print) && (this->m_pctype->is(std::ctype<charT>::print, c)))
885       || ((mask & ::boost::re_detail::char_class_cntrl) && (this->m_pctype->is(std::ctype<charT>::cntrl, c)))
886       || ((mask & ::boost::re_detail::char_class_upper) && (this->m_pctype->is(std::ctype<charT>::upper, c)))
887       || ((mask & ::boost::re_detail::char_class_lower) && (this->m_pctype->is(std::ctype<charT>::lower, c)))
888       || ((mask & ::boost::re_detail::char_class_alpha) && (this->m_pctype->is(std::ctype<charT>::alpha, c)))
889       || ((mask & ::boost::re_detail::char_class_digit) && (this->m_pctype->is(std::ctype<charT>::digit, c)))
890       || ((mask & ::boost::re_detail::char_class_punct) && (this->m_pctype->is(std::ctype<charT>::punct, c)))
891       || ((mask & ::boost::re_detail::char_class_xdigit) && (this->m_pctype->is(std::ctype<charT>::xdigit, c)))
892       || ((mask & ::boost::re_detail::char_class_blank) && (this->m_pctype->is(std::ctype<charT>::space, c)) && !::boost::re_detail::is_separator(c))
893       || ((mask & ::boost::re_detail::char_class_word) && (c == '_'))
894       || ((mask & ::boost::re_detail::char_class_unicode) && ::boost::re_detail::is_extended(c))
895       || ((mask & ::boost::re_detail::char_class_vertical_space) && (is_separator(c) || (c == '\v')))
896       || ((mask & ::boost::re_detail::char_class_horizontal_space) && this->m_pctype->is(std::ctype<charT>::space, c) && !(is_separator(c) || (c == '\v')));
897 }
898 #endif
899 
900 
901 template <class charT>
create_cpp_regex_traits(const std::locale & l)902 inline boost::shared_ptr<const cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l)
903 {
904    cpp_regex_traits_base<charT> key(l);
905    return ::boost::object_cache<cpp_regex_traits_base<charT>, cpp_regex_traits_implementation<charT> >::get(key, 5);
906 }
907 
908 } // re_detail
909 
910 template <class charT>
911 class cpp_regex_traits
912 {
913 private:
914    typedef std::ctype<charT>            ctype_type;
915 public:
916    typedef charT                        char_type;
917    typedef std::size_t                  size_type;
918    typedef std::basic_string<char_type> string_type;
919    typedef std::locale                  locale_type;
920    typedef boost::uint_least32_t        char_class_type;
921 
922    struct boost_extensions_tag{};
923 
cpp_regex_traits()924    cpp_regex_traits()
925       : m_pimpl(re_detail::create_cpp_regex_traits<charT>(std::locale()))
926    { }
length(const char_type * p)927    static size_type length(const char_type* p)
928    {
929       return std::char_traits<charT>::length(p);
930    }
syntax_type(charT c) const931    regex_constants::syntax_type syntax_type(charT c)const
932    {
933       return m_pimpl->syntax_type(c);
934    }
escape_syntax_type(charT c) const935    regex_constants::escape_syntax_type escape_syntax_type(charT c) const
936    {
937       return m_pimpl->escape_syntax_type(c);
938    }
translate(charT c) const939    charT translate(charT c) const
940    {
941       return c;
942    }
translate_nocase(charT c) const943    charT translate_nocase(charT c) const
944    {
945       return m_pimpl->m_pctype->tolower(c);
946    }
translate(charT c,bool icase) const947    charT translate(charT c, bool icase) const
948    {
949       return icase ? m_pimpl->m_pctype->tolower(c) : c;
950    }
tolower(charT c) const951    charT tolower(charT c) const
952    {
953       return m_pimpl->m_pctype->tolower(c);
954    }
toupper(charT c) const955    charT toupper(charT c) const
956    {
957       return m_pimpl->m_pctype->toupper(c);
958    }
transform(const charT * p1,const charT * p2) const959    string_type transform(const charT* p1, const charT* p2) const
960    {
961       return m_pimpl->transform(p1, p2);
962    }
transform_primary(const charT * p1,const charT * p2) const963    string_type transform_primary(const charT* p1, const charT* p2) const
964    {
965       return m_pimpl->transform_primary(p1, p2);
966    }
lookup_classname(const charT * p1,const charT * p2) const967    char_class_type lookup_classname(const charT* p1, const charT* p2) const
968    {
969       return m_pimpl->lookup_classname(p1, p2);
970    }
lookup_collatename(const charT * p1,const charT * p2) const971    string_type lookup_collatename(const charT* p1, const charT* p2) const
972    {
973       return m_pimpl->lookup_collatename(p1, p2);
974    }
isctype(charT c,char_class_type f) const975    bool isctype(charT c, char_class_type f) const
976    {
977 #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
978       typedef typename std::ctype<charT>::mask ctype_mask;
979 
980       static const ctype_mask mask_base =
981          static_cast<ctype_mask>(
982             std::ctype<charT>::alnum
983             | std::ctype<charT>::alpha
984             | std::ctype<charT>::cntrl
985             | std::ctype<charT>::digit
986             | std::ctype<charT>::graph
987             | std::ctype<charT>::lower
988             | std::ctype<charT>::print
989             | std::ctype<charT>::punct
990             | std::ctype<charT>::space
991             | std::ctype<charT>::upper
992             | std::ctype<charT>::xdigit);
993 
994       if((f & mask_base)
995          && (m_pimpl->m_pctype->is(
996             static_cast<ctype_mask>(f & mask_base), c)))
997          return true;
998       else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_unicode) && re_detail::is_extended(c))
999          return true;
1000       else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_'))
1001          return true;
1002       else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_blank)
1003          && m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
1004          && !re_detail::is_separator(c))
1005          return true;
1006       else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_vertical)
1007          && (::boost::re_detail::is_separator(c) || (c == '\v')))
1008          return true;
1009       else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_horizontal)
1010          && this->isctype(c, std::ctype<charT>::space) && !this->isctype(c, re_detail::cpp_regex_traits_implementation<charT>::mask_vertical))
1011          return true;
1012       return false;
1013 #else
1014       return m_pimpl->isctype(c, f);
1015 #endif
1016    }
1017    int toi(const charT*& p1, const charT* p2, int radix)const;
value(charT c,int radix) const1018    int value(charT c, int radix)const
1019    {
1020       const charT* pc = &c;
1021       return toi(pc, pc + 1, radix);
1022    }
imbue(locale_type l)1023    locale_type imbue(locale_type l)
1024    {
1025       std::locale result(getloc());
1026       m_pimpl = re_detail::create_cpp_regex_traits<charT>(l);
1027       return result;
1028    }
getloc() const1029    locale_type getloc()const
1030    {
1031       return m_pimpl->m_locale;
1032    }
error_string(regex_constants::error_type n) const1033    std::string error_string(regex_constants::error_type n) const
1034    {
1035       return m_pimpl->error_string(n);
1036    }
1037 
1038    //
1039    // extension:
1040    // set the name of the message catalog in use (defaults to "boost_regex").
1041    //
1042    static std::string catalog_name(const std::string& name);
1043    static std::string get_catalog_name();
1044 
1045 private:
1046    boost::shared_ptr<const re_detail::cpp_regex_traits_implementation<charT> > m_pimpl;
1047    //
1048    // catalog name handler:
1049    //
1050    static std::string& get_catalog_name_inst();
1051 
1052 #ifdef BOOST_HAS_THREADS
1053    static static_mutex& get_mutex_inst();
1054 #endif
1055 };
1056 
1057 
1058 template <class charT>
toi(const charT * & first,const charT * last,int radix) const1059 int cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const
1060 {
1061    re_detail::parser_buf<charT>   sbuf;            // buffer for parsing numbers.
1062    std::basic_istream<charT>      is(&sbuf);       // stream for parsing numbers.
1063 
1064    // we do NOT want to parse any thousands separators inside the stream:
1065    last = std::find(first, last, BOOST_USE_FACET(std::numpunct<charT>, is.getloc()).thousands_sep());
1066 
1067    sbuf.pubsetbuf(const_cast<charT*>(static_cast<const charT*>(first)), static_cast<std::streamsize>(last-first));
1068    is.clear();
1069    if(std::abs(radix) == 16) is >> std::hex;
1070    else if(std::abs(radix) == 8) is >> std::oct;
1071    else is >> std::dec;
1072    int val;
1073    if(is >> val)
1074    {
1075       first = first + ((last - first) - sbuf.in_avail());
1076       return val;
1077    }
1078    else
1079       return -1;
1080 }
1081 
1082 template <class charT>
catalog_name(const std::string & name)1083 std::string cpp_regex_traits<charT>::catalog_name(const std::string& name)
1084 {
1085 #ifdef BOOST_HAS_THREADS
1086    static_mutex::scoped_lock lk(get_mutex_inst());
1087 #endif
1088    std::string result(get_catalog_name_inst());
1089    get_catalog_name_inst() = name;
1090    return result;
1091 }
1092 
1093 template <class charT>
get_catalog_name_inst()1094 std::string& cpp_regex_traits<charT>::get_catalog_name_inst()
1095 {
1096    static std::string s_name;
1097    return s_name;
1098 }
1099 
1100 template <class charT>
get_catalog_name()1101 std::string cpp_regex_traits<charT>::get_catalog_name()
1102 {
1103 #ifdef BOOST_HAS_THREADS
1104    static_mutex::scoped_lock lk(get_mutex_inst());
1105 #endif
1106    std::string result(get_catalog_name_inst());
1107    return result;
1108 }
1109 
1110 #ifdef BOOST_HAS_THREADS
1111 template <class charT>
get_mutex_inst()1112 static_mutex& cpp_regex_traits<charT>::get_mutex_inst()
1113 {
1114    static static_mutex s_mutex = BOOST_STATIC_MUTEX_INIT;
1115    return s_mutex;
1116 }
1117 #endif
1118 
1119 
1120 } // boost
1121 
1122 #ifdef BOOST_MSVC
1123 #pragma warning(pop)
1124 #endif
1125 
1126 #ifdef BOOST_MSVC
1127 #pragma warning(push)
1128 #pragma warning(disable: 4103)
1129 #endif
1130 #ifdef BOOST_HAS_ABI_HEADERS
1131 #  include BOOST_ABI_SUFFIX
1132 #endif
1133 #ifdef BOOST_MSVC
1134 #pragma warning(pop)
1135 #endif
1136 
1137 #endif
1138 
1139 #endif
1140 
1141 
1142