1 /*
2  *
3  * Copyright (c) 2004
4  * John Maddock
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
12  /*
13   *   LOCATION:    see http://www.boost.org for most recent version.
14   *   FILE         c_regex_traits.hpp
15   *   VERSION      see <boost/version.hpp>
16   *   DESCRIPTION: Declares regular expression traits class that wraps the global C locale.
17   */
18 
19 #ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
20 #define BOOST_C_REGEX_TRAITS_HPP_INCLUDED
21 
22 #include <boost/regex/config.hpp>
23 #include <boost/regex/v5/regex_workaround.hpp>
24 #include <cctype>
25 
26 namespace boost{
27 
28    namespace BOOST_REGEX_DETAIL_NS {
29 
30       enum
31       {
32          char_class_space = 1 << 0,
33          char_class_print = 1 << 1,
34          char_class_cntrl = 1 << 2,
35          char_class_upper = 1 << 3,
36          char_class_lower = 1 << 4,
37          char_class_alpha = 1 << 5,
38          char_class_digit = 1 << 6,
39          char_class_punct = 1 << 7,
40          char_class_xdigit = 1 << 8,
41          char_class_alnum = char_class_alpha | char_class_digit,
42          char_class_graph = char_class_alnum | char_class_punct,
43          char_class_blank = 1 << 9,
44          char_class_word = 1 << 10,
45          char_class_unicode = 1 << 11,
46          char_class_horizontal = 1 << 12,
47          char_class_vertical = 1 << 13
48       };
49 
50    }
51 
//
// Primary template: declared only, behaviour is supplied by explicit
// specializations.
//
template <class charT>
struct c_regex_traits;

//
// Regular expression traits for narrow characters, implemented in terms of
// the C library's string and character classification functions.
//
template<>
struct c_regex_traits<char>
{
   c_regex_traits(){}

   // this type is not copyable; the operations are deleted so that any
   // attempted copy is rejected at compile time rather than at link time:
   c_regex_traits(const c_regex_traits&) = delete;
   c_regex_traits& operator=(const c_regex_traits&) = delete;

   typedef char char_type;
   typedef std::size_t size_type;
   typedef std::string string_type;
   struct locale_type{};
   typedef std::uint32_t char_class_type;

   // Length of the null-terminated string p, as reported by std::strlen.
   static size_type length(const char_type* p)
   {
      return (std::strlen)(p);
   }

   // Case sensitive translation: returns c unchanged.
   char translate(char c) const
   {
      return c;
   }
   // Case insensitive translation: returns std::tolower of c; the cast
   // through unsigned char keeps the argument in the range tolower accepts.
   char translate_nocase(char c) const
   {
      return static_cast<char>((std::tolower)(static_cast<unsigned char>(c)));
   }

   static string_type  transform(const char* p1, const char* p2);
   static string_type  transform_primary(const char* p1, const char* p2);

   static char_class_type  lookup_classname(const char* p1, const char* p2);
   static string_type  lookup_collatename(const char* p1, const char* p2);

   static bool  isctype(char, char_class_type);
   static int  value(char, int);

   // locale_type is an empty placeholder: imbue hands its argument straight
   // back and getloc returns a default constructed value.
   locale_type imbue(locale_type l)
   { return l; }
   locale_type getloc()const
   { return locale_type(); }
};
98 
99 #ifndef BOOST_NO_WREGEX
100 template<>
101 struct c_regex_traits<wchar_t>
102 {
c_regex_traitsboost::c_regex_traits103    c_regex_traits(){}
104    typedef wchar_t char_type;
105    typedef std::size_t size_type;
106    typedef std::wstring string_type;
107    struct locale_type{};
108    typedef std::uint32_t char_class_type;
109 
lengthboost::c_regex_traits110    static size_type length(const char_type* p)
111    {
112       return (std::wcslen)(p);
113    }
114 
translateboost::c_regex_traits115    wchar_t translate(wchar_t c) const
116    {
117       return c;
118    }
translate_nocaseboost::c_regex_traits119    wchar_t translate_nocase(wchar_t c) const
120    {
121       return (std::towlower)(c);
122    }
123 
124    static string_type  transform(const wchar_t* p1, const wchar_t* p2);
125    static string_type  transform_primary(const wchar_t* p1, const wchar_t* p2);
126 
127    static char_class_type  lookup_classname(const wchar_t* p1, const wchar_t* p2);
128    static string_type  lookup_collatename(const wchar_t* p1, const wchar_t* p2);
129 
130    static bool  isctype(wchar_t, char_class_type);
131    static int  value(wchar_t, int);
132 
imbueboost::c_regex_traits133    locale_type imbue(locale_type l)
134    { return l; }
getlocboost::c_regex_traits135    locale_type getloc()const
136    { return locale_type(); }
137 
138 private:
139    // this type is not copyable:
140    c_regex_traits(const c_regex_traits&);
141    c_regex_traits& operator=(const c_regex_traits&);
142 };
143 
144 #endif // BOOST_NO_WREGEX
145 
transform(const char * p1,const char * p2)146 inline c_regex_traits<char>::string_type  c_regex_traits<char>::transform(const char* p1, const char* p2)
147 {
148    std::string result(10, ' ');
149    std::size_t s = result.size();
150    std::size_t r;
151    std::string src(p1, p2);
152    while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s)))
153    {
154 #if defined(_CPPLIB_VER)
155       //
156       // A bug in VC11 and 12 causes the program to hang if we pass a null-string
157       // to std::strxfrm, but only for certain locales :-(
158       // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
159       //
160       if (r == INT_MAX)
161       {
162          result.erase();
163          result.insert(result.begin(), static_cast<char>(0));
164          return result;
165       }
166 #endif
167       result.append(r - s + 3, ' ');
168       s = result.size();
169    }
170    result.erase(r);
171    return result;
172 }
173 
transform_primary(const char * p1,const char * p2)174 inline c_regex_traits<char>::string_type  c_regex_traits<char>::transform_primary(const char* p1, const char* p2)
175 {
176    static char s_delim;
177    static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<c_regex_traits<char>*>(0), &s_delim);
178    std::string result;
179    //
180    // What we do here depends upon the format of the sort key returned by
181    // sort key returned by this->transform:
182    //
183    switch (s_collate_type)
184    {
185    case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
186    case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
187       // the best we can do is translate to lower case, then get a regular sort key:
188    {
189       result.assign(p1, p2);
190       for (std::string::size_type i = 0; i < result.size(); ++i)
191          result[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(result[i])));
192       result = transform(&*result.begin(), &*result.begin() + result.size());
193       break;
194    }
195    case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
196    {
197       // get a regular sort key, and then truncate it:
198       result = transform(p1, p2);
199       result.erase(s_delim);
200       break;
201    }
202    case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
203       // get a regular sort key, and then truncate everything after the delim:
204       result = transform(p1, p2);
205       if ((!result.empty()) && (result[0] == s_delim))
206          break;
207       std::size_t i;
208       for (i = 0; i < result.size(); ++i)
209       {
210          if (result[i] == s_delim)
211             break;
212       }
213       result.erase(i);
214       break;
215    }
216    if (result.empty())
217       result = std::string(1, char(0));
218    return result;
219 }
220 
lookup_classname(const char * p1,const char * p2)221 inline c_regex_traits<char>::char_class_type  c_regex_traits<char>::lookup_classname(const char* p1, const char* p2)
222 {
223    using namespace BOOST_REGEX_DETAIL_NS;
224    static const char_class_type masks[] =
225    {
226       0,
227       char_class_alnum,
228       char_class_alpha,
229       char_class_blank,
230       char_class_cntrl,
231       char_class_digit,
232       char_class_digit,
233       char_class_graph,
234       char_class_horizontal,
235       char_class_lower,
236       char_class_lower,
237       char_class_print,
238       char_class_punct,
239       char_class_space,
240       char_class_space,
241       char_class_upper,
242       char_class_unicode,
243       char_class_upper,
244       char_class_vertical,
245       char_class_alnum | char_class_word,
246       char_class_alnum | char_class_word,
247       char_class_xdigit,
248    };
249 
250    int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
251    if (idx < 0)
252    {
253       std::string s(p1, p2);
254       for (std::string::size_type i = 0; i < s.size(); ++i)
255          s[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(s[i])));
256       idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
257    }
258    BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0]));
259    return masks[idx + 1];
260 }
261 
isctype(char c,char_class_type mask)262 inline bool  c_regex_traits<char>::isctype(char c, char_class_type mask)
263 {
264    using namespace BOOST_REGEX_DETAIL_NS;
265    return
266       ((mask & char_class_space) && (std::isspace)(static_cast<unsigned char>(c)))
267       || ((mask & char_class_print) && (std::isprint)(static_cast<unsigned char>(c)))
268       || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast<unsigned char>(c)))
269       || ((mask & char_class_upper) && (std::isupper)(static_cast<unsigned char>(c)))
270       || ((mask & char_class_lower) && (std::islower)(static_cast<unsigned char>(c)))
271       || ((mask & char_class_alpha) && (std::isalpha)(static_cast<unsigned char>(c)))
272       || ((mask & char_class_digit) && (std::isdigit)(static_cast<unsigned char>(c)))
273       || ((mask & char_class_punct) && (std::ispunct)(static_cast<unsigned char>(c)))
274       || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast<unsigned char>(c)))
275       || ((mask & char_class_blank) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
276       || ((mask & char_class_word) && (c == '_'))
277       || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
278       || ((mask & char_class_horizontal) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v'));
279 }
280 
lookup_collatename(const char * p1,const char * p2)281 inline c_regex_traits<char>::string_type  c_regex_traits<char>::lookup_collatename(const char* p1, const char* p2)
282 {
283    std::string s(p1, p2);
284    s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
285    if (s.empty() && (p2 - p1 == 1))
286       s.append(1, *p1);
287    return s;
288 }
289 
value(char c,int radix)290 inline int  c_regex_traits<char>::value(char c, int radix)
291 {
292    char b[2] = { c, '\0', };
293    char* ep;
294    int result = std::strtol(b, &ep, radix);
295    if (ep == b)
296       return -1;
297    return result;
298 }
299 
300 #ifndef BOOST_NO_WREGEX
301 
transform(const wchar_t * p1,const wchar_t * p2)302 inline c_regex_traits<wchar_t>::string_type  c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
303 {
304    std::size_t r;
305    std::size_t s = 10;
306    std::wstring src(p1, p2);
307    std::wstring result(s, L' ');
308    while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
309    {
310 #if defined(_CPPLIB_VER)
311       //
312       // A bug in VC11 and 12 causes the program to hang if we pass a null-string
313       // to std::strxfrm, but only for certain locales :-(
314       // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
315       //
316       if (r == INT_MAX)
317       {
318          result.erase();
319          result.insert(result.begin(), static_cast<wchar_t>(0));
320          return result;
321       }
322 #endif
323       result.append(r - s + 3, L' ');
324       s = result.size();
325    }
326    result.erase(r);
327    return result;
328 }
329 
transform_primary(const wchar_t * p1,const wchar_t * p2)330 inline c_regex_traits<wchar_t>::string_type  c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
331 {
332    static wchar_t s_delim;
333    static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
334    std::wstring result;
335    //
336    // What we do here depends upon the format of the sort key returned by
337    // sort key returned by this->transform:
338    //
339    switch (s_collate_type)
340    {
341    case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
342    case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
343       // the best we can do is translate to lower case, then get a regular sort key:
344    {
345       result.assign(p1, p2);
346       for (std::wstring::size_type i = 0; i < result.size(); ++i)
347          result[i] = (std::towlower)(result[i]);
348       result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
349       break;
350    }
351    case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
352    {
353       // get a regular sort key, and then truncate it:
354       result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
355       result.erase(s_delim);
356       break;
357    }
358    case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
359       // get a regular sort key, and then truncate everything after the delim:
360       result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
361       if ((!result.empty()) && (result[0] == s_delim))
362          break;
363       std::size_t i;
364       for (i = 0; i < result.size(); ++i)
365       {
366          if (result[i] == s_delim)
367             break;
368       }
369       result.erase(i);
370       break;
371    }
372    if (result.empty())
373       result = std::wstring(1, char(0));
374    return result;
375 }
376 
lookup_classname(const wchar_t * p1,const wchar_t * p2)377 inline c_regex_traits<wchar_t>::char_class_type  c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
378 {
379    using namespace BOOST_REGEX_DETAIL_NS;
380    static const char_class_type masks[] =
381    {
382       0,
383       char_class_alnum,
384       char_class_alpha,
385       char_class_blank,
386       char_class_cntrl,
387       char_class_digit,
388       char_class_digit,
389       char_class_graph,
390       char_class_horizontal,
391       char_class_lower,
392       char_class_lower,
393       char_class_print,
394       char_class_punct,
395       char_class_space,
396       char_class_space,
397       char_class_upper,
398       char_class_unicode,
399       char_class_upper,
400       char_class_vertical,
401       char_class_alnum | char_class_word,
402       char_class_alnum | char_class_word,
403       char_class_xdigit,
404    };
405 
406    int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
407    if (idx < 0)
408    {
409       std::wstring s(p1, p2);
410       for (std::wstring::size_type i = 0; i < s.size(); ++i)
411          s[i] = (std::towlower)(s[i]);
412       idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
413    }
414    BOOST_REGEX_ASSERT(idx + 1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
415    return masks[idx + 1];
416 }
417 
isctype(wchar_t c,char_class_type mask)418 inline bool  c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
419 {
420    using namespace BOOST_REGEX_DETAIL_NS;
421    return
422       ((mask & char_class_space) && (std::iswspace)(c))
423       || ((mask & char_class_print) && (std::iswprint)(c))
424       || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
425       || ((mask & char_class_upper) && (std::iswupper)(c))
426       || ((mask & char_class_lower) && (std::iswlower)(c))
427       || ((mask & char_class_alpha) && (std::iswalpha)(c))
428       || ((mask & char_class_digit) && (std::iswdigit)(c))
429       || ((mask & char_class_punct) && (std::iswpunct)(c))
430       || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
431       || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
432       || ((mask & char_class_word) && (c == '_'))
433       || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
434       || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
435       || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v'));
436 }
437 
lookup_collatename(const wchar_t * p1,const wchar_t * p2)438 inline c_regex_traits<wchar_t>::string_type  c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
439 {
440    std::string name;
441    // Usual msvc warning suppression does not work here with std::string template constructor.... use a workaround instead:
442    for (const wchar_t* pos = p1; pos != p2; ++pos)
443       name.push_back((char)*pos);
444    name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name);
445    if (!name.empty())
446       return string_type(name.begin(), name.end());
447    if (p2 - p1 == 1)
448       return string_type(1, *p1);
449    return string_type();
450 }
451 
value(wchar_t c,int radix)452 inline int  c_regex_traits<wchar_t>::value(wchar_t c, int radix)
453 {
454 #ifdef BOOST_BORLANDC
455    // workaround for broken wcstol:
456    if ((std::iswxdigit)(c) == 0)
457       return -1;
458 #endif
459    wchar_t b[2] = { c, '\0', };
460    wchar_t* ep;
461    int result = std::wcstol(b, &ep, radix);
462    if (ep == b)
463       return -1;
464    return result;
465 }
466 
467 #endif
468 
469 }
470 
471 #endif
472 
473 
474 
475