///////////////////////////////////////////////////////////////////////////////
/// \file c_regex_traits.hpp
/// Contains the definition of the c_regex_traits\<\> template, which is a
/// wrapper for the C locale functions that can be used to customize the
/// behavior of static and dynamic regexes.
//
//  Copyright 2008 Eric Niebler. Distributed under the Boost
//  Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
10 
11 #ifndef BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005
12 #define BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005
13 
14 // MS compatible compilers support #pragma once
15 #if defined(_MSC_VER)
16 # pragma once
17 #endif
18 
19 #include <cstdlib>
20 #include <boost/config.hpp>
21 #include <boost/assert.hpp>
22 #include <boost/xpressive/traits/detail/c_ctype.hpp>
23 
24 namespace boost { namespace xpressive
25 {
26 
27 namespace detail
28 {
29     ///////////////////////////////////////////////////////////////////////////////
30     // empty_locale
31     struct empty_locale
32     {
33     };
34 
35     ///////////////////////////////////////////////////////////////////////////////
36     // c_regex_traits_base
37     template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
38     struct c_regex_traits_base
39     {
40     protected:
41         template<typename Traits>
imbueboost::xpressive::detail::c_regex_traits_base42         void imbue(Traits const &tr)
43         {
44         }
45     };
46 
47     template<typename Char>
48     struct c_regex_traits_base<Char, 1>
49     {
50     protected:
51         template<typename Traits>
imbueboost::xpressive::detail::c_regex_traits_base52         static void imbue(Traits const &)
53         {
54         }
55     };
56 
57     #ifndef BOOST_XPRESSIVE_NO_WREGEX
58     template<std::size_t SizeOfChar>
59     struct c_regex_traits_base<wchar_t, SizeOfChar>
60     {
61     protected:
62         template<typename Traits>
imbueboost::xpressive::detail::c_regex_traits_base63         static void imbue(Traits const &)
64         {
65         }
66     };
67     #endif
68 
69     template<typename Char>
70     Char c_tolower(Char);
71 
72     template<typename Char>
73     Char c_toupper(Char);
74 
75     template<>
c_tolower(char ch)76     inline char c_tolower(char ch)
77     {
78         using namespace std;
79         return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
80     }
81 
82     template<>
c_toupper(char ch)83     inline char c_toupper(char ch)
84     {
85         using namespace std;
86         return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
87     }
88 
89     #ifndef BOOST_XPRESSIVE_NO_WREGEX
90     template<>
c_tolower(wchar_t ch)91     inline wchar_t c_tolower(wchar_t ch)
92     {
93         using namespace std;
94         return towlower(ch);
95     }
96 
97     template<>
c_toupper(wchar_t ch)98     inline wchar_t c_toupper(wchar_t ch)
99     {
100         using namespace std;
101         return towupper(ch);
102     }
103     #endif
104 
105 } // namespace detail
106 
///////////////////////////////////////////////////////////////////////////////
// regex_traits_version_1_tag / regex_traits_version_2_tag
//
// Forward declarations of the traits version tags. c_regex_traits<> names
// regex_traits_version_2_tag as its version_tag, so that tag is declared here
// as well; redeclaring an already-declared struct is harmless.
// NOTE(review): assumes both tags are defined in this namespace - confirm
// against the header that defines them.
struct regex_traits_version_1_tag;
struct regex_traits_version_2_tag;
111 
112 ///////////////////////////////////////////////////////////////////////////////
113 // c_regex_traits
114 //
115 /// \brief Encapsaulates the standard C locale functions for use by the
116 /// \c basic_regex\<\> class template.
117 template<typename Char>
118 struct c_regex_traits
119   : detail::c_regex_traits_base<Char>
120 {
121     typedef Char char_type;
122     typedef std::basic_string<char_type> string_type;
123     typedef detail::empty_locale locale_type;
124     typedef typename detail::char_class_impl<Char>::char_class_type char_class_type;
125     typedef regex_traits_version_2_tag version_tag;
126     typedef detail::c_regex_traits_base<Char> base_type;
127 
128     /// Initialize a c_regex_traits object to use the global C locale.
129     ///
c_regex_traitsboost::xpressive::c_regex_traits130     c_regex_traits(locale_type const &loc = locale_type())
131       : base_type()
132     {
133         this->imbue(loc);
134     }
135 
136     /// Checks two c_regex_traits objects for equality
137     ///
138     /// \return true.
operator ==boost::xpressive::c_regex_traits139     bool operator ==(c_regex_traits<char_type> const &) const
140     {
141         return true;
142     }
143 
144     /// Checks two c_regex_traits objects for inequality
145     ///
146     /// \return false.
operator !=boost::xpressive::c_regex_traits147     bool operator !=(c_regex_traits<char_type> const &) const
148     {
149         return false;
150     }
151 
152     /// Convert a char to a Char
153     ///
154     /// \param ch The source character.
155     /// \return ch if Char is char, std::btowc(ch) if Char is wchar_t.
156     static char_type widen(char ch);
157 
158     /// Returns a hash value for a Char in the range [0, UCHAR_MAX]
159     ///
160     /// \param ch The source character.
161     /// \return a value between 0 and UCHAR_MAX, inclusive.
hashboost::xpressive::c_regex_traits162     static unsigned char hash(char_type ch)
163     {
164         return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch));
165     }
166 
167     /// No-op
168     ///
169     /// \param ch The source character.
170     /// \return ch
translateboost::xpressive::c_regex_traits171     static char_type translate(char_type ch)
172     {
173         return ch;
174     }
175 
176     /// Converts a character to lower-case using the current global C locale.
177     ///
178     /// \param ch The source character.
179     /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t.
translate_nocaseboost::xpressive::c_regex_traits180     static char_type translate_nocase(char_type ch)
181     {
182         return detail::c_tolower(ch);
183     }
184 
185     /// Converts a character to lower-case using the current global C locale.
186     ///
187     /// \param ch The source character.
188     /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t.
tolowerboost::xpressive::c_regex_traits189     static char_type tolower(char_type ch)
190     {
191         return detail::c_tolower(ch);
192     }
193 
194     /// Converts a character to upper-case using the current global C locale.
195     ///
196     /// \param ch The source character.
197     /// \return std::toupper(ch) if Char is char, std::towupper(ch) if Char is wchar_t.
toupperboost::xpressive::c_regex_traits198     static char_type toupper(char_type ch)
199     {
200         return detail::c_toupper(ch);
201     }
202 
203     /// Returns a \c string_type containing all the characters that compare equal
204     /// disregrarding case to the one passed in. This function can only be called
205     /// if <tt>has_fold_case\<c_regex_traits\<Char\> \>::value</tt> is \c true.
206     ///
207     /// \param ch The source character.
208     /// \return \c string_type containing all chars which are equal to \c ch when disregarding
209     ///     case
210     //typedef array<char_type, 2> fold_case_type;
fold_caseboost::xpressive::c_regex_traits211     string_type fold_case(char_type ch) const
212     {
213         BOOST_MPL_ASSERT((is_same<char_type, char>));
214         char_type ntcs[] = {
215             detail::c_tolower(ch)
216           , detail::c_toupper(ch)
217           , 0
218         };
219         if(ntcs[1] == ntcs[0])
220             ntcs[1] = 0;
221         return string_type(ntcs);
222     }
223 
224     /// Checks to see if a character is within a character range.
225     ///
226     /// \param first The bottom of the range, inclusive.
227     /// \param last The top of the range, inclusive.
228     /// \param ch The source character.
229     /// \return first <= ch && ch <= last.
in_rangeboost::xpressive::c_regex_traits230     static bool in_range(char_type first, char_type last, char_type ch)
231     {
232         return first <= ch && ch <= last;
233     }
234 
235     /// Checks to see if a character is within a character range, irregardless of case.
236     ///
237     /// \param first The bottom of the range, inclusive.
238     /// \param last The top of the range, inclusive.
239     /// \param ch The source character.
240     /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch)) || in_range(first,
241     ///     last, toupper(ch))
242     /// \attention The default implementation doesn't do proper Unicode
243     ///     case folding, but this is the best we can do with the standard
244     ///     C locale functions.
in_range_nocaseboost::xpressive::c_regex_traits245     static bool in_range_nocase(char_type first, char_type last, char_type ch)
246     {
247         return c_regex_traits::in_range(first, last, ch)
248             || c_regex_traits::in_range(first, last, detail::c_tolower(ch))
249             || c_regex_traits::in_range(first, last, detail::c_toupper(ch));
250     }
251 
252     /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
253     /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
254     /// then v.transform(G1, G2) < v.transform(H1, H2).
255     ///
256     /// \attention Not currently used
257     template<typename FwdIter>
transformboost::xpressive::c_regex_traits258     static string_type transform(FwdIter begin, FwdIter end)
259     {
260         BOOST_ASSERT(false); // BUGBUG implement me
261     }
262 
263     /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
264     /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
265     /// when character case is not considered then
266     /// v.transform_primary(G1, G2) < v.transform_primary(H1, H2).
267     ///
268     /// \attention Not currently used
269     template<typename FwdIter>
transform_primaryboost::xpressive::c_regex_traits270     static string_type transform_primary(FwdIter begin, FwdIter end)
271     {
272         BOOST_ASSERT(false); // BUGBUG implement me
273     }
274 
275     /// Returns a sequence of characters that represents the collating element
276     /// consisting of the character sequence designated by the iterator range [F1, F2).
277     /// Returns an empty string if the character sequence is not a valid collating element.
278     ///
279     /// \attention Not currently used
280     template<typename FwdIter>
lookup_collatenameboost::xpressive::c_regex_traits281     static string_type lookup_collatename(FwdIter begin, FwdIter end)
282     {
283         BOOST_ASSERT(false); // BUGBUG implement me
284     }
285 
286     /// For the character class name represented by the specified character sequence,
287     /// return the corresponding bitmask representation.
288     ///
289     /// \param begin A forward iterator to the start of the character sequence representing
290     ///     the name of the character class.
291     /// \param end The end of the character sequence.
292     /// \param icase Specifies whether the returned bitmask should represent the case-insensitive
293     ///     version of the character class.
294     /// \return A bitmask representing the character class.
295     template<typename FwdIter>
lookup_classnameboost::xpressive::c_regex_traits296     static char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase)
297     {
298         return detail::char_class_impl<char_type>::lookup_classname(begin, end, icase);
299     }
300 
301     /// Tests a character against a character class bitmask.
302     ///
303     /// \param ch The character to test.
304     /// \param mask The character class bitmask against which to test.
305     /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed
306     ///     together.
307     /// \return true if the character is a member of any of the specified character classes, false
308     ///     otherwise.
isctypeboost::xpressive::c_regex_traits309     static bool isctype(char_type ch, char_class_type mask)
310     {
311         return detail::char_class_impl<char_type>::isctype(ch, mask);
312     }
313 
314     /// Convert a digit character into the integer it represents.
315     ///
316     /// \param ch The digit character.
317     /// \param radix The radix to use for the conversion.
318     /// \pre radix is one of 8, 10, or 16.
319     /// \return -1 if ch is not a digit character, the integer value of the character otherwise. If
320     ///     char_type is char, std::strtol is used for the conversion. If char_type is wchar_t,
321     ///     std::wcstol is used.
322     static int value(char_type ch, int radix);
323 
324     /// No-op
325     ///
imbueboost::xpressive::c_regex_traits326     locale_type imbue(locale_type loc)
327     {
328         this->base_type::imbue(*this);
329         return loc;
330     }
331 
332     /// No-op
333     ///
getlocboost::xpressive::c_regex_traits334     static locale_type getloc()
335     {
336         locale_type loc;
337         return loc;
338     }
339 };
340 
341 ///////////////////////////////////////////////////////////////////////////////
342 // c_regex_traits<>::widen specializations
343 /// INTERNAL ONLY
344 template<>
widen(char ch)345 inline char c_regex_traits<char>::widen(char ch)
346 {
347     return ch;
348 }
349 
350 #ifndef BOOST_XPRESSIVE_NO_WREGEX
351 /// INTERNAL ONLY
352 template<>
widen(char ch)353 inline wchar_t c_regex_traits<wchar_t>::widen(char ch)
354 {
355     using namespace std;
356     return btowc(ch);
357 }
358 #endif
359 
360 ///////////////////////////////////////////////////////////////////////////////
361 // c_regex_traits<>::hash specializations
362 /// INTERNAL ONLY
363 template<>
hash(char ch)364 inline unsigned char c_regex_traits<char>::hash(char ch)
365 {
366     return static_cast<unsigned char>(ch);
367 }
368 
369 #ifndef BOOST_XPRESSIVE_NO_WREGEX
370 /// INTERNAL ONLY
371 template<>
hash(wchar_t ch)372 inline unsigned char c_regex_traits<wchar_t>::hash(wchar_t ch)
373 {
374     return static_cast<unsigned char>(ch);
375 }
376 #endif
377 
378 ///////////////////////////////////////////////////////////////////////////////
379 // c_regex_traits<>::value specializations
380 /// INTERNAL ONLY
381 template<>
value(char ch,int radix)382 inline int c_regex_traits<char>::value(char ch, int radix)
383 {
384     using namespace std;
385     BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
386     char begin[2] = { ch, '\0' }, *end = 0;
387     int val = strtol(begin, &end, radix);
388     return begin == end ? -1 : val;
389 }
390 
391 #ifndef BOOST_XPRESSIVE_NO_WREGEX
392 /// INTERNAL ONLY
393 template<>
value(wchar_t ch,int radix)394 inline int c_regex_traits<wchar_t>::value(wchar_t ch, int radix)
395 {
396     using namespace std;
397     BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
398     wchar_t begin[2] = { ch, L'\0' }, *end = 0;
399     int val = wcstol(begin, &end, radix);
400     return begin == end ? -1 : val;
401 }
402 #endif
403 
404 // Narrow C traits has fold_case() member function.
405 template<>
406 struct has_fold_case<c_regex_traits<char> >
407   : mpl::true_
408 {
409 };
410 
411 }}
412 
413 #endif
414