1 /*
2 *
3 * Copyright (c) 1998-2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE: primary_transform.hpp
15 * VERSION: see <boost/version.hpp>
16 * DESCRIPTION: Heuristically determines the sort string format in use
17 * by the current locale.
18 */
19
20 #ifndef BOOST_REGEX_PRIMARY_TRANSFORM
21 #define BOOST_REGEX_PRIMARY_TRANSFORM
22
23 namespace boost{
24 namespace BOOST_REGEX_DETAIL_NS{
25
26
//
// Result codes returned by find_sort_syntax, describing the layout
// of the sort keys produced by a traits class's transform():
//
enum{
   sort_C       = 0,   // transforming "a" gave back "a" unchanged
   sort_fixed   = 1,   // probe keys all have the same length: treated as fixed width fields
   sort_delim   = 2,   // a delimiter character was identified in the probe keys
   sort_unknown = 3    // none of the above could be established
};
33
template <class S, class charT>
unsigned count_chars(const S& s, charT c)
{
   //
   // Tally the positions in s that hold the character c.
   // A character that separates collation fields is expected
   // to appear the same number of times in every sort key,
   // so equal tallies across keys hint that c is a delimiter:
   //
   unsigned int matches = 0;
   unsigned idx = 0;
   while(idx < s.size())
   {
      if(s[idx] == c)
         ++matches;
      ++idx;
   }
   return matches;
}
50
51
52 template <class traits, class charT>
find_sort_syntax(const traits * pt,charT * delim)53 unsigned find_sort_syntax(const traits* pt, charT* delim)
54 {
55 //
56 // compare 'a' with 'A' to see how similar they are,
57 // should really use a-accute but we can't portably do that,
58 //
59 typedef typename traits::string_type string_type;
60 typedef typename traits::char_type char_type;
61
62 // Suppress incorrect warning for MSVC
63 (void)pt;
64
65 char_type a[2] = {'a', '\0', };
66 string_type sa(pt->transform(a, a+1));
67 if(sa == a)
68 {
69 *delim = 0;
70 return sort_C;
71 }
72 char_type A[2] = { 'A', '\0', };
73 string_type sA(pt->transform(A, A+1));
74 char_type c[2] = { ';', '\0', };
75 string_type sc(pt->transform(c, c+1));
76
77 int pos = 0;
78 while((pos <= static_cast<int>(sa.size())) && (pos <= static_cast<int>(sA.size())) && (sa[pos] == sA[pos])) ++pos;
79 --pos;
80 if(pos < 0)
81 {
82 *delim = 0;
83 return sort_unknown;
84 }
85 //
86 // at this point sa[pos] is either the end of a fixed width field
87 // or the character that acts as a delimiter:
88 //
89 charT maybe_delim = sa[pos];
90 if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim)))
91 {
92 *delim = maybe_delim;
93 return sort_delim;
94 }
95 //
96 // OK doen't look like a delimiter, try for fixed width field:
97 //
98 if((sa.size() == sA.size()) && (sa.size() == sc.size()))
99 {
100 // note assumes that the fixed width field is less than
101 // (numeric_limits<charT>::max)(), should be true for all types
102 // I can't imagine 127 character fields...
103 *delim = static_cast<charT>(++pos);
104 return sort_fixed;
105 }
106 //
107 // don't know what it is:
108 //
109 *delim = 0;
110 return sort_unknown;
111 }
112
113
114 } // namespace BOOST_REGEX_DETAIL_NS
115 } // namespace boost
116
117 #endif
118
119
120
121