1 /*
2  *
3  * Copyright (c) 1998-2002
4  * John Maddock
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
12  /*
13   *   LOCATION:    see http://www.boost.org for most recent version.
14   *   FILE:        primary_transform.hpp
15   *   VERSION:     see <boost/version.hpp>
16   *   DESCRIPTION: Heuristically determines the sort string format in use
17   *                by the current locale.
18   */
19 
20 #ifndef BOOST_REGEX_PRIMARY_TRANSFORM
21 #define BOOST_REGEX_PRIMARY_TRANSFORM
22 
23 namespace boost{
24    namespace BOOST_REGEX_DETAIL_NS{
25 
26 
//
// Sort key formats reported by find_sort_syntax:
//
enum{
   // transform() returned the probe string "a" unchanged:
   sort_C = 0,
   // all probe keys have the same length, fields are fixed width:
   sort_fixed = 1,
   // fields are separated by a delimiter character:
   sort_delim = 2,
   // the format could not be determined:
   sort_unknown = 3
};
33 
//
// Count how many occurrences of character c occur
// in string s: if c is a delimiter between collation
// fields, then this should be the same value for all
// sort keys.
//
// S only needs to provide size() and operator[]; every
// element in [0, s.size()) is compared against c, so
// embedded null characters are counted like any other.
//
template <class S, class charT>
unsigned count_chars(const S& s, charT c)
{
   unsigned int count = 0;
   for(unsigned pos = 0; pos < s.size(); ++pos)
   {
      if(s[pos] == c) ++count;
   }
   return count;
}
50 
51 
52 template <class traits, class charT>
find_sort_syntax(const traits * pt,charT * delim)53 unsigned find_sort_syntax(const traits* pt, charT* delim)
54 {
55    //
56    // compare 'a' with 'A' to see how similar they are,
57    // should really use a-accute but we can't portably do that,
58    //
59    typedef typename traits::string_type string_type;
60    typedef typename traits::char_type char_type;
61 
62    // Suppress incorrect warning for MSVC
63    (void)pt;
64 
65    char_type a[2] = {'a', '\0', };
66    string_type sa(pt->transform(a, a+1));
67    if(sa == a)
68    {
69       *delim = 0;
70       return sort_C;
71    }
72    char_type A[2] = { 'A', '\0', };
73    string_type sA(pt->transform(A, A+1));
74    char_type c[2] = { ';', '\0', };
75    string_type sc(pt->transform(c, c+1));
76 
77    int pos = 0;
78    while((pos <= static_cast<int>(sa.size())) && (pos <= static_cast<int>(sA.size())) && (sa[pos] == sA[pos])) ++pos;
79    --pos;
80    if(pos < 0)
81    {
82       *delim = 0;
83       return sort_unknown;
84    }
85    //
86    // at this point sa[pos] is either the end of a fixed width field
87    // or the character that acts as a delimiter:
88    //
89    charT maybe_delim = sa[pos];
90    if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim)))
91    {
92       *delim = maybe_delim;
93       return sort_delim;
94    }
95    //
96    // OK doen't look like a delimiter, try for fixed width field:
97    //
98    if((sa.size() == sA.size()) && (sa.size() == sc.size()))
99    {
100       // note assumes that the fixed width field is less than
101       // (numeric_limits<charT>::max)(), should be true for all types
102       // I can't imagine 127 character fields...
103       *delim = static_cast<charT>(++pos);
104       return sort_fixed;
105    }
106    //
107    // don't know what it is:
108    //
109    *delim = 0;
110    return sort_unknown;
111 }
112 
113 
114    } // namespace BOOST_REGEX_DETAIL_NS
115 } // namespace boost
116 
117 #endif
118 
119 
120 
121