1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_COLLATOR_HPP_INCLUDED
9 #define BOOST_LOCALE_COLLATOR_HPP_INCLUDED
10 
11 #include <boost/locale/config.hpp>
12 #ifdef BOOST_MSVC
13 #  pragma warning(push)
14 #  pragma warning(disable : 4275 4251 4231 4660)
15 #endif
16 #include <locale>
17 
18 
19 namespace boost {
20 namespace locale {
21 
22     class info;
23 
24     ///
25     /// \defgroup collation Collation
26     ///
27     /// This module introduces collation related classes
28     ///
29     /// @{
30 
31     ///
32     /// \brief a base class that includes collation level flags
33     ///
34 
35     class collator_base {
36     public:
37         ///
38         /// Unicode collation level types
39         ///
40         typedef enum {
41             primary     = 0, ///< 1st collation level: base letters
42             secondary   = 1, ///< 2nd collation level: letters and accents
43             tertiary    = 2, ///< 3rd collation level: letters, accents and case
44             quaternary  = 3, ///< 4th collation level: letters, accents, case and punctuation
45             identical   = 4  ///< identical collation level: include code-point comparison
46         } level_type;
47     };
48 
49     ///
50     /// \brief Collation facet.
51     ///
52     /// It reimplements standard C++ std::collate,
53     /// allowing usage of std::locale for direct string comparison
54     ///
55     template<typename CharType>
56     class collator :
57         public std::collate<CharType>,
58         public collator_base
59     {
60     public:
61         ///
62         /// Type of the underlying character
63         ///
64         typedef CharType char_type;
65         ///
66         /// Type of string used with this facet
67         ///
68         typedef std::basic_string<CharType> string_type;
69 
70 
71         ///
72         /// Compare two strings in rage [b1,e1),  [b2,e2) according using a collation level \a level. Calls do_compare
73         ///
74         /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if
75         /// they considered equal.
76         ///
compare(level_type level,char_type const * b1,char_type const * e1,char_type const * b2,char_type const * e2) const77         int compare(level_type level,
78                     char_type const *b1,char_type const *e1,
79                     char_type const *b2,char_type const *e2) const
80         {
81             return do_compare(level,b1,e1,b2,e2);
82         }
83         ///
84         /// Create a binary string that can be compared to other in order to get collation order. The string is created
85         /// for text in range [b,e). It is useful for collation of multiple strings for text.
86         ///
87         /// The transformation follows these rules:
88         /// \code
89         ///   compare(level,b1,e1,b2,e2) == sign( transform(level,b1,e1).compare(transform(level,b2,e2)) );
90         /// \endcode
91         ///
92         /// Calls do_transform
93         ///
transform(level_type level,char_type const * b,char_type const * e) const94         string_type transform(level_type level,char_type const *b,char_type const *e) const
95         {
96             return do_transform(level,b,e);
97         }
98 
99         ///
100         /// Calculate a hash of a text in range [b,e). The value can be used for collation sensitive string comparison.
101         ///
102         /// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2)
103         ///
104         /// Calls do_hash
105         ///
hash(level_type level,char_type const * b,char_type const * e) const106         long hash(level_type level,char_type const *b,char_type const *e) const
107         {
108             return do_hash(level,b,e);
109         }
110 
111         ///
112         /// Compare two strings \a l and \a r using collation level \a level
113         ///
114         /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if
115         /// they considered equal.
116         ///
117         ///
compare(level_type level,string_type const & l,string_type const & r) const118         int compare(level_type level,string_type const &l,string_type const &r) const
119         {
120             return do_compare(level,l.data(),l.data()+l.size(),r.data(),r.data()+r.size());
121         }
122 
123         ///
124         /// Calculate a hash that can be used for collation sensitive string comparison of a string \a s
125         ///
126         /// If compare(level,s1,s2) == 0 then hash(level,s1) == hash(level,s2)
127         ///
128 
hash(level_type level,string_type const & s) const129         long hash(level_type level,string_type const &s) const
130         {
131             return do_hash(level,s.data(),s.data()+s.size());
132         }
133         ///
134         /// Create a binary string from string \a s, that can be compared to other, useful for collation of multiple
135         /// strings.
136         ///
137         /// The transformation follows these rules:
138         /// \code
139         ///   compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) );
140         /// \endcode
141         ///
transform(level_type level,string_type const & s) const142         string_type transform(level_type level,string_type const &s) const
143         {
144             return do_transform(level,s.data(),s.data()+s.size());
145         }
146 
147     protected:
148 
149         ///
150         /// constructor of the collator object
151         ///
collator(size_t refs=0)152         collator(size_t refs = 0) : std::collate<CharType>(refs)
153         {
154         }
155 
~collator()156         virtual ~collator()
157         {
158         }
159 
160         ///
161         /// This function is used to override default collation function that does not take in account collation level.
162         /// Uses primary level
163         ///
do_compare(char_type const * b1,char_type const * e1,char_type const * b2,char_type const * e2) const164         virtual int do_compare( char_type const *b1,char_type const *e1,
165                                 char_type const *b2,char_type const *e2) const
166         {
167             return do_compare(identical,b1,e1,b2,e2);
168         }
169         ///
170         /// This function is used to override default collation function that does not take in account collation level.
171         /// Uses primary level
172         ///
do_transform(char_type const * b,char_type const * e) const173         virtual string_type do_transform(char_type const *b,char_type const *e) const
174         {
175             return do_transform(identical,b,e);
176         }
177         ///
178         /// This function is used to override default collation function that does not take in account collation level.
179         /// Uses primary level
180         ///
do_hash(char_type const * b,char_type const * e) const181         virtual long do_hash(char_type const *b,char_type const *e) const
182         {
183             return do_hash(identical,b,e);
184         }
185 
186         ///
187         /// Actual function that performs comparison between the strings. For details see compare member function. Can be overridden.
188         ///
189         virtual int do_compare( level_type level,
190                                 char_type const *b1,char_type const *e1,
191                                 char_type const *b2,char_type const *e2) const = 0;
192         ///
193         /// Actual function that performs transformation. For details see transform member function. Can be overridden.
194         ///
195         virtual string_type do_transform(level_type level,char_type const *b,char_type const *e) const = 0;
196         ///
197         /// Actual function that calculates hash. For details see hash member function. Can be overridden.
198         ///
199         virtual long do_hash(level_type level,char_type const *b,char_type const *e) const = 0;
200 
201 
202     };
203 
204     ///
205     /// \brief This class can be used in STL algorithms and containers for comparison of strings
206     /// with a level other than primary
207     ///
208     /// For example:
209     ///
210     /// \code
211     ///  std::map<std::string,std::string,comparator<char,collator_base::secondary> > data;
212     /// \endcode
213     ///
214     /// Would create a map the keys of which are sorted using secondary collation level
215     ///
216     template<typename CharType,collator_base::level_type default_level = collator_base::identical>
217     struct comparator
218     {
219     public:
220         ///
221         /// Create a comparator class for locale \a l and with collation leval \a level
222         ///
223         /// \note throws std::bad_cast if l does not have \ref collator facet installed
224         ///
comparatorboost::locale::comparator225         comparator(std::locale const &l=std::locale(),collator_base::level_type level=default_level) :
226             locale_(l),
227             level_(level)
228         {
229         }
230 
231         ///
232         /// Compare two strings -- equivalent to return left < right according to collation rules
233         ///
operator ()boost::locale::comparator234         bool operator()(std::basic_string<CharType> const &left,std::basic_string<CharType> const &right) const
235         {
236             return std::use_facet<collator<CharType> >(locale_).compare(level_,left,right) < 0;
237         }
238     private:
239         std::locale locale_;
240         collator_base::level_type level_;
241     };
242 
243 
244     ///
245     ///@}
246     ///
247 
248     } // locale
249 } // boost
250 
251 #ifdef BOOST_MSVC
252 #pragma warning(pop)
253 #endif
254 
255 
256 #endif
257 ///
258 /// \example collate.cpp
259 /// Example of using collation functions
260 ///
261 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
262