1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #ifndef BOOST_LOCALE_COLLATOR_HPP_INCLUDED 9 #define BOOST_LOCALE_COLLATOR_HPP_INCLUDED 10 11 #include <boost/locale/config.hpp> 12 #ifdef BOOST_MSVC 13 # pragma warning(push) 14 # pragma warning(disable : 4275 4251 4231 4660) 15 #endif 16 #include <locale> 17 18 19 namespace boost { 20 namespace locale { 21 22 class info; 23 24 /// 25 /// \defgroup collation Collation 26 /// 27 /// This module introduces collation related classes 28 /// 29 /// @{ 30 31 /// 32 /// \brief a base class that includes collation level flags 33 /// 34 35 class collator_base { 36 public: 37 /// 38 /// Unicode collation level types 39 /// 40 typedef enum { 41 primary = 0, ///< 1st collation level: base letters 42 secondary = 1, ///< 2nd collation level: letters and accents 43 tertiary = 2, ///< 3rd collation level: letters, accents and case 44 quaternary = 3, ///< 4th collation level: letters, accents, case and punctuation 45 identical = 4 ///< identical collation level: include code-point comparison 46 } level_type; 47 }; 48 49 /// 50 /// \brief Collation facet. 51 /// 52 /// It reimplements standard C++ std::collate, 53 /// allowing usage of std::locale for direct string comparison 54 /// 55 template<typename CharType> 56 class collator : 57 public std::collate<CharType>, 58 public collator_base 59 { 60 public: 61 /// 62 /// Type of the underlying character 63 /// 64 typedef CharType char_type; 65 /// 66 /// Type of string used with this facet 67 /// 68 typedef std::basic_string<CharType> string_type; 69 70 71 /// 72 /// Compare two strings in rage [b1,e1), [b2,e2) according using a collation level \a level. Calls do_compare 73 /// 74 /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if 75 /// they considered equal. 76 /// compare(level_type level,char_type const * b1,char_type const * e1,char_type const * b2,char_type const * e2) const77 int compare(level_type level, 78 char_type const *b1,char_type const *e1, 79 char_type const *b2,char_type const *e2) const 80 { 81 return do_compare(level,b1,e1,b2,e2); 82 } 83 /// 84 /// Create a binary string that can be compared to other in order to get collation order. The string is created 85 /// for text in range [b,e). It is useful for collation of multiple strings for text. 86 /// 87 /// The transformation follows these rules: 88 /// \code 89 /// compare(level,b1,e1,b2,e2) == sign( transform(level,b1,e1).compare(transform(level,b2,e2)) ); 90 /// \endcode 91 /// 92 /// Calls do_transform 93 /// transform(level_type level,char_type const * b,char_type const * e) const94 string_type transform(level_type level,char_type const *b,char_type const *e) const 95 { 96 return do_transform(level,b,e); 97 } 98 99 /// 100 /// Calculate a hash of a text in range [b,e). The value can be used for collation sensitive string comparison. 101 /// 102 /// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2) 103 /// 104 /// Calls do_hash 105 /// hash(level_type level,char_type const * b,char_type const * e) const106 long hash(level_type level,char_type const *b,char_type const *e) const 107 { 108 return do_hash(level,b,e); 109 } 110 111 /// 112 /// Compare two strings \a l and \a r using collation level \a level 113 /// 114 /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if 115 /// they considered equal. 116 /// 117 /// compare(level_type level,string_type const & l,string_type const & r) const118 int compare(level_type level,string_type const &l,string_type const &r) const 119 { 120 return do_compare(level,l.data(),l.data()+l.size(),r.data(),r.data()+r.size()); 121 } 122 123 /// 124 /// Calculate a hash that can be used for collation sensitive string comparison of a string \a s 125 /// 126 /// If compare(level,s1,s2) == 0 then hash(level,s1) == hash(level,s2) 127 /// 128 hash(level_type level,string_type const & s) const129 long hash(level_type level,string_type const &s) const 130 { 131 return do_hash(level,s.data(),s.data()+s.size()); 132 } 133 /// 134 /// Create a binary string from string \a s, that can be compared to other, useful for collation of multiple 135 /// strings. 136 /// 137 /// The transformation follows these rules: 138 /// \code 139 /// compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) ); 140 /// \endcode 141 /// transform(level_type level,string_type const & s) const142 string_type transform(level_type level,string_type const &s) const 143 { 144 return do_transform(level,s.data(),s.data()+s.size()); 145 } 146 147 protected: 148 149 /// 150 /// constructor of the collator object 151 /// collator(size_t refs=0)152 collator(size_t refs = 0) : std::collate<CharType>(refs) 153 { 154 } 155 ~collator()156 virtual ~collator() 157 { 158 } 159 160 /// 161 /// This function is used to override default collation function that does not take in account collation level. 162 /// Uses primary level 163 /// do_compare(char_type const * b1,char_type const * e1,char_type const * b2,char_type const * e2) const164 virtual int do_compare( char_type const *b1,char_type const *e1, 165 char_type const *b2,char_type const *e2) const 166 { 167 return do_compare(identical,b1,e1,b2,e2); 168 } 169 /// 170 /// This function is used to override default collation function that does not take in account collation level. 171 /// Uses primary level 172 /// do_transform(char_type const * b,char_type const * e) const173 virtual string_type do_transform(char_type const *b,char_type const *e) const 174 { 175 return do_transform(identical,b,e); 176 } 177 /// 178 /// This function is used to override default collation function that does not take in account collation level. 179 /// Uses primary level 180 /// do_hash(char_type const * b,char_type const * e) const181 virtual long do_hash(char_type const *b,char_type const *e) const 182 { 183 return do_hash(identical,b,e); 184 } 185 186 /// 187 /// Actual function that performs comparison between the strings. For details see compare member function. Can be overridden. 188 /// 189 virtual int do_compare( level_type level, 190 char_type const *b1,char_type const *e1, 191 char_type const *b2,char_type const *e2) const = 0; 192 /// 193 /// Actual function that performs transformation. For details see transform member function. Can be overridden. 194 /// 195 virtual string_type do_transform(level_type level,char_type const *b,char_type const *e) const = 0; 196 /// 197 /// Actual function that calculates hash. For details see hash member function. Can be overridden. 198 /// 199 virtual long do_hash(level_type level,char_type const *b,char_type const *e) const = 0; 200 201 202 }; 203 204 /// 205 /// \brief This class can be used in STL algorithms and containers for comparison of strings 206 /// with a level other than primary 207 /// 208 /// For example: 209 /// 210 /// \code 211 /// std::map<std::string,std::string,comparator<char,collator_base::secondary> > data; 212 /// \endcode 213 /// 214 /// Would create a map the keys of which are sorted using secondary collation level 215 /// 216 template<typename CharType,collator_base::level_type default_level = collator_base::identical> 217 struct comparator 218 { 219 public: 220 /// 221 /// Create a comparator class for locale \a l and with collation leval \a level 222 /// 223 /// \note throws std::bad_cast if l does not have \ref collator facet installed 224 /// comparatorboost::locale::comparator225 comparator(std::locale const &l=std::locale(),collator_base::level_type level=default_level) : 226 locale_(l), 227 level_(level) 228 { 229 } 230 231 /// 232 /// Compare two strings -- equivalent to return left < right according to collation rules 233 /// operator ()boost::locale::comparator234 bool operator()(std::basic_string<CharType> const &left,std::basic_string<CharType> const &right) const 235 { 236 return std::use_facet<collator<CharType> >(locale_).compare(level_,left,right) < 0; 237 } 238 private: 239 std::locale locale_; 240 collator_base::level_type level_; 241 }; 242 243 244 /// 245 ///@} 246 /// 247 248 } // locale 249 } // boost 250 251 #ifdef BOOST_MSVC 252 #pragma warning(pop) 253 #endif 254 255 256 #endif 257 /// 258 /// \example collate.cpp 259 /// Example of using collation functions 260 /// 261 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 262