1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED 9 #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED 10 11 #include <boost/locale/config.hpp> 12 #ifdef BOOST_MSVC 13 # pragma warning(push) 14 # pragma warning(disable : 4275 4251 4231 4660) 15 #endif 16 #include <locale> 17 18 19 namespace boost { 20 namespace locale { 21 22 /// 23 /// \defgroup convert Text Conversions 24 /// 25 /// This module provides various function for string manipulation like Unicode normalization, case conversion etc. 26 /// @{ 27 /// 28 29 30 /// 31 /// \brief This class provides base flags for text manipulation. It is used as base for converter facet. 32 /// 33 class converter_base { 34 public: 35 /// 36 /// The flag used for facet - the type of operation to perform 37 /// 38 typedef enum { 39 normalization, ///< Apply Unicode normalization on the text 40 upper_case, ///< Convert text to upper case 41 lower_case, ///< Convert text to lower case 42 case_folding, ///< Fold case in the text 43 title_case ///< Convert text to title case 44 } conversion_type; 45 }; 46 47 template<typename CharType> 48 class converter; 49 50 #ifdef BOOST_LOCALE_DOXYGEN 51 /// 52 /// \brief The facet that implements text manipulation 53 /// 54 /// It is used to performs text conversion operations defined by \ref conversion_type. It is specialized 55 /// for four types of characters \c char, \c wchar_t, \c char16_t, \c char32_t 56 /// 57 template<typename Char> 58 class BOOST_LOCALE_DECL converter: public converter_base, public std::locale::facet { 59 public: 60 /// Locale identification 61 static std::locale::id id; 62 63 /// Standard constructor converter(size_t refs=0)64 converter(size_t refs = 0) : std::locale::facet(refs) 65 { 66 } 67 /// 68 /// Convert text in range [\a begin, \a end) according to conversion method \a how. Parameter 69 /// \a flags is used for specification of normalization method like nfd, nfc etc. 70 /// 71 virtual std::basic_string<Char> convert(conversion_type how,Char const *begin,Char const *end,int flags = 0) const = 0; 72 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) __get_id(void) const73 std::locale::id& __get_id (void) const { return id; } 74 #endif 75 }; 76 #else 77 78 template<> 79 class BOOST_LOCALE_DECL converter<char> : public converter_base, public std::locale::facet { 80 public: 81 static std::locale::id id; 82 converter(size_t refs=0)83 converter(size_t refs = 0) : std::locale::facet(refs) 84 { 85 } 86 virtual std::string convert(conversion_type how,char const *begin,char const *end,int flags = 0) const = 0; 87 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) __get_id(void) const88 std::locale::id& __get_id (void) const { return id; } 89 #endif 90 }; 91 92 template<> 93 class BOOST_LOCALE_DECL converter<wchar_t> : public converter_base, public std::locale::facet { 94 public: 95 static std::locale::id id; converter(size_t refs=0)96 converter(size_t refs = 0) : std::locale::facet(refs) 97 { 98 } 99 virtual std::wstring convert(conversion_type how,wchar_t const *begin,wchar_t const *end,int flags = 0) const = 0; 100 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) __get_id(void) const101 std::locale::id& __get_id (void) const { return id; } 102 #endif 103 }; 104 105 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T 106 template<> 107 class BOOST_LOCALE_DECL converter<char16_t> : public converter_base, public std::locale::facet { 108 public: 109 static std::locale::id id; converter(size_t refs=0)110 converter(size_t refs = 0) : std::locale::facet(refs) 111 { 112 } 113 virtual std::u16string convert(conversion_type how,char16_t const *begin,char16_t const *end,int flags = 0) const = 0; 114 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) __get_id(void) const115 std::locale::id& __get_id (void) const { return id; } 116 #endif 117 }; 118 #endif 119 120 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T 121 template<> 122 class BOOST_LOCALE_DECL converter<char32_t> : public converter_base, public std::locale::facet { 123 public: 124 static std::locale::id id; converter(size_t refs=0)125 converter(size_t refs = 0) : std::locale::facet(refs) 126 { 127 } 128 virtual std::u32string convert(conversion_type how,char32_t const *begin,char32_t const *end,int flags = 0) const = 0; 129 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER) __get_id(void) const130 std::locale::id& __get_id (void) const { return id; } 131 #endif 132 }; 133 #endif 134 135 #endif 136 137 /// 138 /// The type that defined <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a> 139 /// 140 141 typedef enum { 142 norm_nfd, ///< Canonical decomposition 143 norm_nfc, ///< Canonical decomposition followed by canonical composition 144 norm_nfkd, ///< Compatibility decomposition 145 norm_nfkc, ///< Compatibility decomposition followed by canonical composition. 146 norm_default = norm_nfc, ///< Default normalization - canonical decomposition followed by canonical composition 147 } norm_type; 148 149 /// 150 /// Normalize Unicode string \a str according to \ref norm_type "normalization form" \a n 151 /// 152 /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take 153 /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside 154 /// of a Unicode character set. 155 /// 156 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 157 /// 158 template<typename CharType> normalize(std::basic_string<CharType> const & str,norm_type n=norm_default,std::locale const & loc=std::locale ())159 std::basic_string<CharType> normalize(std::basic_string<CharType> const &str,norm_type n=norm_default,std::locale const &loc=std::locale()) 160 { 161 return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str.data(),str.data() + str.size(),n); 162 } 163 164 /// 165 /// Normalize NUL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n 166 /// 167 /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take 168 /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside 169 /// of a Unicode character set. 170 /// 171 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 172 /// 173 template<typename CharType> normalize(CharType const * str,norm_type n=norm_default,std::locale const & loc=std::locale ())174 std::basic_string<CharType> normalize(CharType const *str,norm_type n=norm_default,std::locale const &loc=std::locale()) 175 { 176 CharType const *end=str; 177 while(*end) 178 end++; 179 return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str,end,n); 180 } 181 182 /// 183 /// Normalize Unicode string in range [begin,end) according to \ref norm_type "normalization form" \a n 184 /// 185 /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take 186 /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside 187 /// of a Unicode character set. 188 /// 189 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 190 /// 191 template<typename CharType> normalize(CharType const * begin,CharType const * end,norm_type n=norm_default,std::locale const & loc=std::locale ())192 std::basic_string<CharType> normalize( CharType const *begin, 193 CharType const *end, 194 norm_type n=norm_default, 195 std::locale const &loc=std::locale()) 196 { 197 return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,begin,end,n); 198 } 199 200 /////////////////////////////////////////////////// 201 202 /// 203 /// Convert a string \a str to upper case according to locale \a loc 204 /// 205 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 206 /// 207 208 template<typename CharType> to_upper(std::basic_string<CharType> const & str,std::locale const & loc=std::locale ())209 std::basic_string<CharType> to_upper(std::basic_string<CharType> const &str,std::locale const &loc=std::locale()) 210 { 211 return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str.data(),str.data()+str.size()); 212 } 213 214 /// 215 /// Convert a NUL terminated string \a str to upper case according to locale \a loc 216 /// 217 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 218 /// 219 template<typename CharType> to_upper(CharType const * str,std::locale const & loc=std::locale ())220 std::basic_string<CharType> to_upper(CharType const *str,std::locale const &loc=std::locale()) 221 { 222 CharType const *end=str; 223 while(*end) 224 end++; 225 return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str,end); 226 } 227 228 /// 229 /// Convert a string in range [begin,end) to upper case according to locale \a loc 230 /// 231 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 232 /// 233 template<typename CharType> to_upper(CharType const * begin,CharType const * end,std::locale const & loc=std::locale ())234 std::basic_string<CharType> to_upper(CharType const *begin,CharType const *end,std::locale const &loc=std::locale()) 235 { 236 return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,begin,end); 237 } 238 239 /////////////////////////////////////////////////// 240 241 /// 242 /// Convert a string \a str to lower case according to locale \a loc 243 /// 244 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 245 /// 246 247 template<typename CharType> to_lower(std::basic_string<CharType> const & str,std::locale const & loc=std::locale ())248 std::basic_string<CharType> to_lower(std::basic_string<CharType> const &str,std::locale const &loc=std::locale()) 249 { 250 return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str.data(),str.data()+str.size()); 251 } 252 253 /// 254 /// Convert a NUL terminated string \a str to lower case according to locale \a loc 255 /// 256 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 257 /// 258 template<typename CharType> to_lower(CharType const * str,std::locale const & loc=std::locale ())259 std::basic_string<CharType> to_lower(CharType const *str,std::locale const &loc=std::locale()) 260 { 261 CharType const *end=str; 262 while(*end) 263 end++; 264 return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str,end); 265 } 266 267 /// 268 /// Convert a string in range [begin,end) to lower case according to locale \a loc 269 /// 270 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 271 /// 272 template<typename CharType> to_lower(CharType const * begin,CharType const * end,std::locale const & loc=std::locale ())273 std::basic_string<CharType> to_lower(CharType const *begin,CharType const *end,std::locale const &loc=std::locale()) 274 { 275 return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,begin,end); 276 } 277 /////////////////////////////////////////////////// 278 279 /// 280 /// Convert a string \a str to title case according to locale \a loc 281 /// 282 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 283 /// 284 285 template<typename CharType> to_title(std::basic_string<CharType> const & str,std::locale const & loc=std::locale ())286 std::basic_string<CharType> to_title(std::basic_string<CharType> const &str,std::locale const &loc=std::locale()) 287 { 288 return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str.data(),str.data()+str.size()); 289 } 290 291 /// 292 /// Convert a NUL terminated string \a str to title case according to locale \a loc 293 /// 294 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 295 /// 296 template<typename CharType> to_title(CharType const * str,std::locale const & loc=std::locale ())297 std::basic_string<CharType> to_title(CharType const *str,std::locale const &loc=std::locale()) 298 { 299 CharType const *end=str; 300 while(*end) 301 end++; 302 return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str,end); 303 } 304 305 /// 306 /// Convert a string in range [begin,end) to title case according to locale \a loc 307 /// 308 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 309 /// 310 template<typename CharType> to_title(CharType const * begin,CharType const * end,std::locale const & loc=std::locale ())311 std::basic_string<CharType> to_title(CharType const *begin,CharType const *end,std::locale const &loc=std::locale()) 312 { 313 return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,begin,end); 314 } 315 316 /////////////////////////////////////////////////// 317 318 /// 319 /// Fold case of a string \a str according to locale \a loc 320 /// 321 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 322 /// 323 324 template<typename CharType> fold_case(std::basic_string<CharType> const & str,std::locale const & loc=std::locale ())325 std::basic_string<CharType> fold_case(std::basic_string<CharType> const &str,std::locale const &loc=std::locale()) 326 { 327 return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str.data(),str.data()+str.size()); 328 } 329 330 /// 331 /// Fold case of a NUL terminated string \a str according to locale \a loc 332 /// 333 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 334 /// 335 template<typename CharType> fold_case(CharType const * str,std::locale const & loc=std::locale ())336 std::basic_string<CharType> fold_case(CharType const *str,std::locale const &loc=std::locale()) 337 { 338 CharType const *end=str; 339 while(*end) 340 end++; 341 return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str,end); 342 } 343 344 /// 345 /// Fold case of a string in range [begin,end) according to locale \a loc 346 /// 347 /// \note throws std::bad_cast if loc does not have \ref converter facet installed 348 /// 349 template<typename CharType> fold_case(CharType const * begin,CharType const * end,std::locale const & loc=std::locale ())350 std::basic_string<CharType> fold_case(CharType const *begin,CharType const *end,std::locale const &loc=std::locale()) 351 { 352 return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,begin,end); 353 } 354 355 /// 356 ///@} 357 /// 358 } // locale 359 360 } // boost 361 362 #ifdef BOOST_MSVC 363 #pragma warning(pop) 364 #endif 365 366 367 #endif 368 369 /// 370 /// \example conversions.cpp 371 /// 372 /// Example of using various text conversion functions. 373 /// 374 /// \example wconversions.cpp 375 /// 376 /// Example of using various text conversion functions with wide strings. 377 /// 378 379 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 380 381