1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #define BOOST_LOCALE_SOURCE 9 #include <boost/locale/conversion.hpp> 10 #include "all_generator.hpp" 11 #include <unicode/normlzr.h> 12 #include <unicode/ustring.h> 13 #include <unicode/locid.h> 14 #include <unicode/uversion.h> 15 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 308 16 #include <unicode/ucasemap.h> 17 #define WITH_CASE_MAPS 18 #endif 19 20 21 #include "cdata.hpp" 22 #include "uconv.hpp" 23 24 #include <vector> 25 26 namespace boost { 27 namespace locale { 28 namespace impl_icu { 29 30 31 namespace { normalize_string(icu::UnicodeString & str,int flags)32 void normalize_string(icu::UnicodeString &str,int flags) 33 { 34 UErrorCode code=U_ZERO_ERROR; 35 UNormalizationMode mode=UNORM_DEFAULT; 36 switch(flags) { 37 case norm_nfd: 38 mode=UNORM_NFD; 39 break; 40 case norm_nfc: 41 mode=UNORM_NFC; 42 break; 43 case norm_nfkd: 44 mode=UNORM_NFKD; 45 break; 46 case norm_nfkc: 47 mode=UNORM_NFKC; 48 break; 49 } 50 icu::UnicodeString tmp; 51 icu::Normalizer::normalize(str,mode,0,tmp,code); 52 53 check_and_throw_icu_error(code); 54 55 str=tmp; 56 } 57 } 58 59 60 template<typename CharType> 61 class converter_impl : public converter<CharType> { 62 public: 63 typedef CharType char_type; 64 typedef std::basic_string<char_type> string_type; 65 converter_impl(cdata const & d)66 converter_impl(cdata const &d) : 67 locale_(d.locale), 68 encoding_(d.encoding) 69 { 70 } 71 convert(converter_base::conversion_type how,char_type const * begin,char_type const * end,int flags=0) const72 virtual string_type convert(converter_base::conversion_type how,char_type const *begin,char_type const *end,int flags = 0) const 73 { 74 icu_std_converter<char_type> cvt(encoding_); 75 icu::UnicodeString str=cvt.icu(begin,end); 76 switch(how) { 77 case converter_base::normalization: 78 normalize_string(str,flags); 79 break; 80 case converter_base::upper_case: 81 str.toUpper(locale_); 82 break; 83 case converter_base::lower_case: 84 str.toLower(locale_); 85 break; 86 case converter_base::title_case: 87 str.toTitle(0,locale_); 88 break; 89 case converter_base::case_folding: 90 str.foldCase(); 91 break; 92 default: 93 ; 94 } 95 return cvt.std(str); 96 } 97 98 private: 99 icu::Locale locale_; 100 std::string encoding_; 101 }; // converter_impl 102 103 #ifdef WITH_CASE_MAPS 104 class raii_casemap { 105 raii_casemap(raii_casemap const &); 106 void operator = (raii_casemap const&); 107 public: raii_casemap(std::string const & locale_id)108 raii_casemap(std::string const &locale_id) : 109 map_(0) 110 { 111 UErrorCode err=U_ZERO_ERROR; 112 map_ = ucasemap_open(locale_id.c_str(),0,&err); 113 check_and_throw_icu_error(err); 114 if(!map_) 115 throw std::runtime_error("Failed to create UCaseMap"); 116 } 117 template<typename Conv> convert(Conv func,char const * begin,char const * end) const118 std::string convert(Conv func,char const *begin,char const *end) const 119 { 120 std::vector<char> buf((end-begin)*11/10+1); 121 UErrorCode err=U_ZERO_ERROR; 122 int size = func(map_,&buf.front(),buf.size(),begin,end-begin,&err); 123 if(err == U_BUFFER_OVERFLOW_ERROR) { 124 err = U_ZERO_ERROR; 125 buf.resize(size+1); 126 size = func(map_,&buf.front(),buf.size(),begin,end-begin,&err); 127 } 128 check_and_throw_icu_error(err); 129 return std::string(&buf.front(),size); 130 } ~raii_casemap()131 ~raii_casemap() 132 { 133 ucasemap_close(map_); 134 } 135 private: 136 UCaseMap *map_; 137 }; 138 139 class utf8_converter_impl : public converter<char> { 140 public: 141 utf8_converter_impl(cdata const & d)142 utf8_converter_impl(cdata const &d) : 143 locale_id_(d.locale.getName()), 144 map_(locale_id_) 145 { 146 } 147 convert(converter_base::conversion_type how,char const * begin,char const * end,int flags=0) const148 virtual std::string convert(converter_base::conversion_type how,char const *begin,char const *end,int flags = 0) const 149 { 150 151 if(how == converter_base::normalization) { 152 icu_std_converter<char> cvt("UTF-8"); 153 icu::UnicodeString str=cvt.icu(begin,end); 154 normalize_string(str,flags); 155 return cvt.std(str); 156 } 157 158 switch(how) 159 { 160 case converter_base::upper_case: 161 return map_.convert(ucasemap_utf8ToUpper,begin,end); 162 case converter_base::lower_case: 163 return map_.convert(ucasemap_utf8ToLower,begin,end); 164 case converter_base::title_case: 165 { 166 // Non-const method, so need to create a separate map 167 raii_casemap map(locale_id_); 168 return map.convert(ucasemap_utf8ToTitle,begin,end); 169 } 170 case converter_base::case_folding: 171 return map_.convert(ucasemap_utf8FoldCase,begin,end); 172 default: 173 return std::string(begin,end-begin); 174 } 175 } 176 private: 177 std::string locale_id_; 178 raii_casemap map_; 179 }; // converter_impl 180 181 #endif // WITH_CASE_MAPS 182 create_convert(std::locale const & in,cdata const & cd,character_facet_type type)183 std::locale create_convert(std::locale const &in,cdata const &cd,character_facet_type type) 184 { 185 switch(type) { 186 case char_facet: 187 #ifdef WITH_CASE_MAPS 188 if(cd.utf8) 189 return std::locale(in,new utf8_converter_impl(cd)); 190 #endif 191 return std::locale(in,new converter_impl<char>(cd)); 192 case wchar_t_facet: 193 return std::locale(in,new converter_impl<wchar_t>(cd)); 194 #ifdef BOOST_HAS_CHAR16_T 195 case char16_t_facet: 196 return std::locale(in,new converter_impl<char16_t>(cd)); 197 #endif 198 #ifdef BOOST_HAS_CHAR32_T 199 case char32_t_facet: 200 return std::locale(in,new converter_impl<char32_t>(cd)); 201 #endif 202 default: 203 return in; 204 } 205 } 206 207 208 } // impl_icu 209 } // locale 210 } // boost 211 212 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 213