1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #ifndef BOOST_SRC_LOCALE_ICU_UCONV_HPP 9 #define BOOST_SRC_LOCALE_ICU_UCONV_HPP 10 #include <unicode/unistr.h> 11 #include <unicode/ucnv.h> 12 #include <unicode/ustring.h> 13 #include <unicode/utf.h> 14 #include <unicode/utf16.h> 15 16 #include <boost/locale/encoding.hpp> 17 18 #include <string> 19 #include <memory> 20 #include "icu_util.hpp" 21 22 namespace boost { 23 namespace locale { 24 namespace impl_icu { 25 26 typedef enum { 27 cvt_skip, 28 cvt_stop 29 } cpcvt_type; 30 31 32 template<typename CharType,int char_size = sizeof(CharType) > 33 class icu_std_converter { 34 public: 35 typedef CharType char_type; 36 typedef std::basic_string<char_type> string_type; 37 38 icu_std_converter(std::string charset,cpcvt_type cv=cvt_skip); 39 icu::UnicodeString icu(char_type const *begin,char_type const *end) const; 40 string_type std(icu::UnicodeString const &str) const; 41 size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end,size_t n,size_t from_u=0,size_t from_c=0) const; 42 }; 43 44 template<typename CharType> 45 class icu_std_converter<CharType,1> { 46 public: 47 typedef CharType char_type; 48 typedef std::basic_string<char_type> string_type; 49 50 icu_checked(char_type const * vb,char_type const * ve) const51 icu::UnicodeString icu_checked(char_type const *vb,char_type const *ve) const 52 { 53 return icu(vb,ve); // Already done 54 } icu(char_type const * vb,char_type const * ve) const55 icu::UnicodeString icu(char_type const *vb,char_type const *ve) const 56 { 57 char const *begin=reinterpret_cast<char const *>(vb); 58 char const *end=reinterpret_cast<char const *>(ve); 59 uconv cvt(charset_,cvt_type_); 60 UErrorCode err=U_ZERO_ERROR; 61 icu::UnicodeString tmp(begin,end-begin,cvt.cvt(),err); 62 check_and_throw_icu_error(err); 63 return tmp; 64 } 65 std(icu::UnicodeString const & str) const66 string_type std(icu::UnicodeString const &str) const 67 { 68 uconv cvt(charset_,cvt_type_); 69 return cvt.go(str.getBuffer(),str.length(),max_len_); 70 } 71 icu_std_converter(std::string charset,cpcvt_type cvt_type=cvt_skip)72 icu_std_converter(std::string charset,cpcvt_type cvt_type = cvt_skip) : 73 charset_(charset), 74 cvt_type_(cvt_type) 75 { 76 uconv cvt(charset_,cvt_type); 77 max_len_=cvt.max_char_size(); 78 } 79 cut(icu::UnicodeString const & str,char_type const * begin,char_type const * end,size_t n,size_t from_u=0,size_t from_char=0) const80 size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end, 81 size_t n,size_t from_u=0,size_t from_char=0) const 82 { 83 size_t code_points = str.countChar32(from_u,n); 84 uconv cvt(charset_,cvt_type_); 85 return cvt.cut(code_points,begin+from_char,end); 86 } 87 88 struct uconv { 89 uconv(uconv const &other); 90 void operator=(uconv const &other); 91 public: uconvboost::locale::impl_icu::icu_std_converter::uconv92 uconv(std::string const &charset,cpcvt_type cvt_type=cvt_skip) 93 { 94 UErrorCode err=U_ZERO_ERROR; 95 cvt_ = ucnv_open(charset.c_str(),&err); 96 if(!cvt_ || U_FAILURE(err)) { 97 if(cvt_) 98 ucnv_close(cvt_); 99 throw conv::invalid_charset_error(charset); 100 } 101 102 try { 103 if(cvt_type==cvt_skip) { 104 ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_SKIP,0,0,0,&err); 105 check_and_throw_icu_error(err); 106 107 err=U_ZERO_ERROR; 108 ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_SKIP,0,0,0,&err); 109 check_and_throw_icu_error(err); 110 } 111 else { 112 ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_STOP,0,0,0,&err); 113 check_and_throw_icu_error(err); 114 115 err=U_ZERO_ERROR; 116 ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_STOP,0,0,0,&err); 117 check_and_throw_icu_error(err); 118 } 119 } 120 catch(...) { ucnv_close(cvt_) ; throw; } 121 } 122 max_char_sizeboost::locale::impl_icu::icu_std_converter::uconv123 int max_char_size() 124 { 125 return ucnv_getMaxCharSize(cvt_); 126 } 127 goboost::locale::impl_icu::icu_std_converter::uconv128 string_type go(UChar const *buf,int length,int max_size) 129 { 130 string_type res; 131 res.resize(UCNV_GET_MAX_BYTES_FOR_STRING(length,max_size)); 132 char *ptr=reinterpret_cast<char *>(&res[0]); 133 UErrorCode err=U_ZERO_ERROR; 134 int n = ucnv_fromUChars(cvt_,ptr,res.size(),buf,length,&err); 135 check_and_throw_icu_error(err); 136 res.resize(n); 137 return res; 138 } 139 cutboost::locale::impl_icu::icu_std_converter::uconv140 size_t cut(size_t n,char_type const *begin,char_type const *end) 141 { 142 char_type const *saved = begin; 143 while(n > 0 && begin < end) { 144 UErrorCode err=U_ZERO_ERROR; 145 ucnv_getNextUChar(cvt_,&begin,end,&err); 146 if(U_FAILURE(err)) 147 return 0; 148 n--; 149 } 150 return begin - saved; 151 } 152 cvtboost::locale::impl_icu::icu_std_converter::uconv153 UConverter *cvt() { return cvt_; } 154 ~uconvboost::locale::impl_icu::icu_std_converter::uconv155 ~uconv() 156 { 157 ucnv_close(cvt_); 158 } 159 160 private: 161 UConverter *cvt_; 162 }; 163 164 private: 165 int max_len_; 166 std::string charset_; 167 cpcvt_type cvt_type_; 168 }; 169 170 template<typename CharType> 171 class icu_std_converter<CharType,2> { 172 public: 173 typedef CharType char_type; 174 typedef std::basic_string<char_type> string_type; 175 176 icu_checked(char_type const * begin,char_type const * end) const177 icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const 178 { 179 icu::UnicodeString tmp(end-begin,0,0); // make inital capacity 180 while(begin!=end) { 181 UChar cl = *begin++; 182 if(U16_IS_SINGLE(cl)) 183 tmp.append(static_cast<UChar32>(cl)); 184 else if(U16_IS_LEAD(cl)) { 185 if(begin==end) { 186 throw_if_needed(); 187 } 188 else { 189 UChar ct=*begin++; 190 if(!U16_IS_TRAIL(ct)) 191 throw_if_needed(); 192 else { 193 UChar32 c=U16_GET_SUPPLEMENTARY(cl,ct); 194 tmp.append(c); 195 } 196 } 197 } 198 else 199 throw_if_needed(); 200 } 201 return tmp; 202 } throw_if_needed() const203 void throw_if_needed() const 204 { 205 if(mode_ == cvt_stop) 206 throw conv::conversion_error(); 207 } icu(char_type const * vb,char_type const * ve) const208 icu::UnicodeString icu(char_type const *vb,char_type const *ve) const 209 { 210 UChar const *begin=reinterpret_cast<UChar const *>(vb); 211 UChar const *end=reinterpret_cast<UChar const *>(ve); 212 icu::UnicodeString tmp(begin,end-begin); 213 return tmp; 214 215 } 216 std(icu::UnicodeString const & str) const217 string_type std(icu::UnicodeString const &str) const 218 { 219 char_type const *ptr=reinterpret_cast<char_type const *>(str.getBuffer()); 220 return string_type(ptr,str.length()); 221 } cut(icu::UnicodeString const &,char_type const *,char_type const *,size_t n,size_t=0,size_t=0) const222 size_t cut(icu::UnicodeString const &/*str*/,char_type const * /*begin*/,char_type const * /*end*/,size_t n, 223 size_t /*from_u*/=0,size_t /*from_c*/=0) const 224 { 225 return n; 226 } 227 icu_std_converter(std::string,cpcvt_type mode=cvt_skip)228 icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) : 229 mode_(mode) 230 { 231 } 232 private: 233 cpcvt_type mode_; 234 235 }; 236 237 template<typename CharType> 238 class icu_std_converter<CharType,4> { 239 public: 240 241 typedef CharType char_type; 242 typedef std::basic_string<char_type> string_type; 243 icu_checked(char_type const * begin,char_type const * end) const244 icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const 245 { 246 icu::UnicodeString tmp(end-begin,0,0); // make inital capacity 247 while(begin!=end) { 248 UChar32 c = static_cast<UChar32>(*begin++); 249 if(U_IS_UNICODE_CHAR(c)) 250 tmp.append(c); 251 else 252 throw_if_needed(); 253 } 254 return tmp; 255 } throw_if_needed() const256 void throw_if_needed() const 257 { 258 if(mode_ == cvt_stop) 259 throw conv::conversion_error(); 260 } 261 icu(char_type const * begin,char_type const * end) const262 icu::UnicodeString icu(char_type const *begin,char_type const *end) const 263 { 264 icu::UnicodeString tmp(end-begin,0,0); // make inital capacity 265 while(begin!=end) { 266 UChar32 c=static_cast<UChar32>(*begin++); 267 tmp.append(c); 268 } 269 return tmp; 270 271 } 272 std(icu::UnicodeString const & str) const273 string_type std(icu::UnicodeString const &str) const 274 { 275 string_type tmp; 276 tmp.resize(str.length()); 277 UChar32 *ptr=reinterpret_cast<UChar32 *>(&tmp[0]); 278 279 #ifdef __SUNPRO_CC 280 int len=0; 281 #else 282 ::int32_t len=0; 283 #endif 284 285 UErrorCode code=U_ZERO_ERROR; 286 u_strToUTF32(ptr,tmp.size(),&len,str.getBuffer(),str.length(),&code); 287 288 check_and_throw_icu_error(code); 289 290 tmp.resize(len); 291 292 return tmp; 293 } 294 cut(icu::UnicodeString const & str,char_type const *,char_type const *,size_t n,size_t from_u=0,size_t=0) const295 size_t cut(icu::UnicodeString const &str,char_type const * /*begin*/,char_type const * /*end*/,size_t n, 296 size_t from_u=0,size_t /*from_c*/=0) const 297 { 298 return str.countChar32(from_u,n); 299 } 300 icu_std_converter(std::string,cpcvt_type mode=cvt_skip)301 icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) : 302 mode_(mode) 303 { 304 } 305 private: 306 cpcvt_type mode_; 307 308 }; 309 } /// impl_icu 310 } // locale 311 } // boost 312 313 #endif 314 315 316 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 317