1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_SRC_LOCALE_ICU_UCONV_HPP
9 #define BOOST_SRC_LOCALE_ICU_UCONV_HPP
10 #include <unicode/unistr.h>
11 #include <unicode/ucnv.h>
12 #include <unicode/ustring.h>
13 #include <unicode/utf.h>
14 #include <unicode/utf16.h>
15 
16 #include <boost/locale/encoding.hpp>
17 
18 #include <string>
19 #include <memory>
20 #include "icu_util.hpp"
21 
22 namespace boost {
23 namespace locale {
24 namespace impl_icu {
25 
26     typedef enum {
27         cvt_skip,
28         cvt_stop
29     } cpcvt_type;
30 
31 
32     template<typename CharType,int char_size = sizeof(CharType) >
33     class icu_std_converter {
34     public:
35         typedef CharType char_type;
36         typedef std::basic_string<char_type> string_type;
37 
38         icu_std_converter(std::string charset,cpcvt_type cv=cvt_skip);
39         icu::UnicodeString icu(char_type const *begin,char_type const *end) const;
40         string_type std(icu::UnicodeString const &str) const;
41         size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end,size_t n,size_t from_u=0,size_t from_c=0) const;
42     };
43 
44     template<typename CharType>
45     class icu_std_converter<CharType,1> {
46     public:
47         typedef CharType char_type;
48         typedef std::basic_string<char_type> string_type;
49 
50 
icu_checked(char_type const * vb,char_type const * ve) const51         icu::UnicodeString icu_checked(char_type const *vb,char_type const *ve) const
52         {
53             return icu(vb,ve); // Already done
54         }
icu(char_type const * vb,char_type const * ve) const55         icu::UnicodeString icu(char_type const *vb,char_type const *ve) const
56         {
57             char const *begin=reinterpret_cast<char const *>(vb);
58             char const *end=reinterpret_cast<char const *>(ve);
59             uconv cvt(charset_,cvt_type_);
60             UErrorCode err=U_ZERO_ERROR;
61             icu::UnicodeString tmp(begin,end-begin,cvt.cvt(),err);
62             check_and_throw_icu_error(err);
63             return tmp;
64         }
65 
std(icu::UnicodeString const & str) const66         string_type std(icu::UnicodeString const &str) const
67         {
68             uconv cvt(charset_,cvt_type_);
69             return cvt.go(str.getBuffer(),str.length(),max_len_);
70         }
71 
icu_std_converter(std::string charset,cpcvt_type cvt_type=cvt_skip)72         icu_std_converter(std::string charset,cpcvt_type cvt_type = cvt_skip) :
73             charset_(charset),
74             cvt_type_(cvt_type)
75         {
76             uconv cvt(charset_,cvt_type);
77             max_len_=cvt.max_char_size();
78         }
79 
cut(icu::UnicodeString const & str,char_type const * begin,char_type const * end,size_t n,size_t from_u=0,size_t from_char=0) const80         size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end,
81                         size_t n,size_t from_u=0,size_t from_char=0) const
82         {
83             size_t code_points = str.countChar32(from_u,n);
84             uconv cvt(charset_,cvt_type_);
85             return cvt.cut(code_points,begin+from_char,end);
86         }
87 
88         struct uconv  {
89             uconv(uconv const &other);
90             void operator=(uconv const &other);
91         public:
uconvboost::locale::impl_icu::icu_std_converter::uconv92             uconv(std::string const &charset,cpcvt_type cvt_type=cvt_skip)
93             {
94                 UErrorCode err=U_ZERO_ERROR;
95                 cvt_ = ucnv_open(charset.c_str(),&err);
96                 if(!cvt_ || U_FAILURE(err)) {
97                     if(cvt_)
98                         ucnv_close(cvt_);
99                     throw conv::invalid_charset_error(charset);
100                 }
101 
102                 try {
103                     if(cvt_type==cvt_skip) {
104                         ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_SKIP,0,0,0,&err);
105                         check_and_throw_icu_error(err);
106 
107                         err=U_ZERO_ERROR;
108                         ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_SKIP,0,0,0,&err);
109                         check_and_throw_icu_error(err);
110                     }
111                     else {
112                         ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_STOP,0,0,0,&err);
113                         check_and_throw_icu_error(err);
114 
115                         err=U_ZERO_ERROR;
116                         ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_STOP,0,0,0,&err);
117                         check_and_throw_icu_error(err);
118                     }
119                 }
120                 catch(...) { ucnv_close(cvt_) ; throw; }
121             }
122 
max_char_sizeboost::locale::impl_icu::icu_std_converter::uconv123             int max_char_size()
124             {
125                 return ucnv_getMaxCharSize(cvt_);
126             }
127 
goboost::locale::impl_icu::icu_std_converter::uconv128             string_type go(UChar const *buf,int length,int max_size)
129             {
130                 string_type res;
131                 res.resize(UCNV_GET_MAX_BYTES_FOR_STRING(length,max_size));
132                 char *ptr=reinterpret_cast<char *>(&res[0]);
133                 UErrorCode err=U_ZERO_ERROR;
134                 int n = ucnv_fromUChars(cvt_,ptr,res.size(),buf,length,&err);
135                 check_and_throw_icu_error(err);
136                 res.resize(n);
137                 return res;
138             }
139 
cutboost::locale::impl_icu::icu_std_converter::uconv140             size_t cut(size_t n,char_type const *begin,char_type const *end)
141             {
142                 char_type const *saved = begin;
143                 while(n > 0 && begin < end) {
144                     UErrorCode err=U_ZERO_ERROR;
145                     ucnv_getNextUChar(cvt_,&begin,end,&err);
146                     if(U_FAILURE(err))
147                         return 0;
148                     n--;
149                 }
150                 return begin - saved;
151             }
152 
cvtboost::locale::impl_icu::icu_std_converter::uconv153             UConverter *cvt() { return cvt_; }
154 
~uconvboost::locale::impl_icu::icu_std_converter::uconv155             ~uconv()
156             {
157                 ucnv_close(cvt_);
158             }
159 
160         private:
161             UConverter *cvt_;
162         };
163 
164     private:
165         int max_len_;
166         std::string charset_;
167         cpcvt_type cvt_type_;
168     };
169 
170     template<typename CharType>
171     class icu_std_converter<CharType,2> {
172     public:
173         typedef CharType char_type;
174         typedef std::basic_string<char_type> string_type;
175 
176 
icu_checked(char_type const * begin,char_type const * end) const177         icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const
178         {
179             icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
180             while(begin!=end) {
181                 UChar cl = *begin++;
182                 if(U16_IS_SINGLE(cl))
183                     tmp.append(static_cast<UChar32>(cl));
184                 else if(U16_IS_LEAD(cl)) {
185                     if(begin==end) {
186                         throw_if_needed();
187                     }
188                     else {
189                         UChar ct=*begin++;
190                         if(!U16_IS_TRAIL(ct))
191                             throw_if_needed();
192                         else {
193                             UChar32 c=U16_GET_SUPPLEMENTARY(cl,ct);
194                             tmp.append(c);
195                         }
196                     }
197                 }
198                 else
199                     throw_if_needed();
200             }
201             return tmp;
202         }
throw_if_needed() const203         void throw_if_needed() const
204         {
205             if(mode_ == cvt_stop)
206                 throw conv::conversion_error();
207         }
icu(char_type const * vb,char_type const * ve) const208         icu::UnicodeString icu(char_type const *vb,char_type const *ve) const
209         {
210             UChar const *begin=reinterpret_cast<UChar const *>(vb);
211             UChar const *end=reinterpret_cast<UChar const *>(ve);
212             icu::UnicodeString tmp(begin,end-begin);
213             return tmp;
214 
215         }
216 
std(icu::UnicodeString const & str) const217         string_type std(icu::UnicodeString const &str) const
218         {
219             char_type const *ptr=reinterpret_cast<char_type const *>(str.getBuffer());
220             return string_type(ptr,str.length());
221         }
cut(icu::UnicodeString const &,char_type const *,char_type const *,size_t n,size_t=0,size_t=0) const222         size_t cut(icu::UnicodeString const &/*str*/,char_type const * /*begin*/,char_type const * /*end*/,size_t n,
223                         size_t /*from_u*/=0,size_t /*from_c*/=0) const
224         {
225             return n;
226         }
227 
icu_std_converter(std::string,cpcvt_type mode=cvt_skip)228         icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) :
229             mode_(mode)
230         {
231         }
232     private:
233         cpcvt_type mode_;
234 
235     };
236 
237     template<typename CharType>
238     class icu_std_converter<CharType,4> {
239     public:
240 
241         typedef CharType char_type;
242         typedef std::basic_string<char_type> string_type;
243 
icu_checked(char_type const * begin,char_type const * end) const244         icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const
245         {
246             icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
247             while(begin!=end) {
248                 UChar32 c = static_cast<UChar32>(*begin++);
249                 if(U_IS_UNICODE_CHAR(c))
250                         tmp.append(c);
251                 else
252                     throw_if_needed();
253             }
254             return tmp;
255         }
throw_if_needed() const256         void throw_if_needed() const
257         {
258             if(mode_ == cvt_stop)
259                 throw conv::conversion_error();
260         }
261 
icu(char_type const * begin,char_type const * end) const262         icu::UnicodeString icu(char_type const *begin,char_type const *end) const
263         {
264             icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
265             while(begin!=end) {
266                 UChar32 c=static_cast<UChar32>(*begin++);
267                 tmp.append(c);
268             }
269             return tmp;
270 
271         }
272 
std(icu::UnicodeString const & str) const273         string_type std(icu::UnicodeString const &str) const
274         {
275             string_type tmp;
276             tmp.resize(str.length());
277             UChar32 *ptr=reinterpret_cast<UChar32 *>(&tmp[0]);
278 
279             #ifdef __SUNPRO_CC
280             int len=0;
281             #else
282             ::int32_t len=0;
283             #endif
284 
285             UErrorCode code=U_ZERO_ERROR;
286             u_strToUTF32(ptr,tmp.size(),&len,str.getBuffer(),str.length(),&code);
287 
288             check_and_throw_icu_error(code);
289 
290             tmp.resize(len);
291 
292             return tmp;
293         }
294 
cut(icu::UnicodeString const & str,char_type const *,char_type const *,size_t n,size_t from_u=0,size_t=0) const295         size_t cut(icu::UnicodeString const &str,char_type const * /*begin*/,char_type const * /*end*/,size_t n,
296                 size_t from_u=0,size_t /*from_c*/=0) const
297         {
298             return str.countChar32(from_u,n);
299         }
300 
icu_std_converter(std::string,cpcvt_type mode=cvt_skip)301         icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) :
302             mode_(mode)
303         {
304         }
305     private:
306         cpcvt_type mode_;
307 
308     };
309 } /// impl_icu
310 } //  locale
311 } // boost
312 
313 #endif
314 
315 
316 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
317