1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #define BOOST_LOCALE_SOURCE
9 #include <boost/locale/encoding.hpp>
10 #include <boost/locale/encoding_errors.hpp>
11 #include "../encoding/conv.hpp"
12 #include "all_generator.hpp"
13 #include "uconv.hpp"
14 #include <unicode/ucnv.h>
15 #include <unicode/ucnv_err.h>
16 #include <boost/locale/util.hpp>
17 #include <boost/locale/hold_ptr.hpp>
18 #include "codecvt.hpp"
19 
20 #ifdef BOOST_MSVC
21 #  pragma warning(disable : 4244) // loose data
22 #endif
23 
24 #include "icu_util.hpp"
25 #include <vector>
26 namespace boost {
27 namespace locale {
28 namespace impl_icu {
29     class uconv_converter : public util::base_converter {
30     public:
31 
uconv_converter(std::string const & encoding)32         uconv_converter(std::string const &encoding) :
33             encoding_(encoding)
34         {
35             UErrorCode err=U_ZERO_ERROR;
36 
37             // No need to check err each time, this
38             // is how ICU works.
39             cvt_ = ucnv_open(encoding.c_str(),&err);
40             ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_STOP,0,0,0,&err);
41             ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_STOP,0,0,0,&err);
42 
43             if(!cvt_ || U_FAILURE(err)) {
44                 if(cvt_)
45                     ucnv_close(cvt_);
46                 throw conv::invalid_charset_error(encoding);
47             }
48 
49             max_len_ = ucnv_getMaxCharSize(cvt_);
50         }
51 
~uconv_converter()52         virtual ~uconv_converter()
53         {
54             ucnv_close(cvt_);
55         }
56 
is_thread_safe() const57         virtual bool is_thread_safe() const
58         {
59             return false;
60         }
61 
clone() const62         virtual uconv_converter *clone() const
63         {
64             return new uconv_converter(encoding_);
65         }
66 
to_unicode(char const * & begin,char const * end)67         uint32_t to_unicode(char const *&begin,char const *end)
68         {
69             UErrorCode err=U_ZERO_ERROR;
70             char const *tmp = begin;
71             UChar32 c=ucnv_getNextUChar(cvt_,&tmp,end,&err);
72             ucnv_reset(cvt_);
73             if(err == U_TRUNCATED_CHAR_FOUND) {
74                 return incomplete;
75             }
76             if(U_FAILURE(err)) {
77                 return illegal;
78             }
79 
80             begin = tmp;
81             return c;
82         }
83 
from_unicode(uint32_t u,char * begin,char const * end)84         uint32_t from_unicode(uint32_t u,char *begin,char const *end)
85         {
86             UChar code_point[2]={0};
87             int len;
88             if(u<=0xFFFF) {
89                 if(0xD800 <=u && u<= 0xDFFF) // No surragates
90                     return illegal;
91                 code_point[0]=u;
92                 len=1;
93             }
94             else {
95                 u-=0x10000;
96                 code_point[0]=0xD800 | (u>>10);
97                 code_point[1]=0xDC00 | (u & 0x3FF);
98                 len=2;
99             }
100             UErrorCode err=U_ZERO_ERROR;
101             int olen = ucnv_fromUChars(cvt_,begin,end-begin,code_point,len,&err);
102             ucnv_reset(cvt_);
103             if(err == U_BUFFER_OVERFLOW_ERROR)
104                 return incomplete;
105             if(U_FAILURE(err))
106                 return illegal;
107             return olen;
108         }
109 
max_len() const110         virtual int max_len() const
111         {
112             return max_len_;
113         }
114 
115     private:
116         std::string encoding_;
117         UConverter *cvt_;
118         int max_len_;
119     };
120 
create_uconv_converter(std::string const & encoding)121     util::base_converter *create_uconv_converter(std::string const &encoding)
122     {
123         hold_ptr<util::base_converter> cvt;
124         try {
125             cvt.reset(new uconv_converter(encoding));
126         }
127         catch(std::exception const &/*e*/)
128         {
129             // no encoding so we return empty pointer
130         }
131         return cvt.release();
132     }
133 
create_codecvt(std::locale const & in,std::string const & encoding,character_facet_type type)134     std::locale create_codecvt(std::locale const &in,std::string const &encoding,character_facet_type type)
135     {
136         if(conv::impl::normalize_encoding(encoding.c_str())=="utf8")
137             return util::create_utf8_codecvt(in,type);
138 
139         try {
140             return util::create_simple_codecvt(in,encoding,type);
141         }
142         catch(boost::locale::conv::invalid_charset_error const &) {
143             hold_ptr<util::base_converter> cvt;
144             try {
145                 cvt.reset(create_uconv_converter(encoding));
146             }
147             catch(std::exception const &/*e*/)
148             {
149                 cvt.reset(new util::base_converter());
150             }
151             return util::create_codecvt_from_pointer(in,cvt.release(),type);
152         }
153     }
154 
155 } // impl_icu
156 } // locale
157 } // boost
158 
159 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
160