1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #define BOOST_LOCALE_SOURCE
9 #include <boost/locale/conversion.hpp>
10 #include "all_generator.hpp"
11 #include <unicode/normlzr.h>
12 #include <unicode/ustring.h>
13 #include <unicode/locid.h>
14 #include <unicode/uversion.h>
15 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 308
16 #include <unicode/ucasemap.h>
17 #define WITH_CASE_MAPS
18 #endif
19 
20 
21 #include "cdata.hpp"
22 #include "uconv.hpp"
23 
24 #include <vector>
25 
26 namespace boost {
27 namespace locale {
28 namespace impl_icu {
29 
30 
31     namespace {
normalize_string(icu::UnicodeString & str,int flags)32         void normalize_string(icu::UnicodeString &str,int flags)
33         {
34             UErrorCode code=U_ZERO_ERROR;
35             UNormalizationMode mode=UNORM_DEFAULT;
36             switch(flags) {
37             case norm_nfd:
38                 mode=UNORM_NFD;
39                 break;
40             case norm_nfc:
41                 mode=UNORM_NFC;
42                 break;
43             case norm_nfkd:
44                 mode=UNORM_NFKD;
45                 break;
46             case norm_nfkc:
47                 mode=UNORM_NFKC;
48                 break;
49             }
50             icu::UnicodeString tmp;
51             icu::Normalizer::normalize(str,mode,0,tmp,code);
52 
53             check_and_throw_icu_error(code);
54 
55             str=tmp;
56         }
57     }
58 
59 
60     template<typename CharType>
61     class converter_impl : public converter<CharType> {
62     public:
63         typedef CharType char_type;
64         typedef std::basic_string<char_type> string_type;
65 
converter_impl(cdata const & d)66         converter_impl(cdata const &d) :
67             locale_(d.locale),
68             encoding_(d.encoding)
69         {
70         }
71 
convert(converter_base::conversion_type how,char_type const * begin,char_type const * end,int flags=0) const72         virtual string_type convert(converter_base::conversion_type how,char_type const *begin,char_type const *end,int flags = 0) const
73         {
74             icu_std_converter<char_type> cvt(encoding_);
75             icu::UnicodeString str=cvt.icu(begin,end);
76             switch(how) {
77             case converter_base::normalization:
78                 normalize_string(str,flags);
79                 break;
80             case converter_base::upper_case:
81                 str.toUpper(locale_);
82                 break;
83             case converter_base::lower_case:
84                 str.toLower(locale_);
85                 break;
86             case converter_base::title_case:
87                 str.toTitle(0,locale_);
88                 break;
89             case converter_base::case_folding:
90                 str.foldCase();
91                 break;
92             default:
93                 ;
94             }
95             return cvt.std(str);
96         }
97 
98     private:
99         icu::Locale locale_;
100         std::string encoding_;
101     }; // converter_impl
102 
103     #ifdef WITH_CASE_MAPS
104     class raii_casemap {
105         raii_casemap(raii_casemap const &);
106         void operator = (raii_casemap const&);
107     public:
raii_casemap(std::string const & locale_id)108         raii_casemap(std::string const &locale_id) :
109             map_(0)
110         {
111             UErrorCode err=U_ZERO_ERROR;
112             map_ = ucasemap_open(locale_id.c_str(),0,&err);
113             check_and_throw_icu_error(err);
114             if(!map_)
115                 throw std::runtime_error("Failed to create UCaseMap");
116         }
117         template<typename Conv>
convert(Conv func,char const * begin,char const * end) const118         std::string convert(Conv func,char const *begin,char const *end) const
119         {
120                 std::vector<char> buf((end-begin)*11/10+1);
121                 UErrorCode err=U_ZERO_ERROR;
122                 int size = func(map_,&buf.front(),buf.size(),begin,end-begin,&err);
123                 if(err == U_BUFFER_OVERFLOW_ERROR) {
124                     err = U_ZERO_ERROR;
125                     buf.resize(size+1);
126                     size = func(map_,&buf.front(),buf.size(),begin,end-begin,&err);
127                 }
128                 check_and_throw_icu_error(err);
129                 return std::string(&buf.front(),size);
130         }
~raii_casemap()131         ~raii_casemap()
132         {
133             ucasemap_close(map_);
134         }
135     private:
136         UCaseMap *map_;
137     };
138 
139     class utf8_converter_impl : public converter<char> {
140     public:
141 
utf8_converter_impl(cdata const & d)142         utf8_converter_impl(cdata const &d) :
143             locale_id_(d.locale.getName()),
144             map_(locale_id_)
145         {
146         }
147 
convert(converter_base::conversion_type how,char const * begin,char const * end,int flags=0) const148         virtual std::string convert(converter_base::conversion_type how,char const *begin,char const *end,int flags = 0) const
149         {
150 
151             if(how == converter_base::normalization) {
152                 icu_std_converter<char> cvt("UTF-8");
153                 icu::UnicodeString str=cvt.icu(begin,end);
154                 normalize_string(str,flags);
155                 return cvt.std(str);
156             }
157 
158             switch(how)
159             {
160             case converter_base::upper_case:
161                 return map_.convert(ucasemap_utf8ToUpper,begin,end);
162             case converter_base::lower_case:
163                 return map_.convert(ucasemap_utf8ToLower,begin,end);
164             case converter_base::title_case:
165                 {
166                     // Non-const method, so need to create a separate map
167                     raii_casemap map(locale_id_);
168                     return map.convert(ucasemap_utf8ToTitle,begin,end);
169                 }
170             case converter_base::case_folding:
171                 return map_.convert(ucasemap_utf8FoldCase,begin,end);
172             default:
173                 return std::string(begin,end-begin);
174             }
175         }
176     private:
177         std::string locale_id_;
178         raii_casemap map_;
179     }; // converter_impl
180 
181 #endif // WITH_CASE_MAPS
182 
create_convert(std::locale const & in,cdata const & cd,character_facet_type type)183     std::locale create_convert(std::locale const &in,cdata const &cd,character_facet_type type)
184     {
185         switch(type) {
186         case char_facet:
187             #ifdef WITH_CASE_MAPS
188             if(cd.utf8)
189                 return std::locale(in,new utf8_converter_impl(cd));
190             #endif
191             return std::locale(in,new converter_impl<char>(cd));
192         case wchar_t_facet:
193             return std::locale(in,new converter_impl<wchar_t>(cd));
194         #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
195         case char16_t_facet:
196             return std::locale(in,new converter_impl<char16_t>(cd));
197         #endif
198         #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
199         case char32_t_facet:
200             return std::locale(in,new converter_impl<char32_t>(cd));
201         #endif
202         default:
203             return in;
204         }
205     }
206 
207 
208 } // impl_icu
209 } // locale
210 } // boost
211 
212 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
213