1 //
2 //  Copyright (c) 2015 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #include <boost/locale/utf8_codecvt.hpp>
9 #include <locale>
10 #include <iostream>
11 #include <iomanip>
12 #include <string.h>
13 #include <wchar.h>
14 #include <memory.h>
15 #define BOOST_LOCALE_ERROR_LIMIT -1
16 #include "test_locale.hpp"
17 
// Reference data: one file name spelled in two encodings.
// Code points: U+1D49E, '-', U+043F U+0440 U+0438 U+0432 U+0435 U+0442, '-',
// U+3084 U+3042, ".txt". The UTF-8 spelling therefore mixes 4-, 2-, 3- and
// 1-byte sequences; the wide spelling uses universal character names.
static char const *utf8_name = "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt";
static wchar_t const *wide_name = L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt";
20 
// Returns a printable name for a codecvt conversion result (diagnostics only).
// ok, partial and noconv map to their own names; error and any value not
// listed here are both reported as "error".
char const *res(std::codecvt_base::result r)
{
    if(r == std::codecvt_base::ok)
        return "ok";
    if(r == std::codecvt_base::partial)
        return "partial";
    if(r == std::codecvt_base::noconv)
        return "noconv";
    // std::codecvt_base::error and everything else
    return "error";
}
32 
// Facet interface used by the wide-character tests: wchar_t is the internal
// type, char the external type, std::mbstate_t the conversion state.
typedef std::codecvt<wchar_t,char,std::mbstate_t> cvt_type;
34 
test_codecvt_in_n_m(cvt_type const & cvt,int n,int m)35 void test_codecvt_in_n_m(cvt_type const &cvt,int n,int m)
36 {
37     wchar_t const *wptr = wide_name;
38     int wlen = wcslen(wide_name);
39     int u8len = strlen(utf8_name);
40     char const *from = utf8_name;
41     char const *end = from;
42     char const *real_end = utf8_name + u8len;
43     char const *from_next = from;
44     std::mbstate_t mb=std::mbstate_t();
45     while(from_next < real_end) {
46         if(from == end) {
47             end = from + n;
48             if(end > real_end)
49                 end = real_end;
50         }
51 
52         wchar_t buf[128];
53         wchar_t *to = buf;
54         wchar_t *to_end = to + m;
55         wchar_t *to_next = to;
56 
57 
58         std::mbstate_t mb2 = mb;
59         std::codecvt_base::result r = cvt.in(mb,from,end,from_next,to,to_end,to_next);
60         //std::cout << "In from_size=" << (end-from) << " from move=" <<  (from_next - from) << " to move= " << to_next - to << " state = " << res(r) << std::endl;
61 
62         int count = cvt.length(mb2,from,end,to_end - to);
63         #ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
64         TEST(memcmp(&mb,&mb2,sizeof(mb))==0);
65         if(count != from_next - from) {
66             std::cout << count << " " << from_next - from << std::endl;
67         }
68         TEST(count == from_next - from);
69         #else
70         TEST(count == to_next - to);
71         #endif
72 
73 
74         if(r == cvt_type::partial) {
75             end+=n;
76             if(end > real_end)
77                 end = real_end;
78         }
79         else
80             TEST(r == cvt_type::ok);
81         while(to!=to_next) {
82             TEST(*wptr == *to);
83             wptr++;
84             to++;
85         }
86         to=to_next;
87         from = from_next;
88     }
89     TEST(wptr == wide_name + wlen);
90     TEST(from == real_end);
91 
92 }
93 
test_codecvt_out_n_m(cvt_type const & cvt,int n,int m)94 void test_codecvt_out_n_m(cvt_type const &cvt,int n,int m)
95 {
96     char const *nptr = utf8_name;
97     int wlen = wcslen(wide_name);
98     int u8len = strlen(utf8_name);
99 
100     std::mbstate_t mb=std::mbstate_t();
101 
102     wchar_t const *from_next = wide_name;
103     wchar_t const *real_from_end = wide_name + wlen;
104 
105     char buf[256];
106     char *to = buf;
107     char *to_next = to;
108     char *to_end = to + n;
109     char *real_to_end = buf + sizeof(buf);
110 
111     while(from_next < real_from_end) {
112         wchar_t const *from = from_next;
113         wchar_t const *from_end = from + m;
114         if(from_end > real_from_end)
115             from_end = real_from_end;
116         if(to_end == to) {
117             to_end = to+n;
118         }
119 
120         std::codecvt_base::result r = cvt.out(mb,from,from_end,from_next,to,to_end,to_next);
121         //std::cout << "In from_size=" << (end-from) << " from move=" <<  (from_next - from) << " to move= " << to_next - to << " state = " << res(r) << std::endl;
122         if(r == cvt_type::partial) {
123             TEST(to_end - to_next < cvt.max_length());
124             to_end += n;
125             if(to_end > real_to_end)
126                 to_end = real_to_end;
127         }
128         else {
129             TEST(r == cvt_type::ok);
130         }
131 
132         while(to!=to_next) {
133             TEST(*nptr == *to);
134             nptr++;
135             to++;
136         }
137         from = from_next;
138     }
139     TEST(nptr == utf8_name + u8len);
140     TEST(from_next == real_from_end);
141     TEST(cvt.unshift(mb,to,to+n,to_next)==cvt_type::ok);
142     TEST(to_next == to);
143 
144 }
145 
146 
test_codecvt_conv()147 void test_codecvt_conv()
148 {
149     std::cout << "Conversions " << std::endl;
150     std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<wchar_t>());
151 
152     cvt_type const &cvt = std::use_facet<cvt_type>(l);
153 
154     TEST(cvt.max_length()==4);
155 
156     for(int i=1;i<=(int)strlen(utf8_name)+1;i++) {
157         for(int j=1;j<=(int)wcslen(wide_name)+1;j++) {
158             try {
159                 test_codecvt_in_n_m(cvt,i,j);
160                 test_codecvt_out_n_m(cvt,i,j);
161             }
162             catch(...) {
163                 std::cerr << "Wlen=" <<j << " Nlen=" << i << std::endl;
164                 throw;
165             }
166         }
167     }
168 }
169 
test_codecvt_err()170 void test_codecvt_err()
171 {
172     std::cout << "Errors " << std::endl;
173     std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<wchar_t>());
174 
175     cvt_type const &cvt = std::use_facet<cvt_type>(l);
176 
177     std::cout << "- UTF-8" << std::endl;
178     {
179 
180         wchar_t buf[2];
181         wchar_t *to=buf;
182         wchar_t *to_end = buf+2;
183         wchar_t *to_next = to;
184         char const *err_utf="1\xFF\xFF";
185         {
186             std::mbstate_t mb=std::mbstate_t();
187             char const *from=err_utf;
188             char const *from_end = from + strlen(from);
189             char const *from_next = from;
190             to_next = to;
191             TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
192             TEST(from_next == from+1);
193             TEST(to_next == to + 1);
194             TEST(*to == '1');
195         }
196         err_utf++;
197         {
198             std::mbstate_t mb=std::mbstate_t();
199             char const *from=err_utf;
200             char const *from_end = from + strlen(from);
201             char const *from_next = from;
202             TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
203             TEST(from_next == from);
204             TEST(to_next == to);
205         }
206     }
207 
208     std::cout << "- UTF-16/32" << std::endl;
209     {
210 
211         char buf[32];
212         char *to=buf;
213         char *to_end = buf+32;
214         char *to_next = to;
215         wchar_t err_buf[3] = { '1' , 0xDC9E }; // second surrogate not works both for UTF-16 and 32
216         wchar_t const *err_utf = err_buf;
217         {
218             std::mbstate_t mb=std::mbstate_t();
219             wchar_t const *from=err_utf;
220             wchar_t const *from_end = from + wcslen(from);
221             wchar_t const *from_next = from;
222             TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
223             TEST(from_next == from+1);
224             TEST(to_next == to + 1);
225             TEST(*to == '1');
226         }
227         err_utf++;
228         {
229             std::mbstate_t mb=std::mbstate_t();
230             wchar_t const *from=err_utf;
231             wchar_t const *from_end = from + wcslen(from);
232             wchar_t const *from_next = from;
233             to_next = to;
234             TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
235             TEST(from_next == from);
236             TEST(to_next == to);
237         }
238     }
239 
240 }
241 
242 
test_char_char()243 void test_char_char()
244 {
245     std::cout << "Char-char specialization"<<std::endl;
246     std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<char>());
247     std::codecvt<char,char,std::mbstate_t> const &cvt=std::use_facet<std::codecvt<char,char,std::mbstate_t> >(l);
248     std::mbstate_t mb=std::mbstate_t();
249     char const *from = "a";
250     char const *from_end = from+1;
251     char const *from_next = from;
252     char buf[2];
253     char *to = buf;
254     char *to_end = buf+1;
255     char *to_next = to;
256     TEST(cvt.always_noconv()==true);
257     TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::noconv);
258     TEST(from_next == from);
259     TEST(to_next == to);
260     TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::noconv);
261     TEST(from_next == from);
262     TEST(to_next == to);
263     TEST(cvt.encoding()==1);
264     TEST(cvt.max_length()==1);
265 }
266 
main()267 int main()
268 {
269     try {
270         test_codecvt_conv();
271         test_codecvt_err();
272         test_char_char();
273 
274     }
275     catch(std::exception const &e) {
276         std::cerr << "Failed : " << e.what() << std::endl;
277         return 1;
278     }
279     std::cout << "Ok" << std::endl;
280     return 0;
281 }
282 ///
283 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
284