1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 
9 #include <boost/locale/encoding.hpp>
10 #include <boost/locale/generator.hpp>
11 #include <boost/locale/localization_backend.hpp>
12 #include <boost/locale/info.hpp>
13 #include <boost/locale/config.hpp>
14 #include <fstream>
15 #include "test_locale.hpp"
16 #include "test_locale_tools.hpp"
17 
18 
19 #ifndef BOOST_LOCALE_NO_POSIX_BACKEND
20 # ifdef __APPLE__
21 #  include <xlocale.h>
22 #  endif
23 # include <locale.h>
24 #endif
25 
26 #if !defined(BOOST_LOCALE_WITH_ICU) && !defined(BOOST_LOCALE_WITH_ICONV) && (defined(BOOST_WINDOWS) || defined(__CYGWIN__))
27 #ifndef NOMINMAX
28 # define NOMINMAX
29 #endif
30 #include <windows.h>
31 #endif
32 
33 
// Switches that select which groups of checks run. main() assigns test_iso,
// test_utf and test_sjis anew for every backend before the checks start.
bool test_iso = false;
bool test_iso_8859_8 = true;
bool test_utf = false;
bool test_sjis = false;

// Locale names used by the 8-bit and Shift-JIS checks; main() fills them in
// for each backend.
std::string he_il_8bit;
std::string en_us_8bit;
std::string ja_jp_shiftjis;
42 
43 
// Drains `in` one character at a time and returns everything that was read.
// Reading stops at the first get() that leaves the stream in a failed state.
template<typename Char>
std::basic_string<Char> read_file(std::basic_istream<Char> &in)
{
    std::basic_string<Char> contents;
    for(Char ch; in.get(ch); )
        contents.push_back(ch);
    return contents;
}
53 
54 
55 template<typename Char>
test_ok(std::string file,std::locale const & l,std::basic_string<Char> cmp=std::basic_string<Char> ())56 void test_ok(std::string file,std::locale const &l,std::basic_string<Char> cmp=std::basic_string<Char>())
57 {
58     if(cmp.empty())
59         cmp=to<Char>(file);
60     std::ofstream test("testi.txt");
61     test << file;
62     test.close();
63     typedef std::basic_fstream<Char> stream_type;
64 
65     stream_type f1("testi.txt",stream_type::in);
66     f1.imbue(l);
67     TEST(read_file<Char>(f1) == cmp);
68     f1.close();
69 
70     stream_type f2("testo.txt",stream_type::out);
71     f2.imbue(l);
72     f2 << cmp;
73     f2.close();
74 
75     std::ifstream testo("testo.txt");
76     TEST(read_file<char>(testo) == file);
77 }
78 
// Checks that reading invalid input through a Char stream imbued with `l`
// fails.
//
//  file - raw bytes written to "testi.txt"
//  l    - locale imbued into the reading stream
//  pos  - number of characters that may be read before the failure is due
//
// A failure reported earlier than `pos` is accepted as well: the error may
// already be detected while decoding ahead.
template<typename Char>
void test_rfail(std::string file,std::locale const &l,int pos)
{
    typedef std::basic_fstream<Char> stream_type;

    {
        std::ofstream raw_out("testi.txt");
        raw_out << file;
    }

    stream_type reader("testi.txt",stream_type::in);
    reader.imbue(l);

    Char ch;
    for(int remaining=pos; remaining>0; --remaining) {
        if(reader.get(ch).fail())
            return; // error detected ahead of the expected position
        TEST(reader);
    }
    // All `pos` reads succeeded, so the next one MUST fail.
    TEST(reader.get(ch).fail());
}
101 
102 template<typename Char>
test_wfail(std::string file,std::locale const & l,int pos)103 void test_wfail(std::string file,std::locale const &l,int pos)
104 {
105     typedef std::basic_fstream<Char> stream_type;
106     stream_type f1("testo.txt",stream_type::out);
107     f1.imbue(l);
108     std::basic_string<Char> out=to<Char>(file);
109     int i;
110     for(i=0;i<pos;i++) {
111         f1 << out.at(i);
112         f1<<std::flush;
113         TEST(f1.good());
114     }
115     f1 << out.at(i);
116     TEST(f1.fail() || (f1<<std::flush).fail());
117 }
118 
119 
120 template<typename Char>
test_for_char()121 void test_for_char()
122 {
123     boost::locale::generator g;
124     if(test_utf) {
125         std::cout << "    UTF-8" << std::endl;
126         test_ok<Char>("grüße\nn i",g("en_US.UTF-8"));
127         test_rfail<Char>("abc\xFF\xFF",g("en_US.UTF-8"),3);
128         std::cout << "    Testing codepoints above 0xFFFF" << std::endl;
129         std::cout << "      Single U+2008A" << std::endl;
130         test_ok<Char>("\xf0\xa0\x82\x8a",g("en_US.UTF-8")); // U+2008A
131         std::cout << "      Single U+2008A withing text" << std::endl;
132         test_ok<Char>("abc\"\xf0\xa0\x82\x8a\"",g("en_US.UTF-8")); // U+2008A
133         std::string one = "\xf0\xa0\x82\x8a";
134         std::string res;
135         for(unsigned i=0;i<1000;i++)
136             res+=one;
137         std::cout << "      U+2008A x 1000" << std::endl;
138         test_ok<Char>(res.c_str(),g("en_US.UTF-8")); // U+2008A
139     }
140     else {
141         std::cout << "    UTF-8 Not supported " << std::endl;
142     }
143 
144     if(test_iso) {
145         if(test_iso_8859_8) {
146             std::cout << "    ISO8859-8" << std::endl;
147             test_ok<Char>("hello \xf9\xec\xe5\xed",g(he_il_8bit),to<Char>("hello שלום"));
148         }
149         std::cout << "    ISO8859-1" << std::endl;
150         test_ok<Char>(to<char>("grüße\nn i"),g(en_us_8bit),to<Char>("grüße\nn i"));
151         test_wfail<Char>("grüßen שלום",g(en_us_8bit),7);
152     }
153 
154     if(test_sjis) {
155         std::cout << "    Shift-JIS" << std::endl;
156         test_ok<Char>("\x93\xfa\x96\x7b",g(ja_jp_shiftjis),
157                 boost::locale::conv::to_utf<Char>("\xe6\x97\xa5\xe6\x9c\xac","UTF-8"));  // Japan
158     }
159 }
test_wide_io()160 void test_wide_io()
161 {
162     std::cout << "  wchar_t" << std::endl;
163     test_for_char<wchar_t>();
164 
165     #if defined BOOST_LOCALE_ENABLE_CHAR16_T && !defined(BOOST_NO_CHAR16_T_CODECVT)
166     std::cout << "  char16_t" << std::endl;
167     test_for_char<char16_t>();
168     #endif
169     #if defined BOOST_LOCALE_ENABLE_CHAR32_T && !defined(BOOST_NO_CHAR32_T_CODECVT)
170     std::cout << "  char32_t" << std::endl;
171     test_for_char<char32_t>();
172     #endif
173 }
174 
175 template<typename Char>
test_pos(std::string source,std::basic_string<Char> target,std::string encoding)176 void test_pos(std::string source,std::basic_string<Char> target,std::string encoding)
177 {
178     using namespace boost::locale::conv;
179     boost::locale::generator g;
180     std::locale l= encoding == "ISO8859-8" ? g("he_IL."+encoding) : g("en_US."+encoding);
181     TEST(to_utf<Char>(source,encoding)==target);
182     TEST(to_utf<Char>(source.c_str(),encoding)==target);
183     TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
184 
185     TEST(to_utf<Char>(source,l)==target);
186     TEST(to_utf<Char>(source.c_str(),l)==target);
187     TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
188 
189     TEST(from_utf<Char>(target,encoding)==source);
190     TEST(from_utf<Char>(target.c_str(),encoding)==source);
191     TEST(from_utf<Char>(target.c_str(),target.c_str()+target.size(),encoding)==source);
192 
193     TEST(from_utf<Char>(target,l)==source);
194     TEST(from_utf<Char>(target.c_str(),l)==source);
195     TEST(from_utf<Char>(target.c_str(),target.c_str()+target.size(),l)==source);
196 }
197 
// Checks that evaluating X throws boost::locale::conv::conversion_error.
// TEST_THROWS is not defined in this file (presumably test_locale.hpp).
#define TESTF(X) TEST_THROWS(X,boost::locale::conv::conversion_error)
199 
200 template<typename Char>
test_to_neg(std::string source,std::basic_string<Char> target,std::string encoding)201 void test_to_neg(std::string source,std::basic_string<Char> target,std::string encoding)
202 {
203     using namespace boost::locale::conv;
204     boost::locale::generator g;
205     std::locale l=g("en_US."+encoding);
206 
207     TEST(to_utf<Char>(source,encoding)==target);
208     TEST(to_utf<Char>(source.c_str(),encoding)==target);
209     TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
210     TEST(to_utf<Char>(source,l)==target);
211     TEST(to_utf<Char>(source.c_str(),l)==target);
212     TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
213 
214     TESTF(to_utf<Char>(source,encoding,stop));
215     TESTF(to_utf<Char>(source.c_str(),encoding,stop));
216     TESTF(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding,stop));
217     TESTF(to_utf<Char>(source,l,stop));
218     TESTF(to_utf<Char>(source.c_str(),l,stop));
219     TESTF(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l,stop));
220 }
221 
222 template<typename Char>
test_from_neg(std::basic_string<Char> source,std::string target,std::string encoding)223 void test_from_neg(std::basic_string<Char> source,std::string target,std::string encoding)
224 {
225     using namespace boost::locale::conv;
226     boost::locale::generator g;
227     std::locale l=g("en_US."+encoding);
228 
229     TEST(from_utf<Char>(source,encoding)==target);
230     TEST(from_utf<Char>(source.c_str(),encoding)==target);
231     TEST(from_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
232     TEST(from_utf<Char>(source,l)==target);
233     TEST(from_utf<Char>(source.c_str(),l)==target);
234     TEST(from_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
235 
236     TESTF(from_utf<Char>(source,encoding,stop));
237     TESTF(from_utf<Char>(source.c_str(),encoding,stop));
238     TESTF(from_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding,stop));
239     TESTF(from_utf<Char>(source,l,stop));
240     TESTF(from_utf<Char>(source.c_str(),l,stop));
241     TESTF(from_utf<Char>(source.c_str(),source.c_str()+source.size(),l,stop));
242 }
243 
244 template<typename Char>
utf(char const * s)245 std::basic_string<Char> utf(char const *s)
246 {
247     return to<Char>(s);
248 }
249 
250 template<>
utf(char const * s)251 std::basic_string<char> utf(char const *s)
252 {
253     return s;
254 }
255 
256 template<typename Char>
test_with_0()257 void test_with_0()
258 {
259     std::string a("abc\0\0 yz\0",3+2+3+1);
260     TEST(boost::locale::conv::from_utf<Char>(boost::locale::conv::to_utf<Char>(a,"UTF-8"),"UTF-8") == a);
261     TEST(boost::locale::conv::from_utf<Char>(boost::locale::conv::to_utf<Char>(a,"ISO8859-1"),"ISO8859-1") == a);
262 }
263 
// Sample strings for the utf_to_utf checks, selected by character type and
// by the size of that type. ok() returns a well-formed string; bad() returns
// the same text with invalid code units inserted.
template<typename Char,int n=sizeof(Char)>
struct utfutf;

// Narrow (byte-oriented) samples.
template<>
struct utfutf<char,1> {
    static char const *ok()
    {
        return "grüßen";
    }
    static char const *bad()
    {
        // The literal is split into 2 to make SunCC happy.
        return "gr\xFF" "üßen";
    }
};

// Samples for a 2-byte wchar_t.
template<>
struct utfutf<wchar_t,2> {
    static wchar_t const *ok()
    {
        return L"\x67\x72\xfc\xdf\x65\x6e";
    }
    static wchar_t const *bad()
    {
        static wchar_t const units[] = {
            0x67, 0x72,
            0xDC01,         // trail surrogate without a preceding lead
            0xfc,
            0xD801,         // lead surrogate ...
            0xD801,         // ... followed by another lead instead of a trail
            0xdf, 0x65, 0x6e,
            0
        };
        return units;
    }
};

// Samples for a 4-byte wchar_t.
template<>
struct utfutf<wchar_t,4> {
    static wchar_t const *ok()
    {
        return L"\x67\x72\xfc\xdf\x65\x6e";
    }
    static wchar_t const *bad()
    {
        static wchar_t const units[] = {
            0x67, 0x72,
            static_cast<wchar_t>(0x1000000), // > 10FFFF
            0xfc, 0xdf, 0x65, 0x6e,
            0
        };
        return units;
    }
};
294 
295 
296 template<typename CharOut,typename CharIn>
test_combinations()297 void test_combinations()
298 {
299     using boost::locale::conv::utf_to_utf;
300     typedef utfutf<CharOut> out;
301     typedef utfutf<CharIn> in;
302     TEST( (utf_to_utf<CharOut,CharIn>(in::ok())==out::ok()) );
303     TESTF( (utf_to_utf<CharOut,CharIn>(in::bad(),boost::locale::conv::stop)) );
304     TEST( (utf_to_utf<CharOut,CharIn>(in::bad())==out::ok()) );
305 }
306 
test_all_combinations()307 void test_all_combinations()
308 {
309     std::cout << "Testing utf_to_utf" << std::endl;
310     std::cout <<"  char<-char"<<std::endl;
311     test_combinations<char,char>();
312     std::cout <<"  char<-wchar"<<std::endl;
313     test_combinations<char,wchar_t>();
314     std::cout <<"  wchar<-char"<<std::endl;
315     test_combinations<wchar_t,char>();
316     std::cout <<"  wchar<-wchar"<<std::endl;
317     test_combinations<wchar_t,wchar_t>();
318 }
319 
320 template<typename Char>
test_to()321 void test_to()
322 {
323     test_pos<Char>(to<char>("grüßen"),utf<Char>("grüßen"),"ISO8859-1");
324     if(test_iso_8859_8)
325         test_pos<Char>("\xf9\xec\xe5\xed",utf<Char>("שלום"),"ISO8859-8");
326     test_pos<Char>("grüßen",utf<Char>("grüßen"),"UTF-8");
327     test_pos<Char>("abc\"\xf0\xa0\x82\x8a\"",utf<Char>("abc\"\xf0\xa0\x82\x8a\""),"UTF-8");
328 
329     test_to_neg<Char>("g\xFFrüßen",utf<Char>("grüßen"),"UTF-8");
330     test_from_neg<Char>(utf<Char>("hello שלום"),"hello ","ISO8859-1");
331 
332     test_with_0<Char>();
333 }
334 
335 
test_skip(char const * enc,char const * utf,char const * name,char const * opt=0)336 void test_skip(char const *enc,char const *utf,char const *name,char const *opt=0)
337 {
338     if(opt!=0) {
339         if(boost::locale::conv::to_utf<char>(enc,name) == opt) {
340             test_skip(enc,opt,name);
341             return;
342         }
343     }
344     TEST(boost::locale::conv::to_utf<char>(enc,name) == utf);
345     TEST(boost::locale::conv::to_utf<wchar_t>(enc,name) == boost::locale::conv::utf_to_utf<wchar_t>(utf));
346     #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
347     TEST(boost::locale::conv::to_utf<char16_t>(enc,name) == boost::locale::conv::utf_to_utf<char16_t>(utf));
348     #endif
349     #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
350     TEST(boost::locale::conv::to_utf<char32_t>(enc,name) == boost::locale::conv::utf_to_utf<char32_t>(utf));
351     #endif
352 }
353 
test_simple_conversions()354 void test_simple_conversions()
355 {
356     namespace blc=boost::locale::conv;
357     std::cout << "- Testing correct invalid bytes skipping" << std::endl;
358     try {
359         std::cout << "-- ISO-8859-8" << std::endl;
360         test_skip("test \xE0\xE1\xFB-","test \xd7\x90\xd7\x91-","ISO-8859-8");
361         test_skip("\xFB","","ISO-8859-8");
362         test_skip("test \xE0\xE1\xFB","test \xd7\x90\xd7\x91","ISO-8859-8");
363         test_skip("\xFB-","-","ISO-8859-8");
364     }
365     catch(blc::invalid_charset_error const &) {
366         std::cout <<"--- not supported" << std::endl;
367     }
368     try {
369         std::cout << "-- cp932" << std::endl;
370         test_skip("test\xE0\xA0 \x83\xF8-","test\xe7\x87\xbf -","cp932","test\xe7\x87\xbf ");
371         test_skip("\x83\xF8","","cp932");
372         test_skip("test\xE0\xA0 \x83\xF8","test\xe7\x87\xbf ","cp932");
373         test_skip("\x83\xF8-","-","cp932","");
374     }
375     catch(blc::invalid_charset_error const &) {
376         std::cout <<"--- not supported" << std::endl;
377     }
378 }
379 
380 
main()381 int main()
382 {
383     try {
384         std::vector<std::string> def;
385         #ifdef BOOST_LOCALE_WITH_ICU
386         def.push_back("icu");
387         #endif
388         #ifndef BOOST_LOCALE_NO_STD_BACKEND
389         def.push_back("std");
390         #endif
391         #ifndef BOOST_LOCALE_NO_WINAPI_BACKEND
392         def.push_back("winapi");
393         #endif
394         #ifndef BOOST_LOCALE_NO_POSIX_BACKEND
395         def.push_back("posix");
396         #endif
397 
398         #if !defined(BOOST_LOCALE_WITH_ICU) && !defined(BOOST_LOCALE_WITH_ICONV) && (defined(BOOST_WINDOWS) || defined(__CYGWIN__))
399         test_iso_8859_8 = IsValidCodePage(28598)!=0;
400         #endif
401 
402         test_simple_conversions();
403 
404 
405         for(int type = 0; type < int(def.size()); type ++ ) {
406             boost::locale::localization_backend_manager tmp_backend = boost::locale::localization_backend_manager::global();
407             tmp_backend.select(def[type]);
408             boost::locale::localization_backend_manager::global(tmp_backend);
409 
410             std::string bname = def[type];
411 
412             if(bname=="std") {
413                 en_us_8bit = get_std_name("en_US.ISO8859-1");
414                 he_il_8bit = get_std_name("he_IL.ISO8859-8");
415                 ja_jp_shiftjis = get_std_name("ja_JP.SJIS");
416                 if(!ja_jp_shiftjis.empty() && !test_std_supports_SJIS_codecvt(ja_jp_shiftjis))
417                 {
418                     std::cout << "Warning: detected unproper support of " << ja_jp_shiftjis << " locale, disableling it" << std::endl;
419                     ja_jp_shiftjis = "";
420                 }
421             }
422             else {
423                 en_us_8bit = "en_US.ISO8859-1";
424                 he_il_8bit = "he_IL.ISO8859-8";
425                 ja_jp_shiftjis = "ja_JP.SJIS";
426             }
427 
428             std::cout << "Testing for backend " << def[type] << std::endl;
429 
430             test_iso = true;
431             if(bname=="std" && (he_il_8bit.empty() || en_us_8bit.empty())) {
432                 std::cout << "no iso locales availible, passing" << std::endl;
433                 test_iso = false;
434             }
435             test_sjis = true;
436             if(bname=="std" && ja_jp_shiftjis.empty()) {
437                 test_sjis = false;
438             }
439             if(bname=="winapi") {
440                 test_iso = false;
441                 test_sjis = false;
442             }
443             test_utf = true;
444             #ifndef BOOST_LOCALE_NO_POSIX_BACKEND
445             if(bname=="posix") {
446                 {
447                     locale_t l = newlocale(LC_ALL_MASK,he_il_8bit.c_str(),0);
448                     if(!l)
449                         test_iso = false;
450                     else
451                         freelocale(l);
452                 }
453                 {
454                     locale_t l = newlocale(LC_ALL_MASK,en_us_8bit.c_str(),0);
455                     if(!l)
456                         test_iso = false;
457                     else
458                         freelocale(l);
459                 }
460                 {
461                     locale_t l = newlocale(LC_ALL_MASK,"en_US.UTF-8",0);
462                     if(!l)
463                         test_utf = false;
464                     else
465                         freelocale(l);
466                 }
467                 #ifdef BOOST_LOCALE_WITH_ICONV
468                 {
469                     locale_t l = newlocale(LC_ALL_MASK,ja_jp_shiftjis.c_str(),0);
470                     if(!l)
471                         test_sjis = false;
472                     else
473                         freelocale(l);
474                 }
475                 #else
476                 test_sjis = false;
477                 #endif
478             }
479             #endif
480 
481             if(def[type]=="std" && (get_std_name("en_US.UTF-8").empty() || get_std_name("he_IL.UTF-8").empty()))
482             {
483                 test_utf = false;
484             }
485 
486             std::cout << "Testing wide I/O" << std::endl;
487             test_wide_io();
488             std::cout << "Testing charset to/from UTF conversion functions" << std::endl;
489             std::cout << "  char" << std::endl;
490             test_to<char>();
491             std::cout << "  wchar_t" << std::endl;
492             test_to<wchar_t>();
493             #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
494             if(bname == "icu" || bname == "std") {
495                 std::cout << "  char16_t" << std::endl;
496                 test_to<char16_t>();
497             }
498             #endif
499             #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
500             if(bname == "icu" || bname == "std") {
501                 std::cout << "  char32_t" << std::endl;
502                 test_to<char32_t>();
503             }
504             #endif
505 
506             test_all_combinations();
507         }
508     }
509     catch(std::exception const &e) {
510         std::cerr << "Failed " << e.what() << std::endl;
511         return EXIT_FAILURE;
512     }
513     FINALIZE();
514 }
515 
516 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
517 // boostinspect:noascii
518