1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 
9 #ifndef BOOST_LOCALE_WITH_ICU
10 #include <iostream>
// Fallback entry point used when Boost.Locale was configured without ICU
// (BOOST_LOCALE_WITH_ICU is not defined): there is nothing to test, so the
// program only reports that fact and exits successfully.
int main()
{
    std::cout << "ICU is not build... Skipping" << std::endl;
    return 0;
}
15 #else
16 
17 //#define BOOST_LOCALE_ERROR_LIMIT 1000000
18 //#define BOOST_LOCALE_ERROR_LIMIT 0
19 
20 #include <boost/locale/boundary.hpp>
21 #include <boost/locale/generator.hpp>
22 #include "test_locale.hpp"
23 #include "test_locale_tools.hpp"
24 #include <list>
25 
26 // Debugging code
27 
// Debug helper: dumps a string to standard output.
// The primary template is deliberately a no-op, so strings of any character
// type other than plain char are silently ignored.
template<typename Char>
void print_str(std::basic_string<Char> const &)
{
    // nothing to print for non-char strings
}

// Narrow strings are written as "[text]" followed by a newline and a flush.
template<>
void print_str<char>(std::basic_string<char> const &s)
{
    std::cout << '[' << s << ']' << std::endl;
}
38 
39 
40 namespace lb = boost::locale::boundary;
41 
42 template<typename Char,typename Iterator>
test_word_container(Iterator begin,Iterator end,std::vector<int> const & ipos,std::vector<int> const & imasks,std::vector<std::basic_string<Char>> const & ichunks,std::locale l,lb::boundary_type bt=lb::word)43 void test_word_container(Iterator begin,Iterator end,
44     std::vector<int> const &ipos,
45     std::vector<int> const &imasks,
46     std::vector<std::basic_string<Char> > const &ichunks,
47     std::locale l,
48     lb::boundary_type bt=lb::word
49     )
50 {
51     for(int sm=(bt == lb::word ? 31 : 3 ) ;sm>=0;sm--) {
52         unsigned mask =
53               ((sm & 1 ) != 0) * 0xF
54             + ((sm & 2 ) != 0) * 0xF0
55             + ((sm & 4 ) != 0) * 0xF00
56             + ((sm & 8 ) != 0) * 0xF000
57             + ((sm & 16) != 0) * 0xF0000;
58 
59         std::vector<int> masks,pos;
60         std::vector<unsigned> bmasks;
61         std::basic_string<Char> empty_chunk;
62 
63         std::vector<std::basic_string<Char> > chunks;
64         std::vector<std::basic_string<Char> > fchunks;
65         std::vector<Iterator> iters;
66         iters.push_back(begin);
67         bmasks.push_back(0);
68 
69         for(unsigned i=0;i<imasks.size();i++) {
70             if(imasks[i] & mask) {
71                 masks.push_back(imasks[i]);
72                 chunks.push_back(ichunks[i]);
73                 fchunks.push_back(empty_chunk + ichunks[i]);
74                 empty_chunk.clear();
75                 pos.push_back(ipos[i]);
76             }
77             else {
78                 empty_chunk+=ichunks[i];
79             }
80 
81             if((imasks[i] & mask) || i==imasks.size()-1){
82                 Iterator ptr=begin;
83                 std::advance(ptr,ipos[i]);
84                 iters.push_back(ptr);
85                 bmasks.push_back(imasks[i]);
86             }
87         }
88 
89         //
90         // segment iterator tests
91         //
92         {
93             lb::segment_index<Iterator> map(bt,begin,end,l);
94             typedef typename lb::segment_index<Iterator>::iterator iter_type;
95 
96             map.rule(mask);
97 
98             {
99                 unsigned i=0;
100                 iter_type p;
101                 map.full_select(false);
102                 for(p=map.begin();p!=map.end();++p,i++) {
103                     TEST(p->str()==chunks[i]);
104                     TEST(p->rule() == unsigned(masks[i]));
105                 }
106 
107                 TEST(chunks.size() == i);
108                 for(;;) {
109                     if(p==map.begin()) {
110                         TEST(i==0);
111                         break;
112                     }
113                     else {
114                         --p;
115                         TEST(p->str()==chunks[--i]);
116                         TEST(p->rule() == unsigned(masks[i]));
117                     }
118                 }
119                 for(i=0,p=map.end();i<chunks.size();i++){
120                     --p;
121                     unsigned index = chunks.size() - i - 1;
122                     TEST(p->str()==chunks[index]);
123                     TEST(p->rule() == unsigned(masks[index]));
124                 }
125                 TEST(p==map.begin());
126             }
127 
128             {
129                 unsigned i=0;
130                 iter_type p;
131                 map.full_select(true);
132                 for(p=map.begin();p!=map.end();++p,i++) {
133                     TEST(p->str()==fchunks[i]);
134                     TEST(p->rule() == unsigned(masks[i]));
135                 }
136 
137                 TEST(chunks.size() == i);
138 
139                 for(;;) {
140                     if(p==map.begin()) {
141                         TEST(i==0);
142                         break;
143                     }
144                     else {
145                         --p;
146                         if(p->str()!=fchunks[i-1]) {
147                             print_str(p->str());
148                             print_str(fchunks[i-1]);
149                         }
150                         TEST(p->str()==fchunks[--i]);
151                         TEST(p->rule() == unsigned(masks[i]));
152                     }
153                 }
154 
155                 for(i=0,p=map.end();i<chunks.size();i++){
156                     --p;
157                     unsigned index = chunks.size() - i - 1;
158                     TEST(p->str()==fchunks[index]);
159                     TEST(p->rule() == unsigned(masks[index]));
160                 }
161                 TEST(p==map.begin());
162             }
163 
164             {
165                 iter_type p;
166                 unsigned chunk_ptr=0;
167                 unsigned i=0;
168                 map.full_select(false);
169                 for(Iterator optr=begin;optr!=end;optr++,i++) {
170                     p=map.find(optr);
171                     if(chunk_ptr < pos.size() && i>=unsigned(pos[chunk_ptr])){
172                         chunk_ptr++;
173                     }
174                     if(chunk_ptr>=pos.size()) {
175                         TEST(p==map.end());
176                     }
177                     else {
178                         TEST(p->str()==chunks[chunk_ptr]);
179                         TEST(p->rule()==unsigned(masks[chunk_ptr]));
180                     }
181                 }
182             }
183             {
184                 iter_type p;
185                 unsigned chunk_ptr=0;
186                 unsigned i=0;
187                 map.full_select(true);
188                 for(Iterator optr=begin;optr!=end;optr++,i++) {
189                     p=map.find(optr);
190                     if(chunk_ptr < pos.size() && i>=unsigned(pos[chunk_ptr])){
191                         chunk_ptr++;
192                     }
193                     if(chunk_ptr>=pos.size()) {
194                         TEST(p==map.end());
195                     }
196                     else {
197                         TEST(p->str()==fchunks[chunk_ptr]);
198                         TEST(p->rule()==unsigned(masks[chunk_ptr]));
199                     }
200                 }
201             }
202 
203         } // segment iterator tests
204 
205         { // break iterator tests
206             lb::boundary_point_index<Iterator> map(bt,begin,end,l);
207             typedef typename lb::boundary_point_index<Iterator>::iterator iter_type;
208 
209             map.rule(mask);
210 
211             unsigned i=0;
212             iter_type p;
213             for(p=map.begin();p!=map.end();++p,i++) {
214                 TEST(p->iterator()==iters[i]);
215                 TEST(p->rule()==bmasks[i]);
216             }
217 
218             TEST(iters.size() == i);
219 
220             do {
221                 --p;
222                 --i;
223                 TEST(p->iterator()==iters.at(i));
224             } while(p!=map.begin());
225             TEST(i==0);
226 
227             unsigned iters_ptr=0;
228             for(Iterator optr=begin;optr!=end;optr++) {
229                 p=map.find(optr);
230                 TEST(p->iterator()==iters[iters_ptr]);
231                 if(iters.at(iters_ptr)==optr)
232                     iters_ptr++;
233             }
234 
235         } // break iterator tests
236 
237         { // copy test
238             typedef lb::segment_index<Iterator> ti_type;
239             typedef lb::boundary_point_index<Iterator> bi_type;
240             {   // segment to bound
241                 ti_type ti(bt,begin,end,l);
242                 ti.rule(mask);
243                 {
244                     bi_type bi(ti);
245                     bi.rule(mask);
246                     unsigned i=0;
247                     typename bi_type::iterator p;
248                     for(p=bi.begin();p!=bi.end();++p,i++) {
249                         TEST(p->iterator()==iters[i]);
250                         TEST(p->rule()==bmasks[i]);
251                     }
252                 }
253                 {
254                     bi_type bi;
255                     bi.rule(mask);
256                     bi = ti;
257                     unsigned i=0;
258                     typename bi_type::iterator p;
259                     for(p=bi.begin();p!=bi.end();++p,i++) {
260                         TEST(p->iterator()==iters[i]);
261                         TEST(p->rule()==bmasks[i]);
262                     }
263                 }
264                 // boundary_point to bound
265                 bi_type bi_2(bt,begin,end,l);
266                 bi_2.rule(mask);
267                 {
268                     bi_type bi(bi_2);
269                     unsigned i=0;
270                     typename bi_type::iterator p;
271                     for(p=bi.begin();p!=bi.end();++p,i++) {
272                         TEST(p->iterator()==iters[i]);
273                         TEST(p->rule()==bmasks[i]);
274                     }
275                 }
276                 {
277                     bi_type bi;
278                     bi = bi_2;
279                     unsigned i=0;
280                     typename bi_type::iterator p;
281                     for(p=bi.begin();p!=bi.end();++p,i++) {
282                         TEST(p->iterator()==iters[i]);
283                         TEST(p->rule()==bmasks[i]);
284                     }
285                 }
286             }
287             {   // boundary_point to segment
288                 bi_type bi(bt,begin,end,l);
289                 {
290                     ti_type ti(bi);
291                     ti.rule(mask);
292                     unsigned i=0;
293                     typename ti_type::iterator p;
294                     for(p=ti.begin();p!=ti.end();++p,i++) {
295                         TEST(p->str()==chunks[i]);
296                         TEST(p->rule()==unsigned(masks[i]));
297                     }
298                 }
299                 {
300                     ti_type ti;
301                     ti.rule(mask);
302                     ti = (bi);
303                     unsigned i=0;
304                     typename ti_type::iterator p;
305                     for(p=ti.begin();p!=ti.end();++p,i++) {
306                         TEST(p->str()==chunks[i]);
307                         TEST(p->rule()==unsigned(masks[i]));
308                     }
309                 }
310                 ti_type ti_2(bt,begin,end,l);
311                 ti_2.rule(mask);
312                 {
313                     ti_type ti(ti_2);
314                     unsigned i=0;
315                     typename ti_type::iterator p;
316                     for(p=ti.begin();p!=ti.end();++p,i++) {
317                         TEST(p->str()==chunks[i]);
318                         TEST(p->rule()==unsigned(masks[i]));
319                     }
320                 }
321                 {
322                     ti_type ti;
323                     ti = (ti_2);
324                     unsigned i=0;
325                     typename ti_type::iterator p;
326                     for(p=ti.begin();p!=ti.end();++p,i++) {
327                         TEST(p->str()==chunks[i]);
328                         TEST(p->rule()==unsigned(masks[i]));
329                     }
330                 }
331             }
332         }
333     } // for mask
334 
335 }
336 
337 template<typename Char>
run_word(std::string * original,int * none,int * num,int * word,int * kana,int * ideo,std::locale l,lb::boundary_type b=lb::word)338 void run_word(std::string *original,int *none,int *num,int *word,int *kana,int *ideo,std::locale l,lb::boundary_type b=lb::word)
339 {
340     std::vector<int> pos;
341     std::vector<std::basic_string<Char> > chunks;
342     std::vector<int> masks;
343     std::basic_string<Char> test_string;
344     for(int i=0;!original[i].empty();i++) {
345         chunks.push_back(to_correct_string<Char>(original[i],l));
346         test_string+=chunks.back();
347         pos.push_back(test_string.size());
348         masks.push_back(
349               ( none ? none[i]*15 : 0)
350             | ( num  ? ((num[i]*15)  << 4) : 0)
351             | ( word ? ((word[i]*15) << 8) : 0)
352             | ( kana ? ((kana[i]*15) << 12) : 0)
353             | ( ideo ? ((ideo[i]*15) << 16) : 0)
354         );
355     }
356 
357     std::list<Char> lst(test_string.begin(),test_string.end());
358     test_word_container<Char>(lst.begin(),lst.end(),pos,masks,chunks,l,b);
359     test_word_container<Char>(test_string.begin(),test_string.end(),pos,masks,chunks,l,b);
360 }
361 
// Test tables. Every std::string array is terminated by an empty string; the
// int arrays that follow it hold one 0/1 flag per chunk for a rule group.

// Character boundaries: every chunk is flagged in the first rule group.
std::string character[]={"שָ","ל","וֹ","ם","!",""};
int         nones[]={1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};

// Sentence boundaries.
std::string sentence1[]={
    "To be\n",
    "or not\n",
    "to be?\n",
    " That is the question. ",
    "Or maybe not",
    ""
};
int         sentence1a[]={0,0,1,1,0,0};
int         sentence1b[]={1,1,0,0,1,0};

// Line-break boundaries.
std::string line1[]={
    "To ",
    "be\n",
    "or ",
    "not\n",
    "to ",
    "be",
    ""
};
int         line1a[]={1,0,1,0,1,1,0};
int         line1b[]={0,1,0,1,0,0,0};
372 
373 
test_boundaries(std::string * all,int * first,int * second,lb::boundary_type t)374 void test_boundaries(std::string *all,int *first,int *second,lb::boundary_type t)
375 {
376     boost::locale::generator g;
377     std::cout << " char UTF-8" << std::endl;
378     run_word<char>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
379     std::cout << " char CP1255" << std::endl;
380     run_word<char>(all,first,second,0,0,0,g("he_IL.cp1255"),t);
381     std::cout << " wchar_t"<<std::endl;
382     run_word<wchar_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
383     #ifdef BOOST_HAS_CHAR16_T
384     std::cout << " char16_t"<<std::endl;
385     run_word<char16_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
386     #endif
387     #ifdef BOOST_HAS_CHAR32_T
388     std::cout << " char32_t"<<std::endl;
389     run_word<char32_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
390     #endif
391 
392 }
393 
word_boundary()394 void word_boundary()
395 {
396     boost::locale::generator g;
397 
398     std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひらがな","ヒラガナ",""};
399     int        none1[]={ 0,   1,      0,  1,         0,   1,      0,         0,          0};
400     int         num1[]={ 1,   0,      0,  0,         1,   0,      0 ,        0 ,         0};
401     int        word1[]={ 0,   0,      1,  0,         1,   0,      0 ,        0 ,         0};
402     int        kana1[]={ 0,   0,      0,  0,         0,   0,      0,         1 ,         1};
403     int        ideo1[]={ 0,   0,      0,  0,         0,   0,      1,         0 ,         0};
404 
405 
406     int zero[25]={0};
407     std::string all2[]={""};
408 
409     std::string all3[]={" "," ","Hello",",","World","!"," ",""};
410     int        none3[]={ 1,  1,      0,  1,      0,   1,  1, 0};
411     int        word3[]={ 0,  0,      1,  0,      1,   0,  0, 0};
412 
413     std::cout << " char UTF-8" << std::endl;
414     run_word<char>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
415     run_word<char>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
416     run_word<char>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
417 
418     std::cout << " char Shift-JIS" << std::endl;
419     run_word<char>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.Shift-JIS"));
420     run_word<char>(all2,zero,zero,zero,zero,zero,g("ja_JP.Shift-JIS"));
421     run_word<char>(all3,none3,zero,word3,zero,zero,g("ja_JP.Shift-JIS"));
422 
423     std::cout << " wchar_t"<<std::endl;
424     run_word<wchar_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
425     run_word<wchar_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
426     run_word<wchar_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
427 
428     #ifdef BOOST_HAS_CHAR16_T
429     std::cout << " char16_t"<<std::endl;
430     run_word<char16_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
431     run_word<char16_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
432     run_word<char16_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
433     #endif
434 
435     #ifdef BOOST_HAS_CHAR32_T
436     std::cout << " char32_t"<<std::endl;
437     run_word<char32_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
438     run_word<char32_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
439     run_word<char32_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
440     #endif
441 }
test_op_one_side(std::string const & sl,std::string const & sr,int val)442 void test_op_one_side(std::string const &sl,std::string const &sr,int val)
443 {
444     boost::locale::boundary::ssegment l(sl.begin(),sl.end(),0),r(sr.begin(),sr.end(),0);
445 
446     // segment
447     TEST( (l==r) == (val==0));
448     TEST( (l!=r) == (val!=0));
449     TEST( (l<=r) == (val<=0));
450     TEST( (l< r) == (val<0));
451     TEST( (l>=r) == (val>=0));
452     TEST( (l> r) == (val>0));
453 
454     // C string
455     TEST( (l==sr.c_str()) == (val==0));
456     TEST( (l!=sr.c_str()) == (val!=0));
457     TEST( (l<=sr.c_str()) == (val<=0));
458     TEST( (l< sr.c_str()) == (val<0));
459     TEST( (l>=sr.c_str()) == (val>=0));
460     TEST( (l> sr.c_str()) == (val>0));
461 
462     TEST( (sl.c_str()==r) == (val==0));
463     TEST( (sl.c_str()!=r) == (val!=0));
464     TEST( (sl.c_str()<=r) == (val<=0));
465     TEST( (sl.c_str()< r) == (val<0));
466     TEST( (sl.c_str()>=r) == (val>=0));
467     TEST( (sl.c_str()> r) == (val>0));
468 
469 
470     // C++ string
471     TEST( (l==sr) == (val==0));
472     TEST( (l!=sr) == (val!=0));
473     TEST( (l<=sr) == (val<=0));
474     TEST( (l< sr) == (val<0));
475     TEST( (l>=sr) == (val>=0));
476     TEST( (l> sr) == (val>0));
477 
478     TEST( (sl==r) == (val==0));
479     TEST( (sl!=r) == (val!=0));
480     TEST( (sl<=r) == (val<=0));
481     TEST( (sl< r) == (val<0));
482     TEST( (sl>=r) == (val>=0));
483     TEST( (sl> r) == (val>0));
484     // self check
485     TEST( (sl==sr) == (val==0));
486     TEST( (sl!=sr) == (val!=0));
487     TEST( (sl<=sr) == (val<=0));
488     TEST( (sl< sr) == (val<0));
489     TEST( (sl>=sr) == (val>=0));
490     TEST( (sl> sr) == (val>0));
491 
492 }
493 
test_op(std::string const & sl,std::string const & sr,int val)494 void test_op(std::string const &sl,std::string const &sr,int val)
495 {
496     test_op_one_side(sl,sr,val);
497     test_op_one_side(sr,sl,-val);
498 }
segment_operator()499 void segment_operator()
500 {
501     test_op("","a",-1);
502     test_op("","",0);
503     test_op("aa","aaa",-1);
504     test_op("aa","ab",-1);
505 }
506 
main()507 int main()
508 {
509     try {
510         std::cout << "Testing segment operators" << std::endl;
511         segment_operator();
512         std::cout << "Testing word boundary" << std::endl;
513         word_boundary();
514         std::cout << "Testing character boundary" << std::endl;
515         test_boundaries(character,nones,0,lb::character);
516         std::cout << "Testing sentence boundary" << std::endl;
517         test_boundaries(sentence1,sentence1a,sentence1b,lb::sentence);
518         std::cout << "Testing line boundary" << std::endl;
519         test_boundaries(line1,line1a,line1b,lb::line);
520     }
521     catch(std::exception const &e) {
522         std::cerr << "Failed " << e.what() << std::endl;
523         return EXIT_FAILURE;
524     }
525     FINALIZE();
526 }
527 
528 #endif // NOICU
529 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
530 
531 // boostinspect:noascii
532