1 /*  $Id: test_ncbistr.cpp 627618 2021-03-16 14:35:43Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Denis Vakatov
27  *
28  * File Description:
29  *   TEST for:  NCBI C++ core string-related API
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbi_limits.h>
35 #include <corelib/version.hpp>
36 #include <corelib/ncbi_xstr.hpp>
37 #include <corelib/ncbifloat.h>
38 #include <corelib/ncbitime.hpp>
39 #include <corelib/ncbiexec.hpp>
40 #include <corelib/ncbifile.hpp>
41 #include <algorithm>
42 #include <locale.h>
43 #include <math.h>
44 
45 #define BOOST_AUTO_TEST_MAIN
46 #include <corelib/test_boost.hpp>
47 
48 #include <common/test_assert.h>  /* This header must go last */
49 
50 
51 // This is to use the ANSI C++ standard templates without the "std::" prefix
52 // and to use NCBI C++ entities without the "ncbi::" prefix
53 USING_NCBI_SCOPE;
54 
// Sentinel stored in a field of the conversion test table to mark
// "conversion to this type is expected to fail".  The
// SStringNumericValues::IsGood*() helpers compare the fields against it.
static const int kBad = 555;
56 
57 
58 //----------------------------------------------------------------------------
59 // NStr::StringTo*()
60 //----------------------------------------------------------------------------
61 
62 struct SStringNumericValues
63 {
64     const char* str;
65     int         flags;
66     Int8        num;
67     Int8        i;
68     Uint8       u;
69     Int8        i8;
70     Uint8       u8;
71     float       f;
72     double      d;
73     double      delta;
74 
IsGoodIntSStringNumericValues75     bool IsGoodInt(void) const {
76         return i != kBad;
77     }
IsGoodUIntSStringNumericValues78     bool IsGoodUInt(void) const {
79         return u != (Uint8)kBad;
80     }
IsGoodInt8SStringNumericValues81     bool IsGoodInt8(void) const {
82         return i8 != kBad;
83     }
IsGoodUInt8SStringNumericValues84     bool IsGoodUInt8(void) const {
85         return u8 != (Uint8)kBad;
86     }
IsGoodFloatSStringNumericValues87     bool IsGoodFloat(void) const {
88         return f != (float)kBad;
89     }
IsGoodDoubleSStringNumericValues90     bool IsGoodDouble(void) const {
91         return d != (double)kBad;
92     }
SameSStringNumericValues93     bool Same(const string& s) const {
94         if ( str[0] == '+' && isdigit((unsigned char) str[1]) ) {
95             if ( s[0] == '+' ) {
96                 string tmp(s, 1, NPOS);
97                 return tmp == str + 1;
98             }
99             return s == str + 1;
100         }
101         return s == NStr::TruncateSpaces_Unsafe(str, NStr::eTrunc_Both);
102     }
103 };
104 
105 
// Macro to silence GCC's __wur (warn unused result)
#define _no_warning(expr)  while ( expr ) break

// Test errno value, to check that 'errno' changes inside conversion methods.
// A test assigns it to 'errno' right before a conversion call; CHECK_ERRNO
// then verifies that the call has replaced it with something else.
static const int kTestErrno = 555;
#define CHECK_ERRNO  BOOST_CHECK(errno != kTestErrno)

// Default flags
#define DF 0

// define float constant to use in STR() macros.
// (we need extra 0 before 'f' for stricter compilers, plain constants don't need it)
#define NCBI_CONST_FLOAT(v) v##.0f

// Row builders for the SStringNumericValues table.  Each one expands to a
// complete brace initializer (all ten members, including the trailing
// 'delta'); columns holding kBad mark conversions that are expected to fail.
//
//   BAD   - no conversion succeeds ('v' is a string literal here)
//   STR   - valid for every type, including StringToNonNegativeInt()
//   STRI  - valid for int, Int8, float, double
//   STRU  - valid for unsigned int, Int8, Uint8, float, double
//   STR8  - valid for Int8, Uint8, float, double
//   STRI8 - valid for Int8, float, double
//   STRU8 - valid for Uint8, float, double
//   STRF  - valid for float and double only
//   STRD  - valid for double only
//
//                str  flags  num   int   uint  Int8  Uint8  float  double  delta
#define BAD(v)   {  v, DF, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. }
#define STR(v)   { #v, DF, NCBI_CONST_INT8(v), NCBI_CONST_INT8(v), NCBI_CONST_UINT8(v), NCBI_CONST_INT8(v), NCBI_CONST_UINT8(v), NCBI_CONST_FLOAT(v), v##., 0. }
#define STRI(v)  { #v, DF, -1, NCBI_CONST_INT8(v), kBad, NCBI_CONST_INT8(v), kBad, NCBI_CONST_FLOAT(v), v##., 0.}
#define STRU(v)  { #v, DF, -1, kBad, NCBI_CONST_UINT8(v), NCBI_CONST_INT8(v), NCBI_CONST_UINT8(v), NCBI_CONST_FLOAT(v), v##., 0.}
#define STR8(v)  { #v, DF, -1, kBad, kBad, NCBI_CONST_INT8(v), NCBI_CONST_UINT8(v), NCBI_CONST_FLOAT(v), v##., 0.}
#define STRI8(v) { #v, DF, -1, kBad, kBad, NCBI_CONST_INT8(v), kBad, NCBI_CONST_FLOAT(v), v##., 0.}
#define STRU8(v) { #v, DF, -1, kBad, kBad, kBad, NCBI_CONST_UINT8(v), NCBI_CONST_FLOAT(v), v##., 0. }
// STRF/STRD spell out 'delta' explicitly, like every other row builder
// (it used to be left to implicit zero-initialization).
#define STRF(v)  { #v, DF, -1, kBad, kBad, kBad, kBad, NCBI_CONST_FLOAT(v), v##., 0. }
#define STRD(v)  { #v, DF, -1, kBad, kBad, kBad, kBad, kBad, v##., 0. }
130 
131 
// Table of inputs for the s_StringToNum test case.  Every row gives the text
// to convert, the conversion flags, and the value expected for each target
// type (kBad where the conversion must fail).  Rows are built either with the
// STR*/BAD helper macros or spelled out in full when they need non-default
// flags or values the macros cannot express.
static const SStringNumericValues s_Str2NumTests[] = {
    // --- Integers around the 32-bit signed/unsigned limits.
    // Which macro applies to some of them depends on SIZEOF_INT.
    STR(0),
    STR(1),
    STRI(-1),
#if (SIZEOF_INT > 4)
    STRI(-2147483649),
#else
    STRI8(-2147483649),
#endif
    STRI(-2147483648),
    STRI(-2147483647),
    STR(2147483646),
    STR(2147483647),
#if (SIZEOF_INT > 4)
    STR(2147483648),
#else
    STRU(2147483648),
#endif
    STRU(4294967294),
    STRU(4294967295),
#if (SIZEOF_INT > 4)
    STR(4294967296),
#else
    STR8(4294967296),
#endif
    // --- Powers of ten
    STR(10),
    STR(100),
    STR(1000),
    STR(10000),
    STR(100000),
    STR(1000000),
    STR(10000000),
    STR(100000000),
    STR(1000000000),
#if (SIZEOF_INT > 4)
    STR(10000000000),
    STR(100000000000),
    STR(1000000000000),
    STR(10000000000000),
    STR(100000000000000),
    STR(1000000000000000),
    STR(10000000000000000),
    STR(100000000000000000),
    STR(1000000000000000000),
#else
    STR8(10000000000),
    STR8(100000000000),
    STR8(1000000000000),
    STR8(10000000000000),
    STR8(100000000000000),
    STR8(1000000000000000),
    STR8(10000000000000000),
    STR8(100000000000000000),
    STR8(1000000000000000000),
#endif
    // --- Values around the 64-bit signed/unsigned limits.
    STRF(-9223372036854775809),
    // Written out by hand: the Int8 column is spelled as (-9223372036854775807)-1
    // rather than as a single literal.
    { "-9223372036854775808", DF, -1, kBad, kBad, NCBI_CONST_INT8(-9223372036854775807)-1, kBad, -9223372036854775808.f, -9223372036854775808., 0. },
    STRI8(-9223372036854775807),
    STR8(9223372036854775806),
    STR8(9223372036854775807),
    STRU8(9223372036854775808),
    STRU8(18446744073709551614),
    STRU8(18446744073709551615),
    STRF(18446744073709551616),

    // --- Malformed input, decimal fractions and signs (default flags)
    BAD(""),
    BAD("+"),
    BAD("-"),
    BAD("."),
    BAD(".."),
    BAD("abc"),
    { ".0",  DF, -1, kBad, kBad, kBad, kBad,  .0f, .0, 0.  },
    BAD(".0."),
    BAD("..0"),
    { ".01", DF, -1, kBad, kBad, kBad, kBad,  .01f, .01, 0. },
    { "1.",  DF, -1, kBad, kBad, kBad, kBad,  1.f , 1. , 0. },
    { "1.1", DF, -1, kBad, kBad, kBad, kBad,  1.1f, 1.1, 0. },
    BAD("1.1."),
    BAD("1.."),
    STRI(-123),
    BAD("--123"),
    { "+123", DF, 123, 123, 123, 123, 123, 123, 123, 0. },
    BAD("++123"),
    BAD("- 123"),
    BAD(" 123"),

    // --- Leading / trailing spaces and symbols, accepted only with the
    // corresponding fAllow* flag
    { " 123",     NStr::fAllowLeadingSpaces,  -1, 123,  123,  123,  123,  123.f, 123., 0.},
    { " 123",     NStr::fAllowLeadingSymbols, -1, 123,  123,  123,  123,  123.f, 123., 0. },
    BAD("123 "),
    { "123 ",     NStr::fAllowTrailingSpaces,  -1, 123,  123,  123,  123,  123.f, 123., 0. },
    { "123 ",     NStr::fAllowTrailingSymbols, -1, 123,  123,  123,  123,  123.f, 123., 0. },
    { "123(45) ", NStr::fAllowTrailingSymbols, -1, 123,  123,  123,  123,  123.f, 123., 0. },
    { " 123 ",    NStr::fAllowLeadingSpaces | NStr::fAllowTrailingSpaces, -1, 123,  123,  123,  123,  123.f, 123., 0. },

    // --- Comma-separated digit groups (fAllowCommas); the float and double
    // columns are kBad in the rows that use fAllowCommas alone
    { "1,234",    NStr::fAllowCommas, -1,    1234,    1234,    1234,    1234, kBad, kBad, 0. },
    { "1,234,567",NStr::fAllowCommas, -1, 1234567, 1234567, 1234567, 1234567, kBad, kBad, 0. },
    { "12,34",    NStr::fAllowCommas, -1,    kBad,    kBad,    kBad,    kBad, kBad, kBad, 0. },
    { ",123",     NStr::fAllowCommas, -1,    kBad,    kBad,    kBad,    kBad, kBad, kBad, 0. },
    { ",123",     NStr::fAllowCommas | NStr::fAllowLeadingSymbols, -1, 123, 123, 123, 123, 123, 123, 0. },

    // Same, around the 32-bit unsigned limit; expectations for the int and
    // unsigned int columns depend on SIZEOF_INT
#if (SIZEOF_INT > 4)
    {  "4,294,967,294", NStr::fAllowCommas, -1, NCBI_CONST_UINT8(4294967294), NCBI_CONST_UINT8(4294967294), NCBI_CONST_UINT8(4294967294), NCBI_CONST_UINT8(4294967294), kBad, kBad, 0. },
    {  "4,294,967,295", NStr::fAllowCommas, -1, NCBI_CONST_UINT8(4294967295), NCBI_CONST_UINT8(4294967295), NCBI_CONST_UINT8(4294967295), NCBI_CONST_UINT8(4294967295), kBad, kBad, 0. },
    {  "4,294,967,296", NStr::fAllowCommas, -1, NCBI_CONST_UINT8(4294967296), NCBI_CONST_UINT8(4294967296), NCBI_CONST_UINT8(4294967296), NCBI_CONST_UINT8(4294967296), kBad, kBad, 0. },
    { "-4,294,967,294", NStr::fAllowCommas, -1, NCBI_CONST_INT8(-4294967294), kBad,                         NCBI_CONST_INT8(-4294967294), kBad,                         kBad, kBad, 0. },
    { "-4,294,967,295", NStr::fAllowCommas, -1, NCBI_CONST_INT8(-4294967295), kBad,                         NCBI_CONST_INT8(-4294967295), kBad,                         kBad, kBad, 0. },
    { "-4,294,967,296", NStr::fAllowCommas, -1, NCBI_CONST_INT8(-4294967296), kBad,                         NCBI_CONST_INT8(-4294967296), kBad,                         kBad, kBad, 0. },
#else
    {  "4,294,967,294", NStr::fAllowCommas, -1, kBad,                         NCBI_CONST_UINT8(4294967294), NCBI_CONST_UINT8(4294967294), NCBI_CONST_UINT8(4294967294), kBad, kBad, 0. },
    {  "4,294,967,295", NStr::fAllowCommas, -1, kBad,                         NCBI_CONST_UINT8(4294967295), NCBI_CONST_UINT8(4294967295), NCBI_CONST_UINT8(4294967295), kBad, kBad, 0. },
    {  "4,294,967,296", NStr::fAllowCommas, -1, kBad,                         kBad,                         NCBI_CONST_UINT8(4294967296), NCBI_CONST_UINT8(4294967296), kBad, kBad, 0. },
    { "-4,294,967,294", NStr::fAllowCommas, -1, kBad,                         kBad,                         NCBI_CONST_INT8(-4294967294), kBad,                         kBad, kBad, 0. },
    { "-4,294,967,295", NStr::fAllowCommas, -1, kBad,                         kBad,                         NCBI_CONST_INT8(-4294967295), kBad,                         kBad, kBad, 0. },
    { "-4,294,967,296", NStr::fAllowCommas, -1, kBad,                         kBad,                         NCBI_CONST_INT8(-4294967296), kBad,                         kBad, kBad, 0. },
#endif

    // --- Explicit sign, with and without fMandatorySign
    { "+123",     0, 123, 123, 123, 123, 123, 123, 123, 0. },
    { "123",      NStr::fMandatorySign, 123, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
    { "+123",     NStr::fMandatorySign, 123,  123,  123,  123,  123, 123, 123, 0. },
    { "-123",     NStr::fMandatorySign,  -1, -123, kBad, -123, kBad, -123, -123, 0. },
    { "+123",     NStr::fAllowLeadingSymbols, 123,  123,  123,  123,  123,  123, 123, 0. },
    // --- Floating-point values near and beyond the representable range.
    // The first group is compiled out.
#if 0
    { "7E-380",   DF, -1, kBad, kBad, kBad, kBad, kBad, kBad,   0. },
    { "7E-325",   DF, -1, kBad, kBad, kBad, kBad, kBad, kBad,   0. },
    { "7E-324",   DF, -1, kBad, kBad, kBad, kBad, kBad, 7E-324, 0. },
    { "7E-323",   DF, -1, kBad, kBad, kBad, kBad, kBad, 7E-323, 0. },
#endif
    // The leading "0 &&" keeps the platform-specific variant disabled, so the
    // #else row is the one that is compiled.
#if 0 && defined(NCBI_OS_LINUX) && (NCBI_PLATFORM_BITS == 32) && !defined(_DEBUG)
    { "7E-38",   DF, -1, kBad, kBad, kBad, kBad, 7E-38, 7E-38, 0.000000000000002e-38 },
#else
    { "7E-38",   DF, -1, kBad, kBad, kBad, kBad, 7E-38f, 7E-38, 0. },
#endif
    { "7E38",    DF, -1, kBad, kBad, kBad, kBad, kBad, 7E38, 0. },

    // Very small magnitudes are expected to come out as 0 (within 'delta'),
    // very large ones as HUGE_VAL.
    { "2.2e-308",DF, -1, kBad, kBad, kBad, kBad, 0, 0, DBL_MIN },
    { "-2.2e-310",DF, -1, kBad, kBad, kBad, kBad, 0, 0, DBL_MIN },
    { "7E-500",  DF, -1, kBad, kBad, kBad, kBad, 0, 0, 0. },
    { "7E-512",  DF, -1, kBad, kBad, kBad, kBad, 0, 0, 0. },
    { "7E500",   DF, -1, kBad, kBad, kBad, kBad, kBad, HUGE_VAL, 0. },
    { "7E512",   DF, -1, kBad, kBad, kBad, kBad, kBad, HUGE_VAL, 0. },
    { "7E768",   DF, -1, kBad, kBad, kBad, kBad, kBad, HUGE_VAL, 0. },
    { "7E4294967306", DF, -1, kBad, kBad, kBad, kBad, kBad, HUGE_VAL, 0. },
    { ".000000000000000000000000000001", DF, -1, kBad, kBad, kBad, kBad,
       .000000000000000000000000000001f, .000000000000000000000000000001, 1e-46 },
    { "-123",     NStr::fAllowLeadingSymbols,  -1, -123, kBad, -123, kBad, -123, -123, 0. }
};
278 
279 
BOOST_AUTO_TEST_CASE(s_StringToNum)280 BOOST_AUTO_TEST_CASE(s_StringToNum)
281 {
282     const size_t count = sizeof(s_Str2NumTests) / sizeof(s_Str2NumTests[0]);
283     {{
284         // verify that compiler properly distinguishes overloads
285         string str_value;
286         NStr::NumericToString(str_value, (char)1);
287         TIntId id = 123;
288         TGi gi(id);
289         NStr::NumericToString(str_value, id);
290         NStr::NumericToString(str_value, gi);
291     }}
292 
293     for (size_t i = 0;  i < count;  ++i) {
294       for (int extra = 0;  extra < 4;  ++extra) {
295         const SStringNumericValues* test = &s_Str2NumTests[i];
296 
297         CTempString str;
298         string extra_str;
299         if ( !extra ) {
300             str = test->str;
301         }
302         else {
303             extra_str = test->str;
304             extra_str += "  9x"[extra];
305             str = CTempString(extra_str).substr(0, extra_str.size()-1);
306         }
307         NStr::TStringToNumFlags flags = test->flags;
308         NStr::TNumToStringFlags str_flags = 0;
309         if ( flags & NStr::fMandatorySign )
310             str_flags |= NStr::fWithSign;
311         if ( flags & NStr::fAllowCommas )
312             str_flags |= NStr::fWithCommas;
313         bool allow_same_test = (flags < NStr::fAllowLeadingSpaces);
314 
315         // num
316         {{
317             errno = kTestErrno;
318             int value = NStr::StringToNonNegativeInt(str);
319             CHECK_ERRNO;
320             BOOST_CHECK_EQUAL(value, test->num);
321         }}
322 
323         // int
324         try {
325             errno = kTestErrno;
326             int value = NStr::StringToInt(str, flags), v2;
327             CHECK_ERRNO;
328             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<int>(str, flags));
329             BOOST_CHECK(NStr::StringToNumeric(str, &v2, flags));
330             BOOST_CHECK_EQUAL(v2, value);
331             BOOST_CHECK(test->IsGoodInt());
332             BOOST_CHECK_EQUAL(value, test->i);
333             if (allow_same_test) {
334                 BOOST_CHECK(test->Same(NStr::IntToString(value, str_flags)));
335                 BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
336             }
337         }
338         catch (CException&) {
339             BOOST_CHECK(!test->IsGoodInt());
340         }
341 
342         // int
343         {
344             errno = kTestErrno;
345             int value = NStr::StringToInt(str, flags | NStr::fConvErr_NoThrow);
346             CHECK_ERRNO;
347             int err = errno;
348             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<int>(str,
349                               flags | NStr::fConvErr_NoThrow));
350             if ( value || !err ) {
351                 BOOST_CHECK(!err);
352                 BOOST_CHECK(test->IsGoodInt());
353                 BOOST_CHECK_EQUAL(value, test->i);
354                 if (allow_same_test) {
355                     BOOST_CHECK(test->Same(NStr::IntToString(value, str_flags)));
356                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
357                 }
358             }
359             else {
360                 BOOST_CHECK( CNcbiError::GetLast() );
361                 BOOST_CHECK(err);
362                 BOOST_CHECK(!test->IsGoodInt());
363             }
364         }
365 
366         // unsigned int
367         try {
368             errno = kTestErrno;
369             unsigned int value = NStr::StringToUInt(str, flags), v2;
370             CHECK_ERRNO;
371             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<unsigned int>(str, flags));
372             BOOST_CHECK(NStr::StringToNumeric(str, &v2, flags));
373             BOOST_CHECK_EQUAL(v2, value);
374             BOOST_CHECK(test->IsGoodUInt());
375             BOOST_CHECK_EQUAL(value, test->u);
376             if (allow_same_test) {
377                 BOOST_CHECK(test->Same(NStr::UIntToString(value, str_flags)));
378                 BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
379             }
380         }
381         catch (CException&) {
382             BOOST_CHECK(!test->IsGoodUInt());
383         }
384 
385         // unsigned int
386         {
387             errno = kTestErrno;
388             unsigned int value = NStr::StringToUInt(str, flags | NStr::fConvErr_NoThrow);
389             CHECK_ERRNO;
390             int err = errno;
391             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<unsigned int>(str,
392                               flags | NStr::fConvErr_NoThrow));
393             if ( value || !err ) {
394                 BOOST_CHECK(!err);
395                 BOOST_CHECK(test->IsGoodUInt());
396                 BOOST_CHECK_EQUAL(value, test->u);
397                 if (allow_same_test) {
398                     BOOST_CHECK(test->Same(NStr::UIntToString(value, str_flags)));
399                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
400                 }
401             }
402             else {
403                 BOOST_CHECK( CNcbiError::GetLast() );
404                 BOOST_CHECK(err);
405                 BOOST_CHECK(!test->IsGoodUInt());
406             }
407         }
408 
409         // long
410         try {
411             errno = kTestErrno;
412             long value = NStr::StringToLong(str, flags), v2;
413             CHECK_ERRNO;
414             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<long>(str, flags));
415             BOOST_CHECK(NStr::StringToNumeric(str, &v2, flags));
416             BOOST_CHECK_EQUAL(v2, value);
417 
418             #if (SIZEOF_LONG == SIZEOF_INT)
419                 BOOST_CHECK(test->IsGoodInt());
420                 BOOST_CHECK_EQUAL(value, test->i);
421                 if (allow_same_test) {
422                     BOOST_CHECK(test->Same(NStr::LongToString(value, str_flags)));
423                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
424                 }
425             #else
426                 BOOST_CHECK(test->IsGoodInt8());
427                 BOOST_CHECK_EQUAL(value, test->i8);
428                 if (allow_same_test) {
429                     BOOST_CHECK(test->Same(NStr::Int8ToString(value, str_flags)));
430                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
431                 }
432             #endif
433         }
434         catch (CException&) {
435             #if (SIZEOF_LONG == SIZEOF_INT)
436                 BOOST_CHECK(!test->IsGoodInt());
437             #else
438                 BOOST_CHECK(!test->IsGoodInt8());
439             #endif
440         }
441 
442         // long
443         {
444             errno = kTestErrno;
445             long value = NStr::StringToLong(str, flags | NStr::fConvErr_NoThrow);
446             CHECK_ERRNO;
447             int err = errno;
448             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<long>(str,
449                               flags | NStr::fConvErr_NoThrow));
450             if ( value || !err ) {
451                 BOOST_CHECK(!err);
452             #if (SIZEOF_LONG == SIZEOF_INT)
453                 BOOST_CHECK(test->IsGoodInt());
454                 BOOST_CHECK_EQUAL(value, test->i);
455                 if (allow_same_test) {
456                     BOOST_CHECK(test->Same(NStr::LongToString(value, str_flags)));
457                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
458                 }
459             #else
460                 BOOST_CHECK(test->IsGoodInt8());
461                 BOOST_CHECK_EQUAL(value, test->i8);
462                 if (allow_same_test) {
463                     BOOST_CHECK(test->Same(NStr::Int8ToString(value, str_flags)));
464                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
465                 }
466             #endif
467             }
468             else {
469                 BOOST_CHECK( CNcbiError::GetLast() );
470                 BOOST_CHECK(err);
471             #if (SIZEOF_LONG == SIZEOF_INT)
472                 BOOST_CHECK(!test->IsGoodInt());
473             #else
474                 BOOST_CHECK(!test->IsGoodInt8());
475             #endif
476             }
477         }
478 
479         // unsigned long
480         try {
481             errno = kTestErrno;
482             unsigned long value = NStr::StringToULong(str, flags), v2;
483             CHECK_ERRNO;
484             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<unsigned long>(str, flags));
485             BOOST_CHECK(NStr::StringToNumeric(str, &v2, flags));
486             BOOST_CHECK_EQUAL(v2, value);
487             #if (SIZEOF_LONG == SIZEOF_INT)
488                 BOOST_CHECK(test->IsGoodUInt());
489                 BOOST_CHECK_EQUAL(value, test->u);
490                 if (allow_same_test) {
491                     BOOST_CHECK(test->Same(NStr::ULongToString(value, str_flags)));
492                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
493                 }
494             #else
495                 BOOST_CHECK(test->IsGoodUInt8());
496                 BOOST_CHECK_EQUAL(value, test->u8);
497                 if (allow_same_test) {
498                     BOOST_CHECK(test->Same(NStr::UInt8ToString(value, str_flags)));
499                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
500                 }
501             #endif
502         }
503         catch (CException&) {
504             #if (SIZEOF_LONG == SIZEOF_INT)
505                 BOOST_CHECK(!test->IsGoodUInt());
506             #else
507                 BOOST_CHECK(!test->IsGoodUInt8());
508             #endif
509         }
510 
511         // unsigned long
512         {
513             errno = kTestErrno;
514             unsigned long value = NStr::StringToULong(str, flags | NStr::fConvErr_NoThrow);
515             CHECK_ERRNO;
516             int err = errno;
517             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<unsigned long>(str,
518                               flags | NStr::fConvErr_NoThrow));
519             if ( value || !err ) {
520                 BOOST_CHECK(!err);
521             #if (SIZEOF_LONG == SIZEOF_INT)
522                 BOOST_CHECK(test->IsGoodUInt());
523                 BOOST_CHECK_EQUAL(value, test->u);
524                 if (allow_same_test) {
525                     BOOST_CHECK(test->Same(NStr::ULongToString(value, str_flags)));
526                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
527                 }
528             #else
529                 BOOST_CHECK(test->IsGoodUInt8());
530                 BOOST_CHECK_EQUAL(value, test->u8);
531                 if (allow_same_test) {
532                     BOOST_CHECK(test->Same(NStr::UInt8ToString(value, str_flags)));
533                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
534                 }
535             #endif
536             }
537             else {
538                 BOOST_CHECK( CNcbiError::GetLast() );
539                 BOOST_CHECK(err);
540             #if (SIZEOF_LONG == SIZEOF_INT)
541                 BOOST_CHECK(!test->IsGoodUInt());
542             #else
543                 BOOST_CHECK(!test->IsGoodUInt8());
544             #endif
545             }
546         }
547 
548         // Int8
549         try {
550             errno = kTestErrno;
551             Int8 value = NStr::StringToInt8(str, flags), v2;
552             CHECK_ERRNO;
553             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<Int8>(str, flags));
554             BOOST_CHECK(NStr::StringToNumeric(str, &v2, flags));
555             BOOST_CHECK_EQUAL(v2, value);
556             BOOST_CHECK(test->IsGoodInt8());
557             BOOST_CHECK_EQUAL(value, test->i8);
558             if (allow_same_test) {
559                 BOOST_CHECK(test->Same(NStr::Int8ToString(value, str_flags)));
560                 BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
561             }
562         }
563         catch (CException&) {
564             BOOST_CHECK(!test->IsGoodInt8());
565         }
566 
567         // Int8
568         {
569             errno = kTestErrno;
570             Int8 value = NStr::StringToInt8(str, flags | NStr::fConvErr_NoThrow);
571             int err = errno;
572             BOOST_CHECK(err != kTestErrno);
573             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<Int8>(str,
574                 flags | NStr::fConvErr_NoThrow));
575             if ( value || !err ) {
576                 BOOST_CHECK(!err);
577                 BOOST_CHECK(test->IsGoodInt8());
578                 BOOST_CHECK_EQUAL(value, test->i8);
579                 if (allow_same_test) {
580                     BOOST_CHECK(test->Same(NStr::Int8ToString(value, str_flags)));
581                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
582                 }
583             }
584             else {
585                 BOOST_CHECK( CNcbiError::GetLast() );
586                 BOOST_CHECK(err);
587                 BOOST_CHECK(!test->IsGoodInt8());
588             }
589         }
590 
591         // Uint8
592         try {
593             errno = kTestErrno;
594             Uint8 value = NStr::StringToUInt8(str, flags), v2;
595             CHECK_ERRNO;
596             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<Uint8>(str, flags));
597             BOOST_CHECK(NStr::StringToNumeric(str, &v2, flags));
598             BOOST_CHECK_EQUAL(v2, value);
599             BOOST_CHECK(test->IsGoodUInt8());
600             BOOST_CHECK_EQUAL(value, test->u8);
601             if (allow_same_test) {
602                 BOOST_CHECK(test->Same(NStr::UInt8ToString(value, str_flags)));
603                 BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
604             }
605         }
606         catch (CException&) {
607             BOOST_CHECK(!test->IsGoodUInt8());
608         }
609 
610         // Uint8
611         {
612             errno = kTestErrno;
613             Uint8 value = NStr::StringToUInt8(str, flags | NStr::fConvErr_NoThrow);
614             CHECK_ERRNO;
615             int err = errno;
616             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<Uint8>(str,
617                               flags | NStr::fConvErr_NoThrow));
618             if ( value || !err ) {
619                 BOOST_CHECK(!err);
620                 BOOST_CHECK(test->IsGoodUInt8());
621                 BOOST_CHECK_EQUAL(value, test->u8);
622                 if (allow_same_test) {
623                     BOOST_CHECK(test->Same(NStr::UInt8ToString(value, str_flags)));
624                     BOOST_CHECK(test->Same(NStr::NumericToString(value, str_flags)));
625                 }
626             }
627             else {
628                 BOOST_CHECK( CNcbiError::GetLast() );
629                 BOOST_CHECK(err);
630                 BOOST_CHECK(!test->IsGoodUInt8());
631             }
632         }
633 
634         // float -- StringToNumeric<float>
635         try {
636             errno = kTestErrno;
637             double value = NStr::StringToDouble(str, flags);
638             CHECK_ERRNO;
639             errno = kTestErrno;
640             double valueP = NStr::StringToDouble(str, flags | NStr::fDecimalPosix);
641             CHECK_ERRNO;
642             //BOOST_CHECK_EQUAL(value, valueP);
643             // Note, we check conversion only, not rounding from double to float.
644             BOOST_CHECK_EQUAL((float)value,  NStr::StringToNumeric<float>(str, flags));
645             BOOST_CHECK_EQUAL((float)valueP, NStr::StringToNumeric<float>(str, flags | NStr::fDecimalPosix));
646             BOOST_CHECK(test->IsGoodFloat());
647         }
648         catch (CException&) {
649             BOOST_CHECK(!test->IsGoodFloat());
650         }
651 
652         // double
653         try {
654             errno = kTestErrno;
655             double value = NStr::StringToDouble(str, flags);
656             CHECK_ERRNO;
657             errno = kTestErrno;
658             double valueP = NStr::StringToDouble(str, flags | NStr::fDecimalPosix);
659             CHECK_ERRNO;
660             //BOOST_CHECK_EQUAL(value, valueP);
661             BOOST_CHECK_EQUAL(value,  NStr::StringToNumeric<double>(str, flags));
662             BOOST_CHECK_EQUAL(valueP, NStr::StringToNumeric<double>(str, flags | NStr::fDecimalPosix));
663             BOOST_CHECK(test->IsGoodDouble());
664             BOOST_CHECK(valueP >= test->d-test->delta && valueP <= test->d+test->delta);
665             BOOST_CHECK(value  >= test->d-test->delta && value  <= test->d+test->delta);
666         }
667         catch (CException&) {
668             BOOST_CHECK(!test->IsGoodDouble());
669         }
670 
671         // double
672         {
673             errno = kTestErrno;
674             double value = NStr::StringToDouble(str, flags | NStr::fConvErr_NoThrow);
675             CHECK_ERRNO;
676             int err = errno;
677             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<double>(str,
678                               flags | NStr::fConvErr_NoThrow));
679             if ( value || (err == 0 || err == ERANGE)) {
680                 BOOST_CHECK(test->IsGoodDouble());
681                 BOOST_CHECK(value  >= test->d-test->delta && value  <= test->d+test->delta);
682             }
683             else {
684                 BOOST_CHECK( CNcbiError::GetLast() );
685                 BOOST_CHECK(err);
686                 BOOST_CHECK(!test->IsGoodDouble());
687             }
688         }
689 
690         // double POSIX
691         {
692             errno = kTestErrno;
693             double value = NStr::StringToDouble(str, flags | NStr::fDecimalPosix | NStr::fConvErr_NoThrow);
694             CHECK_ERRNO;
695             int err = errno;
696             BOOST_CHECK_EQUAL(value, NStr::StringToNumeric<double>(str,
697                               flags | NStr::fDecimalPosix | NStr::fConvErr_NoThrow));
698             if ( value || (err == 0 || err == ERANGE)) {
699                 BOOST_CHECK(test->IsGoodDouble());
700                 BOOST_CHECK(value  >= test->d-test->delta && value  <= test->d+test->delta);
701             }
702             else {
703                 BOOST_CHECK( CNcbiError::GetLast() );
704                 BOOST_CHECK(err);
705                 BOOST_CHECK(!test->IsGoodDouble());
706             }
707         }
708       }
709     }
710 
711     // TGi
712     {
713         TIntId id = 123;
714         TGi gi(id), gi2, gi3;
715         string s = NStr::NumericToString(gi);
716         gi2 = NStr::StringToNumeric<TGi>(s);
717         BOOST_CHECK(NStr::StringToNumeric(s, &gi3));
718         BOOST_CHECK(gi2 == gi);
719         BOOST_CHECK(gi2 == gi3);
720     }
721     {
722         unsigned long long n1 = 456, n2, n3;
723         string s = NStr::NumericToString(n1);
724         n2 = NStr::StringToNumeric<unsigned long long>(s);
725         BOOST_CHECK(NStr::StringToNumeric(s, &n3));
726         BOOST_CHECK(n2 == n1);
727         BOOST_CHECK(n2 == n3);
728     }
729 }
730 
731 
/// One row of the NStr::StringToDoublePosix() conversion table.
struct SStringDoublePosixTest
{
    const char* str;     ///< Text handed to the parser
    double      result;  ///< Value the parser is expected to return
    double      delta;   ///< Tolerance stored alongside the expected value
};
738 
739 static const SStringDoublePosixTest s_StrToDoublePosix[] = {
740     {"123",                 123.,                1e-13},
741     {"123.",                123.,                1e-13},
742     {"123.456",             123.456,             1e-13},
743     {"-123.456",           -123.456,             1e-14},
744     {"-12.45",             -12.45,               1e-15},
745     {"0.01",                0.01,                1e-18},
746     {"0.01456000",          0.01456,             1e-18},
747     {"2147483649",          2147483649.,         0.},
748     {"-2147483649",        -2147483649.,         0.},
749     {"214748364913",        214748364913.,       0.},
750     {"123456789123456789",  123456789123456789., 0.},
751     {"123456789123.45",     123456789123.45,     0.},
752     {"1234.5678912345",     1234.5678912345,     0.},
753     {"1.23456789123456789", 1.23456789123456789, 0.},
754     {".123456789",          .123456789,          0.},
755     {".123456789123",       .123456789123,       0.},
756     {"12e12",               12.e12,              0.},
757     {"123.e2",              123.e2,              0.},
758     {"123.456e+2",          123.456e+2,          0.},
759     {"+123.456e-2",         123.456e-2,          0.},
760     {"-123.456e+2",        -123.456e+2,          0.},
761     {"-123.456e+12",       -123.456e+12,         0.},
762     {"-123.456e+25",       -123.456e+25,         0.},
763     {"-123.456e+78",       -123.456e+78,         0.},
764     {"-123.456e-2",        -123.456e-2,          0.},
765     {"-123.456e-12",       -123.456e-12,         0.},
766     {"-123.456e-25",       -123.456e-25,         0.},
767     {"-123.456e-78",       -123.456e-78,         0.00000000000002e-078},
768     {"-9223372036854775809",      -9223372036854775809.,       0.},
769     {"-922337.2036854775809",     -922337.2036854775809,       0.},
770     {"-92233720368547.75809",     -92233720368547.75809,       0.},
771     {"-9223372036854775808",      -9223372036854775808.,       0.},
772     {"-9223372036854775807",      -9223372036854775807.,       0.},
773     {"9223372036854775806",        9223372036854775806.,       0.},
774     {"9223372036854775807",        9223372036854775807.,       0.},
775     {"9223372036854775808",        9223372036854775808.,       0.},
776     {"18446744073709551614",       18446744073709551614.,      0.},
777     {"18446744073709551615",       18446744073709551615.,      0.},
778     {"18446744073709551616",       18446744073709551616.,      0.},
779     {"1844674407370955.1616",      1844674407370955.1616,      0.},
780     {"1844674407370955.1616",      1844674407370955.1616,      0.},
781     {"184467.44073709551616",      184467.44073709551616,      0.},
782     {"1.8446744073709551616",      1.8446744073709551616,      0.},
783     {"1.8446744073709551616e5",    1.8446744073709551616e5,    0.},
784     {"1.8446744073709551616e25",   1.8446744073709551616e25,   0.},
785     {"1.8446744073709551616e125",  1.8446744073709551616e125,  0.},
786     {"184467.44073709551616e5",    184467.44073709551616e5,    0.},
787     {"184467.44073709551616e-5",   184467.44073709551616e-5,   0.},
788     {"184467.44073709551616e25",   184467.44073709551616e25,   0.},
789     {"184467.44073709551616e-25",  184467.44073709551616e-25,  0.},
790 
791     {"1.7976931348623159e+308",  HUGE_VAL, 0.},
792     {"1.7976931348623157e+308",  1.7976931348623157e+308, 0.},
793     {"1.7976931348623155e+308",  1.7976931348623155e+308, 0.0000000000000003e+308},
794     { "1.797693134862315e+307",  1.797693134862315e+307,  0.000000000000002e+307},
795     { "1.797693134862315e+306",  1.797693134862315e+306,  0.},
796     {"2.2250738585072014e-308",  2.2250738585072014e-308, 0.},
797     {"2.2250738585072019e-308",  2.2250738585072019e-308, 0.},
798     {"2.2250738585072024e-308",  2.2250738585072024e-308, 0.},
799     { "2.225073858507202e-308",  2.225073858507202e-308,  0.},
800     {"2.2250738585072016e-307",  2.2250738585072016e-307, 0.0000000000000004e-307},/* NCBI_FAKE_WARNING */
801     {"2.2250738585072016e-306",  2.2250738585072016e-306, 0.0000000000000004e-306},/* NCBI_FAKE_WARNING */
802     {"2.2250738585072016e-305",  2.2250738585072016e-305, 0.0000000000000004e-305},/* NCBI_FAKE_WARNING */
803     {"-123.456e+4578",  -HUGE_VAL, 0.},
804     {"-123.456e-4578",  0., 0.},
805 
806     { "7E0000000001", 70., 0.},
807     { "7E512", HUGE_VAL, 0.},
808     { "7E-500", 0., 0.},
809     { "7E4294967306", HUGE_VAL, 0.},
810     { "1.000000000000000000000000000001", 1., 0. },
811     { "000.000000000000000000000000000001", 1e-30, 0.0000000000000001e-030 },
812     { "0.", 0., 0. },
813     { "-0", 0., 0. },
814     {NULL,0,0}
815 };
816 
BOOST_AUTO_TEST_CASE(s_StringToDoublePosix)817 BOOST_AUTO_TEST_CASE(s_StringToDoublePosix)
818 {
819     char* endptr;
820     for (int i = 0; s_StrToDoublePosix[i].str; ++i) {
821         const double& result  = s_StrToDoublePosix[i].result;
822         double delta   = finite(result)? fabs(result)*2.22e-16: 0;
823         const char* str = s_StrToDoublePosix[i].str;
824         errno = kTestErrno;
825         endptr = 0;
826         double valuep = NStr::StringToDoublePosix(str, &endptr);
827         if ( delta == 0 )
828             BOOST_CHECK(valuep == result);
829         double min = result-delta, max = result+delta;
830         if ( finite(min) )
831             BOOST_CHECK(valuep >= min);
832         if ( finite(max) )
833             BOOST_CHECK(valuep <= max);
834     }
835 
836     string out;
837     double value;
838 
839     value = NStr::StringToDoublePosix("nan", &endptr);
840     BOOST_CHECK( isnan(value) );
841     BOOST_CHECK( endptr && !*endptr );
842     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
843     BOOST_CHECK( NStr::Compare(out, "NaN") == 0 );
844     NStr::NumericToString(out, value, NStr::fDoublePosix);
845     BOOST_CHECK( NStr::Compare(out, "NaN") == 0 );
846 
847     value = NStr::StringToDoublePosix(out.c_str(), &endptr);
848     BOOST_CHECK( isnan(value) );
849     BOOST_CHECK( endptr && !*endptr );
850     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
851     BOOST_CHECK( NStr::Compare(out, "NaN") == 0 );
852     NStr::NumericToString(out, value, NStr::fDoublePosix);
853     BOOST_CHECK( NStr::Compare(out, "NaN") == 0 );
854 
855     value = NStr::StringToDoublePosix("inf", &endptr);
856     BOOST_CHECK( !finite(value) && value>0.);
857     BOOST_CHECK( endptr && !*endptr );
858     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
859     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
860     NStr::NumericToString(out, value, NStr::fDoublePosix);
861     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
862 
863     value = NStr::StringToDoublePosix(out.c_str(), &endptr);
864     BOOST_CHECK( endptr && !*endptr );
865     BOOST_CHECK( !finite(value) && value>0.);
866     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
867     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
868     NStr::NumericToString(out, value, NStr::fDoublePosix);
869     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
870 
871     value = NStr::StringToDoublePosix("infinity", &endptr);
872     BOOST_CHECK( !finite(value) && value>0. );
873     BOOST_CHECK( endptr && !*endptr );
874     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
875     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
876     NStr::NumericToString(out, value, NStr::fDoublePosix);
877     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
878 
879     value = NStr::StringToDoublePosix(out.c_str(), &endptr);
880     BOOST_CHECK( !finite(value) && value>0. );
881     BOOST_CHECK( endptr && !*endptr );
882     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
883     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
884     NStr::NumericToString(out, value, NStr::fDoublePosix);
885     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
886 
887     value = NStr::StringToDoublePosix("+inf", &endptr);
888     BOOST_CHECK( !finite(value) && value>0. );
889     BOOST_CHECK( endptr && !*endptr );
890     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
891     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
892     NStr::NumericToString(out, value, NStr::fDoublePosix);
893     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
894 
895     value = NStr::StringToDoublePosix("+infinity", &endptr);
896     BOOST_CHECK( !finite(value) && value>0. );
897     BOOST_CHECK( endptr && !*endptr );
898     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
899     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
900     NStr::NumericToString(out, value, NStr::fDoublePosix);
901     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
902 
903     value = NStr::StringToDoublePosix("-inf", &endptr);
904     BOOST_CHECK( !finite(value) && value<0. );
905     BOOST_CHECK( endptr && !*endptr );
906     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
907     BOOST_CHECK( NStr::Compare(out, "-INF") == 0 );
908     NStr::NumericToString(out, value, NStr::fDoublePosix);
909     BOOST_CHECK( NStr::Compare(out, "-INF") == 0 );
910 
911     value = NStr::StringToDoublePosix("-infinity", &endptr);
912     BOOST_CHECK( !finite(value) && value<0. );
913     BOOST_CHECK( endptr && !*endptr );
914     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
915     BOOST_CHECK( NStr::Compare(out, "-INF") == 0 );
916     NStr::NumericToString(out, value, NStr::fDoublePosix);
917     BOOST_CHECK( NStr::Compare(out, "-INF") == 0 );
918 
919     value = NStr::StringToDoublePosix("+Infinity", &endptr);
920     BOOST_CHECK( !finite(value) && value>0. );
921     BOOST_CHECK( endptr && !*endptr );
922     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
923     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
924     NStr::NumericToString(out, value, NStr::fDoublePosix);
925     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
926 
927     value = NStr::StringToDoublePosix("Infinity", &endptr);
928     BOOST_CHECK( !finite(value) && value>0. );
929     BOOST_CHECK( endptr && !*endptr );
930     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
931     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
932     NStr::NumericToString(out, value, NStr::fDoublePosix);
933     BOOST_CHECK( NStr::Compare(out, "INF") == 0 );
934 
935     value = NStr::StringToDoublePosix("-infinity", &endptr);
936     BOOST_CHECK( !finite(value) && value<0. );
937     BOOST_CHECK( endptr && !*endptr );
938     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
939     BOOST_CHECK( NStr::Compare(out, "-INF") == 0 );
940     NStr::NumericToString(out, value, NStr::fDoublePosix);
941     BOOST_CHECK( NStr::Compare(out, "-INF") == 0 );
942 
943     value = NStr::StringToDoublePosix("-0", &endptr);
944     BOOST_CHECK( value == 0. );
945     NStr::DoubleToString(out, value, -1, NStr::fDoublePosix);
946     BOOST_CHECK( NStr::Compare(out, "-0") == 0 );
947 }
948 
949 static const SStringNumericValues s_Str2NumNonPosixTests[] = {
950     { "",          DF,                         -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
951     { "",          NStr::fConvErr_NoThrow,     -1, kBad, kBad, kBad, kBad, 0.f,  0.,   0. },
952     {  ",",        DF,                         -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
953     {  ",,",       DF,                         -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
954     {  ".,",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
955     {  ",.",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
956     { ",0",        DF,                         -1, kBad, kBad, kBad, kBad, .0f , .0,   0. },
957     { ",0.",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
958     { ".0,",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
959     { ",01",       DF,                         -1, kBad, kBad, kBad, kBad, .01f, .01,  0. },
960     { "1,",        DF,                         -1, kBad, kBad, kBad, kBad, 1.f , 1.,   0. },
961     { "1,1",       DF,                         -1, kBad, kBad, kBad, kBad, 1.1f, 1.1,  0. },
962     { "1,1",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, 1.1f, 1.1,  0. },
963     { "1,1",       NStr::fDecimalPosix,        -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
964     { "1.1",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, 1.1f, 1.1,  0. },
965     { "1.1",       NStr::fDecimalPosix,        -1, kBad, kBad, kBad, kBad, 1.1f, 1.1,  0. },
966     { "1,1.",      NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
967     { "1,1,",      DF,                         -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
968     { "1.,",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
969     { "1.1,",      NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
970     { "1.1,",      NStr::fDecimalPosix,        -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
971     { "1.,",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
972     { "1.,",       NStr::fDecimalPosix,        -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
973     { "12,34",     DF,                         -1, kBad, kBad, kBad, kBad, 12.34f, 12.34, 0. },
974     { "12,34",     NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, 12.34f, 12.34, 0. },
975     { "12.34",     NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, 12.34f, 12.34, 0. },
976     { "12.34",     NStr::fDecimalPosix,        -1, kBad, kBad, kBad, kBad, 12.34f, 12.34, 0. },
977     { "12,34e-2",  DF,                         -1, kBad, kBad, kBad, kBad, .1234f, .1234, 1e-17 },
978     { "12,34e-2",  NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, .1234f, .1234, 1e-17 },
979     { "12.34e-2",  NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, .1234f, .1234, 1e-17 },
980     { "12.34e-2",  NStr::fDecimalPosix,        -1, kBad, kBad, kBad, kBad, .1234f, .1234, 1e-17 },
981     { "1234,",     NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, 1234.f, 1234., 0. },
982     { "1234.",     NStr::fDecimalPosix,        -1, kBad, kBad, kBad, kBad, 1234.f, 1234., 0. },
983     { "1234",      NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, 1234.f, 1234., 0. },
984     { "0,0",       NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, 0.f,  0.,   0. },
985     { "0,000",     NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, 0.f,  0.,   0. },
986     { "0.000",     NStr::fDecimalPosix,        -1, kBad, kBad, kBad, kBad, 0.f,  0.,   0. },
987     { ",,1234",    NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
988     { "1234,,",    NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. },
989     { "12,,34",    NStr::fDecimalPosixOrLocal, -1, kBad, kBad, kBad, kBad, kBad, kBad, 0. }
990 };
991 
// Runs NStr::StringToDouble() over s_Str2NumNonPosixTests with LC_NUMERIC
// switched to a non-default locale. The case is silently skipped when none
// of the candidate locales can be selected.
BOOST_AUTO_TEST_CASE(s_StringToDouble)
{
    // Keep a private copy of the current LC_NUMERIC name for restoring later.
    char* prevlocal = NcbiSysChar_strdup(setlocale(LC_NUMERIC,NULL));

    // Candidate locale names, tried in this order until one is accepted.
    static const char* const kLocaleNames[] = { "deu", "de", "de_DE", "fr" };
    const size_t n_names = sizeof(kLocaleNames) / sizeof(kLocaleNames[0]);

    size_t tried = 0;
    while (tried < n_names  &&  !setlocale(LC_NUMERIC, kLocaleNames[tried])) {
        ++tried;
    }
    if (tried == n_names) {
        // cannot find suitable locale, skip the test
        free(prevlocal);
        return;
    }

    const SStringNumericValues* const tests_end = s_Str2NumNonPosixTests
        + sizeof(s_Str2NumNonPosixTests) / sizeof(s_Str2NumNonPosixTests[0]);

    for (const SStringNumericValues* test = s_Str2NumNonPosixTests;
         test != tests_end;  ++test) {
        const char*             str   = test->str;
        NStr::TStringToNumFlags flags = test->flags;

        // double: either the conversion succeeds and lands inside
        // [d - delta, d + delta], or it throws for a row marked as bad.
        try {
            errno = kTestErrno;
            double value = NStr::StringToDouble(str, flags);
            CHECK_ERRNO;
            BOOST_CHECK(test->IsGoodDouble());
            BOOST_CHECK(value >= test->d-test->delta && value <= test->d+test->delta);
        }
        catch (CException&) {
            BOOST_CHECK(!test->IsGoodDouble());
        }
    }

    // Put the original numeric locale back and release the copy.
    setlocale(LC_NUMERIC,prevlocal);
    free(prevlocal);
}
1028 
1029 
1030 //----------------------------------------------------------------------------
1031 // NStr::StringTo*() radix test
1032 //----------------------------------------------------------------------------
1033 
1034 // Writing separate tests for StringToUInt8 because we
1035 // need to test for different radix values such as 2, 8, and 16.
1036 
1037 struct SRadixTest {
1038     const char* str;      // String input
1039     int         base;     // Radix base
1040     Uint8       value;    // Expected value
1041 
SameSRadixTest1042     bool Same(const string& s) const
1043     {
1044         if ( s.empty() ) {
1045             return false;
1046         }
1047         const char* s1 = str;
1048         const char* s2 = s.c_str();
1049 
1050         // Workaround for ICC 8.0. It have an optimizer bug.
1051         // c_str() can return non-null-terminated string.
1052         // So we will use strncmp() here instead of strcmp().
1053 
1054         size_t n = s.length();
1055 
1056         while (*s1 == '0') s1++;
1057         if ( *s1 == 'x' )  s1++;
1058         while (*s2 == '0') { s2++; n--; };
1059 
1060         // Use case-insensitive comparison for base 16
1061         if (base == 16) {
1062             return (NStr::strncasecmp(s1, s2, n) == 0);
1063         }
1064         return (NStr::strncmp(s1, s2, n) == 0);
1065     }
1066 };
1067 
1068 static const SRadixTest s_RadixTests[] = {
1069     { "0",         16, 0 },
1070     { "A",         16, 10 },
1071     { "a",         16, 10 },
1072     { "0xA",       16, 10 },
1073     { "0xa",       16, 10 },
1074     { "B9",        16, 185 },
1075     { "b9",        16, 185 },
1076     { "C5D",       16, 3165 },
1077     { "FFFF",      16, 65535 },
1078     { "ffff",      16, 65535 },
1079     { "17ABCDEF",  16, 397135343 },
1080     { "BADBADBA",  16, 3134959034U },
1081     { "badbadba",  16, 3134959034U },
1082     { "0",          8, 0 },
1083     { "0",          8, 0 },
1084     { "7",          8, 7 },
1085     { "17",         8, 15 },
1086     { "177",        8, 127 },
1087     { "0123",       8, 83 },
1088     { "01234567",   8, 342391 },
1089     { "0",          2, 0 },
1090     { "1",          2, 1 },
1091     { "10",         2, 2 },
1092     { "11",         2, 3 },
1093     { "100",        2, 4 },
1094     { "101",        2, 5 },
1095     { "110",        2, 6 },
1096     { "111",        2, 7 },
1097 
1098     // Autodetect radix base
1099     { "0xC5D",      0, 3165 },    // base 16
1100     { "0123",       0, 83   },    // base 8
1101     { "123",        0, 123  },    // base 10
1102     { "111",        0, 111  },    // base 10
1103 
1104     // Invalid values come next
1105     { "10ABCDEFGH",16, kBad },
1106     { "12345A",    10, kBad },
1107     { "012345678",  8, kBad },
1108     { "012",        2, kBad }
1109 };
1110 
BOOST_AUTO_TEST_CASE(s_StringToNum_Radix)1111 BOOST_AUTO_TEST_CASE(s_StringToNum_Radix)
1112 {
1113     const size_t count = sizeof(s_RadixTests)/sizeof(s_RadixTests[0]);
1114 
1115     for (size_t i = 0;  i < count;  ++i)
1116     {
1117         const SRadixTest* test = &s_RadixTests[i];
1118 
1119         // Int
1120         try {
1121             if ( test->value <= (Uint8)kMax_Int ) {
1122                 errno = kTestErrno;
1123                 int val = NStr::StringToInt(test->str, 0, test->base);
1124                 BOOST_CHECK(errno != kTestErrno);
1125                 string str;
1126                 if ( test->base ) {
1127                     errno = kTestErrno;
1128                     NStr::IntToString(str, val, 0, test->base);
1129                     BOOST_CHECK(errno != kTestErrno);
1130                 }
1131                 BOOST_CHECK_EQUAL((Uint8)val, test->value);
1132                 if ( test->base ) {
1133                     BOOST_CHECK(test->Same(str));
1134                     NStr::NumericToString(str, val, 0, test->base);
1135                     BOOST_CHECK(test->Same(str));
1136                 }
1137             }
1138         }
1139         catch (CException&) {
1140             BOOST_CHECK_EQUAL(test->value, (Uint8)kBad);
1141         }
1142 
1143         // UInt
1144         try {
1145             if ( test->value <= kMax_UInt ) {
1146                 errno = kTestErrno;
1147                 unsigned int val = NStr::StringToUInt(test->str, 0, test->base);
1148                 BOOST_CHECK(errno != kTestErrno);
1149                 string str;
1150                 if ( test->base ) {
1151                     errno = kTestErrno;
1152                     NStr::UIntToString(str, val, 0, test->base);
1153                     BOOST_CHECK(errno != kTestErrno);
1154                 }
1155                 BOOST_CHECK_EQUAL(val, test->value);
1156                 if ( test->base ) {
1157                     BOOST_CHECK(test->Same(str));
1158                     NStr::NumericToString(str, val, 0, test->base);
1159                     BOOST_CHECK(test->Same(str));
1160                 }
1161             }
1162         }
1163         catch (CException&) {
1164             BOOST_CHECK_EQUAL(test->value, (Uint8)kBad);
1165         }
1166 
1167         // Int8
1168         try {
1169             if ( test->value <= (Uint8)kMax_I8 ) {
1170                 errno = kTestErrno;
1171                 Int8 val = NStr::StringToInt8(test->str, 0, test->base);
1172                 BOOST_CHECK(errno != kTestErrno);
1173                 string str;
1174                 if ( test->base ) {
1175                     errno = kTestErrno;
1176                     NStr::Int8ToString(str, val, 0, test->base);
1177                     BOOST_CHECK(errno != kTestErrno);
1178                 }
1179                 BOOST_CHECK_EQUAL((Uint8)val, test->value);
1180                 if ( test->base ) {
1181                     BOOST_CHECK(test->Same(str));
1182                     NStr::NumericToString(str, val, 0, test->base);
1183                     BOOST_CHECK(test->Same(str));
1184                 }
1185             }
1186         }
1187         catch (CException&) {
1188             BOOST_CHECK_EQUAL(test->value, (Uint8)kBad);
1189         }
1190 
1191         // Uint8
1192         try {
1193             errno = kTestErrno;
1194             Uint8 val = NStr::StringToUInt8(test->str, 0, test->base);
1195             BOOST_CHECK(errno != kTestErrno);
1196             string str;
1197             if ( test->base ) {
1198                 errno = kTestErrno;
1199                 NStr::UInt8ToString(str, val, 0, test->base);
1200                 BOOST_CHECK(errno != kTestErrno);
1201             }
1202             BOOST_CHECK_EQUAL(val, test->value);
1203             if ( test->base ) {
1204                 BOOST_CHECK(test->Same(str));
1205                 NStr::NumericToString(str, val, 0, test->base);
1206                 BOOST_CHECK(test->Same(str));
1207             }
1208         }
1209         catch (CException&) {
1210             BOOST_CHECK_EQUAL(test->value, (Uint8)kBad);
1211         }
1212     }
1213 
1214     // Some additional tests
1215 
1216     string str;
1217 
1218     NStr::IntToString(str, kMax_Int, 0, 2);
1219 #if (SIZEOF_INT == 4)
1220     BOOST_CHECK(str == "1111111111111111111111111111111");
1221 #elif (SIZEOF_INT == 8)
1222     BOOST_CHECK(str == "111111111111111111111111111111111111111111111111111111111111111");
1223 #endif
1224     NStr::NumericToString(str, kMax_Int, 0, 2);
1225 #if (SIZEOF_INT == 4)
1226     BOOST_CHECK(str == "1111111111111111111111111111111");
1227 #elif (SIZEOF_INT == 8)
1228     BOOST_CHECK(str == "111111111111111111111111111111111111111111111111111111111111111");
1229 #endif
1230 
1231     NStr::LongToString(str, kMax_Long, 0, 2);
1232 #if (SIZEOF_LONG == 4)
1233     BOOST_CHECK(str == "1111111111111111111111111111111");
1234 #elif (SIZEOF_LONG == 8)
1235     BOOST_CHECK(str == "111111111111111111111111111111111111111111111111111111111111111");
1236 #endif
1237     NStr::NumericToString(str, kMax_Long, 0, 2);
1238 #if (SIZEOF_LONG == 4)
1239     BOOST_CHECK(str == "1111111111111111111111111111111");
1240 #elif (SIZEOF_LONG == 8)
1241     BOOST_CHECK(str == "111111111111111111111111111111111111111111111111111111111111111");
1242 #endif
1243 
1244     NStr::UIntToString(str, kMax_UInt, 0, 2);
1245 #if (SIZEOF_INT == 4)
1246     BOOST_CHECK(str == "11111111111111111111111111111111");
1247 #elif (SIZEOF_INT == 8)
1248     BOOST_CHECK(str == "1111111111111111111111111111111111111111111111111111111111111111");
1249 #endif
1250     NStr::NumericToString(str, kMax_UInt, 0, 2);
1251 #if (SIZEOF_INT == 4)
1252     BOOST_CHECK(str == "11111111111111111111111111111111");
1253 #elif (SIZEOF_INT == 8)
1254     BOOST_CHECK(str == "1111111111111111111111111111111111111111111111111111111111111111");
1255 #endif
1256 
1257     NStr::ULongToString(str, kMax_ULong, 0, 2);
1258 #if (SIZEOF_LONG == 4)
1259     BOOST_CHECK(str == "11111111111111111111111111111111");
1260 #elif (SIZEOF_LONG == 8)
1261     BOOST_CHECK(str == "1111111111111111111111111111111111111111111111111111111111111111");
1262 #endif
1263     NStr::NumericToString(str, kMax_ULong, 0, 2);
1264 #if (SIZEOF_LONG == 4)
1265     BOOST_CHECK(str == "11111111111111111111111111111111");
1266 #elif (SIZEOF_LONG == 8)
1267     BOOST_CHECK(str == "1111111111111111111111111111111111111111111111111111111111111111");
1268 #endif
1269 
1270     NStr::IntToString(str, -1, 0, 8);
1271 #if (SIZEOF_INT == 4)
1272     BOOST_CHECK(str == "37777777777");
1273 #elif (SIZEOF_INT == 8)
1274     BOOST_CHECK(str == "1777777777777777777777");
1275 #endif
1276     NStr::NumericToString(str, -1, 0, 8);
1277 #if (SIZEOF_INT == 4)
1278     BOOST_CHECK(str == "37777777777");
1279 #elif (SIZEOF_INT == 8)
1280     BOOST_CHECK(str == "1777777777777777777777");
1281 #endif
1282 
1283     NStr::LongToString(str, -1, 0, 8);
1284 #if (SIZEOF_LONG == 4)
1285     BOOST_CHECK(str == "37777777777");
1286 #elif (SIZEOF_LONG == 8)
1287     BOOST_CHECK(str == "1777777777777777777777");
1288 #endif
1289     NStr::NumericToString(str, (long)-1, 0, 8);
1290 #if (SIZEOF_LONG == 4)
1291     BOOST_CHECK(str == "37777777777");
1292 #elif (SIZEOF_LONG == 8)
1293     BOOST_CHECK(str == "1777777777777777777777");
1294 #endif
1295 
1296     NStr::IntToString(str, -1, 0, 16);
1297 #if (SIZEOF_INT == 4)
1298     BOOST_CHECK(str == "FFFFFFFF");
1299 #elif (SIZEOF_INT == 8)
1300     BOOST_CHECK(str == "FFFFFFFFFFFFFFFF");
1301 #endif
1302     NStr::NumericToString(str, -1, 0, 16);
1303 #if (SIZEOF_INT == 4)
1304     BOOST_CHECK(str == "FFFFFFFF");
1305 #elif (SIZEOF_INT == 8)
1306     BOOST_CHECK(str == "FFFFFFFFFFFFFFFF");
1307 #endif
1308 
1309     NStr::LongToString(str, -1, 0, 16);
1310 #if (SIZEOF_LONG == 4)
1311     BOOST_CHECK(str == "FFFFFFFF");
1312 #elif (SIZEOF_LONG == 8)
1313     BOOST_CHECK(str == "FFFFFFFFFFFFFFFF");
1314 #endif
1315     NStr::NumericToString(str, (long)-1, 0, 16);
1316 #if (SIZEOF_LONG == 4)
1317     BOOST_CHECK(str == "FFFFFFFF");
1318 #elif (SIZEOF_LONG == 8)
1319     BOOST_CHECK(str == "FFFFFFFFFFFFFFFF");
1320 #endif
1321 
1322     NStr::UInt8ToString(str, NCBI_CONST_UINT8(12345678901234567), 0, 16);
1323     BOOST_CHECK(str == "2BDC545D6B4B87");
1324     NStr::NumericToString(str, NCBI_CONST_UINT8(12345678901234567), 0, 16);
1325     BOOST_CHECK(str == "2BDC545D6B4B87");
1326 }
1327 
1328 
1329 //----------------------------------------------------------------------------
1330 // NStr::StringTo*_DataSize()
1331 //----------------------------------------------------------------------------
1332 
1333 struct SStringDataSizeValues
1334 {
1335     const char*             str;
1336     NStr::TStringToNumFlags flags;
1337     Uint8                   expected;
1338 
IsGoodSStringDataSizeValues1339     bool IsGood(void) const {
1340         return expected != (Uint8)kBad;
1341     }
1342 };
1343 
1344 static const SStringDataSizeValues s_Str2DataSizeTests[] = {
1345     // str  flags     num
1346     { "10",    0,      10 },
1347     { "10b",   0,      10 },
1348     { "10k",   0, 10*1000 },
1349     { "10K",   0, 10*1000 },
1350     { "10KB",  0, 10*1000 },
1351     { "10KiB", 0, 10*1024 },
1352     { "10KIB", 0, 10*1024 },
1353     { "10K",   NStr::fDS_ForceBinary, 10*1024 },
1354     { "10KB",  NStr::fDS_ForceBinary, 10*1024 },
1355     { "10M",   0, 10*1000*1000 },
1356     { "10MB",  0, 10*1000*1000 },
1357     { "10MiB", 0, 10*1024*1024 },
1358     { "10M",   NStr::fDS_ForceBinary, 10*1024*1024 },
1359     { "10MB",  NStr::fDS_ForceBinary, 10*1024*1024 },
1360     { "10G",   0, Uint8(10)*1000*1000*1000 },
1361     { "10GB",  0, Uint8(10)*1000*1000*1000 },
1362     { "10GiB", 0, Uint8(10)*1024*1024*1024 },
1363     { "10G",   NStr::fDS_ForceBinary, Uint8(10)*1024*1024*1024 },
1364     { "10GB",  NStr::fDS_ForceBinary, Uint8(10)*1024*1024*1024 },
1365     { "10T",   0, Uint8(10)*1000*1000*1000*1000 },
1366     { "10TB",  0, Uint8(10)*1000*1000*1000*1000 },
1367     { "10TiB", 0, Uint8(10)*1024*1024*1024*1024 },
1368     { "10T",   NStr::fDS_ForceBinary, Uint8(10)*1024*1024*1024*1024 },
1369     { "10TB",  NStr::fDS_ForceBinary, Uint8(10)*1024*1024*1024*1024 },
1370     { "10P",   0, Uint8(10)*1000*1000*1000*1000*1000 },
1371     { "10PB",  0, Uint8(10)*1000*1000*1000*1000*1000 },
1372     { "10PiB", 0, Uint8(10)*1024*1024*1024*1024*1024 },
1373     { "10P",   NStr::fDS_ForceBinary, Uint8(10)*1024*1024*1024*1024*1024 },
1374     { "10PB",  NStr::fDS_ForceBinary, Uint8(10)*1024*1024*1024*1024*1024 },
1375     { "10E",   0, Uint8(10)*1000*1000*1000*1000*1000*1000 },
1376     { "10EB",  0, Uint8(10)*1000*1000*1000*1000*1000*1000 },
1377     { "10EiB", 0, Uint8(10)*1024*1024*1024*1024*1024*1024 },
1378     { "10E",   NStr::fDS_ForceBinary, Uint8(10)*1024*1024*1024*1024*1024*1024 },
1379     { "10EB",  NStr::fDS_ForceBinary, Uint8(10)*1024*1024*1024*1024*1024*1024 },
1380     { "+10K",  0, 10*1000 },
1381     { "-10K",  0, kBad },
1382     { "1GBx",  0, kBad },
1383     { "1Gi",   0, kBad },
1384     { "10000000000000GB", 0, kBad },
1385     { " 10K",  0, kBad },
1386     { " 10K",  NStr::fAllowLeadingSpaces, 10*1000 },
1387     { "10K ",  0, kBad },
1388     { "10K ",  NStr::fAllowTrailingSpaces, 10*1000 },
1389     { "10 K",  0, 10*1000 },
1390     { "10 K",  NStr::fDS_ProhibitSpaceBeforeSuffix, kBad },
1391     { "10K",   NStr::fMandatorySign, kBad },
1392     { "+10K",  NStr::fMandatorySign, 10*1000 },
1393     { "-10K",  NStr::fMandatorySign, kBad },
1394     { "1,000K", 0, kBad },
1395     { "1,000K", NStr::fAllowCommas, 1000*1000 },
1396     { "K10K",  0, kBad },
1397     { "K10K",  NStr::fAllowLeadingSymbols, 10*1000 },
1398     { "K10K",  NStr::fAllowLeadingSymbols, 10*1000 },
1399     { "10KG",  NStr::fAllowTrailingSymbols, 10*1000 },
1400     { "0.123",  0, 0 },
1401     { "0.567",  0, 1 },
1402     { "0.123456 K",  0, 123 },
1403     { "0.123567 K",  0, 124 },
1404     { "0.123456 KiB",  0, 126 },
1405     { "0.123567 KiB",  0, 127 },
1406     { "0.123 MB",  0, 123000 },
1407     { "0.123 MiB", 0, 128975 },
1408     { "0.123456 GiB",  0, 132559871 },
1409     { "123abc",  NStr::fAllowTrailingSymbols, 123 },
1410     { "123klm",  NStr::fAllowTrailingSymbols, 123000 },
1411     { "123klm",  NStr::fAllowTrailingSymbols + NStr::fDS_ForceBinary, 125952 },
1412     { "123kbc",  NStr::fAllowTrailingSymbols, 123000 },
1413     { "123kic",  NStr::fAllowTrailingSymbols, 123000 },
1414     { "123kibc", NStr::fAllowTrailingSymbols, 125952 },
1415     { "123.abc", NStr::fAllowTrailingSymbols, 123 },
1416     { "123.kic", NStr::fAllowTrailingSymbols, 123 },
1417     { "123.001 kib", 0, 125953 },
1418     { "12.3456 pib", 0, NCBI_CONST_UINT8(13899909889916299) },
1419     { "abc.123.abc",  NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols, 123 },
1420     { "abc.+123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols, 123 },
1421     { "abc+.123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols, 123 },
1422     { "abc-.123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols, 123 },
1423     { "abc.-123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols, 123 },
1424     { "abc.123.abc",  NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1425                       + NStr::fMandatorySign, kBad },
1426     { "abc.+123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1427                       + NStr::fMandatorySign, 123 },
1428     { "abc+.123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1429                       + NStr::fMandatorySign, kBad },
1430     { "abc-.123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1431                       + NStr::fMandatorySign, kBad },
1432     { "abc.-123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1433                       + NStr::fMandatorySign, kBad },
1434     { "abc.123.abc",  NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1435                       + NStr::fDS_ProhibitFractions, 123 },
1436     { "abc.+123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1437                       + NStr::fDS_ProhibitFractions, 123 },
1438     { "abc+.123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1439                       + NStr::fDS_ProhibitFractions, 123 },
1440     { "abc-.123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1441                       + NStr::fDS_ProhibitFractions, 123 },
1442     { "abc.-123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1443                       + NStr::fDS_ProhibitFractions, 123 },
1444     { "abc.123.abc",  NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1445                       + NStr::fMandatorySign + NStr::fDS_ProhibitFractions, kBad },
1446     { "abc.+123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1447                       + NStr::fMandatorySign + NStr::fDS_ProhibitFractions, 123 },
1448     { "abc+.123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1449                       + NStr::fMandatorySign + NStr::fDS_ProhibitFractions, kBad },
1450     { "abc-.123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1451                       + NStr::fMandatorySign + NStr::fDS_ProhibitFractions, kBad },
1452     { "abc.-123.abc", NStr::fAllowLeadingSymbols + NStr::fAllowTrailingSymbols
1453                       + NStr::fMandatorySign + NStr::fDS_ProhibitFractions, kBad },
1454     { "123.456", 0, 123 },
1455     { "123.567", 0, 124 },
1456     { "123.567",   NStr::fAllowTrailingSymbols + NStr::fDS_ProhibitFractions, 123 },
1457     { "123.567MB", NStr::fAllowTrailingSymbols + NStr::fDS_ProhibitFractions, 123 },
1458     { "123.567MB", NStr::fDS_ProhibitFractions, kBad },
1459     { ".34KB",   0, kBad },
1460     { ".34KB",   NStr::fAllowLeadingSymbols, 34000 },
1461     { "k.5.6m",  NStr::fAllowLeadingSymbols, 5600000 },
1462     { "k.5.6m",  NStr::fAllowLeadingSymbols + NStr::fDS_ProhibitFractions, kBad },
1463     { "k.5.6m",  NStr::fAllowLeadingSymbols + NStr::fDS_ProhibitFractions
1464                  + NStr::fAllowTrailingSymbols, 5 },
1465     { "123,4,56789,0", NStr::fAllowCommas, 1234567890 },
1466     { "123,4,", NStr::fAllowCommas, kBad },
1467     { "123,4,", NStr::fAllowCommas + NStr::fAllowTrailingSymbols, 1234 },
1468     { "123,4,,56", NStr::fAllowCommas, kBad },
1469     { "123,4,,56", NStr::fAllowCommas + NStr::fAllowTrailingSymbols, 1234 },
1470     { ",123,4", NStr::fAllowCommas, kBad },
1471     { ",123,4", NStr::fAllowCommas + NStr::fAllowLeadingSymbols, 1234 },
1472     { "10", NStr::fDecimalPosix + NStr::fConvErr_NoThrow, kBad },
1473     { "10", NStr::fDecimalPosixOrLocal + NStr::fConvErr_NoThrow, kBad },
1474     { "10", NStr::fWithSign + NStr::fConvErr_NoThrow, kBad },
1475     { "10", NStr::fWithCommas + NStr::fConvErr_NoThrow, kBad },
1476     { "10", NStr::fDoubleFixed + NStr::fConvErr_NoThrow, kBad },
1477     { "10", NStr::fDoubleScientific + NStr::fConvErr_NoThrow, kBad },
1478     { "10", NStr::fDoublePosix + NStr::fConvErr_NoThrow, kBad },
1479     { "10", NStr::fDS_Binary + NStr::fConvErr_NoThrow, kBad },
1480     { "10", NStr::fDS_NoDecimalPoint + NStr::fConvErr_NoThrow, kBad },
1481     { "10", NStr::fDS_PutSpaceBeforeSuffix + NStr::fConvErr_NoThrow, kBad },
1482     { "10", NStr::fDS_ShortSuffix + NStr::fConvErr_NoThrow, kBad },
1483     { "10", NStr::fDS_PutBSuffixToo + NStr::fConvErr_NoThrow, kBad }
1484 };
1485 
// Table-driven check of NStr::StringToUInt8_DataSize().
//
// Every entry of s_Str2DataSizeTests is parsed with its own flags:
//  - if the call returns, the entry must be a "good" one (IsGood()) and the
//    parsed value must equal the entry's 'expected' field;
//  - if the call throws CException, the entry must NOT be a "good" one.
BOOST_AUTO_TEST_CASE(s_StringToNum_DataSize)
{
    const size_t count = sizeof(s_Str2DataSizeTests) /
                         sizeof(s_Str2DataSizeTests[0]);

    for (size_t i = 0;  i < count;  ++i)
    {
        const SStringDataSizeValues* test  = &s_Str2DataSizeTests[i];
        const char*                  str   = test->str;
        NStr::TStringToNumFlags      flags = test->flags;

        try {
            // errno is preset to the sentinel before the conversion;
            // CHECK_ERRNO (a macro defined elsewhere in this file) is
            // evaluated right after it.
            errno = kTestErrno;
            Uint8 value = NStr::StringToUInt8_DataSize(str, flags);
            CHECK_ERRNO;
            BOOST_CHECK(test->IsGood());
            BOOST_CHECK_EQUAL(value, test->expected);
        }
        catch (const CException&) {
            // A failed conversion is acceptable only for entries that are
            // not expected to succeed.
            BOOST_CHECK(!test->IsGood());
        }
    }
}
1509 
1510 
//----------------------------------------------------------------------------
// NStr::UInt8ToString_DataSize()
//----------------------------------------------------------------------------
1514 
1515 struct SUint8DataSizeValues
1516 {
1517     Uint8                   num;
1518     NStr::TNumToStringFlags flags;
1519     unsigned int            max_digits;
1520     const char*             expected;
1521 
IsGoodSUint8DataSizeValues1522     bool IsGood(void) const {
1523         return expected != NULL;
1524     }
1525 };
1526 
1527 static const SUint8DataSizeValues s_Num2StrDataSizeTests[] = {
1528     // num  flags    str
1529     { 10, 0, 3, "10" },
1530     { 10, NStr::fDS_PutBSuffixToo, 3, "10B" },
1531     { 10, NStr::fDS_PutBSuffixToo + NStr::fDS_PutSpaceBeforeSuffix, 3, "10 B" },
1532     { 10*1000, 0, 3, "10.0KB" },
1533     { 10*1000, NStr::fDS_ShortSuffix, 3, "10.0K" },
1534     { 10*1024, NStr::fDS_Binary, 3, "10.0KiB" },
1535     { 10*1024, NStr::fDS_Binary + NStr::fDS_ShortSuffix, 3, "10.0K" },
1536     { 10*1000*1000, 0, 3, "10.0MB" },
1537     { 10*1000*1000, NStr::fDS_ShortSuffix, 3, "10.0M" },
1538     { 10*1024*1024, NStr::fDS_Binary, 3, "10.0MiB" },
1539     { 10*1024*1024, NStr::fDS_Binary + NStr::fDS_ShortSuffix, 3, "10.0M" },
1540     { Uint8(10)*1000*1000*1000, 0, 3, "10.0GB" },
1541     { Uint8(10)*1000*1000*1000, NStr::fDS_ShortSuffix, 3, "10.0G" },
1542     { Uint8(10)*1024*1024*1024, NStr::fDS_Binary, 3, "10.0GiB" },
1543     { Uint8(10)*1024*1024*1024, NStr::fDS_Binary + NStr::fDS_ShortSuffix, 3, "10.0G" },
1544     { Uint8(10)*1000*1000*1000*1000, 0, 3, "10.0TB" },
1545     { Uint8(10)*1000*1000*1000*1000, NStr::fDS_ShortSuffix, 3, "10.0T" },
1546     { Uint8(10)*1024*1024*1024*1024, NStr::fDS_Binary, 3, "10.0TiB" },
1547     { Uint8(10)*1024*1024*1024*1024, NStr::fDS_Binary + NStr::fDS_ShortSuffix, 3, "10.0T" },
1548     { Uint8(10)*1000*1000*1000*1000*1000, 0, 3, "10.0PB" },
1549     { Uint8(10)*1000*1000*1000*1000*1000, NStr::fDS_ShortSuffix, 3, "10.0P" },
1550     { Uint8(10)*1024*1024*1024*1024*1024, NStr::fDS_Binary, 3, "10.0PiB" },
1551     { Uint8(10)*1024*1024*1024*1024*1024, NStr::fDS_Binary + NStr::fDS_ShortSuffix, 3, "10.0P" },
1552     { Uint8(10)*1000*1000*1000*1000*1000*1000, 0, 3, "10.0EB" },
1553     { Uint8(10)*1000*1000*1000*1000*1000*1000, NStr::fDS_ShortSuffix, 3, "10.0E" },
1554     { Uint8(10)*1024*1024*1024*1024*1024*1024, NStr::fDS_Binary, 3, "10.0EiB" },
1555     { Uint8(10)*1024*1024*1024*1024*1024*1024, NStr::fDS_Binary + NStr::fDS_ShortSuffix, 3, "10.0E" },
1556     { 10*1000, NStr::fWithSign, 3, "+10.0KB" },
1557     { 10*1000, NStr::fDS_NoDecimalPoint, 3, "10KB" },
1558     { 10*1000, NStr::fDS_PutSpaceBeforeSuffix, 3, "10.0 KB" },
1559     { 1000, NStr::fDS_NoDecimalPoint, 4, "1000" },
1560     { 1000, NStr::fDS_NoDecimalPoint + NStr::fWithCommas, 4, "1,000" },
1561     { 123456789, 0, 6, "123.457MB" },
1562     { 3456789, 0, 6, "3.45679MB" },
1563     { 456789, 0, 6, "456.789KB" },
1564     { 123456789, 0, 4, "123.5MB" },
1565     { 123456789, NStr::fDS_NoDecimalPoint, 6, "123457KB" },
1566     { 3456789, NStr::fDS_NoDecimalPoint, 6, "3457KB" },
1567     { 456789, NStr::fDS_NoDecimalPoint, 6, "456789" },
1568     { 123456789, NStr::fDS_NoDecimalPoint, 4, "123MB" },
1569     { 23456789, NStr::fDS_NoDecimalPoint, 4, "23MB" },
1570     { 3456789, NStr::fDS_NoDecimalPoint, 4, "3457KB" },
1571     { 123456789, 0, 1, "123MB" },
1572     { 123456789, 0, 2, "123MB" },
1573     { 12345, 0, 1, "12.3KB" },
1574     { NCBI_CONST_UINT8(13899853594920957), NStr::fDS_Binary, 6, "12.3456PiB" },
1575     { 1000, NStr::fDS_Binary, 3, "0.98KiB" },
1576     { 1000, NStr::fDS_Binary, 4, "1000" },
1577     { 1000, NStr::fDS_Binary, 5, "1000" },
1578     { 1000, NStr::fDS_Binary, 6, "1000" },
1579     { 1023, NStr::fDS_Binary, 3, "1.00KiB" },
1580     { 1023, NStr::fDS_Binary, 4, "1023" },
1581     { 1023, NStr::fDS_Binary, 5, "1023" },
1582     { 1023, NStr::fDS_Binary, 6, "1023" },
1583     { 1024, NStr::fDS_Binary, 3, "1.00KiB" },
1584     { 1024, NStr::fDS_Binary, 4, "1.000KiB" },
1585     { 1024, NStr::fDS_Binary, 5, "1.000KiB" },
1586     { 1024, NStr::fDS_Binary, 6, "1.000KiB" },
1587     { 99999, NStr::fDS_Binary, 3, "97.7KiB" },
1588     { 99999, NStr::fDS_Binary, 4, "97.66KiB" },
1589     { 99999, NStr::fDS_Binary, 5, "97.655KiB" },
1590     { 99999, NStr::fDS_Binary, 6, "97.655KiB" },
1591     { 999930, NStr::fDS_Binary, 3, "976KiB" },
1592     { 999930, NStr::fDS_Binary, 4, "976.5KiB" },
1593     { 999930, NStr::fDS_Binary, 5, "976.49KiB" },
1594     { 999930, NStr::fDS_Binary, 6, "976.494KiB" },
1595     { 1000000, NStr::fDS_Binary, 3, "977KiB" },
1596     { 1000000, NStr::fDS_Binary, 4, "976.6KiB" },
1597     { 1000000, NStr::fDS_Binary, 5, "976.56KiB" },
1598     { 1000000, NStr::fDS_Binary, 6, "976.563KiB" },
1599     { 1023993, NStr::fDS_Binary, 3, "0.98MiB" },
1600     { 1023993, NStr::fDS_Binary, 4, "1000KiB" },
1601     { 1023993, NStr::fDS_Binary, 5, "999.99KiB" },
1602     { 1023993, NStr::fDS_Binary, 6, "999.993KiB" },
1603     { 1047552, NStr::fDS_Binary, 3, "1.00MiB" },
1604     { 1047552, NStr::fDS_Binary, 4, "1023KiB" },
1605     { 1047552, NStr::fDS_Binary, 5, "1023.0KiB" },
1606     { 1047552, NStr::fDS_Binary, 6, "1023.00KiB" },
1607     { 1048064, NStr::fDS_Binary, 3, "1.00MiB" },
1608     { 1048064, NStr::fDS_Binary, 4, "1.000MiB" },
1609     { 1048064, NStr::fDS_Binary, 5, "1023.5KiB" },
1610     { 1048064, NStr::fDS_Binary, 6, "1023.50KiB" },
1611     { 1048576, NStr::fDS_Binary, 3, "1.00MiB" },
1612     { 1048576, NStr::fDS_Binary, 4, "1.000MiB" },
1613     { 1048576, NStr::fDS_Binary, 5, "1.0000MiB" },
1614     { 1048576, NStr::fDS_Binary, 6, "1.00000MiB" },
1615     { 1572864, NStr::fDS_Binary, 3, "1.50MiB" },
1616     { 1572864, NStr::fDS_Binary, 4, "1.500MiB" },
1617     { 1572864, NStr::fDS_Binary, 5, "1.5000MiB" },
1618     { 1572864, NStr::fDS_Binary, 6, "1.50000MiB" },
1619     { 1000, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "1KiB" },
1620     { 1000, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "1000" },
1621     { 1000, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "1000" },
1622     { 1000, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "1000" },
1623     { 1023, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "1KiB" },
1624     { 1023, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "1023" },
1625     { 1023, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "1023" },
1626     { 1023, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "1023" },
1627     { 1024, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "1KiB" },
1628     { 1024, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "1024" },
1629     { 1024, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "1024" },
1630     { 1024, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "1024" },
1631     { 99999, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "98KiB" },
1632     { 99999, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "98KiB" },
1633     { 99999, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "99999" },
1634     { 99999, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "99999" },
1635     { 999930, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "976KiB" },
1636     { 999930, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "976KiB" },
1637     { 999930, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "976KiB" },
1638     { 999930, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "999930" },
1639     { 1000000, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "977KiB" },
1640     { 1000000, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "977KiB" },
1641     { 1000000, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "977KiB" },
1642     { 1000000, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "977KiB" },
1643     { 1023993, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "1MiB" },
1644     { 1023993, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "1000KiB" },
1645     { 1023993, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "1000KiB" },
1646     { 1023993, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "1000KiB" },
1647     { 1047552, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "1MiB" },
1648     { 1047552, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "1023KiB" },
1649     { 1047552, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "1023KiB" },
1650     { 1047552, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "1023KiB" },
1651     { 1048064, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "1MiB" },
1652     { 1048064, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "1024KiB" },
1653     { 1048064, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "1024KiB" },
1654     { 1048064, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "1024KiB" },
1655     { 1048576, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "1MiB" },
1656     { 1048576, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "1024KiB" },
1657     { 1048576, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "1024KiB" },
1658     { 1048576, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "1024KiB" },
1659     { 1572864, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 3, "2MiB" },
1660     { 1572864, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 4, "1536KiB" },
1661     { 1572864, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 5, "1536KiB" },
1662     { 1572864, NStr::fDS_Binary + NStr::fDS_NoDecimalPoint, 6, "1536KiB" },
1663     { 1000, 0, 3, "1.00KB" },
1664     { 1000, 0, 4, "1.000KB" },
1665     { 1000, 0, 5, "1.000KB" },
1666     { 1000, 0, 6, "1.000KB" },
1667     { 1023, 0, 3, "1.02KB" },
1668     { 1023, 0, 4, "1.023KB" },
1669     { 1023, 0, 5, "1.023KB" },
1670     { 1023, 0, 6, "1.023KB" },
1671     { 1024, 0, 3, "1.02KB" },
1672     { 1024, 0, 4, "1.024KB" },
1673     { 1024, 0, 5, "1.024KB" },
1674     { 1024, 0, 6, "1.024KB" },
1675     { 99999, 0, 3, "100KB" },
1676     { 99999, 0, 4, "100.0KB" },
1677     { 99999, 0, 5, "99.999KB" },
1678     { 99999, 0, 6, "99.999KB" },
1679     { 999930, 0, 3, "1.00MB" },
1680     { 999930, 0, 4, "999.9KB" },
1681     { 999930, 0, 5, "999.93KB" },
1682     { 999930, 0, 6, "999.930KB" },
1683     { 1000000, 0, 3, "1.00MB" },
1684     { 1000000, 0, 4, "1.000MB" },
1685     { 1000000, 0, 5, "1.0000MB" },
1686     { 1000000, 0, 6, "1.00000MB" },
1687     { 1023993, 0, 3, "1.02MB" },
1688     { 1023993, 0, 4, "1.024MB" },
1689     { 1023993, 0, 5, "1.0240MB" },
1690     { 1023993, 0, 6, "1.02399MB" },
1691     { 1047552, 0, 3, "1.05MB" },
1692     { 1047552, 0, 4, "1.048MB" },
1693     { 1047552, 0, 5, "1.0476MB" },
1694     { 1047552, 0, 6, "1.04755MB" },
1695     { 1048064, 0, 3, "1.05MB" },
1696     { 1048064, 0, 4, "1.048MB" },
1697     { 1048064, 0, 5, "1.0481MB" },
1698     { 1048064, 0, 6, "1.04806MB" },
1699     { 1048576, 0, 3, "1.05MB" },
1700     { 1048576, 0, 4, "1.049MB" },
1701     { 1048576, 0, 5, "1.0486MB" },
1702     { 1048576, 0, 6, "1.04858MB" },
1703     { 1572864, 0, 3, "1.57MB" },
1704     { 1572864, 0, 4, "1.573MB" },
1705     { 1572864, 0, 5, "1.5729MB" },
1706     { 1572864, 0, 6, "1.57286MB" },
1707     { 1000, NStr::fDS_NoDecimalPoint, 3, "1KB" },
1708     { 1000, NStr::fDS_NoDecimalPoint, 4, "1000" },
1709     { 1000, NStr::fDS_NoDecimalPoint, 5, "1000" },
1710     { 1000, NStr::fDS_NoDecimalPoint, 6, "1000" },
1711     { 1023, NStr::fDS_NoDecimalPoint, 3, "1KB" },
1712     { 1023, NStr::fDS_NoDecimalPoint, 4, "1023" },
1713     { 1023, NStr::fDS_NoDecimalPoint, 5, "1023" },
1714     { 1023, NStr::fDS_NoDecimalPoint, 6, "1023" },
1715     { 1024, NStr::fDS_NoDecimalPoint, 3, "1KB" },
1716     { 1024, NStr::fDS_NoDecimalPoint, 4, "1024" },
1717     { 1024, NStr::fDS_NoDecimalPoint, 5, "1024" },
1718     { 1024, NStr::fDS_NoDecimalPoint, 6, "1024" },
1719     { 99999, NStr::fDS_NoDecimalPoint, 3, "100KB" },
1720     { 99999, NStr::fDS_NoDecimalPoint, 4, "100KB" },
1721     { 99999, NStr::fDS_NoDecimalPoint, 5, "99999" },
1722     { 99999, NStr::fDS_NoDecimalPoint, 6, "99999" },
1723     { 999930, NStr::fDS_NoDecimalPoint, 3, "1MB" },
1724     { 999930, NStr::fDS_NoDecimalPoint, 4, "1000KB" },
1725     { 999930, NStr::fDS_NoDecimalPoint, 5, "1000KB" },
1726     { 999930, NStr::fDS_NoDecimalPoint, 6, "999930" },
1727     { 1000000, NStr::fDS_NoDecimalPoint, 3, "1MB" },
1728     { 1000000, NStr::fDS_NoDecimalPoint, 4, "1000KB" },
1729     { 1000000, NStr::fDS_NoDecimalPoint, 5, "1000KB" },
1730     { 1000000, NStr::fDS_NoDecimalPoint, 6, "1000KB" },
1731     { 1023993, NStr::fDS_NoDecimalPoint, 3, "1MB" },
1732     { 1023993, NStr::fDS_NoDecimalPoint, 4, "1024KB" },
1733     { 1023993, NStr::fDS_NoDecimalPoint, 5, "1024KB" },
1734     { 1023993, NStr::fDS_NoDecimalPoint, 6, "1024KB" },
1735     { 1047552, NStr::fDS_NoDecimalPoint, 3, "1MB" },
1736     { 1047552, NStr::fDS_NoDecimalPoint, 4, "1048KB" },
1737     { 1047552, NStr::fDS_NoDecimalPoint, 5, "1048KB" },
1738     { 1047552, NStr::fDS_NoDecimalPoint, 6, "1048KB" },
1739     { 1048064, NStr::fDS_NoDecimalPoint, 3, "1MB" },
1740     { 1048064, NStr::fDS_NoDecimalPoint, 4, "1048KB" },
1741     { 1048064, NStr::fDS_NoDecimalPoint, 5, "1048KB" },
1742     { 1048064, NStr::fDS_NoDecimalPoint, 6, "1048KB" },
1743     { 1048576, NStr::fDS_NoDecimalPoint, 3, "1MB" },
1744     { 1048576, NStr::fDS_NoDecimalPoint, 4, "1049KB" },
1745     { 1048576, NStr::fDS_NoDecimalPoint, 5, "1049KB" },
1746     { 1048576, NStr::fDS_NoDecimalPoint, 6, "1049KB" },
1747     { 1572864, NStr::fDS_NoDecimalPoint, 3, "2MB" },
1748     { 1572864, NStr::fDS_NoDecimalPoint, 4, "1573KB" },
1749     { 1572864, NStr::fDS_NoDecimalPoint, 5, "1573KB" },
1750     { 1572864, NStr::fDS_NoDecimalPoint, 6, "1573KB" },
1751     { 2000, NStr::fDS_Binary, 3, "1.95KiB" },
1752     { 2047, NStr::fDS_Binary, 3, "2.00KiB" },
1753     { 2000, NStr::fDS_Binary, 6, "1.953KiB" },
1754     { 2047, NStr::fDS_Binary, 6, "1.999KiB" },
1755     { 1048575, NStr::fDS_Binary, 6, "1.00000MiB" },
1756     { 2000000, NStr::fDS_Binary, 6, "1.90735MiB" },
1757     { 2097146, NStr::fDS_Binary, 6, "1.99999MiB" },
1758     { 10, NStr::fConvErr_NoThrow, 3, NULL },
1759     { 10, NStr::fMandatorySign, 3, NULL },
1760     { 10, NStr::fAllowCommas, 3, NULL },
1761     { 10, NStr::fAllowLeadingSpaces, 3, NULL },
1762     { 10, NStr::fAllowLeadingSymbols, 3, NULL },
1763     { 10, NStr::fAllowTrailingSpaces, 3, NULL },
1764     { 10, NStr::fAllowTrailingSymbols, 3, NULL },
1765     { 10, NStr::fDecimalPosix, 3, NULL },
1766     { 10, NStr::fDecimalPosixOrLocal, 3, NULL },
1767     { 10, NStr::fDS_ForceBinary, 3, NULL },
1768     { 10, NStr::fDS_ProhibitFractions, 3, NULL },
1769     { 10, NStr::fDS_ProhibitSpaceBeforeSuffix, 3, NULL }
1770 };
1771 
// Table-driven check of NStr::UInt8ToString_DataSize().
//
// Each row of s_Num2StrDataSizeTests is converted with its own flags and
// max_digits:
//  - if the call returns, the row must be a "good" one (IsGood()) and the
//    produced string must equal the row's expected string;
//  - if the call throws CException, the row must NOT be a "good" one.
BOOST_AUTO_TEST_CASE(s_NumToString_DataSize)
{
    const size_t count = sizeof(s_Num2StrDataSizeTests) /
                         sizeof(s_Num2StrDataSizeTests[0]);

    for (size_t i = 0;  i < count;  ++i)
    {
        const SUint8DataSizeValues* test = &s_Num2StrDataSizeTests[i];
        Uint8 num = test->num;
        NStr::TNumToStringFlags flags = test->flags;
        unsigned int max_digits = test->max_digits;

        try {
            // errno is preset to the sentinel before the conversion;
            // CHECK_ERRNO (a macro defined elsewhere in this file) is
            // evaluated right after it.
            errno = kTestErrno;
            string value = NStr::UInt8ToString_DataSize(num, flags, max_digits);
            CHECK_ERRNO;
            BOOST_CHECK(test->IsGood());
            // Rows that expect a failure carry expected == NULL.  Comparing
            // a std::string with a null C string is undefined behavior, so
            // if such a row unexpectedly converts, report it through the
            // IsGood() check above and skip the string comparison.
            if (test->IsGood()) {
                BOOST_CHECK_EQUAL(value, test->expected);
            }
        }
        catch (const CException&) {
            // A failed conversion is acceptable only for rows that are not
            // expected to succeed.
            BOOST_CHECK(!test->IsGood());
        }
    }
}
1796 
1797 
//----------------------------------------------------------------------------
// NStr::BoolToString() / NStr::StringToBool()
//----------------------------------------------------------------------------
1801 
// One row of the NStr::StringToBool() test table.
// Kept as a plain aggregate so that rows can be brace-initialized.
struct SStringBoolValues
{
    const char* str;        // String input
    bool        is_good;    // String input is correct
    bool        expected;   // Expected value
};
1808 
1809 static const SStringBoolValues s_StrToBoolTests[] = {
1810     { "true",   true,   true  },
1811     { "false",  true,   false },
1812     { "t",      true,   true  },
1813     { "f",      true,   false },
1814     { "yes",    true,   true  },
1815     { "no",     true,   false },
1816     { "y",      true,   true  },
1817     { "n",      true,   false },
1818     { "1",      true,   true  },
1819     { "0",      true,   false },
1820     { "truee",  false,  false },
1821     { "10",     false,  false },
1822     { "00",     false,  false }
1823 };
1824 
// Checks NStr::BoolToString() for both boolean values, then runs
// NStr::StringToBool() over every row of s_StrToBoolTests:
//  - if the call returns, errno must no longer hold the sentinel, the row
//    must be marked as correct input and the result must equal 'expected';
//  - if the call throws CException, the row must be marked as incorrect.
BOOST_AUTO_TEST_CASE(s_BoolToString)
{
    // BoolToString()

    BOOST_CHECK_EQUAL(NStr::BoolToString(true), string("true"));
    BOOST_CHECK_EQUAL(NStr::BoolToString(false), string("false"));

    // StringToBool()

    const size_t count = sizeof(s_StrToBoolTests) /
                         sizeof(s_StrToBoolTests[0]);
    for (size_t i = 0;  i < count;  ++i)
    {
        const SStringBoolValues* test = &s_StrToBoolTests[i];
        try {
            // Preset errno to the sentinel so that the check below can tell
            // whether the conversion touched it.
            errno = kTestErrno;
            bool value = NStr::StringToBool(test->str);
            BOOST_CHECK(errno != kTestErrno);
            BOOST_CHECK(test->is_good);
            BOOST_CHECK_EQUAL(value, test->expected);
        }
        catch (const CException&) {
            BOOST_CHECK(!test->is_good);
        }
    }
}
1851 
1852 
//----------------------------------------------------------------------------
// NStr::PtrToString() / NStr::StringToPtr()
//----------------------------------------------------------------------------
1856 
BOOST_AUTO_TEST_CASE(s_PtrToString)1857 BOOST_AUTO_TEST_CASE(s_PtrToString)
1858 {
1859     string s;
1860     {{
1861         errno = kTestErrno;
1862         NStr::PtrToString(s, &s);
1863         BOOST_CHECK(!s.empty());
1864         BOOST_CHECK(errno != kTestErrno);
1865 
1866         errno = kTestErrno;
1867         const void* ptr = NStr::StringToPtr(s);
1868         BOOST_CHECK(errno != kTestErrno);
1869         BOOST_CHECK_EQUAL(ptr, &s);
1870     }}
1871     {{
1872         #if SIZEOF_VOIDP == 8
1873             const void* ptr_val = (void*)0x01234d00002fe008;
1874             #if defined(NCBI_OS_MSWIN)
1875                 const char* ptr_str = "01234D00002FE008";
1876             #elif defined(NCBI_OS_SOLARIS)
1877                 const char* ptr_str = "1234d00002fe008";
1878             #else
1879                 const char* ptr_str = "0x1234d00002fe008";
1880             #endif
1881         #else
1882             const void* ptr_val = (void*)0xD02fe008;
1883             #if defined(NCBI_OS_MSWIN)
1884                 const char* ptr_str = "D02FE008";
1885             #elif defined(NCBI_OS_SOLARIS)
1886                 const char* ptr_str = "d02fe008";
1887             #else
1888                 const char* ptr_str = "0xd02fe008";
1889             #endif
1890         #endif
1891 
1892         errno = kTestErrno;
1893         s = NStr::PtrToString(ptr_val);
1894         BOOST_CHECK(errno != kTestErrno);
1895         BOOST_CHECK_EQUAL(s, string(ptr_str));
1896 
1897         errno = kTestErrno;
1898         const void* ptr1 = NStr::StringToPtr(s);
1899         BOOST_CHECK(errno != kTestErrno);
1900         BOOST_CHECK_EQUAL(ptr1, ptr_val);
1901 
1902         errno = kTestErrno;
1903         const void* ptr2 = NStr::StringToPtr(CTempString(ptr_str));
1904         BOOST_CHECK(errno != kTestErrno);
1905         BOOST_CHECK_EQUAL(ptr2, ptr_val);
1906     }}
1907     {{
1908         const void* ptr;
1909 
1910         errno = kTestErrno;
1911         ptr = NStr::StringToPtr("0");
1912         BOOST_CHECK(errno == 0);
1913         BOOST_CHECK_EQUAL(ptr, (void*)0);
1914 
1915         errno = kTestErrno;
1916         ptr = NStr::StringToPtr("q");
1917         BOOST_CHECK(errno != kTestErrno  &&  errno > 0);
1918         BOOST_CHECK_EQUAL(ptr, (void*)0);
1919     }}
1920 }
1921 
1922 
1923 //----------------------------------------------------------------------------
1924 // CommonPrefixSize / CommonSuffixSize / CommonOverlapSize
1925 //----------------------------------------------------------------------------
1926 
// Checks NStr::CommonPrefixSize(), NStr::CommonSuffixSize() and
// NStr::CommonOverlapSize() on small literal inputs, including empty
// strings, identical strings and strings with nothing in common.
BOOST_AUTO_TEST_CASE(s_CommonSize)
{
    // Common prefix
    BOOST_CHECK_EQUAL( NStr::CommonPrefixSize("", ""),            0U );
    BOOST_CHECK_EQUAL( NStr::CommonPrefixSize("123", ""),         0U );
    BOOST_CHECK_EQUAL( NStr::CommonPrefixSize("123", "123"),      3U );
    BOOST_CHECK_EQUAL( NStr::CommonPrefixSize("123", "12345"),    3U );
    BOOST_CHECK_EQUAL( NStr::CommonPrefixSize("12367", "12345"),  3U );
    BOOST_CHECK_EQUAL( NStr::CommonPrefixSize("123", "456"),      0U );

    // Common suffix
    BOOST_CHECK_EQUAL( NStr::CommonSuffixSize("", ""),            0U );
    BOOST_CHECK_EQUAL( NStr::CommonSuffixSize("123", ""),         0U );
    BOOST_CHECK_EQUAL( NStr::CommonSuffixSize("123", "123"),      3U );
    BOOST_CHECK_EQUAL( NStr::CommonSuffixSize("123", "12345"),    0U );
    BOOST_CHECK_EQUAL( NStr::CommonSuffixSize("12345", "345"),    3U );
    BOOST_CHECK_EQUAL( NStr::CommonSuffixSize("12345", "145"),    2U );
    // Nothing in common.  (This line used to repeat the CommonPrefixSize()
    // check from the group above, leaving the suffix case untested.)
    BOOST_CHECK_EQUAL( NStr::CommonSuffixSize("123", "456"),      0U );

    // Overlap: the expectations below are not symmetric in the arguments
    // (e.g. ("123", "1234") -> 3 but ("1234", "123") -> 0).
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("", ""),           0U );
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("", "123"),        0U );
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("123", ""),        0U );
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("123", "123"),     3U );
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("123", "456"),     0U );
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("123", "1234"),    3U );
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("1234", "123"),    0U );
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("1234", "234561"), 3U );
    BOOST_CHECK_EQUAL( NStr::CommonOverlapSize("234561", "1234"), 1U );
}
1954 
1955 
//----------------------------------------------------------------------------
// NStr::Replace() / NStr::ReplaceInPlace()
//----------------------------------------------------------------------------
1959 
BOOST_AUTO_TEST_CASE(s_Replace)1960 BOOST_AUTO_TEST_CASE(s_Replace)
1961 {
1962     {{
1963         string src("aaabbbaaccczzcccXX");
1964         string dst;
1965 
1966         // Replace()
1967 
1968         string search("ccc");
1969         string replace("RrR");
1970         NStr::Replace(src, search, replace, dst);
1971         BOOST_CHECK_EQUAL(dst, string("aaabbbaaRrRzzRrRXX"));
1972 
1973         search  = "a";
1974         replace = "W";
1975         NStr::Replace(src, search, replace, dst, 6, 1);
1976         BOOST_CHECK_EQUAL(dst, string("aaabbbWaccczzcccXX"));
1977 
1978         search  = "bbb";
1979         replace = "BBB";
1980         NStr::Replace(src, search, replace, dst, 50);
1981         BOOST_CHECK_EQUAL(dst, string("aaabbbaaccczzcccXX"));
1982 
1983         search  = "ggg";
1984         replace = "no";
1985         dst = NStr::Replace(src, search, replace);
1986         BOOST_CHECK_EQUAL(dst, string("aaabbbaaccczzcccXX"));
1987 
1988         search  = "a";
1989         replace = "A";
1990         dst = NStr::Replace(src, search, replace);
1991         BOOST_CHECK_EQUAL(dst, string("AAAbbbAAccczzcccXX"));
1992 
1993         search  = "X";
1994         replace = "x";
1995         dst = NStr::Replace(src, search, replace, src.size() - 1);
1996         BOOST_CHECK_EQUAL(dst, string("aaabbbaaccczzcccXx"));
1997 
1998         // ReplaceInPlace()
1999 
2000         search  = "a";
2001         replace = "W";
2002         NStr::ReplaceInPlace(src, search, replace);
2003         BOOST_CHECK_EQUAL(src, string("WWWbbbWWccczzcccXX"));
2004 
2005         search = "W";
2006         replace = "a";
2007         NStr::ReplaceInPlace(src, search, replace, 2, 2);
2008         BOOST_CHECK_EQUAL(src, string("WWabbbaWccczzcccXX"));
2009 
2010         search = "a";
2011         replace = "bb";
2012         NStr::ReplaceInPlace(src, search, replace);
2013         BOOST_CHECK_EQUAL(src, string("WWbbbbbbbWccczzcccXX"));
2014 
2015         search = "bb";
2016         replace = "c";
2017         NStr::ReplaceInPlace(src, search, replace);
2018         BOOST_CHECK_EQUAL(src, string("WWcccbWccczzcccXX"));
2019     }}
2020 
2021     // "number of replacements occurred" test
2022     {{
2023         string src("aaabbbaaccczzcccXX");
2024         size_t n;
2025         string tmp;
2026 
2027         NStr::Replace(src, "a", "bb", tmp, 0, 0, &n);
2028         BOOST_CHECK(n == 5);
2029         NStr::Replace(src, "aa", "bb", tmp, 0, 0, &n);
2030         BOOST_CHECK(n == 2);
2031         NStr::Replace(src, "aaa", "b", tmp, 0, 0, &n);
2032         BOOST_CHECK(n == 1);
2033 
2034         tmp = src;
2035         NStr::ReplaceInPlace(tmp, "a", "bb", 0, 0, &n);
2036         BOOST_CHECK(n == 5);
2037         tmp = src;
2038         NStr::ReplaceInPlace(tmp, "aa", "bb", 0, 0, &n);
2039         BOOST_CHECK(n == 2);
2040         tmp = src;
2041         NStr::ReplaceInPlace(tmp, "aaa", "b", 0, 0, &n);
2042         BOOST_CHECK(n == 1);
2043     }}
2044 
2045     // Replace() for big strings (CXX-3871)
2046     {{
2047         const unsigned int kStringSize = 50*1024;
2048         AutoArray<char> buf(kStringSize);
2049         char* src_buf = buf.get();
2050         BOOST_CHECK(src_buf);
2051         assert(src_buf);
2052 
2053         srand((unsigned int)time(0));
2054         for (size_t i = 0; i < kStringSize; i++) {
2055             src_buf[i] = (rand() % 2 == 0) ? 'A' : 'B';
2056         }
2057         string src_str(src_buf, kStringSize);
2058         string dst_str;
2059         string cmp_str;
2060         size_t n1, n2;
2061 
2062         NStr::Replace(src_str, "A", "CCC", dst_str, 0, 0, &n1);
2063         NStr::Replace(dst_str, "CCC", "A", cmp_str, 0, 0, &n2);
2064         BOOST_CHECK_EQUAL(src_str, cmp_str);
2065         BOOST_CHECK_EQUAL(n1, n2);
2066 
2067         NStr::Replace(src_str, "A", "DD", dst_str, 0, 1, &n1);
2068         NStr::Replace(dst_str, "DD", "A", cmp_str, 0, 0, &n2);
2069         BOOST_CHECK_EQUAL(src_str, cmp_str);
2070         BOOST_CHECK(n1 == 1  &&  n2 == 1);
2071 
2072         NStr::Replace(src_str, "A", "EEEE", dst_str, 1000, 5, &n1);
2073         NStr::Replace(dst_str, "EEEE", "A", cmp_str, 0, 0, &n2);
2074         BOOST_CHECK_EQUAL(src_str, cmp_str);
2075         BOOST_CHECK(n1 <= 5);
2076         BOOST_CHECK_EQUAL(n1, n2);
2077 
2078         NStr::Replace(src_str, "A", "FFFFF", dst_str, 1000, kStringSize, &n1);
2079         NStr::Replace(dst_str, "FFFFF", "A", cmp_str, 0, 0, &n2);
2080         BOOST_CHECK_EQUAL(src_str, cmp_str);
2081         BOOST_CHECK_EQUAL(n1, n2);
2082     }}
2083 }
2084 
2085 
2086 //----------------------------------------------------------------------------
2087 // NStr::PrintableString/ParseEscapes()
2088 //----------------------------------------------------------------------------
2089 
BOOST_AUTO_TEST_CASE(s_PrintableString)2090 BOOST_AUTO_TEST_CASE(s_PrintableString)
2091 {
2092     // NStr::PrintableString()
2093 
2094     BOOST_CHECK(NStr::PrintableString(kEmptyStr).empty());
2095     BOOST_CHECK(NStr::PrintableString
2096                 ("AB\\CD\nAB\rCD\vAB?\tCD\'AB\"").compare
2097                 ("AB\\\\CD\\nAB\\rCD\\vAB\?\\tCD\\\'AB\\\"") == 0);
2098     BOOST_CHECK(NStr::PrintableString
2099                 ("A\x01\r\177\x000F\0205B" + string(1,'\0') + "CD").compare
2100                 ("A\\1\\r\\177\\17\\0205B\\0CD") == 0);
2101     BOOST_CHECK(NStr::PrintableString
2102                 ("A\x01\r\xC1\xF7\x07\x3A\252\336\202\x000F\0205B" + string(1,'\0') + "CD",
2103                  NStr::fNonAscii_Quote | NStr::fPrintable_Full).compare
2104                 ("A\\001\\r\\301\\367\\a:\\252\\336\\202\\017\\0205B\\000CD") == 0);
2105     BOOST_CHECK(NStr::PrintableString
2106                 ("A\nB\\\nC").compare
2107                 ("A\\nB\\\\\\nC") == 0);
2108     BOOST_CHECK(NStr::PrintableString
2109                 ("A\nB\\\nC", NStr::fNewLine_Passthru).compare
2110                 ("A\\n\\\nB\\\\\\n\\\nC") == 0);
2111 
2112     // NStr::ParseEscapes
2113 
2114     BOOST_CHECK(NStr::ParseEscapes(kEmptyStr).empty());
2115     BOOST_CHECK(NStr::ParseEscapes
2116                 ("AB\\\\CD\\nAB\\rCD\\vAB\?\\tCD\\\'AB\\\"").compare
2117                 ("AB\\CD\nAB\rCD\vAB?\tCD\'AB\"") == 0);
2118     BOOST_CHECK(NStr::ParseEscapes
2119                 ("A\\1\\r2\\17\\0205B\\x00CD\\x000F").compare
2120                 ("A\x01\r2\x0F\x10""5B\xCD\x0F") == 0);
2121     BOOST_CHECK(NStr::ParseEscapes
2122                 ("A\\\nB\\nC\\\\\\n\\\nD").compare
2123                 ("AB\nC\\\nD") == 0);
2124 
2125     // NStr::ParseEscapes() with out or range sequences
2126     {{
2127         string s;
2128         // eEscSeqRange_LastByte
2129         BOOST_CHECK_EQUAL(NStr::ParseEscapes("\\x4547", NStr::eEscSeqRange_Standard), "\x47");
2130 
2131         // eEscSeqRange_FirstByte
2132         BOOST_CHECK_EQUAL(NStr::ParseEscapes("\\x4547", NStr::eEscSeqRange_FirstByte), "E47");
2133 
2134         // eEscSeqRange_Errno
2135         errno = kTestErrno;
2136         s = NStr::ParseEscapes("\\x45", NStr::eEscSeqRange_Errno);
2137         BOOST_CHECK(errno == 0);
2138         BOOST_CHECK_EQUAL(s, "\x45");
2139         errno = kTestErrno;
2140         s = NStr::ParseEscapes("\\x4547", NStr::eEscSeqRange_Errno);
2141         BOOST_CHECK(errno == ERANGE);
2142         BOOST_CHECK(s.empty());
2143 
2144         // eEscSeqRange_Throw
2145         try {
2146             s = NStr::ParseEscapes("\\x4547", NStr::eEscSeqRange_Throw);
2147             _TROUBLE;
2148         }
2149         catch (CStringException&) {}
2150 
2151         // eEscSeqRange_User
2152         BOOST_CHECK_EQUAL(NStr::ParseEscapes("\\x4547", NStr::eEscSeqRange_User, '?'), "?");
2153     }}
2154 
2155 
2156     // NStr::PrintableString() vs. Printable()
2157     size_t size = (size_t)(rand() & 0xFFFF);
2158     string data;
2159     for (size_t i = 0;  i < size;  i++) {
2160         data += char(rand() & 0xFF);
2161     }
2162     BOOST_CHECK(data.size() == size);
2163 
2164     // NB: embedded '\0's are also being checked
2165     CNcbiOstrstream os1;
2166     os1 << Printable(data);
2167     const string& s1 = NStr::PrintableString(data);
2168     BOOST_CHECK(s1.compare(CNcbiOstrstreamToString(os1)) == 0);
2169 
2170     // Just to make sure it still parses okay
2171     BOOST_CHECK(NStr::ParseEscapes(s1).compare(data) == 0);
2172 
2173     // NB: checks C string (no '\0's)
2174     const char* str = data.c_str();
2175     CNcbiOstrstream os2;
2176     os2 << Printable(str);
2177     const string& s2 = NStr::PrintableString(str);
2178     BOOST_CHECK(s2.compare(CNcbiOstrstreamToString(os2)) == 0);
2179 }
2180 
2181 
2182 //----------------------------------------------------------------------------
2183 // NStr::Escape()
2184 //----------------------------------------------------------------------------
2185 
// One row of the NStr::Escape() test table.
// The s_Escape test case calls NStr::Escape(str, meta, esc), expects
// 'result', and then expects NStr::Unescape(result, esc) to give 'str' back.
struct SEscapeTest {
    const char*  str;     // source string to be escaped
    const char*  meta;    // set of characters passed as the "meta" argument
    char         esc;     // escape character
    const char*  result;  // expected escaped string
};
2192 
2193 static const SEscapeTest s_EscapeTests[] = {
2194     { "",           "",    '\\',  "" },
2195     { "ABC",        "#",   '\\',  "ABC" },
2196     { "#",          "#",   '\\',  "\\#" },
2197     { "###",        "#",   '\\',  "\\#\\#\\#" },
2198     { "##A",        "#",   '\\',  "\\#\\#A" },
2199     { "A##",        "#",   '\\',  "A\\#\\#" },
2200     { "A#B##C###D", "#",   '\\',  "A\\#B\\#\\#C\\#\\#\\#D" },
2201     { "A#B##C###D", "#",   ' ',   "A #B # #C # # #D" },
2202     { "A*B=#C#=*D", "#*=", '\\',  "A\\*B\\=\\#C\\#\\=\\*D" },
2203     { "*",          "#",   '*',   "**" },
2204     { "*A**",       "#",   '*',   "**A****" },
2205     { "**A***B*",   "#",   '*',   "****A******B**" },
2206     { "********",   "#",   '*',   "****************" }
2207 };
2208 
// Test case for NStr::Escape() / NStr::Unescape().
// Each s_EscapeTests row is escaped and compared with the expected result;
// the escaped string is then unescaped and must match the source string.
BOOST_AUTO_TEST_CASE(s_Escape)
{
    const size_t count = sizeof(s_EscapeTests) / sizeof(s_EscapeTests[0]);
    for (size_t i = 0;  i < count;  ++i)
    {
        const SEscapeTest& test = s_EscapeTests[i];
        const string escaped = NStr::Escape(test.str, test.meta, test.esc);
        // BOOST_CHECK_EQUAL prints both operands on a mismatch, which makes
        // a failing table row identifiable from the test log.
        BOOST_CHECK_EQUAL(escaped, test.result);
        BOOST_CHECK_EQUAL(NStr::Unescape(escaped, test.esc), test.str);
    }
}
2220 
2221 
2222 //----------------------------------------------------------------------------
2223 // NStr::Quote()
2224 //----------------------------------------------------------------------------
2225 
// One row of the NStr::Quote() test table.
// The s_Quote test case calls NStr::Quote(str, quote, esc), expects 'result',
// and then expects NStr::Unquote(result, esc) to give 'str' back.
struct SQuoteTest {
    const char* str;     // source (unquoted) string
    char        quote;   // quote character
    char        esc;     // escape character
    const char* result;  // expected quoted string
};
2232 
2233 static const SQuoteTest s_QuoteTests[] = {
2234 
2235     // single quoting
2236 
2237     { "ABC",        '"',   '\\',   "\"ABC\""          },
2238     { "'ABC'",      '"',   '\\',   "\"'ABC'\""        },
2239     { "\"ABC\"",    '"',   '\\',   "\"\\\"ABC\\\"\""  },
2240     { "-ABC-",      '"',   '\\',   "\"-ABC-\""        },
2241     { "A'B'C",      '"',   '\\',   "\"A'B'C\""        },
2242     { "A\"B\"C",    '"',   '\\',   "\"A\\\"B\\\"C\""  },
2243     { "A-B-C",      '"',   '\\',   "\"A-B-C\""        },
2244     { "A\\B\\C",    '"',   '\\',   "\"A\\\\B\\\\C\""  },
2245 
2246     { "ABC",        '\'',  '\\',   "'ABC'"            },
2247     { "'ABC'",      '\'',  '\\',   "'\\'ABC\\''"      },
2248     { "\"ABC\"",    '\'',  '\\',   "'\"ABC\"'"        },
2249     { "-ABC-",      '\'',  '\\',   "'-ABC-'"          },
2250     { "A'B'C",      '\'',  '\\',   "'A\\'B\\'C'"      },
2251     { "A\"B\"C",    '\'',  '\\',   "'A\"B\"C'"        },
2252     { "A-B-C",      '\'',  '\\',   "'A-B-C'"          },
2253     { "A\\B\\C",    '\'',  '\\',   "'A\\\\B\\\\C'"    },
2254 
2255     { "ABC",        '-',   '\\',   "-ABC-"            },
2256     { "'ABC'",      '-',   '\\',   "-'ABC'-"          },
2257     { "\"ABC\"",    '-',   '\\',   "-\"ABC\"-"        },
2258     { "-ABC-",      '-',   '\\',   "-\\-ABC\\--"      },
2259     { "A'B'C",      '-',   '\\',   "-A'B'C-"          },
2260     { "A\"B\"C",    '-',   '\\',   "-A\"B\"C-"        },
2261     { "A-B-C",      '-',   '\\',   "-A\\-B\\-C-"      },
2262     { "A\\B\\C",    '-',   '\\',   "-A\\\\B\\\\C-"    },
2263 
2264     // double quoting
2265 
2266     { "ABC",        '"',   '"',    "\"ABC\""          },
2267     { "'ABC'",      '"',   '"',    "\"'ABC'\""        },
2268     { "\"ABC\"",    '"',   '"',    "\"\"\"ABC\"\"\""  },
2269     { "-ABC-",      '"',   '"',    "\"-ABC-\""        },
2270     { "A'B'C",      '"',   '"',    "\"A'B'C\""        },
2271     { "A\"B\"C",    '"',   '"',    "\"A\"\"B\"\"C\""  },
2272     { "A-B-C",      '"',   '"',    "\"A-B-C\""        },
2273     { "A\\B\\C",    '"',   '"',    "\"A\\B\\C\""      },
2274 
2275     { "ABC",        '\'',  '\'',   "'ABC'"            },
2276     { "'ABC'",      '\'',  '\'',   "'''ABC'''"        },
2277     { "\"ABC\"",    '\'',  '\'',   "'\"ABC\"'"        },
2278     { "-ABC-",      '\'',  '\'',   "'-ABC-'"          },
2279     { "A'B'C",      '\'',  '\'',   "'A''B''C'"        },
2280     { "A\"B\"C",    '\'',  '\'',   "'A\"B\"C'"        },
2281     { "A-B-C",      '\'',  '\'',   "'A-B-C'"          },
2282     { "A\\B\\C",    '\'',  '\'',   "'A\\B\\C'"        },
2283 
2284     { "ABC",        '-',   '-',    "-ABC-"            },
2285     { "'ABC'",      '-',   '-',    "-'ABC'-"          },
2286     { "\"ABC\"",    '-',   '-',    "-\"ABC\"-"        },
2287     { "-ABC-",      '-',   '-',    "---ABC---"        },
2288     { "A'B'C",      '-',   '-',    "-A'B'C-"          },
2289     { "A\"B\"C",    '-',   '-',    "-A\"B\"C-"        },
2290     { "A-B-C",      '-',   '-',    "-A--B--C-"        },
2291     { "A\\B\\C",    '-',   '-',   "-A\\B\\C-"         }
2292 };
2293 
// Test case for NStr::Quote() / NStr::Unquote().
//   1. Table-driven: every s_QuoteTests row is quoted and compared with the
//      expected result, then the expected result is unquoted and compared
//      with the source string.
//   2. Matrix: round trip with default Quote()/Unquote() arguments for all
//      two-byte C strings whose first byte is non-zero.
BOOST_AUTO_TEST_CASE(s_Quote)
{
    const size_t count = sizeof(s_QuoteTests) / sizeof(s_QuoteTests[0]);
    for (size_t i = 0; i < count; ++i)
    {
        const SQuoteTest& test = s_QuoteTests[i];
        // BOOST_CHECK_EQUAL prints both operands on a mismatch, which makes
        // a failing table row identifiable from the test log.
        BOOST_CHECK_EQUAL(NStr::Quote(test.str, test.quote, test.esc), test.result);
        BOOST_CHECK_EQUAL(NStr::Unquote(test.result, test.esc), test.str);
    }
    // Matrix test
    {
        for (unsigned i1 = 1; i1 < 256; i1++) {
            for (unsigned i2 = 0; i2 < 256; i2++) {
                // NUL-terminated buffer; when i2 == 0 the C string
                // consists of the first byte only.
                char s[3];
                s[0] = (char)i1;
                s[1] = (char)i2;
                s[2] = '\0';
                string sq = NStr::Quote(s);
                string su = NStr::Unquote(sq);
                BOOST_CHECK_EQUAL(su, CTempString(s));
            }
        }
    }
}
2318 
2319 
2320 //----------------------------------------------------------------------------
2321 // NStr::Sanitize()
2322 //----------------------------------------------------------------------------
2323 
2324 struct SSanitizeTest {
2325     const char*     str;
2326     NStr::TSS_Flags flags;
2327     const char*     allow;
2328     const char*     reject;
2329     char            replacement;
2330     // Results for simplified version, that don't use (allow + reject + replacement) fields
2331     const char*     res_allowed;       // flags
2332     const char*     res_rejected;      // flags + fSS_Reject
2333     // Results for extended version
2334     const char*     res_ex_allowed;    // flags
2335     const char*     res_ex_rejected;   // flags + fSS_Reject
2336 };
2337 
2338 // Abbreviations to shorten tests description
2339 const NStr::TSS_Flags A   = NStr::fSS_alpha;
2340 const NStr::TSS_Flags D   = NStr::fSS_digit;
2341 const NStr::TSS_Flags AD  = NStr::fSS_alnum;
2342 const NStr::TSS_Flags PR  = NStr::fSS_print;
2343 const NStr::TSS_Flags C   = NStr::fSS_cntrl;
2344 const NStr::TSS_Flags I   = NStr::fSS_punct;
2345 const NStr::TSS_Flags RM  = NStr::fSS_Remove;
2346 const NStr::TSS_Flags NM  = NStr::fSS_NoMerge;
2347 const NStr::TSS_Flags NT  = NStr::fSS_NoTruncate;
2348 const NStr::TSS_Flags NTB = NStr::fSS_NoTruncate_Begin;
2349 const NStr::TSS_Flags NTE = NStr::fSS_NoTruncate_End;
2350 
2351 static const SSanitizeTest s_SanitizeTests[] = {
2352 
2353     // Default flags
2354 
2355     { "",                 0,   "",  "",  '?',  ""         , ""               , ""            , ""               }, // 0
2356     { "   ",              0,   "",  "",  '?',  ""         , ""               , ""            , "?"              },
2357     { "ABC",              0,   "",  "",  '?',  "ABC"      , ""               , "ABC"         , "?"              },
2358     { "  ABC",            0,   "",  "",  '?',  "ABC"      , ""               , "ABC"         , "?"              },
2359     { "ABC  ",            0,   "",  "",  '?',  "ABC"      , ""               , "ABC"         , "?"              },
2360     { " ABC ",            0,   "",  "",  '?',  "ABC"      , ""               , "ABC"         , "?"              },
2361     { "   ABC   ",        0,   "",  "",  '?',  "ABC"      , ""               , "ABC"         , "?"              },
2362     { "   A B C   ",      0,   "",  "",  '?',  "A B C"    , ""               , "A B C"       , "?"              },
2363     { "   A  B  C   ",    0,   "",  "",  '?',  "A B C"    , ""               , "A B C"       , "?"              },
2364     { "   AB CD EF   ",   0,   "",  "",  '?',  "AB CD EF" , ""               , "AB CD EF"    , "?"              },
2365     { "   AB  CD  EF   ", 0,   "",  "",  '?',  "AB CD EF" , ""               , "AB CD EF"    , "?"              }, // 10
2366     { "\nA\tB""\x01 ""C", 0,   "",  "",  '?',  "A B C"    , "\n \t \x01"     , "?A?B? C"     , "\n?\t?\x01?"    },
2367     { "\n \nA B\nC\n \n", 0,   "",  "",  '?',  "A B C"    , "\n \n \n \n \n" , "? ?A B?C? ?" , "\n?\n?\n?\n?\n" },
2368     { "\n\nA B\nC\n\n",   0,   "",  "",  '?',  "A B C"    , "\n\n \n \n\n"   , "?A B?C?"     , "\n\n?\n?\n\n"   },
2369     { "  \nA B\nC\n  ",   0,   "",  "",  '?',  "A B C"    , "\n \n \n"       , "?A B?C?"     , "?\n?\n?\n?"     },
2370 
2371     // Default flags (use custom filters only, so no character classes)
2372     // Affects extended Sanitize() only, simplified results are same as before.
2373 
2374     { "",                 0,   "A\t",  "BE",  '?',  ""         , ""               , ""                , ""      }, // 15
2375     { "   ",              0,   "A\t",  "BE",  '?',  ""         , ""               , ""                , "?"     },
2376     { "   ",              0,   "",     " ",   '?',  ""         , ""               , "?"               , "?"     },
2377     { "   ",              0,   " ",    "",    '?',  ""         , ""               , ""                , ""      },
2378     { "   ",              0,   " ",    " ",   '?',  ""         , ""               , "?"               , "?"     },
2379     { "ABC",              0,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"             , "A?"    }, // 20
2380     { "  ABC",            0,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"             , "?A?"   },
2381     { "ABC  ",            0,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"             , "A?"    },
2382     { " ABC ",            0,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"             , "?A?"   },
2383     { "   ABC   ",        0,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"             , "?A?"   },
2384     { "   A B C   ",      0,   "A\t",  "BE",  '?',  "A B C"    , ""               , "A ? C"           , "?A?"   },
2385     { "   A  B  C   ",    0,   "A\t",  "BE",  '?',  "A B C"    , ""               , "A ? C"           , "?A?"   },
2386     { "   AB CD EF   ",   0,   "A\t",  "BE",  '?',  "AB CD EF" , ""               , "A? CD ?F"        , "?A?"   },
2387     { "   AB  CD  EF   ", 0,   "A\t",  "BE",  '?',  "AB CD EF" , ""               , "A? CD ?F"        , "?A?"   },
2388     { "\nA\tB""\x01 ""C", 0,   "A\t",  "BE",  '?',  "A B C"    , "\n \t \x01"     , "\nA\t?\x01 C"    , "?A\t?" },
2389     { "\n \nA B\nC\n \n", 0,   "A\t",  "BE",  '?',  "A B C"    , "\n \n \n \n \n" , "\n \nA ?\nC\n \n", "?A?"   }, // 30
2390     { "\n\nA B\nC\n\n",   0,   "A\t",  "BE",  '?',  "A B C"    , "\n\n \n \n\n"   , "\n\nA ?\nC\n\n"  , "?A?"   },
2391     { "  \nA B\nC\n  ",   0,   "A\t",  "BE",  '?',  "A B C"    , "\n \n \n"       , "\nA ?\nC\n"      , "?A?"   },
2392 
2393     // Default flags (use custom filters + fSS_Print)
2394     // Affects extended Sanitize() only, simplified results are same as before.
2395 
2396     { "",                 PR,   "A\t",  "BE",  '?',  ""         , ""               , ""           , ""                }, // 33
2397     { "   ",              PR,   "A\t",  "BE",  '?',  ""         , ""               , ""           , "?"               },
2398     { "   ",              PR,   " ",    "",    '?',  ""         , ""               , ""           , ""                },
2399     { "   ",              PR,   "",     " ",   '?',  ""         , ""               , "?"          , "?"               },
2400     { "   ",              PR,   " ",    " ",   '?',  ""         , ""               , "?"          , "?"               },
2401     { "ABC",              PR,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "A?"              },
2402     { "  ABC",            PR,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "?A?"             },
2403     { "ABC  ",            PR,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "A?"              }, // 40
2404     { " ABC ",            PR,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "?A?"             },
2405     { "   ABC   ",        PR,   "A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "?A?"             },
2406     { "   A B C   ",      PR,   "A\t",  "BE",  '?',  "A B C"    , ""               , "A ? C"      , "?A?"             },
2407     { "   A  B  C   ",    PR,   "A\t",  "BE",  '?',  "A B C"    , ""               , "A ? C"      , "?A?"             },
2408     { "   AB CD EF   ",   PR,   "A\t",  "BE",  '?',  "AB CD EF" , ""               , "A? CD ?F"   , "?A?"             },
2409     { "   AB  CD  EF   ", PR,   "A\t",  "BE",  '?',  "AB CD EF" , ""               , "A? CD ?F"   , "?A?"             },
2410     { "\nA\tB""\x01 ""C", PR,   "A\t",  "BE",  '?',  "A B C"    , "\n \t \x01"     , "?A\t? C"    , "\nA\t?\x01?"     },
2411     { "\n \nA B\nC\n \n", PR,   "A\t",  "BE",  '?',  "A B C"    , "\n \n \n \n \n" , "? ?A ?C? ?" , "\n?\nA?\n?\n?\n" },
2412     { "\n\nA B\nC\n\n",   PR,   "A\t",  "BE",  '?',  "A B C"    , "\n\n \n \n\n"   , "?A ?C?"     , "\n\nA?\n?\n\n"   },
2413     { "  \nA B\nC\n  ",   PR,   "A\t",  "BE",  '?',  "A B C"    , "\n \n \n"       , "?A ?C?"     , "?\nA?\n?\n?"     }, // 50
2414 
2415     // same as before + allowed space
2416 
2417     { "",                 PR,   " A\t",  "BE",  '?',  ""         , ""               , ""           , ""                 }, // 51
2418     { "   ",              PR,   " A\t",  "BE",  '?',  ""         , ""               , ""           , ""                 },
2419     { "   ",              PR,   " ",     "",    '?',  ""         , ""               , ""           , ""                 },
2420     { "   ",              PR,   "",      " ",   '?',  ""         , ""               , "?"          , "?"                },
2421     { "   ",              PR,   " ",     " ",   '?',  ""         , ""               , "?"          , "?"                },
2422     { "ABC",              PR,   " A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "A?"               },
2423     { "  ABC",            PR,   " A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "A?"               },
2424     { "ABC  ",            PR,   " A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "A?"               },
2425     { " ABC ",            PR,   " A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "A?"               },
2426     { "   ABC   ",        PR,   " A\t",  "BE",  '?',  "ABC"      , ""               , "A?C"        , "A?"               }, // 60
2427     { "   A B C   ",      PR,   " A\t",  "BE",  '?',  "A B C"    , ""               , "A ? C"      , "A ? ?"            },
2428     { "   A  B  C   ",    PR,   " A\t",  "BE",  '?',  "A B C"    , ""               , "A ? C"      , "A ? ?"            },
2429     { "   AB CD EF   ",   PR,   " A\t",  "BE",  '?',  "AB CD EF" , ""               , "A? CD ?F"   , "A? ? ?"           },
2430     { "   AB  CD  EF   ", PR,   " A\t",  "BE",  '?',  "AB CD EF" , ""               , "A? CD ?F"   , "A? ? ?"           },
2431     { "\nA\tB""\x01 ""C", PR,   " A\t",  "BE",  '?',  "A B C"    , "\n \t \x01"     , "?A\t? C"    , "\nA\t?\x01 ?"     },
2432     { "\n \nA B\nC\n \n", PR,   " A\t",  "BE",  '?',  "A B C"    , "\n \n \n \n \n" , "? ?A ?C? ?" , "\n \nA ?\n?\n \n" },
2433     { "\n\nA B\nC\n\n",   PR,   " A\t",  "BE",  '?',  "A B C"    , "\n\n \n \n\n"   , "?A ?C?"     , "\n\nA ?\n?\n\n"   },
2434     { "  \nA B\nC\n  ",   PR,   " A\t",  "BE",  '?',  "A B C"    , "\n \n \n"       , "?A ?C?"     , "\nA ?\n?\n"       },
2435 
2436     // Character classes filters
2437 
2438     { "A123,BC45\nD6.",   0,    "A\t",   "BE",  '?',  "A123,BC45 D6."  , "\n"            , "A123,?C45\nD6." , "A?"            },
2439     { "A123,BC45\nD6.",   PR,   "A\t",   "BE",  '?',  "A123,BC45 D6."  , "\n"            , "A123,?C45?D6."  , "A?\n?"         }, // 70
2440     { "A123,BC45\nD6.",   A,    "A\t",   "BE",  '?',  "A BC D"         , "123, 45\n 6."  , "A?C?D?"         , "A123,?45\n?6." },
2441     { "A123,BC45\nD6.",   D,    "A\t",   "BE",  '?',  "123 45 6"       , "A ,BC \nD ."   , "A123?45?6?"     , "A?,?C?\nD?."   },
2442     { "A123,BC45\nD6.",   AD,   "A\t",   "BE",  '?',  "A123 BC45 D6"   , ", \n ."        , "A123?C45?D6?"   , "A?,?\n?."      },
2443     { "A123,BC45\nD6.",   A+D,  "A\t",   "BE",  '?',  "A123 BC45 D6"   , ", \n ."        , "A123?C45?D6?"   , "A?,?\n?."      },
2444     { "A123,BC45\nD6.",   I,    "A\t",   "BE",  '?',  ", ."            , "A123 BC45\nD6" , "A?,?."          , "A123?C45\nD6?" },
2445     { "A123,BC45\nD6.",   C,    "A\t",   "BE",  '?',  "\n"             , "A123,BC45 D6." , "A?\n?"          , "A123,?C45?D6." },
2446     { "A123,BC45\nD6.",   I+C,  "A\t",   "BE",  '?',   ", \n ."        , "A123 BC45 D6"  , "A?,?\n?."       , "A123?C45?D6?"  },
2447 
2448 
2449     // fSS_NoMerge
2450 
2451     { "",                 NM,     " A\t",  "BE",  '?',  ""              , ""                 , ""                 , ""               },
2452     { "   ",              NM,     " A\t",  "BE",  '?',  ""              , ""                 , ""                 , ""               },
2453     { "   ",              NM,     " ",     "",    '?',  ""              , ""                 , ""                 , ""               }, // 80
2454     { "   ",              NM,     "",      " ",   '?',  ""              , ""                 , "???"              , "???"            },
2455     { "   ",              NM,     " ",     " ",   '?',  ""              , ""                 , "???"              , "???"            },
2456     { "ABC",              NM,     " A\t",  "BE",  '?',  "ABC"           , ""                 , "A?C"              , "A??"            },
2457     { "  ABC",            NM,     " A\t",  "BE",  '?',  "ABC"           , ""                 , "A?C"              , "A??"            },
2458     { "ABC  ",            NM,     " A\t",  "BE",  '?',  "ABC"           , ""                 , "A?C"              , "A??"            },
2459     { " ABC ",            NM,     " A\t",  "BE",  '?',  "ABC"           , ""                 , "A?C"              , "A??"            },
2460     { "   ABC   ",        NM,     " A\t",  "BE",  '?',  "ABC"           , ""                 , "A?C"              , "A??"            },
2461     { "   A B C   ",      NM,     " A\t",  "BE",  '?',  "A B C"         , ""                 , "A ? C"            , "A ? ?"          },
2462     { "   A  B  C   ",    NM,     " A\t",  "BE",  '?',  "A  B  C"       , ""                 , "A  ?  C"          , "A  ?  ?"        },
2463     { "   AB CD EF   ",   NM,     " A\t",  "BE",  '?',  "AB CD EF"      , ""                 , "A? CD ?F"         , "A? ?? ??"       }, // 90
2464     { "   AB  CD  EF   ", NM,     " A\t",  "BE",  '?',  "AB  CD  EF"    , ""                 , "A?  CD  ?F"       , "A?  ??  ??"     },
2465     { "\nA\tB""\x01 ""C", NM,     " A\t",  "BE",  '?',  "A B  C"        , "\n \t \x01"       , "\nA\t?\x01 C"     , "?A\t?? ?"       },
2466     { "\n \nA B\nC\n \n", NM,     " A\t",  "BE",  '?',  "A B C"         , "\n \n   \n \n \n" , "\n \nA ?\nC\n \n" , "? ?A ???? ?"    },
2467     { "\n\nA B\nC\n\n",   NM,     " A\t",  "BE",  '?',  "A B C"         , "\n\n   \n \n\n"   , "\n\nA ?\nC\n\n"   , "??A ?????"      },
2468     { "  \nA B\nC\n  ",   NM,     " A\t",  "BE",  '?',  "A B C"         , "\n   \n \n"       , "\nA ?\nC\n"       , "?A ????"        },
2469     { "A123,BC45\nD6.",   NM,     " A\t",  "BE",  '?',  "A123,BC45 D6." , "\n"               , "A123,?C45\nD6."   , "A????????????"  },
2470     { "A123,BC45\nD6.",   NM+PR,  " A\t",  "BE",  '?',  "A123,BC45 D6." , "\n"               , "A123,?C45?D6."    , "A????????\n???" },
2471     { "A123,BC45\nD6.",   NM+A,   " A\t",  "BE",  '?',  "A    BC   D"   , "123,  45\n 6."    , "A?????C???D??"    , "A123,??45\n?6." },
2472     { "A123,BC45\nD6.",   NM+D,   " A\t",  "BE",  '?',  "123   45  6"   , "A   ,BC  \nD ."   , "A123???45??6?"    , "A???,?C??\nD?." },
2473     { "A123,BC45\nD6.",   NM+AD,  " A\t",  "BE",  '?',  "A123 BC45 D6"  , ",    \n  ."       , "A123??C45?D6?"    , "A???,????\n??." }, // 100
2474     { "A123,BC45\nD6.",   NM+A+D, " A\t",  "BE",  '?',  "A123 BC45 D6"  , ",    \n  ."       , "A123??C45?D6?"    , "A???,????\n??." },
2475     { "A123,BC45\nD6.",   NM+I,   " A\t",  "BE",  '?',  ",       ."     , "A123 BC45\nD6"    , "A???,???????."    , "A123??C45\nD6?" },
2476     { "A123,BC45\nD6.",   NM+C,   " A\t",  "BE",  '?',  "\n"            , "A123,BC45 D6."    , "A????????\n???"   , "A123,?C45?D6."  },
2477     { "A123,BC45\nD6.",   NM+I+C, " A\t",  "BE",  '?',  ",    \n  ."    , "A123 BC45 D6"     , "A???,????\n??."   , "A123??C45?D6?"  },
2478 
2479     // fSS_Remove (remove not allowed + merge by default)
2480 
2481     { "",                 RM,     " A\t",  "BE",  '?',  ""             , ""             , ""                , ""             }, // 105
2482     { "   ",              RM,     " A\t",  "BE",  '?',  ""             , ""             , ""                , ""             },
2483     { "   ",              RM,     " ",     "",    '?',  ""             , ""             , ""                , ""             },
2484     { "   ",              RM,     "",      " ",   '?',  ""             , ""             , ""                , ""             },
2485     { "   ",              RM,     " ",     " ",   '?',  ""             , ""             , ""                , ""             },
2486     { "ABC",              RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"              , "A"            }, // 110
2487     { "  ABC",            RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"              , "A"            },
2488     { "ABC  ",            RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"              , "A"            },
2489     { " ABC ",            RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"              , "A"            },
2490     { "   ABC   ",        RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"              , "A"            },
2491     { "   A B C   ",      RM,     " A\t",  "BE",  '?',  "A B C"        , ""             , "A C"             , "A"            },
2492     { "   A  B  C   ",    RM,     " A\t",  "BE",  '?',  "A B C"        , ""             , "A C"             , "A"            },
2493     { "   AB CD EF   ",   RM,     " A\t",  "BE",  '?',  "AB CD EF"     , ""             , "A CD F"          , "A"            },
2494     { "   AB  CD  EF   ", RM,     " A\t",  "BE",  '?',  "AB CD EF"     , ""             , "A CD F"          , "A"            },
2495     { "\nA\tB""\x01 ""C", RM,     " A\t",  "BE",  '?',  "AB C"         , "\n\t\x01"     , "\nA\t\x01 C"     , "A\t"          },
2496     { "\n \nA B\nC\n \n", RM,     " A\t",  "BE",  '?',  "A BC"         , "\n\n\n\n\n"   , "\n \nA \nC\n \n" , "A"            }, // 120
2497     { "\n\nA B\nC\n\n",   RM,     " A\t",  "BE",  '?',  "A BC"         , "\n\n\n\n\n"   , "\n\nA \nC\n\n"   , "A"            },
2498     { "  \nA B\nC\n  ",   RM,     " A\t",  "BE",  '?',  "A BC"         , "\n\n\n"       , "\nA \nC\n"       , "A"            },
2499     { "A123,BC45\nD6.",   RM,     " A\t",  "BE",  '?',  "A123,BC45D6." , "\n"           , "A123,C45\nD6."   , "A"            },
2500     { "A123,BC45\nD6.",   RM+PR,  " A\t",  "BE",  '?',  "A123,BC45D6." , "\n"           , "A123,C45D6."     , "A\n"          },
2501     { "A123,BC45\nD6.",   RM+A,   " A\t",  "BE",  '?',  "ABCD"         , "123,45\n6."   , "ACD"             , "A123,45\n6."  },
2502     { "A123,BC45\nD6.",   RM+D,   " A\t",  "BE",  '?',  "123456"       , "A,BC\nD."     , "A123456"         , "A,C\nD."      },
2503     { "A123,BC45\nD6.",   RM+AD,  " A\t",  "BE",  '?',  "A123BC45D6"   , ",\n."         , "A123C45D6"       , "A,\n."        },
2504     { "A123,BC45\nD6.",   RM+A+D, " A\t",  "BE",  '?',  "A123BC45D6"   , ",\n."         , "A123C45D6"       , "A,\n."        },
2505     { "A123,BC45\nD6.",   RM+I,   " A\t",  "BE",  '?',  ",."           , "A123BC45\nD6" , "A,."             , "A123C45\nD6"  },
2506     { "A123,BC45\nD6.",   RM+C,   " A\t",  "BE",  '?',  "\n"           , "A123,BC45D6." , "A\n"             , "A123,C45D6."  }, //130
2507     { "A123,BC45\nD6.",   RM+I+C, " A\t",  "BE",  '?',  ",\n."         , "A123BC45D6"   , "A,\n."           , "A123C45D6"    },
2508 
2509     // fSS_Remove + fSS_NoMerge
2510 
2511     { "",                 NM+RM,     " A\t",  "BE",  '?',  ""             , ""             , ""                 , ""            }, // 132
2512     { "   ",              NM+RM,     " A\t",  "BE",  '?',  ""             , ""             , ""                 , ""            },
2513     { "   ",              NM+RM,     " ",     "",    '?',  ""             , ""             , ""                 , ""            },
2514     { "   ",              NM+RM,     "",      " ",   '?',  ""             , ""             , ""                 , ""            },
2515     { "   ",              NM+RM,     " ",     " ",   '?',  ""             , ""             , ""                 , ""            },
2516     { "ABC",              NM+RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"               , "A"           },
2517     { "  ABC",            NM+RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"               , "A"           },
2518     { "ABC  ",            NM+RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"               , "A"           },
2519     { " ABC ",            NM+RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"               , "A"           }, // 140
2520     { "   ABC   ",        NM+RM,     " A\t",  "BE",  '?',  "ABC"          , ""             , "AC"               , "A"           },
2521     { "   A B C   ",      NM+RM,     " A\t",  "BE",  '?',  "A B C"        , ""             , "A  C"             , "A"           },
2522     { "   A  B  C   ",    NM+RM,     " A\t",  "BE",  '?',  "A  B  C"      , ""             , "A    C"           , "A"           },
2523     { "   AB CD EF   ",   NM+RM,     " A\t",  "BE",  '?',  "AB CD EF"     , ""             , "A CD F"           , "A"           },
2524     { "   AB  CD  EF   ", NM+RM,     " A\t",  "BE",  '?',  "AB  CD  EF"   , ""             , "A  CD  F"         , "A"           },
2525     { "\nA\tB""\x01 ""C", NM+RM,     " A\t",  "BE",  '?',  "AB C"         , "\n\t\x01"     , "\nA\t\x01 C"      , "A\t"         },
2526     { "\n \nA B\nC\n \n", NM+RM,     " A\t",  "BE",  '?',  "A BC"         , "\n\n\n\n\n"   , "\n \nA \nC\n \n"  , "A"           },
2527     { "\n\nA B\nC\n\n",   NM+RM,     " A\t",  "BE",  '?',  "A BC"         , "\n\n\n\n\n"   , "\n\nA \nC\n\n"    , "A"           },
2528     { "  \nA B\nC\n  ",   NM+RM,     " A\t",  "BE",  '?',  "A BC"         , "\n\n\n"       , "\nA \nC\n"        , "A"           },
2529     { "A123,BC45\nD6.",   NM+RM,     " A\t",  "BE",  '?',  "A123,BC45D6." , "\n"           , "A123,C45\nD6."    , "A"           }, // 150
2530     { "A123,BC45\nD6.",   NM+RM+PR,  " A\t",  "BE",  '?',  "A123,BC45D6." , "\n"           , "A123,C45D6."      , "A\n"         },
2531     { "A123,BC45\nD6.",   NM+RM+A,   " A\t",  "BE",  '?',  "ABCD"         , "123,45\n6."   , "ACD"              , "A123,45\n6." },
2532     { "A123,BC45\nD6.",   NM+RM+D,   " A\t",  "BE",  '?',  "123456"       , "A,BC\nD."     , "A123456"          , "A,C\nD."     },
2533     { "A123,BC45\nD6.",   NM+RM+AD,  " A\t",  "BE",  '?',  "A123BC45D6"   , ",\n."         , "A123C45D6"        , "A,\n."       },
2534     { "A123,BC45\nD6.",   NM+RM+A+D, " A\t",  "BE",  '?',  "A123BC45D6"   , ",\n."         , "A123C45D6"        , "A,\n."       },
2535     { "A123,BC45\nD6.",   NM+RM+I,   " A\t",  "BE",  '?',  ",."           , "A123BC45\nD6" , "A,."              , "A123C45\nD6" },
2536     { "A123,BC45\nD6.",   NM+RM+C,   " A\t",  "BE",  '?',  "\n"           , "A123,BC45D6." , "A\n"              , "A123,C45D6." },
2537     { "A123,BC45\nD6.",   NM+RM+I+C, " A\t",  "BE",  '?',  ",\n."         , "A123BC45D6"   , "A,\n."            , "A123C45D6"   },
2538 
2539     // fSS_NoTruncate_* (+ merge by default)
2540 
2541     { "",                 NTB,  " A\t",  "BE",  '?',  ""           , ""                , ""                 , ""          },
2542     { "   ",              NTB,  " A\t",  "BE",  '?',  ""           , ""                , ""                 , ""          }, // 160
2543     { "   ",              NTB,  " ",     "",    '?',  ""           , ""                , ""                 , ""          },
2544     { "   ",              NTB,  "",      " ",   '?',  ""           , ""                , "?"                , "?"         },
2545     { "   ",              NTB,  " ",     " ",   '?',  ""           , ""                , "?"                , "?"         },
2546     { "ABC",              NTB,  " A\t",  "BE",  '?',  "ABC"        , ""                , "A?C"              , "A?"        },
2547     { "  ABC",            NTB,  " A\t",  "BE",  '?',  " ABC"       , ""                , " A?C"             , " A?"       },
2548     { "ABC  ",            NTB,  " A\t",  "BE",  '?',  "ABC"        , ""                , "A?C"              , "A?"        },
2549     { " ABC ",            NTB,  " A\t",  "BE",  '?',  " ABC"       , ""                , " A?C"             , " A?"       },
2550     { "   ABC   ",        NTB,  " A\t",  "BE",  '?',  " ABC"       , ""                , " A?C"             , " A?"       },
2551     { "   A B C   ",      NTB,  " A\t",  "BE",  '?',  " A B C"     , ""                , " A ? C"           , " A ? ?"    },
2552     { "   A  B  C   ",    NTB,  " A\t",  "BE",  '?',  " A B C"     , ""                , " A ? C"           , " A ? ?"    }, // 170
2553     { "   AB CD EF   ",   NTB,  " A\t",  "BE",  '?',  " AB CD EF"  , ""                , " A? CD ?F"        , " A? ? ?"   },
2554     { "   AB  CD  EF   ", NTB,  " A\t",  "BE",  '?',  " AB CD EF"  , ""                , " A? CD ?F"        , " A? ? ?"   },
2555     { "\n\nA  B\nC\n\n",  NTB,  " A\t",  "BE",  '?',  " A B C"     , "\n\n \n \n\n"    , "\n\nA ?\nC\n\n"   , "?A ?"      },
2556     { "\n \nA B\nC\n \n", NTB,  " A\t",  "BE",  '?',  " A B C"     , "\n \n \n \n \n"  , "\n \nA ?\nC\n \n" , "? ?A ? ?"  },
2557     { " \nA  B\nC\n ",    NTB,  " A\t",  "BE",  '?',  " A B C"     , " \n \n \n"       , " \nA ?\nC\n"      , " ?A ?"     },
2558     { "  \nA B\nC\n  ",   NTB,  " A\t",  "BE",  '?',  " A B C"     , " \n \n \n"       , " \nA ?\nC\n"      , " ?A ?"     },
2559 
2560     { "",                 NTE,  " A\t",  "BE",  '?',  ""           , ""                , ""                 , ""          },
2561     { "   ",              NTE,  " A\t",  "BE",  '?',  ""           , ""                , ""                 , ""          },
2562     { "   ",              NTE,  " ",     "",    '?',  ""           , ""                , ""                 , ""          },
2563     { "   ",              NTE,  "",      " ",   '?',  ""           , ""                , "?"                , "?"         }, // 180
2564     { "   ",              NTE,  " ",     " ",   '?',  ""           , ""                , "?"                , "?"         },
2565     { "ABC",              NTE,  " A\t",  "BE",  '?',  "ABC"        , ""                , "A?C"              , "A?"        },
2566     { "  ABC",            NTE,  " A\t",  "BE",  '?',  "ABC"        , ""                , "A?C"              , "A?"        },
2567     { "ABC  ",            NTE,  " A\t",  "BE",  '?',  "ABC "       , ""                , "A?C "             , "A? "       },
2568     { " ABC ",            NTE,  " A\t",  "BE",  '?',  "ABC "       , ""                , "A?C "             , "A? "       },
2569     { "   ABC   ",        NTE,  " A\t",  "BE",  '?',  "ABC "       , ""                , "A?C "             , "A? "       },
2570     { "   A B C   ",      NTE,  " A\t",  "BE",  '?',  "A B C "     , ""                , "A ? C "           , "A ? ? "    },
2571     { "   A  B  C   ",    NTE,  " A\t",  "BE",  '?',  "A B C "     , ""                , "A ? C "           , "A ? ? "    },
2572     { "   AB CD EF   ",   NTE,  " A\t",  "BE",  '?',  "AB CD EF "  , ""                , "A? CD ?F "        , "A? ? ? "   },
2573     { "   AB  CD  EF   ", NTE,  " A\t",  "BE",  '?',  "AB CD EF "  , ""                , "A? CD ?F "        , "A? ? ? "   }, // 190
2574     { "\n\nA  B\nC\n\n",  NTE,  " A\t",  "BE",  '?',  "A B C "     , "\n\n \n \n\n"    , "\n\nA ?\nC\n\n"   , "?A ?"      },
2575     { "\n \nA B\nC\n \n", NTE,  " A\t",  "BE",  '?',  "A B C "     , "\n \n \n \n \n"  , "\n \nA ?\nC\n \n" , "? ?A ? ?"  },
2576     { " \nA  B\nC\n ",    NTE,  " A\t",  "BE",  '?',  "A B C "     ,  "\n \n \n "      , "\nA ?\nC\n "      , "?A ? "     },
2577     { "  \nA B\nC\n  ",   NTE,  " A\t",  "BE",  '?',  "A B C "     , "\n \n \n "       , "\nA ?\nC\n "      , "?A ? "     },
2578 
2579     { "",                 NT,   " A\t",  "BE",  '?',  ""           , ""                , ""                 , ""          },
2580     { "   ",              NT,   " A\t",  "BE",  '?',  " "          , " "               , " "                , " "         },
2581     { "   ",              NT,   " ",     "",    '?',  " "          , " "               , " "                , " "         },
2582     { "   ",              NT,   "",      " ",   '?',  " "          , " "               , "?"                , "?"         },
2583     { "   ",              NT,   " ",     " ",   '?',  " "          , " "               , "?"                , "?"         },
2584     { "ABC",              NT,   " A\t",  "BE",  '?',  "ABC"        , " "               , "A?C"              , "A?"        }, // 200
2585     { "  ABC",            NT,   " A\t",  "BE",  '?',  " ABC"       , " "               , " A?C"             , " A?"       },
2586     { "ABC  ",            NT,   " A\t",  "BE",  '?',  "ABC "       , " "               , "A?C "             , "A? "       },
2587     { " ABC ",            NT,   " A\t",  "BE",  '?',  " ABC "      , " "               , " A?C "            , " A? "      },
2588     { "   ABC   ",        NT,   " A\t",  "BE",  '?',  " ABC "      , " "               , " A?C "            , " A? "      },
2589     { "   A B C   ",      NT,   " A\t",  "BE",  '?',  " A B C "    , " "               , " A ? C "          , " A ? ? "   },
2590     { "   A  B  C   ",    NT,   " A\t",  "BE",  '?',  " A B C "    , " "               , " A ? C "          , " A ? ? "   },
2591     { "   AB CD EF   ",   NT,   " A\t",  "BE",  '?',  " AB CD EF " , " "               , " A? CD ?F "       , " A? ? ? "  },
2592     { "   AB  CD  EF   ", NT,   " A\t",  "BE",  '?',  " AB CD EF " , " "               , " A? CD ?F "       , " A? ? ? "  },
2593     { "\n\nA  B\nC\n\n",  NT,   " A\t",  "BE",  '?',  " A B C "    , "\n\n \n \n\n"    , "\n\nA ?\nC\n\n"   , "?A ?"      },
2594     { "\n \nA B\nC\n \n", NT,   " A\t",  "BE",  '?',  " A B C "    , "\n \n \n \n \n"  , "\n \nA ?\nC\n \n" , "? ?A ? ?"  }, // 210
2595     { " \nA  B\nC\n ",    NT,   " A\t",  "BE",  '?',  " A B C "    , " \n \n \n "      , " \nA ?\nC\n "     , " ?A ? "    },
2596     { "  \nA B\nC\n  ",   NT,   " A\t",  "BE",  '?',  " A B C "    , " \n \n \n "      , " \nA ?\nC\n "     , " ?A ? "    },
2597 
2598     // fSS_NoTruncate_* + fSS_NoMerge
2599 
2600     { "",                 NTB+NM,  " A\t",  "BE",  '?',  ""              ,  ""                  , ""                 , ""               },
2601     { "   ",              NTB+NM,  " A\t",  "BE",  '?',  ""              ,  ""                  , ""                 , ""               },
2602     { "   ",              NTB+NM,  " ",     "",    '?',  ""              ,  ""                  , ""                 , ""               },
2603     { "   ",              NTB+NM,  "",      " ",   '?',  ""              ,  ""                  , "???"              , "???"            },
2604     { "   ",              NTB+NM,  " ",     " ",   '?',  ""              ,  ""                  , "???"              , "???"            },
2605     { "ABC",              NTB+NM,  " A\t",  "BE",  '?',  "ABC"           ,  ""                  , "A?C"              , "A??"            },
2606     { "  ABC",            NTB+NM,  " A\t",  "BE",  '?',  "  ABC"         ,  ""                  , "  A?C"            , "  A??"          },
2607     { "ABC  ",            NTB+NM,  " A\t",  "BE",  '?',  "ABC"           ,  ""                  , "A?C"              , "A??"            }, // 220
2608     { " ABC ",            NTB+NM,  " A\t",  "BE",  '?',  " ABC"          ,  ""                  , " A?C"             , " A??"           },
2609     { "   ABC   ",        NTB+NM,  " A\t",  "BE",  '?',  "   ABC"        ,  ""                  , "   A?C"           , "   A??"         },
2610     { "   A B C   ",      NTB+NM,  " A\t",  "BE",  '?',  "   A B C"      ,  ""                  , "   A ? C"         , "   A ? ?"       },
2611     { "   A  B  C   ",    NTB+NM,  " A\t",  "BE",  '?',  "   A  B  C"    ,  ""                  , "   A  ?  C"       , "   A  ?  ?"     },
2612     { "   AB CD EF   ",   NTB+NM,  " A\t",  "BE",  '?',  "   AB CD EF"   ,  ""                  , "   A? CD ?F"      , "   A? ?? ??"    },
2613     { "   AB  CD  EF   ", NTB+NM,  " A\t",  "BE",  '?',  "   AB  CD  EF" ,  ""                  , "   A?  CD  ?F"    , "   A?  ??  ??"  },
2614     { "\n\nA  B\nC\n\n",  NTB+NM,  " A\t",  "BE",  '?',  "  A  B C"      ,  "\n\n    \n \n\n"   , "\n\nA  ?\nC\n\n"  , "??A  ?????"     },
2615     { "\n \nA B\nC\n \n", NTB+NM,  " A\t",  "BE",  '?',  "   A B C"      ,  "\n \n   \n \n \n"  , "\n \nA ?\nC\n \n" , "? ?A ???? ?"    },
2616     { " \nA  B\nC\n ",    NTB+NM,  " A\t",  "BE",  '?',  "  A  B C"      ,  " \n    \n \n"      , " \nA  ?\nC\n"     , " ?A  ????"      },
2617     { "  \nA B\nC\n  ",   NTB+NM,  " A\t",  "BE",  '?',  "   A B C"      ,  "  \n   \n \n"      , "  \nA ?\nC\n"     , "  ?A ????"      }, // 230
2618 
2619     { "",                 NTE+NM,  " A\t",  "BE",  '?',  ""              ,  ""                  , ""                 , ""               },
2620     { "   ",              NTE+NM,  " A\t",  "BE",  '?',  ""              ,  ""                  , ""                 , ""               },
2621     { "   ",              NTE+NM,  " ",     "",    '?',  ""              ,  ""                  , ""                 , ""               },
2622     { "   ",              NTE+NM,  "",      " ",   '?',  ""              ,  ""                  , "???"              , "???"            },
2623     { "   ",              NTE+NM,  " ",     " ",   '?',  ""              ,  ""                  , "???"              , "???"            },
2624     { "ABC",              NTE+NM,  " A\t",  "BE",  '?',  "ABC"           ,  ""                  , "A?C"              , "A??"            },
2625     { "  ABC",            NTE+NM,  " A\t",  "BE",  '?',  "ABC"           ,  ""                  , "A?C"              , "A??"            },
2626     { "ABC  ",            NTE+NM,  " A\t",  "BE",  '?',  "ABC  "         ,  ""                  , "A?C  "            , "A??  "          },
2627     { " ABC ",            NTE+NM,  " A\t",  "BE",  '?',  "ABC "          ,  ""                  , "A?C "             , "A?? "           },
2628     { "   ABC   ",        NTE+NM,  " A\t",  "BE",  '?',  "ABC   "        ,  ""                  , "A?C   "           , "A??   "         }, // 240
2629     { "   A B C   ",      NTE+NM,  " A\t",  "BE",  '?',  "A B C   "      ,  ""                  , "A ? C   "         , "A ? ?   "       },
2630     { "   A  B  C   ",    NTE+NM,  " A\t",  "BE",  '?',  "A  B  C   "    ,  ""                  , "A  ?  C   "       , "A  ?  ?   "     },
2631     { "   AB CD EF   ",   NTE+NM,  " A\t",  "BE",  '?',  "AB CD EF   "   ,  ""                  , "A? CD ?F   "      , "A? ?? ??   "    },
2632     { "   AB  CD  EF   ", NTE+NM,  " A\t",  "BE",  '?',  "AB  CD  EF   " ,  ""                  , "A?  CD  ?F   "    , "A?  ??  ??   "  },
2633     { "\n\nA  B\nC\n\n",  NTE+NM,  " A\t",  "BE",  '?',  "A  B C  "      ,  "\n\n    \n \n\n"   , "\n\nA  ?\nC\n\n"  , "??A  ?????"     },
2634     { "\n \nA B\nC\n \n", NTE+NM,  " A\t",  "BE",  '?',  "A B C   "      ,  "\n \n   \n \n \n"  , "\n \nA ?\nC\n \n" , "? ?A ???? ?"    },
2635     { " \nA  B\nC\n ",    NTE+NM,  " A\t",  "BE",  '?',  "A  B C  "      ,  "\n    \n \n "      , "\nA  ?\nC\n "     , "?A  ???? "      },
2636     { "  \nA B\nC\n  ",   NTE+NM,  " A\t",  "BE",  '?',  "A B C   "      ,  "\n   \n \n  "      , "\nA ?\nC\n  "     , "?A ????  "      },
2637 
2638     { "",                 NT+NM,   " A\t",  "BE",  '?',  ""                 ,  ""                  , ""                 , ""                 },
2639     { "   ",              NT+NM,   " A\t",  "BE",  '?',  "   "              ,  "   "               , "   "              , "   "              }, // 250
2640     { "   ",              NT+NM,   " ",     "",    '?',  "   "              ,  "   "               , "   "              , "   "              },
2641     { "   ",              NT+NM,   "",      " ",   '?',  "   "              ,  "   "               , "???"              , "???"              },
2642     { "   ",              NT+NM,   " ",     " ",   '?',  "   "              ,  "   "               , "???"              , "???"              },
2643     { "ABC",              NT+NM,   " A\t",  "BE",  '?',  "ABC"              ,  "   "               , "A?C"              , "A??"              },
2644     { "  ABC",            NT+NM,   " A\t",  "BE",  '?',  "  ABC"            ,  "     "             , "  A?C"            , "  A??"            },
2645     { "ABC  ",            NT+NM,   " A\t",  "BE",  '?',  "ABC  "            ,  "     "             , "A?C  "            , "A??  "            },
2646     { " ABC ",            NT+NM,   " A\t",  "BE",  '?',  " ABC "            ,  "     "             , " A?C "            , " A?? "            },
2647     { "   ABC   ",        NT+NM,   " A\t",  "BE",  '?',  "   ABC   "        ,  "         "         , "   A?C   "        , "   A??   "        },
2648     { "   A B C   ",      NT+NM,   " A\t",  "BE",  '?',  "   A B C   "      ,  "           "       , "   A ? C   "      , "   A ? ?   "      },
2649     { "   A  B  C   ",    NT+NM,   " A\t",  "BE",  '?',  "   A  B  C   "    ,  "             "     , "   A  ?  C   "    , "   A  ?  ?   "    }, // 260
2650     { "   AB CD EF   ",   NT+NM,   " A\t",  "BE",  '?',  "   AB CD EF   "   ,  "              "    , "   A? CD ?F   "   , "   A? ?? ??   "   },
2651     { "   AB  CD  EF   ", NT+NM,   " A\t",  "BE",  '?',  "   AB  CD  EF   " ,  "                "  , "   A?  CD  ?F   " , "   A?  ??  ??   " },
2652     { "\n\nA  B\nC\n\n",  NT+NM,   " A\t",  "BE",  '?',  "  A  B C  "       ,  "\n\n    \n \n\n"   , "\n\nA  ?\nC\n\n"  , "??A  ?????"       },
2653     { "\n \nA B\nC\n \n", NT+NM,   " A\t",  "BE",  '?',  "   A B C   "      ,  "\n \n   \n \n \n"  , "\n \nA ?\nC\n \n" , "? ?A ???? ?"      },
2654     { " \nA  B\nC\n ",    NT+NM,   " A\t",  "BE",  '?',  "  A  B C  "       ,  " \n    \n \n "     , " \nA  ?\nC\n "    , " ?A  ???? "       },
2655     { "  \nA B\nC\n  ",   NT+NM,   " A\t",  "BE",  '?',  "   A B C   "      ,  "  \n   \n \n  "    , "  \nA ?\nC\n  "   , "  ?A ????  "      },
2656 
2657     // fSS_NoTruncate_* + fSS_Remove
2658 
2659     { "",                 NTB+RM,  " A\t",  "BE",  '?',  ""          ,  ""            , ""                 , ""     },
2660     { "   ",              NTB+RM,  " A\t",  "BE",  '?',  ""          ,  ""            , ""                 , ""     },
2661     { "   ",              NTB+RM,  " ",     "",    '?',  ""          ,  ""            , ""                 , ""     },
2662     { "   ",              NTB+RM,  "",      " ",   '?',  ""          ,  ""            , ""                 , ""     }, // 270
2663     { "   ",              NTB+RM,  " ",     " ",   '?',  ""          ,  ""            , ""                 , ""     },
2664     { "ABC",              NTB+RM,  " A\t",  "BE",  '?',  "ABC"       ,  ""            , "AC"               , "A"    },
2665     { "  ABC",            NTB+RM,  " A\t",  "BE",  '?',  " ABC"      ,  ""            , " AC"              , " A"   },
2666     { "ABC  ",            NTB+RM,  " A\t",  "BE",  '?',  "ABC"       ,  ""            , "AC"               , "A"    },
2667     { " ABC ",            NTB+RM,  " A\t",  "BE",  '?',  " ABC"      ,  ""            , " AC"              , " A"   },
2668     { "   ABC   ",        NTB+RM,  " A\t",  "BE",  '?',  " ABC"      ,  ""            , " AC"              , " A"   },
2669     { "   A B C   ",      NTB+RM,  " A\t",  "BE",  '?',  " A B C"    ,  ""            , " A C"             , " A"   },
2670     { "   A  B  C   ",    NTB+RM,  " A\t",  "BE",  '?',  " A B C"    ,  ""            , " A C"             , " A"   },
2671     { "   AB CD EF   ",   NTB+RM,  " A\t",  "BE",  '?',  " AB CD EF" ,  ""            , " A CD F"          , " A"   },
2672     { "   AB  CD  EF   ", NTB+RM,  " A\t",  "BE",  '?',  " AB CD EF" ,  ""            , " A CD F"          , " A"   }, // 280
2673     { "\n\nA  B\nC\n\n",  NTB+RM,  " A\t",  "BE",  '?',  "A BC"      ,  "\n\n\n\n\n"  , "\n\nA \nC\n\n"    , "A"    },
2674     { "\n \nA B\nC\n \n", NTB+RM,  " A\t",  "BE",  '?',  " A BC"     ,  "\n\n\n\n\n"  , "\n \nA \nC\n \n"  , " A"   },
2675     { " \nA  B\nC\n ",    NTB+RM,  " A\t",  "BE",  '?',  " A BC"     ,  "\n\n\n"      , " \nA \nC\n"       , " A"   },
2676     { "  \nA B\nC\n  ",   NTB+RM,  " A\t",  "BE",  '?',  " A BC"     ,  "\n\n\n"      , " \nA \nC\n"       , " A"   },
2677 
2678     { "",                 NTE+RM,  " A\t",  "BE",  '?',  ""          ,  ""            , ""                 , ""     },
2679     { "   ",              NTE+RM,  " A\t",  "BE",  '?',  ""          ,  ""            , ""                 , ""     },
2680     { "   ",              NTE+RM,  " ",     "",    '?',  ""          ,  ""            , ""                 , ""     },
2681     { "   ",              NTE+RM,  "",      " ",   '?',  ""          ,  ""            , ""                 , ""     },
2682     { "   ",              NTE+RM,  " ",     " ",   '?',  ""          ,  ""            , ""                 , ""     },
2683     { "ABC",              NTE+RM,  " A\t",  "BE",  '?',  "ABC"       ,  ""            , "AC"               , "A"    }, // 290
2684     { "  ABC",            NTE+RM,  " A\t",  "BE",  '?',  "ABC"       ,  ""            , "AC"               , "A"    },
2685     { "ABC  ",            NTE+RM,  " A\t",  "BE",  '?',  "ABC "      ,  ""            , "AC "              , "A "   },
2686     { " ABC ",            NTE+RM,  " A\t",  "BE",  '?',  "ABC "      ,  ""            , "AC "              , "A "   },
2687     { "   ABC   ",        NTE+RM,  " A\t",  "BE",  '?',  "ABC "      ,  ""            , "AC "              , "A "   },
2688     { "   A B C   ",      NTE+RM,  " A\t",  "BE",  '?',  "A B C "    ,  ""            , "A C "             , "A "   },
2689     { "   A  B  C   ",    NTE+RM,  " A\t",  "BE",  '?',  "A B C "    ,  ""            , "A C "             , "A "   },
2690     { "   AB CD EF   ",   NTE+RM,  " A\t",  "BE",  '?',  "AB CD EF " ,  ""            , "A CD F "          , "A "   },
2691     { "   AB  CD  EF   ", NTE+RM,  " A\t",  "BE",  '?',  "AB CD EF " ,  ""            , "A CD F "          , "A "   },
2692     { "\n\nA  B\nC\n\n",  NTE+RM,  " A\t",  "BE",  '?',  "A BC"      ,  "\n\n\n\n\n"  , "\n\nA \nC\n\n"    , "A "   },
2693     { "\n \nA B\nC\n \n", NTE+RM,  " A\t",  "BE",  '?',  "A BC "     ,  "\n\n\n\n\n"  , "\n \nA \nC\n \n"  , "A "   }, // 300
2694     { " \nA  B\nC\n ",    NTE+RM,  " A\t",  "BE",  '?',  "A BC "     ,  "\n\n\n"      , "\nA \nC\n "       , "A "   },
2695     { "  \nA B\nC\n  ",   NTE+RM,  " A\t",  "BE",  '?',  "A BC "     ,  "\n\n\n"      , "\nA \nC\n "       , "A "   },
2696 
2697     { "",                 NT+RM,   " A\t",  "BE",  '?',  ""           ,  ""            , ""                , ""     },
2698     { "   ",              NT+RM,   " A\t",  "BE",  '?',  " "          ,  ""            , " "               , " "    },
2699     { "   ",              NT+RM,   " ",     "",    '?',  " "          ,  ""            , " "               , " "    },
2700     { "   ",              NT+RM,   "",      " ",   '?',  " "          ,  ""            , ""                , ""     },
2701     { "   ",              NT+RM,   " ",     " ",   '?',  " "          ,  ""            , ""                , ""     },
2702     { "ABC",              NT+RM,   " A\t",  "BE",  '?',  "ABC"        ,  ""            , "AC"              , "A"    },
2703     { "  ABC",            NT+RM,   " A\t",  "BE",  '?',  " ABC"       ,  ""            , " AC"             , " A"   },
2704     { "ABC  ",            NT+RM,   " A\t",  "BE",  '?',  "ABC "       ,  ""            , "AC "             , "A "   }, // 310
2705     { " ABC ",            NT+RM,   " A\t",  "BE",  '?',  " ABC "      ,  ""            , " AC "            , " A "  },
2706     { "   ABC   ",        NT+RM,   " A\t",  "BE",  '?',  " ABC "      ,  ""            , " AC "            , " A "  },
2707     { "   A B C   ",      NT+RM,   " A\t",  "BE",  '?',  " A B C "    ,  ""            , " A C "           , " A "  },
2708     { "   A  B  C   ",    NT+RM,   " A\t",  "BE",  '?',  " A B C "    ,  ""            , " A C "           , " A "  },
2709     { "   AB CD EF   ",   NT+RM,   " A\t",  "BE",  '?',  " AB CD EF " ,  ""            , " A CD F "        , " A "  },
2710     { "   AB  CD  EF   ", NT+RM,   " A\t",  "BE",  '?',  " AB CD EF " ,  ""            , " A CD F "        , " A "  },
2711     { "\n\nA  B\nC\n\n" , NT+RM,   " A\t",  "BE",  '?',  "A BC"       ,  "\n\n\n\n\n"  , "\n\nA \nC\n\n"   , "A "   },
2712     { "\n \nA B\nC\n \n", NT+RM,   " A\t",  "BE",  '?',  " A BC "     ,  "\n\n\n\n\n"  , "\n \nA \nC\n \n" , " A "  },
2713     { " \nA  B\nC\n ",    NT+RM,   " A\t",  "BE",  '?',  " A BC "     ,  "\n\n\n"      , " \nA \nC\n "     , " A "  },
2714     { "  \nA B\nC\n  ",   NT+RM,   " A\t",  "BE",  '?',  " A BC "     ,  "\n\n\n"      , " \nA \nC\n "     , " A "  }, // 320
2715 
2716     // fSS_NoTruncate_* + fSS_NoMerge + fSS_Remove
2717 
2718     { "",                 NTB+NM+RM,  " A\t",  "BE",  '?',  ""              ,  ""            , ""                , ""      },
2719     { "   ",              NTB+NM+RM,  " A\t",  "BE",  '?',  ""              ,  ""            , ""                , ""      },
2720     { "   ",              NTB+NM+RM,  " ",     "",    '?',  ""              ,  ""            , ""                , ""      },
2721     { "   ",              NTB+NM+RM,  "",      " ",   '?',  ""              ,  ""            , ""                , ""      },
2722     { "   ",              NTB+NM+RM,  " ",     " ",   '?',  ""              ,  ""            , ""                , ""      },
2723     { "ABC",              NTB+NM+RM,  " A\t",  "BE",  '?',  "ABC"           ,  ""            , "AC"              , "A"     },
2724     { "  ABC",            NTB+NM+RM,  " A\t",  "BE",  '?',  "  ABC"         ,  ""            , "  AC"            , "  A"   },
2725     { "ABC  ",            NTB+NM+RM,  " A\t",  "BE",  '?',  "ABC"           ,  ""            , "AC"              , "A"     },
2726     { " ABC ",            NTB+NM+RM,  " A\t",  "BE",  '?',  " ABC"          ,  ""            , " AC"             , " A"    },
2727     { "   ABC   ",        NTB+NM+RM,  " A\t",  "BE",  '?',  "   ABC"        ,  ""            , "   AC"           , "   A"  }, // 330
2728     { "   A B C   ",      NTB+NM+RM,  " A\t",  "BE",  '?',  "   A B C"      ,  ""            , "   A  C"         , "   A"  },
2729     { "   A  B  C   ",    NTB+NM+RM,  " A\t",  "BE",  '?',  "   A  B  C"    ,  ""            , "   A    C"       , "   A"  },
2730     { "   AB CD EF   ",   NTB+NM+RM,  " A\t",  "BE",  '?',  "   AB CD EF"   ,  ""            , "   A CD F"       , "   A"  },
2731     { "   AB  CD  EF   ", NTB+NM+RM,  " A\t",  "BE",  '?',  "   AB  CD  EF" ,  ""            , "   A  CD  F"     , "   A"  },
2732     { "\n\nA  B\nC\n\n",  NTB+NM+RM,  " A\t",  "BE",  '?',  "A  BC"         ,  "\n\n\n\n\n"  , "\n\nA  \nC\n\n"  , "A"     },
2733     { "\n \nA B\nC\n \n", NTB+NM+RM,  " A\t",  "BE",  '?',  " A BC"         ,  "\n\n\n\n\n"  , "\n \nA \nC\n \n" , " A"    },
2734     { " \nA  B\nC\n ",    NTB+NM+RM,  " A\t",  "BE",  '?',  " A  BC"        ,  "\n\n\n"      , " \nA  \nC\n"     , " A"    },
2735     { "  \nA B\nC\n  ",   NTB+NM+RM,  " A\t",  "BE",  '?',  "  A BC"        ,  "\n\n\n"      , "  \nA \nC\n"     , "  A"   },
2736 
2737     { "",                 NTE+NM+RM,  " A\t",  "BE",  '?',  ""              ,  ""            , ""                , ""          },
2738     { "   ",              NTE+NM+RM,  " A\t",  "BE",  '?',  ""              ,  ""            , ""                , ""          }, // 340
2739     { "   ",              NTE+NM+RM,  " ",     "",    '?',  ""              ,  ""            , ""                , ""          },
2740     { "   ",              NTE+NM+RM,  "",      " ",   '?',  ""              ,  ""            , ""                , ""          },
2741     { "   ",              NTE+NM+RM,  " ",     " ",   '?',  ""              ,  ""            , ""                , ""          },
2742     { "ABC",              NTE+NM+RM,  " A\t",  "BE",  '?',  "ABC"           ,  ""            , "AC"              , "A"         },
2743     { "  ABC",            NTE+NM+RM,  " A\t",  "BE",  '?',  "ABC"           ,  ""            , "AC"              , "A"         },
2744     { "ABC  ",            NTE+NM+RM,  " A\t",  "BE",  '?',  "ABC  "         ,  ""            , "AC  "            , "A  "       },
2745     { " ABC ",            NTE+NM+RM,  " A\t",  "BE",  '?',  "ABC "          ,  ""            , "AC "             , "A "        },
2746     { "   ABC   ",        NTE+NM+RM,  " A\t",  "BE",  '?',  "ABC   "        ,  ""            , "AC   "           , "A   "      },
2747     { "   A B C   ",      NTE+NM+RM,  " A\t",  "BE",  '?',  "A B C   "      ,  ""            , "A  C   "         , "A     "    },
2748     { "   A  B  C   ",    NTE+NM+RM,  " A\t",  "BE",  '?',  "A  B  C   "    ,  ""            , "A    C   "       , "A       "  }, // 350
2749     { "   AB CD EF   ",   NTE+NM+RM,  " A\t",  "BE",  '?',  "AB CD EF   "   ,  ""            , "A CD F   "       , "A     "    },
2750     { "   AB  CD  EF   ", NTE+NM+RM,  " A\t",  "BE",  '?',  "AB  CD  EF   " ,  ""            , "A  CD  F   "     , "A       "  },
2751     { "\n\nA  B\nC\n\n",  NTE+NM+RM,  " A\t",  "BE",  '?',  "A  BC"         ,  "\n\n\n\n\n"  , "\n\nA  \nC\n\n"  , "A  "       },
2752     { "\n \nA B\nC\n \n", NTE+NM+RM,  " A\t",  "BE",  '?',  "A BC "         ,  "\n\n\n\n\n"  , "\n \nA \nC\n \n" , "A  "       },
2753     { " \nA  B\nC\n ",    NTE+NM+RM,  " A\t",  "BE",  '?',  "A  BC "        ,  "\n\n\n"      , "\nA  \nC\n "     , "A   "      },
2754     { "  \nA B\nC\n  ",   NTE+NM+RM,  " A\t",  "BE",  '?',  "A BC  "        ,  "\n\n\n"      , "\nA \nC\n  "     , "A   "      },
2755 
2756     { "",                 NT+NM+RM,   " A\t",  "BE",  '?',  ""                 ,  ""            , ""                , ""             },
2757     { "   ",              NT+NM+RM,   " A\t",  "BE",  '?',  "   "              ,  ""            , "   "             , "   "          },
2758     { "   ",              NT+NM+RM,   " ",     "",    '?',  "   "              ,  ""            , "   "             , "   "          },
2759     { "   ",              NT+NM+RM,   "",      " ",   '?',  "   "              ,  ""            , ""                , ""             }, // 360
2760     { "   ",              NT+NM+RM,   " ",     " ",   '?',  "   "              ,  ""            , ""                , ""             },
2761     { "ABC",              NT+NM+RM,   " A\t",  "BE",  '?',  "ABC"              ,  ""            , "AC"              , "A"            },
2762     { "  ABC",            NT+NM+RM,   " A\t",  "BE",  '?',  "  ABC"            ,  ""            , "  AC"            , "  A"          },
2763     { "ABC  ",            NT+NM+RM,   " A\t",  "BE",  '?',  "ABC  "            ,  ""            , "AC  "            , "A  "          },
2764     { " ABC ",            NT+NM+RM,   " A\t",  "BE",  '?',  " ABC "            ,  ""            , " AC "            , " A "          },
2765     { "   ABC   ",        NT+NM+RM,   " A\t",  "BE",  '?',  "   ABC   "        ,  ""            , "   AC   "        , "   A   "      },
2766     { "   A B C   ",      NT+NM+RM,   " A\t",  "BE",  '?',  "   A B C   "      ,  ""            , "   A  C   "      , "   A     "    },
2767     { "   A  B  C   ",    NT+NM+RM,   " A\t",  "BE",  '?',  "   A  B  C   "    ,  ""            , "   A    C   "    , "   A       "  },
2768     { "   AB CD EF   ",   NT+NM+RM,   " A\t",  "BE",  '?',  "   AB CD EF   "   ,  ""            , "   A CD F   "    , "   A     "    },
2769     { "   AB  CD  EF   ", NT+NM+RM,   " A\t",  "BE",  '?',  "   AB  CD  EF   " ,  ""            , "   A  CD  F   "  , "   A       "  }, // 370
2770     { "\n\nA  B\nC\n\n",  NT+NM+RM,   " A\t",  "BE",  '?',  "A  BC"            ,  "\n\n\n\n\n"  , "\n\nA  \nC\n\n"  , "A  "          },
2771     { "\n \nA B\nC\n \n", NT+NM+RM,   " A\t",  "BE",  '?',  " A BC "           ,  "\n\n\n\n\n"  , "\n \nA \nC\n \n" , " A  "         },
2772     { " \nA  B\nC\n ",    NT+NM+RM,   " A\t",  "BE",  '?',  " A  BC "          ,  "\n\n\n"      , " \nA  \nC\n "    , " A   "        },
2773     { "  \nA B\nC\n  ",   NT+NM+RM,   " A\t",  "BE",  '?',  "  A BC  "         ,  "\n\n\n"      , "  \nA \nC\n  "   , "  A   "       }
2774 };
2775 
BOOST_AUTO_TEST_CASE(s_Sanitize)
{
    // Every row of s_SanitizeTests is run through four NStr::Sanitize()
    // calls: the short overload and the extended overload (explicit
    // allow/reject sets and replacement character), each with the row's
    // flags as given and with NStr::fSS_Reject added.
    // On a mismatch the check fails and the input, the actual result and
    // the expected result are dumped to cout in printable form.
    const size_t n_tests = sizeof(s_SanitizeTests) / sizeof(s_SanitizeTests[0]);

    for (size_t i = 0;  i < n_tests;  ++i) {
        const SSanitizeTest& tc = s_SanitizeTests[i];

        {{
            // Short overload, flags as given
            const string res  = NStr::Sanitize(tc.str, tc.flags);
            const int    diff = res.compare(tc.res_allowed);
            BOOST_CHECK_EQUAL(diff, 0);
            if (diff != 0) {
                cout << i << ". A    : "
                     << "str = '" << NStr::PrintableString(tc.str) << "', "
                     << "res = '" << NStr::PrintableString(res) << "', "
                     << "expected = '" << NStr::PrintableString(tc.res_allowed) << "'" << endl;
            }
        }}
        {{
            // Short overload, fSS_Reject added
            const string res  = NStr::Sanitize(tc.str, tc.flags | NStr::fSS_Reject);
            const int    diff = res.compare(tc.res_rejected);
            BOOST_CHECK_EQUAL(diff, 0);
            if (diff != 0) {
                cout << i << ". R    : "
                     << "str = '" << NStr::PrintableString(tc.str) << "', "
                     << "res = '" << NStr::PrintableString(res) << "', "
                     << "expected = '" << NStr::PrintableString(tc.res_rejected) << "'" << endl;
            }
        }}
        {{
            // Extended overload, flags as given
            const string res  = NStr::Sanitize(tc.str, tc.allow, tc.reject, tc.replacement, tc.flags);
            const int    diff = res.compare(tc.res_ex_allowed);
            BOOST_CHECK_EQUAL(diff, 0);
            if (diff != 0) {
                cout << i << ". A ex : "
                     << "str = '" << NStr::PrintableString(tc.str) << "', "
                     << "res = '" << NStr::PrintableString(res) << "', "
                     << "expected = '" << NStr::PrintableString(tc.res_ex_allowed) << "'" << endl;
            }
        }}
        {{
            // Extended overload, fSS_Reject added
            const string res  = NStr::Sanitize(tc.str, tc.allow, tc.reject, tc.replacement, tc.flags | NStr::fSS_Reject);
            const int    diff = res.compare(tc.res_ex_rejected);
            BOOST_CHECK_EQUAL(diff, 0);
            if (diff != 0) {
                cout << i << ". R ex : "
                     << "str = '" << NStr::PrintableString(tc.str) << "', "
                     << "res = '" << NStr::PrintableString(res) << "', "
                     << "expected = '" << NStr::PrintableString(tc.res_ex_rejected) << "'" << endl;
            }
        }}
    }
}
2818 
2819 
2820 //----------------------------------------------------------------------------
2821 // NStr::CEncode|CParse()
2822 //----------------------------------------------------------------------------
2823 
/// One row of the NStr::CEncode()/CParse() round-trip table.
/// Each expected encoding is also fed back through CParse() by the test,
/// which must then reproduce `str`.
struct SCEncodeTest {
    const char* str;                // String input
    const char* expected_nonquoted; // C encoded string with eNotQuoted flag
    const char* expected_quoted;    // C encoded string with eQuoted flag
};
2829 
// Columns: raw input, expected eNotQuoted encoding, expected eQuoted encoding.
// Note on spelling: a literal such as "\x81""f" is split in two so that the
// compiler does not absorb the 'f' into the hex escape; "\\x81f" on the
// other hand starts with an escaped backslash and contains no hex escape.
static const SCEncodeTest s_CEncodeTests[] = {
    { "ABC",        "ABC",          "\"ABC\""           },
    { "ABC",        "ABC",          "\"ABC\""           },
    { "\"ABC\"",    "\\\"ABC\\\"",  "\"\\\"ABC\\\"\""   },
    { "\t\n\x44",   "\\t\\nD",      "\"\\t\\nD\""       },
    // Byte 0x81 followed by 'f'; the expected encoding is octal (\201)
    { "\x81""f",    "\\201f",       "\"\\201f\""        },
    // The next three rows denote the same data (backslash, 'x', '8', '1', 'f');
    // only the way the literals are written in the source differs.
    { "\\x81f",     "\\""\\x81f",   "\"\\\\x81f\""      },
    { "\\x81""f",   "\\""\\x81f",   "\"\\\\x81f\""      },
    { "\\x81""f",   "\\\\x81f",     "\"\\\\x81f\""      }
};
2840 
BOOST_AUTO_TEST_CASE(s_CEncode)2841 BOOST_AUTO_TEST_CASE(s_CEncode)
2842 {
2843     const size_t count = sizeof(s_CEncodeTests) / sizeof(s_CEncodeTests[0]);
2844     for (size_t i = 0;  i < count;  ++i)
2845     {
2846         const SCEncodeTest* test = &s_CEncodeTests[i];
2847         string ce, cp;
2848 
2849         // eNotQuoted
2850         try {
2851             ce = NStr::CEncode(test->str, NStr::eNotQuoted);
2852             BOOST_CHECK_EQUAL(ce, test->expected_nonquoted);
2853             cp = NStr::CParse(ce, NStr::eNotQuoted);
2854             BOOST_CHECK_EQUAL(cp, test->str);
2855         }
2856         catch (CStringException&) {
2857             _TROUBLE;
2858         }
2859 
2860         // eQuoted (by default)
2861         try {
2862             ce = NStr::CEncode(test->str, NStr::eQuoted);
2863             BOOST_CHECK_EQUAL(ce, test->expected_quoted);
2864             cp = NStr::CParse(ce, NStr::eQuoted);
2865             BOOST_CHECK_EQUAL(cp, test->str);
2866         }
2867         catch (CStringException&) {
2868             _TROUBLE;
2869         }
2870     }
2871 
2872     // Special cases for CParse(str, eQuoted)
2873     {
2874         string s;
2875         // Unterminated escaped string
2876         try {
2877             s = NStr::CParse("\"", NStr::eQuoted);
2878             _TROUBLE;
2879         }
2880         catch (CStringException&) {}
2881 
2882         // Must start with a double quote
2883         try {
2884             s = NStr::CParse(" \"A\"", NStr::eQuoted);
2885             _TROUBLE;
2886         }
2887         catch (CStringException&) {}
2888 
2889         // Must finish with a double quote
2890         try {
2891             s = NStr::CParse("\"A\" ", NStr::eQuoted);
2892             _TROUBLE;
2893         }
2894         catch (CStringException&) {}
2895 
2896         // Must finish with a double quote
2897         try {
2898             s = NStr::CParse("\"A\\t", NStr::eQuoted);
2899             _TROUBLE;
2900         }
2901         catch (CStringException&) {}
2902 
2903         // Escaped string format error
2904         try {
2905             s = NStr::CParse("\"A\\\"", NStr::eQuoted);
2906             _TROUBLE;
2907         }
2908         catch (CStringException&) {}
2909 
2910         // No anything between adjacent strings ("A""B").
2911         try {
2912             s = NStr::CParse("\"A\"?\"B\"", NStr::eQuoted);
2913             _TROUBLE;
2914         }
2915         catch (CStringException&) {}
2916 
2917         BOOST_CHECK_EQUAL( NStr::CParse("\"A\"\"B\"",          NStr::eQuoted),    "AB" );
2918         BOOST_CHECK_EQUAL( NStr::CParse("\"A\"\"B\"",          NStr::eNotQuoted), "\"A\"\"B\"" );
2919         BOOST_CHECK_EQUAL( NStr::CParse("A\"\"B",              NStr::eNotQuoted), "A\"\"B" );
2920 
2921         BOOST_CHECK_EQUAL( NStr::CParse("\"bar\\x44zoo\"",     NStr::eQuoted),    "barDzoo" );
2922         BOOST_CHECK_EQUAL( NStr::CParse("\"bar\\x44zoo\"",     NStr::eNotQuoted), "\"barDzoo\"" );
2923 
2924         BOOST_CHECK_EQUAL( NStr::CParse("\"\\x44\"\"f\"",      NStr::eQuoted),    "Df" );
2925         BOOST_CHECK_EQUAL( NStr::CParse("\\x44\"\"f",          NStr::eNotQuoted), "D\"\"f" );
2926 
2927         BOOST_CHECK_EQUAL( NStr::CParse("\"\x4\"\"4\"",        NStr::eQuoted),    "\x4""4" );
2928         BOOST_CHECK_EQUAL( NStr::CParse("\"\x4\"\"4\"",        NStr::eNotQuoted), "\"\x4\"\"4\"" );
2929 
2930         BOOST_CHECK_EQUAL( NStr::CParse("\"bar\\x44foo\"",     NStr::eQuoted),    "barOoo" );
2931         BOOST_CHECK_EQUAL( NStr::CParse("\"bar\\x44foo\"",     NStr::eNotQuoted), "\"barOoo\"" );
2932         BOOST_CHECK_EQUAL( NStr::CParse("bar\\x44foo",         NStr::eNotQuoted), "barOoo" );
2933 
2934         BOOST_CHECK_EQUAL( NStr::CParse("\"bar\\x44\"\"foo\"", NStr::eQuoted),    "barDfoo" );
2935         BOOST_CHECK_EQUAL( NStr::CParse("\"bar\\x44\"\"foo\"", NStr::eNotQuoted), "\"barD\"\"foo\"" );
2936         BOOST_CHECK_EQUAL( NStr::CParse("bar\\x44\"\"foo",     NStr::eNotQuoted), "barD\"\"foo" );
2937     }
2938 
2939     // Matrix test
2940     {
2941         for (unsigned i1 = 1;  i1 < 256;  i1++) {
2942             for (unsigned i2 = 0;  i2 < 256;  i2++) {
2943                 char s[3];
2944                 s[0] = (char) i1;
2945                 s[1] = (char) i2;
2946                 s[2] = '\0';
2947 
2948                 string ce = NStr::CEncode     (s,  NStr::eQuoted);
2949                 string cp = NStr::CParse      (ce, NStr::eQuoted);
2950                 string pq = NStr::ParseQuoted (ce);
2951                 BOOST_CHECK_EQUAL(s, cp);
2952                 BOOST_CHECK_EQUAL(s, pq);
2953 
2954                 string cenq  = NStr::CEncode (s,    NStr::eNotQuoted);
2955                 string cpnq  = NStr::CParse  (cenq, NStr::eNotQuoted);
2956                 BOOST_CHECK_EQUAL(s, cpnq);
2957                 BOOST_CHECK_EQUAL("\"" + cenq + "\"", ce);
2958             }
2959         }
2960     }
2961     return;
2962 }
2963 
2964 
2965 //----------------------------------------------------------------------------
2966 // NStr::Compare()
2967 //----------------------------------------------------------------------------
2968 
s_CompareStr(int expr_res,int valid_res)2969 static void s_CompareStr(int expr_res, int valid_res)
2970 {
2971     int res = expr_res > 0 ? 1 :
2972         expr_res == 0 ? 0 : -1;
2973     BOOST_CHECK_EQUAL(res, valid_res);
2974 }
2975 
/// One row of the string comparison table shared by the s_Compare and
/// s_XCompare test cases. All expected results are signs (-1, 0, 1).
struct SStrCompare
{
    const char* s1;    // left operand
    const char* s2;    // right operand

    // Expected results of comparing the whole of s1 with s2
    int case_res;      /* -1, 0, 1 */
    int nocase_res;    /* -1, 0, 1 */

    // Expected results of Compare(s1, 0, n, s2, ...): only the part of s1
    // selected by position 0 and length n takes part in the comparison
    SIZE_TYPE n;
    int n_case_res;    /* -1, 0, 1 */
    int n_nocase_res;  /* -1, 0, 1 */
};
2988 
// Columns: s1, s2, case_res, nocase_res, n, n_case_res, n_nocase_res
// (see SStrCompare). Rows are grouped by operand pair; within a group only
// the length 'n' of the s1 substring varies.
static const SStrCompare s_StrCompare[] = {
    { "", "",  0, 0,  0,     0, 0 },
    { "", "",  0, 0,  NPOS,  0, 0 },
    { "", "",  0, 0,  10,    0, 0 },
    { "", "",  0, 0,  1,     0, 0 },

    { "a", "",  1, 1,  0,     0, 0 },
    { "a", "",  1, 1,  1,     1, 1 },
    { "a", "",  1, 1,  2,     1, 1 },
    { "a", "",  1, 1,  NPOS,  1, 1 },

    { "", "bb",  -1, -1,  0,     -1, -1 },
    { "", "bb",  -1, -1,  1,     -1, -1 },
    { "", "bb",  -1, -1,  2,     -1, -1 },
    { "", "bb",  -1, -1,  3,     -1, -1 },
    { "", "bb",  -1, -1,  NPOS,  -1, -1 },

    { "ba", "bb",  -1, -1,  0,     -1, -1 },
    { "ba", "bb",  -1, -1,  1,     -1, -1 },
    { "ba", "b",    1,  1,  1,      0,  0 },
    { "ba", "bb",  -1, -1,  2,     -1, -1 },
    { "ba", "bb",  -1, -1,  3,     -1, -1 },
    { "ba", "bb",  -1, -1,  NPOS,  -1, -1 },

    // Operands that differ only by case; n == 0 compares "" against s2
    { "a", "A",  1, 0,  0,    -1, -1 },
    { "a", "A",  1, 0,  1,     1,  0 },
    { "a", "A",  1, 0,  2,     1,  0 },
    { "a", "A",  1, 0,  NPOS,  1,  0 },

    { "A", "a",  -1, 0,  0,     -1, -1 },
    { "A", "a",  -1, 0,  1,     -1,  0 },
    { "A", "a",  -1, 0,  2,     -1,  0 },
    { "A", "a",  -1, 0,  NPOS,  -1,  0 },

    // s1 is a proper prefix of s2
    { "ba", "ba1",  -1, -1,  0,     -1, -1 },
    { "ba", "ba1",  -1, -1,  1,     -1, -1 },
    { "ba", "ba1",  -1, -1,  2,     -1, -1 },
    { "bA", "ba",   -1,  0,  2,     -1,  0 },
    { "ba", "ba1",  -1, -1,  3,     -1, -1 },
    { "ba", "ba1",  -1, -1,  NPOS,  -1, -1 },

    // s2 is a proper prefix of s1
    { "ba1", "ba",  1, 1,  0,    -1, -1 },
    { "ba1", "ba",  1, 1,  1,    -1, -1 },
    { "ba1", "ba",  1, 1,  2,     0,  0 },
    { "ba",  "bA",  1, 0,  2,     1,  0 },
    { "ba1", "ba",  1, 1,  3,     1,  1 },
    { "ba1", "ba",  1, 1,  NPOS,  1,  1 },
    { "ba1", "ba",  1, 1,  NPOS,  1,  1 }
};
3038 
BOOST_AUTO_TEST_CASE(s_Compare)
{
    // Table-driven part: every row is checked twice, first with the right
    // operand passed as a C string and then as a std::string. In both cases
    // the whole-string and the substring (pos 0, length n) forms are
    // compared case-sensitively and case-insensitively.
    for (const SStrCompare& row : s_StrCompare) {
        const char* const c_s2 = row.s2;
        const string      s1(row.s1);
        const string      s2(row.s2);

        // Right operand: const char*
        s_CompareStr(NStr::Compare(s1, c_s2, NStr::eCase),   row.case_res);
        s_CompareStr(NStr::Compare(s1, c_s2, NStr::eNocase), row.nocase_res);
        s_CompareStr(NStr::Compare(s1, 0, row.n, c_s2, NStr::eCase),
                     row.n_case_res);
        s_CompareStr(NStr::Compare(s1, 0, row.n, c_s2, NStr::eNocase),
                     row.n_nocase_res);

        // Right operand: std::string
        s_CompareStr(NStr::Compare(s1, s2, NStr::eCase),   row.case_res);
        s_CompareStr(NStr::Compare(s1, s2, NStr::eNocase), row.nocase_res);
        s_CompareStr(NStr::Compare(s1, 0, row.n, s2, NStr::eCase),
                     row.n_case_res);
        s_CompareStr(NStr::Compare(s1, 0, row.n, s2, NStr::eNocase),
                     row.n_nocase_res);
    }

    // Substring of the left operand selected by (pos, len)
    BOOST_CHECK(NStr::Compare("0123", 0, 2, "12") <  0);
    BOOST_CHECK(NStr::Compare("0123", 1, 2, "12") == 0);
    BOOST_CHECK(NStr::Compare("0123", 2, 2, "12") >  0);
    BOOST_CHECK(NStr::Compare("0123", 3, 2,  "3") == 0);

    // std::string/CTempString with zero symbols inside

    string zs1("te\0st\0", 6);   // 6 bytes: 't' 'e' 0 's' 't' 0
    string zs2("te");

    BOOST_CHECK( zs1.compare(zs2)                         >  0 );
    BOOST_CHECK( NStr::Compare(zs1, zs2)                  >  0 );
    BOOST_CHECK( NStr::CompareCase(zs1, zs2)              >  0 );
    BOOST_CHECK( NStr::CompareNocase(zs1, zs2)            >  0 );
    BOOST_CHECK( NStr::Compare(zs2, zs1)                  <  0 );
    BOOST_CHECK( NStr::Compare(zs1, 0, zs1.length(), zs2) >  0 );
    BOOST_CHECK( NStr::Compare(zs1, 0, zs2.length(), zs2) == 0 );
    // Passed as plain char pointers the strings are expected to compare
    // equal: zs1 reads as "te" up to its first zero byte
    BOOST_CHECK( NStr::Compare(zs1.data(), zs2.data())    == 0 );  // char*
}
3084 
BOOST_AUTO_TEST_CASE(s_XCompare)
{
    // Same table and same sequence of checks as for NStr::Compare(), but
    // exercised through the XStr interface.
    for (const SStrCompare& row : s_StrCompare) {
        const char* const c_s2 = row.s2;
        const string      s1(row.s1);
        const string      s2(row.s2);

        // Right operand: const char*
        s_CompareStr(XStr::Compare(s1, c_s2, XStr::eCase),   row.case_res);
        s_CompareStr(XStr::Compare(s1, c_s2, XStr::eNocase), row.nocase_res);
        s_CompareStr(XStr::Compare(s1, 0, row.n, c_s2, XStr::eCase),
                     row.n_case_res);
        s_CompareStr(XStr::Compare(s1, 0, row.n, c_s2, XStr::eNocase),
                     row.n_nocase_res);

        // Right operand: std::string
        s_CompareStr(XStr::Compare(s1, s2, XStr::eCase),   row.case_res);
        s_CompareStr(XStr::Compare(s1, s2, XStr::eNocase), row.nocase_res);
        s_CompareStr(XStr::Compare(s1, 0, row.n, s2, XStr::eCase),
                     row.n_case_res);
        s_CompareStr(XStr::Compare(s1, 0, row.n, s2, XStr::eNocase),
                     row.n_nocase_res);
    }

    // Substring of the left operand selected by (pos, len)
    BOOST_CHECK(XStr::Compare("0123", 0, 2, "12") <  0);
    BOOST_CHECK(XStr::Compare("0123", 1, 2, "12") == 0);
    BOOST_CHECK(XStr::Compare("0123", 2, 2, "12") >  0);
    BOOST_CHECK(XStr::Compare("0123", 3, 2,  "3") == 0);
}
3116 
3117 
3118 //----------------------------------------------------------------------------
3119 // NStr::Split() -- delimiters test
3120 //----------------------------------------------------------------------------
3121 
// Input strings for the s_Split test case; s_SplitStr[i] is split using the
// delimiter set s_SplitDelim[i], so both arrays must have the same length.
static const char* s_SplitStr[] = {
    "abc",
    "---",
    "ab+cd+ef",
    "aaAAabBbbb",
    "-abc-def--ghijk---",
    "a12c3ba45acb678bc",
    "nodelim",
    "",
    "emptydelim",
    ";"
};
3134 
// Delimiter sets, one per entry of s_SplitStr (same index).
static const char* s_SplitDelim[] = {
    "def", "-", "+", "AB", "-", "abc", "*", "*", "", ";"
};
3138 
// Expected tokens for the s_Split test case, as one flat sequence: first the
// tokens of every s_SplitStr entry split with fSplit_MergeDelimiters, then
// the tokens of every entry split with the default flags. One source line
// per input string; the empty input string has no line in either half.
static const char* s_SplitRes[] =
{
    // merge delimiters -- NStr::fSplit_MergeDelimiters
    "abc",
    "", "",
    "ab", "cd", "ef",
    "aa", "ab", "bbb",
    "", "abc", "def", "ghijk", "",
    "", "12", "3", "45", "678", "",
    "nodelim",
    "emptydelim",
    "", "",

    // no merge delimiters -- default
    "abc",
    "", "", "", "",
    "ab", "cd", "ef",
    "aa", "", "ab", "bbb",
    "", "abc", "def", "", "ghijk", "", "", "",
    "", "12", "3", "", "45", "", "", "678", "", "",
    "nodelim",
    "emptydelim",
    "", ""
};
3163 
BOOST_AUTO_TEST_CASE(s_Split)
{
    const size_t count          = sizeof(s_SplitStr) / sizeof(s_SplitStr[0]);
    const size_t expected_count = sizeof(s_SplitRes) / sizeof(s_SplitRes[0]);

    // All tokens are collected in one list that is never cleared between
    // calls (the layout of s_SplitRes relies on Split() appending to it):
    // first every input with merged delimiters, then every input with the
    // default flags.
    list<string> split;
    for (size_t i = 0; i < count; i++) {
        NStr::Split(s_SplitStr[i], s_SplitDelim[i], split, NStr::fSplit_MergeDelimiters);
    }
    for (size_t i = 0; i < count; i++) {
        NStr::Split(s_SplitStr[i], s_SplitDelim[i], split, 0); // default
    }

    // Compare token by token; stop at once if there are more tokens than
    // expected, to avoid reading past the end of s_SplitRes.
    size_t j = 0;
    ITERATE(list<string>, it, split) {
        BOOST_REQUIRE(j < expected_count);
        BOOST_CHECK(NStr::Compare(*it, s_SplitRes[j++]) == 0);
    }
    // The loop above only looks at the tokens that were actually produced,
    // so a result that is too short has to be detected separately.
    BOOST_CHECK_EQUAL(j, expected_count);
}
3181 
3182 
3183 //----------------------------------------------------------------------------
3184 // NStr::Split()
3185 //----------------------------------------------------------------------------
3186 
/// One case for the s_Split_Flags test.
struct SSplit
{
    NStr::TSplitFlags flags;     // flags passed to NStr::Split()
    const char*       str;       // string to split
    const char*       delim;     // delimiters
    // Expected tokens, formatted by the test as "<pos>: <token>" items
    // joined with ", "; empty when Split() is expected to throw
    const char*       expected;
};
3194 
3195 static const SSplit s_SplitTest[] =
3196 {
3197     { 0,
3198             "one, two ", ", ",
3199             "0: one, 4: , 5: two, 9: " },
3200     { NStr::fSplit_MergeDelimiters,
3201             "one, two ", ", ",
3202             "0: one, 5: two, 9: " },
3203     { NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate,
3204             "one, two ", ", ",
3205             "0: one, 5: two" },
3206 
3207     { NStr::fSplit_Truncate_Begin,
3208             "---a----b-c---", "-",
3209             "3: a, 5: , 6: , 7: , 8: b, 10: c, 12: , 13: , 14: " },
3210     { NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate_Begin,
3211             "---a----b-c---", "-",
3212             "3: a, 8: b, 10: c, 12: " },
3213     { NStr::fSplit_Truncate_End,
3214             "---a----b-c---", "-",
3215             "0: , 1: , 2: , 3: a, 5: , 6: , 7: , 8: b, 10: c" },
3216     { NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate_End,
3217             "---a----b-c---", "-",
3218             "0: , 3: a, 8: b, 10: c" },
3219     { NStr::fSplit_Truncate,
3220             "---a----b-c---", "-",
3221             "3: a, 5: , 6: , 7: , 8: b, 10: c" },
3222     { NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate,
3223             "---a----b-c---", "-",
3224             "3: a, 8: b, 10: c" },
3225 
3226     { NStr::fSplit_CanEscape,
3227             "asdf jkl\\", " ", "" }, // throws
3228     { NStr::fSplit_CanEscape | NStr::fSplit_ByPattern,
3229             "\\x;y z\\; a'bc\\\\; de\"f;\\ ghi;",
3230             "; ",
3231             "0: x;y z; a'bc\\, 17: de\"f; ghi;" },
3232     { NStr::fSplit_CanSingleQuote | NStr::fSplit_MergeDelimiters,
3233             "'abc'' def' g\\hi, jk\"l",
3234             ", ",
3235             "0: abc' def, 12: g\\hi, 18: jk\"l" },
3236     { NStr::fSplit_CanSingleQuote | NStr::fSplit_ByPattern,
3237             "It's a trap!", ", ", "" }, // throws
3238     { NStr::fSplit_CanSingleQuote | NStr::fSplit_CanEscape,
3239             "It\\'s... 'Monty Python''s' \"Flying\" Circus\\!",
3240             " ",
3241             "0: It's..., 9: Monty Python's, 27: \"Flying\", 36: Circus!" },
3242     { NStr::fSplit_CanSingleQuote | NStr::fSplit_CanEscape  | NStr::fSplit_ByPattern | NStr::fSplit_MergeDelimiters,
3243             "<><>That\\'s<>s<'> n<'>t<><all>,<><>'f<>lks'",
3244             "<>",
3245             "0: , 4: That's, 13: s<> n<>t, 25: <all>,, 35: f<>lks" },
3246     { NStr::fSplit_CanSingleQuote | NStr::fSplit_CanEscape  | NStr::fSplit_ByPattern /* + no merge delimiters */,
3247             "<><>That\\'s<>s<'> n<'>t<><all>,<><>'f<>lks'",
3248             "<>",
3249             "0: , 2: , 4: That's, 13: s<> n<>t, 25: <all>,, 33: , 35: f<>lks" },
3250     { NStr::fSplit_CanDoubleQuote, "\"Forget something?", " ", "" }, // throws
3251     { NStr::fSplit_CanDoubleQuote | NStr::fSplit_ByPattern,
3252             "I said\\, \"\"\"Time's up, everyone!\"\"\"",
3253             ", ",
3254             "0: I said\\, 9: \"Time's up, everyone!\"" },
3255     { NStr::fSplit_CanDoubleQuote | NStr::fSplit_CanSingleQuote,
3256             " \"ne'st\" '\\eg\"gs'",
3257             " ",
3258             "0: , 1: ne'st, 9: \\eg\"gs" },
3259     { NStr::fSplit_CanDoubleQuote | NStr::fSplit_CanSingleQuote /* + no merge delimiters */,
3260             " \"ne'st\" '\\eg\"gs'",
3261             " ",
3262             "0: , 1: ne'st, 9: \\eg\"gs" },
3263     { NStr::fSplit_CanQuote | NStr::fSplit_CanEscape | NStr::fSplit_ByPattern | NStr::fSplit_MergeDelimiters,
3264             "abc\\, def, \"gh'i\"\", j\\\"kl\", 'm\"no, p''qr\\'s', ",
3265             ", ",
3266             "0: abc, def, 11: gh'i\", j\"kl, 28: m\"no, p'qr's, 46: " }
3267 };
3268 
BOOST_AUTO_TEST_CASE(s_Split_Flags)
{
    // For every case: split, render the tokens with their start positions
    // as "<pos>: <token>, <pos>: <token>, ..." and compare that text with
    // the expectation. A CStringException is rendered as an empty string,
    // which is what the "throws" cases expect.
    const size_t n_tests = sizeof(s_SplitTest) / sizeof(s_SplitTest[0]);

    vector<CTempStringEx> tokens;
    vector<SIZE_TYPE>     positions;
    string                actual;

    for (size_t i = 0;  i < n_tests;  ++i) {
        const SSplit& data = s_SplitTest[i];

        // Handed to Split() below; kept alive for the whole iteration
        // because the tokens are read only after Split() has returned
        CTempString_Storage storage;
        tokens.clear();
        positions.clear();
        try {
            NStr::Split(data.str, data.delim, tokens, data.flags, &positions, &storage);
            BOOST_REQUIRE_EQUAL(tokens.size(), positions.size());

            CNcbiOstrstream oss;
            for (size_t j = 0;  j < tokens.size();  ++j) {
                if (j != 0) {
                    oss << ", ";
                }
                oss << positions[j] << ": " << tokens[j];
            }
            actual = CNcbiOstrstreamToString(oss);
        }
        catch (CStringException&) {
            actual.clear();
        }
        BOOST_CHECK_EQUAL(actual, data.expected);
    }
}
3299 
3300 
3301 //----------------------------------------------------------------------------
3302 // NStr::SplitInTwo()
3303 //----------------------------------------------------------------------------
3304 
/// One case for the s_SplitInTwo test.
struct SSplitInTwo {
    const char*       str;            // string to split
    const char*       delim;          // delimiters
    NStr::TSplitFlags flags;          // flags passed to NStr::SplitInTwo()
    const char*       expected_str1;  // expected first output string
    const char*       expected_str2;  // expected second output string
    bool              expected_ret;   // expected return value of SplitInTwo()
};
3313 
// Columns: string, delimiters, flags, expected first part,
// expected second part, expected return value (see SSplitInTwo).
// "no effect" marks rows where the flag does not change the result compared
// to the corresponding row without it.
static const SSplitInTwo s_SplitInTwoTest[] =
{
    { "ab+cd+ef",    "+",      0,                            "ab", "cd+ef",     true  },
    { "ab+cd+ef",    "+",      NStr::fSplit_MergeDelimiters, "ab", "cd+ef",     true  },
    { "ab+++cd+ef",  "+",      NStr::fSplit_MergeDelimiters, "ab", "cd+ef",     true  },
    { "+++ab+cd",    "+",      0,                            "",   "++ab+cd",   true  },
    { "+++ab+cd",    "+",      NStr::fSplit_MergeDelimiters, "",   "ab+cd",     true  },
    { "+++ab+cd",    "+",      NStr::fSplit_Truncate_Begin,  "ab", "cd",        true  },
    { "+++ab+cd",    "+",      NStr::fSplit_Truncate_End,    "",   "++ab+cd",   true  }, // no effect
    { "+++ab+cd",    "+",      NStr::fSplit_Truncate,        "ab", "cd",        true  },
    { "ab+++",       "+",      0,                            "ab", "++",        true  },
    { "ab+++",       "+",      NStr::fSplit_MergeDelimiters, "ab", "",          true  },
    { "ab+++",       "+",      NStr::fSplit_Truncate_End,    "ab", "++",        true  }, // no effect
    { "ab+++",       "+",      NStr::fSplit_Truncate,        "ab", "++",        true  }, // no effect
    { "ab+++",       "+",      NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate,
                                                             "ab", "",          true  }, // no effect
    { "ab+cd++",     "+",      0,                            "ab", "cd++",      true  },
    { "ab+cd++",     "+",      NStr::fSplit_MergeDelimiters, "ab", "cd++",      true  }, // no effect
    { "ab+cd++",     "+",      NStr::fSplit_Truncate_End,    "ab", "cd++",      true  }, // no effect
    { "ab+cd++",     "+",      NStr::fSplit_Truncate,        "ab", "cd++",      true  }, // no effect
    { "ab+cd++",     "+",      NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate,
                                                             "ab", "cd++",      true  }, // no effect
    // Several delimiter characters
    { "aaAAabBbbb",  "AB",     0,                            "aa", "AabBbbb",   true  },
    { "aaABAabBbbb", "AB",     NStr::fSplit_MergeDelimiters, "aa", "abBbbb",    true  },
    { "aaCAabBCbbb", "ABC",    0,                            "aa", "AabBCbbb",  true  },
    { "aaCAabBCbbb", "ABC",    NStr::fSplit_MergeDelimiters, "aa", "abBCbbb",   true  },
    // Delimiter at the very beginning / end of the string
    { "-beg-delim-", "-",      0,                            "",   "beg-delim-",true  },
    { "-beg-delim-", "-",      NStr::fSplit_MergeDelimiters, "",   "beg-delim-",true  }, // no effect
    { "-beg-delim-", "-",      NStr::fSplit_Truncate_End,    "",   "beg-delim-",true  }, // no effect
    { "-beg-delim-", "-",      NStr::fSplit_Truncate_Begin,  "beg","delim-",    true  },
    { "-beg-delim-", "-",      NStr::fSplit_Truncate,        "beg","delim-",    true  },
    { "end-delim:",  ":",      0,                            "end-delim",  "",  true  },
    { "end-delim:",  ":",      NStr::fSplit_Truncate_End,    "end-delim",  "",  true  }, // no effect
    { "end-delim:",  ":",      NStr::fSplit_Truncate,        "end-delim",  "",  true  }, // no effect
    // Nothing to split on: the expected return value is false
    { "nodelim",     ".,:;-+", 0,                            "nodelim",    "",  false },
    { "emptydelim",  "",       0,                            "emptydelim", "",  false },
    { "", "emtpystring",       0,                            "", "",            false },
    { "", "",                  0,                            "", "",            false },
    // Delimiter as a set of characters vs. as a whole pattern
    { "a b:c: d",    ": ",     NStr::fSplit_MergeDelimiters, "a",     "b:c: d", true  },
    { "a b:c: d",    ": ",     NStr::fSplit_ByPattern,       "a b:c", "d",      true  },
    // Escaped and quoted delimiters
    { "abc\\\\:",    ":",      NStr::fSplit_CanEscape,       "abc\\", "",       true  },
    { "abc\\:",      ":",      NStr::fSplit_CanEscape,       "abc:", "",        false },
    { "abc\\\\: ",   ": ",     NStr::fSplit_CanEscape | NStr::fSplit_ByPattern,
                                                             "abc\\", "",       true  },
    { "abc\\: ",     ": ",     NStr::fSplit_CanEscape | NStr::fSplit_ByPattern,
                                                             "abc: ", "",       false },
    { "'abc':",      ":",      NStr::fSplit_CanSingleQuote,  "abc",  "",        true  },
    { "'abc:'",      ":",      NStr::fSplit_CanSingleQuote,  "abc:", "",        false }
};
3363 
BOOST_AUTO_TEST_CASE(s_SplitInTwo)
{
    // Runs every row of s_SplitInTwoTest through NStr::SplitInTwo() and
    // checks the return value and both output strings.
    const size_t n_tests = sizeof(s_SplitInTwoTest) / sizeof(s_SplitInTwoTest[0]);

    // Output strings are shared by all iterations
    CTempStringEx head, tail;

    for (size_t i = 0;  i != n_tests;  ++i) {
        const SSplitInTwo& data = s_SplitInTwoTest[i];

        // Handed to SplitInTwo(); a fresh one is used for every case
        CTempString_Storage storage;
        const bool found = NStr::SplitInTwo(data.str, data.delim, head, tail,
                                            data.flags, &storage);

        BOOST_CHECK_EQUAL(data.expected_ret,  found);
        BOOST_CHECK_EQUAL(data.expected_str1, head);
        BOOST_CHECK_EQUAL(data.expected_str2, tail);
    }
}
3379 
3380 
3381 //----------------------------------------------------------------------------
3382 // NStr::SplitByPattern()
3383 //----------------------------------------------------------------------------
3384 
// Input strings for the s_SplitByPattern test case; each one is split on
// the pattern "<tag>" used by that test.
static const char* s_SplitPatternStr[] = {
    "<tag>",
    "<tag><tag><tag>",
    "begin<tag>",
    "begin<tag><tag>",
    "<tag><tag>end",
    "<tag>text<tag>",
    "<tag>begin<tag>text<tag><tag><tag>end<tag>"
};
3394 
// Expected tokens for the s_SplitByPattern test case, as one flat sequence.
// One source line per input string; every line ends with "#", the stopper
// the test appends after each split so that the token groups of adjacent
// inputs cannot be confused with each other.
static const char* s_SplitPatternRes[] =
{
    // merge delimiters -- NStr::fSplit_MergeDelimiters
    "", "",                         "#",
    "", "",                         "#",
    "begin", "",                    "#",
    "begin", "",                    "#",
    "", "end",                      "#",
    "", "text", "",                 "#",
    "", "begin", "text", "end", "", "#",

    // no merge delimiters -- default
    "", "",                         "#",
    "", "", "", "",                 "#",
    "begin", "",                    "#",
    "begin", "", "",                "#",
    "", "", "end",                  "#",
    "", "text", "",                 "#",
    "", "begin", "text", "", "", "end", "", "#"
};
3415 
BOOST_AUTO_TEST_CASE(s_SplitByPattern)
{
    const char* pattern = "<tag>";
    const char* stopper = "#";  // marks the end of each input's token group

    const size_t n_inputs   = sizeof(s_SplitPatternStr) / sizeof(s_SplitPatternStr[0]);
    const size_t n_expected = sizeof(s_SplitPatternRes) / sizeof(s_SplitPatternRes[0]);

    // Collect the tokens of all inputs in a single vector, a stopper after
    // each input: first with merged delimiters, then with default flags.
    vector<string> split;
    for (size_t i = 0;  i < n_inputs;  ++i) {
        NStr::SplitByPattern(s_SplitPatternStr[i], pattern, split, NStr::fSplit_MergeDelimiters);
        split.push_back(stopper);
    }
    for (size_t i = 0;  i < n_inputs;  ++i) {
        NStr::SplitByPattern(s_SplitPatternStr[i], pattern, split); // default
        split.push_back(stopper);
    }

    // Compare with the expected sequence; bail out if there are more
    // tokens than expected rather than read past the end of the table.
    for (size_t j = 0;  j < split.size();  ++j) {
        BOOST_REQUIRE(j < n_expected);
        BOOST_CHECK(NStr::Compare(split[j], s_SplitPatternRes[j]) == 0);
    }
}
3440 
3441 
3442 //----------------------------------------------------------------------------
3443 // NStr::ToLower/ToUpper()
3444 //----------------------------------------------------------------------------
3445 
BOOST_AUTO_TEST_CASE(s_Case)3446 BOOST_AUTO_TEST_CASE(s_Case)
3447 {
3448     static const struct {
3449         const char* orig;
3450         const char* x_lower;
3451         const char* x_upper;
3452     } s_Tri[] = {
3453         { "", "", "" },
3454         { "a", "a", "A" },
3455         { "4", "4", "4" },
3456         { "B5a", "b5a", "B5A" },
3457         { "baObaB", "baobab", "BAOBAB" },
3458         { "B", "b", "B" },
3459         { "B", "b", "B" }
3460     };
3461     static const char s_Indiff[] =
3462         "#@+_)(*&^%/?\"':;~`'\\!\v|=-0123456789.,><{}[]\t\n\r";
3463 
3464     {{
3465         BOOST_CHECK( NStr::IsLower(""));
3466         BOOST_CHECK( NStr::IsUpper(""));
3467         BOOST_CHECK( NStr::IsLower("123 .,-"));
3468         BOOST_CHECK( NStr::IsUpper("123 .,-"));
3469         BOOST_CHECK( NStr::IsLower("123 a.,-"));
3470         BOOST_CHECK(!NStr::IsUpper("123 a.,-"));
3471         BOOST_CHECK(!NStr::IsLower("123 A.,-"));
3472         BOOST_CHECK( NStr::IsUpper("123 A.,-"));
3473      }}
3474 
3475     {{
3476         char indiff[sizeof(s_Indiff) + 1];
3477         ::strcpy(indiff, s_Indiff);
3478         BOOST_CHECK_EQUAL(NStr::Compare(s_Indiff, indiff), 0);
3479         BOOST_CHECK_EQUAL(NStr::Compare(s_Indiff, NStr::ToLower(indiff)), 0);
3480         BOOST_CHECK(NStr::IsLower(indiff));
3481         ::strcpy(indiff, s_Indiff);
3482         BOOST_CHECK_EQUAL(NStr::Compare(s_Indiff, NStr::ToUpper(indiff)), 0);
3483         BOOST_CHECK(NStr::IsUpper(indiff));
3484         BOOST_CHECK_EQUAL(NStr::Compare(s_Indiff, NStr::ToLower(indiff)), 0);
3485         BOOST_CHECK(NStr::IsLower(indiff));
3486     }}
3487     {{
3488         string indiff;
3489         indiff = s_Indiff;
3490         BOOST_CHECK_EQUAL(NStr::Compare(s_Indiff, indiff), 0);
3491         BOOST_CHECK_EQUAL(NStr::Compare(s_Indiff, NStr::ToLower(indiff)), 0);
3492         BOOST_CHECK(NStr::IsLower(indiff));
3493         indiff = s_Indiff;
3494         BOOST_CHECK_EQUAL(NStr::Compare(s_Indiff, NStr::ToUpper(indiff)), 0);
3495         BOOST_CHECK(NStr::IsUpper(indiff));
3496         BOOST_CHECK_EQUAL(NStr::Compare(s_Indiff, NStr::ToLower(indiff)), 0);
3497         BOOST_CHECK(NStr::IsLower(indiff));
3498     }}
3499 
3500     for (size_t i = 0;  i < sizeof(s_Tri) / sizeof(s_Tri[0]);  i++) {
3501         BOOST_CHECK_EQUAL(NStr::Compare(s_Tri[i].orig, s_Tri[i].x_lower, NStr::eNocase),0);
3502         BOOST_CHECK_EQUAL(NStr::Compare(s_Tri[i].orig, s_Tri[i].x_upper, NStr::eNocase),0);
3503         string orig = s_Tri[i].orig;
3504         BOOST_CHECK_EQUAL(NStr::Compare(orig, s_Tri[i].x_lower, NStr::eNocase), 0);
3505         BOOST_CHECK_EQUAL(NStr::Compare(orig, s_Tri[i].x_upper, NStr::eNocase), 0);
3506         string x_lower = s_Tri[i].x_lower;
3507         {{
3508             char x_str[16];
3509             ::strcpy(x_str, s_Tri[i].orig);
3510             BOOST_CHECK(::strlen(x_str) < sizeof(x_str));
3511             BOOST_CHECK_EQUAL(NStr::Compare(NStr::ToLower(x_str), x_lower), 0);
3512             BOOST_CHECK(NStr::IsLower(x_str));
3513             ::strcpy(x_str, s_Tri[i].orig);
3514             BOOST_CHECK_EQUAL(NStr::Compare(NStr::ToUpper(x_str), s_Tri[i].x_upper),0);
3515             BOOST_CHECK(NStr::IsUpper(x_str));
3516             BOOST_CHECK_EQUAL(NStr::Compare(x_lower, NStr::ToLower(x_str)), 0);
3517             BOOST_CHECK(NStr::IsLower(x_str));
3518         }}
3519         {{
3520             string x_str;
3521             x_lower = s_Tri[i].x_lower;
3522             x_str = s_Tri[i].orig;
3523             BOOST_CHECK_EQUAL(NStr::Compare(NStr::ToLower(x_str), x_lower), 0);
3524             BOOST_CHECK(NStr::IsLower(x_str));
3525             x_str = s_Tri[i].orig;
3526             BOOST_CHECK_EQUAL(NStr::Compare(NStr::ToUpper(x_str), s_Tri[i].x_upper),0);
3527             BOOST_CHECK(NStr::IsUpper(x_str));
3528             BOOST_CHECK_EQUAL(NStr::Compare(x_lower, NStr::ToLower(x_str)), 0);
3529             BOOST_CHECK(NStr::IsLower(x_str));
3530         }}
3531     }
3532 }
3533 
3534 
3535 //----------------------------------------------------------------------------
3536 // NStr::str[n]casecmp()
3537 //----------------------------------------------------------------------------
3538 
// Checks NStr::strncasecmp() (comparison limited to the first N characters)
// and NStr::strcasecmp() (whole-string comparison), both ignoring case.
BOOST_AUTO_TEST_CASE(s_strcasecmp)
{
    // Only the first character takes part in the comparison (n == 1)
    BOOST_CHECK_EQUAL(NStr::strncasecmp("ab", "a", 1), 0);
    BOOST_CHECK_EQUAL(NStr::strncasecmp("Ab", "a", 1), 0);
    BOOST_CHECK_EQUAL(NStr::strncasecmp("a", "Ab", 1), 0);
    BOOST_CHECK_EQUAL(NStr::strncasecmp("a", "ab", 1), 0);

    // Whole strings: equal regardless of case ...
    BOOST_CHECK_EQUAL(NStr::strcasecmp("a",  "A"), 0);
    BOOST_CHECK_EQUAL(NStr::strcasecmp("a",  "a"), 0);
    // ... different when the lengths differ (only "non-zero" is asserted,
    // not the sign of the result) ...
    BOOST_CHECK(NStr::strcasecmp("ab", "a") != 0);
    BOOST_CHECK(NStr::strcasecmp("a", "ab") != 0);
    BOOST_CHECK(NStr::strcasecmp("a",   "") != 0);
    BOOST_CHECK(NStr::strcasecmp("",   "a") != 0);
    // ... and two empty strings are equal
    BOOST_CHECK_EQUAL(NStr::strcasecmp("",    ""), 0);
}
3554 
3555 
3556 //----------------------------------------------------------------------------
3557 // NStr::AStrEquiv()  &   NStr::Equal*()
3558 //----------------------------------------------------------------------------
3559 
// Checks AStrEquiv() with the PCase/PNocase predicates and the
// NStr::Equal*() family, including strings with embedded zero characters.
BOOST_AUTO_TEST_CASE(s_Equal)
{
    string as1("abcdefg ");   // reference string
    string as2("abcdefg ");   // exact copy of as1
    string as3("aBcdEfg ");   // differs from as1 only in letter case
    string as4("lsekfu");     // unrelated string

    BOOST_CHECK_EQUAL( AStrEquiv(as1, as2, PNocase()), true );
    BOOST_CHECK_EQUAL( AStrEquiv(as1, as3, PNocase()), true );
    BOOST_CHECK_EQUAL( AStrEquiv(as3, as4, PNocase()), false );
    BOOST_CHECK_EQUAL( AStrEquiv(as1, as2, PCase()),   true );
    BOOST_CHECK_EQUAL( AStrEquiv(as1, as3, PCase()),   false );
    BOOST_CHECK_EQUAL( AStrEquiv(as2, as4, PCase()),   false );

    BOOST_CHECK_EQUAL( NStr::EqualNocase(as1, as2),    true );
    BOOST_CHECK_EQUAL( NStr::EqualNocase(as1, as3),    true );
    BOOST_CHECK_EQUAL( NStr::EqualNocase(as3, as4),    false );
    BOOST_CHECK_EQUAL( NStr::EqualCase(as1, as2),      true );
    BOOST_CHECK_EQUAL( NStr::EqualCase(as1, as3),      false );
    BOOST_CHECK_EQUAL( NStr::EqualCase(as2, as4),      false );


    // std::string/CTempString with zero symbols inside

    // Six characters, two of them '\0'; a comparison that stopped at the
    // first zero would wrongly report zs1 equal to "te".
    string zs1{ 't', 'e', 0, 's', 't', 0 };
    string zs2("te");

    BOOST_CHECK_EQUAL( zs1 == zs2,                  false );
    BOOST_CHECK_EQUAL( NStr::Equal(zs1, zs2),       false );
    BOOST_CHECK_EQUAL( NStr::EqualCase(zs1, zs2),   false );
    BOOST_CHECK_EQUAL( NStr::EqualNocase(zs1, zs2), false );
    // Sub-range form: the whole of zs1 differs from zs2, while its first
    // zs2.length() characters match
    BOOST_CHECK_EQUAL( NStr::Equal(zs1, 0, zs1.length(), zs2), false );
    BOOST_CHECK_EQUAL( NStr::Equal(zs1, 0, zs2.length(), zs2), true  );
}
3594 
3595 
3596 //----------------------------------------------------------------------------
3597 // NStr::MatchesMask()
3598 //----------------------------------------------------------------------------
3599 
// Checks NStr::MatchesMask(): each line states whether the string (first
// argument) is expected to match the mask (second argument), optionally
// ignoring case.
BOOST_AUTO_TEST_CASE(s_MatchesMask)
{
    // '*' wildcard
    BOOST_CHECK(   NStr::MatchesMask( "aaa", "*a"                            ) );
    BOOST_CHECK( ! NStr::MatchesMask( "bbb", "*a"                            ) );
    BOOST_CHECK(   NStr::MatchesMask( "bba", "*a"                            ) );
    BOOST_CHECK( ! NStr::MatchesMask( "aab", "*a"                            ) );
    BOOST_CHECK(   NStr::MatchesMask( "aaa", "*a*"                           ) );
    BOOST_CHECK(   NStr::MatchesMask( "AAA", "*a",             NStr::eNocase ) );
    BOOST_CHECK(   NStr::MatchesMask( "aaa", "*"                             ) );
    // Bracketed sets and ranges, including empty and unterminated ones
    BOOST_CHECK(   NStr::MatchesMask( "aaa", "[a-z][a]a"                     ) );
    BOOST_CHECK(   NStr::MatchesMask( "aaa", "[!b][!b-c]a"                   ) );
    BOOST_CHECK( ! NStr::MatchesMask( "aaa", "a[]"                           ) );
    BOOST_CHECK( ! NStr::MatchesMask( "aaa", "a[a-a][a-"                     ) );
    BOOST_CHECK(   NStr::MatchesMask( "aaa", "a[a-a][a-]"                    ) );
    // Backslash in the string and in the mask
    BOOST_CHECK( ! NStr::MatchesMask( "a\\", "a\\"                           ) );
    BOOST_CHECK(   NStr::MatchesMask( "a\\", "a\\\\"                         ) );
    BOOST_CHECK(   NStr::MatchesMask( "a\\", "a[\\]"                         ) );
    BOOST_CHECK(   NStr::MatchesMask( "a\\", "a[\\\\]"                       ) );
    BOOST_CHECK( ! NStr::MatchesMask( "aaa", "[a-b][a-b][a-b"                ) );
    BOOST_CHECK(   NStr::MatchesMask( "a*a", "a\\*a"                         ) );
    // Brackets used as set members
    BOOST_CHECK(   NStr::MatchesMask( "a[]", "[a-z][[][]]"                   ) );
    BOOST_CHECK( ! NStr::MatchesMask( "a[]", "[a-z][[][[\\]]"                ) );
    // '!' and '-' inside sets, case-insensitive matching
    BOOST_CHECK( ! NStr::MatchesMask( "a!b", "[a-z][!][A-Z]",  NStr::eNocase ) );
    BOOST_CHECK(   NStr::MatchesMask( "a!b", "[a-z][!A-Z]b",   NStr::eNocase ) );
    BOOST_CHECK(   NStr::MatchesMask( "a!b", "[a-z][!][A-Z]b", NStr::eNocase ) );
    BOOST_CHECK(   NStr::MatchesMask( "a-b", "[a-z][0-][A-Z]", NStr::eNocase ) );
    BOOST_CHECK(   NStr::MatchesMask( "a-b", "[a-z][-9][A-Z]", NStr::eNocase ) );
}
3628 
3629 
3630 //----------------------------------------------------------------------------
3631 // Reference counting
3632 //----------------------------------------------------------------------------
3633 
// Reports, via LOG_POST only, whether a copied std::string shares its
// character buffer with the source (equal data() pointers) and whether
// calling c_str() or begin() breaks that sharing.
// The case contains no BOOST_* assertions: it is purely diagnostic.
BOOST_AUTO_TEST_CASE(s_ReferenceCounting)
{
    string s1(10, '1');
    string s2(s1);
    if ( s1.data() != s2.data() ) {
        LOG_POST("BAD: string reference counting is OFF");
    }
    else {
        LOG_POST("GOOD: string reference counting is ON");
        for (size_t i = 0; i < 4; i++) {
            LOG_POST("Restoring reference counting");
            s2 = s1;
            if ( s1.data() != s2.data() ) {
                LOG_POST("BAD: cannot restore string reference counting");
                continue;
            }
            LOG_POST("GOOD: reference counting is ON");

            // Odd passes probe begin() on the copy, even passes probe
            // c_str() on the source
            const bool  use_begin = (i & 1) != 0;
            const char* type = use_begin ? "str.begin()" : "str.c_str()";
            LOG_POST("Calling " << type);
            if ( use_begin ) {
                s2.begin();
            }
            else {
                s1.c_str();
            }
            if ( s1.data() == s2.data() ) {
                LOG_POST("GOOD: " << type << " doesn't affect reference counting");
                continue;
            }
            LOG_POST("OK: "<< type << " turns reference counting OFF");

            LOG_POST("Restoring reference counting");
            s2 = s1;
            if ( s1.data() != s2.data() ) {
                LOG_POST("BAD: " << type << " turns reference counting OFF completely");
                continue;
            }
            LOG_POST("GOOD: reference counting is ON");

            // The remaining probes apply to c_str() only
            if ( use_begin ) continue;

            LOG_POST("Calling " << type << " on source");
            s1.c_str();
            if ( s1.data() != s2.data() ) {
                LOG_POST("BAD: " << type << " on source turns reference counting OFF");
            }

            LOG_POST("Calling "<< type <<" on destination");
            s2.c_str();
            if ( s1.data() != s2.data() ) {
                LOG_POST("BAD: " << type << " on destination turns reference counting OFF");
            }
        }
    }
}
3690 
3691 
3692 //----------------------------------------------------------------------------
3693 // NStr::Find*()
3694 //----------------------------------------------------------------------------
3695 
3696 struct SFindStr {
3697     const char*  str;
3698     const char*  pattern;
3699     NStr::ECase  use_case;
3700     NStr::EDirection direction;
3701     SIZE_TYPE    occurence;
3702     SIZE_TYPE    result;
3703 };
3704 
// Abbreviations to shorten the test descriptions.  Each one expands to a
// "<case>, <direction>" pair, i.e. to two consecutive arguments or two
// consecutive SFindStr initializers:
//   C = NStr::eCase,          N = NStr::eNocase
//   F = NStr::eForwardSearch, R = NStr::eReverseSearch
#define f_CF  NStr::eCase,   NStr::eForwardSearch
#define f_CR  NStr::eCase,   NStr::eReverseSearch
#define f_NF  NStr::eNocase, NStr::eForwardSearch
#define f_NR  NStr::eNocase, NStr::eReverseSearch
3710 
3711 static const SFindStr s_FindStrTest[] =
3712 {
3713     // eCase + eForwardSearch
3714     { "bcbab",          "abc", f_CF, 0,  NPOS },
3715     { "abc",            "abc", f_CF, 0,  0    },
3716     { "abc++",          "abc", f_CF, 0,  0    },
3717     { "++abc",          "abc", f_CF, 0,  2    },
3718     { "ab",             "abc", f_CF, 0,  NPOS },
3719     { "+++abc++",       "abc", f_CF, 0,  3    },
3720     { "+abc+abc++abc+", "abc", f_CF, 0,  1    },
3721     { "+abc+abc++abc+", "abc", f_CF, 1,  5    },
3722     { "+abc+abc++abc+", "abc", f_CF, 2,  10   },
3723     { "+abc+abc++abc+", "abc", f_CF, 3,  NPOS },
3724     { "cabc+abc++abca", "abc", f_CF, 3,  NPOS },
3725 
3726     // eCase + eReverseSearch
3727     { "bcbab",          "abc", f_CR, 0,  NPOS },
3728     { "abc",            "abc", f_CR, 0,  0    },
3729     { "abc++",          "abc", f_CR, 0,  0    },
3730     { "++abc",          "abc", f_CR, 0,  2    },
3731     { "ab",             "abc", f_CR, 0,  NPOS },
3732     { "+++abc++",       "abc", f_CR, 0,  3    },
3733     { "+abc+abc++abc+", "abc", f_CR, 0,  10   },
3734     { "+abc+abc++abc+", "abc", f_CR, 1,  5    },
3735     { "+abc+abc++abc+", "abc", f_CR, 2,  1    },
3736     { "+abc+abc++abc+", "abc", f_CR, 3,  NPOS },
3737     { "cabc+abc++abca", "abc", f_CR, 3,  NPOS },
3738 
3739     // eNocase + eForwardSearch
3740     { "bcbab",          "ABC", f_NF, 0,  NPOS },
3741     { "abc",            "ABC", f_NF, 0,  0    },
3742     { "abc++",          "ABC", f_NF, 0,  0    },
3743     { "++abc",          "ABC", f_NF, 0,  2    },
3744     { "ab",             "ABC", f_NF, 0,  NPOS },
3745     { "+++abc++",       "ABC", f_NF, 0,  3    },
3746     { "+abc+abc++abc+", "ABC", f_NF, 0,  1    },
3747     { "+abc+abc++abc+", "ABC", f_NF, 1,  5    },
3748     { "+abc+abc++abc+", "ABC", f_NF, 2,  10   },
3749     { "+abc+abc++abc+", "ABC", f_NF, 3,  NPOS },
3750     { "cabc+abc++abca", "ABC", f_NF, 3,  NPOS },
3751 
3752     // eNocase + eReverseSearch
3753     { "bcbab",          "ABC", f_NR, 0,  NPOS },
3754     { "abc",            "ABC", f_NR, 0,  0    },
3755     { "abc++",          "ABC", f_NR, 0,  0    },
3756     { "++abc",          "ABC", f_NR, 0,  2    },
3757     { "ab",             "ABC", f_NR, 0,  NPOS },
3758     { "+++abc++",       "ABC", f_NR, 0,  3    },
3759     { "+abc+abc++abc+", "ABC", f_NR, 0,  10   },
3760     { "+abc+abc++abc+", "ABC", f_NR, 1,  5    },
3761     { "+abc+abc++abc+", "ABC", f_NR, 2,  1    },
3762     { "+abc+abc++abc+", "ABC", f_NR, 3,  NPOS },
3763     { "cabc+abc++abca", "ABC", f_NR, 3,  NPOS }
3764 };
3765 
3766 
// Checks NStr::Find() against every row of s_FindStrTest, then the older
// positional-range overloads of Find()/FindCase()/FindNoCase().
BOOST_AUTO_TEST_CASE(s_Find)
{
    {
        const size_t count = sizeof(s_FindStrTest) / sizeof(s_FindStrTest[0]);
        for (size_t i = 0; i < count; i++) {
            // Bind by reference: there is no need to copy the row on
            // every iteration
            const SFindStr& t = s_FindStrTest[i];
            BOOST_CHECK_EQUAL(NStr::Find(t.str, t.pattern, t.use_case, t.direction, t.occurence), t.result);
            /*
            // Debugging aid: enable to print the index of a failing row
            if (NStr::Find(t.str, t.pattern, t.use_case, t.direction, t.occurence) != t.result) {
                cout << "s_FindStrTest [ " << i  << " ]"<< endl;
            }
            */
        }
    }

    // Backward compatibility test
    // @deprecated
    // TODO: change to Find() later.

    // No match at all, whichever occurrence is requested
    BOOST_CHECK_EQUAL(NStr::FindCase  ("abcd", "xyz"),                           NPOS);
    BOOST_CHECK_EQUAL(NStr::FindCase  ("abcd", "xyz", 0, NPOS, NStr::eLast),     NPOS);
    BOOST_CHECK_EQUAL(NStr::FindNoCase("abcd", "xyz"),                           NPOS);
    BOOST_CHECK_EQUAL(NStr::FindNoCase("abcd", "xyz", 0, NPOS, NStr::eLast),     NPOS);
    // Pattern differing only in case: found by FindNoCase() alone
    BOOST_CHECK_EQUAL(NStr::FindCase  ("abcd", "aBc", 0, NPOS, NStr::eLast),     NPOS);
    BOOST_CHECK_EQUAL(NStr::FindNoCase("abcd", "aBc", 0, NPOS, NStr::eLast),     0U);
    // Search limited to the [2, 8] range: position 5 is expected both as
    // the first and as the last occurrence
    BOOST_CHECK_EQUAL(NStr::Find("abc abc abc", "bc", 2, 8, NStr::eFirst, NStr::eCase), 5U);
    BOOST_CHECK_EQUAL(NStr::FindCase  ("abc abc abc", "bc", 2, 8, NStr::eFirst), 5U);
    BOOST_CHECK_EQUAL(NStr::FindCase  ("abc abc abc", "bc", 2, 8, NStr::eLast),  5U);
}
3796 
3797 
BOOST_AUTO_TEST_CASE(s_FindWord)3798 BOOST_AUTO_TEST_CASE(s_FindWord)
3799 {
3800     // NStr::eForwardSearch
3801     BOOST_CHECK_EQUAL(NStr::FindWord("abcd",    "xyz"), NPOS);
3802     BOOST_CHECK_EQUAL(NStr::FindWord("abcd",    "abc"), NPOS);
3803     BOOST_CHECK_EQUAL(NStr::FindWord("xabc",    "abc"), NPOS);
3804     BOOST_CHECK_EQUAL(NStr::FindWord("abc d",   "abc"), 0U  );
3805     BOOST_CHECK_EQUAL(NStr::FindWord("xabc d",  "abc"), NPOS);
3806     BOOST_CHECK_EQUAL(NStr::FindWord("x,abc:d", "abc"), 2U  );
3807     BOOST_CHECK_EQUAL(NStr::FindWord("x,abc",   "abc"), 2U  );
3808     BOOST_CHECK_EQUAL(NStr::FindWord("xabcx abc\ny abc,z", "abc"), 6U);
3809 
3810     // NStr::eReverseSearch
3811     BOOST_CHECK_EQUAL(NStr::FindWord("abcd",    "xyz", f_CR), NPOS);
3812     BOOST_CHECK_EQUAL(NStr::FindWord("abcd",    "abc", f_CR), NPOS);
3813     BOOST_CHECK_EQUAL(NStr::FindWord("xabc",    "abc", f_CR), NPOS);
3814     BOOST_CHECK_EQUAL(NStr::FindWord("ab abc",  "abc", f_CR), 3U);
3815     BOOST_CHECK_EQUAL(NStr::FindWord("x abcd",  "abc", f_CR), NPOS);
3816     BOOST_CHECK_EQUAL(NStr::FindWord("x,abc:d", "abc", f_CR), 2U);
3817     BOOST_CHECK_EQUAL(NStr::FindWord("x,abc",   "abc", f_CR), 2U);
3818     BOOST_CHECK_EQUAL(NStr::FindWord("xabcx abc\ny abc,z", "abc", f_CR), 12U);
3819 
3820     // NStr::eNocase
3821     BOOST_CHECK_EQUAL(NStr::FindWord("abcd",    "xyz", f_NF), NPOS);
3822     BOOST_CHECK_EQUAL(NStr::FindWord("abcd",    "ABC", f_NF), NPOS);
3823     BOOST_CHECK_EQUAL(NStr::FindWord("xabc",    "ABC", f_NF), NPOS);
3824     BOOST_CHECK_EQUAL(NStr::FindWord("abc d",   "ABC", f_NF), 0U  );
3825     BOOST_CHECK_EQUAL(NStr::FindWord("xabc d",  "ABC", f_NF), NPOS);
3826     BOOST_CHECK_EQUAL(NStr::FindWord("x,abc:d", "ABC", f_NF), 2U  );
3827     BOOST_CHECK_EQUAL(NStr::FindWord("x,abc",   "ABC", f_NF), 2U  );
3828     BOOST_CHECK_EQUAL(NStr::FindWord("xabcx abc\ny abc,z", "ABC", f_NF), 6U);
3829 
3830     BOOST_CHECK_EQUAL(NStr::FindWord("abcd",    "xyz", f_NR), NPOS);
3831     BOOST_CHECK_EQUAL(NStr::FindWord("abcd",    "ABC", f_NR), NPOS);
3832     BOOST_CHECK_EQUAL(NStr::FindWord("xabc",    "ABC", f_NR), NPOS);
3833     BOOST_CHECK_EQUAL(NStr::FindWord("ab abc",  "ABC", f_NR), 3U  );
3834     BOOST_CHECK_EQUAL(NStr::FindWord("x abcd",  "ABC", f_NR), NPOS);
3835     BOOST_CHECK_EQUAL(NStr::FindWord("x,abc:d", "ABC", f_NR), 2U  );
3836     BOOST_CHECK_EQUAL(NStr::FindWord("x,abc",   "ABC", f_NR), 2U  );
3837     BOOST_CHECK_EQUAL(NStr::FindWord("xabcx abc\ny abc,z", "ABC", f_NR), 12U);
3838 
3839     // "Word" with non-word characters
3840     BOOST_CHECK_EQUAL(NStr::FindWord("a b c",         "a b c"), 0U);
3841     BOOST_CHECK_EQUAL(NStr::FindWord(" a b c ",       "a b c"), 1U);
3842     BOOST_CHECK_EQUAL(NStr::FindWord("  a b c  ",     "a b c"), 2U);
3843     BOOST_CHECK_EQUAL(NStr::FindWord("x a b c y",     "a b c"), 2U);
3844     BOOST_CHECK_EQUAL(NStr::FindWord("a b a a b c d", "a b c"), 6U);
3845     BOOST_CHECK_EQUAL(NStr::FindWord("a b 1a b c d",  "a b c"), NPOS);
3846     BOOST_CHECK_EQUAL(NStr::FindWord("x abc x",       " abc "), NPOS);
3847     BOOST_CHECK_EQUAL(NStr::FindWord("x abc abc x",   " abc "), NPOS);
3848     BOOST_CHECK_EQUAL(NStr::FindWord("x abc  abc x",  " abc "), NPOS);
3849     BOOST_CHECK_EQUAL(NStr::FindWord("x abc  abc ",   " abc "), 6U);
3850     BOOST_CHECK_EQUAL(NStr::FindWord("x abc  abc  x", " abc "), 6U);
3851     BOOST_CHECK_EQUAL(NStr::FindWord("x  abc abc  x", " abc "), NPOS);
3852     BOOST_CHECK_EQUAL(NStr::FindWord("x  abc+ abc +", " abc "), 7U);
3853 }
3854 
3855 
3856 //----------------------------------------------------------------------------
3857 // CVersionInfo:: parse from str
3858 //----------------------------------------------------------------------------
3859 
// Checks parsing of version strings: CVersionInfo construction from a
// string, CVersionInfo::FromStr(), and ParseVersionString(), which splits a
// string into a program name and a version.
BOOST_AUTO_TEST_CASE(s_VersionInfo)
{
    {{
        CVersionInfo ver("1.2.3");
        BOOST_CHECK_EQUAL(ver.GetMajor(), 1);
        BOOST_CHECK_EQUAL(ver.GetMinor(), 2);
        BOOST_CHECK_EQUAL(ver.GetPatchLevel(), 3);

        // With only two components given, the patch level is expected
        // to read as 0
        ver.FromStr("12.35");
        BOOST_CHECK_EQUAL(ver.GetMajor(), 12);
        BOOST_CHECK_EQUAL(ver.GetMinor(), 35);
        BOOST_CHECK_EQUAL(ver.GetPatchLevel(), 0);

        // Trailing garbage must be rejected with an exception
        BOOST_CHECK_THROW( ver.FromStr("12.35a"), exception);
    }}

    // ParseVersionString tests...

    // Each row: input string, expected program name (empty when the input
    // carries none) and expected version components.
    static const struct {
        const char* str;
        const char* name;
        int         ver_major;
        int         ver_minor;
        int         patch_level;
    } s_VerInfo[] = {
        { "1.3.2",                      "",                  1, 3, 2 },
        { "My_Program21p32c 1.3.3",     "My_Program21p32c",  1, 3, 3 },
        { "2.3.4 ( program)",           "program",           2, 3, 4 },
        { "version 50.1.0",             "",                 50, 1, 0 },
        { "MyProgram version 50.2.1",   "MyProgram",        50, 2, 1 },
        { "MyProgram ver. 50.3.1",      "MyProgram",        50, 3, 1 },
        { "MyOtherProgram2 ver 51.3.1", "MyOtherProgram2",  51, 3, 1 },
        { "Program_ v. 1.3.1",          "Program_",          1, 3, 1 }
    };

    // Reused as the output arguments of every ParseVersionString() call
    CVersionInfo ver("1.2.3");
    string       name;

    for (size_t i = 0;  i < sizeof(s_VerInfo) / sizeof(s_VerInfo[0]);  i++) {
        ParseVersionString(s_VerInfo[i].str, &name, &ver);
        BOOST_CHECK_EQUAL(name,                s_VerInfo[i].name);
        BOOST_CHECK_EQUAL(ver.GetMajor(),      s_VerInfo[i].ver_major);
        BOOST_CHECK_EQUAL(ver.GetMinor(),      s_VerInfo[i].ver_minor);
        BOOST_CHECK_EQUAL(ver.GetPatchLevel(), s_VerInfo[i].patch_level);
    }
    // A name without any version: the name is still extracted, and the
    // version is expected to report IsAny()
    ParseVersionString("MyProgram ", &name, &ver);
    BOOST_CHECK_EQUAL(name, string("MyProgram"));
    BOOST_CHECK(ver.IsAny());
}
3909 
3910     const unsigned char s_ExtAscii[] = {
3911         0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
3912 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
3913 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
3914 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
3915 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
3916 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
3917 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
3918 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
3919 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
3920 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
3921 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
3922 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
3923 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
3924 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3925 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
3926 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff,0x00};
3927     const unsigned char s_Converted[] = {
3928 0xc2,0x80,0xc2,0x81,0xc2,0x82,0xc2,0x83,0xc2,0x84,0xc2,0x85,0xc2,0x86,0xc2,0x87,
3929 0xc2,0x88,0xc2,0x89,0xc2,0x8a,0xc2,0x8b,0xc2,0x8c,0xc2,0x8d,0xc2,0x8e,0xc2,0x8f,
3930 0xc2,0x90,0xc2,0x91,0xc2,0x92,0xc2,0x93,0xc2,0x94,0xc2,0x95,0xc2,0x96,0xc2,0x97,
3931 0xc2,0x98,0xc2,0x99,0xc2,0x9a,0xc2,0x9b,0xc2,0x9c,0xc2,0x9d,0xc2,0x9e,0xc2,0x9f,
3932 0xc2,0xa0,0xc2,0xa1,0xc2,0xa2,0xc2,0xa3,0xc2,0xa4,0xc2,0xa5,0xc2,0xa6,0xc2,0xa7,
3933 0xc2,0xa8,0xc2,0xa9,0xc2,0xaa,0xc2,0xab,0xc2,0xac,0xc2,0xad,0xc2,0xae,0xc2,0xaf,
3934 0xc2,0xb0,0xc2,0xb1,0xc2,0xb2,0xc2,0xb3,0xc2,0xb4,0xc2,0xb5,0xc2,0xb6,0xc2,0xb7,
3935 0xc2,0xb8,0xc2,0xb9,0xc2,0xba,0xc2,0xbb,0xc2,0xbc,0xc2,0xbd,0xc2,0xbe,0xc2,0xbf,
3936 0xc3,0x80,0xc3,0x81,0xc3,0x82,0xc3,0x83,0xc3,0x84,0xc3,0x85,0xc3,0x86,0xc3,0x87,
3937 0xc3,0x88,0xc3,0x89,0xc3,0x8a,0xc3,0x8b,0xc3,0x8c,0xc3,0x8d,0xc3,0x8e,0xc3,0x8f,
3938 0xc3,0x90,0xc3,0x91,0xc3,0x92,0xc3,0x93,0xc3,0x94,0xc3,0x95,0xc3,0x96,0xc3,0x97,
3939 0xc3,0x98,0xc3,0x99,0xc3,0x9a,0xc3,0x9b,0xc3,0x9c,0xc3,0x9d,0xc3,0x9e,0xc3,0x9f,
3940 0xc3,0xa0,0xc3,0xa1,0xc3,0xa2,0xc3,0xa3,0xc3,0xa4,0xc3,0xa5,0xc3,0xa6,0xc3,0xa7,
3941 0xc3,0xa8,0xc3,0xa9,0xc3,0xaa,0xc3,0xab,0xc3,0xac,0xc3,0xad,0xc3,0xae,0xc3,0xaf,
3942 0xc3,0xb0,0xc3,0xb1,0xc3,0xb2,0xc3,0xb3,0xc3,0xb4,0xc3,0xb5,0xc3,0xb6,0xc3,0xb7,
3943 0xc3,0xb8,0xc3,0xb9,0xc3,0xba,0xc3,0xbb,0xc3,0xbc,0xc3,0xbd,0xc3,0xbe,0xc3,0xbf};
3944 
3945 
BOOST_AUTO_TEST_CASE(s_CUtf8)3946 BOOST_AUTO_TEST_CASE(s_CUtf8)
3947 {
3948     const char *src, *res, *conv;
3949     src = (char*)s_ExtAscii;
3950     conv= (char*)s_Converted;
3951     // extended ASCII does not match any encoding
3952     {
3953         CStringUTF8 u1( CUtf8::AsUTF8(src,eEncoding_ISO8859_1,CUtf8::eNoValidate) );
3954         CStringUTF8 u2 = CUtf8::AsUTF8(src,eEncoding_ISO8859_1,CUtf8::eNoValidate);
3955     }
3956 
3957     CStringUTF8 u8str( CUtf8::AsUTF8(src,eEncoding_ISO8859_1,CUtf8::eNoValidate) );
3958     res = u8str.c_str();
3959 
3960     BOOST_CHECK_EQUAL( strncmp(src,res,126), 0);
3961     BOOST_CHECK( strlen(res+127) == 256 );
3962     res += 127;
3963     BOOST_CHECK_EQUAL( strncmp(conv,res,256), 0);
3964 
3965     string sample("micro=\265 Agrave=\300 atilde=\343 ccedil=\347");
3966     string u8sample("micro=\302\265 Agrave=\303\200 atilde=\303\243 ccedil=\303\247");
3967 
3968     u8str = CUtf8::AsUTF8( sample, eEncoding_ISO8859_1);
3969     u8str = CUtf8::AsUTF8( sample, eEncoding_ISO8859_1, CUtf8::eValidate);
3970 
3971 #if defined(HAVE_WSTRING)
3972     wstring wss, wss2;
3973 #else
3974     TStringUnicode wss, wss2;
3975 #endif
3976     string ssab("ab");
3977     wss.append(1,0x61).append(1,0x62).append(1,0x0).append(1,0x63);
3978     wss2.append(1,0x61).append(1,0x62);
3979     {
3980 // string to string
3981         string u8test( CUtf8::AsUTF8(sample, eEncoding_ISO8859_1));
3982         BOOST_CHECK_EQUAL(u8test,u8sample);
3983 
3984         string u8test2( CUtf8::AsUTF8( sample.c_str(),eEncoding_ISO8859_1));
3985         BOOST_CHECK_EQUAL(u8test2,u8sample);
3986 
3987         string u8test3( CUtf8::AsUTF8( CTempString(sample.c_str(),sample.size()),eEncoding_ISO8859_1));
3988         BOOST_CHECK_EQUAL(u8test3,u8sample);
3989 
3990 // should not compile!
3991 //        u8test = CUtf8::AsUTF8( sample.c_str(), 1);
3992 //        u8test = CUtf8::AsUTF8( sample);
3993 //        u8test = CUtf8::AsBasicString<char>(sample);
3994 
3995 
3996         u8test.clear();
3997         CUtf8::AppendAsUTF8(u8test,ssab, eEncoding_ISO8859_1);
3998         CUtf8::AppendAsUTF8(u8test, CTempString(ssab.data(), ssab.size()), eEncoding_ISO8859_1);
3999         CUtf8::AppendAsUTF8(u8test,ssab[0], eEncoding_ISO8859_1);
4000         CUtf8::AppendAsUTF8(u8test, 'a', eEncoding_ISO8859_1);
4001         CUtf8::AppendAsUTF8(u8test, TUnicodeSymbol(0x62));
4002         BOOST_CHECK_EQUAL(u8test,"ababaab");
4003 
4004 // should not compile!
4005 //        CUtf8::AppendAsUTF8(u8test, 'a');
4006 //        CUtf8::AppendAsUTF8(u8test,ssab);
4007 //        CUtf8::AppendAsUTF8(u8test,ssab.data(), ssab.size());
4008 //        CUtf8::AppendAsUTF8(u8test,ssab[0]);
4009     }
4010     {
4011 // wide string to string
4012         string w8test(CUtf8::AsUTF8(wss));
4013         BOOST_CHECK_EQUAL(w8test.size(),4U);
4014         w8test = CUtf8::AsUTF8(wss);
4015         BOOST_CHECK_EQUAL(w8test.size(),4U);
4016         w8test = CUtf8::AsUTF8(wss.c_str());
4017         BOOST_CHECK_EQUAL(w8test.size(),2U);
4018 
4019         string w8test2( CUtf8::AsUTF8(wss.c_str()));
4020         BOOST_CHECK_EQUAL(w8test2,ssab);
4021         BOOST_CHECK_EQUAL(w8test2.size(),2U);
4022 
4023         string w8test3( CUtf8::AsUTF8( wss.c_str(), wss.size()));
4024         BOOST_CHECK_EQUAL(w8test3.size(),wss.size());
4025 
4026         string ss = CUtf8::AsSingleByteString(w8test2,eEncoding_UTF8);
4027         ss = CUtf8::AsSingleByteString(w8test2,eEncoding_Ascii);
4028         BOOST_CHECK_EQUAL(ss,ssab);
4029 
4030         w8test.clear();
4031         CUtf8::AppendAsUTF8(w8test,wss2);
4032         CUtf8::AppendAsUTF8(w8test, wss2.data(), wss2.size());
4033         CUtf8::AppendAsUTF8(w8test,wss2[0]);
4034         BOOST_CHECK_EQUAL(w8test,"ababa");
4035     }
4036     {
4037 // string or wide string to CStringUTF8
4038         CStringUTF8 w8test;
4039         w8test = CUtf8::AsUTF8( sample, eEncoding_ISO8859_1);
4040         w8test = CUtf8::AsUTF8( sample.c_str(), eEncoding_ISO8859_1);
4041 
4042         w8test = CUtf8::AsUTF8(wss);
4043         BOOST_CHECK_EQUAL(w8test.size(),4U);
4044         w8test = CUtf8::AsUTF8(wss.c_str());
4045         BOOST_CHECK_EQUAL(w8test.size(),2U);
4046         w8test = CUtf8::AsUTF8(wss.c_str(), wss.size());
4047         BOOST_CHECK_EQUAL(w8test.size(),4U);
4048         w8test = CUtf8::AsUTF8(wss.c_str());
4049         BOOST_CHECK_EQUAL(w8test.size(),2U);
4050         w8test += CUtf8::AsUTF8(wss.c_str());
4051         BOOST_CHECK_EQUAL(w8test.size(),4U);
4052     }
4053 
4054     BOOST_CHECK_EQUAL(u8str,u8sample);
4055     BOOST_CHECK_EQUAL( CUtf8::AsSingleByteString( u8str, eEncoding_ISO8859_1), sample);
4056 
4057     BOOST_CHECK_EQUAL(128u, CUtf8::GetValidSymbolCount(CTempString((const char*)s_Converted, sizeof(s_Converted))));
4058     BOOST_CHECK_EQUAL(127u, CUtf8::GetValidSymbolCount((const char*)s_ExtAscii));
4059     BOOST_CHECK_EQUAL(34u,  CUtf8::GetValidSymbolCount(CTempString(u8str.data(), u8str.length())));
4060     BOOST_CHECK_EQUAL(34u,  CUtf8::GetValidSymbolCount(u8str));
4061 
4062     BOOST_CHECK_EQUAL(256u, CUtf8::GetValidBytesCount(CTempString((const char*)s_Converted, sizeof(s_Converted))));
4063     BOOST_CHECK_EQUAL(127u, CUtf8::GetValidBytesCount((const char*)s_ExtAscii));
4064     BOOST_CHECK_EQUAL(38u,  CUtf8::GetValidBytesCount(CTempString(u8str.data(), u8str.length())));
4065     BOOST_CHECK_EQUAL(38u,  CUtf8::GetValidBytesCount(u8str));
4066 
4067     BOOST_CHECK_EQUAL( (int)CUtf8::StringToEncoding("UtF-8"),           (int)eEncoding_UTF8);
4068     BOOST_CHECK_EQUAL( (int)CUtf8::StringToEncoding("Windows-1252"),    (int)eEncoding_Windows_1252);
4069     BOOST_CHECK_EQUAL( (int)CUtf8::StringToEncoding("cp367"),           (int)eEncoding_Ascii);
4070     BOOST_CHECK_EQUAL( (int)CUtf8::StringToEncoding("csISOLatin1"),     (int)eEncoding_ISO8859_1);
4071     BOOST_CHECK_EQUAL( (int)CUtf8::StringToEncoding("ISO-2022-CN-EXT"), (int)eEncoding_Unknown);
4072 
4073     TStringUnicode uus = CUtf8::AsBasicString<TUnicodeSymbol>(u8sample);
4074     TStringUCS4    u4s = CUtf8::AsBasicString<TCharUCS4>(u8sample);
4075     TStringUCS2    u2s = CUtf8::AsBasicString<TCharUCS2>(u8sample);
4076 #if defined(HAVE_WSTRING)
4077     wstring        uws = CUtf8::AsBasicString<wchar_t>(u8sample);
4078 #endif
4079 
4080 // other types
4081 #if defined(HAVE_WSTRING)
4082     {
4083         typedef wchar_t xxxMywchar;
4084         basic_string<xxxMywchar> xxxwss;
4085         string w8test( CUtf8::AsUTF8(xxxwss));
4086         xxxwss = CUtf8::AsBasicString<xxxMywchar>(w8test);
4087         xxxwss = CUtf8::AsBasicString<xxxMywchar>(w8test, 0);
4088         xxxwss = CUtf8::AsBasicString<xxxMywchar>(w8test, 0, CUtf8::eNoValidate);
4089 
4090         xxxwss.append(1,1000);
4091         CUtf8::AppendAsUTF8(w8test,xxxwss);
4092         CUtf8::AppendAsUTF8(w8test,xxxwss.data(), xxxwss.size());
4093         CUtf8::AppendAsUTF8(w8test,xxxwss[0]);
4094     }
4095 #endif
4096     {
4097         typedef unsigned short xxxMywchar;
4098         basic_string<xxxMywchar> xxxwss;
4099         string w8test( CUtf8::AsUTF8(xxxwss));
4100         xxxwss = CUtf8::AsBasicString<xxxMywchar>(w8test);
4101 
4102         xxxwss.append(1,1000);
4103         CUtf8::AppendAsUTF8(w8test,xxxwss);
4104         CUtf8::AppendAsUTF8(w8test,xxxwss.data(), xxxwss.size());
4105         CUtf8::AppendAsUTF8(w8test,xxxwss[0]);
4106     }
4107     {
4108         typedef unsigned int xxxMywchar;
4109         basic_string<xxxMywchar> xxxwss;
4110         string w8test( CUtf8::AsUTF8(xxxwss));
4111         xxxwss = CUtf8::AsBasicString<xxxMywchar>(w8test);
4112 
4113         xxxwss.append(1,1000);
4114         CUtf8::AppendAsUTF8(w8test,xxxwss);
4115         CUtf8::AppendAsUTF8(w8test,xxxwss.data(), xxxwss.size());
4116         CUtf8::AppendAsUTF8(w8test,xxxwss[0]);
4117     }
4118 #if NCBITOOLKIT_USE_LONG_UCS4
4119     {
4120         typedef unsigned long xxxMywchar;
4121         basic_string<xxxMywchar> xxxwss;
4122         string w8test( CUtf8::AsUTF8(xxxwss));
4123         xxxwss = CUtf8::AsBasicString<xxxMywchar>(w8test);
4124 
4125         xxxwss.append(1,1000);
4126         CUtf8::AppendAsUTF8(w8test,xxxwss);
4127         CUtf8::AppendAsUTF8(w8test,xxxwss.data(), xxxwss.size());
4128         CUtf8::AppendAsUTF8(w8test,xxxwss[0]);
4129     }
4130 #endif
4131     // iteration
4132     {
4133         wss.erase().append(1,0x1000).append(1,0x1100).append(1,0x1110).append(1,0x1111);
4134         wss2.erase();
4135         string s(CUtf8::AsUTF8(wss));
4136         string s2;
4137         for (string::const_iterator i = s.begin(); i != s.end(); ++i) {
4138             TUnicodeSymbol sym = CUtf8::Decode(i);
4139             wss2.append(1,sym);
4140             s2 += CUtf8::AsUTF8(&sym,1);
4141         }
4142         BOOST_CHECK(s == s2);
4143         BOOST_CHECK(wss == wss2);
4144     }
4145 
4146     // wrong Utf8
4147     {
4148         string u8tmp("micro=\302 Agrave=\303\200 atilde=\303\243 ccedil=\303\247");
4149         string expected("micro=\\302 A");
4150         for (int i = 0; i < 3; ++i) {
4151             bool gotit = false;
4152             string msg;
4153             try {
4154                 switch (i) {
4155                 case 0: {
4156                     CUtf8::AsBasicString<TCharUCS2>(u8tmp, NULL, CUtf8::eValidate);
4157                 } break;
4158                 case 1: {
4159                     CUtf8::GetSymbolCount(u8tmp);
4160                 } break;
4161                 case 2: {
4162                     CUtf8::AsSingleByteString(u8tmp,eEncoding_Ascii, NULL, CUtf8::eValidate);
4163                 } break;
4164                 }
4165             } catch (CStringException& e) {
4166                 gotit = true;
4167                 msg = e.GetMsg();
4168             }
4169             BOOST_CHECK( gotit );
4170             BOOST_CHECK( NStr::FindCase(msg, expected) != NPOS);
4171         }
4172     }
4173 
4174 // locales
4175 #if defined(HAVE_WSTRING) && defined(NCBI_OS_MSWIN)
4176     {
4177         typedef unsigned short xxxMywchar;
4178         basic_string<xxxMywchar> xxxwss, wstest;
4179         string u8, u8x, res;
4180         const char* lcl_name;
4181         {
4182             lcl_name = "ru-RU";
4183             try {
4184                 locale lcl(lcl_name);
4185                 string t("�������");
4186                 wstest = {1050, 1086, 1084, 1072, 1085, 1076, 1099};
4187                 u8 = CUtf8::AsUTF8(t, lcl);
4188                 xxxwss = CUtf8::AsBasicString<xxxMywchar>(u8);
4189                 u8x = CUtf8::AsUTF8(xxxwss);
4190                 res = CUtf8::AsSingleByteString(u8, lcl);
4191                 BOOST_CHECK(xxxwss == wstest);
4192                 BOOST_CHECK(u8 == u8x);
4193                 BOOST_CHECK(t == res);
4194                 res = CUtf8::AsSingleByteString(u8, locale("pl"), "");
4195                 BOOST_CHECK(res.empty());
4196             } catch(const exception& e) {
4197                 cout << lcl_name << ": " << e.what() << endl;
4198             }
4199         }
4200         lcl_name = "pl-PL";
4201         {
4202             try {
4203                 locale lcl(lcl_name);
4204                 string t("Mo�esz u�ywa� wy��czy�");
4205                 wstest = {77,111,380,101,115,122,32,117,380,121,119,97,263,32,119,121,322,261,99,122,121,263};
4206                 u8 = CUtf8::AsUTF8(t, lcl);
4207                 xxxwss = CUtf8::AsBasicString<xxxMywchar>(u8);
4208                 u8x = CUtf8::AsUTF8(xxxwss);
4209                 res = CUtf8::AsSingleByteString(u8, lcl);
4210                 BOOST_CHECK(xxxwss == wstest);
4211                 BOOST_CHECK(u8 == u8x);
4212                 BOOST_CHECK(t == res);
4213             } catch(const exception& e) {
4214                 cout << lcl_name << ": " << e.what() << endl;
4215             }
4216         }
4217         lcl_name = "ar-EG";
4218         {
4219             try {
4220                 locale lcl(lcl_name);
4221                 string t("������� ������");
4222                 wstest = {1573,1593,1583,1575,1583,1575,1578,32,1573,1590,1575,1601,1610,1577};
4223                 u8 = CUtf8::AsUTF8(t, lcl);
4224                 xxxwss = CUtf8::AsBasicString<xxxMywchar>(u8);
4225                 u8x = CUtf8::AsUTF8(xxxwss);
4226                 res = CUtf8::AsSingleByteString(u8, lcl);
4227                 BOOST_CHECK(xxxwss == wstest);
4228                 BOOST_CHECK(u8 == u8x);
4229                 BOOST_CHECK(t == res);
4230             } catch(const exception& e) {
4231                 cout << lcl_name << ": " << e.what() << endl;
4232             }
4233         }
4234     }
4235 #endif
4236 }
4237 
4238 
4239 //----------------------------------------------------------------------------
// NStr::TruncateSpaces()
4241 //----------------------------------------------------------------------------
4242 
BOOST_AUTO_TEST_CASE(s_TruncateSpaces)4243 BOOST_AUTO_TEST_CASE(s_TruncateSpaces)
4244 {
4245     const char* szEmpty     = "";
4246     const char* szSpaces    = "  \t\n  \t\n  \t\n";
4247     const char* szTrunc     = "some long\tmultiline\nstring";
4248     const char* szBegSpace  = "  \t\nsome long\tmultiline\nstring";
4249     const char* szEndSpace  = "some long\tmultiline\nstring  \t\n";
4250     const char* szBothSpace = "  \t\nsome long\tmultiline\nstring  \t\n";
4251 
4252     const string sEmpty     = szEmpty;
4253     const string sSpaces    = szSpaces;
4254     const string sTrunc     = szTrunc;
4255     const string sBegSpace  = szBegSpace;
4256     const string sEndSpace  = szEndSpace;
4257     const string sBothSpace = szBothSpace;
4258 
4259     const CTempString tsEmpty    (szEmpty    );
4260     const CTempString tsSpaces   (szSpaces   );
4261     const CTempString tsTrunc    (szTrunc    );
4262     const CTempString tsBegSpace (szBegSpace );
4263     const CTempString tsEndSpace (szEndSpace );
4264     const CTempString tsBothSpace(szBothSpace);
4265 
4266     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szEmpty,     NStr::eTrunc_Begin), tsEmpty    );
4267     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szEmpty,     NStr::eTrunc_End  ), tsEmpty    );
4268     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szEmpty,     NStr::eTrunc_Both ), tsEmpty    );
4269     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsEmpty,     NStr::eTrunc_Begin), tsEmpty    );
4270     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsEmpty,     NStr::eTrunc_End  ), tsEmpty    );
4271     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsEmpty,     NStr::eTrunc_Both ), tsEmpty    );
4272     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sEmpty,      NStr::eTrunc_Begin), sEmpty     );
4273     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sEmpty,      NStr::eTrunc_End  ), sEmpty     );
4274     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sEmpty,      NStr::eTrunc_Both ), sEmpty     );
4275 
4276     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szSpaces,    NStr::eTrunc_Begin), tsEmpty    );
4277     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szSpaces,    NStr::eTrunc_End  ), tsEmpty    );
4278     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szSpaces,    NStr::eTrunc_Both ), tsEmpty    );
4279     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsSpaces,    NStr::eTrunc_Begin), tsEmpty    );
4280     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsSpaces,    NStr::eTrunc_End  ), tsEmpty    );
4281     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsSpaces,    NStr::eTrunc_Both ), tsEmpty    );
4282     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sSpaces,     NStr::eTrunc_Begin), sEmpty     );
4283     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sSpaces,     NStr::eTrunc_End  ), sEmpty     );
4284     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sSpaces,     NStr::eTrunc_Both ), sEmpty     );
4285 
4286     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szTrunc,     NStr::eTrunc_Begin), tsTrunc    );
4287     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szTrunc,     NStr::eTrunc_End  ), tsTrunc    );
4288     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szTrunc,     NStr::eTrunc_Both ), tsTrunc    );
4289     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsTrunc,     NStr::eTrunc_Begin), tsTrunc    );
4290     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsTrunc,     NStr::eTrunc_End  ), tsTrunc    );
4291     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsTrunc,     NStr::eTrunc_Both ), tsTrunc    );
4292     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sTrunc,      NStr::eTrunc_Begin), sTrunc     );
4293     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sTrunc,      NStr::eTrunc_End  ), sTrunc     );
4294     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sTrunc,      NStr::eTrunc_Both ), sTrunc     );
4295 
4296     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szBegSpace,  NStr::eTrunc_Begin), tsTrunc    );
4297     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szBegSpace,  NStr::eTrunc_End  ), tsBegSpace );
4298     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szBegSpace,  NStr::eTrunc_Both ), tsTrunc    );
4299     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsBegSpace,  NStr::eTrunc_Begin), tsTrunc    );
4300     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsBegSpace,  NStr::eTrunc_End  ), tsBegSpace );
4301     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsBegSpace,  NStr::eTrunc_Both ), tsTrunc    );
4302     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sBegSpace,   NStr::eTrunc_Begin), sTrunc     );
4303     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sBegSpace,   NStr::eTrunc_End  ), sBegSpace  );
4304     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sBegSpace,   NStr::eTrunc_Both ), sTrunc     );
4305 
4306     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szEndSpace,  NStr::eTrunc_Begin), tsEndSpace );
4307     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szEndSpace,  NStr::eTrunc_End  ), tsTrunc    );
4308     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szEndSpace,  NStr::eTrunc_Both ), tsTrunc    );
4309     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsEndSpace,  NStr::eTrunc_Begin), tsEndSpace );
4310     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsEndSpace,  NStr::eTrunc_End  ), tsTrunc    );
4311     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsEndSpace,  NStr::eTrunc_Both ), tsTrunc    );
4312     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sEndSpace,   NStr::eTrunc_Begin), sEndSpace  );
4313     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sEndSpace,   NStr::eTrunc_End  ), sTrunc     );
4314     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sEndSpace,   NStr::eTrunc_Both ), sTrunc     );
4315 
4316     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szBothSpace, NStr::eTrunc_Begin), tsEndSpace );
4317     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szBothSpace, NStr::eTrunc_End  ), tsBegSpace );
4318     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(szBothSpace, NStr::eTrunc_Both ), tsTrunc    );
4319     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsBothSpace, NStr::eTrunc_Begin), tsEndSpace );
4320     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsBothSpace, NStr::eTrunc_End  ), tsBegSpace );
4321     BOOST_CHECK_EQUAL( NStr::TruncateSpaces_Unsafe(tsBothSpace, NStr::eTrunc_Both ), tsTrunc    );
4322     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sBothSpace,  NStr::eTrunc_Begin), sEndSpace  );
4323     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sBothSpace,  NStr::eTrunc_End  ), sBegSpace  );
4324     BOOST_CHECK_EQUAL( NStr::TruncateSpaces(sBothSpace,  NStr::eTrunc_Both ), sTrunc     );
4325 
4326 // http://unicode.org/charts/uca/chart_Whitespace.html
4327 // http://en.wikipedia.org/wiki/Whitespace_character
4328     TUnicodeSymbol ws[] = {0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20, 0x85, 0xA0, 0x1680, 0x180E,
4329     0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
4330     0x2028, 0x2029, 0x202F, 0x205F, 0x3000, 0 };
4331     int i;
4332     cout << "Testing iswspace" << endl;
4333     for (i=0; ws[i]; ++i) {
4334         if (iswspace(ws[i]) == 0) {
4335             cout << "WARNING: wide char " << hex << ws[i] << " is not whitespace" << endl;
4336         }
4337     }
4338     cout << "Testing CStringUTF8::IsWhiteSpace" << endl;
4339     for (i=0; ws[i]; ++i) {
4340         BOOST_CHECK( CUtf8::IsWhiteSpace(ws[i]) );
4341     }
4342     for (TUnicodeSymbol t=1; t<=0x3000; ++t) {
4343         if (CUtf8::IsWhiteSpace(t)) {
4344             bool found=false;
4345             for (i=0; !found && ws[i]; ++i) {
4346                 found = ws[i] == t;
4347             }
4348             BOOST_CHECK(found);
4349         }
4350     }
4351 
4352 #if defined(HAVE_WSTRING)
4353     wstring wss;
4354 #else
4355     TStringUnicode wss;
4356 #endif
4357 
4358 // CUtf8
4359     wss.erase().append(1, 0x2003).append(1, 0x2004).append(1, 0x2028).append(1, 0x205F);
4360     CStringUTF8 u8 = CUtf8::AsUTF8(wss);
4361     BOOST_CHECK( CUtf8::TruncateSpacesInPlace(u8,NStr::eTrunc_Begin).empty() );
4362     u8 = CUtf8::AsUTF8(wss);
4363     BOOST_CHECK( CUtf8::TruncateSpacesInPlace(u8,NStr::eTrunc_End).empty() );
4364     u8 = CUtf8::AsUTF8(wss);
4365     BOOST_CHECK( CUtf8::TruncateSpacesInPlace(u8).empty() );
4366 
4367     wss.append(1,0x61).append(1,0x62);
4368     u8 = CUtf8::AsUTF8(wss);
4369     BOOST_CHECK_EQUAL( CUtf8::TruncateSpacesInPlace(u8), "ab" );
4370 
4371     wss.append(1, 0x2028).append(1, 0x205F).append(1,0x0D);
4372     u8 = CUtf8::AsUTF8(wss);
4373     BOOST_CHECK_EQUAL( CUtf8::TruncateSpacesInPlace(u8), "ab" );
4374     u8 = CUtf8::AsUTF8(wss);
4375     BOOST_CHECK_EQUAL( CUtf8::TruncateSpaces(u8), "ab" );
4376 }
4377 
4378 
4379 //----------------------------------------------------------------------------
4380 // NStr::Trim[Pre|Suf]fix*()
4381 //----------------------------------------------------------------------------
4382 
BOOST_AUTO_TEST_CASE(s_TrimPrefixSuffix)4383 BOOST_AUTO_TEST_CASE(s_TrimPrefixSuffix)
4384 {
4385     const char*       cStr = "some long string";
4386     const string      sStr = cStr;
4387     const CTempString tStr(cStr);
4388 
4389     string str, s;
4390     CTempString tstr, tres;
4391 
4392     // NStr::Case
4393 
4394     str = cStr;
4395     {{
4396         tres = NStr::TrimPrefix_Unsafe(cStr, "Some");
4397         NStr::TrimPrefixInPlace(str, "Some");
4398         BOOST_CHECK_EQUAL(str, string(cStr));
4399         BOOST_CHECK_EQUAL(str, tres);
4400 
4401         tres = NStr::TrimPrefix_Unsafe(cStr, "SOME");
4402         NStr::TrimPrefixInPlace(str, "SOME");
4403         BOOST_CHECK_EQUAL(str, string(cStr));
4404         BOOST_CHECK_EQUAL(str, tres);
4405 
4406         tres = NStr::TrimSuffix_Unsafe(cStr, "STRING");
4407         NStr::TrimSuffixInPlace(str, "STRING");
4408         BOOST_CHECK_EQUAL(str, string(cStr));
4409         BOOST_CHECK_EQUAL(str, tres);
4410     }}
4411     {{
4412         s = str;
4413         tres = NStr::TrimPrefix_Unsafe(s, "some ");
4414         NStr::TrimPrefixInPlace(str, "some ");
4415         BOOST_CHECK_EQUAL(str, string("long string"));
4416         BOOST_CHECK_EQUAL(str, tres);
4417 
4418         s = str;
4419         tres = NStr::TrimSuffix_Unsafe(s, " string");
4420         NStr::TrimSuffixInPlace(str, " string");
4421         BOOST_CHECK_EQUAL(str, string("long"));
4422         BOOST_CHECK_EQUAL(str, tres);
4423     }}
4424 
4425     tstr = cStr;
4426     {{
4427         s = tstr;
4428         tres = NStr::TrimPrefix_Unsafe(s, "Some");
4429         NStr::TrimPrefixInPlace(tstr, "Some");
4430         BOOST_CHECK_EQUAL(tstr, string(cStr));
4431         BOOST_CHECK_EQUAL(tstr, tres);
4432 
4433         s = tstr;
4434         tres = NStr::TrimPrefix_Unsafe(s, "SOME");
4435         NStr::TrimPrefixInPlace(tstr, "SOME");
4436         BOOST_CHECK_EQUAL(tstr, string(cStr));
4437         BOOST_CHECK_EQUAL(tstr, tres);
4438 
4439         s = tstr;
4440         tres = NStr::TrimSuffix_Unsafe(s, "STRING");
4441         NStr::TrimSuffixInPlace(str, "STRING");
4442         BOOST_CHECK_EQUAL(tstr, string(cStr));
4443         BOOST_CHECK_EQUAL(tstr, tres);
4444     }}
4445     {{
4446         s = tstr;
4447         tres = NStr::TrimPrefix_Unsafe(s, "some ");
4448         NStr::TrimPrefixInPlace(tstr, "some ");
4449         BOOST_CHECK_EQUAL(tstr, string("long string"));
4450         BOOST_CHECK_EQUAL(tstr, tres);
4451 
4452         s = tstr;
4453         tres = NStr::TrimSuffix_Unsafe(s, " string");
4454         NStr::TrimSuffixInPlace(tstr, " string");
4455         BOOST_CHECK_EQUAL(tstr, string("long"));
4456         BOOST_CHECK_EQUAL(tstr, tres);
4457     }}
4458 
4459     // NStr::eNocase
4460 
4461     str = cStr;
4462     {{
4463         s = str;
4464         tres = NStr::TrimPrefix_Unsafe(s, "Some ", NStr::eNocase);
4465         NStr::TrimPrefixInPlace(str, "Some ", NStr::eNocase);
4466         BOOST_CHECK_EQUAL(str, string("long string"));
4467         BOOST_CHECK_EQUAL(str, tres);
4468 
4469         s = str;
4470         tres = NStr::TrimSuffix_Unsafe(s, " STRING", NStr::eNocase);
4471         NStr::TrimSuffixInPlace(str, " STRING", NStr::eNocase);
4472         BOOST_CHECK_EQUAL(str, string("long"));
4473         BOOST_CHECK_EQUAL(str, tres);
4474     }}
4475 
4476     tstr = cStr;
4477     {{
4478         s = tstr;
4479         tres = NStr::TrimPrefix_Unsafe(s, "Some ", NStr::eNocase);
4480         NStr::TrimPrefixInPlace(tstr, "Some ", NStr::eNocase);
4481         BOOST_CHECK_EQUAL(tstr, string("long string"));
4482         BOOST_CHECK_EQUAL(tstr, tres);
4483 
4484         s = tstr;
4485         tres = NStr::TrimSuffix_Unsafe(s, " STRING", NStr::eNocase);
4486         NStr::TrimSuffixInPlace(tstr, " STRING", NStr::eNocase);
4487         BOOST_CHECK_EQUAL(tstr, string("long"));
4488         BOOST_CHECK_EQUAL(tstr, tres);
4489     }}
4490 }
4491 
4492 
4493 //----------------------------------------------------------------------------
4494 // NStr::GetField()
4495 //----------------------------------------------------------------------------
4496 
BOOST_AUTO_TEST_CASE(s_GetField)
{
    // Table-driven check of NStr::GetField() with a set of delimiter
    // characters. Each row: source text (may be NULL), zero-based field
    // index, delimiter set, and the field text expected back ("" = empty).
    struct SFieldCase {
        const char* text;
        size_t      field_no;
        const char* delims;
        const char* expected;
    };

    // Default mode: each delimiter occurrence ends a field, so adjacent
    // delimiters enclose an empty field.
    const SFieldCase plain_cases[] = {
        { NULL,       17,  "not important", ""     },
        { "",         0,   ":",             ""     },
        { "",         10,  ":",             ""     },
        { "one",      0,   ":",             "one"  },
        { "one:",     0,   ":",             "one"  },
        { "one: two", 0,   ":",             "one"  },
        { "one: two", 1,   ":",             " two" },
        { "one: two", 1,   "-.:;",          " two" },
        { "one: two", 1,   "-.:",           " two" },
        { "one::two", 1,   "-.:;",          ""     },
        { "one::two", 176, "-.:;",          ""     }
    };
    for (const SFieldCase& tc : plain_cases) {
        BOOST_CHECK_EQUAL( NStr::GetField(tc.text, tc.field_no, tc.delims),
                           string(tc.expected) );
    }

    // NStr::eMergeDelims: a run of delimiters counts as a single separator.
    const SFieldCase merge_cases[] = {
        { NULL,           17,  "not important", ""    },
        { "",             0,   ":",             ""    },
        { "",             10,  ":",             ""    },
        { "one",          0,   ":",             "one" },
        { "one:",         0,   ":",             "one" },
        { "one::: two",   0,   ":",             "one" },
        { "one:::two:",   1,   ":",             "two" },
        { "one::-:two::", 1,   "-.:;",          "two" },
        { "one:two",      1,   "-.:",           "two" },
        { "one.two.",     1,   "-.:;",          "two" },
        { "one::two",     176, "-.:;",          ""    }
    };
    for (const SFieldCase& tc : merge_cases) {
        BOOST_CHECK_EQUAL( NStr::GetField(tc.text, tc.field_no, tc.delims, NStr::eMergeDelims),
                           string(tc.expected) );
    }
}
4523 
BOOST_AUTO_TEST_CASE(s_GetField_SingleDilimiter)
{
    // Table-driven check of the NStr::GetField() overload that takes one
    // delimiter character. Each row: source text (may be NULL), zero-based
    // field index, delimiter, and the field text expected back ("" = empty).
    struct SFieldCase {
        const char* text;
        size_t      field_no;
        char        delim;
        const char* expected;
    };

    // Default mode: adjacent delimiters enclose an empty field.
    const SFieldCase plain_cases[] = {
        { NULL,       17,  'n', ""     },
        { "",         0,   ':', ""     },
        { "",         10,  ':', ""     },
        { "one",      0,   ':', "one"  },
        { "one:",     0,   ':', "one"  },
        { "one: two", 0,   ':', "one"  },
        { "one: two", 1,   ':', " two" },
        { "one::two", 1,   ':', ""     },
        { "one::two", 176, ':', ""     }
    };
    for (const SFieldCase& tc : plain_cases) {
        BOOST_CHECK_EQUAL( NStr::GetField(tc.text, tc.field_no, tc.delim),
                           string(tc.expected) );
    }

    // NStr::eMergeDelims: a run of the delimiter counts as a single separator.
    const SFieldCase merge_cases[] = {
        { NULL,           17,  'n', ""    },
        { "",             0,   ':', ""    },
        { "",             10,  ':', ""    },
        { "one",          0,   ':', "one" },
        { "one:",         0,   ':', "one" },
        { "one::: two",   0,   ':', "one" },
        { "one:::two:",   1,   ':', "two" },
        { "one::-:two::", 2,   ':', "two" },
        { "one:two",      1,   ':', "two" },
        { "one.two.",     1,   '.', "two" },
        { "one::two",     176, ':', ""    }
    };
    for (const SFieldCase& tc : merge_cases) {
        BOOST_CHECK_EQUAL( NStr::GetField(tc.text, tc.field_no, tc.delim, NStr::eMergeDelims),
                           string(tc.expected) );
    }
}
4548 
BOOST_AUTO_TEST_CASE(s_GetField_Unsafe)
{
    // Table-driven check of NStr::GetField_Unsafe() with a set of delimiter
    // characters. Each row: source text (may be NULL), zero-based field
    // index, delimiter set, and the field text expected back ("" = empty).
    struct SFieldCase {
        const char* text;
        size_t      field_no;
        const char* delims;
        const char* expected;
    };

    // Default mode: adjacent delimiters enclose an empty field.
    const SFieldCase plain_cases[] = {
        { NULL,       17,  "not important", ""     },
        { "",         0,   ":",             ""     },
        { "",         10,  ":",             ""     },
        { "one",      0,   ":",             "one"  },
        { "one:",     0,   ":",             "one"  },
        { "one: two", 0,   ":",             "one"  },
        { "one: two", 1,   ":",             " two" },
        { "one: two", 1,   "-.:;",          " two" },
        { "one: two", 1,   "-.:",           " two" },
        { "one::two", 1,   "-.:;",          ""     },
        { "one::two", 176, "-.:;",          ""     }
    };
    for (const SFieldCase& tc : plain_cases) {
        BOOST_CHECK_EQUAL( NStr::GetField_Unsafe(tc.text, tc.field_no, tc.delims),
                           string(tc.expected) );
    }

    // NStr::eMergeDelims: a run of delimiters counts as a single separator.
    const SFieldCase merge_cases[] = {
        { NULL,           17,  "not important", ""    },
        { "",             0,   ":",             ""    },
        { "",             10,  ":",             ""    },
        { "one",          0,   ":",             "one" },
        { "one:",         0,   ":",             "one" },
        { "one::: two",   0,   ":",             "one" },
        { "one:::two:",   1,   ":",             "two" },
        { "one::-:two::", 1,   "-.:;",          "two" },
        { "one:two",      1,   "-.:",           "two" },
        { "one.two.",     1,   "-.:;",          "two" },
        { "one::two",     176, "-.:;",          ""    }
    };
    for (const SFieldCase& tc : merge_cases) {
        BOOST_CHECK_EQUAL( NStr::GetField_Unsafe(tc.text, tc.field_no, tc.delims, NStr::eMergeDelims),
                           string(tc.expected) );
    }
}
4575 
BOOST_AUTO_TEST_CASE(s_GetField_SingleDilimiter_Unsafe)
{
    // Table-driven check of the NStr::GetField_Unsafe() overload that takes
    // one delimiter character. Each row: source text (may be NULL),
    // zero-based field index, delimiter, and the expected field ("" = empty).
    struct SFieldCase {
        const char* text;
        size_t      field_no;
        char        delim;
        const char* expected;
    };

    // Default mode: adjacent delimiters enclose an empty field.
    const SFieldCase plain_cases[] = {
        { NULL,       17,  'n', ""    },
        { "",         0,   ':', ""    },
        { "",         10,  ':', ""    },
        { "one",      0,   ':', "one" },
        { "one:",     0,   ':', "one" },
        { "one: two", 0,   ':', "one" },
        { "one:two",  1,   ':', "two" },
        { "one:two:", 1,   ':', "two" },
        { "one::two", 1,   ':', ""    },
        { "one::two", 176, ':', ""    }
    };
    for (const SFieldCase& tc : plain_cases) {
        BOOST_CHECK_EQUAL( NStr::GetField_Unsafe(tc.text, tc.field_no, tc.delim),
                           string(tc.expected) );
    }

    // NStr::eMergeDelims: a run of the delimiter counts as a single separator.
    const SFieldCase merge_cases[] = {
        { NULL,           17,  'n', ""    },
        { "",             0,   ':', ""    },
        { "",             10,  ':', ""    },
        { "one",          0,   ':', "one" },
        { "one:",         0,   ':', "one" },
        { "one::: two",   0,   ':', "one" },
        { "one:::two:",   1,   ':', "two" },
        { "one::-:two::", 2,   ':', "two" },
        { "one:two",      1,   ':', "two" },
        { "one.two.",     1,   '.', "two" },
        { "one::two",     176, ':', ""    }
    };
    for (const SFieldCase& tc : merge_cases) {
        BOOST_CHECK_EQUAL( NStr::GetField_Unsafe(tc.text, tc.field_no, tc.delim, NStr::eMergeDelims),
                           string(tc.expected) );
    }
}
4601 
4602 
4603 //----------------------------------------------------------------------------
4604 // NStr::SQLEncode()
4605 //----------------------------------------------------------------------------
4606 
BOOST_AUTO_TEST_CASE(s_SQLEncode)4607 BOOST_AUTO_TEST_CASE(s_SQLEncode)
4608 {
4609     BOOST_CHECK_EQUAL( NStr::SQLEncode(
4610         CUtf8::AsUTF8("should not be touched",eEncoding_ISO8859_1),
4611         NStr::eSqlEnc_TagNonASCII),
4612         CUtf8::AsUTF8("'should not be touched'",eEncoding_ISO8859_1) );
4613     BOOST_CHECK_EQUAL(NStr::SQLEncode(CUtf8::AsUTF8("", eEncoding_ISO8859_1),
4614                                       NStr::eSqlEnc_TagNonASCII),
4615                       CUtf8::AsUTF8("''",eEncoding_ISO8859_1));
4616     BOOST_CHECK_EQUAL(NStr::SQLEncode(CUtf8::AsUTF8("'", eEncoding_ISO8859_1),
4617                                       NStr::eSqlEnc_TagNonASCII),
4618                       CUtf8::AsUTF8("''''",eEncoding_ISO8859_1));
4619     BOOST_CHECK_EQUAL(NStr::SQLEncode(CUtf8::AsUTF8("\\'",eEncoding_ISO8859_1),
4620                                       NStr::eSqlEnc_TagNonASCII),
4621                       CUtf8::AsUTF8("'\\'''",eEncoding_ISO8859_1));
4622     BOOST_CHECK_EQUAL(NStr::SQLEncode(CUtf8::AsUTF8("'a", eEncoding_ISO8859_1),
4623                                       NStr::eSqlEnc_TagNonASCII),
4624                       CUtf8::AsUTF8("'''a'",eEncoding_ISO8859_1));
4625     BOOST_CHECK_EQUAL(NStr::SQLEncode(CUtf8::AsUTF8("a'", eEncoding_ISO8859_1),
4626                                       NStr::eSqlEnc_TagNonASCII),
4627                       CUtf8::AsUTF8("'a'''",eEncoding_ISO8859_1));
4628     BOOST_CHECK_EQUAL( NStr::SQLEncode(
4629                            CUtf8::AsUTF8("`1234567890-=~!@#$%^&*()_+qwertyuiop[]\\asdfghjkl;zxcvbnm,./QWERTYUIOP{}|ASDFGHJKL:\"ZXCVBNM<>?",
4630                                          eEncoding_ISO8859_1),
4631                            NStr::eSqlEnc_TagNonASCII),
4632         CUtf8::AsUTF8("'`1234567890-=~!@#$%^&*()_+qwertyuiop[]\\asdfghjkl;zxcvbnm,./QWERTYUIOP{}|ASDFGHJKL:\"ZXCVBNM<>?'",eEncoding_ISO8859_1) );
4633 
4634     const unsigned char s_UpperHalf[] = {
4635         0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
4636         0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
4637         0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
4638         0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
4639         0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
4640         0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
4641         0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
4642         0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
4643     };
4644 
4645     const unsigned char s_Expected[] = {
4646   '\'', 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
4647         0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
4648         0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
4649         0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
4650         0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
4651         0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
4652         0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
4653         0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, '\''
4654     };
4655 
4656     CStringUTF8      upperHalf( CUtf8::AsUTF8(CTempString((char*)s_UpperHalf, 128),eEncoding_ISO8859_1 ));
4657     CStringUTF8      expected( CUtf8::AsUTF8( CTempString((char*)s_Expected, 130),eEncoding_ISO8859_1 ));
4658 
4659     BOOST_CHECK_EQUAL(NStr::SQLEncode(upperHalf, NStr::eSqlEnc_Plain),
4660                       expected);
4661     BOOST_CHECK_EQUAL(NStr::SQLEncode(upperHalf, NStr::eSqlEnc_TagNonASCII),
4662                       'N' + expected);
4663 }
4664 
4665 
4666 //----------------------------------------------------------------------------
// NStr::StringToNum() speed test
4668 //----------------------------------------------------------------------------
4669 
BOOST_AUTO_TEST_CASE(s_StringToInt_Speed)4670 BOOST_AUTO_TEST_CASE(s_StringToInt_Speed)
4671 {
4672     const int COUNT = 10000000;
4673     const int TESTS = 6;
4674     const string ss[TESTS] = { "", "0", "1", "12345", "1234567890", "TRACE" };
4675     const int ssr[TESTS] = { -1, 0, 1, 12345, 1234567890, -1 };
4676 
4677     for ( int t = 0; t < TESTS; ++t )
4678     {
4679         int v = NStr::StringToNumeric<int>(ss[t], NStr::fConvErr_NoThrow);
4680         if ( !v && errno ) v = -1;
4681         if ( v != ssr[t] ) Abort();
4682 
4683         errno = 0;
4684         Uint8 v8 = NStr::StringToUInt8(ss[t], NStr::fConvErr_NoThrow);
4685         v = (int)v8;
4686         if ( !v8 && errno ) v = -1;
4687         if ( v != ssr[t] ) Abort();
4688 
4689         try {
4690             v = (int)NStr::StringToNumeric<Uint8>(ss[t]);
4691         }
4692         catch ( exception& ) {
4693             v = -1;
4694         }
4695         if ( v != ssr[t] ) Abort();
4696         try {
4697             v = (int)NStr::StringToUInt8(ss[t]);
4698         }
4699         catch ( exception& ) {
4700             v = -1;
4701         }
4702         if ( v != ssr[t] ) Abort();
4703     }
4704     for ( int t = 0; t < TESTS; ++t ) {
4705         CTempString s = ss[t];
4706         CStopWatch sw;
4707         double time;
4708 
4709         if ( 1 ) {
4710             sw.Restart();
4711             for ( int i = 0; i < COUNT; ++i ) {
4712                 NStr::StringToNumeric<int>(ss[t], NStr::fConvErr_NoThrow);
4713             }
4714             time = sw.Elapsed();
4715             LOG_POST("StringToNumeric<int>("<<ss[t]<<") time: " << time);
4716         }
4717         if ( 1 ) {
4718             sw.Restart();
4719             for ( int i = 0; i < COUNT; ++i ) {
4720                 NStr::StringToUInt8(s, NStr::fConvErr_NoThrow);
4721             }
4722             time = sw.Elapsed();
4723             LOG_POST("StringToInt8("<<ss[t]<<") time: " << time);
4724         }
4725         if ( 0 ) {
4726             sw.Restart();
4727             for ( int i = 0; i < COUNT; ++i ) {
4728                 int v;
4729                 try {
4730                     v = (int)NStr::StringToUInt8(s);
4731                 }
4732                 catch ( exception& ) {
4733                     v = -1;
4734                 }
4735                 if ( v != ssr[t] ) Abort();
4736             }
4737             time = sw.Elapsed();
4738             LOG_POST("StringToInt8("<<ss[t]<<") time: " << time);
4739         }
4740     }
4741 }
4742 
4743 
BOOST_AUTO_TEST_CASE(s_StringToDouble_Speed)4744 BOOST_AUTO_TEST_CASE(s_StringToDouble_Speed)
4745 {
4746     const int COUNT = 10000000;
4747     const string ss[] = {
4748         "", "0", "1", "12", "123", "123456789", "1234567890", "TRACE",
4749         "0e9", "1e9",
4750         "1.234567890123456789e300", "-1.234567890123456789e-300",
4751         "1.234567890123456789e200", "-1.234567890123456789e-200"
4752     };
4753     const double ssr[] = {
4754         -1, 0, 1, 12, 123, 123456789, 1234567890, -1,
4755         0, 1e9,
4756         1.234567890123456789e300, -1.234567890123456789e-300,
4757         1.234567890123456789e200, -1.234567890123456789e-200
4758     };
4759     double ssr_min[sizeof(ssr)/sizeof(ssr[0])];
4760     double ssr_max[sizeof(ssr)/sizeof(ssr[0])];
4761     const size_t TESTS = ArraySize(ss);
4762 
4763     int flags = NStr::fConvErr_NoThrow|NStr::fAllowLeadingSpaces;
4764     double v;
4765     for ( size_t t = 0; t < TESTS; ++t ) {
4766         if ( 1 ) {
4767             double r_min = ssr[t], r_max = r_min;
4768             if ( r_min < 0 ) {
4769                 r_min *= 1+1e-15;
4770                 r_max *= 1-1e-15;
4771             }
4772             else {
4773                 r_min *= 1-1e-15;
4774                 r_max *= 1+1e-15;
4775             }
4776             ssr_min[t] = r_min;
4777             ssr_max[t] = r_max;
4778         }
4779         if ( 1 ) {
4780             errno = 0;
4781             v = NStr::StringToDouble(ss[t], flags|NStr::fDecimalPosix);
4782             if ( errno ) v = -1;
4783             if ( v < ssr_min[t] || v > ssr_max[t] )
4784                 ERR_FATAL(v<<" != "<<ssr[t]<<" for \"" << ss[t] << "\"");
4785         }
4786 
4787         if ( 1 ) {
4788             errno = 0;
4789             v = NStr::StringToDouble(ss[t], flags);
4790             if ( errno ) v = -1;
4791             if ( v < ssr_min[t] || v > ssr_max[t] )
4792                 ERR_FATAL(v<<" != "<<ssr[t]<<" for \"" << ss[t] << "\"");
4793         }
4794 
4795         if ( 1 ) {
4796             errno = 0;
4797             char* errptr;
4798             v = NStr::StringToDoublePosix(ss[t].c_str(), &errptr);
4799             if ( errno || (errptr&&(*errptr||errptr==ss[t].c_str())) ) v = -1;
4800             if ( v < ssr_min[t] || v > ssr_max[t] )
4801                 ERR_FATAL(v<<" != "<<ssr[t]<<" for \"" << ss[t] << "\"");
4802         }
4803 
4804         if ( 1 ) {
4805             errno = 0;
4806             char* errptr;
4807             v = strtod(ss[t].c_str(), &errptr);
4808             if ( errno || (errptr&&(*errptr||errptr==ss[t].c_str())) ) v = -1;
4809             if ( v < ssr_min[t] || v > ssr_max[t] )
4810                 ERR_FATAL(v<<" != "<<ssr[t]<<" for \"" << ss[t] << "\"");
4811         }
4812     }
4813     for ( size_t t = 0; t < TESTS; ++t ) {
4814         string s1 = ss[t];
4815         CTempStringEx s = ss[t];
4816         const char* s2 = ss[t].c_str();
4817         CStopWatch sw;
4818         double time;
4819 
4820         if ( 1 ) {
4821             sw.Restart();
4822             for ( int i = 0; i < COUNT; ++i ) {
4823                 NStr::StringToDouble(s, flags|NStr::fDecimalPosix);
4824             }
4825             time = sw.Elapsed();
4826             LOG_POST("StringToDouble("<<ss[t]<<", Posix) time: " << time);
4827         }
4828         if ( 1 ) {
4829             sw.Restart();
4830             for ( int i = 0; i < COUNT; ++i ) {
4831                 NStr::StringToDouble(s, flags);
4832             }
4833             time = sw.Elapsed();
4834             LOG_POST("StringToDouble("<<ss[t]<<") time: " << time);
4835         }
4836         if ( 1 ) {
4837             sw.Restart();
4838             for ( int i = 0; i < COUNT; ++i ) {
4839                 char* errptr;
4840                 NStr::StringToDoublePosix(s2, &errptr);
4841             }
4842             time = sw.Elapsed();
4843             LOG_POST("StringToDoublePosix("<<ss[t]<<") time: " << time);
4844         }
4845         if ( 1 ) {
4846             sw.Restart();
4847             for ( int i = 0; i < COUNT; ++i ) {
4848                 char* errptr;
4849                 _no_warning(strtod(s2, &errptr));
4850             }
4851             time = sw.Elapsed();
4852             LOG_POST("strtod("<<ss[t]<<") time: " << time);
4853         }
4854     }
4855 }
4856 
4857 
// Sample strings for the NStr::ShellEncode() test (s_ShellEncode):
// each entry is encoded, echoed by a generated shell script, and the
// script output is compared with the original string.
static const string s_ShellStr[] = {
    "abc",            // normal string, no encoding
    "ab\acd",         // non-printable chars, need BASH encoding
    "ab\ncd",         // EOL in the string
    "ab cd\tef",      // spaces - need quotes
    "ab!{}cd?*",      // more special chars which need quotes
    "ab'cd'ef",       // single quote - use double quotes around the string
    "ab ' cd ' ef",   // the same with extra spaces
    "ab'$cd",         // additional chars ($, \) - cannot use double quotes
    "\"ab cd ef\"",   // double quotes - use single quotes around the string
    "ab\\cd",         // backslash
    "ab\\cd'ef",      // backslash with single quote
    "ab\\cd\"ef",     // backslash with double quote
    "ab'\\cd\"$ef",   // ', ", \, $
    "",               // empty string
    "''",             // empty single quotes
    "\"\""            // empty double quotes
};
4876 
4877 
4878 #ifdef NCBI_OS_UNIX
// Round-trip test for NStr::ShellEncode(): generate a bash script that
// echoes every encoded sample from s_ShellStr[] into a temporary file,
// run it, and check that the file holds the original strings.
BOOST_AUTO_TEST_CASE(s_ShellEncode)
{
    const string echo_file = CFile::GetTmpName(CFile::eTmpFileCreate);
    const string cmd_file("./echo.sh");

    {{
        // Inner scope, so the script is closed before it gets executed
        CNcbiOfstream script(cmd_file.c_str());
        script << "#! /usr/bin/env bash" << endl;

        // One "echo" command per sample, all appending to the same file
        for (const string& sample : s_ShellStr) {
            script << "echo -E " + NStr::ShellEncode(sample) + " >> " + echo_file
                   << endl;
        }
    }}
    CFile(cmd_file).SetMode(CFile::fDefaultUser | CFile::fExecute);
    BOOST_CHECK_EQUAL(CExec::System(cmd_file.c_str()), 0);

    CNcbiIfstream in(echo_file.c_str());
    string line;
    for (const string& expected : s_ShellStr) {
        // A sample with embedded EOLs spans several lines of the output;
        // read one extra line per '\n' found in the sample and glue
        // the pieces back together.
        string actual;
        size_t eol_pos = 0;
        for (;;) {
            getline(in, line);
            actual += line;
            eol_pos = expected.find('\n', eol_pos);
            if (eol_pos == NPOS) {
                break;
            }
            ++eol_pos;
            actual += "\n";
        }
        BOOST_CHECK_EQUAL(expected, actual);
    }
    CFile(echo_file).Remove();
    CFile(cmd_file).Remove();
}
4918 #endif
4919 
// Test for NStr::Join(), NStr::JoinNumeric() and NStr::TransformJoin().
//
// The same joined result is requested through many different argument
// forms (iterator ranges, containers, initializer lists, C arrays) and
// element types (string, const char*, int, CTime).
BOOST_AUTO_TEST_CASE(s_StringJoin)
{
    // Expected results for the string and the numeric cases
    string result("one,two,three"), resultN("1,2,3");

    // Stream iterators
    stringstream iss("one two three");
    istream_iterator<string> it(iss);
    BOOST_CHECK_EQUAL(result, NStr::Join(it, istream_iterator<string>(), ","));

    stringstream iss1("1 2 3");
    BOOST_CHECK_EQUAL(resultN, NStr::JoinNumeric(istream_iterator<int>(iss1), istream_iterator<int>(), ","));

    // list<string>: whole container and iterator range
    list<string> x = {"one", "two", "three"};
    BOOST_CHECK_EQUAL(result, NStr::Join(x, ","));
    BOOST_CHECK_EQUAL(result, NStr::Join(x.begin(), x.end(), ","));

    // initializer_list<string>: named, explicit temporary, braced list
    initializer_list<string> y = {"one", "two", "three"};
    BOOST_CHECK_EQUAL(result, NStr::Join(begin(y), end(y), ","));
    BOOST_CHECK_EQUAL(result, NStr::Join(y, ","));
    BOOST_CHECK_EQUAL(result, NStr::Join(initializer_list<string>({"one", "two", "three"}), ","));
    BOOST_CHECK_EQUAL(result, NStr::Join({"one", "two", "three"}, ","));

    // initializer_list<const char*>
    initializer_list<const char*> y2 = {"one", "two", "three"};
    BOOST_CHECK_EQUAL(result, NStr::Join(begin(y2), end(y2), ","));
    BOOST_CHECK_EQUAL(result, NStr::Join(y2, ","));

    // C array of strings
    string z[3] = {"one", "two", "three"};
    BOOST_CHECK_EQUAL(result, NStr::Join(begin(z), end(z), ","));
    BOOST_CHECK_EQUAL(result, NStr::Join(z, ","));

    // C array of C strings
    const char* z2[3] = {"one", "two", "three"};
    BOOST_CHECK_EQUAL(result, NStr::Join(begin(z2), end(z2), ","));
    BOOST_CHECK_EQUAL(result, NStr::Join(z2, ","));

    // Map: join key and value of the first element
    map<string, string> m;
    m["one"] = "uno";
    m["two"] = "dos";
    BOOST_CHECK_EQUAL("one:uno",NStr::Join({m.begin()->first, m.begin()->second}, ":"));
    // Disabled: JoinNumeric() over map elements
    // (NOTE(review): presumably kept only as an example of a call that
    // is not expected to compile -- confirm).
#if 0
    string jjj = NStr::JoinNumeric( begin(m), end(m), ",");
#endif

    // TransformJoin(): each map element is converted to "key:value" first
    BOOST_CHECK_EQUAL("one:uno,two:dos", NStr::TransformJoin( m.begin(), m.end(), ",",
                      [](const map<string, string>::value_type& i){ return NStr::Join( {i.first, i.second}, ":");}));
// Generic lambdas ("auto" parameters) are a C++14 feature, so the
// equivalent calls using them are kept commented out here and below:
// they are correct, but might fail to compile with an older compiler.
//    BOOST_CHECK_EQUAL("one:uno,two:dos", NStr::TransformJoin( m.begin(), m.end(), ",", [](const auto& i){ return NStr::Join( {i.first, i.second}, ":");}));

    // list<int>
    list<int> mi = {1,2,3};
//    BOOST_CHECK_EQUAL(resultN, NStr::TransformJoin( mi.begin(), mi.end(), ",", [](const auto& i){ return NStr::NumericToString(i);}));
    BOOST_CHECK_EQUAL(resultN, NStr::TransformJoin( mi.begin(), mi.end(), ",", [](const int& i){ return NStr::NumericToString(i);}));
    BOOST_CHECK_EQUAL(resultN, NStr::JoinNumeric( mi.begin(), mi.end(), ","));

    // initializer_list<int> (the lambda takes its argument by value here)
    initializer_list<int> mi2 = {1,2,3};
//    BOOST_CHECK_EQUAL(resultN, NStr::TransformJoin( mi2.begin(), mi2.end(), ",", [](const auto& i){ return NStr::NumericToString(i);}));
    BOOST_CHECK_EQUAL(resultN, NStr::TransformJoin( mi2.begin(), mi2.end(), ",", [](const int i){ return NStr::NumericToString(i);}));
    BOOST_CHECK_EQUAL(resultN, NStr::JoinNumeric( mi2.begin(), mi2.end(), ","));

    // C array of int
    int z3[3] = {1,2,3};
    // Disabled: plain Join() over an array of int
    // (NOTE(review): presumably not expected to compile -- confirm).
#if 0
    string jjj = NStr::Join(begin(z3), end(z3), ",");
    jjj = NStr::Join(z3, ",");
#endif
    BOOST_CHECK_EQUAL(resultN, NStr::TransformJoin( begin(z3), end(z3), ",", [](const int& i){ return NStr::NumericToString(i);}));
    BOOST_CHECK_EQUAL(resultN, NStr::JoinNumeric( begin(z3), end(z3), ","));

    // CTime elements.
    // The results below are assigned to 'j' but never checked, so these
    // calls only verify that the code compiles and runs.
    list<CTime> t1;
    t1.push_back( CTime(CTime::eCurrent, CTime::eLocal));
    t1.push_back( CTime(CTime::eCurrent, CTime::eUTC));
    t1.push_back( CTime(CTime::eCurrent, CTime::eGmt));
    string j = NStr::Join(t1, ",");
//    j = NStr::TransformJoin( t1.begin(), t1.end(), ",", [](const auto& i){ return i.AsString();});
    j = NStr::TransformJoin( t1.begin(), t1.end(), ",", [](const CTime& i){ return i.AsString();});
    j = NStr::Join({CTime(CTime::eCurrent, CTime::eLocal), CTime(CTime::eCurrent, CTime::eUTC)}, ",");

    // C array of CTime
    CTime arr[2];
    arr[0].SetCurrent();
    arr[1].SetCurrent();
    j = NStr::Join(arr, ",");
    j = NStr::Join(begin(arr), end(arr), ",");
}
4999 
// Test for NStr::HtmlEncode() / NStr::HtmlDecode()
BOOST_AUTO_TEST_CASE(s_HtmlEncode)
{
    // UCS-2 text -> UTF-8 -> HTML-encoded -> decoded -> UCS-2 round trip
    {
        const TStringUCS2 source = {1050, 1086, 1084, 1072, 1085, 1076, 1099};
        const string utf8    = CUtf8::AsUTF8(source);
        const string decoded = NStr::HtmlDecode(NStr::HtmlEncode(utf8));
        const TStringUCS2 round_trip =
            CUtf8::AsBasicString<TCharUCS2>(decoded);
        const bool same = (source == round_trip);
        BOOST_CHECK(same);
    }

    // Named and numeric entities: decode, then re-encode
    {
        const string entities =
            "&Aacute;&scaron;&Gamma;&thinsp;&rang;&#x960;&#x1225;";
        const string decoded   = NStr::HtmlDecode(entities);
        const string reencoded = NStr::HtmlEncode(decoded);
        BOOST_CHECK_EQUAL(reencoded,
                          "&#xC1;&#x161;&#x393;&#x2009;&#x232A;&#x960;&#x1225;");
        BOOST_CHECK(CUtf8::MatchEncoding(decoded, eEncoding_UTF8));
        // The converted value is not inspected; only the call is exercised
        TStringUCS2 as_ucs2 = CUtf8::AsBasicString<TCharUCS2>(decoded);
    }

    // Input already containing entities, encoded with different flags
    {
        const string mixed = "amp = &, preencoded = &Aacute;&#x960;, &;";

        const string enc_skip =
            NStr::HtmlEncode(mixed, NStr::fHtmlEnc_SkipEntities);
        const string dec_skip = NStr::HtmlDecode(enc_skip);
        BOOST_CHECK_EQUAL(enc_skip, "amp = &amp;, preencoded = &Aacute;&amp;#x960;, &amp;;");
        BOOST_CHECK_EQUAL(dec_skip, "amp = &, preencoded = Á&#x960;, &;");

        const string enc_all =
            NStr::HtmlEncode(mixed, NStr::fHtmlEnc_EncodeAll);
        const string dec_all = NStr::HtmlDecode(enc_all);
        BOOST_CHECK_EQUAL(enc_all, "amp = &amp;, preencoded = &amp;Aacute;&amp;#x960;, &amp;;");
        BOOST_CHECK_EQUAL(dec_all, mixed);
    }
#if 0
    {
    string input = "this is &#32; but it's not &#33; a &#35; not &#38; a &#43; sign is different from a &#45; you can use &lt; but not &gt; &#63; &Agrave; &Aacute; &Aring; &Eacute; &Eumi; &Ntilde; &Uacute;";
    string output = NStr::HtmlDecode(input);
    cout << output << endl;
    string inagain = NStr::HtmlEncode(output);
    cout << inagain << endl;
    }
#endif
}
5040 
BOOST_AUTO_TEST_CASE(s_JsonEncode)5041 BOOST_AUTO_TEST_CASE(s_JsonEncode)
5042 {
5043     vector<const char*> plain = {
5044         "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0",
5045         "\xef\xee\xed\xec\xeb\xea\xe9\xe8\xe7\xe6\xe5\xe4\xe3\xe2\xe1\xe0",
5046         "\xdf\xde\xdd\xdc\xdb\xda\xd9\xd8\xd7\xd6\xd5\xd4\xd3\xd2\xd1\xd0",
5047         "\xcf\xce\xcd\xcc\xcb\xca\xc9\xc8\xc7\xc6\xc5\xc4\xc3\xc2\xc1\xc0",
5048         "\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0",
5049         "\xaf\xae\xad\xac\xab\xaa\xa9\xa8\xa7\xa6\xa5\xa4\xa3\xa2\xa1\xa0",
5050         "\x9f\x9e\x9d\x9c\x9b\x9a\x99\x98\x97\x96\x95\x94\x93\x92\x91\x90",
5051         "\x8f\x8e\x8d\x8c\x8b\x8a\x89\x88\x87\x86\x85\x84\x83\x82\x81\x80",
5052         "\x7f\x7e\x7d\x7c\x7b\x7a\x79\x78\x77\x76\x75\x74\x73\x72\x71\x70",
5053         "\x6f\x6e\x6d\x6c\x6b\x6a\x69\x68\x67\x66\x65\x64\x63\x62\x61\x60",
5054         "\x5f\x5e\x5d\x5c\x5b\x5a\x59\x58\x57\x56\x55\x54\x53\x52\x51\x50",
5055         "\x4f\x4e\x4d\x4c\x4b\x4a\x49\x48\x47\x46\x45\x44\x43\x42\x41\x40",
5056         "\x3f\x3e\x3d\x3c\x3b\x3a\x39\x38\x37\x36\x35\x34\x33\x32\x31\x30",
5057         "\x2f\x2e\x2d\x2c\x2b\x2a\x29\x28\x27\x26\x25\x24\x23\x22\x21\x20",
5058         "\x1f\x1e\x1d\x1c\x1b\x1a\x19\x18\x17\x16\x15\x14\x13\x12\x11\x10",
5059         "\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08\x07\x06\x05\x04\x03\x02\x01",
5060     };
5061     vector<pair<NStr::EJsonEncode, vector<const char*>>> encoded = {
5062         {
5063             NStr::eJsonEnc_UTF8,
5064             {
5065                 "\\u00ff\\u00fe\\u00fd\\u00fc\\u00fb\\u00fa\\u00f9\\u00f8\\u00f7\\u00f6\\u00f5\\u00f4\\u00f3\\u00f2\\u00f1\\u00f0",
5066                 "\\u00ef\\u00ee\\u00ed\\u00ec\\u00eb\\u00ea\\u00e9\\u00e8\\u00e7\\u00e6\\u00e5\\u00e4\\u00e3\\u00e2\\u00e1\\u00e0",
5067                 "\\u00df\\u00de\\u00dd\\u00dc\\u00db\\u00da\\u00d9\\u00d8\\u00d7\\u00d6\\u00d5\\u00d4\\u00d3\\u00d2\\u00d1\\u00d0",
5068                 "\\u00cf\\u00ce\\u00cd\\u00cc\\u00cb\\u00ca\\u00c9\\u00c8\\u00c7\\u00c6\\u00c5\\u00c4\\u00c3\\u00c2\\u00c1\\u00c0",
5069                 "\\u00bf\\u00be\\u00bd\\u00bc\\u00bb\\u00ba\\u00b9\\u00b8\\u00b7\\u00b6\\u00b5\\u00b4\\u00b3\\u00b2\\u00b1\\u00b0",
5070                 "\\u00af\\u00ae\\u00ad\\u00ac\\u00ab\\u00aa\\u00a9\\u00a8\\u00a7\\u00a6\\u00a5\\u00a4\\u00a3\\u00a2\\u00a1\\u00a0",
5071                 "\\u009f\\u009e\\u009d\\u009c\\u009b\\u009a\\u0099\\u0098\\u0097\\u0096\\u0095\\u0094\\u0093\\u0092\\u0091\\u0090",
5072                 "\\u008f\\u008e\\u008d\\u008c\\u008b\\u008a\\u0089\\u0088\\u0087\\u0086\\u0085\\u0084\\u0083\\u0082\\u0081\\u0080",
5073                 "\x7f\x7e\x7d\x7c\x7b\x7a\x79\x78\x77\x76\x75\x74\x73\x72\x71\x70",
5074                 "\x6f\x6e\x6d\x6c\x6b\x6a\x69\x68\x67\x66\x65\x64\x63\x62\x61\x60",
5075                 "\x5f\x5e\x5d\\\x5c\x5b\x5a\x59\x58\x57\x56\x55\x54\x53\x52\x51\x50",
5076                 "\x4f\x4e\x4d\x4c\x4b\x4a\x49\x48\x47\x46\x45\x44\x43\x42\x41\x40",
5077                 "\x3f\x3e\x3d\x3c\x3b\x3a\x39\x38\x37\x36\x35\x34\x33\x32\x31\x30",
5078                 "\x2f\x2e\x2d\x2c\x2b\x2a\x29\x28\x27\x26\x25\x24\x23\\\x22\x21\x20",
5079                 "\\u001f\\u001e\\u001d\\u001c\\u001b\\u001a\\u0019\\u0018\\u0017\\u0016\\u0015\\u0014\\u0013\\u0012\\u0011\\u0010",
5080                 "\\u000f\\u000e\\u000d\\u000c\\u000b\\u000a\\u0009\\u0008\\u0007\\u0006\\u0005\\u0004\\u0003\\u0002\\u0001",
5081             },
5082         },
5083         {
5084             NStr::eJsonEnc_Quoted,
5085             {
5086                 "\"\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0\"",
5087                 "\"\xef\xee\xed\xec\xeb\xea\xe9\xe8\xe7\xe6\xe5\xe4\xe3\xe2\xe1\xe0\"",
5088                 "\"\xdf\xde\xdd\xdc\xdb\xda\xd9\xd8\xd7\xd6\xd5\xd4\xd3\xd2\xd1\xd0\"",
5089                 "\"\xcf\xce\xcd\xcc\xcb\xca\xc9\xc8\xc7\xc6\xc5\xc4\xc3\xc2\xc1\xc0\"",
5090                 "\"\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0\"",
5091                 "\"\xaf\xae\xad\xac\xab\xaa\xa9\xa8\xa7\xa6\xa5\xa4\xa3\xa2\xa1\xa0\"",
5092                 "\"\x9f\x9e\x9d\x9c\x9b\x9a\x99\x98\x97\x96\x95\x94\x93\x92\x91\x90\"",
5093                 "\"\x8f\x8e\x8d\x8c\x8b\x8a\x89\x88\x87\x86\x85\x84\x83\x82\x81\x80\"",
5094                 "\"\x7f\x7e\x7d\x7c\x7b\x7a\x79\x78\x77\x76\x75\x74\x73\x72\x71\x70\"",
5095                 "\"\x6f\x6e\x6d\x6c\x6b\x6a\x69\x68\x67\x66\x65\x64\x63\x62\x61\x60\"",
5096                 "\"\x5f\x5e\x5d\\\x5c\x5b\x5a\x59\x58\x57\x56\x55\x54\x53\x52\x51\x50\"",
5097                 "\"\x4f\x4e\x4d\x4c\x4b\x4a\x49\x48\x47\x46\x45\x44\x43\x42\x41\x40\"",
5098                 "\"\x3f\x3e\x3d\x3c\x3b\x3a\x39\x38\x37\x36\x35\x34\x33\x32\x31\x30\"",
5099                 "\"\x2f\x2e\x2d\x2c\x2b\x2a\x29\x28\x27\x26\x25\x24\x23\\\x22\x21\x20\"",
5100                 "\"\\u001f\\u001e\\u001d\\u001c\\u001b\\u001a\\u0019\\u0018\\u0017\\u0016\\u0015\\u0014\\u0013\\u0012\\u0011\\u0010\"",
5101                 "\"\\u000f\\u000e\\u000d\\u000c\\u000b\\u000a\\u0009\\u0008\\u0007\\u0006\\u0005\\u0004\\u0003\\u0002\\u0001\"",
5102             },
5103         },
5104     };
5105 
5106     for (const auto& current : encoded) {
5107         assert(current.second.size() == plain.size());
5108 
5109         for (size_t i = 0; i < plain.size(); ++i) {
5110             auto result = NStr::JsonEncode(plain[i], current.first);
5111             BOOST_CHECK_EQUAL(result, current.second[i]);
5112         }
5113     }
5114 }
5115 
BOOST_AUTO_TEST_CASE(s_JsonDecode)5116 BOOST_AUTO_TEST_CASE(s_JsonDecode)
5117 {
5118     vector<const char*> plain = {
5119         "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0",
5120         "\xef\xee\xed\xec\xeb\xea\xe9\xe8\xe7\xe6\xe5\xe4\xe3\xe2\xe1\xe0",
5121         "\xdf\xde\xdd\xdc\xdb\xda\xd9\xd8\xd7\xd6\xd5\xd4\xd3\xd2\xd1\xd0",
5122         "\xcf\xce\xcd\xcc\xcb\xca\xc9\xc8\xc7\xc6\xc5\xc4\xc3\xc2\xc1\xc0",
5123         "\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0",
5124         "\xaf\xae\xad\xac\xab\xaa\xa9\xa8\xa7\xa6\xa5\xa4\xa3\xa2\xa1\xa0",
5125         "\x9f\x9e\x9d\x9c\x9b\x9a\x99\x98\x97\x96\x95\x94\x93\x92\x91\x90",
5126         "\x8f\x8e\x8d\x8c\x8b\x8a\x89\x88\x87\x86\x85\x84\x83\x82\x81\x80",
5127         "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0",
5128         "\xef\xee\xed\xec\xeb\xea\xe9\xe8\xe7\xe6\xe5\xe4\xe3\xe2\xe1\xe0",
5129         "\xdf\xde\xdd\xdc\xdb\xda\xd9\xd8\xd7\xd6\xd5\xd4\xd3\xd2\xd1\xd0",
5130         "\xcf\xce\xcd\xcc\xcb\xca\xc9\xc8\xc7\xc6\xc5\xc4\xc3\xc2\xc1\xc0",
5131         "\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0",
5132         "\xaf\xae\xad\xac\xab\xaa\xa9\xa8\xa7\xa6\xa5\xa4\xa3\xa2\xa1\xa0",
5133         "\x9f\x9e\x9d\x9c\x9b\x9a\x99\x98\x97\x96\x95\x94\x93\x92\x91\x90",
5134         "\x8f\x8e\x8d\x8c\x8b\x8a\x89\x88\x87\x86\x85\x84\x83\x82\x81\x80",
5135         "\x7f\x7e\x7d\x7c\x7b\x7a\x79\x78\x77\x76\x75\x74\x73\x72\x71\x70",
5136         "\x6f\x6e\x6d\x6c\x6b\x6a\x69\x68\x67\x66\x65\x64\x63\x62\x61\x60",
5137         "\x5f\x5e\x5d\x5c\x5b\x5a\x59\x58\x57\x56\x55\x54\x53\x52\x51\x50",
5138         "\x4f\x4e\x4d\x4c\x4b\x4a\x49\x48\x47\x46\x45\x44\x43\x42\x41\x40",
5139         "\x3f\x3e\x3d\x3c\x3b\x3a\x39\x38\x37\x36\x35\x34\x33\x32\x31\x30",
5140         "\x2f\x2e\x2d\x2c\x2b\x2a\x29\x28\x27\x26\x25\x24\x23\x22\x21\x20",
5141         "\x1f\x1e\x1d\x1c\x1b\x1a\x19\x18\x17\x16\x15\x14\x13\x12\x11\x10",
5142         "\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08\x07\x06\x05\x04\x03\x02\x01",
5143     };
5144     vector<const char*> encoded = {
5145         "\"\\u00ff\\u00fe\\u00fd\\u00fc\\u00fb\\u00fa\\u00f9\\u00f8\\u00f7\\u00f6\\u00f5\\u00f4\\u00f3\\u00f2\\u00f1\\u00f0\"",
5146         "\"\\u00ef\\u00ee\\u00ed\\u00ec\\u00eb\\u00ea\\u00e9\\u00e8\\u00e7\\u00e6\\u00e5\\u00e4\\u00e3\\u00e2\\u00e1\\u00e0\"",
5147         "\"\\u00df\\u00de\\u00dd\\u00dc\\u00db\\u00da\\u00d9\\u00d8\\u00d7\\u00d6\\u00d5\\u00d4\\u00d3\\u00d2\\u00d1\\u00d0\"",
5148         "\"\\u00cf\\u00ce\\u00cd\\u00cc\\u00cb\\u00ca\\u00c9\\u00c8\\u00c7\\u00c6\\u00c5\\u00c4\\u00c3\\u00c2\\u00c1\\u00c0\"",
5149         "\"\\u00bf\\u00be\\u00bd\\u00bc\\u00bb\\u00ba\\u00b9\\u00b8\\u00b7\\u00b6\\u00b5\\u00b4\\u00b3\\u00b2\\u00b1\\u00b0\"",
5150         "\"\\u00af\\u00ae\\u00ad\\u00ac\\u00ab\\u00aa\\u00a9\\u00a8\\u00a7\\u00a6\\u00a5\\u00a4\\u00a3\\u00a2\\u00a1\\u00a0\"",
5151         "\"\\u009f\\u009e\\u009d\\u009c\\u009b\\u009a\\u0099\\u0098\\u0097\\u0096\\u0095\\u0094\\u0093\\u0092\\u0091\\u0090\"",
5152         "\"\\u008f\\u008e\\u008d\\u008c\\u008b\\u008a\\u0089\\u0088\\u0087\\u0086\\u0085\\u0084\\u0083\\u0082\\u0081\\u0080\"",
5153         "\"\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0\"",
5154         "\"\xef\xee\xed\xec\xeb\xea\xe9\xe8\xe7\xe6\xe5\xe4\xe3\xe2\xe1\xe0\"",
5155         "\"\xdf\xde\xdd\xdc\xdb\xda\xd9\xd8\xd7\xd6\xd5\xd4\xd3\xd2\xd1\xd0\"",
5156         "\"\xcf\xce\xcd\xcc\xcb\xca\xc9\xc8\xc7\xc6\xc5\xc4\xc3\xc2\xc1\xc0\"",
5157         "\"\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0\"",
5158         "\"\xaf\xae\xad\xac\xab\xaa\xa9\xa8\xa7\xa6\xa5\xa4\xa3\xa2\xa1\xa0\"",
5159         "\"\x9f\x9e\x9d\x9c\x9b\x9a\x99\x98\x97\x96\x95\x94\x93\x92\x91\x90\"",
5160         "\"\x8f\x8e\x8d\x8c\x8b\x8a\x89\x88\x87\x86\x85\x84\x83\x82\x81\x80\"",
5161         "\"\x7f\x7e\x7d\x7c\x7b\x7a\x79\x78\x77\x76\x75\x74\x73\x72\x71\x70\"",
5162         "\"\x6f\x6e\x6d\x6c\x6b\x6a\x69\x68\x67\x66\x65\x64\x63\x62\x61\x60\"",
5163         "\"\x5f\x5e\x5d\\\x5c\x5b\x5a\x59\x58\x57\x56\x55\x54\x53\x52\x51\x50\"",
5164         "\"\x4f\x4e\x4d\x4c\x4b\x4a\x49\x48\x47\x46\x45\x44\x43\x42\x41\x40\"",
5165         "\"\x3f\x3e\x3d\x3c\x3b\x3a\x39\x38\x37\x36\x35\x34\x33\x32\x31\x30\"",
5166         "\"\x2f\x2e\x2d\x2c\x2b\x2a\x29\x28\x27\x26\x25\x24\x23\\\x22\x21\x20\"",
5167         "\"\\u001f\\u001e\\u001d\\u001c\\u001b\\u001a\\u0019\\u0018\\u0017\\u0016\\u0015\\u0014\\u0013\\u0012\\u0011\\u0010\"",
5168         "\"\\u000f\\u000e\\u000d\\u000c\\u000b\\u000a\\u0009\\u0008\\u0007\\u0006\\u0005\\u0004\\u0003\\u0002\\u0001\"",
5169     };
5170 
5171     assert(encoded.size() == plain.size());
5172 
5173     for (size_t i = 0; i < plain.size(); ++i) {
5174         const auto& original = encoded[i];
5175 
5176         size_t n_read;
5177         auto result = NStr::JsonDecode(original, &n_read);
5178         BOOST_CHECK_EQUAL(n_read, strlen(original));
5179         BOOST_CHECK_EQUAL(result, plain[i]);
5180     }
5181 }
5182 
5183 
// Test tree setup: disable both speed tests.
// NOTE(review): presumably because each of them times 10,000,000
// conversions per sample string and is too slow for a regular run -- confirm.
NCBITEST_INIT_TREE()
{
    NCBITEST_DISABLE(s_StringToInt_Speed);
    NCBITEST_DISABLE(s_StringToDouble_Speed);
}
5189