1 #ifndef CORELIB___NCBISTR__HPP
2 #define CORELIB___NCBISTR__HPP
3 
4 /*  $Id: ncbistr.hpp 627618 2021-03-16 14:35:43Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Eugene Vasilchenko, Denis Vakatov
30  *
31  *
32  */
33 
34 /// @file ncbistr.hpp
35 /// The NCBI C++ standard methods for dealing with std::string
36 
37 
38 #include <corelib/ncbi_limits.hpp>
39 #include <corelib/tempstr.hpp>
40 #include <corelib/ncbierror.hpp>
41 #ifdef NCBI_OS_OSF1
42 #  include <strings.h>
43 #endif
44 #include <stdarg.h>
45 #include <time.h>
46 #include <set>
47 #include <functional>
48 
49 
50 BEGIN_NCBI_NAMESPACE;
51 
52 /** @addtogroup String
53  *
54  * @{
55  */
56 
57 /// Empty "C" string (points to a '\0').
58 NCBI_XNCBI_EXPORT extern const char *const kEmptyCStr;
59 #define NcbiEmptyCStr NCBI_NS_NCBI::kEmptyCStr
60 
61 #if defined(HAVE_WSTRING)
62 NCBI_XNCBI_EXPORT extern const wchar_t *const kEmptyWCStr;
63 #define NcbiEmptyWCStr NCBI_NS_NCBI::kEmptyWCStr
64 #endif
65 
66 /// Empty "C++" string.
67 #if defined(NCBI_OS_MSWIN)  \
68     ||  (defined(NCBI_OS_LINUX) \
69          &&  (defined(NCBI_COMPILER_GCC) || defined(NCBI_COMPILER_ANY_CLANG)))
/// Provider of the shared empty "C++" string (header-only variant).
///
/// This variant is selected by the surrounding preprocessor condition
/// (MS Windows, or Linux with GCC/Clang); the string object is a
/// function-local static owned by Get().
class CNcbiEmptyString
{
public:
    /// Get the shared empty string.
    ///
    /// @return
    ///   Reference to one and the same empty string object on every call.
    inline
    static const string& Get(void)
    {
        static string empty_str;
        return empty_str;
    }
};
81 #if defined(HAVE_WSTRING)
/// Provider of the shared empty wide "C++" string (header-only variant).
///
/// Wide-character counterpart of CNcbiEmptyString; the string object is a
/// function-local static owned by Get().
class CNcbiEmptyWString
{
public:
    /// Get the shared empty wide string.
    ///
    /// @return
    ///   Reference to one and the same empty wstring object on every call.
    static const wstring& Get(void)
    {
        static wstring empty_str;
        return empty_str;
    }
};
92 #endif
93 #else
94 class NCBI_XNCBI_EXPORT CNcbiEmptyString
95 {
96 public:
97     /// Get string.
98     static const string& Get(void);
99 private:
100     /// Helper method to initialize private data member and return
101     /// null string.
102     static const string& FirstGet(void);
103     static const string* m_Str;     ///< Null string pointer.
104 };
105 
106 #  if defined(HAVE_WSTRING)
107 class NCBI_XNCBI_EXPORT CNcbiEmptyWString
108 {
109 public:
110     /// Get string.
111     static const wstring& Get(void);
112 private:
113     /// Helper method to initialize private data member and return
114     /// null string.
115     static const wstring& FirstGet(void);
116     static const wstring* m_Str;     ///< Null string pointer.
117 };
118 #  endif
119 #endif // NCBI_OS_MSWIN....
120 
121 
/// Shorthand for the shared empty string; expands to a call of
/// CNcbiEmptyString::Get().
#define NcbiEmptyString NCBI_NS_NCBI::CNcbiEmptyString::Get()
/// Alias of NcbiEmptyString.
#define kEmptyStr NcbiEmptyString

#if defined(HAVE_WSTRING)
/// Shorthand for the shared empty wide string; expands to a call of
/// CNcbiEmptyWString::Get().
#  define NcbiEmptyWString NCBI_NS_NCBI::CNcbiEmptyWString::Get()
/// Alias of NcbiEmptyWString.
#  define kEmptyWStr NcbiEmptyWString
#endif
129 
130 // SIZE_TYPE and NPOS
131 
132 typedef NCBI_NS_STD::string::size_type SIZE_TYPE;
133 #define NPOS NCBI_NS_STD::string::npos
134 
135 
136 
137 /////////////////////////////////////////////////////////////////////////////
138 // Unicode-related definitions and conversions
139 
140 /// Unicode character
141 typedef Uint4 TUnicodeSymbol;
142 /// Unicode string
143 typedef basic_string<TUnicodeSymbol> TStringUnicode;
144 
#if defined(NCBI_OS_MSWIN) && defined(_UNICODE)

// Unicode build on MS Windows: the "X" character/string types are wide,
// and the _T_* macros convert between UTF-8 std::string and TXString.
typedef wchar_t TXChar;
typedef wstring TXString;

#  if !defined(_TX)
#    define _TX(x) L ## x
#  endif

// Debug builds validate the UTF-8 input during conversion; release builds
// skip validation.
#  if defined(_DEBUG)
#    define _T_XSTRING(x) \
    ncbi::CUtf8::AsBasicString<ncbi::TXChar>(x, NULL, ncbi::CUtf8::eValidate)
#  else
#    define _T_XSTRING(x) \
    ncbi::CUtf8::AsBasicString<ncbi::TXChar>(x, NULL, ncbi::CUtf8::eNoValidate)
#  endif
#  define _T_STDSTRING(x)     ncbi::CUtf8::AsUTF8(x)
#  define _T_XCSTRING(x)      _T_XSTRING(x).c_str()
#  define _T_CSTRING(x)       _T_STDSTRING(x).c_str()

#  define NcbiEmptyXCStr   NcbiEmptyWCStr
#  define NcbiEmptyXString NcbiEmptyWString
#  define kEmptyXStr       kEmptyWStr
#  define kEmptyXCStr      kEmptyWCStr

#else

// All other builds: the "X" character/string types are the narrow ones,
// and the _T_* conversion macros pass their argument through unchanged.
typedef char   TXChar;
typedef string TXString;

#  if !defined(_TX)
#    define _TX(x) x
#  endif

#  define _T_XSTRING(x)       (x)
#  define _T_STDSTRING(x)     (x)
#  define _T_XCSTRING(x)      ncbi::impl_ToCString(x)
#  define _T_CSTRING(x)       (x)

#  define NcbiEmptyXCStr   NcbiEmptyCStr
#  define NcbiEmptyXString NcbiEmptyString
#  define kEmptyXStr       kEmptyStr
#  define kEmptyXCStr      kEmptyCStr

/// Helper for _T_XCSTRING(): a C string is returned as is.
inline const char* impl_ToCString(const char*   s) { return s; }
/// Helper for _T_XCSTRING(): return the C-string view of a std::string.
/// The pointer is the one returned by s.c_str().
inline const char* impl_ToCString(const string& s) { return s.c_str(); }

#endif
193 
194 
/////////////////////////////////////////////////////////////////////////////
///
/// EEncoding --
///
/// Character encodings. Always refer to the values by their
/// "eEncoding_Xxx" names, which are available in both variants below.

#if defined(NCBI_CUTF8_ENCODING_CLASSIC)  ||  !defined(HAVE_ENUM_CLASS)
enum EEncoding {
    eEncoding_Unknown,
    eEncoding_UTF8,
    eEncoding_Ascii,
    /// Note:  From the point of view of the C++ Toolkit, the ISO 8859-1
    /// character set includes symbols 0x00 through 0xFF except 0x80
    /// through 0x9F.
    eEncoding_ISO8859_1,
    eEncoding_Windows_1252
};
#else
// Temporary safeguard to protect against implicit conversion of EEncoding
// to size_t, etc
// @attention  Do not use "EEncoding::Xxx" values directly, as they will go
//             away eventually! Use the "eEncoding_Xxx" values instead.
enum class EEncoding {
    Unknown,      ///< Do not use this directly!  It will go away eventually!
    UTF8,         ///< Do not use this directly!  It will go away eventually!
    Ascii,        ///< Do not use this directly!  It will go away eventually!
    ISO8859_1,    ///< Do not use this directly!  It will go away eventually!
    Windows_1252  ///< Do not use this directly!  It will go away eventually!
};
#define eEncoding_Unknown      EEncoding::Unknown
#define eEncoding_UTF8         EEncoding::UTF8
#define eEncoding_Ascii        EEncoding::Ascii
#define eEncoding_ISO8859_1    EEncoding::ISO8859_1
#define eEncoding_Windows_1252 EEncoding::Windows_1252
#endif
227 
228 
229 /////////////////////////////////////////////////////////////////////////////
230 ///
231 /// NStr --
232 ///
233 /// Encapsulates class-wide string processing functions.
234 
235 class NCBI_XNCBI_EXPORT NStr
236 {
237 public:
238     /// Common conversion flags.
239     enum EConvErrFlags {
240         /// Do not throw an exception on error.
241         /// Could be used with methods throwing an exception by default, ignored otherwise.
242         /// Just return zero and set errno to non-zero instead of throwing an exception.
243         /// We recommend the following technique to check against errors
244         /// with minimum overhead when this flag is used:
245         /// @code
246         ///     if (!retval  &&  errno != 0)
247         ///        ERROR;
248         /// @endcode
249         /// And for StringToDouble*() variants:
250         /// @code
251         ///     if (retval == HUGE_VAL  ||  retval == -HUGE_VAL  ||
252         ///        !retval  &&  errno != 0)
253         ///        ERROR;
254         /// @endcode
255         fConvErr_NoThrow      = (1 <<  0),
256         /*
257         fConvErr_NoErrno      = (1 <<  1),  ///< Do not set errno at all.
258                                             ///< If used together with fConvErr_NoThrow flag
259                                             ///< returns 0 on error (-1 for StringToNonNegativeInt).
260         */
261         fConvErr_NoErrMessage = (1 <<  2)   ///< Set errno, but do not set CNcbiError message on error
262     };
263     typedef int TConvErrFlags;    ///< Bitwise OR of "EConvErrFlags"
264 
265     /// Number to string conversion flags.
266     ///
267     /// NOTE:
268     ///   If specified base in the *ToString() methods is not default 10,
269     ///   that some flags like fWithSign and fWithCommas will be ignored.
270     enum ENumToStringFlags {
271         fUseLowercase            = (1 <<  4),     ///< Use lowercase letters for string representation for bases above 10
272         fWithRadix               = (1 <<  5),     ///< Prefix the output value with radix for "well-known" bases like 8 ("0") and 16 ("0x")
273         fWithSign                = (1 <<  6),     ///< Prefix the output value with a sign ('+'/'-')
274         fWithCommas              = (1 <<  7),     ///< Use commas as thousands separator
275         fDoubleFixed             = (1 <<  8),     ///< DoubleToString*(): Use n.nnnn format for double conversions
276         fDoubleScientific        = (1 <<  9),     ///< DoubleToString*(): Use scientific format for double conversions
277         fDoublePosix             = (1 << 10),     ///< DoubleToString*(): Use C locale  for double conversions
278         fDoubleGeneral           = fDoubleFixed | fDoubleScientific,
279         // Additional flags to convert "software" qualifiers (see UInt8ToString_DataSize)
280         fDS_Binary               = (1 << 11),     ///< UInt8ToString_DataSize(): Use 1024 as a kilobyte factor, not 1000.
281         fDS_NoDecimalPoint       = (1 << 12),     ///< UInt8ToString_DataSize(): Do not add a decimal point ("10KB" vs "10.0KB")
282         fDS_PutSpaceBeforeSuffix = (1 << 13),     ///< UInt8ToString_DataSize(): Add space between value and qualifiers, like "10.0 KB"
283         fDS_ShortSuffix          = (1 << 14),     ///< UInt8ToString_DataSize(): Use short suffix, like "10.0K"
284         fDS_PutBSuffixToo        = (1 << 15)      ///< UInt8ToString_DataSize(): Use "B" suffix for small bytes values.
285     };
286     typedef int TNumToStringFlags;    ///< Bitwise OR of "ENumToStringFlags"
287 
288     /// String to number conversion flags.
289     enum EStringToNumFlags {
290         fMandatorySign           = (1 << 17),     ///< Check on mandatory sign. See 'ENumToStringFlags::fWithSign'.
291         fAllowCommas             = (1 << 18),     ///< Allow commas. See 'ENumToStringFlags::fWithCommas'.
292         fAllowLeadingSpaces      = (1 << 19),     ///< Ignore leading spaces in converted string.
293         fAllowLeadingSymbols     = (1 << 20) | fAllowLeadingSpaces,
294                                                   ///< Ignore leading non-numeric characters.
295         fAllowTrailingSpaces     = (1 << 21),     ///< Ignore trailing space characters.
296         fAllowTrailingSymbols    = (1 << 22) | fAllowTrailingSpaces,
297                                                   ///< Ignore trailing non-numerics characters.
298         fDecimalPosix            = (1 << 23),     ///< StringToDouble*(): For decimal point, use C locale.
299         fDecimalPosixOrLocal     = (1 << 24),     ///< StringToDouble*(): For decimal point, try both C and current locale.
300         fDecimalPosixFinite      = (1 << 25),     ///< StringToDouble*(): Keep result finite and normalized:
301                                                   ///< if DBL_MAX < result < INF,     result becomes DBL_MAX
302                                                   ///< if       0 < result < DBL_MIN, result becomes DBL_MIN
303         // Additional flags to convert "software" qualifiers (see StringToUInt8_DataSize)
304         fDS_ForceBinary          = (1 << 26),     ///< StringToUInt8_DataSize(): Use 1024 as a kilobyte factor regardless of suffix, like "KB" or "KiB".
305         fDS_ProhibitFractions    = (1 << 27),     ///< StringToUInt8_DataSize(): Ignore any fraction part of a value, "1.2K" ~ "1K"
306         fDS_ProhibitSpaceBeforeSuffix = (1 << 28) ///< StringToUInt8_DataSize(): Do not allow spaces between value and suffix, like "10 K".
307     };
308     typedef int TStringToNumFlags;   ///< Bitwise OR of "EStringToNumFlags"
309 
310     /// Convert string to a numeric value.
311     ///
312     /// @param str
313     ///   String to be converted.
314     /// @param flags
315     ///   Optional flags to tune up how the string is converted to value.
316     /// @param base
317     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
318     ///   first characters to determine the base - a leading "0x" or "0X"
319     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
320     /// @return
321     ///   - If conversion succeeds, set errno to zero and return the
322     ///     converted value.
323     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
324     ///   - Otherwise, set errno to non-zero and return zero.
325     template <typename TNumeric>
StringToNumeric(const CTempString str,TStringToNumFlags flags=0,int base=10)326     static TNumeric StringToNumeric(const CTempString str,
327                                     TStringToNumFlags flags = 0,
328                                     int               base  = 10)
329     {
330         return x_StringToNumeric<TNumeric>(str, flags, base);
331     }
332 
333     /// Convert string to a numeric value.
334     ///
335     /// @param str [in]
336     ///   String to be converted.
337     /// @param value [out]
338     ///   The numeric value represented by "str". Zero on any error.
339     /// @param flags [in]
340     ///   Optional flags to tune up how the string is converted to value.
341     /// @param base [in]
342     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
343     ///   first characters to determine the base - a leading "0x" or "0X"
344     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
345     /// @return
346     ///   - If conversion succeeds, set errno to zero, set the value, and
347     ///     return true.
348     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
349     ///   - Otherwise, set errno to non-zero, set the value to zero, and
350     ///     return false.
351     template <typename TNumeric>
StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags=0,int base=10)352     static bool StringToNumeric(const CTempString str,
353                                 TNumeric*         value, /*[out]*/
354                                 TStringToNumFlags flags = 0,
355                                 int               base  = 10)
356     {
357         return x_StringToNumeric(str, value, flags, base);
358     }
359 
360     /// Convert string to non-negative integer value.
361     ///
362     /// @param str
363     ///   String containing only digits, representing non-negative
364     ///   decimal value in the int range: [0..kMax_Int].
365     /// @param flags
366     ///   How to convert string to value.
367     ///   Only fConvErr_NoErrMessage flag is supported here.
368     /// @return
369     ///   - If conversion succeeds, set errno to zero and return the converted value.
370     ///   - Otherwise, set errno to non-zero and return -1.
371     static int StringToNonNegativeInt(const CTempString str, TConvErrFlags flags = 0);
372 
373     /// @deprecated
374     ///   Use template-based StringToNumeric<> or StringToNonNegativeInt() instead.
375     NCBI_DEPRECATED
StringToNumeric(const string & str)376     static int StringToNumeric(const string& str)
377     {
378         return StringToNonNegativeInt(str);
379     }
380 
381     /// Convert string to int.
382     ///
383     /// @param str
384     ///   String to be converted.
385     /// @param flags
386     ///   How to convert string to value.
387     /// @param base
388     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
389     ///   first characters to determine the base - a leading "0x" or "0X"
390     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
391     /// @return
392     ///   - If conversion succeeds, set errno to zero and return the
393     ///     converted value.
394     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
395     ///   - Otherwise, set errno to non-zero and return zero.
396     static int StringToInt(const CTempString str,
397                            TStringToNumFlags flags = 0,
398                            int               base  = 10);
399 
400     /// Convert string to unsigned int.
401     ///
402     /// @param str
403     ///   String to be converted.
404     /// @param flags
405     ///   How to convert string to value.
406     /// @param base
407     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
408     ///   first characters to determine the base - a leading "0x" or "0X"
409     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
410     /// @return
411     ///   - If conversion succeeds, set errno to zero and return the
412     ///     converted value.
413     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
414     ///   - Otherwise, set errno to non-zero and return zero.
415     static unsigned int StringToUInt(const CTempString str,
416                                      TStringToNumFlags flags = 0,
417                                      int               base  = 10);
418 
419     /// Convert string to long.
420     ///
421     /// @param str
422     ///   String to be converted.
423     /// @param flags
424     ///   How to convert string to value.
425     /// @param base
426     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
427     ///   first characters to determine the base - a leading "0x" or "0X"
428     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
429     /// @return
430     ///   - If conversion succeeds, set errno to zero and return the
431     ///     converted value.
432     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
433     ///   - Otherwise, set errno to non-zero and return zero.
434     static long StringToLong(const CTempString str,
435                              TStringToNumFlags flags = 0,
436                              int               base  = 10);
437 
438     /// Convert string to unsigned long.
439     ///
440     /// @param str
441     ///   String to be converted.
442     /// @param flags
443     ///   How to convert string to value.
444     /// @param base
445     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
446     ///   first characters to determine the base - a leading "0x" or "0X"
447     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
448     /// @return
449     ///   - If conversion succeeds, set errno to zero and return the
450     ///     converted value.
451     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
452     ///   - Otherwise, set errno to non-zero and return zero.
453     static unsigned long StringToULong(const CTempString str,
454                                        TStringToNumFlags flags = 0,
455                                        int               base  = 10);
456 
457     /// Convert string to double-precision value (analog of strtod function)
458     ///
459     /// @param str
460     ///   String to be converted.
461     /// @param endptr
462     ///   Pointer to character that stops scan.
463     /// @return
464     ///   Double-precision value.
465     ///   This function always uses dot as decimal separator.
466     ///   - on overflow, it returns HUGE_VAL and sets errno to ERANGE;
467     ///   - on underflow, it returns 0 and sets errno to ERANGE;
468     ///   - if conversion was impossible, it returns 0 and sets errno.
469     ///   Also, when input string equals (case-insensitive) to
470     ///   - "NAN", the function returns NaN;
471     ///   - "INF" or "INFINITY", the function returns HUGE_VAL;
472     ///   - "-INF" or "-INFINITY", the function returns -HUGE_VAL;
473     /// @note
474     ///   - If conversion succeeds, set errno to zero and return the
475     ///     converted value.
476     ///   - Otherwise, set errno to non-zero and return zero.
477     ///   - Denormal or infinite results are considered successful conversion.
478     ///   - To enforce finite and normalized result, use fDecimalPosixFinite flag.
479     ///   - This function is meant to be more "low-level" than other
480     ///     StringToXxx functions - for example, it allows trailing characters
481     ///     (and doesn't include a flags parameter for tweaking such behavior).
482     ///     This could result in strings like "nanosecond" being converted to
483     ///     NaN, "-inf=input_file" being converted to -INF, or other unexpected
484     ///     behavior. Therefore, please consider using StringToDouble unless
485     ///     you specifically need this functionality.
486     static double StringToDoublePosix(const char* str, char** endptr=0,
487                                       TStringToNumFlags flags=0);
488 
489 
490     /// Convert string to double.
491     ///
492     /// @param str
493     ///   String to be converted.
494     /// @param flags
495     ///   How to convert string to value.
496     ///   Do not support fAllowCommas flag.
497     /// @return
498     ///   - If invalid flags are passed, throw an exception.
499     ///   - If conversion succeeds, set errno to zero and return the
500     ///     converted value.
501     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
502     ///   - Otherwise, set errno to non-zero and return zero.
503     /// @note
504     ///   - Denormal or infinite results are considered successful conversion.
505     ///   - To enforce finite and normalized result, use fDecimalPosixFinite flag.
506     static double StringToDouble(const CTempStringEx str,
507                                  TStringToNumFlags   flags = 0);
508 
509     /// This version accepts zero-terminated string
510     /// @deprecated
511     ///   It is unsafe to use this method directly, please use StringToDouble()
512     ///   instead.
513     NCBI_DEPRECATED
514     static double StringToDoubleEx(const char* str, size_t size,
515                                    TStringToNumFlags flags = 0);
516 
517     /// Convert string to Int8.
518     ///
519     /// @param str
520     ///   String to be converted.
521     /// @param flags
522     ///   How to convert string to value.
523     /// @param base
524     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
525     ///   first characters to determine the base - a leading "0x" or "0X"
526     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
527     /// @return
528     ///   - If conversion succeeds, set errno to zero and return the
529     ///     converted value.
530     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
531     ///   - Otherwise, set errno to non-zero and return zero.
532     static Int8 StringToInt8(const CTempString str,
533                              TStringToNumFlags flags = 0,
534                              int               base  = 10);
535 
536     /// Convert string to Uint8.
537     ///
538     /// @param str
539     ///   String to be converted.
540     /// @param flags
541     ///   How to convert string to value.
542     /// @param base
543     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
544     ///   first characters to determine the base - a leading "0x" or "0X"
545     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
546     /// @return
547     ///   - If conversion succeeds, set errno to zero and return the
548     ///     converted value.
549     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
550     ///   - Otherwise, set errno to non-zero and return zero.
551     static Uint8 StringToUInt8(const CTempString str,
552                                TStringToNumFlags flags = 0,
553                                int               base  = 10);
554 
555     /// Convert string that can contain "software" qualifiers to Uint8.
556     ///
557     /// String can contain "software" qualifiers: G(giga-), MB(mega-),
558     /// KiB (kibi-) etc.
559     /// Example: 100MB, 1024KiB, 5.7G.
560     /// Meaning of qualifiers depends on flags and by default is 1000-based
561     /// (i.e. K=1000, M=10^6 etc.) except in cases when qualifiers with "iB"
562     /// are used, i.e. KiB=1024, MiB=1024^2 etc. When flags parameter contains
563     /// fDS_ForceBinary then qualifiers without "iB" (i.e. "K" or "MB") will
564     /// also be 1024-based.
565     /// String can contain a decimal fraction (except when fDS_ProhibitFractions
566     /// flag is used), in this case the resultant Uint8 number will be rounded
567     /// to fit into integer value.
568     ///
569     /// @param str
570     ///   String to be converted.
571     /// @param flags
572     ///   How to convert string to value.
573     /// @return
574     ///   - If invalid flags are passed, throw an exception.
575     ///   - If conversion succeeds, return the converted value.
576     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
577     ///   - Otherwise, set errno to non-zero and return zero.
578     static Uint8 StringToUInt8_DataSize(const CTempString str,
579                                         TStringToNumFlags flags = 0);
580 
581     /// Convert string to number of bytes.
582     ///
583     /// String can contain "software" qualifiers: MB(megabyte), KB (kilobyte).
584     /// Example: 100MB, 1024KB
585     /// Note the qualifiers are power-of-2 based, aka kibi-, mebi- etc, so that
586     /// 1KB = 1024B (not 1000B), 1MB = 1024KB = 1048576B, etc.
587     ///
588     /// @param str
589     ///   String to be converted.
590     /// @param flags
591     ///   How to convert string to value.
592     /// @param base
593     ///   Numeric base of the number (before the qualifier). Allowed values
594     ///   are 0, 2..20. Zero means to use the first characters to determine
595     ///   the base - a leading "0x" or "0X" means base 16; otherwise a
596     ///   leading 0 means base 8; otherwise base 10.
597     ///   The base is limited to 20 to prevent 'K' from being interpreted as
598     ///   a digit in the number.
599     /// @return
600     ///   - If conversion succeeds, set errno to zero and return the
601     ///     converted value.
602     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
603     ///   - Otherwise, set errno to non-zero and return zero.
604     /// @deprecated  Use StringToUInt8_DataSize(str, flags) instead.
605     NCBI_DEPRECATED
606     static Uint8 StringToUInt8_DataSize(const CTempString str,
607                                         TStringToNumFlags flags,
608                                         int               base);
609 
610     /// Convert string to size_t.
611     ///
612     /// @param str
613     ///   String to be converted.
614     /// @param flags
615     ///   How to convert string to value.
616     /// @param base
617     ///   Radix base. Allowed values are 0, 2..36. Zero means to use the
618     ///   first characters to determine the base - a leading "0x" or "0X"
619     ///   means base 16; otherwise a leading 0 means base 8; otherwise base 10.
620     /// @return
621     ///   - If conversion succeeds, set errno to zero and return the
622     ///     converted value.
623     ///   - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
624     ///   - Otherwise, set errno to non-zero and return zero.
625     static size_t StringToSizet(const CTempString str,
626                                 TStringToNumFlags flags = 0,
627                                 int               base  = 10);
628 
629     /// Convert string to pointer.
630     ///
631     /// @param str
632     ///   String to be converted.
633     /// @param flags
634     ///   How to convert string to value.
635     ///   Only fConvErr_NoErrMessage flag is supported here.
636     /// @return
637     ///   Pointer value corresponding to its string representation.
638     ///   - If conversion succeeds, set errno to zero and return the
639     ///     converted value.
640     ///   - Otherwise, set errno to non-zero and return NULL.
641     static const void* StringToPtr(const CTempStringEx str, TConvErrFlags flags = 0);
642 
643     /// Convert character to integer.
644     ///
645     /// @param ch
646     ///   Character to be converted.
647     /// @return
648     ///   Integer (0..15) corresponding to the "ch" as a hex digit.
649     ///   Return -1 on error.
650     static int HexChar(char ch);
651 
652     /// Convert numeric value to string.
653     ///
654     /// @param value
655     ///   Numeric value to be converted.
656     /// @param flags
657     ///   How to convert value to string.
658     /// @param base
659     ///   Radix base. Default is 10. Allowed values are 2..36.
660     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
661     ///   If necessary you should add it yourself.
662     ///   If value is float or double type, the parameter is ignored.
663     /// @return
664     ///   - If conversion succeeds, set errno to zero and return the
665     ///     converted string value.
666     ///   - Otherwise, set errno to non-zero and return empty string.
667     template<typename TNumeric>
668     static typename enable_if< is_arithmetic<TNumeric>::value || is_convertible<TNumeric, Int8>::value, string>::type
NumericToString(TNumeric value,TNumToStringFlags flags=0,int base=10)669     NumericToString(TNumeric value, TNumToStringFlags flags = 0, int base = 10)
670     {
671         string ret;
672         x_NumericToString(ret, value, flags, base);
673         return ret;
674     }
675     template <typename TStrictId>
676     static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, string>::type
NumericToString(TStrictId value,TNumToStringFlags flags=0,int base=10)677     NumericToString(TStrictId value, TNumToStringFlags flags = 0, int base = 10)
678     {
679         return NumericToString(value.Get(), flags, base);
680     }
681 
682     /// Convert numeric value to string.
683     ///
684     /// @param out_str
685     ///   Output string variable.
686     /// @param value
687     ///   Numeric value to be converted.
688     /// @param flags
689     ///   How to convert value to string.
690     /// @param base
691     ///   Radix base. Default is 10. Allowed values are 2..36.
692     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
693     ///   If necessary you should add it yourself.
694     ///   If value is float or double type, the parameter is ignored.
695     /// @note
696     ///   - If conversion succeeds, set errno to zero and return the
697     ///     converted string value in 'out_str'.
698     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
699     template<typename TNumeric>
700     static void NumericToString(string& out_str, TNumeric value,
701                                 TNumToStringFlags flags = 0, int base = 10)
702     {
703         x_NumericToString(out_str, value, flags, base);  // result is written directly into the caller's string
704     }
705 
706     /// Convert int to string.
707     ///
708     /// @param value
709     ///   Integer value to be converted.
710     /// @param flags
711     ///   How to convert value to string.
712     /// @param base
713     ///   Radix base. Default is 10. Allowed values are 2..36.
714     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
715     ///   If necessary you should add it yourself.
716     /// @return
717     ///   - If conversion succeeds, set errno to zero and return the
718     ///     converted string value.
719     ///   - Otherwise, set errno to non-zero and return empty string.
720     static string IntToString(int value, TNumToStringFlags flags = 0,
721                               int base = 10);
722 
723     static string IntToString(unsigned int value, TNumToStringFlags flags = 0,
724                               int base = 10);
725 
726     /// Convert int to string.
727     ///
728     /// @param out_str
729     ///   Output string variable.
730     /// @param value
731     ///   Integer value to be converted.
732     /// @param flags
733     ///   How to convert value to string.
734     /// @param base
735     ///   Radix base. Default is 10. Allowed values are 2..36.
736     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
737     ///   If necessary you should add it yourself.
738     /// @note
739     ///   - If conversion succeeds, set errno to zero and return the
740     ///     converted string value in 'out_str'.
741     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
742     static void IntToString(string& out_str, int value,
743                             TNumToStringFlags flags = 0,
744                             int               base  = 10);
745 
746     static void IntToString(string& out_str, unsigned int value,
747                             TNumToStringFlags flags = 0,
748                             int               base  = 10);
749 
750     /// Convert UInt to string.
751     ///
752     /// @param value
753     ///   Integer value (unsigned int) to be converted.
754     /// @param flags
755     ///   How to convert value to string.
756     /// @param base
757     ///   Radix base. Default is 10. Allowed values are 2..36.
758     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
759     ///   If necessary you should add it yourself.
760     /// @return
761     ///   - If conversion succeeds, set errno to zero and return the
762     ///     converted string value.
763     ///   - Otherwise, set errno to non-zero and return empty string.
764     static string UIntToString(unsigned int      value,
765                                TNumToStringFlags flags = 0,
766                                int               base  = 10);
767 
768     static string UIntToString(int               value,
769                                TNumToStringFlags flags = 0,
770                                int               base  = 10);
771 
772     /// Convert UInt to string.
773     ///
774     /// @param out_str
775     ///   Output string variable
776     /// @param value
777     ///   Integer value (unsigned int) to be converted.
778     /// @param flags
779     ///   How to convert value to string.
780     /// @param base
781     ///   Radix base. Default is 10. Allowed values are 2..36.
782     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
783     ///   If necessary you should add it yourself.
784     /// @note
785     ///   - If conversion succeeds, set errno to zero and return the
786     ///     converted string value in 'out_str'.
787     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
788     static void UIntToString(string& out_str, unsigned int value,
789                              TNumToStringFlags flags = 0,
790                              int               base  = 10);
791 
792     static void UIntToString(string& out_str, int value,
793                              TNumToStringFlags flags = 0,
794                              int               base  = 10);
795 
796     /// Convert long to string.
797     ///
798     /// @param value
799     ///   Integer value (long) to be converted.
800     /// @param flags
801     ///   How to convert value to string.
802     /// @param base
803     ///   Radix base. Default is 10. Allowed values are 2..36.
804     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
805     ///   If necessary you should add it yourself.
806     /// @return
807     ///   - If conversion succeeds, set errno to zero and return the
808     ///     converted string value.
809     ///   - Otherwise, set errno to non-zero and return empty string.
810     static string LongToString(long value, TNumToStringFlags flags = 0,
811                                int base = 10);
812 
813     /// Convert long to string.
814     ///
815     /// @param out_str
816     ///   Output string variable.
817     /// @param value
818     ///   Integer value (long) to be converted.
819     /// @param flags
820     ///   How to convert value to string.
821     /// @param base
822     ///   Radix base. Default is 10. Allowed values are 2..36.
823     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
824     ///   If necessary you should add it yourself.
825     /// @note
826     ///   - If conversion succeeds, set errno to zero and return the
827     ///     converted string value in 'out_str'.
828     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
829     static void LongToString(string& out_str, long value,
830                              TNumToStringFlags flags = 0,
831                              int               base  = 10);
832 
833     /// Convert unsigned long to string.
834     ///
835     /// @param value
836     ///   Integer value (unsigned long) to be converted.
837     /// @param flags
838     ///   How to convert value to string.
839     /// @param base
840     ///   Radix base. Default is 10. Allowed values are 2..36.
841     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
842     ///   If necessary you should add it yourself.
843     /// @return
844     ///   - If conversion succeeds, set errno to zero and return the
845     ///     converted string value.
846     ///   - Otherwise, set errno to non-zero and return empty string.
847     static string ULongToString(unsigned long     value,
848                                 TNumToStringFlags flags = 0,
849                                 int               base  = 10);
850 
851     /// Convert unsigned long to string.
852     ///
853     /// @param out_str
854     ///   Output string variable
855     /// @param value
856     ///   Integer value (unsigned long) to be converted.
857     /// @param flags
858     ///   How to convert value to string.
859     /// @param base
860     ///   Radix base. Default is 10. Allowed values are 2..36.
861     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
862     ///   If necessary you should add it yourself.
863     /// @note
864     ///   - If conversion succeeds, set errno to zero and return the
865     ///     converted string value in 'out_str'.
866     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
867     static void ULongToString(string& out_str, unsigned long value,
868                               TNumToStringFlags flags = 0,
869                               int               base  = 10);
870 
871     /// Convert Int8 to string.
872     ///
873     /// @param value
874     ///   Integer value (Int8) to be converted.
875     /// @param flags
876     ///   How to convert value to string.
877     /// @param base
878     ///   Radix base. Default is 10. Allowed values are 2..36.
879     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
880     ///   If necessary you should add it yourself.
881     /// @return
882     ///   - If conversion succeeds, set errno to zero and return the
883     ///     converted string value.
884     ///   - Otherwise, set errno to non-zero and return empty string.
885     static string Int8ToString(Int8 value,
886                                TNumToStringFlags flags = 0,
887                                int               base  = 10);
888 
889     /// Convert Int8 to string.
890     ///
891     /// @param out_str
892     ///   Output string variable
893     /// @param value
894     ///   Integer value (Int8) to be converted.
895     /// @param flags
896     ///   How to convert value to string.
897     /// @param base
898     ///   Radix base. Default is 10. Allowed values are 2..36.
899     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
900     ///   If necessary you should add it yourself.
901     /// @note
902     ///   - If conversion succeeds, set errno to zero and return the
903     ///     converted string value in 'out_str'.
904     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
905     static void Int8ToString(string& out_str, Int8 value,
906                              TNumToStringFlags flags = 0,
907                              int               base  = 10);
908 
909     /// Convert UInt8 to string.
910     ///
911     /// @param value
912     ///   Integer value (UInt8) to be converted.
913     /// @param flags
914     ///   How to convert value to string.
915     /// @param base
916     ///   Radix base. Default is 10. Allowed values are 2..36.
917     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
918     ///   If necessary you should add it yourself.
919     /// @return
920     ///   - If conversion succeeds, set errno to zero and return the
921     ///     converted string value.
922     ///   - Otherwise, set errno to non-zero and return empty string.
923     static string UInt8ToString(Uint8 value,
924                                 TNumToStringFlags flags = 0,
925                                 int               base  = 10);
926 
927     /// Convert UInt8 to string.
928     ///
929     /// @param out_str
930     ///   Output string variable
931     /// @param value
932     ///   Integer value (UInt8) to be converted.
933     /// @param flags
934     ///   How to convert value to string.
935     /// @param base
936     ///   Radix base. Default is 10. Allowed values are 2..36.
937     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
938     ///   If necessary you should add it yourself.
939     /// @note
940     ///   - If conversion succeeds, set errno to zero and return the
941     ///     converted string value in 'out_str'.
942     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
943     static void UInt8ToString(string& out_str, Uint8 value,
944                               TNumToStringFlags flags = 0,
945                               int               base  = 10);
946 
947     /// Convert UInt8 to string using "software" qualifiers.
948     ///
949     /// Result of conversion will be limited to max_digits digits so that e.g.
950     /// 1024 will be converted to 1.02KB. Conversion will be made using
951     /// rounding so that 1025 will be converted to 1.03KB. By default function
952     /// uses 1000-based qualifiers (as in examples above) but with fDS_Binary
953     /// flag it will use 1024-based qualifiers, e.g. 1100 will be converted to
954     /// 1.07KiB. With fDS_ShortSuffix flag function will omit "B" in 1000-based
955     /// and "iB" in 1024-based qualifiers. When the result of conversion doesn't
956     /// need any qualifiers then the result of this function will be equivalent
957     /// to result of UInt8ToString() above except if fDS_PutBSuffixToo flag
958     /// is passed. In the latter case "B" will be added to the number.
959     ///
960     /// Function will always try to use a maximum possible qualifier and
961     /// a number with decimal point except if fDS_NoDecimalPoint flag is passed.
962     /// In that case function will return only whole number and try to use a
963     /// minimum possible qualifier (which makes difference only if
964     /// max_digits > 3).
965     ///
966     /// @param value
967     ///   Integer value (UInt8) to be converted.
968     /// @param flags
969     ///   How to convert value to string.
970     /// @param max_digits
971     ///   Maximum number of digits to use (cannot be less than 3)
972     /// @return
973     ///   - If invalid flags are passed, throw an exception.
974     ///   - If conversion succeeds, return the converted value.
975     static string UInt8ToString_DataSize(Uint8 value,
976                                          TNumToStringFlags flags = 0,
977                                          unsigned int max_digits = 3);
978 
979     /// Convert UInt8 to string using "software" qualifiers.
980     ///
981     /// See notes and details of how function works in the comments to
982     /// UInt8ToString_DataSize() above.
983     ///
984     /// @param out_str
985     ///   Output string variable
986     /// @param value
987     ///   Integer value (UInt8) to be converted.
988     /// @param flags
989     ///   How to convert value to string.
990     /// @param max_digits
991     ///   Maximum number of digits to use (cannot be less than 3)
992     static void UInt8ToString_DataSize(string& out_str,
993                                        Uint8 value,
994                                        TNumToStringFlags flags = 0,
995                                        unsigned int max_digits = 3);
996     /// Convert double to string.
997     ///
998     /// @param value
999     ///   Double value to be converted.
1000     /// @param precision
1001     ///   Precision value for conversion. If precision is more than the maximum
1002     ///   for the current platform, it will be truncated to this maximum.
1003     ///   If it is negative, the double will be converted to a number in
1004     ///   scientific notation.
1005     /// @param flags
1006     ///   How to convert value to string.
1007     ///   If double format flags are not specified, the following output format
1008     ///   will be used by default:
1009     ///     - fDoubleFixed,   if 'precision' >= 0.
1010     ///     - fDoubleGeneral, if 'precision' < 0.
1011     /// @return
1012     ///   - If conversion succeeds, set errno to zero and return the
1013     ///     converted string value.
1014     ///   - Otherwise, set errno to non-zero and return empty string.
1015     static string DoubleToString(double value, int precision = -1,
1016                                  TNumToStringFlags flags = 0);
1017 
1018     /// Convert double to string.
1019     ///
1020     /// @param out_str
1021     ///   Output string variable
1022     /// @param value
1023     ///   Double value to be converted.
1024     /// @param precision
1025     ///   Precision value for conversion. If precision is more than the maximum
1026     ///   for the current platform, it will be truncated to this maximum.
1027     ///   If it is negative, the double will be converted to a number in
1028     ///   scientific notation.
1029     /// @param flags
1030     ///   How to convert value to string.
1031     ///   If double format flags are not specified, the following output format
1032     ///   will be used by default:
1033     ///     - fDoubleFixed,   if 'precision' >= 0.
1034     ///     - fDoubleGeneral, if 'precision' < 0.
1035     /// @note
1036     ///   - If conversion succeeds, set errno to zero and return the
1037     ///     converted string value in 'out_str'.
1038     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
1039     static void DoubleToString(string& out_str, double value,
1040                                int precision = -1,
1041                                TNumToStringFlags flags = 0);
1042 
1043     /// Convert double to string with specified precision and place the result
1044     /// in the specified buffer.
1045     ///
1046     /// @param value
1047     ///   Double value to be converted.
1048     /// @param precision
1049     ///   Precision value for conversion. If precision is more than the maximum
1050     ///   for the current platform, it will be truncated to this maximum.
1051     /// @param buf
1052     ///   Put result of the conversion into this buffer.
1053     /// @param buf_size
1054     ///   Size of buffer, "buf".
1055     /// @param flags
1056     ///   How to convert value to string.
1057     ///   Default output format is fDoubleFixed.
1058     /// @return
1059     ///   - If conversion succeeds, set errno to zero and return the
1060     ///     number of bytes stored in "buf", not counting the
1061     ///     terminating '\0'.
1062     ///   - Otherwise, set errno to non-zero, contents of 'buf' are undefined.
1063     static SIZE_TYPE DoubleToString(double value, unsigned int precision,
1064                                     char* buf, SIZE_TYPE buf_size,
1065                                     TNumToStringFlags flags = 0);
1066 
1067     /// Convert double to string with specified precision and put the result
1068     /// into a character buffer, in scientific format.
1069     ///
1070     /// NOTE:
1071     ///   The output character buffer is NOT zero-terminated.
1072     ///   The decimal separator is dot, always.
1073     ///   This function DOES NOT check whether 'value' is finite; if it is
1074     ///   infinite or not-a-number, the result is unpredictable.
1075     ///   This function is less precise for a small fraction of values
1076     ///   (the difference is in the last significant digit) than its
1077     ///   'DoubleToString' siblings, but it is much faster.
1078     ///
1079     /// @param value
1080     ///   Double value to be converted.
1081     /// @param precision
1082     ///   Maximum number of significant digits to preserve. If precision is greater than
1083     ///   maximum for the current platform, it will be truncated to this maximum.
1084     /// @param buf
1085     ///   Put result of the conversion into this buffer.
1086     /// @param buf_size
1087     ///   Size of buffer, "buf".
1088     /// @return
1089     ///   The number of bytes written into "buf".
1090     static SIZE_TYPE DoubleToStringPosix(double value,unsigned int precision,
1091                                          char* buf, SIZE_TYPE buf_size);
1092 
1093 
1094     /// Convert double to string with specified precision.
1095     ///
1096     /// The result consists of three parts: significant digits, exponent and sign.
1097     /// For example, input value -12345.67 will produce
1098     /// buf = "1234567" , *dec = 4, and *sign = -1.
1099     /// NOTE:
1100     ///   The output character buffer is NOT zero-terminated.
1101     ///   The buffer is NOT padded with zeros.
1102     ///   This function DOES NOT check whether 'value' is finite; if it is
1103     ///   infinite or not-a-number, the result is unpredictable.
1104     ///   This function is less precise for a small fraction of values
1105     ///   (the difference is in the last significant digit) than its
1106     ///   'DoubleToString' siblings, but it is much faster.
1107     ///
1108     /// @param value
1109     ///   Double value to be converted.
1110     /// @param precision
1111     ///   Maximum number of significant digits to preserve. If precision is greater than
1112     ///   maximum for the current platform, it will be truncated to this maximum.
1113     /// @param buf
1114     ///   Put result of the conversion into this buffer.
1115     /// @param buf_size
1116     ///   Size of buffer, "buf".
1117     /// @param dec
1118     ///   Exponent
1119     /// @param sign
1120     ///   Sign of the value
1121     /// @return
1122     ///   The number of bytes written into "buf".
1123     static SIZE_TYPE DoubleToString_Ecvt(double value,unsigned int precision,
1124                                          char* buf, SIZE_TYPE buf_size,
1125                                          int* dec, int* sign);
1126 
1127     /// Convert size_t to string.
1128     ///
1129     /// @param value
1130     ///   Value to be converted.
1131     /// @param flags
1132     ///   How to convert value to string.
1133     /// @param base
1134     ///   Radix base. Default is 10. Allowed values are 2..36.
1135     ///   Bases 8 and 16 do not add leading '0' and '0x' accordingly.
1136     ///   If necessary you should add it yourself.
1137     /// @return
1138     ///   - If conversion succeeds, set errno to zero and return the
1139     ///     converted string value.
1140     ///   - Otherwise, set errno to non-zero and return empty string.
1141     static string SizetToString(size_t value,
1142                                 TNumToStringFlags flags = 0,
1143                                 int               base  = 10);
1144 
1145     /// Convert pointer to string.
1146     ///
1147     /// @param out_str
1148     ///   Output string variable.
1149     /// @param ptr
1150     ///   Pointer to be converted.
1151     /// @note
1152     ///   - If conversion succeeds, set errno to zero and return the
1153     ///     converted string value in 'out_str'.
1154     ///   - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
1155     static void PtrToString(string& out_str, const void* ptr);
1156 
1157     /// Convert pointer to string.
1158     ///
1159     /// @param ptr
1160     ///   Pointer to be converted.
1161     /// @return
1162     ///   - If conversion succeeds, set errno to zero and return the
1163     ///     converted string value representing the pointer.
1164     ///   - Otherwise, set errno to non-zero and return empty string.
1165     static string PtrToString(const void* ptr);
1166 
1167     /// Convert bool to string.
1168     ///
1169     /// @param value
1170     ///   Boolean value to be converted.
1171     /// @return
1172     ///   One of: 'true', 'false'
1173     /// @note
1174     ///   Don't change errno.
1175     static const string BoolToString(bool value);
1176 
1177     /// Convert string to bool.
1178     ///
1179     /// @param str
1180     ///   Boolean string value to be converted.  Can recognize
1181     ///   case-insensitive version as one of:
1182     ///   TRUE  - 'true', 't', 'yes', 'y', '1';
1183     ///   FALSE - 'false', 'f', 'no', 'n', '0'.
1184     /// @return
1185     ///   - If conversion succeeds, set errno to zero and return TRUE or FALSE.
1186     ///   - Otherwise, set errno to non-zero and throw an exception.
1187     static bool StringToBool(const CTempString str);
1188 
1189 
1190     /// Handle an arbitrary printf-style format string.
1191     ///
1192     /// This method exists only to support third-party code that insists on
1193     /// representing messages in this format; please stick to type-checked
1194     /// means of formatting such as the above ToString methods and I/O
1195     /// streams whenever possible.
1196     static string FormatVarargs(const char* format, va_list args);
1197 
1198 
1199     /// Case-sensitivity mode selected by callers of the comparison functions.
1200     enum ECase {
1201         eCase,      ///< Case-sensitive comparison
1202         eNocase     ///< Case-insensitive comparison
1203     };
1204 
1205     // ATTENTION.  Be aware that:
1206     //
1207     // 1) "Compare***(..., SIZE_TYPE pos, SIZE_TYPE n, ...)" functions
1208     //    follow the ANSI C++ comparison rules a la "basic_string::compare()":
1209     //       s1[pos:pos+n) == s2   --> return 0
1210     //       s1[pos:pos+n) <  s2   --> return negative value
1211     //       s1[pos:pos+n) >  s2   --> return positive value
1212     //
1213     // 2) "strn[case]cmp()" functions follow the ANSI C comparison rules:
1214     //       s1[0:n) == s2[0:n)   --> return 0
1215     //       s1[0:n) <  s2[0:n)   --> return negative value
1216     //       s1[0:n) >  s2[0:n)   --> return positive value
1217 
1218 
1219     /// Case-sensitive compare of a substring with another string.
1220     ///
1221     /// @param s1
1222     ///   String containing the substring to be compared.
1223     /// @param pos
1224     ///   Start position of substring to be compared.
1225     /// @param n
1226     ///   Number of characters in substring to be compared.
1227     /// @param s2
1228     ///   String (char*) to be compared with substring.
1229     /// @return
1230     ///   - 0, if s1[pos:pos+n) == s2;
1231     ///   - Negative integer, if s1[pos:pos+n) <  s2;
1232     ///   - Positive integer, if s1[pos:pos+n) >  s2.
1233     /// @sa
1234     ///   Other forms of overloaded CompareCase() with differences in argument
1235     ///   types: char* vs. CTempString[Ex]
1236     static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1237                            const char* s2);
1238 
1239     /// Case-sensitive compare of a substring with another string.
1240     ///
1241     /// @param s1
1242     ///   String containing the substring to be compared.
1243     /// @param pos
1244     ///   Start position of substring to be compared.
1245     /// @param n
1246     ///   Number of characters in substring to be compared.
1247     /// @param s2
1248     ///   String to be compared with substring.
1249     /// @return
1250     ///   - 0, if s1[pos:pos+n) == s2;
1251     ///   - Negative integer, if s1[pos:pos+n) <  s2;
1252     ///   - Positive integer, if s1[pos:pos+n) >  s2.
1253     /// @sa
1254     ///   Other forms of overloaded CompareCase() with differences in argument
1255     ///   types: char* vs. CTempString[Ex]
1256     static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1257                            const CTempString s2);
1258 
1259     /// Case-sensitive compare of two strings -- char* version.
1260     ///
1261     /// @param s1
1262     ///   String to be compared -- operand 1.
1263     /// @param s2
1264     ///   String to be compared -- operand 2.
1265     /// @return
1266     ///   - 0, if s1 == s2;
1267     ///   - Negative integer, if s1 < s2;
1268     ///   - Positive integer, if s1 > s2.
1269     /// @sa
1270     ///   CompareNocase(), Compare() versions with same argument types.
1271     static int CompareCase(const char* s1, const char* s2);
1272 
1273     /// Case-sensitive compare of two strings -- CTempStringEx version.
1274     ///
1275     /// @param s1
1276     ///   String to be compared -- operand 1.
1277     /// @param s2
1278     ///   String to be compared -- operand 2.
1279     /// @return
1280     ///   - 0, if s1 == s2;
1281     ///   - Negative integer, if s1 < s2;
1282     ///   - Positive integer, if s1 > s2.
1283     /// @sa
1284     ///   CompareNocase(), Compare() versions with same argument types.
1285     static int CompareCase(const CTempStringEx s1, const CTempStringEx s2);
1286 
1287     /// Case-insensitive compare of a substring with another string.
1288     ///
1289     /// @param s1
1290     ///   String containing the substring to be compared.
1291     /// @param pos
1292     ///   Start position of substring to be compared.
1293     /// @param n
1294     ///   Number of characters in substring to be compared.
1295     /// @param s2
1296     ///   String (char*) to be compared with substring.
1297     /// @return
1298     ///   - 0, if s1[pos:pos+n) == s2 (case-insensitive compare);
1299     ///   - Negative integer, if s1[pos:pos+n) < s2 (case-insensitive compare);
1300     ///   - Positive integer, if s1[pos:pos+n) > s2 (case-insensitive compare).
1301     /// @sa
1302     ///   Other forms of overloaded CompareNocase() with differences in
1303     ///   argument types: char* vs. CTempString[Ex]
1304     static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1305                              const char* s2);
1306 
1307     /// Case-insensitive compare of a substring with another string.
1308     ///
1309     /// @param s1
1310     ///   String containing the substring to be compared.
1311     /// @param pos
1312     ///   Start position of substring to be compared.
1313     /// @param n
1314     ///   Number of characters in substring to be compared.
1315     /// @param s2
1316     ///   String to be compared with substring.
1317     /// @return
1318     ///   - 0, if s1[pos:pos+n) == s2 (case-insensitive compare);
1319     ///   - Negative integer, if s1[pos:pos+n) < s2 (case-insensitive compare);
1320     ///   - Positive integer, if s1[pos:pos+n) > s2 (case-insensitive compare).
1321     /// @sa
1322     ///   Other forms of overloaded CompareNocase() with differences in
1323     ///   argument types: char* vs. CTempString[Ex]
1324     static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1325                              const CTempString s2);
1326 
1327     /// Case-insensitive compare of two strings -- char* version.
1328     ///
1329     /// @param s1
1330     ///   String to be compared -- operand 1.
1331     /// @param s2
1332     ///   String to be compared -- operand 2.
1333     /// @return
1334     ///   - 0, if s1 == s2 (case-insensitive compare);
1335     ///   - Negative integer, if s1 < s2 (case-insensitive compare);
1336     ///   - Positive integer, if s1 > s2 (case-insensitive compare).
1337     /// @sa
1338     ///   CompareCase(), Compare() versions with same argument types.
1339     static int CompareNocase(const char* s1, const char* s2);
1340 
1341     /// Case-insensitive compare of two strings -- CTempStringEx version.
1342     ///
1343     /// @param s1
1344     ///   String to be compared -- operand 1.
1345     /// @param s2
1346     ///   String to be compared -- operand 2.
1347     /// @return
1348     ///   - 0, if s1 == s2 (case-insensitive compare);
1349     ///   - Negative integer, if s1 < s2 (case-insensitive compare);
1350     ///   - Positive integer, if s1 > s2 (case-insensitive compare).
1351     /// @sa
1352     ///   CompareCase(), Compare() versions with same argument types.
1353     static int CompareNocase(const CTempStringEx s1, const CTempStringEx s2);
1354 
1355     /// Compare of a substring with another string.
1356     ///
1357     /// @param s1
1358     ///   String containing the substring to be compared.
1359     /// @param pos
1360     ///   Start position of substring to be compared.
1361     /// @param n
1362     ///   Number of characters in substring to be compared.
1363     /// @param s2
1364     ///   String (char*) to be compared with substring.
1365     /// @param use_case
1366     ///   Whether to do a case sensitive compare(eCase -- default), or a
1367     ///   case-insensitive compare (eNocase).
1368     /// @return
1369     ///   - 0, if s1[pos:pos+n) == s2;
1370     ///   - Negative integer, if s1[pos:pos+n) < s2;
1371     ///   - Positive integer, if s1[pos:pos+n) > s2.
1372     /// @sa
1373     ///   Other forms of overloaded Compare() with differences in argument
1374     ///   types: char* vs. CTempString[Ex]
1375     static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1376                        const char* s2, ECase use_case = eCase);
1377 
1378     /// Comparison of a substring with another string -- CTempString version.
1379     ///
1380     /// @param s1
1381     ///   String containing the substring to be compared.
1382     /// @param pos
1383     ///   Start position of substring to be compared.
1384     /// @param n
1385     ///   Number of characters in substring to be compared.
1386     /// @param s2
1387     ///   String to be compared with substring.
1388     /// @param use_case
1389     ///   Whether to do a case-sensitive compare (default is eCase), or a
1390     ///   case-insensitive compare (eNocase).
1391     /// @return
1392     ///   - 0, if s1[pos:pos+n) == s2;
1393     ///   - Negative integer, if s1[pos:pos+n) < s2;
1394     ///   - Positive integer, if s1[pos:pos+n) > s2.
1395     /// @sa
1396     ///   Other forms of overloaded Compare() with differences in argument
1397     ///   types: char* vs. CTempString[Ex]
1398     static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1399                        const CTempString s2, ECase use_case = eCase);
1400 
1401     /// Compare two strings -- char* version.
1402     ///
1403     /// @param s1
1404     ///   String to be compared -- operand 1.
1405     /// @param s2
1406     ///   String to be compared -- operand 2.
1407     /// @param use_case
1408     ///   Whether to do a case-sensitive compare (default is eCase), or a
1409     ///   case-insensitive compare (eNocase).
1410     /// @return
1411     ///   - 0, if s1 == s2;
1412     ///   - Negative integer, if s1 < s2;
1413     ///   - Positive integer, if s1 > s2.
1414     /// @sa
1415     ///   Other forms of overloaded Compare() with differences in argument
1416     ///   types: char* vs. CTempString[Ex]
1417     static int Compare(const char* s1, const char* s2,
1418                        ECase use_case = eCase);
1419 
1420     /// Compare two strings -- CTempStringEx version.
1421     ///
1422     /// @param s1
1423     ///   String to be compared -- operand 1.
1424     /// @param s2
1425     ///   String to be compared -- operand 2.
1426     /// @param use_case
1427     ///   Whether to do a case-sensitive compare (default is eCase), or a
1428     ///   case-insensitive compare (eNocase).
1429     /// @return
1430     ///   - 0, if s1 == s2;
1431     ///   - Negative integer, if s1 < s2;
1432     ///   - Positive integer, if s1 > s2.
1433     /// @sa
1434     ///   Other forms of overloaded Compare() with differences in argument
1435     ///   types: char* vs. CTempString[Ex]
1436     static int Compare(const CTempStringEx s1, const CTempStringEx s2,
1437                        ECase use_case = eCase);
1438 
1439     /// Case-sensitive equality of a substring with another string -- char* version.
1440     ///
1441     /// @param s1
1442     ///   String containing the substring to be compared.
1443     /// @param pos
1444     ///   Start position of substring to be compared.
1445     /// @param n
1446     ///   Number of characters in substring to be compared.
1447     /// @param s2
1448     ///   String (char*) to be compared with substring.
1449     /// @return
1450     ///   - true, if s1[pos:pos+n) equals s2;
1451     ///   - false, otherwise.
1452     /// @sa
1453     ///   Other forms of overloaded EqualCase() with differences in argument
1454     ///   types: char* vs. CTempString[Ex]
1455     static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1456                           const char* s2);
1457 
1458     /// Case-sensitive equality of a substring with another string -- CTempString version.
1459     ///
1460     /// @param s1
1461     ///   String containing the substring to be compared.
1462     /// @param pos
1463     ///   Start position of substring to be compared.
1464     /// @param n
1465     ///   Number of characters in substring to be compared.
1466     /// @param s2
1467     ///   String to be compared with substring.
1468     /// @return
1469     ///   - true, if s1[pos:pos+n) equals s2;
1470     ///   - false, otherwise.
1471     /// @sa
1472     ///   Other forms of overloaded EqualCase() with differences in argument
1473     ///   types: char* vs. CTempString[Ex]
1474     static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1475                           const CTempString s2);
1476 
1477     /// Case-sensitive equality of two strings -- char* version.
1478     ///
1479     /// @param s1
1480     ///   String to be compared -- operand 1.
1481     /// @param s2
1482     ///   String to be compared -- operand 2.
1483     /// @return
1484     ///   - true, if s1 equals s2;
1485     ///   - false, otherwise.
1486     /// @sa
1487     ///   EqualNocase(), Equal() versions with the same argument types.
1488     static bool EqualCase(const char* s1, const char* s2);
1489 
1490     /// Case-sensitive equality of two strings -- CTempStringEx version.
1491     ///
1492     /// @param s1
1493     ///   String to be compared -- operand 1.
1494     /// @param s2
1495     ///   String to be compared -- operand 2.
1496     /// @return
1497     ///   - true, if s1 equals s2;
1498     ///   - false, otherwise.
1499     /// @sa
1500     ///   EqualNocase(), Equal() versions with the same argument types.
1501     static bool EqualCase(const CTempStringEx s1, const CTempStringEx s2);
1502 
1503     /// Case-insensitive equality of a substring with another string -- char* version.
1504     ///
1505     /// @param s1
1506     ///   String containing the substring to be compared.
1507     /// @param pos
1508     ///   Start position of substring to be compared.
1509     /// @param n
1510     ///   Number of characters in substring to be compared.
1511     /// @param s2
1512     ///   String (char*) to be compared with substring.
1513     /// @return
1514     ///   - true, if s1[pos:pos+n) equals s2 (case-insensitive compare);
1515     ///   - false, otherwise.
1516     /// @sa
1517     ///   Other forms of overloaded EqualNocase() with differences in
1518     ///   argument types: char* vs. CTempString[Ex]
1519     static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1520                             const char* s2);
1521 
1522     /// Case-insensitive equality of a substring with another string -- CTempString version.
1523     ///
1524     /// @param s1
1525     ///   String containing the substring to be compared.
1526     /// @param pos
1527     ///   Start position of substring to be compared.
1528     /// @param n
1529     ///   Number of characters in substring to be compared.
1530     /// @param s2
1531     ///   String to be compared with substring.
1532     /// @return
1533     ///   - true, if s1[pos:pos+n) equals s2 (case-insensitive compare);
1534     ///   - false, otherwise.
1535     /// @sa
1536     ///   Other forms of overloaded EqualNocase() with differences in
1537     ///   argument types: char* vs. CTempString[Ex]
1538     static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1539                             const CTempString s2);
1540 
1541     /// Case-insensitive equality of two strings -- char* version.
1542     ///
1543     /// @param s1
1544     ///   String to be compared -- operand 1.
1545     /// @param s2
1546     ///   String to be compared -- operand 2.
1547     /// @return
1548     ///   - true, if s1 equals s2 (case-insensitive compare);
1549     ///   - false, otherwise.
1550     /// @sa
1551     ///   EqualCase(), Equal() versions with the same argument types.
1552     static bool EqualNocase(const char* s1, const char* s2);
1553 
1554     /// Case-insensitive equality of two strings -- CTempStringEx version.
1555     ///
1556     /// @param s1
1557     ///   String to be compared -- operand 1.
1558     /// @param s2
1559     ///   String to be compared -- operand 2.
1560     /// @return
1561     ///   - true, if s1 equals s2 (case-insensitive compare);
1562     ///   - false, otherwise.
1563     /// @sa
1564     ///   EqualCase(), Equal() versions with the same argument types.
1565     static bool EqualNocase(const CTempStringEx s1, const CTempStringEx s2);
1566 
1567     /// Test for equality of a substring with another string -- char* version.
1568     ///
1569     /// @param s1
1570     ///   String containing the substring to be compared.
1571     /// @param pos
1572     ///   Start position of substring to be compared.
1573     /// @param n
1574     ///   Number of characters in substring to be compared.
1575     /// @param s2
1576     ///   String (char*) to be compared with substring.
1577     /// @param use_case
1578     ///   Whether to do a case-sensitive compare (eCase -- default), or a
1579     ///   case-insensitive compare (eNocase).
1580     /// @return
1581     ///   - true, if s1[pos:pos+n) equals s2;
1582     ///   - false, otherwise.
1583     /// @sa
1584     ///   Other forms of overloaded Equal() with differences in argument
1585     ///   types: char* vs. CTempString[Ex]
1586     static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1587                       const char* s2, ECase use_case = eCase);
1588 
1589     /// Test for equality of a substring with another string -- CTempString version.
1590     ///
1591     /// @param s1
1592     ///   String containing the substring to be compared.
1593     /// @param pos
1594     ///   Start position of substring to be compared.
1595     /// @param n
1596     ///   Number of characters in substring to be compared.
1597     /// @param s2
1598     ///   String to be compared with substring.
1599     /// @param use_case
1600     ///   Whether to do a case-sensitive compare (default is eCase), or a
1601     ///   case-insensitive compare (eNocase).
1602     /// @return
1603     ///   - true, if s1[pos:pos+n) equals s2
1604     ///     (with the case sensitivity selected by use_case);
1605     ///   - false, otherwise.
1606     /// @sa
1607     ///   Other forms of overloaded Equal() with differences in argument
1608     ///   types: char* vs. CTempString[Ex]
1609     static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1610                       const CTempString s2, ECase use_case = eCase);
1611 
1612     /// Test for equality of two strings -- char* version.
1613     ///
1614     /// @param s1
1615     ///   String to be compared -- operand 1.
1616     /// @param s2
1617     ///   String to be compared -- operand 2.
1618     /// @param use_case
1619     ///   Whether to do a case-sensitive compare (default is eCase), or a
1620     ///   case-insensitive compare (eNocase).
1621     /// @return
1622     ///   - true, if s1 equals s2
1623     ///     (with the case sensitivity selected by use_case);
1624     ///   - false, otherwise.
1625     /// @sa
1626     ///   EqualCase(), EqualNocase() versions with similar argument types.
1627     static bool Equal(const char* s1, const char* s2,
1628                       ECase use_case = eCase);
1629 
1630     /// Test for equality of two strings -- CTempStringEx version.
1631     ///
1632     /// @param s1
1633     ///   String to be compared -- operand 1.
1634     /// @param s2
1635     ///   String to be compared -- operand 2.
1636     /// @param use_case
1637     ///   Whether to do a case-sensitive compare (default is eCase), or a
1638     ///   case-insensitive compare (eNocase).
1639     /// @return
1640     ///   - true, if s1 equals s2;
1641     ///   - false, otherwise.
1642     /// @sa
1643     ///   EqualCase(), EqualNocase() versions with similar argument types.
1644     static bool Equal(const CTempStringEx s1, const CTempStringEx s2,
1645                       ECase use_case = eCase);
1646 
1647     // NOTE.  On some platforms, "strn[case]cmp()" can work faster than their
1648     //        "Compare***()" counterparts.
1649 
1650     /// String compare -- char* operands.
1651     ///
1652     /// @param s1
1653     ///   String to be compared -- operand 1.
1654     /// @param s2
1655     ///   String to be compared -- operand 2.
1656     /// @return
1657     ///   - 0, if s1 == s2;
1658     ///   - Negative integer, if s1 < s2;
1659     ///   - Positive integer, if s1 > s2.
1660     /// @sa
1661     ///   strncmp(), strcasecmp(), strncasecmp()
1662     static int strcmp(const char* s1, const char* s2);
1663 
1664     /// String compare, up to the specified number of characters.
1665     ///
1666     /// @param s1
1667     ///   String to be compared -- operand 1.
1668     /// @param s2
1669     ///   String to be compared -- operand 2.
1670     /// @param n
1671     ///   Maximum number of characters to compare.
1672     /// @return
1673     ///   - 0, if s1 == s2;
1674     ///   - Negative integer, if s1 < s2;
1675     ///   - Positive integer, if s1 > s2.
1676     /// @sa
1677     ///   strcmp(), strcasecmp(), strncasecmp()
1678     static int strncmp(const char* s1, const char* s2, size_t n);
1679 
1680     /// Case-insensitive comparison of two zero-terminated strings.
1681     ///
1682     /// @param s1
1683     ///   String to be compared -- operand 1.
1684     /// @param s2
1685     ///   String to be compared -- operand 2.
1686     /// @return
1687     ///   - 0, if s1 == s2 (case-insensitive compare);
1688     ///   - Negative integer, if s1 < s2 (case-insensitive compare);
1689     ///   - Positive integer, if s1 > s2 (case-insensitive compare).
1690     /// @sa
1691     ///   strcmp(), strncmp(), strncasecmp()
1692     static int strcasecmp(const char* s1, const char* s2);
1693 
1694     /// Case-insensitive comparison of two zero-terminated strings,
1695     /// narrowed to the specified number of characters.
1696     ///
1697     /// @param s1  String to be compared -- operand 1.
1698     /// @param s2  String to be compared -- operand 2.
1699     /// @param n
1700     ///   Maximum number of characters to compare.
1701     /// @return
1702     ///   - 0, if s1 == s2;
1703     ///   - Negative integer, if s1 < s2;
1704     ///   - Positive integer, if s1 > s2.
1705     /// @sa
1706     ///   strcmp(), strncmp(), strcasecmp()
1707     static int strncasecmp(const char* s1, const char* s2, size_t n);
1708 
1709     /// Wrapper for the function strftime() that corrects the handling of
1710     /// the %D and %T time formats on MS Windows.
1711     static size_t strftime(char* s, size_t maxsize, const char* format,
1712                            const struct tm* timeptr);
1713 
1714     /// Match "str" against the "mask".
1715     ///
1716     /// This function does not use regular expressions.
1717     /// Very similar to fnmatch(3), but there are differences (see also glob(7)).
1718     /// There's no special treatment for a slash character '/' in this call.
1719     ///
1720     /// @param str
1721     ///   String to match.
1722     /// @param mask
1723     ///   Mask used to match string "str".
1724     ///   This is a text pattern, which, along with ordinary characters that must match
1725     ///   literally the corresponding symbols in the string "str", can also contain
1726     ///   the following wildcard characters: \n
1727     ///     ? - matches any single character in the string. \n
1728     ///     * - matches any number of characters in the string (including none). \n
1729     ///
1730     ///   The mask also supports POSIX character classes in the forms of "[...]" or "[!...]"
1731     ///   that must MATCH or NOT MATCH, respectively, a single character in "str".
1732     ///   To cancel the special meaning of '*', '?' or '[', they can be prepended with
1733     ///   a backslash '\\' (the backslash in front of other characters does not change
1734     ///   their meaning, so "\\\\" matches one graphical backslash in the "str").
1735     ///   Within a character class, to have its literal meaning a closing square bracket ']'
1736     ///   must be used at the first position, whereas '?', '*', '[', and '\\' stand
1737     ///   just for themselves. Two characters separated by a minus sign '-' denote
1738     ///   a range that can be used for contraction to include all characters in
1739     ///   between:  "[A-F]" is equivalent to "[ABCDEF]".
1740     ///   For its literal meaning, the minus sign '-' can be used either at the very
1741     ///   first position, or the last position before the closing bracket ']'.
1742     ///   To have a range that begins with an exclamation point, one has to use
1743     ///   a dummy empty range followed by that range with '!'.
1744     ///
1745     ///   Examples:
1746     ///     "!"        matches a single '!' (note that just "[!]" is invalid);
1747     ///     "[!!]"     matches any character, which is not an exclamation point '!';
1748     ///     "[][!]"    matches ']', '[', and '!';
1749     ///     "[!][-]"   matches any character except for ']', '[', and '-';
1750     ///     "[-]"      matches a minus sign '-' (same as '-' just by itself);
1751     ///     "[?*\\]"   matches either '?', or '*', or a backslash '\\';
1752     ///     "[]-\\]"   matches nothing as it defines an empty range (from ']' to '\\');
1753     ///     "\\[a]\\*" matches a literal substring "[a]*";
1754     ///     "[![a-]"   matches any char but '[', 'a' or '-' (same as "[!-[a]"; but not
1755     ///                "[![-a]", which defines an empty range, thus matches any char!);
1756     ///     "[]A]"     matches either ']' or 'A' (NB: "[A]]" matches a substring "A]");
1757     ///     "[0-9-]"   matches any decimal digit or a minus sign '-' (same: "[-0-9]");
1758     ///     "[9-0!-$]" matches '!', '"', '#', and '$' (as first range matches nothing).
1759     ///
1760     /// @note
1761     ///   In the above, each double backslash denotes a single graphical backslash
1762     ///   character (C string notation is used).
1763     /// @note
1764     ///   Unlike shell globbing, "[--0]" *does* match the slash character '/'
1765     ///  (along with '-', '.', and '0' that all fall within the range).
1766     /// @param use_case
1767     ///   Whether to do a case-sensitive compare for letters (eCase -- default),
1768     ///   or a case-insensitive compare (eNocase).
1769     /// @return
1770     ///   Return TRUE if "str" matches "mask", and FALSE otherwise
1771     ///  (including pattern errors).
1772     /// @sa
1773     ///    CRegexp, CRegexpUtil
1774     ///
1775     static bool MatchesMask(CTempString str, CTempString mask, ECase use_case = eCase);
1776 
1777     /// Check if a string is blank (has no text).
1778     ///
1779     /// @param str
1780     ///   String to check.
1781     /// @param pos
1782     ///   Starting position in "str" (default is 0).
1783     static bool IsBlank(const CTempString str, SIZE_TYPE pos = 0);
1784 
1785     /// Check if all letters in the given string are lowercase.
1786     ///
1787     /// @param str
1788     ///   String to be checked.
1789     /// @return
1790     ///   TRUE if all letter characters in the string are lowercase
1791     ///   according to the current C locale (std::islower()).
1792     ///   All non-letter characters will be ignored.
1793     ///   Also TRUE if the string is empty or contains no letters.
1794     static bool IsLower(const CTempString str);
1795 
1796     /// Check if all letters in the given string are uppercase.
1797     ///
1798     /// @param str
1799     ///   String to be checked.
1800     /// @return
1801     ///   TRUE if all letter characters in the string are uppercase
1802     ///   according to the current C locale (std::isupper()).
1803     ///   All non-letter characters will be ignored.
1804     ///   Also TRUE if the string is empty or contains no letters.
1805     static bool IsUpper(const CTempString str);
1806 
1807 
1808     // The following 4 methods change the passed string, then return it
1809 
1810     /// Convert string to lower case -- string& version.
1811     ///
1812     /// @param str
1813     ///   String to be converted; the passed string itself is changed.
1814     /// @return
1815     ///   The passed string, lower-cased.
1816     static string& ToLower(string& str);
1817 
1818     /// Convert string to lower case -- char* version.
1819     ///
1820     /// @param str
1821     ///   String to be converted; the passed string itself is changed.
1822     /// @return
1823     ///   The passed string, lower-cased.
1824     static char* ToLower(char* str);
1825 
1826     /// Convert string to upper case -- string& version.
1827     ///
1828     /// @param str
1829     ///   String to be converted; the passed string itself is changed.
1830     /// @return
1831     ///   The passed string, upper-cased.
1832     static string& ToUpper(string& str);
1833 
1834     /// Convert string to upper case -- char* version.
1835     ///
1836     /// @param str
1837     ///   String to be converted; the passed string itself is changed.
1838     /// @return
1839     ///   The passed string, upper-cased.
1840     static char* ToUpper(char* str);
1841 
1842 private:
1843     /// Private overload of ToLower() taking a const char* parameter; it is
1844     /// declared only to prevent passing of constant strings to ToLower().
1845     static void/*dummy*/ ToLower(const char* /*dummy*/);
1846 
1847     /// Private overload of ToUpper() taking a const char* parameter; it is
1848     /// declared only to prevent passing of constant strings to ToUpper().
1849     static void/*dummy*/ ToUpper(const char* /*dummy*/);
1850 
1851 public:
1852 
1853     /// Check if a string starts with a specified prefix value.
1854     ///
1855     /// @param str
1856     ///   String to check.
1857     /// @param start
1858     ///   Prefix value to check for.
1859     /// @param use_case
1860     ///   Whether to do a case-sensitive compare (default is eCase), or a
1861     ///   case-insensitive compare (eNocase) while checking.
1862     static bool StartsWith(const CTempString str, const CTempString start,
1863                            ECase use_case = eCase);
1864 
1865     /// Check if a string starts with a specified character value.
1866     ///
1867     /// @param str
1868     ///   String to check.
1869     /// @param start
1870     ///   Character value to check for.
1871     /// @param use_case
1872     ///   Whether to do a case-sensitive compare (default is eCase), or a
1873     ///   case-insensitive compare (eNocase) while checking.
1874     static bool StartsWith(const CTempString str, char start,
1875                            ECase use_case = eCase);
1876 
1877     /// Check if a string ends with a specified suffix value.
1878     ///
1879     /// @param str
1880     ///   String to check.
1881     /// @param end
1882     ///   Suffix value to check for.
1883     /// @param use_case
1884     ///   Whether to do a case-sensitive compare (default is eCase), or a
1885     ///   case-insensitive compare (eNocase) while checking.
1886     static bool EndsWith(const CTempString str, const CTempString end,
1887                          ECase use_case = eCase);
1888 
1889     /// Check if a string ends with a specified character value.
1890     ///
1891     /// @param str
1892     ///   String to check.
1893     /// @param end
1894     ///   Character value to check for.
1895     /// @param use_case
1896     ///   Whether to do a case-sensitive compare (default is eCase), or a
1897     ///   case-insensitive compare (eNocase) while checking.
1898     static bool EndsWith(const CTempString str, char end,
1899                          ECase use_case = eCase);
1900 
1901     /// Determine the size of the common prefix of two strings.
1902     ///
1903     /// @param s1
1904     ///   String to be compared -- operand 1.
1905     /// @param s2
1906     ///   String to be compared -- operand 2.
1907     /// @return
1908     ///   The number of characters common to the start of each string.
1909     static SIZE_TYPE CommonPrefixSize(const CTempString s1, const CTempString s2);
1910 
1911     /// Determine the size of the common suffix of two strings.
1912     ///
1913     /// @param s1
1914     ///   String to be compared -- operand 1.
1915     /// @param s2
1916     ///   String to be compared -- operand 2.
1917     /// @return
1918     ///   The number of characters common to the end of each string.
1919     static SIZE_TYPE CommonSuffixSize(const CTempString s1, const CTempString s2);
1920 
1921     /// Determine how far the suffix of one string overlaps the prefix of another.
1922     ///
1923     /// @param s1
1924     ///   String to be compared -- operand 1 (its end is examined).
1925     /// @param s2
1926     ///   String to be compared -- operand 2 (its start is examined).
1927     /// @return
1928     ///   The number of characters common to the end of the first string
1929     ///   and the start of the second string.
1930     static SIZE_TYPE CommonOverlapSize(const CTempString s1, const CTempString s2);
1931 
1932 
1933     /// Whether it is the first or last occurrence.
1934     /// @deprecated  Only used by the deprecated range-based Find*() overloads.
1935     enum EOccurrence {
1936         eFirst,             ///< First occurrence
1937         eLast               ///< Last occurrence
1938     };
1939 
1940     /// Search direction for Find() methods.
1941     enum EDirection {
1942         eForwardSearch = 0,  ///< Search in a forward direction (Find() default)
1943         eReverseSearch       ///< Search in a backward direction
1944     };
1945 
1946 
1947     /// Find the pattern in the string.
1948     ///
1949     /// @param str
1950     ///   String to search.
1951     /// @param pattern
1952     ///   Pattern to search for in "str".
1953     /// @param use_case
1954     ///   Whether to do a case-sensitive compare (default is eCase), or a
1955     ///   case-insensitive compare (eNocase) while searching for the pattern.
1956     /// @param direction
1957     ///   Defines the search direction for the requested "occurrence"
1958     ///   of "pattern" in "str".
1959     /// @param occurrence
1960     ///   Which occurrence of the pattern in the string to use (zero-based).
1961     ///   NOTE:  When an occurrence is found the next occurrence will be
1962     ///          searched for starting right *after* the found pattern.
1963     /// @return
1964     ///   Start of the found pattern in the string.
1965     ///   Or NPOS if there is no occurrence of the pattern in the string.
1966     static SIZE_TYPE Find(const CTempString str,
1967                           const CTempString pattern,
1968                           ECase             use_case   = eCase,
1969                           EDirection        direction  = eForwardSearch,
1970                           SIZE_TYPE         occurrence = 0);
1971 
1972     /// Find the pattern in the specified range of a string.
1973     ///
1974     /// @param str
1975     ///   String to search.
1976     /// @param pattern
1977     ///   Pattern to search for in "str".
1978     /// @param start
1979     ///   Position in "str" to start search from.
1980     ///   0 means start the search from the beginning of the string.
1981     /// @param end
1982     ///   Position in "str" to perform search up to.
1983     ///   NPOS means to search to the end of the string.
1984     /// @param which
1985     ///   When set to eFirst, this means to find the first occurrence of
1986     ///   "pattern" in "str". When set to eLast, this means to find the last
1987     ///    occurrence of "pattern" in "str".
1988     /// @param use_case
1989     ///   Whether to do a case-sensitive compare (default is eCase), or a
1990     ///   case-insensitive compare (eNocase) while searching for the pattern.
1991     /// @return
1992     ///   - The start of the first or last (depending on "which" parameter)
1993     ///     occurrence of "pattern" in "str", within the string interval
1994     ///     ["start", "end"], or
1995     ///   - NPOS if there is no occurrence of the pattern.
1996     /// @sa FindCase, FindNoCase, FindWord
1997     ///
1998     /// @deprecated
1999     ///   Use the
2000     ///   @code
2001     ///       Find(str, pattern, [use_case], [direction], [occurrence])
2002     ///   @endcode
2003     ///   method instead.
2004     ///   For example:
2005     ///   @code
2006     ///       Find(str, pattern, 0, NPOS, eLast, eCase)
2007     ///   @endcode
2008     ///   can be replaced by
2009     ///   @code
2010     ///       Find(str, pattern, eCase, eReverseSearch /*, 0 */)
2011     ///   @endcode
2012     ///   If you are doing a search on a substring of 'str', i.e. the ["start", "end"]
2013     ///   search interval is not the default [0, NPOS] that means the whole 'str', you
2014     ///   may need to pass a substring instead of 'str', like
2015     ///   @code
2016     ///       Find(CTempString(str, start, len), pattern, ....)
2017     ///   @endcode
2018     ///  and, after checking the search result for NPOS, adjust it by 'start' yourself.
2019     NCBI_DEPRECATED
2020     static SIZE_TYPE Find(const CTempString str,
2021                           const CTempString pattern,
2022                           SIZE_TYPE   start, SIZE_TYPE end,
2023                           EOccurrence which = eFirst,
2024                           ECase       use_case = eCase);
2025 
2026     /// Wrapper for backward-compatibility: forwards to FindCase(str, pattern, start).
2027     inline
2028     static SIZE_TYPE Find(const CTempString str, const CTempString pattern, SIZE_TYPE start)
2029         { return FindCase(str, pattern, start); }
2030 
2031 
2032     /// Find the pattern in the specified range of a string using a case
2033     /// sensitive search.
2034     ///
2035     /// @param str
2036     ///   String to search.
2037     /// @param pattern
2038     ///   Pattern to search for in "str".
2039     /// @param start
2040     ///   Position in "str" to start search from -- 0 means start
2041     ///   the search from the beginning of the string.
2042     /// @param end
2043     ///   Position in "str" to perform search up to -- NPOS means
2044     ///   to search to the end of the string.
2045     /// @param which
2046     ///   When set to eFirst, this means to find the first occurrence of
2047     ///   "pattern" in "str". When set to eLast, this means to find the last
2048     ///    occurrence of "pattern" in "str".
2049     /// @return
2050     ///   - The start of the first or last (depending on "which" parameter)
2051     ///     occurrence of "pattern" in "str", within the string interval
2052     ///     ["start", "end"], or
2053     ///   - NPOS if there is no occurrence of the pattern.
2054     /// @sa Find
2055     ///
2056     /// @deprecated
2057     ///   Use a Find() method without the [start:end] range,
2058     ///   or FindCase() without the "end" and "which" arguments,
2059     ///   that is, one of the following methods instead:
2060     ///   @code
2061     ///       Find(str, pattern, [use_case], [direction], [occurrence])
2062     ///       FindCase(str, pattern, [start])
2063     ///   @endcode
2064     ///   For example:
2065     ///   @code
2066     ///       FindCase(str, pattern, 0, NPOS, eLast)
2067     ///   @endcode
2068     ///   can be replaced by
2069     ///   @code
2070     ///       Find(str, pattern, eCase, eReverseSearch /*, 0 */)
2071     ///   @endcode
2072     ///   For simpler cases without range, or with default [0, NPOS] please use
2073     ///   @code
2074     ///       FindCase(str, pattern, [start])
2075     ///   @endcode
2076     ///   But if you are doing a search on a substring of 'str', i.e. the ["start", "end"]
2077     ///   search interval is not the default [0, NPOS] that means the whole 'str', you
2078     ///   may need to pass a substring instead of 'str', like
2079     ///   @code
2080     ///       FindCase(CTempString(str, start, len), pattern, ....)
2081     ///   @endcode
2082     ///  and, after checking the search result for NPOS, adjust it by 'start' yourself.
2083     NCBI_DEPRECATED
2084     static SIZE_TYPE FindCase(const CTempString str,
2085                               const CTempString pattern,
2086                               SIZE_TYPE   start, SIZE_TYPE end,
2087                               EOccurrence which = eFirst);
2088 
2089     /// Wrappers for backward-compatibility (no "end" / "which" arguments)
2090     static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern);
2091     static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start);
2092 
2093     /// Find the pattern in the specified range of a string using a case
2094     /// insensitive search.
2095     ///
2096     /// @param str
2097     ///   String to search.
2098     /// @param pattern
2099     ///   Pattern to search for in "str".
2100     /// @param start
2101     ///   Position in "str" to start search from -- 0 means start
2102     ///   the search from the beginning of the string.
2103     /// @param end
2104     ///   Position in "str" to perform search up to -- NPOS means
2105     ///   to search to the end of the string.
2106     /// @param which
2107     ///   When set to eFirst, this means to find the first occurrence of
2108     ///   "pattern" in "str". When set to eLast, this means to find the last
2109     ///    occurrence of "pattern" in "str".
2110     /// @return
2111     ///   - The start of the first or last (depending on "which" parameter)
2112     ///     occurrence of "pattern" in "str", within the string interval
2113     ///     ["start", "end"], or
2114     ///   - NPOS if there is no occurrence of the pattern.
2115     /// @sa Find
2116     ///
2117     /// @deprecated
2118     ///   Use one of the following methods instead:
2119     ///   @code
2120     ///       Find(str, pattern, [use_case], [direction], [occurrence])
2121     ///       FindNoCase(str, pattern, [start])
2122     ///   @endcode
2123     ///   For example:
2124     ///   @code
2125     ///       FindNoCase(str, pattern, 0, NPOS, eLast)
2126     ///   @endcode
2127     ///   can be replaced by
2128     ///   @code
2129     ///       Find(str, pattern, eNocase, eReverseSearch /*, 0 */)
2130     ///   @endcode
2131     ///   For simpler cases without range, or with default [0, NPOS] please use
2132     ///   @code
2133     ///       FindNoCase(str, pattern, [start])
2134     ///   @endcode
2135     ///   But if you are doing a search on a substring of 'str', i.e. the ["start", "end"]
2136     ///   search interval is not the default [0, NPOS] that means the whole 'str', you
2137     ///   may need to pass a substring instead of 'str', like
2138     ///   @code
2139     ///       FindNoCase(CTempString(str, start, len), pattern, ....)
2140     ///   @endcode
2141     ///  and, after checking the search result for NPOS, adjust it by 'start' yourself.
2142     NCBI_DEPRECATED
2143     static SIZE_TYPE FindNoCase(const CTempString str,
2144                                 const CTempString pattern,
2145                                 SIZE_TYPE   start, SIZE_TYPE end,
2146                                 EOccurrence which = eFirst);
2147 
2148     /// Wrappers for backward compatibility: FindNoCase(str, pattern, [start]) without a search range.
2149     static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern);
2150     static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start);
2151 
2152     /// Test for presence of a given string in a list or vector of strings.
2153     /// NOTE(review): presumably returns a pointer to the matching element, or NULL if none -- confirm.
2154     static const string* Find      (const list<string>& lst,
2155                                     const CTempString val,
2156                                     ECase use_case = eCase);
2157 
2158     static const string* FindCase  (const list<string>& lst,
2159                                     const CTempString val);
2160 
2161     static const string* FindNoCase(const list<string>& lst,
2162                                     const CTempString val);
2163 
2164     static const string* Find      (const vector<string>& vec,
2165                                     const CTempString val,
2166                                     ECase use_case = eCase);
2167 
2168     static const string* FindCase  (const vector<string>& vec,
2169                                     const CTempString val);
2170 
2171     static const string* FindNoCase(const vector<string>& vec,
2172                                     const CTempString val);
2173 
2174     /// Find the given word in the string.
2175     ///
2176     /// @param str
2177     ///   String to search.
2178     /// @param word
2179     ///   Word to search for in "str". The "word" can contain any symbols,
2180     ///   not letters only. The function treats it as a pattern, even if it
2181     ///   has non-word characters.
2182     /// @param use_case
2183     ///   Whether to do a case sensitive compare (default is eCase), or a
2184     ///   case-insensitive compare (eNocase) while searching for the word.
2185     /// @param direction
2186     ///   Defines the search direction for the occurrence of "word" in "str".
2187     /// @return
2188     ///   - Start of the found word in the string.
2189     ///   - NPOS if there is no occurrence of the word in the string.
2190     static SIZE_TYPE FindWord(const CTempString str,
2191                               const CTempString word,
2192                               ECase             use_case  = eCase,
2193                               EDirection        direction = eForwardSearch);
2194 
2195     /// Find the given word in the string.
2196     ///
2197     /// This function honors word boundaries:
2198     ///   - the beginning or the end of the string,
2199     ///   - any non-word character, i.e. anything except [a-zA-Z0-9_].
2200     ///
2201     /// @param str
2202     ///   String to search.
2203     /// @param word
2204     ///   Word to search for in "str". The "word" can contain any symbols,
2205     ///   not letters only. The function treats it as a pattern, even if it
2206     ///   has non-word characters.
2207     /// @param which
2208     ///   When set to eFirst, this means to find the first occurrence of
2209     ///   "word" in "str". When set to eLast, this means to find the last
2210     ///   occurrence of "word" in "str".
2211     /// @param use_case
2212     ///   Whether to do a case sensitive compare (default is eCase), or a
2213     ///   case-insensitive compare (eNocase) while searching for the word.
2214     /// @return
2215     ///   - The start of the first or last (depending on "which" parameter)
2216     ///     occurrence of "word" in "str", or
2217     ///   - NPOS if there is no occurrence of the word.
2218     /// @sa Find
2219     /// @deprecated
2220     ///   Use the FindWord() variant with EDirection parameter (this wrapper forwards to it):
2221     ///   @code
2222     ///       FindWord(str, word, [use_case], [direction])
2223     ///   @endcode
2224     inline
2225     NCBI_DEPRECATED
2226     static SIZE_TYPE FindWord(const CTempString str,
2227                               const CTempString word,
2228                               EOccurrence which,
2229                               ECase       use_case = eCase) {  // eFirst -> forward, otherwise reverse
2230         return FindWord(str, word, use_case, which == eFirst ? eForwardSearch : eReverseSearch);
2231     }
2232 
2233 
2234     /// Which end(s) of a string to truncate spaces from.
2235     enum ETrunc {
2236         eTrunc_Begin,  ///< Truncate leading spaces only
2237         eTrunc_End,    ///< Truncate trailing spaces only
2238         eTrunc_Both    ///< Truncate spaces at both begin and end of string
2239     };
2240 
2241     /// Truncate spaces in a string, returning the result as a new string.
2242     ///
2243     /// @param str
2244     ///   String to truncate spaces from.
2245     /// @param where
2246     ///   Which end of the string to truncate spaces from. Default is to
2247     ///   truncate spaces from both ends (eTrunc_Both).
2248     /// @sa
2249     ///   TruncateSpaces_Unsafe
2250     static string TruncateSpaces(const string& str,
2251                                  ETrunc        where = eTrunc_Both);
2252 
2253     /// Truncate spaces in a string.
2254     /// It can be faster, but it is also more dangerous than TruncateSpaces().
2255     ///
2256     /// @param str
2257     ///   String to truncate spaces from.
2258     /// @param where
2259     ///   Which end of the string to truncate spaces from. Default is to
2260     ///   truncate spaces from both ends (eTrunc_Both).
2261     /// @attention
2262     ///   The lifespan of the result string is the same as that of the source.
2263     ///   So, for example, if the source is a temporary string, or it changes somehow,
2264     ///   then the result will be invalid right away (will point to an already released
2265     ///   or wrong range in memory).
2266     /// @sa
2267     ///   TruncateSpaces
2268     static CTempString TruncateSpaces_Unsafe(const CTempString str,
2269                                              ETrunc where = eTrunc_Both);
2270 
2271     /// @deprecated  Use TruncateSpaces_Unsafe() instead -- AND, do make sure
2272     ///              that you indeed use that in a safe manner!
2273     inline
2274     NCBI_DEPRECATED
2275     static CTempString TruncateSpaces(const CTempString str,
2276                                       ETrunc where = eTrunc_Both) {
2277         return TruncateSpaces_Unsafe(str, where);  // plain forwarder, same lifetime caveats
2278     }
2279 
2280     /// @deprecated  Use TruncateSpaces_Unsafe() instead -- AND, do make sure
2281     ///              that you indeed use that in a safe manner!
2282     inline
2283     NCBI_DEPRECATED
2284     static CTempString TruncateSpaces(const char* str,
2285                                       ETrunc where = eTrunc_Both) {
2286         return TruncateSpaces_Unsafe(str, where);  // "str" is passed where a CTempString is expected
2287     }
2288 
2289     /// Truncate spaces in a string (in-place).
2290     ///
2291     /// @param str
2292     ///   String to truncate spaces from; it is modified directly.
2293     /// @param where
2294     ///   Which end of the string to truncate spaces from. Default is to
2295     ///   truncate spaces from both ends (eTrunc_Both).
2296     static void TruncateSpacesInPlace(string& str,  ETrunc where = eTrunc_Both);
2297     static void TruncateSpacesInPlace(CTempString&, ETrunc where = eTrunc_Both);
2298 
2299 
2300     /// Trim prefix from a string (in-place).
2301     ///
2302     /// @param str
2303     ///   String to trim from.
2304     /// @param prefix
2305     ///   Prefix to remove.
2306     ///   If the string doesn't have the specified prefix, it is left unchanged.
2307     /// @param use_case
2308     ///   Whether to do a case sensitive compare (default is eCase), or a
2309     ///   case-insensitive compare (eNocase) while checking for a prefix.
2310     static void TrimPrefixInPlace(string& str, const CTempString prefix,
2311                                   ECase use_case = eCase);
2312     static void TrimPrefixInPlace(CTempString& str, const CTempString prefix,
2313                                   ECase use_case = eCase);
2314 
2315     /// Trim prefix from a string.
2316     ///
2317     /// "Unsafe" counterpart to TrimPrefixInPlace().
2318     /// @param str
2319     ///   String to trim from.
2320     /// @param prefix
2321     ///   Prefix to remove.
2322     ///   If the string doesn't have the specified prefix, nothing is trimmed.
2323     /// @param use_case
2324     ///   Whether to do a case sensitive compare (default is eCase), or a
2325     ///   case-insensitive compare (eNocase) while checking for a prefix.
2326     /// @attention
2327     ///   The lifespan of the result string is the same as that of the source.
2328     ///   So, for example, if the source is a temporary string, or it changes somehow,
2329     ///   then the result will be invalid right away (will point to an already released
2330     ///   or wrong range in memory).
2331     /// @sa
2332     ///   TrimPrefixInPlace
2333     static CTempString TrimPrefix_Unsafe(const CTempString str,
2334                                          const CTempString prefix,
2335                                          ECase use_case = eCase);
2336 
2337     /// Trim suffix from a string (in-place).
2338     ///
2339     /// @param str
2340     ///   String to trim from.
2341     /// @param suffix
2342     ///   Suffix to remove.
2343     ///   If the string doesn't have the specified suffix, it is left unchanged.
2344     /// @param use_case
2345     ///   Whether to do a case sensitive compare (default is eCase), or a
2346     ///   case-insensitive compare (eNocase) while checking for a suffix.
2347     static void TrimSuffixInPlace(string& str, const CTempString suffix,
2348                                   ECase use_case = eCase);
2349     static void TrimSuffixInPlace(CTempString& str, const CTempString suffix,
2350                                   ECase use_case = eCase);
2351 
2352     /// Trim suffix from a string.
2353     ///
2354     /// "Unsafe" counterpart to TrimSuffixInPlace().
2355     /// @param str
2356     ///   String to trim from.
2357     /// @param suffix
2358     ///   Suffix to remove.
2359     ///   If the string doesn't have the specified suffix, nothing is trimmed.
2360     /// @param use_case
2361     ///   Whether to do a case sensitive compare (default is eCase), or a
2362     ///   case-insensitive compare (eNocase) while checking for a suffix.
2363     /// @attention
2364     ///   The lifespan of the result string is the same as that of the source.
2365     ///   So, for example, if the source is a temporary string, or it changes somehow,
2366     ///   then the result will be invalid right away (will point to an already released
2367     ///   or wrong range in memory).
2368     /// @sa
2369     ///   TrimSuffixInPlace
2370     static CTempString TrimSuffix_Unsafe(const CTempString str,
2371                                          const CTempString suffix,
2372                                          ECase use_case = eCase);
2373 
2374     /// Replace occurrences of a substring within a string.
2375     ///
2376     /// @param src
2377     ///   Source string in which the specified substring occurrences are
2378     ///   replaced.
2379     /// @param search
2380     ///   Substring value in "src" that is replaced.
2381     /// @param replace
2382     ///   Replace "search" substring with this value.
2383     /// @param dst
2384     ///   Result of replacing the "search" string with "replace" in "src".
2385     ///   This value is also returned by the function.
2386     /// @param start_pos
2387     ///   Position to start search from.
2388     /// @param max_replace
2389     ///   Replace no more than "max_replace" occurrences of substring "search".
2390     ///   If "max_replace" is zero (default), then replace all occurrences with
2391     ///   "replace".
2392     /// @param num_replace
2393     ///   Optional pointer to a value which receives the number of replacements made.
2394     /// @return
2395     ///   Result of replacing the "search" string with "replace" in "src". This
2396     ///   value is placed in "dst" as well.
2397     /// @sa
2398     ///   Version of Replace() that returns a new string.
2399     static string& Replace(const string& src,
2400                            const string& search,
2401                            const string& replace,
2402                            string&       dst,
2403                            SIZE_TYPE     start_pos = 0,
2404                            SIZE_TYPE     max_replace = 0,
2405                            SIZE_TYPE*    num_replace = 0);
2406 
2407     /// Replace occurrences of a substring within a string and return the
2408     /// result as a new string.
2409     ///
2410     /// @param src
2411     ///   Source string in which the specified substring occurrences are
2412     ///   replaced.
2413     /// @param search
2414     ///   Substring value in "src" that is replaced.
2415     /// @param replace
2416     ///   Replace "search" substring with this value.
2417     /// @param start_pos
2418     ///   Position to start search from.
2419     /// @param max_replace
2420     ///   Replace no more than "max_replace" occurrences of substring "search".
2421     ///   If "max_replace" is zero (default), then replace all occurrences with
2422     ///   "replace".
2423     /// @param num_replace
2424     ///   Optional pointer to a value which receives the number of replacements made.
2425     /// @return
2426     ///   A new string containing the result of replacing the "search" string
2427     ///   with "replace" in "src".
2428     /// @sa
2429     ///   Version of Replace() that has a destination parameter to accept
2430     ///   result.
2431     static string Replace(const string& src,
2432                           const string& search,
2433                           const string& replace,
2434                           SIZE_TYPE     start_pos = 0,
2435                           SIZE_TYPE     max_replace = 0,
2436                           SIZE_TYPE*    num_replace = 0);
2437 
2438     /// Replace occurrences of a substring within a string (in-place).
2439     ///
2440     /// On some platforms this function is much faster than Replace()
2441     /// if the sizes of the "search" and "replace" strings are equal.
2442     /// Otherwise, the performance is mainly the same.
2443     /// @param src
2444     ///   String where the specified substring occurrences are replaced.
2445     ///   This value is also returned by the function.
2446     /// @param search
2447     ///   Substring value in "src" that is replaced.
2448     /// @param replace
2449     ///   Replace "search" substring with this value.
2450     /// @param start_pos
2451     ///   Position to start search from.
2452     /// @param max_replace
2453     ///   Replace no more than "max_replace" occurrences of substring "search".
2454     ///   If "max_replace" is zero (default), then replace all occurrences with
2455     ///   "replace".
2456     /// @param num_replace
2457     ///   Optional pointer to a value which receives the number of replacements made.
2458     /// @return
2459     ///   Result of replacing the "search" string with "replace" in "src".
2460     /// @sa
2461     ///   Replace
2462     static string& ReplaceInPlace(string& src,
2463                                   const string& search,
2464                                   const string& replace,
2465                                   SIZE_TYPE     start_pos = 0,
2466                                   SIZE_TYPE     max_replace = 0,
2467                                   SIZE_TYPE*    num_replace = 0);
2468 
2469     /// Flags for Split*() methods.
2470     ///
2471     /// @note
2472     ///   With quote support enabled, doubling a quote character suppresses
2473     ///   its special meaning, as does escaping it if that's enabled too;
2474     ///   unescaped trailing backslashes and unbalanced quotes result in
2475     ///   exceptions.
2476     /// @note
2477     ///   All escape symbols and single or double quotes are removed
2478     ///   if the corresponding fSplit_Can* flag is used.
2479     enum ESplitFlags {
2480         fSplit_MergeDelimiters = 1 << 0,  ///< Merge adjacent delimiters
2481         fSplit_Truncate_Begin  = 1 << 1,  ///< Truncate leading delimiters
2482         fSplit_Truncate_End    = 1 << 2,  ///< Truncate trailing delimiters
2483         fSplit_Truncate        = fSplit_Truncate_Begin | fSplit_Truncate_End,
2484         fSplit_ByPattern       = 1 << 3,  ///< Require full delimiter strings
2485         fSplit_CanEscape       = 1 << 4,  ///< Allow \\... escaping
2486         fSplit_CanSingleQuote  = 1 << 5,  ///< Allow '...' quoting
2487         fSplit_CanDoubleQuote  = 1 << 6,  ///< Allow "..." quoting
2488         fSplit_CanQuote        = fSplit_CanSingleQuote | fSplit_CanDoubleQuote,
2489         /// All delimiters are merged and trimmed, to get non-empty tokens only
2490         fSplit_Tokenize        = fSplit_MergeDelimiters | fSplit_Truncate
2491     };
2492     typedef int TSplitFlags; ///< Bitwise OR of ESplitFlags
2493 
2494     /// Whether to merge adjacent delimiters (eMergeDelims also sets fSplit_Truncate).
2495     /// Used by some methods that don't need the full functionality of ESplitFlags.
2496     enum EMergeDelims {
2497         eMergeDelims   = fSplit_MergeDelimiters | fSplit_Truncate,
2498         eNoMergeDelims = 0
2499     };
2500 
2501     /// Split a string using the specified delimiters.
2502     ///
2503     /// @param str
2504     ///   String to be split.
2505     /// @param delim
2506     ///   Delimiter(s) used to split string "str". The interpretation of
2507     ///   multi-character values depends on flags: by default, any of those
2508     ///   characters marks a split point (when unquoted), but with
2509     ///   fSplit_ByPattern, the entire string must occur.  (Meanwhile,
2510     ///   an empty value disables splitting.)
2511     /// @param arr
2512     ///   The split tokens are added to the container "arr" and also returned
2513     ///   by the function.
2514     /// @param flags
2515     ///   Flags directing splitting, characterized under ESplitFlags.
2516     /// @param token_pos
2517     ///   Optional array for the tokens' positions in "str".
2518     /// @attention
2519     ///   Modifying or destroying the source CTempString object
2520     ///   will invalidate the results.
2521     /// @return
2522     ///   The container "arr" is also returned.
2523     /// @sa
2524     ///   ESplitFlags, SplitInTwo, SplitByPattern
2525     static list<string>& Split( const CTempString    str,
2526                                 const CTempString    delim,
2527                                 list<string>&        arr,
2528                                 TSplitFlags          flags = 0,
2529                                 vector<SIZE_TYPE>*   token_pos = NULL);
2530 
2531     static vector<string>& Split(
2532                                 const CTempString    str,
2533                                 const CTempString    delim,
2534                                 vector<string>&      arr,
2535                                 TSplitFlags          flags = 0,
2536                                 vector<SIZE_TYPE>*   token_pos = NULL);
2537 
2538     static list<CTempString>& Split(
2539                                 const CTempString    str,
2540                                 const CTempString    delim,
2541                                 list<CTempString>&   arr,
2542                                 TSplitFlags          flags = 0,
2543                                 vector<SIZE_TYPE>*   token_pos = NULL,
2544                                 CTempString_Storage* storage = NULL);
2545 
2546     static vector<CTempString>& Split(
2547                                 const CTempString    str,
2548                                 const CTempString    delim,
2549                                 vector<CTempString>& arr,
2550                                 TSplitFlags          flags = 0,
2551                                 vector<SIZE_TYPE>*   token_pos = NULL,
2552                                 CTempString_Storage* storage = NULL);
2553 
2554     static list<CTempStringEx>& Split(
2555                                 const CTempString    str,
2556                                 const CTempString    delim,
2557                                 list<CTempStringEx>& arr,
2558                                 TSplitFlags          flags = 0,
2559                                 vector<SIZE_TYPE>*   token_pos = NULL,
2560                                 CTempString_Storage* storage = NULL);
2561 
2562     static vector<CTempStringEx>& Split(
2563                                 const CTempString      str,
2564                                 const CTempString      delim,
2565                                 vector<CTempStringEx>& arr,
2566                                 TSplitFlags            flags = 0,
2567                                 vector<SIZE_TYPE>*     token_pos = NULL,
2568                                 CTempString_Storage*   storage = NULL);
2569 
2570     /// Split a string into two pieces using the specified delimiters.
2571     ///
2572     /// @param str
2573     ///   String to be split.
2574     /// @param delim
2575     ///   Delimiters used to split string "str".
2576     /// @param str1
2577     ///   The sub-string of "str" before the first character of "delim".
2578     ///   It will not contain any characters in "delim".
2579     ///   Will be empty if "str" begins with a delimiter.
2580     /// @param str2
2581     ///   The sub-string of "str" after the first character of "delim" found.
2582     ///   May contain "delim" characters.
2583     ///   Will be empty if "str" had no "delim" characters or ended
2584     ///   with the "delim" character.
2585     /// @param flags
2586     ///   Flags directing splitting, characterized under ESplitFlags.
2587     ///   Note that fSplit_Truncate_End doesn't have any effect due to the nature
2588     ///   of this method.
2589     /// @attention
2590     ///   Modifying or destroying the source CTempString object
2591     ///   will invalidate the results.
2592     /// @return
2593     ///   true if a symbol from "delim" was found in "str", false if not.
2594     ///   This lets you distinguish when there were no delimiters and when
2595     ///   the very last character was the first delimiter.
2596     /// @sa
2597     ///   ESplitFlags, Split
2598     static bool SplitInTwo(const CTempString  str,
2599                            const CTempString  delim,
2600                            string&            str1,
2601                            string&            str2,
2602                            TSplitFlags        flags = 0);
2603 
2604     static bool SplitInTwo(const CTempString  str,
2605                            const CTempString  delim,
2606                            CTempString&       str1,
2607                            CTempString&       str2,
2608                            TSplitFlags        flags = 0,
2609                            CTempString_Storage* storage = NULL);
2610 
2611     static bool SplitInTwo(const CTempString  str,
2612                            const CTempString  delim,
2613                            CTempStringEx&     str1,
2614                            CTempStringEx&     str2,
2615                            TSplitFlags        flags = 0,
2616                            CTempString_Storage* storage = NULL);
2617 
2618 
2619     /// Variation of Split() with the fSplit_ByPattern flag applied by default.
2620     /// The overloads below take the same arguments as the corresponding Split() overloads.
2621     static list<string>& SplitByPattern(
2622                                 const CTempString    str,
2623                                 const CTempString    delim,
2624                                 list<string>&        arr,
2625                                 TSplitFlags          flags = 0,
2626                                 vector<SIZE_TYPE>*   token_pos = NULL);
2627 
2628     static vector<string>& SplitByPattern(
2629                                 const CTempString    str,
2630                                 const CTempString    delim,
2631                                 vector<string>&      arr,
2632                                 TSplitFlags          flags = 0,
2633                                 vector<SIZE_TYPE>*   token_pos = NULL);
2634 
2635     static list<CTempString>& SplitByPattern(
2636                                 const CTempString    str,
2637                                 const CTempString    delim,
2638                                 list<CTempString>&   arr,
2639                                 TSplitFlags          flags = 0,
2640                                 vector<SIZE_TYPE>*   token_pos = NULL,
2641                                 CTempString_Storage* storage = NULL);
2642 
2643     static vector<CTempString>& SplitByPattern(
2644                                 const CTempString    str,
2645                                 const CTempString    delim,
2646                                 vector<CTempString>& arr,
2647                                 TSplitFlags          flags = 0,
2648                                 vector<SIZE_TYPE>*   token_pos = NULL,
2649                                 CTempString_Storage* storage = NULL);
2650 
2651     static list<CTempStringEx>& SplitByPattern(
2652                                 const CTempString    str,
2653                                 const CTempString    delim,
2654                                 list<CTempStringEx>& arr,
2655                                 TSplitFlags          flags = 0,
2656                                 vector<SIZE_TYPE>*   token_pos = NULL,
2657                                 CTempString_Storage* storage = NULL);
2658 
2659     static vector<CTempStringEx>& SplitByPattern(
2660                                 const CTempString    str,
2661                                 const CTempString    delim,
2662                                 vector<CTempStringEx>& arr,
2663                                 TSplitFlags          flags = 0,
2664                                 vector<SIZE_TYPE>*   token_pos = NULL,
2665                                 CTempString_Storage* storage = NULL);
2666 
2667     /// Join strings using the specified delimiter.
2668     ///
2669     /// @param arr
2670     ///   Container of strings to be joined; iterated with begin(arr)/end(arr).
2671     /// @param delim
2672     ///   Delimiter used to join the strings.
2673     /// @return
2674     ///   The strings in "arr" are joined into a single string, separated
2675     ///   with "delim".
2676     /// @sa Split
2677     template<typename TContainer>
2678     static string
2679     Join(const TContainer& arr, const CTempString& delim)
2680     {
2681         return x_Join(begin(arr), end(arr), delim);
2682     }
2683     template<typename TValue>  // Join() overload accepting a braced initializer list
2684     static string
2685     Join(const initializer_list<TValue>& arr, const CTempString& delim)
2686     {
2687         return x_Join(begin(arr), end(arr), delim);
2688     }
2689     template<typename TInputIterator>  // Join() overload for an iterator range [from, to)
2690     static string
2691     Join( TInputIterator from, TInputIterator to, const CTempString& delim)
2692     {
2693         return x_Join(from, to, delim);
2694     }
2695     template<typename TInputIterator>  // forwards to x_Join(), exactly like Join(from, to, delim)
2696     static string
2697     JoinNumeric( TInputIterator from, TInputIterator to, const CTempString& delim)
2698     {
2699         return x_Join( from, to, delim);
2700     }
2701     template<typename TIterator, typename FTransform>  // declaration only; defined elsewhere
2702     static string  // NOTE(review): presumably joins fnTransform(*it) results with "delim" -- confirm
2703     TransformJoin( TIterator from, TIterator to, const CTempString& delim, FTransform fnTransform);
2704 
2705 
2706     /// How to display printable strings.
2707     ///
2708     /// Flags that control how PrintableString() makes a printable version of "str".
2709     enum EPrintableMode {
2710         fNewLine_Quote     = 0,  ///< Display "\n" instead of actual linebreak
2711         eNewLine_Quote     = fNewLine_Quote,
2712         fNewLine_Passthru  = 1,  ///< Break the line at every "\n" occurrence
2713         eNewLine_Passthru  = fNewLine_Passthru,
2714         fNonAscii_Passthru = 0,  ///< Allow non-ASCII but printable characters
2715         fNonAscii_Quote    = 2,  ///< Octal for all non-ASCII characters
2716         fPrintable_Full    = 64  ///< Show all octal digits at all times
2717     };
2718     typedef int TPrintableMode;  ///< Bitwise OR of EPrintableMode flags
2719 
2720     /// Get a printable version of the specified string.
2721     ///
2722     /// All non-printable characters will be represented as "\r", "\n", "\v",
2723     /// "\t", "\"", "\\\\", etc, or "\\ooo" where 'ooo' is an octal code of the
2724     /// character.  The resultant string is a well-formed C string literal,
2725     /// which, without alterations, can be compiled by a C/C++ compiler.
2726     /// In many instances, octal representations of non-printable characters
2727     /// can be reduced to take less than all 3 digits, if there is no
2728     /// ambiguity in the interpretation.  fPrintable_Full cancels the
2729     /// reduction, and forces full 3-digit octal codes to be produced throughout.
2730     ///
2731     /// @param str
2732     ///   The string whose printable version is wanted.
2733     /// @param mode
2734     ///   How to display the string (bitwise OR of EPrintableMode flags).
2735     ///   The default fNewLine_Quote shows new lines as "\n", with octal code reduction.
2736     ///   When set to fNewLine_Passthru, line breaks are actually
2737     ///   produced on output but preceded with trailing backslashes.
2738     /// @return
2739     ///   Return a printable version of "str".
2740     /// @sa
2741     ///   ParseEscapes, Escape, CEncode, CParse, Sanitize
2742     static string PrintableString(const CTempString str,
2743                                   TPrintableMode    mode = fNewLine_Quote | fNonAscii_Passthru);
2744 
2745     /// Escape string (generic version).
2746     ///
2747     /// Prefix any occurrences of the metacharacters with the escape character.
2748     /// @param str
2749     ///   The string to be escaped.
2750     /// @param metacharacters
2751     ///   List of characters that need to be escaped.
2752     ///   Use NStr::Join() if you have metacharacters in list<>, vector<> or set<>.
2753     /// @param escape_char
2754     ///   Character used for escaping metacharacters (default is backslash).
2755     ///   Each metacharacter will be replaced with pair "escape_char + metacharacter".
2756     ///   Each escape character will be replaced with pair "escape_char + escape_char".
2757     /// @return
2758     ///   Escaped string.
2759     /// @sa
2760     ///   Unescape, PrintableString, Join
2761     static string Escape(const CTempString str, const CTempString metacharacters,
2762                          char escape_char = '\\');
2763 
2764     /// Unescape string (generic version).
2765     ///
2766     /// Remove the escape characters added by Escape().
2767     /// @param str
2768     ///   The string to be processed.
2769     /// @param escape_char
2770     ///   Character that was used for escaping (backslash by default).
2771     /// @return
2772     ///   Unescaped string.
2773     /// @sa
2774     ///   Escape
2775     static string Unescape(const CTempString str, char escape_char = '\\');
2776 
2777 
2778     /// Quote string (generic version).
2779     ///
2780     /// Prepend and append the specified quote character, escaping every occurrence
2781     /// of the quote character inside the string, either with the specified escape
2782     /// character (default '\') or, optionally, by doubling the quote character when the
2783     /// escape character is the same (e.g. like the single quote in SQL, double-quote in CSV).
2784     ///
2785     /// @param str
2786     ///   The string to be quoted.
2787     /// @param quote_char
2788     ///   Character used for quoting, defaults to the double quote '"'.
2789     /// @param escape_char
2790     ///   Character used for escaping other quote characters inside the string (default '\').
2791     ///   Each <quote_char>  in the string will be replaced with the pair "escape_char + quote_char".
2792     ///   Each <escape_char> in the string will be replaced with the pair "escape_char + escape_char".
2793     /// @return
2794     ///   Quoted string.
2795     /// @sa
2796     ///   Unquote, ParseQuoted, CEncode
2797     static string Quote(const CTempString str, char quote_char = '"', char escape_char = '\\');
2798 
2799     /// Unquote string (generic version).
2800     ///
2801     /// Remove the quotation added by Quote(). The first character of the string is used as the quoting character.
2802     /// @param str
2803     ///   The string to be processed.
2804     /// @param escape_char
2805     ///   Character used for escaping (backslash by default).
2806     /// @return
2807     ///   Unquoted string.
2808     /// @sa
2809     ///   Quote, ParseQuoted, CEncode
2810     static string Unquote(const CTempString str, char escape_char = '\\');
2811 
2812 
2813     /// Flags for Sanitize().
2814     enum ESS_Flags {
2815         // Character class filters
2816         fSS_alpha = 1 << 0,    ///< Check on ::isalpha()
2817         fSS_digit = 1 << 1,    ///< Check on ::isdigit()
2818         fSS_alnum = 1 << 2,    ///< Check on ::isalnum()
2819         fSS_print = 1 << 3,    ///< Check on ::isprint()
2820         fSS_cntrl = 1 << 4,    ///< Check on ::iscntrl()
2821         fSS_punct = 1 << 5,    ///< Check on ::ispunct()
2822 
2823         // Filter direction: allow or reject the selected characters?
2824         fSS_Reject = 1 << 11,  ///< Reject the specified characters, allow all others.
2825                                ///< Inverts the default behavior, which allows the specified
2826                                ///< characters and rejects all others.
2827         // Utility flags
2828         fSS_Remove           = 1 << 12,  ///< Remove (rather than replace) rejected chars
2829         fSS_NoMerge          = 1 << 13,  ///< Do not merge adjacent spaces (rejected chars)
2830         fSS_NoTruncate_Begin = 1 << 14,  ///< Do not truncate leading spaces
2831         fSS_NoTruncate_End   = 1 << 15,  ///< Do not truncate trailing spaces
2832         fSS_NoTruncate       = fSS_NoTruncate_Begin | fSS_NoTruncate_End  ///< Do not truncate leading or trailing spaces
2833     };
2834     typedef int TSS_Flags;  ///< Bitwise OR of ESS_Flags
2835 
2836     /// Sanitize a string, allowing only specified classes of characters.
2837     ///
2838     /// By default:
2839     ///    - replace all non-printable characters with spaces;
2840     ///    - merge coalescent spaces;
2841     ///    - truncate leading and trailing spaces.
2842     /// @note
2843     ///   - All coalescent leading/trailing spaces will also be merged
2844     ///     by default if fSS_NoMerge is not specified.
2845     ///   - The truncation of leading/trailing spaces is done after
2846     ///     allowing/rejecting characters. Depending on the specified flags,
2847     ///     all rejected characters adjacent to them can be treated as part
2848     ///     of the leading/trailing spaces.
2849     /// @param str
2850     ///   String to sanitize.
2851     /// @param flags
2852     ///   Alternative sanitation options (bitwise OR of ESS_Flags); fSS_print by default.
2853     /// @return
2854     ///   Sanitized string.
2855     /// @sa
2856     ///   PrintableString
Sanitize(CTempString str,TSS_Flags flags=fSS_print)2857     static string Sanitize(CTempString str, TSS_Flags flags = fSS_print)
2858     {
2859         return Sanitize(str, CTempString(), CTempString(), ' ', flags);  // forward with empty allow/reject lists and ' ' as the replacement
2860     }
2861 
2862 
2863     /// Sanitize a string, allowing only specified characters or character classes.
2864     ///
2865     /// More customizable version of Sanitize():
2866     ///    - allows specifying custom sets of allowed and rejected characters,
2867     ///      in addition to predefined classes if specified, see TSS_Flags;
2868     ///    - allows specifying the replacement character for rejected symbols.
2869     /// By default:
2870     ///    - replace all rejected characters with <reject_replacement>;
2871     ///    - merge coalescent spaces and <reject_replacement>s (separately if they differ);
2872     ///    - truncate leading and trailing spaces.
2873     /// Filters check order:
2874     ///    - character classes via flags.
2875     ///      Note that if no character classes are set, and no custom <allow_chars>
2876     ///      or <reject_chars> are given, fSS_print will be used;
2877     ///    - <allow_chars>, if not empty, has priority over flags;
2878     ///    - <reject_chars>, if not empty, has priority over flags and over <allow_chars> where they intersect.
2879     /// @note
2880     ///   - All coalescent leading/trailing spaces will also be merged
2881     ///     by default if fSS_NoMerge is not specified.
2882     ///   - The truncation of leading/trailing spaces is done after
2883     ///     allowing/rejecting characters.
2884     /// @note
2885     ///   Spaces are processed after the checks on allowance, so if a space isn't allowed
2886     ///   it will be treated as a regular rejected character.
2887     /// @param str
2888     ///   String to sanitize.
2889     /// @param allow_chars
2890     ///   Additional list of allowed characters, in addition to character classes in <flags>.
2891     ///   Has priority over character classes.
2892     ///   Use NStr::Join() if you have it in list<>, vector<> or set<>.
2893     /// @param reject_chars
2894     ///   Additional list of rejected characters, in addition to character classes in <flags>.
2895     ///   Has priority over character classes and <allow_chars>.
2896     ///   Use NStr::Join() if you have it in list<>, vector<> or set<>.
2897     /// @param reject_replacement
2898     ///   Replacement character for all rejected characters (space by default).
2899     /// @param flags
2900     ///   Alternative sanitation options.
2901     ///   If there are no custom <allow_chars> or <reject_chars>, and no character classes are set, then fSS_print is used by default.
2902     ///   If <reject_chars> is given, with no class and no fSS_Reject flag, then all characters are allowed except <reject_chars>.
2903     ///   If <allow_chars> is given, with no class but with the fSS_Reject flag, then no characters are allowed except <allow_chars>.
2904     /// @return
2905     ///   Sanitized string.
2906     /// @sa
2907     ///   PrintableString, Join
2908     static string Sanitize(CTempString str,
2909                            CTempString allow_chars,
2910                            CTempString reject_chars,
2911                            char reject_replacement = ' ',
2912                            TSS_Flags flags = 0);
2913 
2914     /// C-style escape sequences parsing mode.
2915     /// For escape sequences with a value outside the range of [0-255]
2916     /// the behavior of ParseEscapes() depends on this mode.
2917     /// By default all escape sequences that are out of range
2918     /// will be converted to the least significant byte, with no warning.
2919     enum EEscSeqRange {
2920         eEscSeqRange_Standard,   ///< Set char to the last (least significant
2921                                  ///< byte) of the escape sequence (default).
2922         eEscSeqRange_FirstByte,  ///< Set char to the first byte of the escape
2923                                  ///< sequence.
2924         eEscSeqRange_Throw,      ///< Throw an exception.
2925         eEscSeqRange_Errno,      ///< Set errno to ERANGE, return empty string.
2926         eEscSeqRange_User        ///< Set char to the user value
2927                                  ///< passed in another parameter.
2928     };
2929 
2930     /// Parse C-style escape sequences in the specified string.
2931     ///
2932     /// Parse escape sequences including all those produced by PrintableString.
2933     /// @param str
2934     ///   The string to be parsed.
2935     /// @param mode
2936     ///   Parsing mode.
2937     ///   By default all escape sequences with a value outside the range of [0-255]
2938     ///   will be converted to the least significant byte, with no warning.
2939     /// @param user_char
2940     ///   If 'mode' is eEscSeqRange_User, replace all out of range
2941     ///   escape sequences with this char ('?' by default).
2942     /// @return
2943     ///   String with parsed C-style escape sequences.
2944     ///   - If the string has a wrong format, throw a CStringException exception.
2945     ///   - If parsing succeeds, return the converted value.
2946     ///     Set errno to zero only if eEscSeqRange_Errno is set.
2947     ///   - Otherwise, if an escape sequence is out of range [0-255],
2948     ///     see eEscSeqRange* modes for behavior.
2949     /// @sa
2950     ///   EEscSeqRange, PrintableString, CEncode, CParse
2951     static string ParseEscapes(const CTempString str,
2952                                EEscSeqRange mode = eEscSeqRange_Standard,
2953                                char user_char = '?');
2954 
2955     /// Discard C-style backslash escapes and extract a quoted string.
2956     ///
2957     /// @param[in] str
2958     ///   The original string to extract a quoted string from.
2959     ///   It must start with a double quote.
2960     /// @param[out] n_read
2961     ///   How many symbols the quoted string occupied in the original string (optional, NULL by default).
2962     /// @return
2963     ///   The extracted string, un-escaped and with the quotes removed.
2964     ///   Throw an exception on format error.
2965     static string ParseQuoted(const CTempString str, size_t* n_read = NULL);
2966 
2967     /// Specify whether a string is quoted or not (used by CEncode() and CParse()).
2968     enum EQuoted {
2969         eQuoted,       ///< String is quoted
2970         eNotQuoted     ///< String is not quoted
2971     };
2972 
2973     /// Encode a string for C/C++.
2974     ///
2975     /// @param str
2976     ///   The string to be encoded.
2977     /// @param quoted
2978     ///   Whether the string is quoted (eQuoted, the default) or not (eNotQuoted).  NOTE(review): the original description was cut off ("Define, to"); presumably this selects enclosing the result in double quotes -- confirm against the implementation.
2979     /// @sa
2980     ///   CParse, PrintableString
2981     static string CEncode(const CTempString str, EQuoted quoted = eQuoted);
2982 
2983     /// Discard C-style backslash escapes.
2984     ///
2985     /// @param str
2986     ///   The original string to parse.
2987     /// @param quoted
2988     ///   Define whether the string to parse is quoted or not.
2989     ///   If "quoted" equals eQuoted and the string does not start and
2990     ///   finish with a double-quote, an exception will be thrown;
2991     ///   otherwise the quotes will be removed from the result.
2992     /// @return
2993     ///   String with parsed C-style escape sequences.
2994     /// @sa
2995     ///   CEncode
2996     static string CParse(const CTempString str, EQuoted quoted = eQuoted);
2997 
2998     /// Encode a string for JavaScript.
2999     ///
3000     /// Replace relevant characters by predefined entities.
3001     /// Similar to PrintableString(), but processes some symbols in a different way.
3002     /// @sa PrintableString
3003     static string JavaScriptEncode(const CTempString str);
3004 
3005     /// XML-encode flags
3006     enum EXmlEncode {
3007         /// Encode predefined entities only
3008         eXmlEnc_Contents = 0,
3009         /// Encode double hyphen and ending hyphen,
3010         /// making the result safe to put into XML comments.
3011         eXmlEnc_CommentSafe   = 1 << 0,
3012         /// Check each character for conformance to the XML 1.1 standard,
3013         /// and either skip any disallowed character or throw a CStringException.
3014         /// https://www.w3.org/TR/xml11/#NT-Char
3015         eXmlEnc_Unsafe_Skip   = 1 << 1,
3016         eXmlEnc_Unsafe_Throw  = 1 << 2
3017     };
3018     typedef int TXmlEncode;   ///< Bitwise OR of "EXmlEncode"
3019 
3020     /// Encode a string for XML.
3021     ///
3022     /// Replace relevant characters by predefined entities.
         /// @param str
         ///   The string to encode.
         /// @param flags
         ///   Bitwise OR of EXmlEncode flags; eXmlEnc_Contents by default.
3023     static string XmlEncode(const CTempString str,
3024                             TXmlEncode flags = eXmlEnc_Contents);
3025 
3026 
3027     /// HTML-encode flags
3028     enum EHtmlEncode {
3029         fHtmlEnc_EncodeAll           = 0,       ///< Encode all symbols
3030         fHtmlEnc_SkipLiteralEntities = 1 << 1,  ///< Skip "&entity;"
3031         fHtmlEnc_SkipNumericEntities = 1 << 2,  ///< Skip "&#NNNN;"
3032         fHtmlEnc_SkipEntities        = fHtmlEnc_SkipLiteralEntities | fHtmlEnc_SkipNumericEntities,  ///< Skip both kinds of entities
3033         fHtmlEnc_CheckPreencoded     = 1 << 3   ///< Print warning if some pre-encoded
3034                                                 ///< entity is found in the string
3035     };
3036     typedef int THtmlEncode;   ///< Bitwise OR of "EHtmlEncode"
3037 
3038     /// Encode a string for HTML.
3039     ///
3040     /// Replace relevant characters by predefined entities.
3041     /// @param str
3042     ///   Original string in UTF8 encoding.
         /// @param flags
         ///   Bitwise OR of EHtmlEncode flags; fHtmlEnc_EncodeAll by default.
3043     static string HtmlEncode(const CTempString str,
3044                              THtmlEncode flags = fHtmlEnc_EncodeAll);
3045 
3046     /// HTML-decode flags (each describes something found while decoding; see the "result_flags" argument of HtmlDecode())
3047     enum EHtmlDecode {
3048         fHtmlDec_CharRef_Entity   = 1,       ///< Character entity reference(s) found
3049         fHtmlDec_CharRef_Numeric  = 1 << 1,  ///< Numeric character reference(s) found
3050         fHtmlDec_Encoding_Changed = 1 << 2   ///< Character encoding changed
3051     };
3052     typedef int THtmlDecode;   ///< Bitwise OR of "EHtmlDecode"
3053 
3054     /// Decode HTML entities and character references.
3055     ///
3056     /// @param str
3057     ///   String to be decoded, which contains character or numeric HTML entities.
3058     /// @param encoding
3059     ///   Encoding of the input string (eEncoding_Unknown by default).
         /// @param result_flags
         ///   Optional (NULL by default).  NOTE(review): presumably receives a bitwise OR of
         ///   EHtmlDecode flags describing what was found -- confirm against the implementation.
3060     /// @return
3061     ///   UTF8 encoded string.
3062     static string HtmlDecode(const CTempString str,
3063                              EEncoding encoding = eEncoding_Unknown,
3064                              THtmlDecode* result_flags = NULL);
3065 
3066     /// Return the HTML entity name for the given symbol, if one exists
3067     /// (without the leading ampersand and the trailing semicolon);
3068     /// or an empty string if no suitable HTML entity was found.
3069     static string HtmlEntity(TUnicodeSymbol uch);
3070 
3071     /// JSON-encode flags (see JsonEncode())
3072     enum EJsonEncode {
3073         eJsonEnc_UTF8,     ///< Encode all characters above 0x80 to \uXXXX form.
3074                            ///< https://tools.ietf.org/html/rfc7159#section-8.1
3075         eJsonEnc_Quoted    ///< Quote the resulting string. Keep all Unicode symbols as is.
3076                            ///< https://tools.ietf.org/html/rfc7159#section-7
3077     };
3078     /// Encode a string for JSON.
3079     ///
3080     /// @param str
3081     ///   The string to encode.
3082     /// @param encoding
3083     ///   Specifies how to encode the string. There are 2 approaches: eJsonEnc_UTF8 (default)
3084     ///   encodes all characters above 0x80 to \uXXXX form; eJsonEnc_Quoted leaves all Unicode
3085     ///   symbols "as is", but the resulting string will be put in double quotes.
3086     /// @return
3087     ///   JSON encoded string.
3088     static string JsonEncode(const CTempString str, EJsonEncode encoding = eJsonEnc_UTF8);
3089 
3090     /// Decode a string encoded by JsonEncode.
3091     ///
3092     /// @param str
3093     ///   The string to decode.
3094     ///   It must be in double quotes.
3095     /// @param[out] n_read
3096     ///   How many symbols the quoted string occupied in the original string (optional, NULL by default).
3097     /// @sa
3098     ///   JsonEncode
3099     /// @warning
3100     ///   This method only supports strings encoded by JsonEncode-specific encodings.
3101     static string JsonDecode(const CTempString str, size_t* n_read = NULL);
3102 
3103     /// Quote a string in Bourne Again Shell (BASH) syntax, in a way
3104     /// that disallows non-printable characters in the result.
3105     /// This function does NOT implement aesthetically optimal quoting,
3106     /// but does try to avoid redundant quoting in simpler cases.
3107     /// Also, since it implements BASH syntax, the result may be
3108     /// incompatible with Bourne syntax, and may be non-obvious to
3109     /// people who are not familiar with the extended quoting syntax.
3110     /// @note The BASH shell has extensions beyond Bourne Shell quoting.
3111     ///       Also, this is very different from C Shell quoting, and
3112     ///       from MS Windows Command Prompt quoting rules.
3113     static string ShellEncode(const string& str);
3114 
3115     /// URL-encode flags
3116     enum EUrlEncode {
3117         eUrlEnc_SkipMarkChars,    ///< Do not convert chars like '!', '(' etc.
3118         eUrlEnc_ProcessMarkChars, ///< Convert all non-alphanumeric chars, spaces are converted to '+'
3119         eUrlEnc_PercentOnly,      ///< Convert all non-alphanumeric chars including space and '%' to %## format
3120         eUrlEnc_Path,             ///< Same as ProcessMarkChars but preserves valid path characters ('/', '.')
3121         eUrlEnc_URIScheme,        ///< Encode scheme part of a URI.
3122         eUrlEnc_URIUserinfo,      ///< Encode userinfo part of a URI.
3123         eUrlEnc_URIHost,          ///< Encode host part of a URI.
3124         eUrlEnc_URIPath,          ///< Encode path part of a URI.
3125         eUrlEnc_URIQueryName,     ///< Encode query part of a URI, arg name.
3126         eUrlEnc_URIQueryValue,    ///< Encode query part of a URI, arg value.
3127         eUrlEnc_URIFragment,      ///< Encode fragment part of a URI.
3128         eUrlEnc_Cookie,           ///< Same as SkipMarkChars with encoded ','
3129         eUrlEnc_None              ///< Do not encode
3130     };
3131     /// URL-decode flags (see URLDecode(), URLDecodeInPlace())
3132     enum EUrlDecode {
3133         eUrlDec_All,              ///< Decode '+' to space
3134         eUrlDec_Percent           ///< Decode only %XX
3135     };
3136     /// URL-encode string
         ///
         /// @param str
         ///   The string to encode.
         /// @param flag
         ///   Encoding mode, see EUrlEncode; eUrlEnc_SkipMarkChars by default.
3137     static string URLEncode(const CTempString str,
3138                             EUrlEncode flag = eUrlEnc_SkipMarkChars);
3139 
3140     /// SQL-encode flags (see SQLEncode())
3141     enum ESqlEncode {
3142         eSqlEnc_Plain,       ///< Always produce '...', with no tag.
3143         eSqlEnc_TagNonASCII  ///< Produce N'...' when the input is not pure ASCII.
3144     };
3145     /// SQL-encode string
3146     ///
3147     /// There are some assumptions/notes about the function:
3148     /// 1. Only for MS SQL and Sybase.
3149     /// 2. Only for string values in WHERE and LIKE clauses.
3150     /// 3. The ' symbol must not be used as an escape symbol in LIKE clause.
3151     /// 4. It must not be used for non-string values.
3152     /// 5. It expects a string without any outer quotes, and
3153     ///    it adds single quotes to the returned string.
3154     /// 6. It expects UTF-8 (including its subsets, ASCII and Latin1) or
3155     ///    Win1252 string, and the input encoding is preserved.
3156     /// @param str
3157     ///   The string to encode
3158     /// @param flag
3159     ///   Whether to tag the result with an N prefix if it contains any
3160     ///   non-ASCII characters.  Such tagging is generally advisable,
3161     ///   but off by default (in the deprecated one-argument overload) per
3162     ///   historical practice, since there are corner cases in which it may be inappropriate.
3163     /// @return
3164     ///   Encoded string with added outer single quotes
3165     static CStringUTF8 SQLEncode(const CStringUTF8& str, ESqlEncode flag);
3166 
         /// Deprecated overload: forwards to SQLEncode(str, eSqlEnc_Plain).
SQLEncode(const CStringUTF8 & str)3167     NCBI_DEPRECATED static CStringUTF8 SQLEncode(const CStringUTF8& str)
3168         { return SQLEncode(str, eSqlEnc_Plain); }
3169 
3170     /// URL-decode string and return the result; "flag" selects the decoding mode (see EUrlDecode, eUrlDec_All by default)
3171     static string URLDecode(const CTempString str, EUrlDecode flag = eUrlDec_All);
3172     /// URL-decode string to itself ("str" is modified in place); "flag" selects the decoding mode (see EUrlDecode, eUrlDec_All by default)
3173     static void URLDecodeInPlace(string& str, EUrlDecode flag = eUrlDec_All);
3174     /// Check if the string needs the requested URL-encoding (see EUrlEncode, eUrlEnc_SkipMarkChars by default)
3175     static bool NeedsURLEncoding(const CTempString str, EUrlEncode flag = eUrlEnc_SkipMarkChars);
3176 
3177     /// Base64-encode string.
3178     ///
3179     /// @param str
3180     ///   The string to encode.
3181     /// @param line_len
3182     ///   Specify a length for Base64-encoded lines. The default of 0 means no line breaks at all.
3183     /// @return
3184     ///   Encoded string.
3185     /// @sa Base64Decode, BASE64_Encode, BASE64_Decode
3186     static string Base64Encode(const CTempString str, size_t line_len = 0);
3187 
3188     /// Base64-decode string.
3189     ///
3190     /// @param str
3191     ///   The string to decode.
3192     /// @return
3193     ///   Decoded string, or an empty string on decoding error.
3194     /// @sa Base64Encode, BASE64_Encode, BASE64_Decode
3195     static string Base64Decode(const CTempString str);
3196 
3197     /// Check if the string contains a valid IP address; return true if it does.
3198     static bool IsIPAddress(const CTempStringEx str);
3199 
3200 
3201     /// How to wrap the words in a string to a new line (flags for Wrap(), WrapIt(), WrapList()).
3202     enum EWrapFlags {
3203         fWrap_Hyphenate  = 0x1, ///< Add a hyphen when breaking words?
3204         fWrap_HTMLPre    = 0x2, ///< Wrap as pre-formatted HTML?
3205         fWrap_FlatFile   = 0x4  ///< Wrap for flat file use.
3206     };
3207     typedef int TWrapFlags;     ///< Bitwise OR of "EWrapFlags"
3208 
3209     /// Wrap the specified string into lines of a specified width.
3210     ///
3211     /// Split string "str" into lines of width "width" and add the
3212     /// resulting lines to the destination "dest".  Normally, all
3213     /// lines will begin with "prefix" (counted against "width"),
3214     /// but the first line will instead begin with "prefix1" if
3215     /// you supply it.
3216     ///
3217     /// @param str
3218     ///   String to be split into wrapped lines.
3219     /// @param width
3220     ///   Width of each wrapped line.
3221     /// @param dest
3222     ///   Destination (of the template type _D) that receives the wrapped lines.
3223     /// @param flags
3224     ///   How to wrap the words to a new line. See EWrapFlags documentation.
3225     /// @param prefix
3226     ///   The prefix string added to each wrapped line, except the first line,
3227     ///   unless "prefix1" is set.
3228     ///   If "prefix" is set to 0 (default), do not add a prefix string to the
3229     ///   wrapped lines.
3230     /// @param prefix1
3231     ///   The prefix string for the first line. Use this for the first line
3232     ///   instead of "prefix".
3233     ///   If "prefix1" is set to 0 (default), do not add a prefix string to the
3234     ///   first line.
3235     /// @note
3236     ///   WrapIt() returns nothing (void); the wrapped lines are delivered through "dest".
3237     template<typename _D>
3238     static void WrapIt(const string& str, SIZE_TYPE width,
3239         _D& dest, TWrapFlags flags = 0,
3240         const string* prefix = 0,
3241         const string* prefix1 = 0);
3242 
         /// Abstract destination interface taken by Wrap(..., IWrapDest& dest, ...).
         /// Declares a virtual destructor and two pure virtual Append() overloads,
         /// one for string and one for CTempString.
         /// NOTE(review): presumably Append() is called once per wrapped line -- confirm in Wrap().
3243     class IWrapDest
3244     {
3245     public:
~IWrapDest()3246         virtual ~IWrapDest() {}
3247         virtual void Append(const string& s) = 0;       ///< Accept a piece of output given as a string
3248         virtual void Append(const CTempString& s) = 0;  ///< Accept a piece of output given as a CTempString
3249     };
3250 
         /// IWrapDest implementation that appends everything it receives to a list<string>.
         /// Only a reference to the list is stored, so the list must outlive this object.
3251     class CWrapDestStringList : public IWrapDest
3252     {
3253     protected:
3254         list<string>& m_list;  ///< Destination list (held by reference)
3255     public:
CWrapDestStringList(list<string> & l)3256         CWrapDestStringList(list<string>& l) : m_list(l) {};
         /// Append a copy of "s" as a new element at the end of the list.
Append(const string & s)3257         virtual void Append(const string& s)
3258         {
3259             m_list.push_back(s);
3260         }
         /// Append the characters of "s" as a new element at the end of the list.
Append(const CTempString & s)3261         virtual void Append(const CTempString& s)
3262         {
3263             m_list.push_back(NcbiEmptyString);           // add an empty element first ...
3264             m_list.back().assign(s.data(), s.length());  // ... then fill it in place from the data/length pair
3265         }
3266     };
3267 
         /// Wrap "str" into lines of width "width" -- overload taking an IWrapDest destination.
         /// Unlike the list<string> overloads below, it has no default arguments and returns nothing.
         /// NOTE(review): argument meanings are presumably those described for WrapIt() -- confirm.
3268     static void Wrap(const string& str, SIZE_TYPE width,
3269                               IWrapDest& dest, TWrapFlags flags,
3270                               const string* prefix,
3271                               const string* prefix1);
3272 
         /// Overload taking a list<string> "arr", with both prefixes passed as optional pointers (0 by default).
         /// NOTE(review): the returned reference is presumably "arr" itself -- confirm.
3273     static list<string>& Wrap(const string& str, SIZE_TYPE width,
3274                               list<string>& arr, TWrapFlags flags = 0,
3275                               const string* prefix = 0,
3276                               const string* prefix1 = 0);
3277 
         /// Same, but with "prefix" passed by reference and "prefix1" as an optional pointer (0 by default).
3278     static list<string>& Wrap(const string& str, SIZE_TYPE width,
3279                               list<string>& arr, TWrapFlags flags,
3280                               const string& prefix,
3281                               const string* prefix1 = 0);
3282 
         /// Same, but with both "prefix" and "prefix1" passed by reference.
3283     static list<string>& Wrap(const string& str, SIZE_TYPE width,
3284                               list<string>& arr, TWrapFlags flags,
3285                               const string& prefix,
3286                               const string& prefix1);
3287 
3288 
3289     /// Wrap the list using the specified criteria.
3290     ///
3291     /// WrapList() is similar to Wrap(), but tries to avoid splitting any
3292     /// elements of the list to be wrapped.  Also, the "delim" only applies
3293     /// between elements on the same line; if you want everything to end with
3294     /// commas or such, you should add them first.
3295     ///
3296     /// @param l
3297     ///   The list to be wrapped.
3298     /// @param width
3299     ///   Width of each wrapped line.
3300     /// @param delim
3301     ///   Delimiter placed between elements on the same line.
3302     /// @param arr
3303     ///   List containing the wrapped list result.
3304     /// @param flags
3305     ///   How to wrap the words to a new line. See EWrapFlags documentation.
3306     /// @param prefix
3307     ///   The prefix string added to each wrapped line, except the first line,
3308     ///   unless "prefix1" is set.
3309     ///   If "prefix" is set to 0 (default), do not add a prefix string to the
3310     ///   wrapped lines.
3311     /// @param prefix1
3312     ///   The prefix string for the first line. Use this for the first line
3313     ///   instead of "prefix".
3314     ///   If "prefix1" is set to 0 (default), do not add a prefix string to the
3315     ///   first line.
3316     /// @return
3317     ///   Return "arr", the wrapped list.
3318     static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
3319                                   const string& delim, list<string>& arr,
3320                                   TWrapFlags    flags = 0,
3321                                   const string* prefix = 0,
3322                                   const string* prefix1 = 0);
3323 
         /// Overload with "prefix" passed by reference and "prefix1" as an optional pointer (0 by default).
3324     static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
3325                                   const string& delim, list<string>& arr,
3326                                   TWrapFlags    flags,
3327                                   const string& prefix,
3328                                   const string* prefix1 = 0);
3329 
         /// Overload with both "prefix" and "prefix1" passed by reference.
3330     static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
3331                                   const string& delim, list<string>& arr,
3332                                   TWrapFlags    flags,
3333                                   const string& prefix,
3334                                   const string& prefix1);
3335 
3336 
3337     /// Justify the specified string into a series of lines of the same width.
3338     ///
3339     /// Split string "str" into a series of lines, all of which are to
3340     /// be "width" characters wide (by adding extra inner spaces between
3341     /// words), and store the resulting lines in the list "par".  Normally,
3342     /// all lines in "par" will begin with "pfx" (counted against "width"),
3343     /// but the first line will instead begin with "pfx1" if provided.
3344     ///
3345     /// @note Words exceeding the specified "width" will not be split between
3346     /// lines but will occupy individual lines (which will be wider than "width").
3347     ///
3348     /// @param str
3349     ///   String to be split into justified lines.
3350     /// @param width
3351     ///   Width of every line (except for the last one).
3352     /// @param par
3353     ///   Resultant list of justified lines.
3354     /// @param pfx
3355     ///   The prefix string added to each line, except for the first line
3356     ///   if non-NULL "pfx1" is also set.  Empty (or NULL) "pfx" causes no
3357     ///   additions.
3358     /// @param pfx1
3359     ///   The prefix string for the first line, if non-NULL.
3360     /// @return
3361     ///   Return "par", the list of justified lines (a paragraph).
3362     static list<string>& Justify(const CTempString  str,
3363                                  SIZE_TYPE          width,
3364                                  list<string>&      par,
3365                                  const CTempString* pfx  = 0,
3366                                  const CTempString* pfx1 = 0);
3367 
         /// Overload with "pfx" passed by value and "pfx1" as an optional pointer (0 by default).
3368     static list<string>& Justify(const CTempString  str,
3369                                  SIZE_TYPE          width,
3370                                  list<string>&      par,
3371                                  const CTempString  pfx,
3372                                  const CTempString* pfx1 = 0);
3373 
         /// Overload with both "pfx" and "pfx1" passed by value.
3374     static list<string>& Justify(const CTempString  str,
3375                                  SIZE_TYPE          width,
3376                                  list<string>&      par,
3377                                  const CTempString  pfx,
3378                                  const CTempString  pfx1);
3379 
3380 
3381     /// Search for a field (delimited by any character from a set).
3382     ///
3383     /// @param str
3384     ///   C or C++ string to search in.
3385     /// @param field_no
3386     ///   Zero-based field number.
3387     /// @param delimiters
3388     ///   A set of single-character delimiters.
3389     /// @param merge
3390     ///   Whether or not to merge adjacent delimiters. Default: not to merge.
3391     /// @return
3392     ///   Found field; or empty string if the required field is not found.
3393     /// @note
3394     ///   Field 0 spans up to the first-found delimiter or the end-of-string.
3395     static string GetField(const CTempString str,
3396                            size_t            field_no,
3397                            const CTempString delimiters,
3398                            EMergeDelims      merge = eNoMergeDelims);
3399 
    /// Search for a field in a string split on a single delimiter character.
    ///
    /// @param str
    ///   C or C++ string to search in.
    /// @param field_no
    ///   Zero-based field number.
    /// @param delimiter
    ///   A single-character delimiter.
    /// @param merge
    ///   Whether to merge adjacent delimiters. Default: do not merge.
    /// @return
    ///   Found field (returned by value as a string); or empty string if the
    ///   required field is not found.
    /// @note
    ///   Field 0 spans up to the delimiter or the end-of-string.
    static string GetField(const CTempString str,
                           size_t            field_no,
                           char              delimiter,
                           EMergeDelims      merge = eNoMergeDelims);
3418 
    /// Search for a field in a delimiter-separated string.
    /// Avoid memory allocation at the expense of some usage safety.
    ///
    /// @param str
    ///   C or C++ string to search in.
    /// @param field_no
    ///   Zero-based field number.
    /// @param delimiters
    ///   A set of single-character delimiters.
    /// @param merge
    ///   Whether to merge adjacent delimiters. Default: do not merge.
    /// @return
    ///   Found field; or empty string if the required field is not found.
    /// @note
    ///   Field 0 spans up to the first-found delimiter or the end-of-string.
    /// @warning
    ///   The return value stores a pointer into the input string 'str', so
    ///   the returned object is valid only for as long as 'str' is.
    static
    CTempString GetField_Unsafe(const CTempString str,
                                size_t            field_no,
                                const CTempString delimiters,
                                EMergeDelims      merge = eNoMergeDelims);
3442 
    /// Search for a field in a string split on a single delimiter character.
    /// Avoid memory allocation at the expense of some usage safety.
    ///
    /// @param str
    ///   C or C++ string to search in.
    /// @param field_no
    ///   Zero-based field number.
    /// @param delimiter
    ///   A single-character delimiter.
    /// @param merge
    ///   Whether to merge adjacent delimiters. Default: do not merge.
    /// @return
    ///   Found field; or empty string if the required field is not found.
    /// @note
    ///   Field 0 spans up to the delimiter or the end-of-string.
    /// @warning
    ///   The return value stores a pointer into the input string 'str', so
    ///   the returned object is valid only for as long as 'str' is.
    static
    CTempString GetField_Unsafe(const CTempString str,
                                size_t            field_no,
                                char              delimiter,
                                EMergeDelims      merge = eNoMergeDelims);
3466 
3467 private:
3468 // implementations
3469 
3470 // StringToNumeric
    /// Report that a parsed value does not fit the requested numeric type.
    /// x_VerifyIntLimits() and x_VerifyFloatLimits() call this when their
    /// range check fails and return its result unchanged.
    /// NOTE(review): presumably throws or returns false depending on "flags";
    /// defined out of line -- confirm there.
    static bool x_ReportLimitsError(const CTempString str, TStringToNumFlags flags);
3472 
3473     template< typename TNumeric, typename TSource>
x_VerifyIntLimits(TSource v,const CTempString str,TStringToNumFlags flags)3474     static bool x_VerifyIntLimits(TSource v, const CTempString str, TStringToNumFlags flags)
3475     {
3476         if (v < numeric_limits<TNumeric>::min()  ||  v > numeric_limits<TNumeric>::max()) {
3477             return x_ReportLimitsError(str, flags);
3478         }
3479         return true;
3480     }
3481     template< typename TNumeric, typename TSource>
x_VerifyFloatLimits(TSource v,const CTempString str,TStringToNumFlags flags)3482     static bool x_VerifyFloatLimits(TSource v, const CTempString str, TStringToNumFlags flags)
3483     {
3484         // dont use ::min() for float types, it returns positive value
3485         if (v < -numeric_limits<TNumeric>::max()  ||  v > numeric_limits<TNumeric>::max()) {
3486             return x_ReportLimitsError(str, flags);
3487         }
3488         return true;
3489     }
3490 
3491     template <typename TNumeric>
3492     static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) < sizeof(int)), TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3493     x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3494     {
3495         int n = StringToInt(str, flags, base);
3496         return x_VerifyIntLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3497     }
3498     template <typename TNumeric>
3499     static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) < sizeof(unsigned int)), TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3500     x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3501     {
3502         unsigned int n = StringToUInt(str, flags, base);
3503         return x_VerifyIntLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3504     }
3505 
    /// Overload for signed integral types that have the size of "int"
    /// (excluding "long"): returns the result of StringToInt() as is.
    template <typename TNumeric>
    static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(int) && !is_same<TNumeric, long>::value), TNumeric>::type
    x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
    {
        return StringToInt(str, flags, base);
    }
    /// Overload for unsigned integral types that have the size of
    /// "unsigned int" (excluding "unsigned long"): returns the result of
    /// StringToUInt() as is.
    template <typename TNumeric>
    static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), TNumeric>::type
    x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
    {
        return StringToUInt(str, flags, base);
    }
    /// Overload for exactly "long": returns the result of StringToLong().
    /// "long" is singled out by type, not by size, so that it never matches
    /// the size-based overloads.
    template <typename TNumeric>
    static typename enable_if< is_same<TNumeric, long>::value, TNumeric>::type
    x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
    {
        return StringToLong(str, flags, base);
    }
    /// Overload for exactly "unsigned long": returns the result of
    /// StringToULong(). "unsigned long" is singled out by type, not by size,
    /// so that it never matches the size-based overloads.
    template <typename TNumeric>
    static typename enable_if< is_same<TNumeric, unsigned long>::value, TNumeric>::type
    x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
    {
        return StringToULong(str, flags, base);
    }
    /// Overload for signed integral types that have the size of Int8
    /// (excluding "long"): returns the result of StringToInt8() as is.
    template <typename TNumeric>
    static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), TNumeric>::type
    x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
    {
        return StringToInt8(str, flags, base);
    }
    /// Overload for unsigned integral types that have the size of Uint8
    /// (excluding "unsigned long"): returns the result of StringToUInt8()
    /// as is.
    template <typename TNumeric>
    static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(Uint8) && !is_same<TNumeric, unsigned long>::value), TNumeric>::type
    x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
    {
        return StringToUInt8(str, flags, base);
    }
    /// Overload for "strict id" wrapper types, i.e. classes that expose an
    /// integral nested type TId and a member function Get().
    /// Parses the string as the underlying TId via StringToNumeric() and
    /// constructs the wrapper from the parsed value.
    template <typename TStrictId>
    static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, TStrictId>::type
        x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
    {
        return TStrictId(StringToNumeric<typename TStrictId::TId>(str, flags, base));
    }
3548 
3549     template <typename TNumeric>
3550     static typename enable_if< is_same<TNumeric, float>::value, TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int)3551     x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int /*base*/)
3552     {
3553         double n = StringToDouble(str, flags);
3554         return x_VerifyFloatLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3555     }
    /// Overload for "double": returns the result of StringToDouble().
    /// The "base" argument is ignored.
    template <typename TNumeric>
    static typename enable_if< is_same<TNumeric, double>::value, TNumeric>::type
    x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int /*base*/)
    {
        return StringToDouble(str, flags);
    }
3562 
3563     template <typename TNumeric>
3564     static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) < sizeof(int)), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3565     x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3566     {
3567         int n = StringToInt(str, flags, base);
3568         *value = 0;
3569         if (( !n && errno ) || !x_VerifyIntLimits<TNumeric>(n, str, flags)) {
3570             return false;
3571         }
3572         *value = (TNumeric) n;
3573         return true;
3574     }
3575     template <typename TNumeric>
3576     static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) < sizeof(unsigned int)), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3577     x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3578     {
3579         unsigned int n = StringToUInt(str, flags, base);
3580         *value = 0;
3581         if (( !n && errno ) || !x_VerifyIntLimits<TNumeric>(n, str, flags)) {
3582             return false;
3583         }
3584         *value = (TNumeric) n;
3585         return true;
3586     }
3587     template <typename TNumeric>
3588     static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(int) && !is_same<TNumeric, long>::value), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3589     x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3590     {
3591         *value = StringToInt(str, flags, base);
3592         return (*value || !errno);
3593     }
3594     template <typename TNumeric>
3595     static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3596     x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3597     {
3598         *value = StringToUInt(str, flags, base);
3599         return (*value || !errno);
3600     }
3601     static bool
x_StringToNumeric(const CTempString str,long * value,TStringToNumFlags flags,int base)3602     x_StringToNumeric(const CTempString str, long* value, TStringToNumFlags flags, int base)
3603     {
3604         *value = StringToLong(str, flags, base);
3605         return (*value || !errno);
3606     }
3607     static bool
x_StringToNumeric(const CTempString str,unsigned long * value,TStringToNumFlags flags,int base)3608     x_StringToNumeric(const CTempString str, unsigned long* value, TStringToNumFlags flags, int base)
3609     {
3610         *value = StringToULong(str, flags, base);
3611         return (*value || !errno);
3612     }
3613     template <typename TNumeric>
3614     static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3615     x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3616     {
3617         *value = StringToInt8(str, flags, base);
3618         return (*value || !errno);
3619     }
3620     template <typename TNumeric>
3621     static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(Uint8) && !is_same<TNumeric, unsigned long>::value), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3622     x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3623     {
3624         *value = StringToUInt8(str, flags, base);
3625         return (*value || !errno);
3626     }
3627     static bool
x_StringToNumeric(const CTempString str,float * value,TStringToNumFlags flags,int)3628     x_StringToNumeric(const CTempString str, float* value, TStringToNumFlags flags, int /*base*/)
3629     {
3630         double n = StringToDouble(str, flags);
3631         *value = 0;
3632         if (( !n && errno ) || !x_VerifyFloatLimits<float>(n, str, flags)) {
3633             return false;
3634         }
3635         *value = (float) n;
3636         return true;
3637     }
3638     static bool
x_StringToNumeric(const CTempString str,double * value,TStringToNumFlags flags,int)3639     x_StringToNumeric(const CTempString str, double* value, TStringToNumFlags flags, int /*base*/)
3640     {
3641         *value = StringToDouble(str, flags);
3642         return (*value || !errno);
3643     }
    /// Pointer-output overload for "strict id" wrapper types, i.e. classes
    /// that expose an integral nested type TId and a member function Get().
    /// Parses directly into the object returned by value->Set() by
    /// delegating to the matching x_StringToNumeric() overload.
    template <typename TStrictId>
    static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, bool>::type
    x_StringToNumeric(const CTempString str, TStrictId* value, TStringToNumFlags flags, int base)
    {
        return x_StringToNumeric(str, &value->Set(), flags, base);
    }
3650 
3651 // NumericToString
    /// Overload for signed integral types no larger than "int"
    /// (excluding "long"): forwards to IntToString().
    template<typename TNumeric>
    static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) <= sizeof(int) && !is_same<TNumeric, long>::value), void>::type
    x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
    {
        IntToString(out_str, value, flags, base);
    }
    /// Overload for unsigned integral types no larger than "unsigned int"
    /// (excluding "unsigned long"): forwards to UIntToString().
    template<typename TNumeric>
    static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) <= sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), void>::type
    x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
    {
        UIntToString(out_str, value, flags, base);
    }
    /// Overload for "long": forwards to LongToString().
    static void
    x_NumericToString(string& out_str, long value, TNumToStringFlags flags, int base)
    {
        LongToString(out_str, value, flags, base);
    }
    /// Overload for "unsigned long": forwards to ULongToString().
    static void
    x_NumericToString(string& out_str, unsigned long value, TNumToStringFlags flags, int base)
    {
        ULongToString(out_str, value, flags, base);
    }
3674 #if NCBI_COMPILER_MSVC && (_MSC_VER < 1900)
    /// Non-template overload for Int8: forwards to Int8ToString().
    /// Compiled only for MSVC older than _MSC_VER 1900 (see the enclosing
    /// #if).
    static void
    x_NumericToString(string& out_str, Int8 value, TNumToStringFlags flags, int base)
    {
        Int8ToString(out_str, value, flags, base);
    }
    /// Non-template overload for Uint8: forwards to UInt8ToString().
    /// Compiled only for MSVC older than _MSC_VER 1900 (see the enclosing
    /// #if).
    static void
    x_NumericToString(string& out_str, Uint8 value, TNumToStringFlags flags, int base)
    {
        UInt8ToString(out_str, value, flags, base);
    }
3685 #endif
    /// Overload for signed integral types that have the size of Int8
    /// (excluding "long"): forwards to Int8ToString().
    template<typename TNumeric>
    static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), void>::type
    x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
    {
        Int8ToString(out_str, value, flags, base);
    }
    /// Overload for unsigned integral types that have the size of Uint8
    /// (excluding "unsigned long"): forwards to UInt8ToString().
    template<typename TNumeric>
    static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(Uint8) && !is_same<TNumeric, unsigned long>::value), void>::type
    x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
    {
        UInt8ToString(out_str, value, flags, base);
    }
    /// Overload for floating-point types: forwards to DoubleToString(),
    /// passing -1 as its third argument. The "base" argument is ignored.
    /// NOTE(review): the -1 is presumably the "precision" argument of
    /// DoubleToString() -- confirm against its declaration.
    template<typename TNumeric>
    static typename enable_if< is_floating_point<TNumeric>::value, void>::type
    x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int /*base*/)
    {
        DoubleToString(out_str, value, -1, flags);
    }
    /// Overload for "strict id" wrapper types, i.e. classes that expose an
    /// integral nested type TId and a member function Get().
    /// Converts the value returned by value.Get() by delegating to the
    /// matching x_NumericToString() overload.
    template <typename TStrictId>
    static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, void>::type
        x_NumericToString(string& out_str, TStrictId value, TNumToStringFlags flags, int base)
    {
        return x_NumericToString(out_str, value.Get(), flags, base);
    }
3710 
3711 
3712 // Join
    /// Join helper used by the x_Join() overloads for string-convertible
    /// elements reached through forward iterators or plain pointers.
    /// Declared here only; the definition is elsewhere.
    /// NOTE(review): presumably concatenates [from, to) separated by "delim"
    /// -- confirm against the definition.
    template<typename TIterator>
    static string xx_Join( TIterator from, TIterator to, const CTempString& delim);
3715 
3716     template<typename TIterator>
3717     static typename enable_if<is_same<typename TIterator::iterator_category, input_iterator_tag>::value &&
3718                               is_convertible<typename TIterator::value_type, string>::value, string>::type
x_Join(TIterator from,TIterator to,const CTempString & delim)3719     x_Join( TIterator from, TIterator to, const CTempString& delim)
3720     {
3721         return TransformJoin(from, to, delim, [](const typename TIterator::value_type& i){ return i;});
3722     }
3723 
    /// Overload for iterators whose category is convertible to
    /// forward_iterator_tag and whose elements are convertible to string:
    /// forwards to xx_Join().
    template<typename TIterator>
    static typename enable_if<is_convertible<typename TIterator::iterator_category, forward_iterator_tag>::value &&
                              is_convertible<typename TIterator::value_type, string>::value, string>::type
    x_Join( TIterator from, TIterator to, const CTempString& delim)
    {
        return xx_Join(from, to, delim);
    }
3731 
    /// Overload for plain pointer ranges whose elements are convertible to
    /// string: forwards to xx_Join().
    template<typename TValue>
    static typename enable_if<is_convertible<TValue, string>::value, string>::type
    x_Join( TValue* from, TValue* to, const CTempString& delim)
    {
        return xx_Join(from, to, delim);
    }
3738 
3739     template<typename TIterator>
3740     static typename enable_if<is_convertible<typename TIterator::iterator_category, input_iterator_tag>::value &&
3741                               is_arithmetic< typename TIterator::value_type>::value, string>::type
x_Join(TIterator from,TIterator to,const CTempString & delim)3742     x_Join( TIterator from, TIterator to, const CTempString& delim)
3743     {
3744         return TransformJoin( from, to, delim, [](const typename TIterator::value_type& i){ return NumericToString(i);});
3745     }
3746 
3747     template<typename TValue>
3748     static typename enable_if<is_arithmetic<TValue>::value, string>::type
x_Join(TValue * from,TValue * to,const CTempString & delim)3749     x_Join( TValue* from, TValue* to, const CTempString& delim)
3750     {
3751         return TransformJoin( from, to, delim, [](const TValue& i){ return NumericToString(i);});
3752     }
3753 }; // class NStr
3754 
3755 
3756 
3757 /////////////////////////////////////////////////////////////////////////////
3758 ///
3759 
3760 
// Selects the underlying type of UCS-4 characters: "unsigned long" when
// SIZEOF_LONG is 4, otherwise the existing TUnicodeSymbol / TStringUnicode
// types are reused.
#define NCBITOOLKIT_USE_LONG_UCS4 (SIZEOF_LONG == 4)
#if NCBITOOLKIT_USE_LONG_UCS4
/// UCS-4 character
typedef unsigned long TCharUCS4;
/// UCS-4 string
typedef basic_string<TCharUCS4> TStringUCS4;
#else
/// UCS-4 character
typedef TUnicodeSymbol TCharUCS4;
/// UCS-4 string
typedef TStringUnicode TStringUCS4;
#endif
3771 
/// Type for a character (one code unit) in UCS-2 encoding
typedef Uint2 TCharUCS2;
/// Type for a string in UCS-2 encoding (a basic_string of TCharUCS2)
typedef basic_string<TCharUCS2> TStringUCS2;
3776 
3777 
3778 /// Operator for writing TStringUCS2 to stream.
3779 /// Operator is needed for using in SDBAPI.
operator <<(CNcbiOstream & os,const TStringUCS2 & str)3780 inline CNcbiOstream& operator<< (CNcbiOstream& os, const TStringUCS2& str)
3781 {
3782     os.write((const char*)str.data(), str.size() * sizeof(TCharUCS2));
3783     return os;
3784 }
3785 
3786 
3787 
3788 /////////////////////////////////////////////////////////////////////////////
3789 ///
3790 /// CUtf8 --
3791 ///
3792 ///   Utility class to handle strings in UTF8 encoding.
3793 ///   Can convert data to and from the following encodings:
3794 ///      ISO 8859-1 (Latin1)
3795 ///      Microsoft Windows code page 1252
3796 ///      UCS-2, UCS-4 (no surrogates)
3797 
3798 class NCBI_XNCBI_EXPORT CUtf8
3799 {
3800 public:
    /// How to verify character encoding of the source data
    enum EValidate {
        eNoValidate,  ///< Do not verify the encoding (the default of the
                      ///< "validate" parameters in this class)
        eValidate     ///< Verify the encoding
    };
3806 
3807     /// Convert into UTF8 from a C/C++ string
3808     ///
3809     /// @param src
3810     ///   Source string
3811     /// @param encoding
3812     ///   Character encoding of the source string
3813     /// @param validate
3814     ///   Verify the character encoding of the source
AsUTF8(const CTempString & src,EEncoding encoding,EValidate validate=eNoValidate)3815     static CStringUTF8 AsUTF8(const CTempString& src,
3816                               EEncoding          encoding,
3817                               EValidate          validate = eNoValidate)
3818     {
3819         CStringUTF8 u8;
3820         return x_Append(u8,src,encoding,validate);
3821     }
3822 
3823 #if defined(HAVE_WSTRING)
3824     /// Convert into UTF8 from a C/C++ string
3825     ///
3826     /// @param src
3827     ///   Source string
3828     /// @param lcl
3829     ///   String locale
AsUTF8(const CTempString & src,const locale & lcl)3830     static CStringUTF8 AsUTF8(const CTempString& src, const locale& lcl)
3831     {
3832         CStringUTF8 u8;
3833         return x_Append(u8,src,lcl);
3834     }
3835 #endif
3836 
3837     /// Convert into UTF8 from a Unicode C++ string
3838     ///
3839     /// @param src
3840     ///   Source string
3841     /// @attention
3842     ///   Only for TStringUnicode, TStringUCS4, TStringUCS2, wstring types
3843     template <typename TChar>
3844     static typename enable_if< is_integral<TChar>::value  && (1 < sizeof(TChar)), CStringUTF8>::type
AsUTF8(const basic_string<TChar> & src)3845     AsUTF8(const basic_string<TChar>& src)
3846     {
3847         CStringUTF8 u8;
3848         return  x_Append(u8, src.data(), src.size());
3849     }
3850 
3851     /// Convert into UTF8 from a Unicode character buffer
3852     ///
3853     /// @param src
3854     ///   Source character buffer
3855     /// @param tchar_count
3856     ///   Number of characters in the buffer;
3857     ///   If it equals to NPOS, buffer is assumed to be zero-terminated
3858     template <typename TChar>
3859     static typename enable_if< is_integral<TChar>::value  && (1 < sizeof(TChar)), CStringUTF8>::type
AsUTF8(const TChar * src,SIZE_TYPE tchar_count=NPOS)3860     AsUTF8(const TChar* src, SIZE_TYPE tchar_count = NPOS)
3861     {
3862         CStringUTF8 u8;
3863         return  x_Append(u8, src, tchar_count);
3864     }
3865 
    /// Convert Unicode C++ string into UTF8 and append it to existing string
    ///
    /// @param dest
    ///   Existing UTF8 string
    /// @param src
    ///   Source Unicode string
    /// @return
    ///   Reference to modified dest string
    /// @attention
    ///   Enabled only for integral character types wider than one byte
    template <typename TChar>
    static typename enable_if< is_integral<TChar>::value  && (1 < sizeof(TChar)), CStringUTF8& >::type
    AppendAsUTF8(CStringUTF8& dest, const basic_string<TChar>& src)
    {
        return x_Append(dest, src.data(), src.size());
    }
3880 
    /// Convert Unicode character buffer into UTF8 and append it to existing string
    ///
    /// @param dest
    ///   Existing UTF8 string
    /// @param src
    ///   Source Unicode character buffer
    /// @param tchar_count
    ///   Number of characters in the buffer;
    ///   If it equals to NPOS, buffer is assumed to be zero-terminated
    /// @return
    ///   Reference to modified dest string
    /// @attention
    ///   Enabled only for integral character types wider than one byte
    template <typename TChar>
    static typename enable_if< is_integral<TChar>::value  && (1 < sizeof(TChar)), CStringUTF8& >::type
    AppendAsUTF8(CStringUTF8& dest, const TChar* src, SIZE_TYPE tchar_count = NPOS)
    {
        return x_Append(dest, src, tchar_count);
    }
3898 
    /// Convert Unicode symbol into UTF8 and append it to existing string
    ///
    /// @param dest
    ///   Existing UTF8 string
    /// @param ch
    ///   Unicode symbol
    /// @return
    ///   Reference to modified dest string
    /// @attention
    ///   Enabled only for integral character types wider than one byte
    template <typename TChar>
    static typename enable_if< is_integral<TChar>::value  && (1 < sizeof(TChar)), CStringUTF8& >::type
    AppendAsUTF8(CStringUTF8& dest, TChar ch)
    {
        // The single symbol is passed on as a one-element buffer.
        return  x_Append(dest, &ch, 1);
    }
3913 
    /// Convert non-Unicode C++ string into UTF8 and append it to existing string
    ///
    /// @param dest
    ///   Existing UTF8 string
    /// @param src
    ///   Source string
    /// @param encoding
    ///   Character encoding of the source string
    /// @param validate
    ///   Verify the character encoding of the source
    /// @return
    ///   Reference to modified dest string
    static CStringUTF8& AppendAsUTF8(CStringUTF8&       dest,
                                     const CTempString& src,
                                     EEncoding          encoding,
                                     EValidate          validate = eNoValidate)
    {
        return  x_Append(dest,src,encoding,validate);
    }
3933 
3934 #if defined(HAVE_WSTRING)
    /// Convert non-Unicode C++ string into UTF8 and append it to existing string
    ///
    /// @param dest
    ///   Existing UTF8 string
    /// @param src
    ///   Source string
    /// @param lcl
    ///   Source string locale
    /// @return
    ///   Reference to modified dest string
    static CStringUTF8& AppendAsUTF8(CStringUTF8&       dest,
                                     const CTempString& src,
                                     const locale&      lcl)
    {
        return  x_Append(dest,src,lcl);
    }
3951 #endif
3952 
    /// Convert non-Unicode character into UTF8 and append it to existing string
    ///
    /// @param dest
    ///   Existing UTF8 string
    /// @param ch
    ///   Character
    /// @param encoding
    ///   Character encoding
    /// @param validate
    ///   Verify the character encoding of the source
    /// @return
    ///   Reference to modified dest string
    static CStringUTF8& AppendAsUTF8(CStringUTF8& dest,
                                     char         ch,
                                     EEncoding    encoding,
                                     EValidate    validate = eNoValidate)
    {
        // The single character is passed on as a one-character CTempString.
        return  x_Append(dest,CTempString(&ch,1),encoding,validate);
    }
3972 
3973 #if defined(HAVE_WSTRING)
    /// Convert non-Unicode character into UTF8 and append it to existing string
    ///
    /// @param dest
    ///   Existing UTF8 string
    /// @param ch
    ///   Character
    /// @param lcl
    ///   Character locale
    /// @return
    ///   Reference to modified dest string
    static CStringUTF8& AppendAsUTF8(CStringUTF8&  dest,
                                     char          ch,
                                     const locale& lcl)
    {
        // The single character is passed on as a one-character CTempString.
        return  x_Append(dest,CTempString(&ch,1),lcl);
    }
3990 #endif
3991 
    /// Convert UTF8 string into a single-byte character representation
    ///
    /// Can throw a CStringException if the conversion is impossible
    /// or the string has invalid UTF-8 encoding.
    ///
    /// @param src
    ///   Source UTF8 string
    /// @param encoding
    ///   Encoding of the result
    /// @param substitute_on_error
    ///   If the conversion is impossible, append the provided string
    ///   or, if substitute_on_error equals 0 (the default), throw an
    ///   exception
    /// @param validate
    ///   Verify UTF8 character encoding of the source
    ///   (default: eNoValidate)
    /// @return
    ///   C++ string
    static string AsSingleByteString
        (const CTempString& src, EEncoding encoding,
         const char* substitute_on_error = 0, EValidate validate = eNoValidate);
4011 
4012 #if defined(HAVE_WSTRING)
    /// Locale-based overload of AsSingleByteString(): takes the locale
    /// "lcl" in place of an EEncoding value; the remaining parameters and
    /// their defaults are the same as in the encoding-based overload.
    /// NOTE(review): presumably converts into the character set of "lcl"
    /// -- confirm against the implementation.
    static string AsSingleByteString
        (const CTempString& src, const locale& lcl,
         const char* substitute_on_error = 0, EValidate validate = eNoValidate);
4016 #endif
4017 
    /// Convert UTF8 string into Unicode
    ///
    /// Can throw a CStringException if the conversion is impossible
    /// or the string has invalid UTF-8 encoding.
    ///
    /// @param src
    ///   Source UTF8 string
    /// @param substitute_on_error
    ///   If the conversion is impossible, append the provided string
    ///   or, if substitute_on_error equals 0, throw an exception
    /// @param validate
    ///   Verify UTF8 character encoding of the source
    /// @return
    ///   basic_string<TChar> with the converted data, as produced by
    ///   x_AsBasicString()
    /// @attention
    ///   Only for TStringUnicode, TStringUCS4, TStringUCS2, wstring types
    template <typename TChar>
    static typename enable_if< is_integral<TChar>::value  && (1 < sizeof(TChar)), basic_string<TChar> >::type
    AsBasicString(const CTempString& src, const TChar* substitute_on_error, EValidate validate = eNoValidate)
    {
        return x_AsBasicString(src,substitute_on_error,validate);
    }
4038 
4039     template <typename TChar>
4040     static typename enable_if< is_integral<TChar>::value  && (1 < sizeof(TChar)), basic_string<TChar> >::type
AsBasicString(const CTempString & src)4041     AsBasicString(const CTempString& src)
4042     {
4043         return x_AsBasicString<TChar>(src,nullptr,eNoValidate);
4044     }
4045 
4046     /// Get the number of symbols (code points) in UTF8 string
4047     ///
4048     /// @param src
4049     ///   Source UTF8 string
4050     /// @return
4051     ///   Number of symbols (code points)
4052     static SIZE_TYPE GetSymbolCount(const CTempString& src);
4053 
4054     /// Get the number of valid UTF-8 symbols (code points) in buffer
4055     ///
4056     /// @param src
4057     ///   Character buffer
4058     /// @return
4059     ///   Number of valid symbols (no exception thrown)
4060     static SIZE_TYPE GetValidSymbolCount(const CTempString& src);
4061 
4062     /// Get the number of valid UTF-8 bytes (code units) in buffer
4063     ///
4064     /// @param src
4065     ///   Character buffer
4066     /// @return
4067     ///   Number of valid bytes (no exception thrown)
4068     static SIZE_TYPE GetValidBytesCount(const CTempString& src);
4069 
4070     /// Check buffer for presence of UTF-8 byte sequence and return length of first symbol
4071     ///
4072     /// @param src
4073     ///   Character buffer
4074     /// @return
4075     ///   Number of bytes
4076     static SIZE_TYPE EvaluateSymbolLength(const CTempString& src);
4077 
4078     /// Check that the character is valid first byte of an UTF8 byte sequence
4079     ///
4080     /// @param ch
4081     ///   Character
4082     /// @param more
4083     ///   Number of additional bytes to expect
4084     /// @return
4085     ///   true, if this is a valid first byte
EvaluateFirst(char ch,SIZE_TYPE & more)4086     static bool EvaluateFirst(char ch, SIZE_TYPE& more) {
4087         return x_EvalFirst(ch, more);
4088     }
4089 
4090     /// Check that the character is valid continuation byte of an UTF8 byte sequence
4091     ///
4092     /// @param ch
4093     ///   Character
4094     /// @return
4095     ///   true, if this is a valid byte
EvaluateNext(char ch)4096     static bool EvaluateNext(char ch) {
4097         return x_EvalNext(ch);
4098     }
4099 
4100     /// Check the encoding of the C/C++ string
4101     ///
4102     /// Check that the encoding of the source is the same, or
4103     /// is compatible with the specified one
4104     /// @param src
4105     ///   Source string
4106     /// @param encoding
4107     ///   Character encoding form to check against
4108     /// @return
4109     ///   Boolean result: encoding is same or compatible
4110     static bool MatchEncoding(const CTempString& src, EEncoding encoding);
4111 
4112     /// Guess the encoding of the C/C++ string
4113     ///
4114     /// It can distinguish between UTF-8, Latin1, and Win1252 only
4115     /// @param src
4116     ///   Character buffer
4117     /// @return
4118     ///   Encoding as guessed;  eEncoding_Unknown if cannot guess
4119     static EEncoding GuessEncoding(const CTempString& src);
4120 
4121     /// Give Encoding name as string
4122     ///
4123     /// @param encoding
4124     ///   EEncoding enum. (Throw CStringException if passed eEncoding_Unknown.)
4125     /// @return
4126     ///   Encoding name
4127     static string EncodingToString(EEncoding encoding);
4128 
4129     /// Convert encoding name into EEncoding enum, taking into account synonyms
4130     /// as per  http://www.iana.org/assignments/character-sets
4131     ///
4132     /// @param encoding_name
4133     ///   Name of the encoding
4134     /// @return
4135     ///   EEncoding enum;  eEncoding_Unknown for unsupported encodings
4136     static EEncoding StringToEncoding(const CTempString& encoding_name);
4137 
4138     /// Convert encoded character into Unicode
4139     ///
4140     /// @param ch
4141     ///   Encoded character
4142     /// @param encoding
4143     ///   Character encoding
4144     /// @return
4145     ///   Unicode code point (symbol)
4146     static TUnicodeSymbol CharToSymbol(char ch, EEncoding encoding);
4147 
#if defined(HAVE_WSTRING)
    /// Translate an encoded character into Unicode (locale-based overload)
    ///
    /// @param ch
    ///   Encoded character
    /// @param lcl
    ///   Locale of the character
    /// @return
    ///   Unicode code point (symbol)
    static TUnicodeSymbol CharToSymbol(char ch, const locale& lcl);
#endif
4159 
4160     /// Convert Unicode code point into encoded character
4161     ///
4162     /// @param sym
4163     ///   Unicode code point (symbol)
4164     /// @param encoding
4165     ///   Character encoding
4166     /// @return
4167     ///   Encoded character
4168     static char SymbolToChar(TUnicodeSymbol sym, EEncoding encoding);
4169 
#if defined(HAVE_WSTRING)
    /// Translate a Unicode code point into an encoded character
    /// (locale-based overload)
    ///
    /// @param sym
    ///   Unicode code point (symbol)
    /// @param lcl
    ///   Locale of the character
    /// @return
    ///   Encoded character
    static char SymbolToChar(TUnicodeSymbol sym, const locale& lcl);
#endif
4181 
4182     /// Determines if a symbol is whitespace
4183     /// per  http://unicode.org/charts/uca/chart_Whitespace.html
4184     ///
4185     /// @param sym
4186     ///   Unicode code point (symbol)
4187     /// @sa
4188     ///   TruncateSpacesInPlace, TruncateSpaces_Unsafe, TruncateSpaces
4189     static bool IsWhiteSpace(TUnicodeSymbol sym);
4190 
4191     /// Truncate spaces in the string (in-place)
4192     ///
4193     /// @param src
4194     ///   UTF8 string
4195     /// @param side
4196     ///   Which end of the string to truncate spaces from. Default is to
4197     ///   truncate spaces from both ends.
4198     /// @return
4199     ///   Reference to src
4200     /// @sa
4201     ///   IsWhiteSpace, TruncateSpaces_Unsafe, TruncateSpaces
4202     static CStringUTF8& TruncateSpacesInPlace
4203     (CStringUTF8& str, NStr::ETrunc side = NStr::eTrunc_Both);
4204 
4205     /// Truncate spaces in the string
4206     ///
4207     /// @param str
4208     ///   Source string, in UTF8 encoding
4209     /// @param side
4210     ///   Which end of the string to truncate spaces from. Default is to
4211     ///   truncate spaces from both ends.
4212     /// @sa
4213     ///   IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces_Unsafe
4214     static CStringUTF8 TruncateSpaces
4215         (const CTempString& str, NStr::ETrunc side = NStr::eTrunc_Both);
4216 
4217     /// Truncate spaces in the string
4218     ///
4219     /// @param str
4220     ///   Source string, in UTF8 encoding
4221     /// @param side
4222     ///   Which end of the string to truncate spaces from. Default is to
4223     ///   truncate spaces from both ends.
4224     /// @attention
4225     ///   The lifespan of the result string is the same as one of the source.
4226     ///   So, for example, if the source is temporary string, then the result
4227     ///   will be invalid right away (will point to already released memory).
4228     /// @sa
4229     ///   IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces
4230     static CTempString TruncateSpaces_Unsafe
4231         (const CTempString& str, NStr::ETrunc side = NStr::eTrunc_Both);
4232 
4233     /// Convert sequence of UTF8 code units into Unicode code point
4234     ///
4235     /// @param src
4236     ///   Zero-terminated buffer, in UTF8 encoding
4237     /// @return
4238     ///   Unicode code point
4239     static TUnicodeSymbol Decode(const char*& src);
4240 
4241 #ifndef NCBI_COMPILER_WORKSHOP
4242     /// Convert sequence of UTF8 code units into Unicode code point
4243     ///
4244     /// @param src
4245     ///   C++ string iterator
4246     /// @return
4247     ///   Unicode code point
4248     static TUnicodeSymbol Decode(string::const_iterator& src);
4249 #endif
4250 
4251     /// Begin converting first character of UTF8 sequence into Unicode
4252     ///
4253     /// @param ch
4254     ///   Character
4255     /// @param more
4256     ///   If the character is valid, - how many more characters to expect
4257     /// @return
4258     ///   Part of Unicode code point. Zero if the character is invalid.
4259     static TUnicodeSymbol DecodeFirst(char ch, SIZE_TYPE& more);
4260 
4261     /// Convert next character of UTF8 sequence into Unicode
4262     ///
4263     /// @param ch
4264     ///   Character
4265     /// @param chU
4266     ///   Incomplete Unicode code point
4267     /// @return
4268     ///   Accumulated Unicode code point. Zero if the character is invalid.
4269     static TUnicodeSymbol DecodeNext(TUnicodeSymbol chU, char ch);
4270 
4271 private:
4272     static void x_Validate(const CTempString& str);
4273 
4274     static SIZE_TYPE x_GetValidSymbolCount
4275         (const CTempString& src, CTempString::const_iterator& err);
4276 
4277     static CStringUTF8& x_AppendChar(CStringUTF8& u8str, TUnicodeSymbol ch);
4278 
4279     static CStringUTF8& x_Append(CStringUTF8& u8str, const CTempString& src,
4280                                  EEncoding encoding, EValidate validate);
4281 #if defined(HAVE_WSTRING)
4282     static CStringUTF8& x_Append(CStringUTF8& u8str, const CTempString& src, const locale& lcl);
4283 #endif
4284     template <typename TChar>
4285     static CStringUTF8& x_Append(CStringUTF8& u8str, const TChar* src, SIZE_TYPE tchar_count);
4286 
4287     template <typename TChar>
4288     static basic_string<TChar> x_AsBasicString
4289         (const CTempString& src,
4290          const TChar* substitute_on_error, EValidate validate);
4291 
4292     template <typename TIterator>
4293     static TUnicodeSymbol x_Decode(TIterator& src);
4294 
4295     static SIZE_TYPE x_BytesNeeded(TUnicodeSymbol ch);
4296     static bool   x_EvalFirst(char ch, SIZE_TYPE& more);
4297     static bool   x_EvalNext(char ch);
4298 
4299     // returns part of the string around an error in Utf8 encoding
4300     static CTempString x_GetErrorFragment(const CTempString& src);
4301 
4302     friend class CStringUTF8_DEPRECATED;
4303 };
4304 
4305 // deprecated CStringUTF8 is there
4306 #include <corelib/impl/stringutf8_deprecated.hpp>
4307 
4308 
4309 
4310 /////////////////////////////////////////////////////////////////////////////
4311 ///
4312 /// CParseTemplException --
4313 ///
4314 /// Define template class for parsing exception. This class is used to define
4315 /// exceptions for complex parsing tasks and includes an additional m_Pos
4316 /// data member. The constructor requires that an additional positional
4317 /// parameter be supplied along with the description message.
4318 
4319 template <class TBase>
4320 class CParseTemplException : EXCEPTION_VIRTUAL_BASE public TBase
4321 {
4322 public:
4323     /// Error types that for exception class.
4324     enum EErrCode {
4325         eErr        ///< Generic error
4326     };
4327 
4328     /// Translate from the error code value to its string representation.
GetErrCodeString(void) const4329     virtual const char* GetErrCodeString(void) const override
4330     {
4331         switch (GetErrCode()) {
4332         case eErr: return "eErr";
4333         default:   return CException::GetErrCodeString();
4334         }
4335     }
4336 
4337     /// Constructor.
4338     ///
4339     /// Report "pos" along with "what".
CParseTemplException(const CDiagCompileInfo & info,const CException * prev_exception,EErrCode err_code,const string & message,string::size_type pos,EDiagSev severity=eDiag_Error)4340     CParseTemplException(const CDiagCompileInfo &info,
4341         const CException* prev_exception,
4342         EErrCode err_code,const string& message,
4343         string::size_type pos, EDiagSev severity = eDiag_Error)
4344           : TBase(info, prev_exception, message, severity, 0), m_Pos(pos)
4345     {
4346         this->x_Init(info,
4347                      string("{") + NStr::SizetToString(m_Pos) +
4348                      "} " + message,
4349                      prev_exception,
4350                      severity);
4351         this->x_InitErrCode((CException::EErrCode) err_code);
4352     }
4353 
4354     /// Constructor.
CParseTemplException(const CParseTemplException<TBase> & other)4355     CParseTemplException(const CParseTemplException<TBase>& other)
4356         : TBase(other)
4357     {
4358         m_Pos = other.m_Pos;
4359         this->x_Assign(other);
4360     }
4361 
4362     /// Destructor.
~CParseTemplException(void)4363     virtual ~CParseTemplException(void) throw() {}
4364 
4365     /// Report error position.
ReportExtra(ostream & out) const4366     virtual void ReportExtra(ostream& out) const override
4367     {
4368         out << "m_Pos = " << (unsigned long)m_Pos;
4369     }
4370 
4371     // Attributes.
4372 
4373     /// Get exception class type.
GetType(void) const4374     virtual const char* GetType(void) const override
4375         { return "CParseTemplException"; }
4376 
4377     typedef int TErrCode;
4378     /// Get error code.
GetErrCode(void) const4379     TErrCode GetErrCode(void) const
4380     {
4381         return typeid(*this) == typeid(CParseTemplException<TBase>) ?
4382             (TErrCode) this->x_GetErrCode() :
4383             (TErrCode) CException::eInvalid;
4384     }
4385 
4386     /// Get error position.
GetPos(void) const4387     string::size_type GetPos(void) const throw() { return m_Pos; }
4388 
4389 protected:
CParseTemplException(const CDiagCompileInfo & info,const CException * prev_exception,const string & message,string::size_type pos,EDiagSev severity,CException::TFlags flags)4390     CParseTemplException(const CDiagCompileInfo &info,
4391         const CException* prev_exception,
4392         const string& message,
4393         string::size_type pos, EDiagSev severity, CException::TFlags flags)
4394           : TBase(info, prev_exception, message, severity, flags), m_Pos(pos)
4395     {
4396         this->x_Init(info,
4397                      string("{") + NStr::SizetToString(m_Pos) +
4398                      "} " + message,
4399                      prev_exception,
4400                      severity);
4401     }
4402     /// Constructor.
CParseTemplException(void)4403     CParseTemplException(void)
4404     {
4405         m_Pos = 0;
4406     }
4407 
4408     /// Helper clone method.
x_Clone(void) const4409     virtual const CException* x_Clone(void) const override
4410     {
4411         return new CParseTemplException<TBase>(*this);
4412     }
4413 
4414 private:
4415     string::size_type m_Pos;    ///< Error position
4416 };
4417 
4418 
4419 /////////////////////////////////////////////////////////////////////////////
4420 ///
4421 /// CStringException --
4422 ///
4423 /// Define exceptions generated by string classes.
4424 ///
4425 /// CStringException inherits its basic functionality from
4426 /// CParseTemplException<CCoreException> and defines additional error codes
4427 /// for string parsing.
4428 
4429 class NCBI_XNCBI_EXPORT CStringException : public CParseTemplException<CCoreException>
4430 {
4431 public:
4432     /// Error types that string classes can generate.
4433     enum EErrCode {
4434         eConvert,       ///< Failure to convert string
4435         eBadArgs,       ///< Bad arguments to string methods
4436         eFormat         ///< Wrong format for any input to string methods
4437     };
4438 
4439     /// Translate from the error code value to its string representation.
4440     virtual const char* GetErrCodeString(void) const override;
4441 
4442     // Standard exception boilerplate code.
4443     NCBI_EXCEPTION_DEFAULT2(CStringException,
4444         CParseTemplException<CCoreException>, std::string::size_type);
4445 };
4446 
4447 
4448 
/////////////////////////////////////////////////////////////////////////////
///
/// CStringPairsParser --
///
/// Base class for parsing a string to a set of name-value pairs.
4454 
4455 
4456 /// Decoder interface. Names and values can be decoded with different rules.
4457 class IStringDecoder
4458 {
4459 public:
4460     /// Type of string to be decoded
4461     enum EStringType {
4462         eName,
4463         eValue
4464     };
4465     /// Decode the string. Must throw CStringException if the source string
4466     /// is not valid.
4467     virtual string Decode(const CTempString src, EStringType stype) const = 0;
~IStringDecoder(void)4468     virtual ~IStringDecoder(void) {}
4469 };
4470 
4471 
4472 /// Encoder interface. Names and values can be encoded with different rules.
4473 class IStringEncoder
4474 {
4475 public:
4476     /// Type of string to be decoded
4477     enum EStringType {
4478         eName,
4479         eValue
4480     };
4481     /// Encode the string.
4482     virtual string Encode(const CTempString src, EStringType stype) const = 0;
~IStringEncoder(void)4483     virtual ~IStringEncoder(void) {}
4484 };
4485 
4486 
4487 /// URL-decoder for string pairs parser
4488 class NCBI_XNCBI_EXPORT CStringDecoder_Url : public IStringDecoder
4489 {
4490 public:
4491     CStringDecoder_Url(NStr::EUrlDecode flag = NStr::eUrlDec_All);
4492 
4493     virtual string Decode(const CTempString src, EStringType stype) const;
4494 
4495 private:
4496     NStr::EUrlDecode m_Flag;
4497 };
4498 
4499 
4500 /// URL-encoder for string pairs parser
4501 class NCBI_XNCBI_EXPORT CStringEncoder_Url : public IStringEncoder
4502 {
4503 public:
4504     CStringEncoder_Url(NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);
4505 
4506     virtual string Encode(const CTempString src, EStringType stype) const;
4507 
4508 private:
4509     NStr::EUrlEncode m_Flag;
4510 };
4511 
4512 
4513 /// Template for parsing string into pairs of name and value or merging
4514 /// them back into a single string.
4515 /// The container class must hold pairs of strings (pair<string, string>).
4516 template<class TContainer>
4517 class CStringPairs
4518 {
4519 public:
4520     typedef TContainer TStrPairs;
4521     /// The container's value type must be pair<string, string>
4522     /// or a compatible type.
4523     typedef typename TContainer::value_type TStrPair;
4524 
4525     /// Create parser with the specified decoder/encoder and default separators.
4526     ///
4527     /// @param decoder
4528     ///   String decoder (Url, Xml etc.)
4529     /// @param own_decoder
4530     ///   Decoder ownership flag
4531     /// @param decoder
4532     ///   String encoder (Url, Xml etc.), optional
4533     /// @param own_encoder
4534     ///   Encoder ownership flag, optional
CStringPairs(IStringDecoder * decoder=NULL,EOwnership own_decoder=eTakeOwnership,IStringEncoder * encoder=NULL,EOwnership own_encoder=eTakeOwnership)4535     CStringPairs(IStringDecoder* decoder = NULL,
4536                  EOwnership      own_decoder = eTakeOwnership,
4537                  IStringEncoder* encoder = NULL,
4538                  EOwnership      own_encoder = eTakeOwnership)
4539         : m_ArgSep("&"),
4540           m_ValSep("="),
4541           m_Decoder(decoder, own_decoder),
4542           m_Encoder(encoder, own_encoder)
4543     {
4544     }
4545 
4546     /// Create parser with the specified parameters.
4547     ///
4548     /// @param arg_sep
4549     ///   Separator between name+value pairs
4550     /// @param val_sep
4551     ///   Separator between name and value
4552     /// @param decoder
4553     ///   String decoder (Url, Xml etc.)
4554     /// @param own_decoder
4555     ///   Decoder ownership flag
4556     /// @param encoder
4557     ///   String encoder (Url, Xml etc.)
4558     /// @param own_encoder
4559     ///   Encoder ownership flag
CStringPairs(const CTempString arg_sep,const CTempString val_sep,IStringDecoder * decoder=NULL,EOwnership own_decoder=eTakeOwnership,IStringEncoder * encoder=NULL,EOwnership own_encoder=eTakeOwnership)4560     CStringPairs(const CTempString arg_sep,
4561                  const CTempString val_sep,
4562                  IStringDecoder*   decoder = NULL,
4563                  EOwnership        own_decoder = eTakeOwnership,
4564                  IStringEncoder*   encoder = NULL,
4565                  EOwnership        own_encoder = eTakeOwnership)
4566         : m_ArgSep(arg_sep),
4567           m_ValSep(val_sep),
4568           m_Decoder(decoder, own_decoder),
4569           m_Encoder(encoder, own_encoder)
4570     {
4571     }
4572 
4573     /// Create parser with the selected URL-encoding/decoding options
4574     /// and default separators.
4575     ///
4576     /// @param decode_flag
4577     ///   URL-decoding flag
4578     /// @param encode_flag
4579     ///   URL-encoding flag
CStringPairs(NStr::EUrlDecode decode_flag,NStr::EUrlEncode encode_flag)4580     CStringPairs(NStr::EUrlDecode decode_flag,
4581                  NStr::EUrlEncode encode_flag)
4582         : m_ArgSep("&"),
4583           m_ValSep("="),
4584           m_Decoder(new CStringDecoder_Url(decode_flag), eTakeOwnership),
4585           m_Encoder(new CStringEncoder_Url(encode_flag), eTakeOwnership)
4586     {
4587     }
4588 
~CStringPairs(void)4589     virtual ~CStringPairs(void) {}
4590 
4591     /// Set string decoder.
4592     ///
4593     /// @param decoder
4594     ///   String decoder (Url, Xml etc.)
4595     /// @param own
4596     ///   Decoder ownership flag
SetDecoder(IStringDecoder * decoder,EOwnership own=eTakeOwnership)4597     void SetDecoder(IStringDecoder* decoder, EOwnership own = eTakeOwnership)
4598         { m_Decoder.reset(decoder, own); }
4599     /// Get decoder or NULL. Does not affect decoder ownership.
GetDecoder(void)4600     IStringDecoder* GetDecoder(void) { return m_Decoder.get(); }
4601 
4602     /// Set string encoder.
4603     ///
4604     /// @param encoder
4605     ///   String encoder (Url, Xml etc.)
4606     /// @param own
4607     ///   Encoder ownership flag
SetEncoder(IStringEncoder * encoder,EOwnership own=eTakeOwnership)4608     void SetEncoder(IStringEncoder* encoder, EOwnership own = eTakeOwnership)
4609         { m_Encoder.reset(encoder, own); }
4610     /// Get encoder or NULL. Does not affect encoder ownership.
GetEncoder(void)4611     IStringEncoder* GetEncoder(void) { return m_Encoder.get(); }
4612 
4613     /// Parse the string.
4614     ///
4615     /// @param str
4616     ///   String to parse. The parser assumes the string is formatted like
4617     ///   "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and
4618     ///   value is passed to the decoder (if not NULL) before storing the pair.
4619     /// @param merge_argsep
4620     ///   Flag for merging separators between pairs. By default the separators
4621     ///   are merged to prevent pairs where both name and value are empty.
Parse(const CTempString str,NStr::EMergeDelims merge_argsep=NStr::eMergeDelims)4622     void Parse(const CTempString str,
4623                NStr::EMergeDelims merge_argsep = NStr::eMergeDelims)
4624     {
4625         Parse(m_Data, str, m_ArgSep, m_ValSep,
4626               m_Decoder.get(), eNoOwnership, merge_argsep);
4627     }
4628 
4629     /// Parse the string using the provided decoder, put data into the
4630     /// container.
4631     ///
4632     /// @param pairs
4633     ///   Container to be filled with the parsed name/value pairs
4634     /// @param str
4635     ///   String to parse. The parser assumes the string is formatted like
4636     ///   "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and
4637     ///   value is passed to the decoder (if not NULL) before storing the pair.
4638     /// @param decoder
4639     ///   String decoder (Url, Xml etc.)
4640     /// @param own
4641     ///   Flag indicating if the decoder must be deleted by the function.
4642     /// @param merge_argsep
4643     ///   Flag for merging separators between pairs. By default the separators
4644     ///   are merged to prevent pairs where both name and value are empty.
Parse(TStrPairs & pairs,const CTempString str,const CTempString arg_sep,const CTempString val_sep,IStringDecoder * decoder=NULL,EOwnership own=eTakeOwnership,NStr::EMergeDelims merge_argsep=NStr::eMergeDelims)4645     static void Parse(TStrPairs&         pairs,
4646                       const CTempString  str,
4647                       const CTempString  arg_sep,
4648                       const CTempString  val_sep,
4649                       IStringDecoder*    decoder = NULL,
4650                       EOwnership         own = eTakeOwnership,
4651                       NStr::EMergeDelims merge_argsep = NStr::eMergeDelims)
4652     {
4653         AutoPtr<IStringDecoder> decoder_guard(decoder, own);
4654         list<string> lst;
4655         NStr::Split(str, arg_sep, lst, (NStr::TSplitFlags)merge_argsep);
4656         pairs.clear();
4657         ITERATE(list<string>, it, lst) {
4658             string name, val;
4659             NStr::SplitInTwo(*it, val_sep, name, val);
4660             if ( decoder ) {
4661                 try {
4662                     name = decoder->Decode(name, IStringDecoder::eName);
4663                     val = decoder->Decode(val, IStringDecoder::eValue);
4664                 }
4665                 catch (const CStringException&) {
4666                     // Discard all data
4667                     pairs.clear();
4668                     throw;
4669                 }
4670             }
4671             pairs.insert(pairs.end(), TStrPair(name, val));
4672         }
4673     }
4674 
4675     /// Merge name-value pairs into a single string using the currently set
4676     /// separators and the provided encoder if any.
Merge(void) const4677     string Merge(void) const
4678     {
4679         return Merge(m_Data, m_ArgSep, m_ValSep,
4680                      m_Encoder.get(), eNoOwnership);
4681     }
4682 
4683     /// Merge name-value pairs from the provided container, separators
4684     /// and encoder. Delete the encoder if the ownership flag allows.
4685     ///
4686     /// @param pairs
4687     ///   Container with the name/value pairs to be merged.
4688     /// @param arg_sep
4689     ///   Separator to be inserted between pairs.
4690     /// @param val_sep
4691     ///   Separator to be inserted between name and value.
4692     /// @param encoder
4693     ///   String encoder (Url, Xml etc.)
4694     /// @param own
4695     ///   Flag indicating if the encoder must be deleted by the function.
Merge(const TStrPairs & pairs,const string & arg_sep,const string & val_sep,IStringEncoder * encoder=NULL,EOwnership own=eTakeOwnership)4696     static string Merge(const TStrPairs& pairs,
4697                         const string&    arg_sep,
4698                         const string&    val_sep,
4699                         IStringEncoder*  encoder = NULL,
4700                         EOwnership       own = eTakeOwnership)
4701     {
4702         AutoPtr<IStringEncoder> encoder_guard(encoder, own);
4703         string ret;
4704         ITERATE(typename TStrPairs, it, pairs) {
4705             if ( !ret.empty() ) {
4706                 ret += arg_sep;
4707             }
4708             if ( encoder ) {
4709                 ret += encoder->Encode(it->first, IStringEncoder::eName) +
4710                     val_sep +
4711                     encoder->Encode(it->second, IStringEncoder::eValue);
4712             }
4713             else {
4714                 ret += it->first + val_sep + it->second;
4715             }
4716         }
4717         return ret;
4718     }
4719 
4720     /// Read data
GetPairs(void) const4721     const TStrPairs& GetPairs(void) const { return m_Data; }
4722     /// Get non-const data
GetPairs(void)4723     TStrPairs& GetPairs(void) { return m_Data; }
4724 
4725 private:
4726     string                  m_ArgSep;   // Separator between name+value pairs ("&")
4727     string                  m_ValSep;   // Separator between name and value ("=")
4728     AutoPtr<IStringDecoder> m_Decoder;  // String decoder (Url, Xml etc.)
4729     AutoPtr<IStringEncoder> m_Encoder;  // String encoder (Url, Xml etc.)
4730     TStrPairs               m_Data;     // Parsed data
4731 };
4732 
4733 
4734 typedef vector<pair<string, string> > TStringPairsVector;
4735 typedef CStringPairs<TStringPairsVector> CStringPairsParser;
4736 
4737 
4738 /////////////////////////////////////////////////////////////////////////////
4739 ///
4740 /// CEncodedString --
4741 ///
4742 /// Class to detect if a string needs to be URL-encoded and hold both
4743 /// encoded and original versions.
4744 ///
4745 
4746 class NCBI_XNCBI_EXPORT CEncodedString
4747 {
4748 public:
CEncodedString(void)4749     CEncodedString(void) {}
4750     CEncodedString(const CTempString s,
4751                    NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);
4752 
4753     /// Set new original string
4754     void SetString(const CTempString s,
4755                    NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);
4756 
4757     /// Check if the original string was encoded.
IsEncoded(void) const4758     bool IsEncoded(void) const { return m_Encoded.get() != 0; }
4759     /// Get the original unencoded string
GetOriginalString(void) const4760     const string& GetOriginalString(void) const { return m_Original; }
4761     /// Get encoded string
GetEncodedString(void) const4762     const string& GetEncodedString(void) const
4763         { return IsEncoded() ? *m_Encoded : m_Original; }
4764 
4765     /// Check if the string is empty
IsEmpty(void) const4766     bool IsEmpty(void) const { return m_Original.empty(); }
4767 
4768 private:
4769     string           m_Original;
4770     unique_ptr<string> m_Encoded;
4771 };
4772 
4773 
/////////////////////////////////////////////////////////////////////////////
//  Predicates
//
4777 
4778 
4779 /////////////////////////////////////////////////////////////////////////////
4780 ///
4781 /// Define Case-sensitive string comparison methods.
4782 ///
4783 /// Used as arguments to template functions for specifying the type of
4784 /// comparison.
4785 
4786 template <typename T>
4787 struct PCase_Generic
4788 {
4789     /// Return difference between "s1" and "s2".
4790     int Compare(const T& s1, const T& s2) const;
4791 
4792     /// Return TRUE if s1 < s2.
4793     bool Less(const T& s1, const T& s2) const;
4794 
4795     /// Return TRUE if s1 == s2.
4796     bool Equals(const T& s1, const T& s2) const;
4797 
4798     /// Return TRUE if s1 < s2.
4799     bool operator()(const T& s1, const T& s2) const;
4800 };
4801 
4802 typedef PCase_Generic<string>       PCase;
4803 typedef PCase_Generic<const char *> PCase_CStr;
4804 
4805 
4806 
/////////////////////////////////////////////////////////////////////////////
///
/// PNocase_Generic --
///
/// Case-insensitive comparison predicate.
///
/// Meant to be supplied as a template argument wherever the kind of
/// string comparison has to be selectable.
///
/// @sa PNocase_Conditional_Generic

template <class T>
struct PNocase_Generic
{
    /// Compare "s1" with "s2" ignoring case; the sign of the result gives
    /// the ordering (negative: s1 < s2, zero: equal, positive: s1 > s2).
    int Compare(const T& s1, const T& s2) const;

    /// TRUE when s1 orders before s2, ignoring case.
    bool Less(const T& s1, const T& s2) const;

    /// TRUE when s1 and s2 compare equal, ignoring case.
    bool Equals(const T& s1, const T& s2) const;

    /// Function-call form of Less(), usable as an ordering functor.
    bool operator()(const T& s1, const T& s2) const;
};

using PNocase      = PNocase_Generic<string>;
using PNocase_CStr = PNocase_Generic<const char *>;
4834 
4835 
4836 /////////////////////////////////////////////////////////////////////////////
4837 ///
4838 /// Define Case-insensitive string comparison methods.
4839 /// Case sensitivity can be turned on and off at runtime.
4840 ///
4841 /// Used as arguments to template functions for specifying the type of
4842 /// comparison.
4843 ///
4844 /// @sa PNocase_Generic
4845 
4846 template <typename T>
4847 class PNocase_Conditional_Generic
4848 {
4849 public:
4850     /// Construction
4851     PNocase_Conditional_Generic(NStr::ECase case_sens = NStr::eCase);
4852 
4853     /// Get comparison type
GetCase() const4854     NStr::ECase GetCase() const { return m_CaseSensitive; }
4855 
4856     /// Set comparison type
SetCase(NStr::ECase case_sens)4857     void SetCase(NStr::ECase case_sens) { m_CaseSensitive = case_sens; }
4858 
4859     /// Return difference between "s1" and "s2".
4860     int Compare(const T& s1, const T& s2) const;
4861 
4862     /// Return TRUE if s1 < s2.
4863     bool Less(const T& s1, const T& s2) const;
4864 
4865     /// Return TRUE if s1 == s2.
4866     bool Equals(const T& s1, const T& s2) const;
4867 
4868     /// Return TRUE if s1 < s2 ignoring case.
4869     bool operator()(const T& s1, const T& s2) const;
4870 private:
4871     NStr::ECase m_CaseSensitive; ///< case sensitive when TRUE
4872 };
4873 
4874 typedef PNocase_Conditional_Generic<string>       PNocase_Conditional;
4875 typedef PNocase_Conditional_Generic<const char *> PNocase_Conditional_CStr;
4876 
4877 
4878 /////////////////////////////////////////////////////////////////////////////
4879 ///
4880 /// PQuickStringLess implements an ordering of strings,
4881 /// that is more efficient than usual lexicographical order.
4882 /// It can be used in cases when no specific order is required,
4883 /// e.g. only simple key lookup is needed.
4884 /// Current implementation first compares lengths of strings,
4885 /// and will compare string data only when lengths are the same.
4886 ///
4887 struct PQuickStringLess
4888 {
operator ()PQuickStringLess4889     bool operator()(const CTempString s1, const CTempString s2) const {
4890         size_t len1 = s1.size(), len2 = s2.size();
4891         return len1 < len2 ||
4892             (len1 == len2 && ::memcmp(s1.data(), s2.data(), len1) < 0);
4893     }
4894 };
4895 
4896 
4897 /////////////////////////////////////////////////////////////////////////////
4898 //  Algorithms
4899 //
4900 
4901 
/// Test two arguments for equivalence by delegating to the predicate's
/// Equals() method.
template<class Arg1, class Arg2, class Pred>
inline
bool AStrEquiv(const Arg1& x, const Arg2& y, Pred pr)
{
    const bool equivalent = pr.Equals(x, y);
    return equivalent;
}
4909 
4910 
4911 /* @} */
4912 
4913 
4914 
4915 /////////////////////////////////////////////////////////////////////////////
4916 //
4917 //  IMPLEMENTATION of INLINE functions
4918 //
4919 /////////////////////////////////////////////////////////////////////////////
4920 
4921 
4922 /////////////////////////////////////////////////////////////////////////////
4923 //  CNcbiEmptyString::
4924 //
4925 #if !defined(NCBI_OS_MSWIN)  &&  \
4926     !(defined(NCBI_OS_LINUX)  &&  \
4927       (defined(NCBI_COMPILER_GCC)  ||  defined(NCBI_COMPILER_ANY_CLANG)))
4928 inline
Get(void)4929 const string& CNcbiEmptyString::Get(void)
4930 {
4931     const string* str = m_Str;
4932     return str ? *str: FirstGet();
4933 }
4934 
4935 #  ifdef HAVE_WSTRING
4936 inline
Get(void)4937 const wstring& CNcbiEmptyWString::Get(void)
4938 {
4939     const wstring* str = m_Str;
4940     return str ? *str: FirstGet();
4941 }
4942 #  endif
4943 #endif
4944 
4945 
4946 
4947 /////////////////////////////////////////////////////////////////////////////
4948 //  NStr::
4949 //
4950 
4951 inline
IntToString(int value,TNumToStringFlags flags,int base)4952 string NStr::IntToString(int value,
4953                          TNumToStringFlags flags, int base)
4954 {
4955     string ret;
4956     IntToString(ret, value, flags, base);
4957     return ret;
4958 }
4959 
4960 inline
IntToString(unsigned int value,TNumToStringFlags flags,int base)4961 string NStr::IntToString(unsigned int value,
4962                          TNumToStringFlags flags, int base)
4963 {
4964     string ret;
4965     IntToString(ret, (int)value, flags, base);
4966     return ret;
4967 }
4968 
4969 inline
IntToString(string & out_str,unsigned int value,TNumToStringFlags flags,int base)4970 void NStr::IntToString(string& out_str, unsigned int value,
4971                        TNumToStringFlags flags, int base)
4972 {
4973     IntToString(out_str, (int)value, flags, base);
4974 }
4975 
4976 inline
UIntToString(unsigned int value,TNumToStringFlags flags,int base)4977 string NStr::UIntToString(unsigned int value,
4978                           TNumToStringFlags flags, int base)
4979 {
4980     string ret;
4981     ULongToString(ret, value, flags, base);
4982     return ret;
4983 }
4984 
4985 inline
UIntToString(int value,TNumToStringFlags flags,int base)4986 string NStr::UIntToString(int value,
4987                           TNumToStringFlags flags, int base)
4988 {
4989     string ret;
4990     UIntToString(ret, (unsigned int)value, flags, base);
4991     return ret;
4992 }
4993 
4994 inline
UIntToString(string & out_str,unsigned int value,TNumToStringFlags flags,int base)4995 void NStr::UIntToString(string& out_str, unsigned int value,
4996                         TNumToStringFlags flags, int base)
4997 {
4998     ULongToString(out_str, value, flags, base);
4999 }
5000 
5001 inline
UIntToString(string & out_str,int value,TNumToStringFlags flags,int base)5002 void NStr::UIntToString(string& out_str, int value,
5003                         TNumToStringFlags flags, int base)
5004 {
5005     UIntToString(out_str, (unsigned int)value, flags, base);
5006 }
5007 
5008 inline
LongToString(long value,TNumToStringFlags flags,int base)5009 string NStr::LongToString(long value,
5010                           TNumToStringFlags flags, int base)
5011 {
5012     string ret;
5013     LongToString(ret, value, flags, base);
5014     return ret;
5015 }
5016 
5017 inline
ULongToString(unsigned long value,TNumToStringFlags flags,int base)5018 string NStr::ULongToString(unsigned long value,
5019                            TNumToStringFlags flags, int base)
5020 {
5021     string ret;
5022     ULongToString(ret, value, flags, base);
5023     return ret;
5024 }
5025 
5026 inline
Int8ToString(Int8 value,TNumToStringFlags flags,int base)5027 string NStr::Int8ToString(Int8 value,
5028                           TNumToStringFlags flags, int base)
5029 {
5030     string ret;
5031     NStr::Int8ToString(ret, value, flags, base);
5032     return ret;
5033 }
5034 
5035 inline
UInt8ToString(Uint8 value,TNumToStringFlags flags,int base)5036 string NStr::UInt8ToString(Uint8 value,
5037                            TNumToStringFlags flags, int base)
5038 {
5039     string ret;
5040     NStr::UInt8ToString(ret, value, flags, base);
5041     return ret;
5042 }
5043 
5044 inline
UInt8ToString_DataSize(Uint8 value,TNumToStringFlags flags,unsigned int max_digits)5045 string NStr::UInt8ToString_DataSize(Uint8 value,
5046                                     TNumToStringFlags flags /* = 0 */,
5047                                     unsigned int max_digits /* = 3 */)
5048 {
5049     string ret;
5050     NStr::UInt8ToString_DataSize(ret, value, flags, max_digits);
5051     return ret;
5052 }
5053 
5054 inline
DoubleToString(double value,int precision,TNumToStringFlags flags)5055 string NStr::DoubleToString(double value, int precision,
5056                             TNumToStringFlags flags)
5057 {
5058     string str;
5059     DoubleToString(str, value, precision, flags);
5060     return str;
5061 }
5062 
5063 inline
HexChar(char ch)5064 int NStr::HexChar(char ch)
5065 {
5066     unsigned int rc = ch - '0';
5067     if (rc <= 9) {
5068         return rc;
5069     } else {
5070         rc = (ch | ' ') - 'a';
5071         return rc <= 5 ? int(rc + 10) : -1;
5072     }
5073 }
5074 
5075 inline
strcmp(const char * s1,const char * s2)5076 int NStr::strcmp(const char* s1, const char* s2)
5077 {
5078     return ::strcmp(s1, s2);
5079 }
5080 
5081 inline
strncmp(const char * s1,const char * s2,size_t n)5082 int NStr::strncmp(const char* s1, const char* s2, size_t n)
5083 {
5084     return ::strncmp(s1, s2, n);
5085 }
5086 
5087 inline
strcasecmp(const char * s1,const char * s2)5088 int NStr::strcasecmp(const char* s1, const char* s2)
5089 {
5090 #if defined(HAVE_STRICMP)
5091 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400)
5092     return ::_stricmp(s1, s2);
5093 #else
5094     return ::stricmp(s1, s2);
5095 #endif
5096 
5097 #elif defined(HAVE_STRCASECMP_LC)
5098     return ::strcasecmp(s1, s2);
5099 
5100 #else
5101     int diff = 0;
5102     for ( ;; ++s1, ++s2) {
5103         char c1 = *s1;
5104         // calculate difference
5105         diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2));
5106         // if end of string or different
5107         if (!c1  ||  diff)
5108             break; // return difference
5109     }
5110     return diff;
5111 #endif
5112 }
5113 
5114 inline
strncasecmp(const char * s1,const char * s2,size_t n)5115 int NStr::strncasecmp(const char* s1, const char* s2, size_t n)
5116 {
5117 #if defined(HAVE_STRICMP)
5118 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400)
5119     return ::_strnicmp(s1, s2, n);
5120 #else
5121     return ::strnicmp(s1, s2, n);
5122 #endif
5123 
5124 #elif defined(HAVE_STRCASECMP_LC)
5125     return ::strncasecmp(s1, s2, n);
5126 
5127 #else
5128     int diff = 0;
5129     for ( ; ; ++s1, ++s2, --n) {
5130         if (n == 0)
5131             return 0;
5132         char c1 = *s1;
5133         // calculate difference
5134         diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2));
5135         // if end of string or different
5136         if (!c1  ||  diff)
5137             break; // return difference
5138     }
5139     return diff;
5140 #endif
5141 }
5142 
5143 inline
strftime(char * s,size_t maxsize,const char * format,const struct tm * timeptr)5144 size_t NStr::strftime(char* s, size_t maxsize, const char* format,
5145                       const struct tm* timeptr)
5146 {
5147     string x_format = Replace(format, "%T", "%H:%M:%S");
5148     ReplaceInPlace(x_format,          "%D", "%m/%d/%y");
5149     return ::strftime(s, maxsize, x_format.c_str(), timeptr);
5150 }
5151 
5152 inline
CompareCase(const char * s1,const char * s2)5153 int NStr::CompareCase(const char* s1, const char* s2)
5154 {
5155     return NStr::strcmp(s1, s2);
5156 }
5157 
5158 inline
CompareNocase(const char * s1,const char * s2)5159 int NStr::CompareNocase(const char* s1, const char* s2)
5160 {
5161     return NStr::strcasecmp(s1, s2);
5162 }
5163 
5164 inline
Compare(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const char * s2,ECase use_case)5165 int NStr::Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
5166                   const char* s2, ECase use_case)
5167 {
5168     return use_case == eCase ? CompareCase(s1.substr(pos, n), s2)
5169                              : CompareNocase(s1.substr(pos, n), s2);
5170 }
5171 
5172 inline
Compare(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const CTempString s2,ECase use_case)5173 int NStr::Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
5174                   const CTempString s2, ECase use_case)
5175 {
5176     return use_case == eCase ? CompareCase(s1.substr(pos, n), s2)
5177                              : CompareNocase(s1.substr(pos, n), s2);
5178 }
5179 
5180 inline
Compare(const char * s1,const char * s2,ECase use_case)5181 int NStr::Compare(const char* s1, const char* s2, ECase use_case)
5182 {
5183     return use_case == eCase ? CompareCase(s1, s2) : CompareNocase(s1, s2);
5184 }
5185 
5186 inline
Compare(const CTempStringEx s1,const CTempStringEx s2,ECase use_case)5187 int NStr::Compare(const CTempStringEx s1, const CTempStringEx s2, ECase use_case)
5188 {
5189     return use_case == eCase ? CompareCase(s1, s2) : CompareNocase(s1, s2);
5190 }
5191 
5192 inline
EqualCase(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const char * s2)5193 bool NStr::EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char* s2)
5194 {
5195     return s1.substr(pos, n) == s2;
5196 }
5197 
5198 inline
EqualCase(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const CTempString s2)5199 bool NStr::EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const CTempString s2)
5200 {
5201     return s1.substr(pos, n) == s2;
5202 }
5203 
5204 inline
EqualCase(const char * s1,const char * s2)5205 bool NStr::EqualCase(const char* s1, const char* s2)
5206 {
5207     size_t n = strlen(s1);
5208     if (n != strlen(s2)) {
5209         return false;
5210     }
5211     return NStr::strncmp(s1, s2, n) == 0;
5212 }
5213 
5214 inline
EqualCase(const CTempStringEx s1,const CTempStringEx s2)5215 bool NStr::EqualCase(const CTempStringEx s1, const CTempStringEx s2)
5216 {
5217     return s1 == s2;
5218 }
5219 
5220 inline
EqualNocase(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const char * s2)5221 bool NStr::EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char* s2)
5222 {
5223     return CompareNocase(s1.substr(pos, n), s2) == 0;
5224 }
5225 
5226 inline
EqualNocase(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const CTempString s2)5227 bool NStr::EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const CTempString s2)
5228 {
5229     return CompareNocase(s1.substr(pos, n), s2) == 0;
5230 }
5231 
5232 inline
EqualNocase(const char * s1,const char * s2)5233 bool NStr::EqualNocase(const char* s1, const char* s2)
5234 {
5235     size_t n = strlen(s1);
5236     if (n != strlen(s2)) {
5237         return false;
5238     }
5239     return NStr::strncasecmp(s1, s2, n) == 0;
5240 }
5241 
5242 inline
EqualNocase(const CTempStringEx s1,const CTempStringEx s2)5243 bool NStr::EqualNocase(const CTempStringEx s1, const CTempStringEx s2)
5244 {
5245     if (s1.length() != s2.length()) {
5246         return false;
5247     }
5248     return CompareNocase(s1, s2) == 0;
5249 }
5250 
5251 inline
Equal(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const char * s2,ECase use_case)5252 bool NStr::Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
5253                  const char* s2, ECase use_case)
5254 {
5255     return use_case == eCase ? EqualCase(s1.substr(pos, n), s2)
5256                              : EqualNocase(s1.substr(pos, n), s2);
5257 }
5258 
5259 inline
Equal(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const CTempString s2,ECase use_case)5260 bool NStr::Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
5261                  const CTempString s2, ECase use_case)
5262 {
5263     return use_case == eCase ? EqualCase(s1.substr(pos, n), s2)
5264                              : EqualNocase(s1.substr(pos, n), s2);
5265 }
5266 
5267 inline
Equal(const char * s1,const char * s2,ECase use_case)5268 bool NStr::Equal(const char* s1, const char* s2, ECase use_case)
5269 {
5270     return use_case == eCase ? EqualCase(s1, s2) : EqualNocase(s1, s2);
5271 }
5272 
5273 inline
Equal(const CTempStringEx s1,const CTempStringEx s2,ECase use_case)5274 bool NStr::Equal(const CTempStringEx s1, const CTempStringEx s2, ECase use_case)
5275 {
5276     return use_case == eCase ? EqualCase(s1, s2) : EqualNocase(s1, s2);
5277 }
5278 
5279 inline
StartsWith(const CTempString str,const CTempString start,ECase use_case)5280 bool NStr::StartsWith(const CTempString str, const CTempString start, ECase use_case)
5281 {
5282     return str.size() >= start.size()  &&
5283            Equal(str.substr(0, start.size()), start, use_case);
5284 }
5285 
5286 inline
StartsWith(const CTempString str,char start,ECase use_case)5287 bool NStr::StartsWith(const CTempString str, char start, ECase use_case)
5288 {
5289     return !str.empty()  &&
5290            (use_case == eCase ? (str[0] == start)
5291                               : (str[0] == start  ||
5292                                  toupper((unsigned char) str[0]) == start  ||
5293                                  tolower((unsigned char) str[0]))
5294            );
5295 }
5296 
5297 inline
EndsWith(const CTempString str,const CTempString end,ECase use_case)5298 bool NStr::EndsWith(const CTempString str, const CTempString end, ECase use_case)
5299 {
5300     return str.size() >= end.size()  &&
5301            Equal(str.substr(str.size() - end.size(), end.size()), end, use_case);
5302 }
5303 
5304 inline
EndsWith(const CTempString str,char end,ECase use_case)5305 bool NStr::EndsWith(const CTempString str, char end, ECase use_case)
5306 {
5307     if (!str.empty()) {
5308         char last = str[str.length() - 1];
5309         return use_case == eCase ? (last == end)
5310                                  : (last == end  ||
5311                                     toupper((unsigned char) last) == end  ||
5312                                     tolower((unsigned char) last) == end);
5313     }
5314     return false;
5315 }
5316 
5317 inline
CommonPrefixSize(const CTempString s1,const CTempString s2)5318 SIZE_TYPE NStr::CommonPrefixSize(const CTempString s1, const CTempString s2)
5319 {
5320     const SIZE_TYPE n = min(s1.length(), s2.length());
5321     for (SIZE_TYPE i = 0;  i < n;  i++) {
5322         if (s1[i] != s2[i]) {
5323             return i;
5324         }
5325     }
5326     return n;
5327 }
5328 
5329 inline
CommonSuffixSize(const CTempString s1,const CTempString s2)5330 SIZE_TYPE NStr::CommonSuffixSize(const CTempString s1, const CTempString s2)
5331 {
5332     const SIZE_TYPE len1 = s1.length();
5333     const SIZE_TYPE len2 = s2.length();
5334     const SIZE_TYPE n = min(len1, len2);
5335     for (SIZE_TYPE i = 1; i <= n; i++) {
5336         if (s1[len1 - i] != s2[len2 - i]) {
5337             return i - 1;
5338         }
5339     }
5340     return n;
5341 }
5342 
5343 inline
Find(const CTempString str,const CTempString pattern,SIZE_TYPE start,SIZE_TYPE end,EOccurrence where,ECase use_case)5344 SIZE_TYPE NStr::Find(const CTempString str, const CTempString pattern,
5345                      SIZE_TYPE start, SIZE_TYPE end, EOccurrence where,
5346                      ECase use_case)
5347 {
5348     SIZE_TYPE pos = Find(CTempString(str, start, end - start), pattern, use_case,
5349                          where == eFirst ? eForwardSearch : eReverseSearch, 0);
5350     if (pos == NPOS) {
5351         return NPOS;
5352     }
5353     return pos + start;
5354 }
5355 
5356 // @deprecated
5357 inline
FindCase(const CTempString str,const CTempString pattern,SIZE_TYPE start,SIZE_TYPE end,EOccurrence where)5358 SIZE_TYPE NStr::FindCase(const CTempString str, const CTempString pattern,
5359                          SIZE_TYPE start, SIZE_TYPE end, EOccurrence where)
5360 {
5361     if (where == eFirst) {
5362         SIZE_TYPE pos = str.find(pattern, start);
5363         return (pos == NPOS  ||  (pos + pattern.length()) > end) ? NPOS : pos;
5364     } else {
5365         SIZE_TYPE pos = str.rfind(pattern, end);
5366         return (pos == NPOS  ||  pos < start) ? NPOS : pos;
5367     }
5368 }
5369 
5370 inline
FindCase(const CTempString str,const CTempString pattern)5371 SIZE_TYPE NStr::FindCase(const CTempString str, const CTempString pattern)
5372 {
5373     return Find(str, pattern, eCase);
5374 }
5375 
5376 inline
FindCase(const CTempString str,const CTempString pattern,SIZE_TYPE start)5377 SIZE_TYPE NStr::FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start)
5378 {
5379     SIZE_TYPE pos = Find(CTempString(str, start), pattern, eCase);
5380     if (pos == NPOS) {
5381         return NPOS;
5382     }
5383     return pos + start;
5384 }
5385 
5386 inline
FindNoCase(const CTempString str,const CTempString pattern)5387 SIZE_TYPE NStr::FindNoCase(const CTempString str, const CTempString pattern)
5388 {
5389     return Find(str, pattern, eNocase);
5390 }
5391 
5392 inline
FindNoCase(const CTempString str,const CTempString pattern,SIZE_TYPE start)5393 SIZE_TYPE NStr::FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start)
5394 {
5395     SIZE_TYPE pos = Find(CTempString(str, start), pattern, eNocase);
5396     if (pos == NPOS) {
5397         return NPOS;
5398     }
5399     return pos + start;
5400 }
5401 
5402 inline
FindCase(const list<string> & lst,const CTempString val)5403 const string* NStr::FindCase(const list<string>& lst, const CTempString val)
5404 {
5405     return Find(lst, val, eCase);
5406 }
5407 
5408 inline
FindNoCase(const list<string> & lst,const CTempString val)5409 const string* NStr::FindNoCase(const list <string>& lst, const CTempString val)
5410 {
5411     return Find(lst, val, eNocase);
5412 }
5413 
5414 inline
FindCase(const vector<string> & vec,const CTempString val)5415 const string* NStr::FindCase(const vector <string>& vec, const CTempString val)
5416 {
5417     return Find(vec, val, eCase);
5418 }
5419 
5420 inline
FindNoCase(const vector<string> & vec,const CTempString val)5421 const string* NStr::FindNoCase(const vector <string>& vec, const CTempString val)
5422 {
5423     return Find(vec, val, eNocase);
5424 }
5425 
5426 template<typename TIterator, typename FTransform>
5427 string
TransformJoin(TIterator from,TIterator to,const CTempString & delim,FTransform fnTransform)5428 NStr::TransformJoin( TIterator from, TIterator to, const CTempString& delim, FTransform fnTransform)
5429 {
5430     if (from == to) {
5431         return kEmptyStr;
5432     }
5433     string result(fnTransform(*from++));
5434     for ( ; from != to; ++from) {
5435         result.append(delim).append(fnTransform(*from));
5436     }
5437     return result;
5438 }
5439 
5440 template<typename TIterator>
5441 string
xx_Join(TIterator from,TIterator to,const CTempString & delim)5442 NStr::xx_Join( TIterator from, TIterator to, const CTempString& delim)
5443 {
5444     if (from == to) {
5445         return kEmptyStr;
5446     }
5447     string result(*from++);
5448     size_t sz_all = 0, sz_delim = delim.size();
5449     for ( TIterator f = from; f != to; ++f) {
5450         sz_all += string(*f).size() + sz_delim;
5451     }
5452     result.reserve(result.size() + sz_all);
5453     for ( ; from != to; ++from) {
5454         result.append(delim).append(string(*from));
5455     }
5456     return result;
5457 }
5458 
5459 inline
Wrap(const string & str,SIZE_TYPE width,list<string> & arr,NStr::TWrapFlags flags,const string & prefix,const string * prefix1)5460 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr,
5461                          NStr::TWrapFlags flags, const string& prefix,
5462                          const string* prefix1)
5463 {
5464     return Wrap(str, width, arr, flags, &prefix, prefix1);
5465 }
5466 
5467 inline
Wrap(const string & str,SIZE_TYPE width,list<string> & arr,NStr::TWrapFlags flags,const string & prefix,const string & prefix1)5468 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr,
5469                          NStr::TWrapFlags flags, const string& prefix,
5470                          const string& prefix1)
5471 {
5472     return Wrap(str, width, arr, flags, &prefix, &prefix1);
5473 }
5474 
5475 inline
WrapList(const list<string> & l,SIZE_TYPE width,const string & delim,list<string> & arr,NStr::TWrapFlags flags,const string & prefix,const string * prefix1)5476 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,
5477                              const string& delim, list<string>& arr,
5478                              NStr::TWrapFlags flags, const string& prefix,
5479                              const string* prefix1)
5480 {
5481     return WrapList(l, width, delim, arr, flags, &prefix, prefix1);
5482 }
5483 
5484 inline
WrapList(const list<string> & l,SIZE_TYPE width,const string & delim,list<string> & arr,NStr::TWrapFlags flags,const string & prefix,const string & prefix1)5485 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,
5486                              const string& delim, list<string>& arr,
5487                              NStr::TWrapFlags flags, const string& prefix,
5488                              const string& prefix1)
5489 {
5490     return WrapList(l, width, delim, arr, flags, &prefix, &prefix1);
5491 }
5492 
5493 inline
Justify(const CTempString str,SIZE_TYPE width,list<string> & par,const CTempString pfx,const CTempString * pfx1)5494 list<string>& NStr::Justify(const CTempString str, SIZE_TYPE width,
5495                             list<string>& par, const CTempString pfx,
5496                             const CTempString* pfx1)
5497 {
5498     return Justify(str, width, par, &pfx, pfx1);
5499 }
5500 
5501 inline
Justify(const CTempString str,SIZE_TYPE width,list<string> & par,const CTempString pfx,const CTempString pfx1)5502 list<string>& NStr::Justify(const CTempString str, SIZE_TYPE width,
5503                             list<string>& par, const CTempString pfx,
5504                             const CTempString pfx1)
5505 {
5506     return Justify(str, width, par, &pfx, &pfx1);
5507 }
5508 
5509 
5510 
5511 /////////////////////////////////////////////////////////////////////////////
5512 //  CUtf8::
5513 //
5514 
GetValidSymbolCount(const CTempString & src)5515 inline SIZE_TYPE CUtf8::GetValidSymbolCount(const CTempString& src) {
5516     CTempString::const_iterator err;
5517     return x_GetValidSymbolCount(src, err);
5518 }
5519 
GetValidBytesCount(const CTempString & src)5520 inline SIZE_TYPE CUtf8::GetValidBytesCount(const CTempString& src) {
5521     CTempString::const_iterator err;
5522     x_GetValidSymbolCount(src,err);
5523     return (err-src.begin());
5524 }
Decode(const char * & src)5525 inline TUnicodeSymbol CUtf8::Decode(const char*& src) {
5526     return x_Decode(src);
5527 }
5528 #ifndef NCBI_COMPILER_WORKSHOP
Decode(string::const_iterator & src)5529 inline TUnicodeSymbol CUtf8::Decode(string::const_iterator& src) {
5530     return x_Decode(src);
5531 }
5532 #endif
5533 
5534 template <typename TIterator> inline TUnicodeSymbol
x_Decode(TIterator & src)5535 CUtf8::x_Decode(TIterator& src)
5536 {
5537     SIZE_TYPE more=0;
5538     TUnicodeSymbol sym = DecodeFirst(*src,more);
5539     while (more--) {
5540         sym = DecodeNext(sym, *(++src));
5541     }
5542     return sym;
5543 }
5544 
5545 template <typename TChar> basic_string<TChar>
x_AsBasicString(const CTempString & str,const TChar * substitute_on_error,EValidate validate)5546 CUtf8::x_AsBasicString(const CTempString& str,
5547     const TChar* substitute_on_error, EValidate validate)
5548 {
5549     if (validate == eValidate) {
5550         x_Validate(str);
5551     }
5552     TUnicodeSymbol max_char = (TUnicodeSymbol)numeric_limits<TChar>::max();
5553     basic_string<TChar> result;
5554     result.reserve(CUtf8::GetSymbolCount(str) + 1);
5555     CTempString::const_iterator src = str.begin();
5556     CTempString::const_iterator to  = str.end();
5557     for (; src != to; ++src) {
5558         TUnicodeSymbol ch = Decode(src);
5559         if (ch > max_char) {
5560             if (substitute_on_error) {
5561                 result.append(substitute_on_error);
5562                 continue;
5563             } else {
5564                 NCBI_THROW2(CStringException, eConvert,
5565                     "Failed to convert symbol to wide character",
5566                     (src - str.begin()));
5567             }
5568         }
5569         result.append(1, (TChar)ch);
5570     }
5571     return result;
5572 }
5573 
5574 template <typename TChar> CStringUTF8&
x_Append(CStringUTF8 & u8str,const TChar * src,SIZE_TYPE to)5575 CUtf8::x_Append(CStringUTF8& u8str, const TChar* src, SIZE_TYPE to)
5576 {
5577     const TChar* srcBuf;
5578     SIZE_TYPE needed = 0;
5579     SIZE_TYPE pos=0;
5580 
5581     for (pos=0, srcBuf=src;
5582             (to == NPOS) ? (*srcBuf != 0) : (pos<to); ++pos, ++srcBuf) {
5583         needed += x_BytesNeeded( *srcBuf );
5584     }
5585     if ( !needed ) {
5586         return u8str;
5587     }
5588     u8str.reserve(max(u8str.capacity(),u8str.length()+needed+1));
5589     for (pos=0, srcBuf=src;
5590             (to == NPOS) ? (*srcBuf != 0) : (pos<to); ++pos, ++srcBuf) {
5591         x_AppendChar( u8str, *srcBuf );
5592     }
5593     return u8str;
5594 }
5595 
5596 inline  CStringUTF8
TruncateSpaces(const CTempString & str,NStr::ETrunc side)5597 CUtf8::TruncateSpaces(const CTempString& str, NStr::ETrunc side) {
5598     CStringUTF8 u8;
5599     return x_Append(u8, TruncateSpaces_Unsafe(str,side), eEncoding_UTF8, eNoValidate);
5600 }
5601 
5602 // deprecated CStringUTF8 is there
5603 #include <corelib/impl/stringutf8_deprecated.inl>
5604 
5605 
5606 
5607 /////////////////////////////////////////////////////////////////////////////
5608 //  PCase_Generic::
5609 //
5610 
5611 template <typename T>
5612 inline
Compare(const T & s1,const T & s2) const5613 int PCase_Generic<T>::Compare(const T& s1, const T& s2) const
5614 {
5615     return NStr::Compare(s1, s2, NStr::eCase);
5616 }
5617 
5618 template <typename T>
5619 inline
Less(const T & s1,const T & s2) const5620 bool PCase_Generic<T>::Less(const T& s1, const T& s2) const
5621 {
5622     return Compare(s1, s2) < 0;
5623 }
5624 
5625 template <typename T>
5626 inline
Equals(const T & s1,const T & s2) const5627 bool PCase_Generic<T>::Equals(const T& s1, const T& s2) const
5628 {
5629     return Compare(s1, s2) == 0;
5630 }
5631 
5632 template <typename T>
5633 inline
operator ()(const T & s1,const T & s2) const5634 bool PCase_Generic<T>::operator()(const T& s1, const T& s2) const
5635 {
5636     return Less(s1, s2);
5637 }
5638 
5639 
5640 
5641 ////////////////////////////////////////////////////////////////////////////
5642 //  PNocase_Generic<T>::
5643 //
5644 
5645 
5646 template <typename T>
5647 inline
Compare(const T & s1,const T & s2) const5648 int PNocase_Generic<T>::Compare(const T& s1, const T& s2) const
5649 {
5650     return NStr::Compare(s1, s2, NStr::eNocase);
5651 }
5652 
5653 template <typename T>
5654 inline
Less(const T & s1,const T & s2) const5655 bool PNocase_Generic<T>::Less(const T& s1, const T& s2) const
5656 {
5657     return Compare(s1, s2) < 0;
5658 }
5659 
5660 template <typename T>
5661 inline
Equals(const T & s1,const T & s2) const5662 bool PNocase_Generic<T>::Equals(const T& s1, const T& s2) const
5663 {
5664     return Compare(s1, s2) == 0;
5665 }
5666 
5667 template <typename T>
5668 inline
operator ()(const T & s1,const T & s2) const5669 bool PNocase_Generic<T>::operator()(const T& s1, const T& s2) const
5670 {
5671     return Less(s1, s2);
5672 }
5673 
5674 ////////////////////////////////////////////////////////////////////////////
5675 //  PNocase_Conditional_Generic<T>::
5676 //
5677 
5678 template <typename T>
5679 inline
PNocase_Conditional_Generic(NStr::ECase cs)5680 PNocase_Conditional_Generic<T>::PNocase_Conditional_Generic(NStr::ECase cs)
5681     : m_CaseSensitive(cs)
5682 {}
5683 
5684 template <typename T>
5685 inline
Compare(const T & s1,const T & s2) const5686 int PNocase_Conditional_Generic<T>::Compare(const T& s1, const T& s2) const
5687 {
5688     return NStr::Compare(s1, s2, m_CaseSensitive);
5689 }
5690 
5691 template <typename T>
5692 inline
Less(const T & s1,const T & s2) const5693 bool PNocase_Conditional_Generic<T>::Less(const T& s1, const T& s2) const
5694 {
5695     return Compare(s1, s2) < 0;
5696 }
5697 
5698 template <typename T>
5699 inline
Equals(const T & s1,const T & s2) const5700 bool PNocase_Conditional_Generic<T>::Equals(const T& s1, const T& s2) const
5701 {
5702     return Compare(s1, s2) == 0;
5703 }
5704 
5705 template <typename T>
5706 inline
operator ()(const T & s1,const T & s2) const5707 bool PNocase_Conditional_Generic<T>::operator()(const T& s1, const T& s2) const
5708 {
5709     return Less(s1, s2);
5710 }
5711 
5712 
5713 END_NCBI_NAMESPACE;
5714 
5715 #endif  /* CORELIB___NCBISTR__HPP */
5716