1 #ifndef CORELIB___NCBISTR__HPP
2 #define CORELIB___NCBISTR__HPP
3
4 /* $Id: ncbistr.hpp 627618 2021-03-16 14:35:43Z ivanov $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Authors: Eugene Vasilchenko, Denis Vakatov
30 *
31 *
32 */
33
34 /// @file ncbistr.hpp
35 /// The NCBI C++ standard methods for dealing with std::string
36
37
38 #include <corelib/ncbi_limits.hpp>
39 #include <corelib/tempstr.hpp>
40 #include <corelib/ncbierror.hpp>
41 #ifdef NCBI_OS_OSF1
42 # include <strings.h>
43 #endif
44 #include <stdarg.h>
45 #include <time.h>
46 #include <set>
47 #include <functional>
48
49
50 BEGIN_NCBI_NAMESPACE;
51
52 /** @addtogroup String
53 *
54 * @{
55 */
56
57 /// Empty "C" string (points to a '\0').
58 NCBI_XNCBI_EXPORT extern const char *const kEmptyCStr;
59 #define NcbiEmptyCStr NCBI_NS_NCBI::kEmptyCStr
60
61 #if defined(HAVE_WSTRING)
62 NCBI_XNCBI_EXPORT extern const wchar_t *const kEmptyWCStr;
63 #define NcbiEmptyWCStr NCBI_NS_NCBI::kEmptyWCStr
64 #endif
65
66 /// Empty "C++" string.
67 #if defined(NCBI_OS_MSWIN) \
68 || (defined(NCBI_OS_LINUX) \
69 && (defined(NCBI_COMPILER_GCC) || defined(NCBI_COMPILER_ANY_CLANG)))
/// Header-only provider of a shared, always-empty std::string.
class CNcbiEmptyString
{
public:
    /// Return a reference to the shared empty string.
    ///
    /// The string is a function-local static object, so every call hands
    /// back a reference to the very same instance.
    static inline const string& Get(void)
    {
        static string s_Empty;
        return s_Empty;
    }
};
81 #if defined(HAVE_WSTRING)
/// Header-only provider of a shared, always-empty std::wstring.
class CNcbiEmptyWString
{
public:
    /// Return a reference to the shared empty wide string.
    ///
    /// The string is a function-local static object, so every call hands
    /// back a reference to the very same instance.
    static const wstring& Get(void)
    {
        static wstring s_Empty;
        return s_Empty;
    }
};
92 #endif
93 #else
/// Empty "C++" string accessor, exported-class variant.
///
/// This variant is compiled when the header-only class guarded by the
/// preceding "#if" is not selected.  Only declarations appear here, so the
/// member functions and the static data member are expected to be defined
/// out of line.
class NCBI_XNCBI_EXPORT CNcbiEmptyString
{
public:
    /// Get string.
    static const string& Get(void);
private:
    /// Helper method to initialize private data member and return
    /// null string.
    /// NOTE(review): presumably invoked by Get() on first use to set m_Str;
    /// confirm against the implementation.
    static const string& FirstGet(void);
    static const string* m_Str;  ///< Null string pointer.
};
105
106 # if defined(HAVE_WSTRING)
/// Empty wide "C++" string accessor, exported-class variant.
///
/// Wide-character counterpart of the exported CNcbiEmptyString; it is only
/// declared when HAVE_WSTRING is defined.  Only declarations appear here, so
/// the member functions and the static data member are expected to be
/// defined out of line.
class NCBI_XNCBI_EXPORT CNcbiEmptyWString
{
public:
    /// Get string.
    static const wstring& Get(void);
private:
    /// Helper method to initialize private data member and return
    /// null string.
    /// NOTE(review): presumably invoked by Get() on first use to set m_Str;
    /// confirm against the implementation.
    static const wstring& FirstGet(void);
    static const wstring* m_Str;  ///< Null string pointer.
};
118 # endif
119 #endif // NCBI_OS_MSWIN....
120
121
122 #define NcbiEmptyString NCBI_NS_NCBI::CNcbiEmptyString::Get()
123 #define kEmptyStr NcbiEmptyString
124
125 #if defined(HAVE_WSTRING)
126 # define NcbiEmptyWString NCBI_NS_NCBI::CNcbiEmptyWString::Get()
127 # define kEmptyWStr NcbiEmptyWString
128 #endif
129
130 // SIZE_TYPE and NPOS
131
132 typedef NCBI_NS_STD::string::size_type SIZE_TYPE;
133 #define NPOS NCBI_NS_STD::string::npos
134
135
136
137 /////////////////////////////////////////////////////////////////////////////
138 // Unicode-related definitions and conversions
139
140 /// Unicode character
141 typedef Uint4 TUnicodeSymbol;
142 /// Unicode string
143 typedef basic_string<TUnicodeSymbol> TStringUnicode;
144
145 #if defined(NCBI_OS_MSWIN) && defined(_UNICODE)
146
147 typedef wchar_t TXChar;
148 typedef wstring TXString;
149
150 # if !defined(_TX)
151 # define _TX(x) L ## x
152 # endif
153
154 # if defined(_DEBUG)
155 # define _T_XSTRING(x) \
156 ncbi::CUtf8::AsBasicString<ncbi::TXChar>(x, NULL, ncbi::CUtf8::eValidate)
157 # else
158 # define _T_XSTRING(x) \
159 ncbi::CUtf8::AsBasicString<ncbi::TXChar>(x, NULL, ncbi::CUtf8::eNoValidate)
160 # endif
161 # define _T_STDSTRING(x) ncbi::CUtf8::AsUTF8(x)
162 # define _T_XCSTRING(x) _T_XSTRING(x).c_str()
163 # define _T_CSTRING(x) _T_STDSTRING(x).c_str()
164
165 # define NcbiEmptyXCStr NcbiEmptyWCStr
166 # define NcbiEmptyXString NcbiEmptyWString
167 # define kEmptyXStr kEmptyWStr
168 # define kEmptyXCStr kEmptyWCStr
169
170 #else
171
172 typedef char TXChar;
173 typedef string TXString;
174
175 # if !defined(_TX)
176 # define _TX(x) x
177 # endif
178
179 # define _T_XSTRING(x) (x)
180 # define _T_STDSTRING(x) (x)
181 # define _T_XCSTRING(x) ncbi::impl_ToCString(x)
182 # define _T_CSTRING(x) (x)
183
184 # define NcbiEmptyXCStr NcbiEmptyCStr
185 # define NcbiEmptyXString NcbiEmptyString
186 # define kEmptyXStr kEmptyStr
187 # define kEmptyXCStr kEmptyCStr
188
/// Helper behind _T_XCSTRING() in the non-Unicode configuration:
/// a C string is passed through untouched.
inline const char* impl_ToCString(const char* s)
{
    return s;
}

/// Helper behind _T_XCSTRING() in the non-Unicode configuration:
/// expose the zero-terminated buffer of a std::string.
/// The returned pointer refers to storage owned by "s".
inline const char* impl_ToCString(const string& s)
{
    return s.c_str();
}
191
192 #endif
193
194
195 /////////////////////////////////////////////////////////////////////////////
196 ///
197
198 #if defined(NCBI_CUTF8_ENCODING_CLASSIC) || !defined(HAVE_ENUM_CLASS)
/// Character encodings (classic, unscoped enumeration).
enum EEncoding {
    eEncoding_Unknown      = 0,
    eEncoding_UTF8         = 1,
    eEncoding_Ascii        = 2,
    /// Note: From the point of view of the C++ Toolkit, the ISO 8859-1
    /// character set includes symbols 0x00 through 0xFF except 0x80
    /// through 0x9F.
    eEncoding_ISO8859_1    = 3,
    eEncoding_Windows_1252 = 4
};
209 #else
210 // Temporary safeguard to protect against implicit conversion of EEncoding
211 // to size_t, etc
212 // @attention Do not use "EEncoding::Xxx" values directly, as they will go
213 // away eventually! Use the "eEncoding_Xxx" values instead.
/// Character encodings (scoped enumeration).
enum class EEncoding {
    /// Do not use this directly! It will go away eventually!
    Unknown      = 0,
    /// Do not use this directly! It will go away eventually!
    UTF8         = 1,
    /// Do not use this directly! It will go away eventually!
    Ascii        = 2,
    /// Do not use this directly! It will go away eventually!
    ISO8859_1    = 3,
    /// Do not use this directly! It will go away eventually!
    Windows_1252 = 4
};
221 #define eEncoding_Unknown EEncoding::Unknown
222 #define eEncoding_UTF8 EEncoding::UTF8
223 #define eEncoding_Ascii EEncoding::Ascii
224 #define eEncoding_ISO8859_1 EEncoding::ISO8859_1
225 #define eEncoding_Windows_1252 EEncoding::Windows_1252
226 #endif
227
228
229 /////////////////////////////////////////////////////////////////////////////
230 ///
231 /// NStr --
232 ///
233 /// Encapsulates class-wide string processing functions.
234
235 class NCBI_XNCBI_EXPORT NStr
236 {
237 public:
238 /// Common conversion flags.
enum EConvErrFlags {
    /// Do not throw an exception on error.
    /// Could be used with methods throwing an exception by default,
    /// ignored otherwise.
    /// Just return zero and set errno to non-zero instead of throwing
    /// an exception.
    /// We recommend the following technique to check against errors
    /// with minimum overhead when this flag is used:
    /// @code
    ///     if (!retval  &&  errno != 0)
    ///        ERROR;
    /// @endcode
    /// And for StringToDouble*() variants:
    /// @code
    ///     if (retval == HUGE_VAL  ||  retval == -HUGE_VAL  ||
    ///         (!retval  &&  errno != 0))
    ///        ERROR;
    /// @endcode
    fConvErr_NoThrow = 1 << 0,

    /*
    fConvErr_NoErrno = (1 << 1),  ///< Do not set errno at all.
                                  ///< If used together with fConvErr_NoThrow flag
                                  ///< returns 0 on error (-1 for StringToNonNegativeInt).
    */

    /// Set errno, but do not set CNcbiError message on error.
    fConvErr_NoErrMessage = 1 << 2
};
/// Bitwise OR of "EConvErrFlags"
using TConvErrFlags = int;
264
265 /// Number to string conversion flags.
266 ///
267 /// NOTE:
/// If the base specified in the *ToString() methods is not the default 10,
/// then some flags like fWithSign and fWithCommas will be ignored.
enum ENumToStringFlags {
    /// Use lowercase letters for string representation for bases above 10
    fUseLowercase            = 1 << 4,
    /// Prefix the output value with radix for "well-known" bases
    /// like 8 ("0") and 16 ("0x")
    fWithRadix               = 1 << 5,
    /// Prefix the output value with a sign ('+'/'-')
    fWithSign                = 1 << 6,
    /// Use commas as thousands separator
    fWithCommas              = 1 << 7,
    /// DoubleToString*(): Use n.nnnn format for double conversions
    fDoubleFixed             = 1 << 8,
    /// DoubleToString*(): Use scientific format for double conversions
    fDoubleScientific        = 1 << 9,
    /// DoubleToString*(): Use C locale for double conversions
    fDoublePosix             = 1 << 10,
    /// Both fDoubleFixed and fDoubleScientific bits set
    fDoubleGeneral           = fDoubleScientific | fDoubleFixed,

    // Additional flags to convert "software" qualifiers
    // (see UInt8ToString_DataSize)

    /// UInt8ToString_DataSize(): Use 1024 as a kilobyte factor, not 1000.
    fDS_Binary               = 1 << 11,
    /// UInt8ToString_DataSize(): Do not add a decimal point
    /// ("10KB" vs "10.0KB")
    fDS_NoDecimalPoint       = 1 << 12,
    /// UInt8ToString_DataSize(): Add space between value and qualifiers,
    /// like "10.0 KB"
    fDS_PutSpaceBeforeSuffix = 1 << 13,
    /// UInt8ToString_DataSize(): Use short suffix, like "10.0K"
    fDS_ShortSuffix          = 1 << 14,
    /// UInt8ToString_DataSize(): Use "B" suffix for small bytes values.
    fDS_PutBSuffixToo        = 1 << 15
};
/// Bitwise OR of "ENumToStringFlags"
using TNumToStringFlags = int;
287
288 /// String to number conversion flags.
enum EStringToNumFlags {
    /// Check on mandatory sign. See 'ENumToStringFlags::fWithSign'.
    fMandatorySign                = 1 << 17,
    /// Allow commas. See 'ENumToStringFlags::fWithCommas'.
    fAllowCommas                  = 1 << 18,
    /// Ignore leading spaces in converted string.
    fAllowLeadingSpaces           = 1 << 19,
    /// Ignore leading non-numeric characters.
    fAllowLeadingSymbols          = fAllowLeadingSpaces | (1 << 20),
    /// Ignore trailing space characters.
    fAllowTrailingSpaces          = 1 << 21,
    /// Ignore trailing non-numeric characters.
    fAllowTrailingSymbols         = fAllowTrailingSpaces | (1 << 22),
    /// StringToDouble*(): For decimal point, use C locale.
    fDecimalPosix                 = 1 << 23,
    /// StringToDouble*(): For decimal point, try both C and current locale.
    fDecimalPosixOrLocal          = 1 << 24,
    /// StringToDouble*(): Keep result finite and normalized:
    /// if DBL_MAX < result < INF, result becomes DBL_MAX
    /// if 0 < result < DBL_MIN, result becomes DBL_MIN
    fDecimalPosixFinite           = 1 << 25,

    // Additional flags to convert "software" qualifiers
    // (see StringToUInt8_DataSize)

    /// StringToUInt8_DataSize(): Use 1024 as a kilobyte factor regardless
    /// of suffix, like "KB" or "KiB".
    fDS_ForceBinary               = 1 << 26,
    /// StringToUInt8_DataSize(): Ignore any fraction part of a value,
    /// "1.2K" ~ "1K"
    fDS_ProhibitFractions         = 1 << 27,
    /// StringToUInt8_DataSize(): Do not allow spaces between value and
    /// suffix, like "10 K".
    fDS_ProhibitSpaceBeforeSuffix = 1 << 28
};
/// Bitwise OR of "EStringToNumFlags"
using TStringToNumFlags = int;
309
310 /// Convert string to a numeric value.
311 ///
312 /// @param str
313 /// String to be converted.
314 /// @param flags
315 /// Optional flags to tune up how the string is converted to value.
316 /// @param base
317 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
318 /// first characters to determine the base - a leading "0x" or "0X"
319 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
320 /// @return
321 /// - If conversion succeeds, set errno to zero and return the
322 /// converted value.
323 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
324 /// - Otherwise, set errno to non-zero and return zero.
template <typename TNumeric>
static TNumeric StringToNumeric(const CTempString str,
                                TStringToNumFlags flags = 0,
                                int base = 10)
{
    // Public entry point only: the arguments are passed on unchanged to the
    // x_StringToNumeric<>() helper, with TNumeric given explicitly because
    // it cannot be deduced from the arguments.
    return x_StringToNumeric<TNumeric>(str, flags, base);
}
332
333 /// Convert string to a numeric value.
334 ///
335 /// @param str [in]
336 /// String to be converted.
337 /// @param value [out]
338 /// The numeric value represented by "str". Zero on any error.
339 /// @param flags [in]
340 /// Optional flags to tune up how the string is converted to value.
341 /// @param base [in]
342 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
343 /// first characters to determine the base - a leading "0x" or "0X"
344 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
345 /// @return
346 /// - If conversion succeeds, set errno to zero, set the value, and
347 /// return true.
348 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
349 /// - Otherwise, set errno to non-zero, set the value to zero, and
350 /// return false.
template <typename TNumeric>
static bool StringToNumeric(const CTempString str,
                            TNumeric* value, /*[out]*/
                            TStringToNumFlags flags = 0,
                            int base = 10)
{
    // Public entry point only: the arguments are passed on unchanged to the
    // x_StringToNumeric() helper; TNumeric is deduced from the type of
    // the "value" pointer.
    return x_StringToNumeric(str, value, flags, base);
}
359
360 /// Convert string to non-negative integer value.
361 ///
362 /// @param str
363 /// String containing only digits, representing non-negative
364 /// decimal value in the int range: [0..kMax_Int].
365 /// @param flags
366 /// How to convert string to value.
367 /// Only fConvErr_NoErrMessage flag is supported here.
368 /// @return
369 /// - If conversion succeeds, set errno to zero and return the converted value.
370 /// - Otherwise, set errno to non-zero and return -1.
371 static int StringToNonNegativeInt(const CTempString str, TConvErrFlags flags = 0);
372
373 /// @deprecated
374 /// Use template-based StringToNumeric<> or StringToNonNegativeInt() instead.
NCBI_DEPRECATED
static int StringToNumeric(const string& str)
{
    // Legacy name kept for backward compatibility: simply forwards to
    // StringToNonNegativeInt() with that function's default flags.
    return StringToNonNegativeInt(str);
}
380
381 /// Convert string to int.
382 ///
383 /// @param str
384 /// String to be converted.
385 /// @param flags
386 /// How to convert string to value.
387 /// @param base
388 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
389 /// first characters to determine the base - a leading "0x" or "0X"
390 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
391 /// @return
392 /// - If conversion succeeds, set errno to zero and return the
393 /// converted value.
394 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
395 /// - Otherwise, set errno to non-zero and return zero.
396 static int StringToInt(const CTempString str,
397 TStringToNumFlags flags = 0,
398 int base = 10);
399
400 /// Convert string to unsigned int.
401 ///
402 /// @param str
403 /// String to be converted.
404 /// @param flags
405 /// How to convert string to value.
406 /// @param base
407 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
408 /// first characters to determine the base - a leading "0x" or "0X"
409 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
410 /// @return
411 /// - If conversion succeeds, set errno to zero and return the
412 /// converted value.
413 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
414 /// - Otherwise, set errno to non-zero and return zero.
415 static unsigned int StringToUInt(const CTempString str,
416 TStringToNumFlags flags = 0,
417 int base = 10);
418
419 /// Convert string to long.
420 ///
421 /// @param str
422 /// String to be converted.
423 /// @param flags
424 /// How to convert string to value.
425 /// @param base
426 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
427 /// first characters to determine the base - a leading "0x" or "0X"
428 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
429 /// @return
430 /// - If conversion succeeds, set errno to zero and return the
431 /// converted value.
432 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
433 /// - Otherwise, set errno to non-zero and return zero.
434 static long StringToLong(const CTempString str,
435 TStringToNumFlags flags = 0,
436 int base = 10);
437
438 /// Convert string to unsigned long.
439 ///
440 /// @param str
441 /// String to be converted.
442 /// @param flags
443 /// How to convert string to value.
444 /// @param base
445 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
446 /// first characters to determine the base - a leading "0x" or "0X"
447 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
448 /// @return
449 /// - If conversion succeeds, set errno to zero and return the
450 /// converted value.
451 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
452 /// - Otherwise, set errno to non-zero and return zero.
453 static unsigned long StringToULong(const CTempString str,
454 TStringToNumFlags flags = 0,
455 int base = 10);
456
457 /// Convert string to double-precision value (analog of strtod function)
458 ///
459 /// @param str
460 /// String to be converted.
/// @param endptr
///   Pointer to character that stops scan.
/// @param flags
///   Optional conversion flags; see the note below regarding
///   fDecimalPosixFinite.
463 /// @return
464 /// Double-precision value.
465 /// This function always uses dot as decimal separator.
466 /// - on overflow, it returns HUGE_VAL and sets errno to ERANGE;
467 /// - on underflow, it returns 0 and sets errno to ERANGE;
468 /// - if conversion was impossible, it returns 0 and sets errno.
469 /// Also, when input string equals (case-insensitive) to
470 /// - "NAN", the function returns NaN;
471 /// - "INF" or "INFINITY", the function returns HUGE_VAL;
472 /// - "-INF" or "-INFINITY", the function returns -HUGE_VAL;
473 /// @note
474 /// - If conversion succeeds, set errno to zero and return the
475 /// converted value.
476 /// - Otherwise, set errno to non-zero and return zero.
477 /// - Denormal or infinite results are considered successful conversion.
478 /// - To enforce finite and normalized result, use fDecimalPosixFinite flag.
479 /// - This function is meant to be more "low-level" than other
480 /// StringToXxx functions - for example, it allows trailing characters
481 /// (and doesn't include a flags parameter for tweaking such behavior).
482 /// This could result in strings like "nanosecond" being converted to
483 /// NaN, "-inf=input_file" being converted to -INF, or other unexpected
484 /// behavior. Therefore, please consider using StringToDouble unless
485 /// you specifically need this functionality.
486 static double StringToDoublePosix(const char* str, char** endptr=0,
487 TStringToNumFlags flags=0);
488
489
490 /// Convert string to double.
491 ///
492 /// @param str
493 /// String to be converted.
494 /// @param flags
495 /// How to convert string to value.
///   The fAllowCommas flag is not supported.
497 /// @return
498 /// - If invalid flags are passed, throw an exception.
499 /// - If conversion succeeds, set errno to zero and return the
500 /// converted value.
501 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
502 /// - Otherwise, set errno to non-zero and return zero.
503 /// @note
504 /// - Denormal or infinite results are considered successful conversion.
505 /// - To enforce finite and normalized result, use fDecimalPosixFinite flag.
506 static double StringToDouble(const CTempStringEx str,
507 TStringToNumFlags flags = 0);
508
509 /// This version accepts zero-terminated string
510 /// @deprecated
511 /// It is unsafe to use this method directly, please use StringToDouble()
512 /// instead.
513 NCBI_DEPRECATED
514 static double StringToDoubleEx(const char* str, size_t size,
515 TStringToNumFlags flags = 0);
516
517 /// Convert string to Int8.
518 ///
519 /// @param str
520 /// String to be converted.
521 /// @param flags
522 /// How to convert string to value.
523 /// @param base
524 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
525 /// first characters to determine the base - a leading "0x" or "0X"
526 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
527 /// @return
528 /// - If conversion succeeds, set errno to zero and return the
529 /// converted value.
530 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
531 /// - Otherwise, set errno to non-zero and return zero.
532 static Int8 StringToInt8(const CTempString str,
533 TStringToNumFlags flags = 0,
534 int base = 10);
535
536 /// Convert string to Uint8.
537 ///
538 /// @param str
539 /// String to be converted.
540 /// @param flags
541 /// How to convert string to value.
542 /// @param base
543 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
544 /// first characters to determine the base - a leading "0x" or "0X"
545 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
546 /// @return
547 /// - If conversion succeeds, set errno to zero and return the
548 /// converted value.
549 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
550 /// - Otherwise, set errno to non-zero and return zero.
551 static Uint8 StringToUInt8(const CTempString str,
552 TStringToNumFlags flags = 0,
553 int base = 10);
554
555 /// Convert string that can contain "software" qualifiers to Uint8.
556 ///
557 /// String can contain "software" qualifiers: G(giga-), MB(mega-),
558 /// KiB (kibi-) etc.
559 /// Example: 100MB, 1024KiB, 5.7G.
560 /// Meaning of qualifiers depends on flags and by default is 1000-based
561 /// (i.e. K=1000, M=10^6 etc.) except in cases when qualifiers with "iB"
562 /// are used, i.e. KiB=1024, MiB=1024^2 etc. When flags parameter contains
563 /// fDS_ForceBinary then qualifiers without "iB" (i.e. "K" or "MB") will
564 /// also be 1024-based.
565 /// String can contain a decimal fraction (except when fDS_ProhibitFractions
566 /// flag is used), in this case the resultant Uint8 number will be rounded
567 /// to fit into integer value.
568 ///
569 /// @param str
570 /// String to be converted.
571 /// @param flags
572 /// How to convert string to value.
573 /// @return
574 /// - If invalid flags are passed, throw an exception.
575 /// - If conversion succeeds, return the converted value.
576 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
577 /// - Otherwise, set errno to non-zero and return zero.
578 static Uint8 StringToUInt8_DataSize(const CTempString str,
579 TStringToNumFlags flags = 0);
580
581 /// Convert string to number of bytes.
582 ///
583 /// String can contain "software" qualifiers: MB(megabyte), KB (kilobyte).
584 /// Example: 100MB, 1024KB
585 /// Note the qualifiers are power-of-2 based, aka kibi-, mebi- etc, so that
586 /// 1KB = 1024B (not 1000B), 1MB = 1024KB = 1048576B, etc.
587 ///
588 /// @param str
589 /// String to be converted.
590 /// @param flags
591 /// How to convert string to value.
592 /// @param base
593 /// Numeric base of the number (before the qualifier). Allowed values
594 /// are 0, 2..20. Zero means to use the first characters to determine
595 /// the base - a leading "0x" or "0X" means base 16; otherwise a
596 /// leading 0 means base 8; otherwise base 10.
597 /// The base is limited to 20 to prevent 'K' from being interpreted as
598 /// a digit in the number.
599 /// @return
600 /// - If conversion succeeds, set errno to zero and return the
601 /// converted value.
602 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
603 /// - Otherwise, set errno to non-zero and return zero.
604 /// @deprecated Use StringToUInt8_DataSize(str, flags) instead.
605 NCBI_DEPRECATED
606 static Uint8 StringToUInt8_DataSize(const CTempString str,
607 TStringToNumFlags flags,
608 int base);
609
610 /// Convert string to size_t.
611 ///
612 /// @param str
613 /// String to be converted.
614 /// @param flags
615 /// How to convert string to value.
616 /// @param base
617 /// Radix base. Allowed values are 0, 2..36. Zero means to use the
618 /// first characters to determine the base - a leading "0x" or "0X"
619 /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
620 /// @return
621 /// - If conversion succeeds, set errno to zero and return the
622 /// converted value.
623 /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
624 /// - Otherwise, set errno to non-zero and return zero.
625 static size_t StringToSizet(const CTempString str,
626 TStringToNumFlags flags = 0,
627 int base = 10);
628
629 /// Convert string to pointer.
630 ///
631 /// @param str
632 /// String to be converted.
633 /// @param flags
634 /// How to convert string to value.
635 /// Only fConvErr_NoErrMessage flag is supported here.
636 /// @return
637 /// Pointer value corresponding to its string representation.
638 /// - If conversion succeeds, set errno to zero and return the
639 /// converted value.
640 /// - Otherwise, set errno to non-zero and return NULL.
641 static const void* StringToPtr(const CTempStringEx str, TConvErrFlags flags = 0);
642
643 /// Convert character to integer.
644 ///
645 /// @param ch
646 /// Character to be converted.
647 /// @return
648 /// Integer (0..15) corresponding to the "ch" as a hex digit.
649 /// Return -1 on error.
650 static int HexChar(char ch);
651
652 /// Convert numeric value to string.
653 ///
654 /// @param value
655 /// Numeric value to be converted.
656 /// @param flags
657 /// How to convert value to string.
658 /// @param base
659 /// Radix base. Default is 10. Allowed values are 2..36.
660 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
661 /// If necessary you should add it yourself.
662 /// If value is float or double type, the parameter is ignored.
663 /// @return
664 /// - If conversion succeeds, set errno to zero and return the
665 /// converted string value.
666 /// - Otherwise, set errno to non-zero and return empty string.
667 template<typename TNumeric>
668 static typename enable_if< is_arithmetic<TNumeric>::value || is_convertible<TNumeric, Int8>::value, string>::type
NumericToString(TNumeric value,TNumToStringFlags flags=0,int base=10)669 NumericToString(TNumeric value, TNumToStringFlags flags = 0, int base = 10)
670 {
671 string ret;
672 x_NumericToString(ret, value, flags, base);
673 return ret;
674 }
/// Convert a "strict id" style wrapper to string.
///
/// This overload participates only when TStrictId::TId is an integral type
/// and TStrictId::Get is a member function.  The value obtained from
/// value.Get() is converted by the NumericToString() overload for plain
/// numeric values, using the same "flags" and "base".
template <typename TStrictId>
static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, string>::type
NumericToString(TStrictId value, TNumToStringFlags flags = 0, int base = 10)
{
    return NumericToString(value.Get(), flags, base);
}
681
682 /// Convert numeric value to string.
683 ///
684 /// @param out_str
685 /// Output string variable.
686 /// @param value
687 /// Numeric value to be converted.
688 /// @param flags
689 /// How to convert value to string.
690 /// @param base
691 /// Radix base. Default is 10. Allowed values are 2..36.
692 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
693 /// If necessary you should add it yourself.
694 /// If value is float or double type, the parameter is ignored.
695 /// @note
696 /// - If conversion succeeds, set errno to zero and return the
697 /// converted string value in 'out_str'.
698 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
template<typename TNumeric>
static void NumericToString(string& out_str, TNumeric value,
                            TNumToStringFlags flags = 0, int base = 10)
{
    // Public entry point only: the x_NumericToString() helper writes the
    // result into the caller-supplied "out_str".
    x_NumericToString(out_str, value, flags, base);
}
705
706 /// Convert int to string.
707 ///
708 /// @param value
709 /// Integer value to be converted.
710 /// @param flags
711 /// How to convert value to string.
712 /// @param base
713 /// Radix base. Default is 10. Allowed values are 2..36.
714 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
715 /// If necessary you should add it yourself.
716 /// @return
717 /// - If conversion succeeds, set errno to zero and return the
718 /// converted string value.
719 /// - Otherwise, set errno to non-zero and return empty string.
720 static string IntToString(int value, TNumToStringFlags flags = 0,
721 int base = 10);
722
/// Overload of IntToString() accepting an unsigned int value.
/// The "flags" and "base" parameters have the same types and defaults
/// as in the int overload.
static string IntToString(unsigned int value, TNumToStringFlags flags = 0,
                          int base = 10);
725
726 /// Convert int to string.
727 ///
728 /// @param out_str
729 /// Output string variable.
730 /// @param value
731 /// Integer value to be converted.
732 /// @param flags
733 /// How to convert value to string.
734 /// @param base
735 /// Radix base. Default is 10. Allowed values are 2..36.
736 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
737 /// If necessary you should add it yourself.
738 /// @note
739 /// - If conversion succeeds, set errno to zero and return the
740 /// converted string value in 'out_str'.
741 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
742 static void IntToString(string& out_str, int value,
743 TNumToStringFlags flags = 0,
744 int base = 10);
745
/// Overload of IntToString() accepting an unsigned int value and writing
/// the result to 'out_str'.
/// The "flags" and "base" parameters have the same types and defaults
/// as in the int overload.
static void IntToString(string& out_str, unsigned int value,
                        TNumToStringFlags flags = 0,
                        int base = 10);
749
/// Convert unsigned int to string.
///
/// @param value
///   Integer value (unsigned int) to be converted.
754 /// @param flags
755 /// How to convert value to string.
756 /// @param base
757 /// Radix base. Default is 10. Allowed values are 2..36.
758 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
759 /// If necessary you should add it yourself.
760 /// @return
761 /// - If conversion succeeds, set errno to zero and return the
762 /// converted string value.
763 /// - Otherwise, set errno to non-zero and return empty string.
764 static string UIntToString(unsigned int value,
765 TNumToStringFlags flags = 0,
766 int base = 10);
767
/// Overload of UIntToString() accepting a (signed) int value.
/// The "flags" and "base" parameters have the same types and defaults
/// as in the unsigned int overload.
/// NOTE(review): how negative values are treated is not visible from this
/// declaration; confirm against the implementation.
static string UIntToString(int value,
                           TNumToStringFlags flags = 0,
                           int base = 10);
771
/// Convert unsigned int to string.
///
/// @param out_str
///   Output string variable.
/// @param value
///   Integer value (unsigned int) to be converted.
778 /// @param flags
779 /// How to convert value to string.
780 /// @param base
781 /// Radix base. Default is 10. Allowed values are 2..36.
782 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
783 /// If necessary you should add it yourself.
784 /// @note
785 /// - If conversion succeeds, set errno to zero and return the
786 /// converted string value in 'out_str'.
787 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
788 static void UIntToString(string& out_str, unsigned int value,
789 TNumToStringFlags flags = 0,
790 int base = 10);
791
/// Overload of UIntToString() accepting a (signed) int value and writing
/// the result to 'out_str'.
/// The "flags" and "base" parameters have the same types and defaults
/// as in the unsigned int overload.
/// NOTE(review): how negative values are treated is not visible from this
/// declaration; confirm against the implementation.
static void UIntToString(string& out_str, int value,
                         TNumToStringFlags flags = 0,
                         int base = 10);
795
/// Convert long to string.
797 ///
798 /// @param value
799 /// Integer value (long) to be converted.
800 /// @param flags
801 /// How to convert value to string.
802 /// @param base
803 /// Radix base. Default is 10. Allowed values are 2..36.
804 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
805 /// If necessary you should add it yourself.
806 /// @return
807 /// - If conversion succeeds, set errno to zero and return the
808 /// converted string value.
809 /// - Otherwise, set errno to non-zero and return empty string.
810 static string LongToString(long value, TNumToStringFlags flags = 0,
811 int base = 10);
812
813 /// Convert long to string.
814 ///
815 /// @param out_str
816 /// Output string variable.
817 /// @param value
818 /// Integer value (long) to be converted.
819 /// @param flags
820 /// How to convert value to string.
821 /// @param base
822 /// Radix base. Default is 10. Allowed values are 2..36.
823 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
824 /// If necessary you should add it yourself.
825 /// @note
826 /// - If conversion succeeds, set errno to zero and return the
827 /// converted string value in 'out_str'.
828 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
829 static void LongToString(string& out_str, long value,
830 TNumToStringFlags flags = 0,
831 int base = 10);
832
833 /// Convert unsigned long to string.
834 ///
835 /// @param value
836 /// Integer value (unsigned long) to be converted.
837 /// @param flags
838 /// How to convert value to string.
839 /// @param base
840 /// Radix base. Default is 10. Allowed values are 2..36.
841 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
842 /// If necessary you should add it yourself.
843 /// @return
844 /// - If conversion succeeds, set errno to zero and return the
845 /// converted string value.
846 /// - Otherwise, set errno to non-zero and return empty string.
847 static string ULongToString(unsigned long value,
848 TNumToStringFlags flags = 0,
849 int base = 10);
850
851 /// Convert unsigned long to string.
852 ///
853 /// @param out_str
854 /// Output string variable
855 /// @param value
856 /// Integer value (unsigned long) to be converted.
857 /// @param flags
858 /// How to convert value to string.
859 /// @param base
860 /// Radix base. Default is 10. Allowed values are 2..36.
861 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
862 /// If necessary you should add it yourself.
863 /// @note
864 /// - If conversion succeeds, set errno to zero and return the
865 /// converted string value in 'out_str'.
866 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
867 static void ULongToString(string& out_str, unsigned long value,
868 TNumToStringFlags flags = 0,
869 int base = 10);
870
871 /// Convert Int8 to string.
872 ///
873 /// @param value
874 /// Integer value (Int8) to be converted.
875 /// @param flags
876 /// How to convert value to string.
877 /// @param base
878 /// Radix base. Default is 10. Allowed values are 2..36.
879 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
880 /// If necessary you should add it yourself.
881 /// @return
882 /// - If conversion succeeds, set errno to zero and return the
883 /// converted string value.
884 /// - Otherwise, set errno to non-zero and return empty string.
885 static string Int8ToString(Int8 value,
886 TNumToStringFlags flags = 0,
887 int base = 10);
888
889 /// Convert Int8 to string.
890 ///
891 /// @param out_str
892 /// Output string variable
893 /// @param value
894 /// Integer value (Int8) to be converted.
895 /// @param flags
896 /// How to convert value to string.
897 /// @param base
898 /// Radix base. Default is 10. Allowed values are 2..36.
899 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
900 /// If necessary you should add it yourself.
901 /// @note
902 /// - If conversion succeeds, set errno to zero and return the
903 /// converted string value in 'out_str'.
904 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
905 static void Int8ToString(string& out_str, Int8 value,
906 TNumToStringFlags flags = 0,
907 int base = 10);
908
909 /// Convert UInt8 to string.
910 ///
911 /// @param value
912 /// Integer value (UInt8) to be converted.
913 /// @param flags
914 /// How to convert value to string.
915 /// @param base
916 /// Radix base. Default is 10. Allowed values are 2..36.
917 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
918 /// If necessary you should add it yourself.
919 /// @return
920 /// - If conversion succeeds, set errno to zero and return the
921 /// converted string value.
922 /// - Otherwise, set errno to non-zero and return empty string.
923 static string UInt8ToString(Uint8 value,
924 TNumToStringFlags flags = 0,
925 int base = 10);
926
927 /// Convert UInt8 to string.
928 ///
929 /// @param out_str
930 /// Output string variable
931 /// @param value
932 /// Integer value (UInt8) to be converted.
933 /// @param flags
934 /// How to convert value to string.
935 /// @param base
936 /// Radix base. Default is 10. Allowed values are 2..36.
937 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
938 /// If necessary you should add it yourself.
939 /// @note
940 /// - If conversion succeeds, set errno to zero and return the
941 /// converted string value in 'out_str'.
942 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
943 static void UInt8ToString(string& out_str, Uint8 value,
944 TNumToStringFlags flags = 0,
945 int base = 10);
946
947 /// Convert UInt8 to string using "software" qualifiers.
948 ///
949 /// Result of conversion will be limited to max_digits digits so that e.g.
950 /// 1024 will be converted to 1.02KB. Conversion will be made using
951 /// rounding so that 1025 will be converted to 1.03KB. By default function
952 /// uses 1000-based qualifiers (as in examples above) but with fDS_Binary
953 /// flag it will use 1024-based qualifiers, e.g. 1100 will be converted to
954 /// 1.07KiB. With fDS_ShortSuffix flag function will omit "B" in 1000-based
955 /// and "iB" in 1024-based qualifiers. When the result of conversion doesn't
956 /// need any qualifiers then the result of this function will be equivalent
957 /// to result of UInt8ToString() above except if fDS_PutBSuffixToo flag
958 /// is passed. In the latter case "B" will be added to the number.
959 ///
960 /// Function will always try to use a maximum possible qualifier and
961 /// a number with decimal point except if fDS_NoDecimalPoint flag is passed.
962 /// In that case function will return only whole number and try to use a
963 /// minimum possible qualifier (which makes difference only if
964 /// max_digits > 3).
965 ///
966 /// @param value
967 /// Integer value (UInt8) to be converted.
968 /// @param flags
969 /// How to convert value to string.
970 /// @param max_digits
971 /// Maximum number of digits to use (cannot be less than 3)
972 /// @return
973 /// - If invalid flags are passed, throw an exception.
974 /// - If conversion succeeds, return the converted value.
975 static string UInt8ToString_DataSize(Uint8 value,
976 TNumToStringFlags flags = 0,
977 unsigned int max_digits = 3);
978
979 /// Convert UInt8 to string using "software" qualifiers.
980 ///
981 /// See notes and details of how function works in the comments to
982 /// UInt8ToString_DataSize() above.
983 ///
984 /// @param out_str
985 /// Output string variable
986 /// @param value
987 /// Integer value (UInt8) to be converted.
988 /// @param flags
989 /// How to convert value to string.
990 /// @param max_digits
991 /// Maximum number of digits to use (cannot be less than 3)
992 static void UInt8ToString_DataSize(string& out_str,
993 Uint8 value,
994 TNumToStringFlags flags = 0,
995 unsigned int max_digits = 3);
996 /// Convert double to string.
997 ///
998 /// @param value
999 /// Double value to be converted.
1000 /// @param precision
1001 /// Precision value for conversion. If precision is more than the maximum
1002 /// for the current platform, then it will be truncated to this maximum.
1003 /// If it is negative, then the double will be converted to a number in
1004 /// scientific notation.
1005 /// @param flags
1006 /// How to convert value to string.
1007 /// If double format flags are not specified, then the following output
1008 /// format will be used by default:
1009 /// - fDoubleFixed, if 'precision' >= 0.
1010 /// - fDoubleGeneral, if 'precision' < 0.
1011 /// @return
1012 /// - If conversion succeeds, set errno to zero and return the
1013 /// converted string value.
1014 /// - Otherwise, set errno to non-zero and return empty string.
1015 static string DoubleToString(double value, int precision = -1,
1016 TNumToStringFlags flags = 0);
1017
1018 /// Convert double to string.
1019 ///
1020 /// @param out_str
1021 /// Output string variable.
1022 /// @param value
1023 /// Double value to be converted.
1024 /// @param precision
1025 /// Precision value for conversion. If precision is more than the maximum
1026 /// for the current platform, then it will be truncated to this maximum.
1027 /// If it is negative, then the double will be converted to a number in
1028 /// scientific notation.
1029 /// @param flags
1030 /// How to convert value to string.
1031 /// If double format flags are not specified, then the following output
1032 /// format will be used by default:
1033 /// - fDoubleFixed, if 'precision' >= 0.
1034 /// - fDoubleGeneral, if 'precision' < 0.
1035 /// @note
1036 /// - If conversion succeeds, set errno to zero and return the
1037 /// converted string value in 'out_str'.
1038 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
1039 static void DoubleToString(string& out_str, double value,
1040 int precision = -1,
1041 TNumToStringFlags flags = 0);
1042
1043 /// Convert double to string with specified precision and place the result
1044 /// in the specified buffer.
1045 ///
1046 /// @param value
1047 /// Double value to be converted.
1048 /// @param precision
1049 /// Precision value for conversion. If precision is more than the maximum
1050 /// for the current platform, then it will be truncated to this maximum.
1051 /// @param buf
1052 /// Put result of the conversion into this buffer.
1053 /// @param buf_size
1054 /// Size of buffer, "buf".
1055 /// @param flags
1056 /// How to convert value to string.
1057 /// Default output format is fDoubleFixed.
1058 /// @return
1059 /// - If conversion succeeds, set errno to zero and return the
1060 /// number of bytes stored in "buf", not counting the
1061 /// terminating '\0'.
1062 /// - Otherwise, set errno to non-zero, contents of "buf" are undefined.
1063 static SIZE_TYPE DoubleToString(double value, unsigned int precision,
1064 char* buf, SIZE_TYPE buf_size,
1065 TNumToStringFlags flags = 0);
1066
1067 /// Convert double to string with specified precision and put the result
1068 /// into a character buffer, in scientific format.
1069 ///
1070 /// NOTE:
1071 /// The output character buffer is NOT zero-terminated.
1072 /// The decimal separator is dot, always.
1073 /// This function DOES NOT check whether 'value' is infinite or
1074 /// not-a-number; if it is, the result is unpredictable.
1075 /// This function is less precise for a small fraction of values
1076 /// (the difference is in the last significant digit) than its
1077 /// 'DoubleToString' siblings, but it is much faster.
1078 ///
1079 /// @param value
1080 /// Double value to be converted.
1081 /// @param precision
1082 /// Maximum number of significant digits to preserve. If precision is greater than
1083 /// the maximum for the current platform, it will be truncated to this maximum.
1084 /// @param buf
1085 /// Put result of the conversion into this buffer.
1086 /// @param buf_size
1087 /// Size of buffer, "buf".
1088 /// @return
1089 /// The number of bytes written into "buf".
1090 static SIZE_TYPE DoubleToStringPosix(double value,unsigned int precision,
1091 char* buf, SIZE_TYPE buf_size);
1092
1093
1094 /// Convert double to string with specified precision.
1095 ///
1096 /// The result consists of three parts: significant digits, exponent and sign.
1097 /// For example, input value -12345.67 will produce
1098 /// buf = "1234567" , *dec = 4, and *sign = -1.
1099 /// NOTE:
1100 /// The output character buffer is NOT zero-terminated.
1101 /// The buffer is NOT padded with zeros.
1102 /// This function DOES NOT check whether 'value' is infinite or
1103 /// not-a-number; if it is, the result is unpredictable.
1104 /// This function is less precise for a small fraction of values
1105 /// (the difference is in the last significant digit) than its
1106 /// 'DoubleToString' siblings, but it is much faster.
1107 ///
1108 /// @param value
1109 /// Double value to be converted.
1110 /// @param precision
1111 /// Maximum number of significant digits to preserve. If precision is greater than
1112 /// the maximum for the current platform, it will be truncated to this maximum.
1113 /// @param buf
1114 /// Put result of the conversion into this buffer.
1115 /// @param buf_size
1116 /// Size of buffer, "buf".
1117 /// @param dec
1118 /// Output: exponent of the converted value.
1119 /// @param sign
1120 /// Output: sign of the value.
1121 /// @return
1122 /// The number of bytes written into "buf".
1123 static SIZE_TYPE DoubleToString_Ecvt(double value,unsigned int precision,
1124 char* buf, SIZE_TYPE buf_size,
1125 int* dec, int* sign);
1126
1127 /// Convert size_t to string.
1128 ///
1129 /// @param value
1130 /// Value to be converted.
1131 /// @param flags
1132 /// How to convert value to string.
1133 /// @param base
1134 /// Radix base. Default is 10. Allowed values are 2..36.
1135 /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
1136 /// If necessary you should add it yourself.
1137 /// @return
1138 /// - If conversion succeeds, set errno to zero and return the
1139 /// converted string value.
1140 /// - Otherwise, set errno to non-zero and return empty string.
1141 static string SizetToString(size_t value,
1142 TNumToStringFlags flags = 0,
1143 int base = 10);
1144
1145 /// Convert pointer to string.
1146 ///
1147 /// @param out_str
1148 /// Output string variable.
1149 /// @param ptr
1150 /// Pointer to be converted.
1151 /// @note
1152 /// - If conversion succeeds, set errno to zero and return the
1153 /// converted string value in 'out_str'.
1154 /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
1155 static void PtrToString(string& out_str, const void* ptr);
1156
1157 /// Convert pointer to string.
1158 ///
1159 /// @param ptr
1160 /// Pointer to be converted.
1161 /// @return
1162 /// - If conversion succeeds, set errno to zero and return the
1163 /// converted string value representing the pointer.
1164 /// - Otherwise, set errno to non-zero and return empty string.
1165 static string PtrToString(const void* ptr);
1166
1167 /// Convert bool to string.
1168 ///
1169 /// @param value
1170 /// Boolean value to be converted.
1171 /// @return
1172 /// One of: 'true', 'false'.
1173 /// @note
1174 /// Does not change errno.
1175 static const string BoolToString(bool value);
1176
1177 /// Convert string to bool.
1178 ///
1179 /// @param str
1180 /// Boolean string value to be converted. Recognizes, regardless of
1181 /// case, one of:
1182 /// TRUE - 'true', 't', 'yes', 'y', '1';
1183 /// FALSE - 'false', 'f', 'no', 'n', '0'.
1184 /// @return
1185 /// - If conversion succeeds, set errno to zero and return TRUE or FALSE.
1186 /// - Otherwise, set errno to non-zero and throw an exception.
1187 static bool StringToBool(const CTempString str);
1188
1189
1190 /// Handle an arbitrary printf-style format string.
1191 ///
1192 /// This method exists only to support third-party code that insists on
1193 /// representing messages in this format; please stick to type-checked
1194 /// means of formatting such as the above ToString methods and I/O
1195 /// streams whenever possible.
1196 static string FormatVarargs(const char* format, va_list args);
1197
1198
1199 /// Which type of string comparison.
1200 enum ECase {
1201 eCase, ///< Case sensitive compare
1202 eNocase ///< Case insensitive compare
1203 };
1204
1205 // ATTENTION. Be aware that:
1206 //
1207 // 1) "Compare***(..., SIZE_TYPE pos, SIZE_TYPE n, ...)" functions
1208 // follow the ANSI C++ comparison rules a la "basic_string::compare()":
1209 // s1[pos:pos+n) == s2 --> return 0
1210 // s1[pos:pos+n) < s2 --> return negative value
1211 // s1[pos:pos+n) > s2 --> return positive value
1212 //
1213 // 2) "strn[case]cmp()" functions follow the ANSI C comparison rules:
1214 // s1[0:n) == s2[0:n) --> return 0
1215 // s1[0:n) < s2[0:n) --> return negative value
1216 // s1[0:n) > s2[0:n) --> return positive value
1217
1218
1219 /// Case-sensitive compare of a substring with another string.
1220 ///
1221 /// @param s1
1222 /// String containing the substring to be compared.
1223 /// @param pos
1224 /// Start position of substring to be compared.
1225 /// @param n
1226 /// Number of characters in substring to be compared.
1227 /// @param s2
1228 /// String (char*) to be compared with substring.
1229 /// @return
1230 /// - 0, if s1[pos:pos+n) == s2;
1231 /// - Negative integer, if s1[pos:pos+n) < s2;
1232 /// - Positive integer, if s1[pos:pos+n) > s2.
1233 /// @sa
1234 /// Other forms of overloaded CompareCase() with differences in argument
1235 /// types: char* vs. CTempString[Ex]
1236 static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1237 const char* s2);
1238
1239 /// Case-sensitive compare of a substring with another string.
1240 ///
1241 /// @param s1
1242 /// String containing the substring to be compared.
1243 /// @param pos
1244 /// Start position of substring to be compared.
1245 /// @param n
1246 /// Number of characters in substring to be compared.
1247 /// @param s2
1248 /// String to be compared with substring.
1249 /// @return
1250 /// - 0, if s1[pos:pos+n) == s2;
1251 /// - Negative integer, if s1[pos:pos+n) < s2;
1252 /// - Positive integer, if s1[pos:pos+n) > s2.
1253 /// @sa
1254 /// Other forms of overloaded CompareCase() with differences in argument
1255 /// types: char* vs. CTempString[Ex]
1256 static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1257 const CTempString s2);
1258
1259 /// Case-sensitive compare of two strings -- char* version.
1260 ///
1261 /// @param s1
1262 /// String to be compared -- operand 1.
1263 /// @param s2
1264 /// String to be compared -- operand 2.
1265 /// @return
1266 /// - 0, if s1 == s2;
1267 /// - Negative integer, if s1 < s2;
1268 /// - Positive integer, if s1 > s2.
1269 /// @sa
1270 /// CompareNocase(), Compare() versions with same argument types.
1271 static int CompareCase(const char* s1, const char* s2);
1272
1273 /// Case-sensitive compare of two strings -- CTempStringEx version.
1274 ///
1275 /// @param s1
1276 /// String to be compared -- operand 1.
1277 /// @param s2
1278 /// String to be compared -- operand 2.
1279 /// @return
1280 /// - 0, if s1 == s2;
1281 /// - Negative integer, if s1 < s2;
1282 /// - Positive integer, if s1 > s2.
1283 /// @sa
1284 /// CompareNocase(), Compare() versions with same argument types.
1285 static int CompareCase(const CTempStringEx s1, const CTempStringEx s2);
1286
1287 /// Case-insensitive compare of a substring with another string.
1288 ///
1289 /// @param s1
1290 /// String containing the substring to be compared.
1291 /// @param pos
1292 /// Start position of substring to be compared.
1293 /// @param n
1294 /// Number of characters in substring to be compared.
1295 /// @param s2
1296 /// String (char*) to be compared with substring.
1297 /// @return
1298 /// - 0, if s1[pos:pos+n) == s2 (case-insensitive compare);
1299 /// - Negative integer, if s1[pos:pos+n) < s2 (case-insensitive compare);
1300 /// - Positive integer, if s1[pos:pos+n) > s2 (case-insensitive compare).
1301 /// @sa
1302 /// Other forms of overloaded CompareNocase() with differences in
1303 /// argument types: char* vs. CTempString[Ex]
1304 static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1305 const char* s2);
1306
1307 /// Case-insensitive compare of a substring with another string.
1308 ///
1309 /// @param s1
1310 /// String containing the substring to be compared.
1311 /// @param pos
1312 /// Start position of substring to be compared.
1313 /// @param n
1314 /// Number of characters in substring to be compared.
1315 /// @param s2
1316 /// String to be compared with substring.
1317 /// @return
1318 /// - 0, if s1[pos:pos+n) == s2 (case-insensitive compare);
1319 /// - Negative integer, if s1[pos:pos+n) < s2 (case-insensitive compare);
1320 /// - Positive integer, if s1[pos:pos+n) > s2 (case-insensitive compare).
1321 /// @sa
1322 /// Other forms of overloaded CompareNocase() with differences in
1323 /// argument types: char* vs. CTempString[Ex]
1324 static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1325 const CTempString s2);
1326
1327 /// Case-insensitive compare of two strings -- char* version.
1328 ///
1329 /// @param s1
1330 /// String to be compared -- operand 1.
1331 /// @param s2
1332 /// String to be compared -- operand 2.
1333 /// @return
1334 /// - 0, if s1 == s2 (case-insensitive compare);
1335 /// - Negative integer, if s1 < s2 (case-insensitive compare);
1336 /// - Positive integer, if s1 > s2 (case-insensitive compare).
1337 /// @sa
1338 /// CompareCase(), Compare() versions with same argument types.
1339 static int CompareNocase(const char* s1, const char* s2);
1340
1341 /// Case-insensitive compare of two strings -- CTempStringEx version.
1342 ///
1343 /// @param s1
1344 /// String to be compared -- operand 1.
1345 /// @param s2
1346 /// String to be compared -- operand 2.
1347 /// @return
1348 /// - 0, if s1 == s2 (case-insensitive compare);
1349 /// - Negative integer, if s1 < s2 (case-insensitive compare);
1350 /// - Positive integer, if s1 > s2 (case-insensitive compare).
1351 /// @sa
1352 /// CompareCase(), Compare() versions with same argument types.
1353 static int CompareNocase(const CTempStringEx s1, const CTempStringEx s2);
1354
1355 /// Compare of a substring with another string.
1356 ///
1357 /// @param s1
1358 /// String containing the substring to be compared.
1359 /// @param pos
1360 /// Start position of substring to be compared.
1361 /// @param n
1362 /// Number of characters in substring to be compared.
1363 /// @param s2
1364 /// String (char*) to be compared with substring.
1365 /// @param use_case
1366 /// Whether to do a case sensitive compare(eCase -- default), or a
1367 /// case-insensitive compare (eNocase).
1368 /// @return
1369 /// - 0, if s1[pos:pos+n) == s2;
1370 /// - Negative integer, if s1[pos:pos+n) < s2;
1371 /// - Positive integer, if s1[pos:pos+n) > s2.
1372 /// @sa
1373 /// Other forms of overloaded Compare() with differences in argument
1374 /// types: char* vs. CTempString[Ex]
1375 static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1376 const char* s2, ECase use_case = eCase);
1377
1378 /// Compare of a substring with another string.
1379 ///
1380 /// @param s1
1381 /// String containing the substring to be compared.
1382 /// @param pos
1383 /// Start position of substring to be compared.
1384 /// @param n
1385 /// Number of characters in substring to be compared.
1386 /// @param s2
1387 /// String to be compared with substring.
1388 /// @param use_case
1389 /// Whether to do a case sensitive compare (default is eCase), or a
1390 /// case-insensitive compare (eNocase).
1391 /// @return
1392 /// - 0, if s1[pos:pos+n) == s2;
1393 /// - Negative integer, if s1[pos:pos+n) < s2;
1394 /// - Positive integer, if s1[pos:pos+n) > s2.
1395 /// @sa
1396 /// Other forms of overloaded Compare() with differences in argument
1397 /// types: char* vs. CTempString[Ex]
1398 static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1399 const CTempString s2, ECase use_case = eCase);
1400
1401 /// Compare two strings -- char* version.
1402 ///
1403 /// @param s1
1404 /// String to be compared -- operand 1.
1405 /// @param s2
1406 /// String to be compared -- operand 2.
1407 /// @param use_case
1408 /// Whether to do a case sensitive compare(default is eCase), or a
1409 /// case-insensitive compare (eNocase).
1410 /// @return
1411 /// - 0, if s1 == s2.
1412 /// - Negative integer, if s1 < s2.
1413 /// - Positive integer, if s1 > s2.
1414 /// @sa
1415 /// Other forms of overloaded Compare() with differences in argument
1416 /// types: char* vs. CTempString[Ex]
1417 static int Compare(const char* s1, const char* s2,
1418 ECase use_case = eCase);
1419
1420 /// Compare two strings -- CTempStringEx version.
1421 ///
1422 /// @param s1
1423 /// String to be compared -- operand 1.
1424 /// @param s2
1425 /// String to be compared -- operand 2.
1426 /// @param use_case
1427 /// Whether to do a case sensitive compare(default is eCase), or a
1428 /// case-insensitive compare (eNocase).
1429 /// @return
1430 /// - 0, if s1 == s2;
1431 /// - Negative integer, if s1 < s2;
1432 /// - Positive integer, if s1 > s2.
1433 /// @sa
1434 /// Other forms of overloaded Compare() with differences in argument
1435 /// types: char* vs. CTempString[Ex]
1436 static int Compare(const CTempStringEx s1, const CTempStringEx s2,
1437 ECase use_case = eCase);
1438
1439 /// Case-sensitive equality of a substring with another string.
1440 ///
1441 /// @param s1
1442 /// String containing the substring to be compared.
1443 /// @param pos
1444 /// Start position of substring to be compared.
1445 /// @param n
1446 /// Number of characters in substring to be compared.
1447 /// @param s2
1448 /// String (char*) to be compared with substring.
1449 /// @return
1450 /// - true, if s1[pos:pos+n) equals s2;
1451 /// - false, otherwise
1452 /// @sa
1453 /// Other forms of overloaded EqualCase() with differences in argument
1454 /// types: char* vs. CTempString[Ex]
1455 static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1456 const char* s2);
1457
1458 /// Case-sensitive equality of a substring with another string.
1459 ///
1460 /// @param s1
1461 /// String containing the substring to be compared.
1462 /// @param pos
1463 /// Start position of substring to be compared.
1464 /// @param n
1465 /// Number of characters in substring to be compared.
1466 /// @param s2
1467 /// String to be compared with substring.
1468 /// @return
1469 /// - true, if s1[pos:pos+n) equals s2;
1470 /// - false, otherwise
1471 /// @sa
1472 /// Other forms of overloaded EqualCase() with differences in argument
1473 /// types: char* vs. CTempString[Ex]
1474 static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1475 const CTempString s2);
1476
1477 /// Case-sensitive equality of two strings -- char* version.
1478 ///
1479 /// @param s1
1480 /// String to be compared -- operand 1.
1481 /// @param s2
1482 /// String to be compared -- operand 2.
1483 /// @return
1484 /// - true, if s1 equals s2;
1485 /// - false, otherwise.
1486 /// @sa
1487 /// EqualNocase(), Equal() versions with same argument types.
1488 static bool EqualCase(const char* s1, const char* s2);
1489
1490 /// Case-sensitive equality of two strings -- CTempStringEx version.
1491 ///
1492 /// @param s1
1493 /// String to be compared -- operand 1.
1494 /// @param s2
1495 /// String to be compared -- operand 2.
1496 /// @return
1497 /// - true, if s1 equals s2;
1498 /// - false, otherwise.
1499 /// @sa
1500 /// EqualNocase(), Equal() versions with same argument types.
1501 static bool EqualCase(const CTempStringEx s1, const CTempStringEx s2);
1502
1503 /// Case-insensitive equality of a substring with another string.
1504 ///
1505 /// @param s1
1506 /// String containing the substring to be compared.
1507 /// @param pos
1508 /// Start position of substring to be compared.
1509 /// @param n
1510 /// Number of characters in substring to be compared.
1511 /// @param s2
1512 /// String (char*) to be compared with substring.
1513 /// @return
1514 /// - true, if s1[pos:pos+n) equals s2 (case-insensitive compare);
1515 /// - false, otherwise.
1516 /// @sa
1517 /// Other forms of overloaded EqualNocase() with differences in
1518 /// argument types: char* vs. CTempString[Ex]
1519 static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1520 const char* s2);
1521
1522 /// Case-insensitive equality of a substring with another string.
1523 ///
1524 /// @param s1
1525 /// String containing the substring to be compared.
1526 /// @param pos
1527 /// Start position of substring to be compared.
1528 /// @param n
1529 /// Number of characters in substring to be compared.
1530 /// @param s2
1531 /// String to be compared with substring.
1532 /// @return
1533 /// - true, if s1[pos:pos+n) equals s2 (case-insensitive compare);
1534 /// - false, otherwise.
1535 /// @sa
1536 /// Other forms of overloaded EqualNocase() with differences in
1537 /// argument types: char* vs. CTempString[Ex]
1538 static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1539 const CTempString s2);
1540
1541 /// Case-insensitive equality of two strings -- char* version.
1542 ///
1543 /// @param s1
1544 /// String to be compared -- operand 1.
1545 /// @param s2
1546 /// String to be compared -- operand 2.
1547 /// @return
1548 /// - true, if s1 equals s2 (case-insensitive compare);
1549 /// - false, otherwise.
1550 /// @sa
1551 /// EqualCase(), Equal() versions with same argument types.
1552 static bool EqualNocase(const char* s1, const char* s2);
1553
1554 /// Case-insensitive equality of two strings.
1555 ///
1556 /// @param s1
1557 /// String to be compared -- operand 1.
1558 /// @param s2
1559 /// String to be compared -- operand 2.
1560 /// @return
1561 /// - true, if s1 equals s2 (case-insensitive compare);
1562 /// - false, otherwise.
1563 /// @sa
1564 /// EqualCase(), Equal() versions with same argument types.
1565 static bool EqualNocase(const CTempStringEx s1, const CTempStringEx s2);
1566
1567 /// Test for equality of a substring with another string.
1568 ///
1569 /// @param s1
1570 /// String containing the substring to be compared.
1571 /// @param pos
1572 /// Start position of substring to be compared.
1573 /// @param n
1574 /// Number of characters in substring to be compared.
1575 /// @param s2
1576 /// String (char*) to be compared with substring.
1577 /// @param use_case
1578 /// Whether to do a case sensitive compare(eCase -- default), or a
1579 /// case-insensitive compare (eNocase).
1580 /// @return
1581 /// - true, if s1[pos:pos+n) equals s2;
1582 /// - false, otherwise.
1583 /// @sa
1584 /// Other forms of overloaded Equal() with differences in argument
1585 /// types: char* vs. CTempString[Ex]
1586 static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1587 const char* s2, ECase use_case = eCase);
1588
1589 /// Test for equality of a substring with another string.
1590 ///
1591 /// @param s1
1592 /// String containing the substring to be compared.
1593 /// @param pos
1594 /// Start position of substring to be compared.
1595 /// @param n
1596 /// Number of characters in substring to be compared.
1597 /// @param s2
1598 /// String to be compared with substring.
1599 /// @param use_case
1600 /// Whether to do a case sensitive compare (default is eCase), or a
1601 /// case-insensitive compare (eNocase).
1602 /// @return
1603 /// - true, if s1[pos:pos+n) equals s2;
1604 /// - false, otherwise.
1605 /// (The result is a bool, not a three-way comparison value.)
1606 /// @sa
1607 /// Other forms of overloaded Equal() with differences in argument
1608 /// types: char* vs. CTempString[Ex]
1609 static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1610 const CTempString s2, ECase use_case = eCase);
1611
1612 /// Test for equality of two strings -- char* version.
1613 ///
1614 /// @param s1
1615 /// String to be compared -- operand 1.
1616 /// @param s2
1617 /// String to be compared -- operand 2.
1618 /// @param use_case
1619 /// Whether to do a case sensitive compare (default is eCase), or a
1620 /// case-insensitive compare (eNocase).
1621 /// @return
1622 /// - true, if s1 equals s2;
1623 /// - false, otherwise.
1624 /// (The result is a bool, not a three-way comparison value.)
1625 /// @sa
1626 /// EqualNocase(), Equal() versions with similar argument types.
1627 static bool Equal(const char* s1, const char* s2,
1628 ECase use_case = eCase);
1629
1630 /// Test for equality of two strings -- CTempStringEx version.
1631 ///
1632 /// @param s1
1633 /// String to be compared -- operand 1.
1634 /// @param s2
1635 /// String to be compared -- operand 2.
1636 /// @param use_case
1637 /// Whether to do a case sensitive compare (default is eCase), or a
1638 /// case-insensitive compare (eNocase).
1639 /// @return
1640 /// - true, if s1 equals s2;
1641 /// - false, otherwise.
1642 /// @sa
1643 /// EqualNocase(), Equal() versions with similar argument types.
1644 static bool Equal(const CTempStringEx s1, const CTempStringEx s2,
1645 ECase use_case = eCase);
1646
1647 // NOTE. On some platforms, "strn[case]cmp()" can work faster than their
1648 // "Compare***()" counterparts.
1649
1650 /// Compare two C strings (counterpart of the case-insensitive strcasecmp()).
1651 ///
1652 /// @param s1
1653 /// String to be compared -- operand 1.
1654 /// @param s2
1655 /// String to be compared -- operand 2.
1656 /// @return
1657 /// - 0, if s1 == s2;
1658 /// - Negative integer, if s1 < s2;
1659 /// - Positive integer, if s1 > s2.
1660 /// @sa
1661 /// strncmp(), strcasecmp(), strncasecmp()
1662 static int strcmp(const char* s1, const char* s2);
1663
1664 /// Compare two C strings, up to the specified number of characters.
1665 ///
1666 /// @param s1
1667 /// String to be compared -- operand 1.
1668 /// @param s2
1669 /// String to be compared -- operand 2.
1670 /// @param n
1671 /// Maximum number of characters to compare.
1672 /// @return
1673 /// - 0, if s1 == s2;
1674 /// - Negative integer, if s1 < s2;
1675 /// - Positive integer, if s1 > s2.
1676 /// @sa
1677 /// strcmp(), strcasecmp(), strncasecmp()
1678 static int strncmp(const char* s1, const char* s2, size_t n);
1679
1680 /// Compare two zero-terminated strings, ignoring the case of letters.
1681 ///
1682 /// @param s1
1683 /// String to be compared -- operand 1.
1684 /// @param s2
1685 /// String to be compared -- operand 2.
1686 /// @return
1687 /// - 0, if s1 == s2;
1688 /// - Negative integer, if s1 < s2;
1689 /// - Positive integer, if s1 > s2.
1690 /// @sa
1691 /// strcmp(), strncmp(), strncasecmp()
1692 static int strcasecmp(const char* s1, const char* s2);
1693
1694 /// Case-insensitive comparison of two zero-terminated strings,
1695 /// narrowed to the specified number of characters ("n").
1696 ///
1697 /// @param s1
1698 /// String to be compared -- operand 1.
1699 /// @param s2
1700 /// String to be compared -- operand 2.
1701 /// @return
1702 /// - 0, if s1 == s2;
1703 /// - Negative integer, if s1 < s2;
1704 /// - Positive integer, if s1 > s2.
1705 /// @sa
1706 /// strcmp(), strncmp(), strcasecmp()
1707 static int strncasecmp(const char* s1, const char* s2, size_t n);
1708
1709 /// Wrapper for the function strftime() (same argument list) that corrects
1710 /// the handling of the %D and %T time formats on MS Windows.
1711 static size_t strftime(char* s, size_t maxsize, const char* format,
1712 const struct tm* timeptr);
1713
1714 /// Match "str" against the "mask".
1715 ///
1716 /// This function does not use regular expressions.
1717 /// Very similar to fnmatch(3), but there are differences (see also glob(7)).
1718 /// There's no special treatment for a slash character '/' in this call.
1719 ///
1720 /// @param str
1721 /// String to match.
1722 /// @param mask
1723 /// Mask used to match string "str".
1724 /// This is a text pattern, which, along with ordinary characters that must match
1725 /// literally corresponding symbols in the string "str", can also contain
1726 /// the following wildcard characters: \n
1727 /// ? - matches any single character in the string. \n
1728 /// * - matches any number of characters in the string (including none). \n
1729 ///
1730 /// The mask also supports POSIX character classes in the forms of "[...]" or "[!...]"
1731 /// that must MATCH or NOT MATCH, respectively, a single character in "str".
1732 /// To cancel the special meaning of '*', '?' or '[', they can be prepended with
1733 /// a backslash '\\' (the backslash in front of other characters does not change
1734 /// their meaning, so "\\\\" matches one graphical backslash in the "str").
1735 /// Within a character class, to have its literal meaning a closing square bracket ']'
1736 /// must be used at the first position, whereas '?', '*', '[', and '\\' stand
1737 /// just for themselves. Two characters separated by a minus sign '-' denote
1738 /// a range that can be used for contraction to include all characters in
1739 /// between: "[A-F]" is equivalent to "[ABCDEF]".
1740 /// For its literal meaning, the minus sign '-' can be used either at the very
1741 /// first position, or the last position before the closing bracket ']'.
1742 /// To have a range that begins with an exclamation point, one has to use
1743 /// a dummy empty range followed by that range with '!'.
1744 ///
1745 /// Examples:
1746 /// "!" matches a single '!' (note that just "[!]" is invalid);
1747 /// "[!!]" matches any character, which is not an exclamation point '!';
1748 /// "[][!]" matches ']', '[', and '!';
1749 /// "[!][-]" matches any character except for ']', '[', and '-';
1750 /// "[-]" matches a minus sign '-' (same as '-' just by itself);
1751 /// "[?*\\]" matches either '?', or '*', or a backslash '\\';
1752 /// "[]-\\]" matches nothing as it defines an empty range (from ']' to '\\');
1753 /// "\\[a]\\*" matches a literal substring "[a]*";
1754 /// "[![a-]" matches any char but '[', 'a' or '-' (same as "[!-[a]"; but not
1755 /// "[![-a]", which defines an empty range, thus matches any char!);
1756 /// "[]A]" matches either ']' or 'A' (NB: "[A]]" matches a substring "A]");
1757 /// "[0-9-]" matches any decimal digit or a minus sign '-' (same: "[-0-9]");
1758 /// "[9-0!-$]" matches '!', '"', '#', and '$' (as first range matches nothing).
1759 ///
1760 /// @note
1761 /// In the above, each double backslash denotes a single graphical backslash
1762 /// character (C string notation is used).
1763 /// @note
1764 /// Unlike shell globbing, "[--0]" *does* match the slash character '/'
1765 /// (along with '-', '.', and '0' that all fall within the range).
1766 /// @param use_case
1767 /// Whether to do a case sensitive compare for letters (eCase -- default),
1768 /// or a case-insensitive compare (eNocase).
1769 /// @return
1770 /// Return TRUE if "str" matches "mask", and FALSE otherwise
1771 /// (including pattern errors).
1772 /// @sa
1773 /// CRegexp, CRegexpUtil
1774 ///
1775 static bool MatchesMask(CTempString str, CTempString mask, ECase use_case = eCase);
1776
1777 /// Check if a string is blank (has no text).
1778 ///
1779 /// @param str
1780 /// String to check.
1781 /// @param pos
1782 /// Starting position in "str" (default is 0).
1783 static bool IsBlank(const CTempString str, SIZE_TYPE pos = 0);
1784
1785 /// Check if all letters in the given string are lowercase.
1786 ///
1787 /// @param str
1788 /// String to be checked.
1789 /// @return
1790 /// TRUE if all letter characters in the string are lowercase
1791 /// according to the current C locale (std::islower()).
1792 /// All non-letter characters are ignored.
1793 /// TRUE if the string is empty or has no letters.
1794 static bool IsLower(const CTempString str);
1795
1796 /// Check if all letters in the given string are uppercase.
1797 ///
1798 /// @param str
1799 /// String to be checked.
1800 /// @return
1801 /// TRUE if all letter characters in the string are uppercase
1802 /// according to the current C locale (std::isupper()).
1803 /// All non-letter characters are ignored.
1804 /// TRUE if the string is empty or has no letters.
1805 static bool IsUpper(const CTempString str);
1806
1807
1808 // The following 4 methods change the passed string, then return it
1809
1810 /// Convert string to lower case (in place) -- string& version.
1811 ///
1812 /// @param str
1813 /// String to be converted; it is changed by the call.
1814 /// @return
1815 /// The passed string "str", lower cased.
1816 static string& ToLower(string& str);
1817
1818 /// Convert string to lower case (in place) -- char* version.
1819 ///
1820 /// @param str
1821 /// String to be converted; it is changed by the call.
1822 /// @return
1823 /// The passed string "str", lower cased.
1824 static char* ToLower(char* str);
1825
1826 /// Convert string to upper case (in place) -- string& version.
1827 ///
1828 /// @param str
1829 /// String to be converted; it is changed by the call.
1830 /// @return
1831 /// The passed string "str", upper cased.
1832 static string& ToUpper(string& str);
1833
1834 /// Convert string to upper case (in place) -- char* version.
1835 ///
1836 /// @param str
1837 /// String to be converted; it is changed by the call.
1838 /// @return
1839 /// The passed string "str", upper cased.
1840 static char* ToUpper(char* str);
1841
1842 private:
1843 /// Private ToLower() overload with a const char* parameter; it exists to
1844 /// prevent passing of constant strings to the public ToLower() versions.
1845 static void/*dummy*/ ToLower(const char* /*dummy*/);
1846
1847 /// Private ToUpper() overload with a const char* parameter; it exists to
1848 /// prevent passing of constant strings to the public ToUpper() versions.
1849 static void/*dummy*/ ToUpper(const char* /*dummy*/);
1850
1851 public:
1852
1853 /// Check if a string starts with a specified prefix value.
1854 ///
1855 /// @param str
1856 /// String to check.
1857 /// @param start
1858 /// Prefix value to check for.
1859 /// @param use_case
1860 /// Whether to do a case sensitive compare (default is eCase), or a
1861 /// case-insensitive compare (eNocase) while checking.
1862 static bool StartsWith(const CTempString str, const CTempString start,
1863 ECase use_case = eCase);
1864
1865 /// Check if a string starts with a specified character value.
1866 ///
1867 /// @param str
1868 /// String to check.
1869 /// @param start
1870 /// Character value to check for at the start of "str".
1871 /// @param use_case
1872 /// Whether to do a case sensitive compare (default is eCase), or a
1873 /// case-insensitive compare (eNocase) while checking.
1874 static bool StartsWith(const CTempString str, char start,
1875 ECase use_case = eCase);
1876
1877 /// Check if a string ends with a specified suffix value.
1878 ///
1879 /// @param str
1880 /// String to check.
1881 /// @param end
1882 /// Suffix value to check for.
1883 /// @param use_case
1884 /// Whether to do a case sensitive compare (default is eCase), or a
1885 /// case-insensitive compare (eNocase) while checking.
1886 static bool EndsWith(const CTempString str, const CTempString end,
1887 ECase use_case = eCase);
1888
1889 /// Check if a string ends with a specified character value.
1890 ///
1891 /// @param str
1892 /// String to check.
1893 /// @param end
1894 /// Character value to check for at the end of "str".
1895 /// @param use_case
1896 /// Whether to do a case sensitive compare (default is eCase), or a
1897 /// case-insensitive compare (eNocase) while checking.
1898 static bool EndsWith(const CTempString str, char end,
1899 ECase use_case = eCase);
1900
1901 /// Determine the size of the common prefix of two strings.
1902 ///
1903 /// @param s1
1904 /// String to be compared -- operand 1.
1905 /// @param s2
1906 /// String to be compared -- operand 2.
1907 /// @return
1908 /// The number of characters common to the start of each string.
1909 static SIZE_TYPE CommonPrefixSize(const CTempString s1, const CTempString s2);
1910
1911 /// Determine the size of the common suffix of two strings.
1912 ///
1913 /// @param s1
1914 /// String to be compared -- operand 1.
1915 /// @param s2
1916 /// String to be compared -- operand 2.
1917 /// @return
1918 /// The number of characters common to the end of each string.
1919 static SIZE_TYPE CommonSuffixSize(const CTempString s1, const CTempString s2);
1920
1921 /// Determine the size of the overlap between the suffix of one string and the prefix of another.
1922 ///
1923 /// @param s1
1924 /// String to be compared -- operand 1 (its end is examined).
1925 /// @param s2
1926 /// String to be compared -- operand 2 (its start is examined).
1927 /// @return
1928 /// The number of characters common to the end of the first string
1929 /// and the start of the second string.
1930 static SIZE_TYPE CommonOverlapSize(const CTempString s1, const CTempString s2);
1931
1932
1933 /// Whether it is the first or last occurrence.
1934 /// @deprecated The non-deprecated Find()/FindWord() overloads take EDirection instead.
1935 enum EOccurrence {
1936 eFirst, ///< First occurrence
1937 eLast ///< Last occurrence
1938 };
1939
1940 /// Search direction for Find() and FindWord() methods.
1941 enum EDirection {
1942 eForwardSearch = 0, ///< Search in a forward direction
1943 eReverseSearch ///< Search in a backward direction
1944 };
1945
1946
1947 /// Find the pattern in the string.
1948 ///
1949 /// @param str
1950 /// String to search.
1951 /// @param pattern
1952 /// Pattern to search for in "str".
1953 /// @param use_case
1954 /// Whether to do a case sensitive compare (default is eCase), or a
1955 /// case-insensitive compare (eNocase) while searching for the pattern.
1956 /// @param direction
1957 /// Defines the search direction for the requested "occurrence"
1958 /// of "pattern" in "str".
1959 /// @param occurrence
1960 /// Which occurrence of the pattern in the string to use (zero-based).
1961 /// NOTE: When an occurrence is found the next occurrence will be
1962 /// searched for starting right *after* the found pattern.
1963 /// @return
1964 /// Start of the found pattern in the string,
1965 /// or NPOS if there is no such occurrence of the pattern in the string.
1966 static SIZE_TYPE Find(const CTempString str,
1967 const CTempString pattern,
1968 ECase use_case = eCase,
1969 EDirection direction = eForwardSearch,
1970 SIZE_TYPE occurrence = 0);
1971
1972 /// Find the pattern in the specified range of a string.
1973 ///
1974 /// @param str
1975 /// String to search.
1976 /// @param pattern
1977 /// Pattern to search for in "str".
1978 /// @param start
1979 /// Position in "str" to start search from.
1980 /// 0 means start the search from the beginning of the string.
1981 /// @param end
1982 /// Position in "str" to perform search up to.
1983 /// NPOS means to search to the end of the string.
1984 /// @param which
1985 /// When set to eFirst, this means to find the first occurrence of
1986 /// "pattern" in "str". When set to eLast, this means to find the last
1987 /// occurrence of "pattern" in "str".
1988 /// @param use_case
1989 /// Whether to do a case sensitive compare (default is eCase), or a
1990 /// case-insensitive compare (eNocase) while searching for the pattern.
1991 /// @return
1992 /// - The start of the first or last (depending on "which" parameter)
1993 /// occurrence of "pattern" in "str", within the string interval
1994 /// ["start", "end"], or
1995 /// - NPOS if there is no occurrence of the pattern.
1996 /// @sa FindCase, FindNoCase, FindWord
1997 ///
1998 /// @deprecated
1999 /// Use the
2000 /// @code
2001 /// Find(str, pattern, [use_case], [direction], [occurrence])
2002 /// @endcode
2003 /// method instead.
2004 /// For example:
2005 /// @code
2006 /// Find(str, pattern, 0, NPOS, eLast, eCase)
2007 /// @endcode
2008 /// can be replaced by
2009 /// @code
2010 /// Find(str, pattern, eCase, eReverseSearch /*, 0 */)
2011 /// @endcode
2012 /// If you are searching in a substring of 'str', i.e. the ["start", "end"] search
2013 /// interval is not the default [0, NPOS] (which means the whole 'str' string), you may
2014 /// need to pass a substring instead of 'str', like
2015 /// @code
2016 /// Find(CTempString(str, start, len), pattern, ....)
2017 /// @endcode
2018 /// and, after checking the search result for NPOS, adjust it by 'start' yourself.
2019 NCBI_DEPRECATED
2020 static SIZE_TYPE Find(const CTempString str,
2021 const CTempString pattern,
2022 SIZE_TYPE start, SIZE_TYPE end,
2023 EOccurrence which = eFirst,
2024 ECase use_case = eCase);
2025
2026 /// Wrapper for backward-compatibility; forwards to FindCase(str, pattern, start).
2027 inline
Find(const CTempString str,const CTempString pattern,SIZE_TYPE start)2028 static SIZE_TYPE Find(const CTempString str, const CTempString pattern, SIZE_TYPE start)
2029 { return FindCase(str, pattern, start); }
2030
2031
2032 /// Find the pattern in the specified range of a string using a case
2033 /// sensitive search.
2034 ///
2035 /// @param str
2036 /// String to search.
2037 /// @param pattern
2038 /// Pattern to search for in "str".
2039 /// @param start
2040 /// Position in "str" to start search from -- 0 means start
2041 /// the search from the beginning of the string.
2042 /// @param end
2043 /// Position in "str" to perform search up to -- NPOS means
2044 /// to search to the end of the string.
2045 /// @param which
2046 /// When set to eFirst, this means to find the first occurrence of
2047 /// "pattern" in "str". When set to eLast, this means to find the last
2048 /// occurrence of "pattern" in "str".
2049 /// @return
2050 /// - The start of the first or last (depending on "which" parameter)
2051 /// occurrence of "pattern" in "str", within the string interval
2052 /// ["start", "end"], or
2053 /// - NPOS if there is no occurrence of the pattern.
2054 /// @sa Find
2055 ///
2056 /// @deprecated
2057 /// Use a Find() method without the [start:end] range.
2058 /// Namely,
2059 /// use one of the following methods instead:
2060 /// @code
2061 /// Find(str, pattern, [use_case], [direction], [occurrence])
2062 /// FindCase(str, pattern, [start])
2063 /// @endcode
2064 /// For example:
2065 /// @code
2066 /// FindCase(str, pattern, 0, NPOS, eLast)
2067 /// @endcode
2068 /// can be replaced by
2069 /// @code
2070 /// Find(str, pattern, eCase, eReverseSearch /*, 0 */)
2071 /// @endcode
2072 /// For simpler cases without range, or with default [0, NPOS] please use
2073 /// @code
2074 /// FindCase(str, pattern, [start])
2075 /// @endcode
2076 /// But if you are searching in a substring of 'str', i.e. the ["start", "end"] search
2077 /// interval is not the default [0, NPOS] (which means the whole 'str' string), you may
2078 /// need to pass a substring instead of 'str', like
2079 /// @code
2080 /// FindCase(CTempString(str, start, len), pattern, ....)
2081 /// @endcode
2082 /// and, after checking the search result for NPOS, adjust it by 'start' yourself.
2083 NCBI_DEPRECATED
2084 static SIZE_TYPE FindCase(const CTempString str,
2085 const CTempString pattern,
2086 SIZE_TYPE start, SIZE_TYPE end,
2087 EOccurrence which = eFirst);
2088
2089 /// Wrappers for backward-compatibility (no "end" / "which" arguments)
2090 static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern);
2091 static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start);
2092
2093 /// Find the pattern in the specified range of a string using a case
2094 /// insensitive search.
2095 ///
2096 /// @param str
2097 /// String to search.
2098 /// @param pattern
2099 /// Pattern to search for in "str".
2100 /// @param start
2101 /// Position in "str" to start search from -- 0 means start
2102 /// the search from the beginning of the string.
2103 /// @param end
2104 /// Position in "str" to perform search up to -- NPOS means
2105 /// to search to the end of the string.
2106 /// @param which
2107 /// When set to eFirst, this means to find the first occurrence of
2108 /// "pattern" in "str". When set to eLast, this means to find the last
2109 /// occurrence of "pattern" in "str".
2110 /// @return
2111 /// - The start of the first or last (depending on "which" parameter)
2112 /// occurrence of "pattern" in "str", within the string interval
2113 /// ["start", "end"], or
2114 /// - NPOS if there is no occurrence of the pattern.
2115 /// @sa Find
2116 ///
2117 /// @deprecated
2118 /// Use one of the following methods instead:
2119 /// @code
2120 /// Find(str, pattern, [use_case], [direction], [occurrence])
2121 /// FindNoCase(str, pattern, [start])
2122 /// @endcode
2123 /// For example:
2124 /// @code
2125 /// FindNoCase(str, pattern, 0, NPOS, eLast)
2126 /// @endcode
2127 /// can be replaced by
2128 /// @code
2129 /// Find(str, pattern, eNocase, eReverseSearch /*, 0 */)
2130 /// @endcode
2131 /// For simpler cases without range, or with default [0, NPOS] please use
2132 /// @code
2133 /// FindNoCase(str, pattern, [start])
2134 /// @endcode
2135 /// But if you are searching in a substring of 'str', i.e. the ["start", "end"] search
2136 /// interval is not the default [0, NPOS] (which means the whole 'str' string), you may
2137 /// need to pass a substring instead of 'str', like
2138 /// @code
2139 /// FindNoCase(CTempString(str, start, len), pattern, ....)
2140 /// @endcode
2141 /// and, after checking the search result for NPOS, adjust it by 'start' yourself.
2142 NCBI_DEPRECATED
2143 static SIZE_TYPE FindNoCase(const CTempString str,
2144 const CTempString pattern,
2145 SIZE_TYPE start, SIZE_TYPE end,
2146 EOccurrence which = eFirst);
2147
2148 /// Wrappers for backward-compatibility (no "end" / "which" arguments)
2149 static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern);
2150 static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start);
2151
2152 /// Test for presence of a given string in a list or vector of strings.
/// Each overload returns a pointer to a string.
/// NOTE(review): presumably a pointer to the matching element, or NULL if
/// "val" is not present -- confirm against the implementation.
2153
2154 static const string* Find (const list<string>& lst,
2155 const CTempString val,
2156 ECase use_case = eCase);
2157
2158 static const string* FindCase (const list<string>& lst,
2159 const CTempString val);
2160
2161 static const string* FindNoCase(const list<string>& lst,
2162 const CTempString val);
2163
2164 static const string* Find (const vector<string>& vec,
2165 const CTempString val,
2166 ECase use_case = eCase);
2167
2168 static const string* FindCase (const vector<string>& vec,
2169 const CTempString val);
2170
2171 static const string* FindNoCase(const vector<string>& vec,
2172 const CTempString val);
2173
2174 /// Find given word in the string.
2175 ///
2176 /// @param str
2177 /// String to search.
2178 /// @param word
2179 /// Word to search for in "str". The "word" can have any symbols,
2180 /// not letters only. The function treats it as a pattern, even if it has
2181 /// non-word characters.
2182 /// @param use_case
2183 /// Whether to do a case sensitive compare (default is eCase), or a
2184 /// case-insensitive compare (eNocase) while searching for the word.
2185 /// @param direction
2186 /// Defines the search direction for the occurrence of "word" in "str".
2187 /// @return
2188 /// - Start of the found word in the string.
2189 /// - NPOS if there is no occurrence of the word in the string.
2190 static SIZE_TYPE FindWord(const CTempString str,
2191 const CTempString word,
2192 ECase use_case = eCase,
2193 EDirection direction = eForwardSearch);
2194
2195 /// Find given word in the string.
2196 ///
2197 /// This function honors word boundaries:
2198 /// - the start or the end of the string,
2199 /// - any non-word character, i.e. any character except [a-zA-Z0-9_].
2200 ///
2201 /// @param str
2202 /// String to search.
2203 /// @param word
2204 /// Word to search for in "str". The "word" can have any symbols,
2205 /// not letters only. The function treats it as a pattern, even if it has
2206 /// non-word characters.
2207 /// @param which
2208 /// When set to eFirst, this means to find the first occurrence of
2209 /// "word" in "str". When set to eLast, this means to find the last
2210 /// occurrence of "word" in "str".
2211 /// @param use_case
2212 /// Whether to do a case sensitive compare (default is eCase), or a
2213 /// case-insensitive compare (eNocase) while searching for the word.
2214 /// @return
2215 /// - The start of the first or last (depending on "which" parameter)
2216 /// occurrence of "word" in "str", or
2217 /// - NPOS if there is no occurrence of the word.
2218 /// @sa Find
2219 /// @deprecated
2220 /// Use the FindWord() variant with the EDirection parameter:
2221 /// @code
2222 /// FindWord(str, word, [use_case], [direction])
2223 /// @endcode
2224 inline
2225 NCBI_DEPRECATED
FindWord(const CTempString str,const CTempString word,EOccurrence which,ECase use_case=eCase)2226 static SIZE_TYPE FindWord(const CTempString str,
2227 const CTempString word,
2228 EOccurrence which,
2229 ECase use_case = eCase) {
// eFirst maps to a forward search; any other value maps to a reverse search.
2230 return FindWord(str, word, use_case, which == eFirst ? eForwardSearch : eReverseSearch);
2231 }
2232
2233
2234 /// Which end of a string to truncate.
2235 enum ETrunc {
2236 eTrunc_Begin, ///< Truncate leading spaces only
2237 eTrunc_End, ///< Truncate trailing spaces only
2238 eTrunc_Both ///< Truncate spaces at both begin and end of string
2239 };
2240
2241 /// Truncate spaces in a string; the result is returned as a new string.
2242 ///
2243 /// @param str
2244 /// String to truncate spaces from.
2245 /// @param where
2246 /// Which end of the string to truncate spaces from. Default is to
2247 /// truncate spaces from both ends (eTrunc_Both).
2248 /// @sa
2249 /// TruncateSpaces_Unsafe
2250 static string TruncateSpaces(const string& str,
2251 ETrunc where = eTrunc_Both);
2252
2253 /// Truncate spaces in a string.
2254 /// It can be faster but it is also more dangerous than TruncateSpaces()
2255 ///
2256 /// @param str
2257 /// String to truncate spaces from.
2258 /// @param where
2259 /// Which end of the string to truncate spaces from. Default is to
2260 /// truncate spaces from both ends (eTrunc_Both).
2261 /// @attention
2262 /// The lifespan of the result string is the same as that of the source.
2263 /// So, for example, if the source is a temporary string, or it changes somehow,
2264 /// then the result will be invalid right away (it will point to an already released
2265 /// or wrong range in the memory).
2266 /// @sa
2267 /// TruncateSpaces
2268 static CTempString TruncateSpaces_Unsafe(const CTempString str,
2269 ETrunc where = eTrunc_Both);
2270
2271 /// @deprecated Use TruncateSpaces_Unsafe() instead (this overload just forwards
2272 /// to it) -- AND, do make sure that you indeed use that in a safe manner!
2273 inline
2274 NCBI_DEPRECATED
TruncateSpaces(const CTempString str,ETrunc where=eTrunc_Both)2275 static CTempString TruncateSpaces(const CTempString str,
2276 ETrunc where = eTrunc_Both) {
2277 return TruncateSpaces_Unsafe(str, where);
2278 }
2279
2280 /// @deprecated Use TruncateSpaces_Unsafe() instead (this overload just forwards
2281 /// to it) -- AND, do make sure that you indeed use that in a safe manner!
2282 inline
2283 NCBI_DEPRECATED
TruncateSpaces(const char * str,ETrunc where=eTrunc_Both)2284 static CTempString TruncateSpaces(const char* str,
2285 ETrunc where = eTrunc_Both) {
2286 return TruncateSpaces_Unsafe(str, where);
2287 }
2288
2289 /// Truncate spaces in a string (in-place) -- string& and CTempString& versions.
2290 ///
2291 /// @param str
2292 /// String to truncate spaces from.
2293 /// @param where
2294 /// Which end of the string to truncate spaces from. Default is to
2295 /// truncate spaces from both ends (eTrunc_Both).
2296 static void TruncateSpacesInPlace(string& str, ETrunc where = eTrunc_Both);
2297 static void TruncateSpacesInPlace(CTempString&, ETrunc where = eTrunc_Both);
2298
2299
2300 /// Trim prefix from a string (in-place) -- string& and CTempString& versions.
2301 ///
2302 /// @param str
2303 /// String to trim from.
2304 /// @param prefix
2305 /// Prefix to remove.
2306 /// If the string doesn't have the specified prefix, it is left unchanged.
2307 /// @param use_case
2308 /// Whether to do a case sensitive compare (default is eCase), or a
2309 /// case-insensitive compare (eNocase) while checking for a prefix.
2310 static void TrimPrefixInPlace(string& str, const CTempString prefix,
2311 ECase use_case = eCase);
2312 static void TrimPrefixInPlace(CTempString& str, const CTempString prefix,
2313 ECase use_case = eCase);
2314
2315 /// Trim prefix from a string.
2316 ///
2317 /// "Unsafe" counterpart to TrimPrefixInPlace().
2318 /// @param str
2319 /// String to trim from.
2320 /// @param prefix
2321 /// Prefix to remove.
2322 /// If the string doesn't have the specified prefix, it is left unchanged.
2323 /// @param use_case
2324 /// Whether to do a case sensitive compare (default is eCase), or a
2325 /// case-insensitive compare (eNocase) while checking for a prefix.
2326 /// @attention
2327 /// The lifespan of the result string is the same as that of the source.
2328 /// So, for example, if the source is a temporary string, or it changes somehow,
2329 /// then the result will be invalid right away (it will point to an already released
2330 /// or wrong range in the memory).
2331 /// @sa
2332 /// TrimPrefixInPlace
2333 static CTempString TrimPrefix_Unsafe(const CTempString str,
2334 const CTempString prefix,
2335 ECase use_case = eCase);
2336
2337 /// Trim suffix from a string (in-place) -- string& and CTempString& versions.
2338 ///
2339 /// @param str
2340 /// String to trim from.
2341 /// @param suffix
2342 /// Suffix to remove.
2343 /// If the string doesn't have the specified suffix, it is left unchanged.
2344 /// @param use_case
2345 /// Whether to do a case sensitive compare (default is eCase), or a
2346 /// case-insensitive compare (eNocase) while checking for a suffix.
2347 static void TrimSuffixInPlace(string& str, const CTempString suffix,
2348 ECase use_case = eCase);
2349 static void TrimSuffixInPlace(CTempString& str, const CTempString suffix,
2350 ECase use_case = eCase);
2351
2352 /// Trim suffix from a string.
2353 ///
2354 /// "Unsafe" counterpart to TrimSuffixInPlace().
2355 /// @param str
2356 /// String to trim from.
2357 /// @param suffix
2358 /// Suffix to remove.
2359 /// If the string doesn't have the specified suffix, it is left unchanged.
2360 /// @param use_case
2361 /// Whether to do a case sensitive compare (default is eCase), or a
2362 /// case-insensitive compare (eNocase) while checking for a suffix.
2363 /// @attention
2364 /// The lifespan of the result string is the same as that of the source.
2365 /// So, for example, if the source is a temporary string, or it changes somehow,
2366 /// then the result will be invalid right away (it will point to an already released
2367 /// or wrong range in the memory).
2368 /// @sa
2369 /// TrimSuffixInPlace
2370 static CTempString TrimSuffix_Unsafe(const CTempString str,
2371 const CTempString suffix,
2372 ECase use_case = eCase);
2373
2374 /// Replace occurrences of a substring within a string.
2375 ///
2376 /// @param src
2377 /// Source string in which the specified substring occurrences are
2378 /// replaced.
2379 /// @param search
2380 /// Substring value in "src" that is replaced.
2381 /// @param replace
2382 /// Replace "search" substring with this value.
2383 /// @param dst
2384 /// Result of replacing the "search" string with "replace" in "src".
2385 /// This value is also returned by the function.
2386 /// @param start_pos
2387 /// Position to start search from.
2388 /// @param max_replace
2389 /// Replace no more than "max_replace" occurrences of substring "search".
2390 /// If "max_replace" is zero (default), then replace all occurrences with
2391 /// "replace".
2392 /// @param num_replace
2393 /// Optional pointer to a value which receives the number of replacements made.
2394 /// @return
2395 /// Result of replacing the "search" string with "replace" in "src". This
2396 /// value is placed in "dst" as well.
2397 /// @sa
2398 /// Version of Replace() that returns a new string.
2399 static string& Replace(const string& src,
2400 const string& search,
2401 const string& replace,
2402 string& dst,
2403 SIZE_TYPE start_pos = 0,
2404 SIZE_TYPE max_replace = 0,
2405 SIZE_TYPE* num_replace = 0);
2406
2407 /// Replace occurrences of a substring within a string and return the
2408 /// result as a new string.
2409 ///
2410 /// @param src
2411 /// Source string in which the specified substring occurrences are
2412 /// replaced.
2413 /// @param search
2414 /// Substring value in "src" that is replaced.
2415 /// @param replace
2416 /// Replace "search" substring with this value.
2417 /// @param start_pos
2418 /// Position to start search from.
2419 /// @param max_replace
2420 /// Replace no more than "max_replace" occurrences of substring "search".
2421 /// If "max_replace" is zero (default), then replace all occurrences with
2422 /// "replace".
2423 /// @param num_replace
2424 /// Optional pointer to a value which receives the number of replacements made.
2425 /// @return
2426 /// A new string containing the result of replacing the "search" string
2427 /// with "replace" in "src".
2428 /// @sa
2429 /// Version of Replace() that has a destination parameter to accept
2430 /// result.
2431 static string Replace(const string& src,
2432 const string& search,
2433 const string& replace,
2434 SIZE_TYPE start_pos = 0,
2435 SIZE_TYPE max_replace = 0,
2436 SIZE_TYPE* num_replace = 0);
2437
2438 /// Replace occurrences of a substring within a string (in-place).
2439 ///
2440 /// On some platforms this function is much faster than Replace()
2441 /// if sizes of "search" and "replace" strings are equal.
2442 /// Otherwise, the performance is mainly the same.
2443 /// @param src
2444 /// String where the specified substring occurrences are replaced.
2445 /// This value is also returned by the function.
2446 /// @param search
2447 /// Substring value in "src" that is replaced.
2448 /// @param replace
2449 /// Replace "search" substring with this value.
2450 /// @param start_pos
2451 /// Position to start search from.
2452 /// @param max_replace
2453 /// Replace no more than "max_replace" occurrences of substring "search".
2454 /// If "max_replace" is zero (default), then replace all occurrences with
2455 /// "replace".
2456 /// @param num_replace
2457 /// Optional pointer to a value which receives the number of replacements made.
2458 /// @return
2459 /// Result of replacing the "search" string with "replace" in "src".
2460 /// @sa
2461 /// Replace
2462 static string& ReplaceInPlace(string& src,
2463 const string& search,
2464 const string& replace,
2465 SIZE_TYPE start_pos = 0,
2466 SIZE_TYPE max_replace = 0,
2467 SIZE_TYPE* num_replace = 0);
2468
/// Flags for Split*() methods.
///
/// @note
///   With quote support enabled, doubling a quote character suppresses
///   its special meaning, as does escaping it if that's enabled too;
///   unescaped trailing backslashes and unbalanced quotes result in
///   exceptions.
/// @note
///   All escape symbols and single or double quotes are removed
///   if the corresponding fSplit_Can* flag is used.
enum ESplitFlags {
    fSplit_MergeDelimiters = 1 << 0,  ///< Merge adjacent delimiters
    fSplit_Truncate_Begin  = 1 << 1,  ///< Truncate leading delimiters
    fSplit_Truncate_End    = 1 << 2,  ///< Truncate trailing delimiters
    /// Truncate both leading and trailing delimiters
    fSplit_Truncate        = fSplit_Truncate_Begin | fSplit_Truncate_End,
    fSplit_ByPattern       = 1 << 3,  ///< Require full delimiter strings
    fSplit_CanEscape       = 1 << 4,  ///< Allow \\... escaping
    fSplit_CanSingleQuote  = 1 << 5,  ///< Allow '...' quoting
    fSplit_CanDoubleQuote  = 1 << 6,  ///< Allow "..." quoting
    /// Allow both '...' and "..." quoting
    fSplit_CanQuote        = fSplit_CanSingleQuote | fSplit_CanDoubleQuote,
    /// All delimiters are merged and trimmed, to get non-empty tokens only
    fSplit_Tokenize        = fSplit_MergeDelimiters | fSplit_Truncate
};
typedef int TSplitFlags; ///< Bitwise OR of ESplitFlags
2493
/// Whether to merge adjacent delimiters.
/// Used by some methods that don't need full functionality of ESplitFlags.
///
/// @note
///   The enumerators are expressed in terms of ESplitFlags values;
///   eMergeDelims has the same value as fSplit_Tokenize.
enum EMergeDelims {
    /// Merge adjacent delimiters and truncate leading/trailing ones
    eMergeDelims   = fSplit_MergeDelimiters | fSplit_Truncate,
    /// No ESplitFlags bits set
    eNoMergeDelims = 0
};
2500
/// Split a string using specified delimiters.
///
/// @param str
///   String to be split.
/// @param delim
///   Delimiter(s) used to split string "str".  The interpretation of
///   multi-character values depends on flags: by default, any of those
///   characters marks a split point (when unquoted), but with
///   fSplit_ByPattern, the entire string must occur.  (Meanwhile,
///   an empty value disables splitting.)
/// @param arr
///   The split tokens are added to the list "arr" and also returned
///   by the function.
/// @param flags
///   Flags directing splitting, characterized under ESplitFlags.
/// @param token_pos
///   Optional array for the tokens' positions in "str".
/// @attention
///   Modifying the source CTempString object, or destroying it,
///   will invalidate the results.
/// @return
///   The list "arr" is also returned.
/// @sa
///   ESplitFlags, SplitInTwo, SplitByPattern
static list<string>& Split( const CTempString    str,
                            const CTempString    delim,
                            list<string>&        arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL);
2530
/// Split a string using specified delimiters, collecting the tokens
/// in a vector<string>.
///
/// Takes the same "str", "delim", "flags" and "token_pos" arguments as the
/// documented list<string> overload of Split(); only the container type of
/// "arr" (and of the returned reference) differs.
/// @sa
///   ESplitFlags, SplitInTwo, SplitByPattern
static vector<string>& Split(
                            const CTempString    str,
                            const CTempString    delim,
                            vector<string>&      arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL);
2537
/// Split a string using specified delimiters, collecting the tokens
/// as CTempString objects in a list.
///
/// Takes the same "str", "delim", "flags" and "token_pos" arguments as the
/// documented list<string> overload of Split().
/// @param storage
///   Optional CTempString_Storage object (NULL by default).
///   NOTE(review): its role is not described in this part of the header;
///   presumably it holds the data of tokens that cannot refer directly
///   to "str" -- confirm against the implementation.
/// @sa
///   ESplitFlags, SplitInTwo, SplitByPattern
static list<CTempString>& Split(
                            const CTempString    str,
                            const CTempString    delim,
                            list<CTempString>&   arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL,
                            CTempString_Storage* storage = NULL);
2545
/// Split a string using specified delimiters, collecting the tokens
/// as CTempString objects in a vector.
///
/// Takes the same "str", "delim", "flags" and "token_pos" arguments as the
/// documented list<string> overload of Split().
/// @param storage
///   Optional CTempString_Storage object (NULL by default).
///   NOTE(review): its role is not described in this part of the header;
///   presumably it holds the data of tokens that cannot refer directly
///   to "str" -- confirm against the implementation.
/// @sa
///   ESplitFlags, SplitInTwo, SplitByPattern
static vector<CTempString>& Split(
                            const CTempString    str,
                            const CTempString    delim,
                            vector<CTempString>& arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL,
                            CTempString_Storage* storage = NULL);
2553
/// Split a string using specified delimiters, collecting the tokens
/// as CTempStringEx objects in a list.
///
/// Takes the same "str", "delim", "flags" and "token_pos" arguments as the
/// documented list<string> overload of Split().
/// @param storage
///   Optional CTempString_Storage object (NULL by default).
///   NOTE(review): its role is not described in this part of the header;
///   presumably it holds the data of tokens that cannot refer directly
///   to "str" -- confirm against the implementation.
/// @sa
///   ESplitFlags, SplitInTwo, SplitByPattern
static list<CTempStringEx>& Split(
                            const CTempString    str,
                            const CTempString    delim,
                            list<CTempStringEx>& arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL,
                            CTempString_Storage* storage = NULL);
2561
/// Split a string using specified delimiters, collecting the tokens
/// as CTempStringEx objects in a vector.
///
/// Takes the same "str", "delim", "flags" and "token_pos" arguments as the
/// documented list<string> overload of Split().
/// @param storage
///   Optional CTempString_Storage object (NULL by default).
///   NOTE(review): its role is not described in this part of the header;
///   presumably it holds the data of tokens that cannot refer directly
///   to "str" -- confirm against the implementation.
/// @sa
///   ESplitFlags, SplitInTwo, SplitByPattern
static vector<CTempStringEx>& Split(
                            const CTempString      str,
                            const CTempString      delim,
                            vector<CTempStringEx>& arr,
                            TSplitFlags            flags = 0,
                            vector<SIZE_TYPE>*     token_pos = NULL,
                            CTempString_Storage*   storage = NULL);
2569
/// Split a string into two pieces using the specified delimiters.
///
/// @param str
///   String to be split.
/// @param delim
///   Delimiters used to split string "str".
/// @param str1
///   The sub-string of "str" before the first character of "delim".
///   It will not contain any characters in "delim".
///   Will be empty if "str" begins with a delimiter.
/// @param str2
///   The sub-string of "str" after the first character of "delim" found.
///   May contain "delim" characters.
///   Will be empty if "str" had no "delim" characters or ended
///   with the "delim" character.
/// @param flags
///   Flags directing splitting, characterized under ESplitFlags.
///   Note that fSplit_Truncate_End doesn't have any effect due to
///   the nature of this method.
/// @attention
///   Modifying the source CTempString object, or destroying it,
///   will invalidate the results.
/// @return
///   true if a symbol from "delim" was found in "str", false if not.
///   This lets you distinguish when there were no delimiters and when
///   the very last character was the first delimiter.
/// @sa
///   ESplitFlags, Split
static bool SplitInTwo(const CTempString  str,
                       const CTempString  delim,
                       string&            str1,
                       string&            str2,
                       TSplitFlags        flags = 0);
2603
/// Split a string into two pieces using the specified delimiters,
/// returning the pieces as CTempString objects.
///
/// Takes the same "str", "delim" and "flags" arguments, and has the same
/// bool result, as the documented string-based overload of SplitInTwo().
/// @param storage
///   Optional CTempString_Storage object (NULL by default).
///   NOTE(review): its role is not described in this part of the header;
///   presumably it holds the data of pieces that cannot refer directly
///   to "str" -- confirm against the implementation.
/// @sa
///   ESplitFlags, Split
static bool SplitInTwo(const CTempString    str,
                       const CTempString    delim,
                       CTempString&         str1,
                       CTempString&         str2,
                       TSplitFlags          flags = 0,
                       CTempString_Storage* storage = NULL);
2610
/// Split a string into two pieces using the specified delimiters,
/// returning the pieces as CTempStringEx objects.
///
/// Takes the same "str", "delim" and "flags" arguments, and has the same
/// bool result, as the documented string-based overload of SplitInTwo().
/// @param storage
///   Optional CTempString_Storage object (NULL by default).
///   NOTE(review): its role is not described in this part of the header;
///   presumably it holds the data of pieces that cannot refer directly
///   to "str" -- confirm against the implementation.
/// @sa
///   ESplitFlags, Split
static bool SplitInTwo(const CTempString    str,
                       const CTempString    delim,
                       CTempStringEx&       str1,
                       CTempStringEx&       str2,
                       TSplitFlags          flags = 0,
                       CTempString_Storage* storage = NULL);
2617
2618
/// Variation of Split() with fSplit_ByPattern flag applied by default.
///
/// The parameters mirror those of Split(): "str" is the string to split,
/// "delim" the delimiter, "arr" the container receiving the tokens,
/// "flags" a bitwise OR of ESplitFlags, and "token_pos" an optional array
/// for the tokens' positions in "str".
/// @sa
///   Split, ESplitFlags
static list<string>& SplitByPattern(
                            const CTempString    str,
                            const CTempString    delim,
                            list<string>&        arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL);
2627
/// Variation of Split() with fSplit_ByPattern flag applied by default;
/// this overload collects the tokens in a vector<string>.
/// @sa
///   Split, ESplitFlags
static vector<string>& SplitByPattern(
                            const CTempString    str,
                            const CTempString    delim,
                            vector<string>&      arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL);
2634
/// Variation of Split() with fSplit_ByPattern flag applied by default;
/// this overload collects the tokens as CTempString objects in a list.
///
/// NOTE(review): the role of "storage" is not described in this part of
/// the header; see the corresponding Split() overload and its definition.
/// @sa
///   Split, ESplitFlags
static list<CTempString>& SplitByPattern(
                            const CTempString    str,
                            const CTempString    delim,
                            list<CTempString>&   arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL,
                            CTempString_Storage* storage = NULL);
2642
/// Variation of Split() with fSplit_ByPattern flag applied by default;
/// this overload collects the tokens as CTempString objects in a vector.
///
/// NOTE(review): the role of "storage" is not described in this part of
/// the header; see the corresponding Split() overload and its definition.
/// @sa
///   Split, ESplitFlags
static vector<CTempString>& SplitByPattern(
                            const CTempString    str,
                            const CTempString    delim,
                            vector<CTempString>& arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL,
                            CTempString_Storage* storage = NULL);
2650
/// Variation of Split() with fSplit_ByPattern flag applied by default;
/// this overload collects the tokens as CTempStringEx objects in a list.
///
/// NOTE(review): the role of "storage" is not described in this part of
/// the header; see the corresponding Split() overload and its definition.
/// @sa
///   Split, ESplitFlags
static list<CTempStringEx>& SplitByPattern(
                            const CTempString    str,
                            const CTempString    delim,
                            list<CTempStringEx>& arr,
                            TSplitFlags          flags = 0,
                            vector<SIZE_TYPE>*   token_pos = NULL,
                            CTempString_Storage* storage = NULL);
2658
/// Variation of Split() with fSplit_ByPattern flag applied by default;
/// this overload collects the tokens as CTempStringEx objects in a vector.
///
/// NOTE(review): the role of "storage" is not described in this part of
/// the header; see the corresponding Split() overload and its definition.
/// @sa
///   Split, ESplitFlags
static vector<CTempStringEx>& SplitByPattern(
                            const CTempString      str,
                            const CTempString      delim,
                            vector<CTempStringEx>& arr,
                            TSplitFlags            flags = 0,
                            vector<SIZE_TYPE>*     token_pos = NULL,
                            CTempString_Storage*   storage = NULL);
2666
2667 /// Join strings using the specified delimiter.
2668 ///
2669 /// @param arr
2670 /// Array of strings to be joined.
2671 /// @param delim
2672 /// Delimiter used to join the string.
2673 /// @return
2674 /// The strings in "arr" are joined into a single string, separated
2675 /// with "delim".
2676 /// @sa Split
2677 template<typename TContainer>
2678 static string
Join(const TContainer & arr,const CTempString & delim)2679 Join(const TContainer& arr, const CTempString& delim)
2680 {
2681 return x_Join(begin(arr), end(arr), delim);
2682 }
2683 template<typename TValue>
2684 static string
Join(const initializer_list<TValue> & arr,const CTempString & delim)2685 Join(const initializer_list<TValue>& arr, const CTempString& delim)
2686 {
2687 return x_Join(begin(arr), end(arr), delim);
2688 }
2689 template<typename TInputIterator>
2690 static string
Join(TInputIterator from,TInputIterator to,const CTempString & delim)2691 Join( TInputIterator from, TInputIterator to, const CTempString& delim)
2692 {
2693 return x_Join(from, to, delim);
2694 }
2695 template<typename TInputIterator>
2696 static string
JoinNumeric(TInputIterator from,TInputIterator to,const CTempString & delim)2697 JoinNumeric( TInputIterator from, TInputIterator to, const CTempString& delim)
2698 {
2699 return x_Join( from, to, delim);
2700 }
/// Join the elements of an iterator range using the specified delimiter
/// and a caller-supplied transformation.
///
/// Only the declaration appears here; the definition is elsewhere.
/// NOTE(review): presumably "fnTransform" is applied to each element of
/// [from, to) and its results are joined with "delim" -- confirm against
/// the definition.
/// @sa Join
template<typename TIterator, typename FTransform>
static string
TransformJoin( TIterator from, TIterator to, const CTempString& delim, FTransform fnTransform);
2704
2705
/// How to display printable strings.
///
/// Assists in making a printable version of "str".
/// The e* enumerators are aliases with the same values as the
/// corresponding f* enumerators.
enum EPrintableMode {
    fNewLine_Quote     = 0,  ///< Display "\n" instead of actual linebreak
    eNewLine_Quote     = fNewLine_Quote,
    fNewLine_Passthru  = 1,  ///< Break the line at every "\n" occurrence
    eNewLine_Passthru  = fNewLine_Passthru,
    fNonAscii_Passthru = 0,  ///< Allow non-ASCII but printable characters
    fNonAscii_Quote    = 2,  ///< Octal for all non-ASCII characters
    fPrintable_Full    = 64  ///< Show all octal digits at all times
};
typedef int TPrintableMode;  ///< Bitwise OR of EPrintableMode flags
2719
/// Get a printable version of the specified string.
///
/// All non-printable characters will be represented as "\r", "\n", "\v",
/// "\t", "\"", "\\\\", etc, or "\\ooo" where 'ooo' is an octal code of the
/// character.  The resultant string is a well-formed C string literal,
/// which, without alterations, can be compiled by a C/C++ compiler.
/// In many instances, octal representations of non-printable characters
/// can be reduced to take less than all 3 digits, if there is no
/// ambiguity in the interpretation.  fPrintable_Full cancels the
/// reduction, and forces production of full 3-digit octal codes throughout.
///
/// @param str
///   The string whose printable version is wanted.
/// @param mode
///   How to display the string.  The default setting of fNewLine_Quote
///   displays the new lines as "\n", and uses the octal code reduction.
///   When set to fNewLine_Passthru, line breaks are actually
///   produced on output but preceded with trailing backslashes.
/// @return
///   Return a printable version of "str".
/// @sa
///   ParseEscapes, Escape, CEncode, CParse, Sanitize
static string PrintableString(const CTempString str,
                              TPrintableMode    mode = fNewLine_Quote | fNonAscii_Passthru);
2744
/// Escape string (generic version).
///
/// Prefix any occurrences of the metacharacters with the escape character.
/// @param str
///   The string to be escaped.
/// @param metacharacters
///   List of characters that need to be escaped.
///   Use NStr::Join() if you have metacharacters in list<>, vector<> or set<>.
/// @param escape_char
///   Character used for escaping metacharacters.
///   Each metacharacter will be replaced with pair "escape_char + metacharacter".
///   Each escape character will be replaced with pair "escape_char + escape_char".
/// @return
///   Escaped string.
/// @sa
///   Unescape, PrintableString, Join
static string Escape(const CTempString str, const CTempString metacharacters,
                     char escape_char = '\\');
2763
/// Unescape string (generic version).
///
/// Remove escape characters added by Escape().
/// @param str
///   The string to be processed.
/// @param escape_char
///   Character used for escaping (backslash by default).
/// @return
///   Unescaped string.
/// @sa
///   Escape
static string Unescape(const CTempString str, char escape_char = '\\');
2776
2777
/// Quote string (generic version).
///
/// Prepend and append a specified quote character, escaping any occurrence
/// of the quote character inside the string using either a specified escape
/// character (default '\') or, as an option, by doubling the quote character
/// if the escape character is the same as the quote character
/// (e.g. like the single quote in SQL, double-quote in CSV).
///
/// @param str
///   The string to be quoted.
/// @param quote_char
///   Character used for quoting, defaults to double quote '"'.
/// @param escape_char
///   Character used for escaping other quote characters inside string (default '\').
///   Each <quote_char> in the string will be replaced with pair "escape_char + quote_char".
///   Each <escape_char> in the string will be replaced with pair "escape_char + escape_char".
/// @return
///   Quoted string.
/// @sa
///   Unquote, ParseQuoted, CEncode
static string Quote(const CTempString str, char quote_char = '"', char escape_char = '\\');
2798
/// Unquote string (generic version).
///
/// Remove quotation added by Quote().  Uses the first character of "str"
/// as the quoting character.
/// @param str
///   The string to be processed.
/// @param escape_char
///   Character used for escaping (backslash by default).
/// @return
///   Unquoted string.
/// @sa
///   Quote, ParseQuoted, CEncode
static string Unquote(const CTempString str, char escape_char = '\\');
2811
2812
/// Flags for Sanitize().
///
/// The low bits select character classes; the higher bits control how
/// the selection is applied and how the result is post-processed.
enum ESS_Flags {
    // Character filters
    fSS_alpha            = 1 << 0,   ///< Check on ::isalpha()
    fSS_digit            = 1 << 1,   ///< Check on ::isdigit()
    fSS_alnum            = 1 << 2,   ///< Check on ::isalnum()
    fSS_print            = 1 << 3,   ///< Check on ::isprint()
    fSS_cntrl            = 1 << 4,   ///< Check on ::iscntrl()
    fSS_punct            = 1 << 5,   ///< Check on ::ispunct()

    // Filter: in or out?
    fSS_Reject           = 1 << 11,  ///< Reject specified characters, allow all others.
                                     ///< Reverts the default behavior, which allows
                                     ///< specified characters and rejects all others.
    // Utility flags
    fSS_Remove           = 1 << 12,  ///< Remove (rather than replace) rejected chars
    fSS_NoMerge          = 1 << 13,  ///< Do not merge adjacent spaces (rejected chars)
    fSS_NoTruncate_Begin = 1 << 14,  ///< Do not truncate leading spaces
    fSS_NoTruncate_End   = 1 << 15,  ///< Do not truncate trailing spaces
    /// Do not truncate leading or trailing spaces
    fSS_NoTruncate       = fSS_NoTruncate_Begin | fSS_NoTruncate_End
};
typedef int TSS_Flags;  ///< Bitwise OR of ESS_Flags
2835
2836 /// Sanitize a string, allowing only specified classes of characters.
2837 ///
2838 /// By default:
2839 /// - replace all non-printable characters with spaces;
2840 /// - merge coalescent spaces;
2841 /// - truncate leading and trailing spaces.
2842 /// @note
2843 /// - All coalescent leading/trailing spaces also will be merged
2844 /// by default if fSS_NoMerge has not specified.
2845 /// - The truncation of leading/trailing spaces is doing after
2846 /// allowing/rejecting characters. Depending on the specified flags,
2847 /// all rejected characters adjacent to it can be treat as part
2848 /// of leading/trailing spaces.
2849 /// @param str
2850 /// String to sanitize
2851 /// @param flags
2852 /// Alternative sanitation options
2853 /// @return
2854 /// Sanitized string
2855 /// @sa
2856 /// PrintableString
Sanitize(CTempString str,TSS_Flags flags=fSS_print)2857 static string Sanitize(CTempString str, TSS_Flags flags = fSS_print)
2858 {
2859 return Sanitize(str, CTempString(), CTempString(), ' ', flags);
2860 }
2861
2862
/// Sanitize a string, allowing only specified characters or character classes.
///
/// More customizable version of Sanitize():
///   - allows specifying custom sets of allowed and rejected characters,
///     in addition to predefined classes if specified, see TSS_Flags;
///   - allows specifying a replacement character for rejected symbols.
/// By default:
///   - replace all rejected characters with <reject_replacement>;
///   - merge adjacent spaces and <reject_replacement>s (separately if they differ);
///   - truncate leading and trailing spaces.
/// Filters check order:
///   - character classes via flags.
///     Note that if no character classes are set, and no custom <allow_chars>
///     or <reject_chars>, fSS_print will be used;
///   - <allow_chars>, if not empty, has priority over flags;
///   - <reject_chars>, if not empty, has priority over flags and over
///     <allow_chars> where they intersect.
/// @note
///   - All adjacent leading/trailing spaces will also be merged
///     by default if fSS_NoMerge has not been specified.
///   - The truncation of leading/trailing spaces is done after
///     allowing/rejecting characters.
/// @note
///   Spaces are processed after the checks on allowance, so if a space
///   isn't allowed it will be treated as a regular rejected character.
/// @param str
///   String to sanitize.
/// @param allow_chars
///   Additional list of allowed characters, in addition to character classes in <flags>.
///   Has priority over character classes.
///   Use NStr::Join() if you have it in list<>, vector<> or set<>.
/// @param reject_chars
///   Additional list of rejected characters, in addition to character classes in <flags>.
///   Has priority over character classes and <allow_chars>.
///   Use NStr::Join() if you have it in list<>, vector<> or set<>.
/// @param reject_replacement
///   Replacement character for all rejected characters.
/// @param flags
///   Alternative sanitation options.
///   If no custom <allow_chars> or <reject_chars>, and no character classes are set, then use fSS_print by default.
///   If <reject_chars>, no class, and no fSS_Reject flag, then all characters are allowed except <reject_chars>.
///   If <allow_chars>, no class, and fSS_Reject flag, then no character is allowed except <allow_chars>.
/// @return
///   Sanitized string.
/// @sa
///   PrintableString, Join
static string Sanitize(CTempString str,
                       CTempString allow_chars,
                       CTempString reject_chars,
                       char        reject_replacement = ' ',
                       TSS_Flags   flags = 0);
2913
/// C-style escape sequences parsing mode.
/// For escape sequences with a value outside the range of [0-255]
/// the behavior of ParseEscapes() depends on this mode.
/// By default all escape sequences that are out of range
/// will be converted to the least significant byte, with no warning.
enum EEscSeqRange {
    eEscSeqRange_Standard,  ///< Set char to the last (least significant
                            ///< byte) of the escape sequence (default).
    eEscSeqRange_FirstByte, ///< Set char to the first byte of the escape
                            ///< sequence.
    eEscSeqRange_Throw,     ///< Throw an exception.
    eEscSeqRange_Errno,     ///< Set errno to ERANGE, return empty string.
    eEscSeqRange_User       ///< Set char to the user value
                            ///< passed in another parameter.
};
2929
/// Parse C-style escape sequences in the specified string.
///
/// Parse escape sequences including all those produced by PrintableString.
/// @param str
///   The string to be parsed.
/// @param mode
///   Parsing mode.
///   By default all escape sequences with a value outside the range of [0-255]
///   will be converted to the least significant byte, with no warning.
/// @param user_char
///   If 'mode' is eEscSeqRange_User, replace all out of range
///   escape sequences with this char.
/// @return
///   String with parsed C-style escape sequences.
///   - If the string has a wrong format, throw a CStringException exception.
///   - If parsing succeeds, return the converted value.
///     Set errno to zero only if eEscSeqRange_Errno is set.
///   - Otherwise, if an escape sequence is out of range [0-255],
///     see eEscSeqRange* modes for behavior.
/// @sa
///   EEscSeqRange, PrintableString, CEncode, CParse
static string ParseEscapes(const CTempString str,
                           EEscSeqRange mode = eEscSeqRange_Standard,
                           char user_char = '?');
2954
/// Discard C-style backslash escapes and extract a quoted string.
///
/// @param[in] str
///   The original string to extract a quoted string from.
///   It must start with a double quote.
/// @param[out] n_read
///   Optional (NULL by default).  How many symbols the quoted string
///   occupied in the original string.
/// @return
///   The extracted string, un-escaped and with the quotes removed.
///   Throw an exception on format error.
static string ParseQuoted(const CTempString str, size_t* n_read = NULL);
2966
/// Defines whether a string is quoted or not.
/// Used as the "quoted" argument of CEncode() and CParse().
enum EQuoted {
    eQuoted,     ///< String is quoted
    eNotQuoted   ///< String is not quoted
};
2972
/// Encode a string for C/C++.
///
/// @param str
///   The string to be encoded.
/// @param quoted
///   Quoting mode, see EQuoted (eQuoted by default).
///   NOTE(review): the original description of this parameter was
///   truncated; presumably it selects whether the encoded result is
///   enclosed in double quotes -- confirm against the implementation.
/// @sa
///   CParse, PrintableString
static string CEncode(const CTempString str, EQuoted quoted = eQuoted);
2982
/// Discard C-style backslash escapes.
///
/// @param str
///   The original string to parse.
/// @param quoted
///   Defines whether the string being parsed is quoted or not.
///   If "quoted" equals eQuoted and the string does not start and
///   finish with a double quote, an exception will be thrown;
///   otherwise the quotes will be removed from the result.
/// @return
///   String with parsed C-style escape sequences.
/// @sa
///   CEncode
static string CParse(const CTempString str, EQuoted quoted = eQuoted);
2997
/// Encode a string for JavaScript.
///
/// Replace relevant characters by predefined entities.
/// Similar to PrintableString(), but processes some symbols in a different way.
/// @param str
///   The string to encode.
/// @sa PrintableString
static string JavaScriptEncode(const CTempString str);
3004
/// XML-encode flags
enum EXmlEncode {
    /// Encode predefined entities only
    eXmlEnc_Contents = 0,
    /// Encode double hyphen and ending hyphen,
    /// making the result safe to put into XML comments.
    eXmlEnc_CommentSafe = 1 << 0,
    /// Check each character for conformance to the XML 1.1 standard,
    /// skip any character that is not allowed (eXmlEnc_Unsafe_Skip)
    /// or throw a CStringException (eXmlEnc_Unsafe_Throw).
    /// https://www.w3.org/TR/xml11/#NT-Char
    eXmlEnc_Unsafe_Skip  = 1 << 1,
    eXmlEnc_Unsafe_Throw = 1 << 2
};
typedef int TXmlEncode;     ///< bitwise OR of "EXmlEncode"
3019
/// Encode a string for XML.
///
/// Replace relevant characters by predefined entities.
/// @param str
///   The string to encode.
/// @param flags
///   Bitwise OR of EXmlEncode flags (eXmlEnc_Contents by default).
static string XmlEncode(const CTempString str,
                        TXmlEncode flags = eXmlEnc_Contents);
3025
3026
/// HTML-encode flags
enum EHtmlEncode {
    fHtmlEnc_EncodeAll           = 0,       ///< Encode all symbols
    fHtmlEnc_SkipLiteralEntities = 1 << 1,  ///< Skip "&entity;"
    fHtmlEnc_SkipNumericEntities = 1 << 2,  ///< Skip "&#NNNN;"
    /// Skip both literal and numeric entities
    fHtmlEnc_SkipEntities        = fHtmlEnc_SkipLiteralEntities | fHtmlEnc_SkipNumericEntities,
    fHtmlEnc_CheckPreencoded     = 1 << 3   ///< Print warning if some pre-encoded
                                            ///< entity found in the string
};
typedef int THtmlEncode;     ///< bitwise OR of "EHtmlEncode"
3037
/// Encode a string for HTML.
///
/// Replace relevant characters by predefined entities.
/// @param str
///   Original string in UTF8 encoding.
/// @param flags
///   Bitwise OR of EHtmlEncode flags (fHtmlEnc_EncodeAll by default).
static string HtmlEncode(const CTempString str,
                         THtmlEncode flags = fHtmlEnc_EncodeAll);
3045
/// HTML-decode flags
enum EHtmlDecode {
    fHtmlDec_CharRef_Entity   = 1,       ///< Character entity reference(s) was found
    fHtmlDec_CharRef_Numeric  = 1 << 1,  ///< Numeric character reference(s) was found
    fHtmlDec_Encoding_Changed = 1 << 2   ///< Character encoding changed
};
typedef int THtmlDecode;     ///< bitwise OR of "EHtmlDecode"
3053
/// Decode HTML entities and character references.
///
/// @param str
///   String to be decoded, which contains character or numeric HTML entities.
/// @param encoding
///   Encoding of the input string (eEncoding_Unknown by default).
/// @param result_flags
///   Optional pointer (NULL by default) to a THtmlDecode value.
///   NOTE(review): presumably receives a bitwise OR of EHtmlDecode flags
///   describing what was encountered while decoding -- confirm against
///   the implementation.
/// @return
///   UTF8 encoded string
static string HtmlDecode(const CTempString str,
                         EEncoding encoding = eEncoding_Unknown,
                         THtmlDecode* result_flags = NULL);
3065
/// Return the HTML entity name for a symbol.
///
/// @param uch
///   Unicode symbol to look up.
/// @return
///   HTML entity name for this symbol if one exists
///   (without leading ampersand and trailing semicolon),
///   or an empty string if a suitable HTML entity was not found.
static string HtmlEntity(TUnicodeSymbol uch);
3070
/// JSON-encode flags
enum EJsonEncode {
    eJsonEnc_UTF8,       ///< Encode all characters above 0x80 to \uXXXX form.
                         ///< https://tools.ietf.org/html/rfc7159#section-8.1
    eJsonEnc_Quoted      ///< Quote resulting string. Keep all Unicode symbols as is.
                         ///< https://tools.ietf.org/html/rfc7159#section-7
};
/// Encode a string for JSON.
///
/// @param str
///   The string to encode.
/// @param encoding
///   Specifies how to encode the string.  There are 2 approaches: represent
///   the whole string as a UTF-8 encoded string, or leave all Unicode
///   symbols "as is", in which case the resulting string will be put
///   in double quotes.  See EJsonEncode.
/// @return
///   JSON encoded string
/// @sa
///   JsonDecode
static string JsonEncode(const CTempString str, EJsonEncode encoding = eJsonEnc_UTF8);
3089
/// Decode a string encoded by JsonEncode.
///
/// @param str
///   The string to decode.
///   It must be in double quotes.
/// @param[out] n_read
///   Optional (NULL by default).  How many symbols the quoted string
///   occupied in the original string.
/// @return
///   Decoded string.
/// @sa
///   JsonEncode
/// @warning
///   This method only supports strings encoded by JsonEncode-specific encodings.
static string JsonDecode(const CTempString str, size_t* n_read = NULL);
3102
/// Quote a string in Bourne Again Shell (BASH) syntax, in a way
/// that disallows non-printable characters in the result.
///
/// This function does NOT implement aesthetically optimal quoting,
/// but does try to avoid redundant quoting in simpler cases.
/// Also, since it implements BASH syntax, the result may be
/// incompatible with Bourne syntax, and may be non-obvious to
/// people who are not familiar with the extended quoting syntax.
/// @param str
///   The string to quote.
/// @return
///   The quoted string.
/// @note The BASH shell has extensions beyond Bourne Shell quoting.
///       Also, this is very different from C Shell quoting, and
///       MS Windows Command Prompt quoting rules.
static string ShellEncode(const string& str);
3114
/// URL-encode flags.
/// Each value selects one encoding mode; used by URLEncode() and
/// NeedsURLEncoding().
enum EUrlEncode {
    eUrlEnc_SkipMarkChars,    ///< Do not convert chars like '!', '(' etc.
    eUrlEnc_ProcessMarkChars, ///< Convert all non-alphanumeric chars, spaces are converted to '+'
    eUrlEnc_PercentOnly,      ///< Convert all non-alphanumeric chars including space and '%' to %## format
    eUrlEnc_Path,             ///< Same as ProcessMarkChars but preserves valid path characters ('/', '.')
    eUrlEnc_URIScheme,        ///< Encode scheme part of a URI.
    eUrlEnc_URIUserinfo,      ///< Encode userinfo part of a URI.
    eUrlEnc_URIHost,          ///< Encode host part of a URI.
    eUrlEnc_URIPath,          ///< Encode path part of a URI.
    eUrlEnc_URIQueryName,     ///< Encode query part of a URI, arg name.
    eUrlEnc_URIQueryValue,    ///< Encode query part of a URI, arg value.
    eUrlEnc_URIFragment,      ///< Encode fragment part of a URI.
    eUrlEnc_Cookie,           ///< Same as SkipMarkChars with encoded ','
    eUrlEnc_None              ///< Do not encode
};
/// URL-decode flags.
/// Used by URLDecode() and URLDecodeInPlace().
enum EUrlDecode {
    eUrlDec_All,              ///< Decode '+' to space
    eUrlDec_Percent           ///< Decode only %XX
};
/// URL-encode string.
///
/// @param str
///   The string to encode.
/// @param flag
///   Encoding mode, see EUrlEncode (eUrlEnc_SkipMarkChars by default).
/// @return
///   Encoded string.
/// @sa
///   URLDecode, NeedsURLEncoding
static string URLEncode(const CTempString str,
                        EUrlEncode flag = eUrlEnc_SkipMarkChars);
3139
/// SQL-encode flags.
/// Used as the "flag" argument of SQLEncode().
enum ESqlEncode {
    eSqlEnc_Plain,       ///< Always produce '...', with no tag.
    eSqlEnc_TagNonASCII  ///< Produce N'...' when input's not pure ASCII.
};
/// SQL-encode string.
///
/// There are some assumptions/notes about the function:
/// 1. Only for MS SQL and Sybase.
/// 2. Only for string values in WHERE and LIKE clauses.
/// 3. The ' symbol must not be used as an escape symbol in LIKE clause.
/// 4. It must not be used for non-string values.
/// 5. It expects a string without any outer quotes, and
///    it adds single quotes to the returned string.
/// 6. It expects UTF-8 (including its subsets, ASCII and Latin1) or
///    Win1252 string, and the input encoding is preserved.
/// @param str
///   The string to encode.
/// @param flag
///   Whether to tag the result with an N prefix if it contains any
///   non-ASCII characters.  Such tagging is generally advisable,
///   but off by default per historical practice, since there are
///   corner cases in which it may be inappropriate.
///   This overload has no default value for "flag"; the deprecated
///   single-argument overload passes eSqlEnc_Plain.
/// @return
///   Encoded string with added outer single quotes.
static CStringUTF8 SQLEncode(const CStringUTF8& str, ESqlEncode flag);
3166
SQLEncode(const CStringUTF8 & str)3167 NCBI_DEPRECATED static CStringUTF8 SQLEncode(const CStringUTF8& str)
3168 { return SQLEncode(str, eSqlEnc_Plain); }
3169
/// URL-decode string.
///
/// @param str
///   The string to decode.
/// @param flag
///   Decoding mode, see EUrlDecode (eUrlDec_All by default).
/// @return
///   Decoded string.
/// @sa
///   URLEncode, URLDecodeInPlace
static string URLDecode(const CTempString str, EUrlDecode flag = eUrlDec_All);
/// URL-decode string to itself.
///
/// @param str
///   The string to decode; it is passed by non-const reference and
///   receives the result.
/// @param flag
///   Decoding mode, see EUrlDecode (eUrlDec_All by default).
/// @sa
///   URLDecode
static void URLDecodeInPlace(string& str, EUrlDecode flag = eUrlDec_All);
/// Check if the string needs the requested URL-encoding.
///
/// @param str
///   The string to check.
/// @param flag
///   Encoding mode to check against, see EUrlEncode
///   (eUrlEnc_SkipMarkChars by default).
/// @return
///   true if the string needs the requested encoding, false otherwise.
/// @sa
///   URLEncode
static bool NeedsURLEncoding(const CTempString str, EUrlEncode flag = eUrlEnc_SkipMarkChars);
3176
/// Base64-encode string.
///
/// @param str
///   The string to encode.
/// @param line_len
///   Specify a length for Base64-encoded lines.
///   The default of 0 means no line breaks at all.
/// @return
///   Encoded string.
/// @sa Base64Decode, BASE64_Encode, BASE64_Decode
static string Base64Encode(const CTempString str, size_t line_len = 0);
3187
/// Base64-decode string.
///
/// @param str
///   The string to decode.
/// @return
///   Decoded string, or an empty string on decoding error.
/// @sa Base64Encode, BASE64_Encode, BASE64_Decode
static string Base64Decode(const CTempString str);
3196
/// Check if the string contains a valid IP address.
///
/// @param str
///   The string to check.
/// @return
///   true if "str" contains a valid IP address, false otherwise.
static bool IsIPAddress(const CTempStringEx str);
3199
3200
/// How to wrap the words in a string to a new line.
/// Used by Wrap() and WrapIt().
enum EWrapFlags {
    fWrap_Hyphenate  = 0x1, ///< Add a hyphen when breaking words?
    fWrap_HTMLPre    = 0x2, ///< Wrap as pre-formatted HTML?
    fWrap_FlatFile   = 0x4  ///< Wrap for flat file use.
};
typedef int TWrapFlags;     ///< Bitwise OR of "EWrapFlags"
3208
/// Wrap the specified string into lines of a specified width.
///
/// Split string "str" into lines of width "width" and add the
/// resulting lines to the destination "dest".  Normally, all
/// lines will begin with "prefix" (counted against "width"),
/// but the first line will instead begin with "prefix1" if
/// you supply it.
///
/// @param str
///   String to be split into wrapped lines.
/// @param width
///   Width of each wrapped line.
/// @param dest
///   Destination that receives the wrapped lines; its type is the
///   template parameter _D.
/// @param flags
///   How to wrap the words to a new line. See EWrapFlags documentation.
/// @param prefix
///   The prefix string added to each wrapped line, except the first line,
///   unless "prefix1" is set.
///   If "prefix" is set to 0 (default), do not add a prefix string to the
///   wrapped lines.
/// @param prefix1
///   The prefix string for the first line. Use this for the first line
///   instead of "prefix".
///   If "prefix1" is set to 0 (default), do not add a prefix string to the
///   first line.
/// @note
///   This template returns void.  The list-based Wrap() overloads take a
///   list "arr" instead of "dest" and return a list<string> reference.
template<typename _D>
static void WrapIt(const string& str, SIZE_TYPE width,
                   _D& dest, TWrapFlags flags = 0,
                   const string* prefix = 0,
                   const string* prefix1 = 0);
3242
3243 class IWrapDest
3244 {
3245 public:
~IWrapDest()3246 virtual ~IWrapDest() {}
3247 virtual void Append(const string& s) = 0;
3248 virtual void Append(const CTempString& s) = 0;
3249 };
3250
3251 class CWrapDestStringList : public IWrapDest
3252 {
3253 protected:
3254 list<string>& m_list;
3255 public:
CWrapDestStringList(list<string> & l)3256 CWrapDestStringList(list<string>& l) : m_list(l) {};
Append(const string & s)3257 virtual void Append(const string& s)
3258 {
3259 m_list.push_back(s);
3260 }
Append(const CTempString & s)3261 virtual void Append(const CTempString& s)
3262 {
3263 m_list.push_back(NcbiEmptyString);
3264 m_list.back().assign(s.data(), s.length());
3265 }
3266 };
3267
3268 static void Wrap(const string& str, SIZE_TYPE width,
3269 IWrapDest& dest, TWrapFlags flags,
3270 const string* prefix,
3271 const string* prefix1);
3272
3273 static list<string>& Wrap(const string& str, SIZE_TYPE width,
3274 list<string>& arr, TWrapFlags flags = 0,
3275 const string* prefix = 0,
3276 const string* prefix1 = 0);
3277
3278 static list<string>& Wrap(const string& str, SIZE_TYPE width,
3279 list<string>& arr, TWrapFlags flags,
3280 const string& prefix,
3281 const string* prefix1 = 0);
3282
3283 static list<string>& Wrap(const string& str, SIZE_TYPE width,
3284 list<string>& arr, TWrapFlags flags,
3285 const string& prefix,
3286 const string& prefix1);
3287
3288
3289 /// Wrap the list using the specified criteria.
3290 ///
3291 /// WrapList() is similar to Wrap(), but tries to avoid splitting any
3292 /// elements of the list to be wrapped. Also, the "delim" only applies
3293 /// between elements on the same line; if you want everything to end with
3294 /// commas or such, you should add them first.
3295 ///
3296 /// @param l
3297 /// The list to be wrapped.
3298 /// @param width
3299 /// Width of each wrapped line.
3300 /// @param delim
3301 /// Delimiters used to split elements on the same line.
3302 /// @param arr
3303 /// List containing the wrapped list result.
3304 /// @param flags
3305 /// How to wrap the words to a new line. See EWrapFlags documentation.
3306 /// @param prefix
3307 /// The prefix string added to each wrapped line, except the first line,
3308 /// unless "prefix1" is set.
3309 /// If "prefix" is set to 0(default), do not add a prefix string to the
3310 /// wrapped lines.
3311 /// @param prefix1
3312 /// The prefix string for the first line. Use this for the first line
3313 /// instead of "prefix".
3314 /// If "prefix1" is set to 0(default), do not add a prefix string to the
3315 /// first line.
3316 /// @return
3317 /// Return "arr", the wrapped list.
3318 static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
3319 const string& delim, list<string>& arr,
3320 TWrapFlags flags = 0,
3321 const string* prefix = 0,
3322 const string* prefix1 = 0);
3323
3324 static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
3325 const string& delim, list<string>& arr,
3326 TWrapFlags flags,
3327 const string& prefix,
3328 const string* prefix1 = 0);
3329
3330 static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
3331 const string& delim, list<string>& arr,
3332 TWrapFlags flags,
3333 const string& prefix,
3334 const string& prefix1);
3335
3336
3337 /// Justify the specified string into a series of lines of the same width.
3338 ///
3339 /// Split string "str" into a series of lines, all of which are to
3340 /// be "width" characters wide (by adding extra inner spaces between
3341 /// words), and store the resulting lines in the list "par". Normally,
3342 /// all lines in "par" will begin with "pfx" (counted against "width"),
3343 /// but the first line will instead begin with "pfx1" if provided.
3344 ///
3345 /// @note Words exceeding the specified "width" will not be split between
3346 /// lines but occupy individual lines (which will be wider than "width").
3347 ///
3348 /// @param str
3349 /// String to be split into justified lines.
3350 /// @param width
3351 /// Width of every line (except for the last one).
3352 /// @param par
3353 /// Resultant list of justified lines.
3354 /// @param pfx
3355 /// The prefix string added to each line, except for the first line
3356 /// if non-NULL "pfx1" is also set. Empty(or NULL) "pfx" causes no
3357 /// additions.
3358 /// @param pfx1
3359 /// The prefix string for the first line, if non-NULL.
3360 /// @return
3361 /// Return "par", the list of justified lines (a paragraph).
3362 static list<string>& Justify(const CTempString str,
3363 SIZE_TYPE width,
3364 list<string>& par,
3365 const CTempString* pfx = 0,
3366 const CTempString* pfx1 = 0);
3367
3368 static list<string>& Justify(const CTempString str,
3369 SIZE_TYPE width,
3370 list<string>& par,
3371 const CTempString pfx,
3372 const CTempString* pfx1 = 0);
3373
3374 static list<string>& Justify(const CTempString str,
3375 SIZE_TYPE width,
3376 list<string>& par,
3377 const CTempString pfx,
3378 const CTempString pfx1);
3379
3380
3381 /// Search for a field.
3382 ///
3383 /// @param str
3384 /// C or C++ string to search in.
3385 /// @param field_no
3386 /// Zero-based field number.
3387 /// @param delimiters
3388 /// A set of single-character delimiters.
3389 /// @param merge
3390 /// Whether to merge or not adjacent delimiters. Default: not to merge.
3391 /// @return
3392 /// Found field; or empty string if the required field is not found.
3393 /// @note
3394 /// Field 0 spans up to the first-found delimiter or the end-of-string.
3395 static string GetField(const CTempString str,
3396 size_t field_no,
3397 const CTempString delimiters,
3398 EMergeDelims merge = eNoMergeDelims);
3399
3400 /// Search for a field.
3401 ///
3402 /// @param str
3403 /// C or C++ string to search in.
3404 /// @param field_no
3405 /// Zero-based field number.
3406 /// @param delimiter
3407 /// A single-character delimiter.
3408 /// @param merge
3409 /// Whether to merge or not adjacent delimiters. Default: not to merge.
3410 /// @return
3411 /// Found field; or empty string if the required field is not found.
3412 /// @note
3413 /// Field 0 spans up to the delimiter or the end-of-string.
3414 static string GetField(const CTempString str,
3415 size_t field_no,
3416 char delimiter,
3417 EMergeDelims merge = eNoMergeDelims);
3418
3419 /// Search for a field.
3420 /// Avoid memory allocation at the expense of some usage safety.
3421 ///
3422 /// @param str
3423 /// C or C++ string to search in.
3424 /// @param field_no
3425 /// Zero-based field number.
3426 /// @param delimiters
3427 /// A set of single-character delimiters.
3428 /// @param merge
3429 /// Whether to merge or not adjacent delimiters. Default: not to merge.
3430 /// @return
3431 /// Found field; or empty string if the required field is not found.
3432 /// @note
3433 /// Field 0 spans up to the first-found delimiter or the end-of-string.
3434 /// @warning
3435 /// The return value stores a pointer to the input string 'str' so
3436 /// the return object validity time matches lifetime of the input 'str'.
3437 static
3438 CTempString GetField_Unsafe(const CTempString str,
3439 size_t field_no,
3440 const CTempString delimiters,
3441 EMergeDelims merge = eNoMergeDelims);
3442
3443 /// Search for a field.
3444 /// Avoid memory allocation at the expense of some usage safety.
3445 ///
3446 /// @param str
3447 /// C or C++ string to search in.
3448 /// @param field_no
3449 /// Zero-based field number.
3450 /// @param delimiter
3451 /// A single-character delimiter.
3452 /// @param merge
3453 /// Whether to merge or not adjacent delimiters. Default: not to merge.
3454 /// @return
3455 /// Found field; or empty string if the required field is not found.
3456 /// @note
3457 /// Field 0 spans up to the delimiter or the end-of-string.
3458 /// @warning
3459 /// The return value stores a pointer to the input string 'str' so
3460 /// the return object validity time matches lifetime of the input 'str'.
3461 static
3462 CTempString GetField_Unsafe(const CTempString str,
3463 size_t field_no,
3464 char delimiter,
3465 EMergeDelims merge = eNoMergeDelims);
3466
3467 private:
3468 // implementations
3469
3470 // StringToNumeric
3471 static bool x_ReportLimitsError(const CTempString str, TStringToNumFlags flags);
3472
3473 template< typename TNumeric, typename TSource>
x_VerifyIntLimits(TSource v,const CTempString str,TStringToNumFlags flags)3474 static bool x_VerifyIntLimits(TSource v, const CTempString str, TStringToNumFlags flags)
3475 {
3476 if (v < numeric_limits<TNumeric>::min() || v > numeric_limits<TNumeric>::max()) {
3477 return x_ReportLimitsError(str, flags);
3478 }
3479 return true;
3480 }
3481 template< typename TNumeric, typename TSource>
x_VerifyFloatLimits(TSource v,const CTempString str,TStringToNumFlags flags)3482 static bool x_VerifyFloatLimits(TSource v, const CTempString str, TStringToNumFlags flags)
3483 {
3484 // dont use ::min() for float types, it returns positive value
3485 if (v < -numeric_limits<TNumeric>::max() || v > numeric_limits<TNumeric>::max()) {
3486 return x_ReportLimitsError(str, flags);
3487 }
3488 return true;
3489 }
3490
3491 template <typename TNumeric>
3492 static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) < sizeof(int)), TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3493 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3494 {
3495 int n = StringToInt(str, flags, base);
3496 return x_VerifyIntLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3497 }
3498 template <typename TNumeric>
3499 static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) < sizeof(unsigned int)), TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3500 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3501 {
3502 unsigned int n = StringToUInt(str, flags, base);
3503 return x_VerifyIntLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3504 }
3505
3506 template <typename TNumeric>
3507 static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(int) && !is_same<TNumeric, long>::value), TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3508 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3509 {
3510 return StringToInt(str, flags, base);
3511 }
3512 template <typename TNumeric>
3513 static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3514 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3515 {
3516 return StringToUInt(str, flags, base);
3517 }
3518 template <typename TNumeric>
3519 static typename enable_if< is_same<TNumeric, long>::value, TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3520 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3521 {
3522 return StringToLong(str, flags, base);
3523 }
3524 template <typename TNumeric>
3525 static typename enable_if< is_same<TNumeric, unsigned long>::value, TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3526 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3527 {
3528 return StringToULong(str, flags, base);
3529 }
3530 template <typename TNumeric>
3531 static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3532 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3533 {
3534 return StringToInt8(str, flags, base);
3535 }
3536 template <typename TNumeric>
3537 static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(Uint8) && !is_same<TNumeric, unsigned long>::value), TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3538 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3539 {
3540 return StringToUInt8(str, flags, base);
3541 }
3542 template <typename TStrictId>
3543 static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, TStrictId>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int base)3544 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int base)
3545 {
3546 return TStrictId(StringToNumeric<typename TStrictId::TId>(str, flags, base));
3547 }
3548
3549 template <typename TNumeric>
3550 static typename enable_if< is_same<TNumeric, float>::value, TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int)3551 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int /*base*/)
3552 {
3553 double n = StringToDouble(str, flags);
3554 return x_VerifyFloatLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3555 }
3556 template <typename TNumeric>
3557 static typename enable_if< is_same<TNumeric, double>::value, TNumeric>::type
x_StringToNumeric(const CTempString str,TStringToNumFlags flags,int)3558 x_StringToNumeric(const CTempString str, TStringToNumFlags flags, int /*base*/)
3559 {
3560 return StringToDouble(str, flags);
3561 }
3562
3563 template <typename TNumeric>
3564 static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) < sizeof(int)), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3565 x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3566 {
3567 int n = StringToInt(str, flags, base);
3568 *value = 0;
3569 if (( !n && errno ) || !x_VerifyIntLimits<TNumeric>(n, str, flags)) {
3570 return false;
3571 }
3572 *value = (TNumeric) n;
3573 return true;
3574 }
3575 template <typename TNumeric>
3576 static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) < sizeof(unsigned int)), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3577 x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3578 {
3579 unsigned int n = StringToUInt(str, flags, base);
3580 *value = 0;
3581 if (( !n && errno ) || !x_VerifyIntLimits<TNumeric>(n, str, flags)) {
3582 return false;
3583 }
3584 *value = (TNumeric) n;
3585 return true;
3586 }
3587 template <typename TNumeric>
3588 static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(int) && !is_same<TNumeric, long>::value), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3589 x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3590 {
3591 *value = StringToInt(str, flags, base);
3592 return (*value || !errno);
3593 }
3594 template <typename TNumeric>
3595 static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3596 x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3597 {
3598 *value = StringToUInt(str, flags, base);
3599 return (*value || !errno);
3600 }
3601 static bool
x_StringToNumeric(const CTempString str,long * value,TStringToNumFlags flags,int base)3602 x_StringToNumeric(const CTempString str, long* value, TStringToNumFlags flags, int base)
3603 {
3604 *value = StringToLong(str, flags, base);
3605 return (*value || !errno);
3606 }
3607 static bool
x_StringToNumeric(const CTempString str,unsigned long * value,TStringToNumFlags flags,int base)3608 x_StringToNumeric(const CTempString str, unsigned long* value, TStringToNumFlags flags, int base)
3609 {
3610 *value = StringToULong(str, flags, base);
3611 return (*value || !errno);
3612 }
3613 template <typename TNumeric>
3614 static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3615 x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3616 {
3617 *value = StringToInt8(str, flags, base);
3618 return (*value || !errno);
3619 }
3620 template <typename TNumeric>
3621 static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(Uint8) && !is_same<TNumeric, unsigned long>::value), bool>::type
x_StringToNumeric(const CTempString str,TNumeric * value,TStringToNumFlags flags,int base)3622 x_StringToNumeric(const CTempString str, TNumeric* value, TStringToNumFlags flags, int base)
3623 {
3624 *value = StringToUInt8(str, flags, base);
3625 return (*value || !errno);
3626 }
3627 static bool
x_StringToNumeric(const CTempString str,float * value,TStringToNumFlags flags,int)3628 x_StringToNumeric(const CTempString str, float* value, TStringToNumFlags flags, int /*base*/)
3629 {
3630 double n = StringToDouble(str, flags);
3631 *value = 0;
3632 if (( !n && errno ) || !x_VerifyFloatLimits<float>(n, str, flags)) {
3633 return false;
3634 }
3635 *value = (float) n;
3636 return true;
3637 }
3638 static bool
x_StringToNumeric(const CTempString str,double * value,TStringToNumFlags flags,int)3639 x_StringToNumeric(const CTempString str, double* value, TStringToNumFlags flags, int /*base*/)
3640 {
3641 *value = StringToDouble(str, flags);
3642 return (*value || !errno);
3643 }
3644 template <typename TStrictId>
3645 static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, bool>::type
x_StringToNumeric(const CTempString str,TStrictId * value,TStringToNumFlags flags,int base)3646 x_StringToNumeric(const CTempString str, TStrictId* value, TStringToNumFlags flags, int base)
3647 {
3648 return x_StringToNumeric(str, &value->Set(), flags, base);
3649 }
3650
3651 // NumericToString
3652 template<typename TNumeric>
3653 static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) <= sizeof(int) && !is_same<TNumeric, long>::value), void>::type
x_NumericToString(string & out_str,TNumeric value,TNumToStringFlags flags,int base)3654 x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
3655 {
3656 IntToString(out_str, value, flags, base);
3657 }
3658 template<typename TNumeric>
3659 static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) <= sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), void>::type
x_NumericToString(string & out_str,TNumeric value,TNumToStringFlags flags,int base)3660 x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
3661 {
3662 UIntToString(out_str, value, flags, base);
3663 }
3664 static void
x_NumericToString(string & out_str,long value,TNumToStringFlags flags,int base)3665 x_NumericToString(string& out_str, long value, TNumToStringFlags flags, int base)
3666 {
3667 LongToString(out_str, value, flags, base);
3668 }
3669 static void
x_NumericToString(string & out_str,unsigned long value,TNumToStringFlags flags,int base)3670 x_NumericToString(string& out_str, unsigned long value, TNumToStringFlags flags, int base)
3671 {
3672 ULongToString(out_str, value, flags, base);
3673 }
#if NCBI_COMPILER_MSVC && (_MSC_VER < 1900)
/// Non-template Int8 overload, compiled only for MSVC older than
/// _MSC_VER 1900: format with Int8ToString().
static void
x_NumericToString(string&            out_str,
                  Int8               value,
                  TNumToStringFlags  flags,
                  int                base)
{
    Int8ToString(out_str, value, flags, base);
}

/// Non-template Uint8 overload, compiled only for MSVC older than
/// _MSC_VER 1900: format with UInt8ToString().
static void
x_NumericToString(string&            out_str,
                  Uint8              value,
                  TNumToStringFlags  flags,
                  int                base)
{
    UInt8ToString(out_str, value, flags, base);
}
#endif
3686 template<typename TNumeric>
3687 static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), void>::type
x_NumericToString(string & out_str,TNumeric value,TNumToStringFlags flags,int base)3688 x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
3689 {
3690 Int8ToString(out_str, value, flags, base);
3691 }
3692 template<typename TNumeric>
3693 static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(Uint8) && !is_same<TNumeric, unsigned long>::value), void>::type
x_NumericToString(string & out_str,TNumeric value,TNumToStringFlags flags,int base)3694 x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
3695 {
3696 UInt8ToString(out_str, value, flags, base);
3697 }
3698 template<typename TNumeric>
3699 static typename enable_if< is_floating_point<TNumeric>::value, void>::type
x_NumericToString(string & out_str,TNumeric value,TNumToStringFlags flags,int)3700 x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int /*base*/)
3701 {
3702 DoubleToString(out_str, value, -1, flags);
3703 }
3704 template <typename TStrictId>
3705 static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, void>::type
x_NumericToString(string & out_str,TStrictId value,TNumToStringFlags flags,int base)3706 x_NumericToString(string& out_str, TStrictId value, TNumToStringFlags flags, int base)
3707 {
3708 return x_NumericToString(out_str, value.Get(), flags, base);
3709 }
3710
3711
3712 // Join
3713 template<typename TIterator>
3714 static string xx_Join( TIterator from, TIterator to, const CTempString& delim);
3715
3716 template<typename TIterator>
3717 static typename enable_if<is_same<typename TIterator::iterator_category, input_iterator_tag>::value &&
3718 is_convertible<typename TIterator::value_type, string>::value, string>::type
x_Join(TIterator from,TIterator to,const CTempString & delim)3719 x_Join( TIterator from, TIterator to, const CTempString& delim)
3720 {
3721 return TransformJoin(from, to, delim, [](const typename TIterator::value_type& i){ return i;});
3722 }
3723
3724 template<typename TIterator>
3725 static typename enable_if<is_convertible<typename TIterator::iterator_category, forward_iterator_tag>::value &&
3726 is_convertible<typename TIterator::value_type, string>::value, string>::type
x_Join(TIterator from,TIterator to,const CTempString & delim)3727 x_Join( TIterator from, TIterator to, const CTempString& delim)
3728 {
3729 return xx_Join(from, to, delim);
3730 }
3731
3732 template<typename TValue>
3733 static typename enable_if<is_convertible<TValue, string>::value, string>::type
x_Join(TValue * from,TValue * to,const CTempString & delim)3734 x_Join( TValue* from, TValue* to, const CTempString& delim)
3735 {
3736 return xx_Join(from, to, delim);
3737 }
3738
3739 template<typename TIterator>
3740 static typename enable_if<is_convertible<typename TIterator::iterator_category, input_iterator_tag>::value &&
3741 is_arithmetic< typename TIterator::value_type>::value, string>::type
x_Join(TIterator from,TIterator to,const CTempString & delim)3742 x_Join( TIterator from, TIterator to, const CTempString& delim)
3743 {
3744 return TransformJoin( from, to, delim, [](const typename TIterator::value_type& i){ return NumericToString(i);});
3745 }
3746
3747 template<typename TValue>
3748 static typename enable_if<is_arithmetic<TValue>::value, string>::type
x_Join(TValue * from,TValue * to,const CTempString & delim)3749 x_Join( TValue* from, TValue* to, const CTempString& delim)
3750 {
3751 return TransformJoin( from, to, delim, [](const TValue& i){ return NumericToString(i);});
3752 }
3753 }; // class NStr
3754
3755
3756
3757 /////////////////////////////////////////////////////////////////////////////
3758 ///
3759
3760
3761 #define NCBITOOLKIT_USE_LONG_UCS4 (SIZEOF_LONG == 4)
3762 #if NCBITOOLKIT_USE_LONG_UCS4
3763 /// UCS-4 character
3764 typedef unsigned long TCharUCS4;
3765 /// UCS-4 string
3766 typedef basic_string<TCharUCS4> TStringUCS4;
3767 #else
3768 typedef TUnicodeSymbol TCharUCS4;
3769 typedef TStringUnicode TStringUCS4;
3770 #endif
3771
3772 /// Type for character in UCS-2 encoding
3773 typedef Uint2 TCharUCS2;
3774 /// Type for string in UCS-2 encoding
3775 typedef basic_string<TCharUCS2> TStringUCS2;
3776
3777
3778 /// Operator for writing TStringUCS2 to stream.
3779 /// Operator is needed for using in SDBAPI.
operator <<(CNcbiOstream & os,const TStringUCS2 & str)3780 inline CNcbiOstream& operator<< (CNcbiOstream& os, const TStringUCS2& str)
3781 {
3782 os.write((const char*)str.data(), str.size() * sizeof(TCharUCS2));
3783 return os;
3784 }
3785
3786
3787
3788 /////////////////////////////////////////////////////////////////////////////
3789 ///
3790 /// CUtf8 --
3791 ///
3792 /// Utility class to handle strings in UTF8 encoding.
3793 /// Can convert data to and from the following encodings:
3794 /// ISO 8859-1 (Latin1)
3795 /// Microsoft Windows code page 1252
3796 /// UCS-2, UCS-4 (no surrogates)
3797
3798 class NCBI_XNCBI_EXPORT CUtf8
3799 {
3800 public:
/// How to verify character encoding of the source data
enum EValidate {
    eNoValidate = 0,  ///< Do not verify the encoding
    eValidate   = 1   ///< Verify the encoding
};
3806
3807 /// Convert into UTF8 from a C/C++ string
3808 ///
3809 /// @param src
3810 /// Source string
3811 /// @param encoding
3812 /// Character encoding of the source string
3813 /// @param validate
3814 /// Verify the character encoding of the source
AsUTF8(const CTempString & src,EEncoding encoding,EValidate validate=eNoValidate)3815 static CStringUTF8 AsUTF8(const CTempString& src,
3816 EEncoding encoding,
3817 EValidate validate = eNoValidate)
3818 {
3819 CStringUTF8 u8;
3820 return x_Append(u8,src,encoding,validate);
3821 }
3822
#if defined(HAVE_WSTRING)
/// Convert into UTF8 from a C/C++ string
///
/// @param src
///   Source string
/// @param lcl
///   String locale
/// @return
///   UTF8 string built from "src"
static CStringUTF8 AsUTF8(const CTempString& src, const locale& lcl)
{
    CStringUTF8 u8;
    x_Append(u8, src, lcl);
    // Return the local by name: returning the reference produced by
    // x_Append() would force a copy of "u8" instead of copy elision / move.
    return u8;
}
#endif
3836
3837 /// Convert into UTF8 from a Unicode C++ string
3838 ///
3839 /// @param src
3840 /// Source string
3841 /// @attention
3842 /// Only for TStringUnicode, TStringUCS4, TStringUCS2, wstring types
3843 template <typename TChar>
3844 static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8>::type
AsUTF8(const basic_string<TChar> & src)3845 AsUTF8(const basic_string<TChar>& src)
3846 {
3847 CStringUTF8 u8;
3848 return x_Append(u8, src.data(), src.size());
3849 }
3850
3851 /// Convert into UTF8 from a Unicode character buffer
3852 ///
3853 /// @param src
3854 /// Source character buffer
3855 /// @param tchar_count
3856 /// Number of characters in the buffer;
3857 /// If it equals to NPOS, buffer is assumed to be zero-terminated
3858 template <typename TChar>
3859 static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8>::type
AsUTF8(const TChar * src,SIZE_TYPE tchar_count=NPOS)3860 AsUTF8(const TChar* src, SIZE_TYPE tchar_count = NPOS)
3861 {
3862 CStringUTF8 u8;
3863 return x_Append(u8, src, tchar_count);
3864 }
3865
3866 /// Convert Unicode C++ string into UTF8 and append it to existing string
3867 ///
3868 /// @param dest
3869 /// Existing UTF8 string
3870 /// @param src
3871 /// Source Unicode string
3872 /// return
3873 /// reference to modified dest string
3874 template <typename TChar>
3875 static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8& >::type
AppendAsUTF8(CStringUTF8 & dest,const basic_string<TChar> & src)3876 AppendAsUTF8(CStringUTF8& dest, const basic_string<TChar>& src)
3877 {
3878 return x_Append(dest, src.data(), src.size());
3879 }
3880
3881 /// Convert Unicode character buffer into UTF8 and append it to existing string
3882 ///
3883 /// @param dest
3884 /// Existing UTF8 string
3885 /// @param src
3886 /// Source Unicode character buffer
3887 /// @param tchar_count
3888 /// Number of characters in the buffer;
3889 /// If it equals to NPOS, buffer is assumed to be zero-terminated
3890 /// return
3891 /// reference to modified dest string
3892 template <typename TChar>
3893 static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8& >::type
AppendAsUTF8(CStringUTF8 & dest,const TChar * src,SIZE_TYPE tchar_count=NPOS)3894 AppendAsUTF8(CStringUTF8& dest, const TChar* src, SIZE_TYPE tchar_count = NPOS)
3895 {
3896 return x_Append(dest, src, tchar_count);
3897 }
3898
3899 /// Convert Unicode symbol into UTF8 and append it to existing string
3900 ///
3901 /// @param dest
3902 /// Existing UTF8 string
3903 /// @param ch
3904 /// Unicode symbol
3905 /// return
3906 /// reference to modified dest string
3907 template <typename TChar>
3908 static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8& >::type
AppendAsUTF8(CStringUTF8 & dest,TChar ch)3909 AppendAsUTF8(CStringUTF8& dest, TChar ch)
3910 {
3911 return x_Append(dest, &ch, 1);
3912 }
3913
3914 /// Convert non-Unicode C++ string into UTF8 and append it to existing string
3915 ///
3916 /// @param dest
3917 /// Existing UTF8 string
3918 /// @param src
3919 /// Source string
3920 /// @param encoding
3921 /// Character encoding of the source string
3922 /// @param validate
3923 /// Verify the character encoding of the source
3924 /// return
3925 /// reference to modified dest string
AppendAsUTF8(CStringUTF8 & dest,const CTempString & src,EEncoding encoding,EValidate validate=eNoValidate)3926 static CStringUTF8& AppendAsUTF8(CStringUTF8& dest,
3927 const CTempString& src,
3928 EEncoding encoding,
3929 EValidate validate = eNoValidate)
3930 {
3931 return x_Append(dest,src,encoding,validate);
3932 }
3933
#if defined(HAVE_WSTRING)
/// Convert non-Unicode C++ string into UTF8 and append it to existing string
///
/// @param dest
///   Existing UTF8 string
/// @param src
///   Source string
/// @param lcl
///   Source string locale
/// @return
///   reference to modified dest string
static CStringUTF8& AppendAsUTF8(CStringUTF8& dest, const CTempString& src,
                                 const locale& lcl)
{
    return x_Append(dest, src, lcl);
}
#endif
3952
3953 /// Convert non-Unicode character into UTF8 and append it to existing string
3954 ///
3955 /// @param dest
3956 /// Existing UTF8 string
3957 /// @param ch
3958 /// Character
3959 /// @param encoding
3960 /// Character encoding
3961 /// @param validate
3962 /// Verify the character encoding of the source
3963 /// return
3964 /// reference to modified dest string
AppendAsUTF8(CStringUTF8 & dest,char ch,EEncoding encoding,EValidate validate=eNoValidate)3965 static CStringUTF8& AppendAsUTF8(CStringUTF8& dest,
3966 char ch,
3967 EEncoding encoding,
3968 EValidate validate = eNoValidate)
3969 {
3970 return x_Append(dest,CTempString(&ch,1),encoding,validate);
3971 }
3972
#if defined(HAVE_WSTRING)
/// Convert a non-Unicode character into UTF8 and append it to an existing
/// string, using a locale to describe the character.
///
/// @param dest
///   Existing UTF8 string
/// @param ch
///   Character
/// @param lcl
///   Character locale
/// @return
///   Reference to the modified dest string
static CStringUTF8& AppendAsUTF8(CStringUTF8&  dest,
                                 char          ch,
                                 const locale& lcl)
{
    // Wrap the character into a one-byte string view and reuse the
    // string-based conversion.
    const CTempString one_char(&ch, 1);
    return x_Append(dest, one_char, lcl);
}
#endif
3991
3992 /// Convert UTF8 string into a single-byte character representation
3993 ///
3994 /// Can throw a CStringException if the conversion is impossible
3995 /// or the string has invalid UTF-8 encoding.
3996 ///
3997 /// @param src
3998 /// Source UTF8 string
3999 /// @param encoding
4000 /// Encoding of the result
4001 /// @param substitute_on_error
4002 /// If the conversion is impossible, append the provided string
4003 /// or, if substitute_on_error equals 0, throw an exception
4004 /// @param validate
4005 /// Verify UTF8 character encoding of the source
4006 /// @return
4007 /// C++ string
4008 static string AsSingleByteString
4009 (const CTempString& src, EEncoding encoding,
4010 const char* substitute_on_error = 0, EValidate validate = eNoValidate);
4011
4012 #if defined(HAVE_WSTRING)
4013 static string AsSingleByteString
4014 (const CTempString& src, const locale& lcl,
4015 const char* substitute_on_error = 0, EValidate validate = eNoValidate);
4016 #endif
4017
4018 /// Convert UTF8 string into Unicode
4019 ///
4020 /// Can throw a CStringException if the conversion is impossible
4021 /// or the string has invalid UTF-8 encoding.
4022 ///
4023 /// @param src
4024 /// Source UTF8 string
4025 /// @param substitute_on_error
4026 /// If the conversion is impossible, append the provided string
4027 /// or, if substitute_on_error equals 0, throw an exception
4028 /// @param validate
4029 /// Verify UTF8 character encoding of the source
4030 /// @attention
4031 /// Only for TStringUnicode, TStringUCS4, TStringUCS2, wstring types
4032 template <typename TChar>
4033 static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), basic_string<TChar> >::type
AsBasicString(const CTempString & src,const TChar * substitute_on_error,EValidate validate=eNoValidate)4034 AsBasicString(const CTempString& src, const TChar* substitute_on_error, EValidate validate = eNoValidate)
4035 {
4036 return x_AsBasicString(src,substitute_on_error,validate);
4037 }
4038
4039 template <typename TChar>
4040 static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), basic_string<TChar> >::type
AsBasicString(const CTempString & src)4041 AsBasicString(const CTempString& src)
4042 {
4043 return x_AsBasicString<TChar>(src,nullptr,eNoValidate);
4044 }
4045
4046 /// Get the number of symbols (code points) in UTF8 string
4047 ///
4048 /// @param src
4049 /// Source UTF8 string
4050 /// @return
4051 /// Number of symbols (code points)
4052 static SIZE_TYPE GetSymbolCount(const CTempString& src);
4053
4054 /// Get the number of valid UTF-8 symbols (code points) in buffer
4055 ///
4056 /// @param src
4057 /// Character buffer
4058 /// @return
4059 /// Number of valid symbols (no exception thrown)
4060 static SIZE_TYPE GetValidSymbolCount(const CTempString& src);
4061
4062 /// Get the number of valid UTF-8 bytes (code units) in buffer
4063 ///
4064 /// @param src
4065 /// Character buffer
4066 /// @return
4067 /// Number of valid bytes (no exception thrown)
4068 static SIZE_TYPE GetValidBytesCount(const CTempString& src);
4069
4070 /// Check buffer for presence of UTF-8 byte sequence and return length of first symbol
4071 ///
4072 /// @param src
4073 /// Character buffer
4074 /// @return
4075 /// Number of bytes
4076 static SIZE_TYPE EvaluateSymbolLength(const CTempString& src);
4077
4078 /// Check that the character is valid first byte of an UTF8 byte sequence
4079 ///
4080 /// @param ch
4081 /// Character
4082 /// @param more
4083 /// Number of additional bytes to expect
4084 /// @return
4085 /// true, if this is a valid first byte
EvaluateFirst(char ch,SIZE_TYPE & more)4086 static bool EvaluateFirst(char ch, SIZE_TYPE& more) {
4087 return x_EvalFirst(ch, more);
4088 }
4089
4090 /// Check that the character is valid continuation byte of an UTF8 byte sequence
4091 ///
4092 /// @param ch
4093 /// Character
4094 /// @return
4095 /// true, if this is a valid byte
EvaluateNext(char ch)4096 static bool EvaluateNext(char ch) {
4097 return x_EvalNext(ch);
4098 }
4099
4100 /// Check the encoding of the C/C++ string
4101 ///
4102 /// Check that the encoding of the source is the same, or
4103 /// is compatible with the specified one
4104 /// @param src
4105 /// Source string
4106 /// @param encoding
4107 /// Character encoding form to check against
4108 /// @return
4109 /// Boolean result: encoding is same or compatible
4110 static bool MatchEncoding(const CTempString& src, EEncoding encoding);
4111
4112 /// Guess the encoding of the C/C++ string
4113 ///
4114 /// It can distinguish between UTF-8, Latin1, and Win1252 only
4115 /// @param src
4116 /// Character buffer
4117 /// @return
4118 /// Encoding as guessed; eEncoding_Unknown if cannot guess
4119 static EEncoding GuessEncoding(const CTempString& src);
4120
4121 /// Give Encoding name as string
4122 ///
4123 /// @param encoding
4124 /// EEncoding enum. (Throw CStringException if passed eEncoding_Unknown.)
4125 /// @return
4126 /// Encoding name
4127 static string EncodingToString(EEncoding encoding);
4128
4129 /// Convert encoding name into EEncoding enum, taking into account synonyms
4130 /// as per http://www.iana.org/assignments/character-sets
4131 ///
4132 /// @param encoding_name
4133 /// Name of the encoding
4134 /// @return
4135 /// EEncoding enum; eEncoding_Unknown for unsupported encodings
4136 static EEncoding StringToEncoding(const CTempString& encoding_name);
4137
4138 /// Convert encoded character into Unicode
4139 ///
4140 /// @param ch
4141 /// Encoded character
4142 /// @param encoding
4143 /// Character encoding
4144 /// @return
4145 /// Unicode code point (symbol)
4146 static TUnicodeSymbol CharToSymbol(char ch, EEncoding encoding);
4147
4148 #if defined(HAVE_WSTRING)
4149 /// Convert encoded character into Unicode
4150 ///
4151 /// @param ch
4152 /// Encoded character
4153 /// @param lcl
4154 /// Character locale
4155 /// @return
4156 /// Unicode code point (symbol)
4157 static TUnicodeSymbol CharToSymbol(char ch, const locale& lcl);
4158 #endif
4159
4160 /// Convert Unicode code point into encoded character
4161 ///
4162 /// @param sym
4163 /// Unicode code point (symbol)
4164 /// @param encoding
4165 /// Character encoding
4166 /// @return
4167 /// Encoded character
4168 static char SymbolToChar(TUnicodeSymbol sym, EEncoding encoding);
4169
4170 #if defined(HAVE_WSTRING)
4171 /// Convert Unicode code point into encoded character
4172 ///
4173 /// @param sym
4174 /// Unicode code point (symbol)
4175 /// @param lcl
4176 /// Character locale
4177 /// @return
4178 /// Encoded character
4179 static char SymbolToChar(TUnicodeSymbol sym, const locale& lcl);
4180 #endif
4181
4182 /// Determines if a symbol is whitespace
4183 /// per http://unicode.org/charts/uca/chart_Whitespace.html
4184 ///
4185 /// @param sym
4186 /// Unicode code point (symbol)
4187 /// @sa
4188 /// TruncateSpacesInPlace, TruncateSpaces_Unsafe, TruncateSpaces
4189 static bool IsWhiteSpace(TUnicodeSymbol sym);
4190
4191 /// Truncate spaces in the string (in-place)
4192 ///
4193 /// @param str
4194 /// UTF8 string
4195 /// @param side
4196 /// Which end of the string to truncate spaces from. Default is to
4197 /// truncate spaces from both ends.
4198 /// @return
4199 /// Reference to src
4200 /// @sa
4201 /// IsWhiteSpace, TruncateSpaces_Unsafe, TruncateSpaces
4202 static CStringUTF8& TruncateSpacesInPlace
4203 (CStringUTF8& str, NStr::ETrunc side = NStr::eTrunc_Both);
4204
4205 /// Truncate spaces in the string
4206 ///
4207 /// @param str
4208 /// Source string, in UTF8 encoding
4209 /// @param side
4210 /// Which end of the string to truncate spaces from. Default is to
4211 /// truncate spaces from both ends.
4212 /// @sa
4213 /// IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces_Unsafe
4214 static CStringUTF8 TruncateSpaces
4215 (const CTempString& str, NStr::ETrunc side = NStr::eTrunc_Both);
4216
4217 /// Truncate spaces in the string
4218 ///
4219 /// @param str
4220 /// Source string, in UTF8 encoding
4221 /// @param side
4222 /// Which end of the string to truncate spaces from. Default is to
4223 /// truncate spaces from both ends.
4224 /// @attention
4225 /// The lifespan of the result string is the same as one of the source.
4226 /// So, for example, if the source is temporary string, then the result
4227 /// will be invalid right away (will point to already released memory).
4228 /// @sa
4229 /// IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces
4230 static CTempString TruncateSpaces_Unsafe
4231 (const CTempString& str, NStr::ETrunc side = NStr::eTrunc_Both);
4232
4233 /// Convert sequence of UTF8 code units into Unicode code point
4234 ///
4235 /// @param src
4236 /// Zero-terminated buffer, in UTF8 encoding
4237 /// @return
4238 /// Unicode code point
4239 static TUnicodeSymbol Decode(const char*& src);
4240
4241 #ifndef NCBI_COMPILER_WORKSHOP
4242 /// Convert sequence of UTF8 code units into Unicode code point
4243 ///
4244 /// @param src
4245 /// C++ string iterator
4246 /// @return
4247 /// Unicode code point
4248 static TUnicodeSymbol Decode(string::const_iterator& src);
4249 #endif
4250
4251 /// Begin converting first character of UTF8 sequence into Unicode
4252 ///
4253 /// @param ch
4254 /// Character
4255 /// @param more
4256 /// If the character is valid, - how many more characters to expect
4257 /// @return
4258 /// Part of Unicode code point. Zero if the character is invalid.
4259 static TUnicodeSymbol DecodeFirst(char ch, SIZE_TYPE& more);
4260
4261 /// Convert next character of UTF8 sequence into Unicode
4262 ///
4263 /// @param ch
4264 /// Character
4265 /// @param chU
4266 /// Incomplete Unicode code point
4267 /// @return
4268 /// Accumulated Unicode code point. Zero if the character is invalid.
4269 static TUnicodeSymbol DecodeNext(TUnicodeSymbol chU, char ch);
4270
4271 private:
4272 static void x_Validate(const CTempString& str);
4273
4274 static SIZE_TYPE x_GetValidSymbolCount
4275 (const CTempString& src, CTempString::const_iterator& err);
4276
4277 static CStringUTF8& x_AppendChar(CStringUTF8& u8str, TUnicodeSymbol ch);
4278
4279 static CStringUTF8& x_Append(CStringUTF8& u8str, const CTempString& src,
4280 EEncoding encoding, EValidate validate);
4281 #if defined(HAVE_WSTRING)
4282 static CStringUTF8& x_Append(CStringUTF8& u8str, const CTempString& src, const locale& lcl);
4283 #endif
4284 template <typename TChar>
4285 static CStringUTF8& x_Append(CStringUTF8& u8str, const TChar* src, SIZE_TYPE tchar_count);
4286
4287 template <typename TChar>
4288 static basic_string<TChar> x_AsBasicString
4289 (const CTempString& src,
4290 const TChar* substitute_on_error, EValidate validate);
4291
4292 template <typename TIterator>
4293 static TUnicodeSymbol x_Decode(TIterator& src);
4294
4295 static SIZE_TYPE x_BytesNeeded(TUnicodeSymbol ch);
4296 static bool x_EvalFirst(char ch, SIZE_TYPE& more);
4297 static bool x_EvalNext(char ch);
4298
4299 // returns part of the string around an error in Utf8 encoding
4300 static CTempString x_GetErrorFragment(const CTempString& src);
4301
4302 friend class CStringUTF8_DEPRECATED;
4303 };
4304
4305 // deprecated CStringUTF8 is there
4306 #include <corelib/impl/stringutf8_deprecated.hpp>
4307
4308
4309
4310 /////////////////////////////////////////////////////////////////////////////
4311 ///
4312 /// CParseTemplException --
4313 ///
4314 /// Define template class for parsing exception. This class is used to define
4315 /// exceptions for complex parsing tasks and includes an additional m_Pos
4316 /// data member. The constructor requires that an additional positional
4317 /// parameter be supplied along with the description message.
4318
4319 template <class TBase>
4320 class CParseTemplException : EXCEPTION_VIRTUAL_BASE public TBase
4321 {
4322 public:
4323 /// Error types that for exception class.
4324 enum EErrCode {
4325 eErr ///< Generic error
4326 };
4327
4328 /// Translate from the error code value to its string representation.
GetErrCodeString(void) const4329 virtual const char* GetErrCodeString(void) const override
4330 {
4331 switch (GetErrCode()) {
4332 case eErr: return "eErr";
4333 default: return CException::GetErrCodeString();
4334 }
4335 }
4336
4337 /// Constructor.
4338 ///
4339 /// Report "pos" along with "what".
CParseTemplException(const CDiagCompileInfo & info,const CException * prev_exception,EErrCode err_code,const string & message,string::size_type pos,EDiagSev severity=eDiag_Error)4340 CParseTemplException(const CDiagCompileInfo &info,
4341 const CException* prev_exception,
4342 EErrCode err_code,const string& message,
4343 string::size_type pos, EDiagSev severity = eDiag_Error)
4344 : TBase(info, prev_exception, message, severity, 0), m_Pos(pos)
4345 {
4346 this->x_Init(info,
4347 string("{") + NStr::SizetToString(m_Pos) +
4348 "} " + message,
4349 prev_exception,
4350 severity);
4351 this->x_InitErrCode((CException::EErrCode) err_code);
4352 }
4353
4354 /// Constructor.
CParseTemplException(const CParseTemplException<TBase> & other)4355 CParseTemplException(const CParseTemplException<TBase>& other)
4356 : TBase(other)
4357 {
4358 m_Pos = other.m_Pos;
4359 this->x_Assign(other);
4360 }
4361
4362 /// Destructor.
~CParseTemplException(void)4363 virtual ~CParseTemplException(void) throw() {}
4364
4365 /// Report error position.
ReportExtra(ostream & out) const4366 virtual void ReportExtra(ostream& out) const override
4367 {
4368 out << "m_Pos = " << (unsigned long)m_Pos;
4369 }
4370
4371 // Attributes.
4372
4373 /// Get exception class type.
GetType(void) const4374 virtual const char* GetType(void) const override
4375 { return "CParseTemplException"; }
4376
4377 typedef int TErrCode;
4378 /// Get error code.
GetErrCode(void) const4379 TErrCode GetErrCode(void) const
4380 {
4381 return typeid(*this) == typeid(CParseTemplException<TBase>) ?
4382 (TErrCode) this->x_GetErrCode() :
4383 (TErrCode) CException::eInvalid;
4384 }
4385
4386 /// Get error position.
GetPos(void) const4387 string::size_type GetPos(void) const throw() { return m_Pos; }
4388
4389 protected:
CParseTemplException(const CDiagCompileInfo & info,const CException * prev_exception,const string & message,string::size_type pos,EDiagSev severity,CException::TFlags flags)4390 CParseTemplException(const CDiagCompileInfo &info,
4391 const CException* prev_exception,
4392 const string& message,
4393 string::size_type pos, EDiagSev severity, CException::TFlags flags)
4394 : TBase(info, prev_exception, message, severity, flags), m_Pos(pos)
4395 {
4396 this->x_Init(info,
4397 string("{") + NStr::SizetToString(m_Pos) +
4398 "} " + message,
4399 prev_exception,
4400 severity);
4401 }
4402 /// Constructor.
CParseTemplException(void)4403 CParseTemplException(void)
4404 {
4405 m_Pos = 0;
4406 }
4407
4408 /// Helper clone method.
x_Clone(void) const4409 virtual const CException* x_Clone(void) const override
4410 {
4411 return new CParseTemplException<TBase>(*this);
4412 }
4413
4414 private:
4415 string::size_type m_Pos; ///< Error position
4416 };
4417
4418
4419 /////////////////////////////////////////////////////////////////////////////
4420 ///
4421 /// CStringException --
4422 ///
4423 /// Define exceptions generated by string classes.
4424 ///
4425 /// CStringException inherits its basic functionality from
4426 /// CParseTemplException<CCoreException> and defines additional error codes
4427 /// for string parsing.
4428
4429 class NCBI_XNCBI_EXPORT CStringException : public CParseTemplException<CCoreException>
4430 {
4431 public:
4432 /// Error types that string classes can generate.
4433 enum EErrCode {
4434 eConvert, ///< Failure to convert string
4435 eBadArgs, ///< Bad arguments to string methods
4436 eFormat ///< Wrong format for any input to string methods
4437 };
4438
4439 /// Translate from the error code value to its string representation.
4440 virtual const char* GetErrCodeString(void) const override;
4441
4442 // Standard exception boilerplate code.
4443 NCBI_EXCEPTION_DEFAULT2(CStringException,
4444 CParseTemplException<CCoreException>, std::string::size_type);
4445 };
4446
4447
4448
4449 /////////////////////////////////////////////////////////////////////////////
4450 ///
4451 /// CStringPairsParser --
4452 ///
4453 /// Base class for parsing a string to a set of name-value pairs.
4454
4455
4456 /// Decoder interface. Names and values can be decoded with different rules.
4457 class IStringDecoder
4458 {
4459 public:
4460 /// Type of string to be decoded
4461 enum EStringType {
4462 eName,
4463 eValue
4464 };
4465 /// Decode the string. Must throw CStringException if the source string
4466 /// is not valid.
4467 virtual string Decode(const CTempString src, EStringType stype) const = 0;
~IStringDecoder(void)4468 virtual ~IStringDecoder(void) {}
4469 };
4470
4471
4472 /// Encoder interface. Names and values can be encoded with different rules.
4473 class IStringEncoder
4474 {
4475 public:
4476 /// Type of string to be decoded
4477 enum EStringType {
4478 eName,
4479 eValue
4480 };
4481 /// Encode the string.
4482 virtual string Encode(const CTempString src, EStringType stype) const = 0;
~IStringEncoder(void)4483 virtual ~IStringEncoder(void) {}
4484 };
4485
4486
4487 /// URL-decoder for string pairs parser
4488 class NCBI_XNCBI_EXPORT CStringDecoder_Url : public IStringDecoder
4489 {
4490 public:
4491 CStringDecoder_Url(NStr::EUrlDecode flag = NStr::eUrlDec_All);
4492
4493 virtual string Decode(const CTempString src, EStringType stype) const;
4494
4495 private:
4496 NStr::EUrlDecode m_Flag;
4497 };
4498
4499
4500 /// URL-encoder for string pairs parser
4501 class NCBI_XNCBI_EXPORT CStringEncoder_Url : public IStringEncoder
4502 {
4503 public:
4504 CStringEncoder_Url(NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);
4505
4506 virtual string Encode(const CTempString src, EStringType stype) const;
4507
4508 private:
4509 NStr::EUrlEncode m_Flag;
4510 };
4511
4512
4513 /// Template for parsing string into pairs of name and value or merging
4514 /// them back into a single string.
4515 /// The container class must hold pairs of strings (pair<string, string>).
4516 template<class TContainer>
4517 class CStringPairs
4518 {
4519 public:
4520 typedef TContainer TStrPairs;
4521 /// The container's value type must be pair<string, string>
4522 /// or a compatible type.
4523 typedef typename TContainer::value_type TStrPair;
4524
4525 /// Create parser with the specified decoder/encoder and default separators.
4526 ///
4527 /// @param decoder
4528 /// String decoder (Url, Xml etc.)
4529 /// @param own_decoder
4530 /// Decoder ownership flag
4531 /// @param decoder
4532 /// String encoder (Url, Xml etc.), optional
4533 /// @param own_encoder
4534 /// Encoder ownership flag, optional
CStringPairs(IStringDecoder * decoder=NULL,EOwnership own_decoder=eTakeOwnership,IStringEncoder * encoder=NULL,EOwnership own_encoder=eTakeOwnership)4535 CStringPairs(IStringDecoder* decoder = NULL,
4536 EOwnership own_decoder = eTakeOwnership,
4537 IStringEncoder* encoder = NULL,
4538 EOwnership own_encoder = eTakeOwnership)
4539 : m_ArgSep("&"),
4540 m_ValSep("="),
4541 m_Decoder(decoder, own_decoder),
4542 m_Encoder(encoder, own_encoder)
4543 {
4544 }
4545
4546 /// Create parser with the specified parameters.
4547 ///
4548 /// @param arg_sep
4549 /// Separator between name+value pairs
4550 /// @param val_sep
4551 /// Separator between name and value
4552 /// @param decoder
4553 /// String decoder (Url, Xml etc.)
4554 /// @param own_decoder
4555 /// Decoder ownership flag
4556 /// @param encoder
4557 /// String encoder (Url, Xml etc.)
4558 /// @param own_encoder
4559 /// Encoder ownership flag
CStringPairs(const CTempString arg_sep,const CTempString val_sep,IStringDecoder * decoder=NULL,EOwnership own_decoder=eTakeOwnership,IStringEncoder * encoder=NULL,EOwnership own_encoder=eTakeOwnership)4560 CStringPairs(const CTempString arg_sep,
4561 const CTempString val_sep,
4562 IStringDecoder* decoder = NULL,
4563 EOwnership own_decoder = eTakeOwnership,
4564 IStringEncoder* encoder = NULL,
4565 EOwnership own_encoder = eTakeOwnership)
4566 : m_ArgSep(arg_sep),
4567 m_ValSep(val_sep),
4568 m_Decoder(decoder, own_decoder),
4569 m_Encoder(encoder, own_encoder)
4570 {
4571 }
4572
4573 /// Create parser with the selected URL-encoding/decoding options
4574 /// and default separators.
4575 ///
4576 /// @param decode_flag
4577 /// URL-decoding flag
4578 /// @param encode_flag
4579 /// URL-encoding flag
CStringPairs(NStr::EUrlDecode decode_flag,NStr::EUrlEncode encode_flag)4580 CStringPairs(NStr::EUrlDecode decode_flag,
4581 NStr::EUrlEncode encode_flag)
4582 : m_ArgSep("&"),
4583 m_ValSep("="),
4584 m_Decoder(new CStringDecoder_Url(decode_flag), eTakeOwnership),
4585 m_Encoder(new CStringEncoder_Url(encode_flag), eTakeOwnership)
4586 {
4587 }
4588
~CStringPairs(void)4589 virtual ~CStringPairs(void) {}
4590
4591 /// Set string decoder.
4592 ///
4593 /// @param decoder
4594 /// String decoder (Url, Xml etc.)
4595 /// @param own
4596 /// Decoder ownership flag
SetDecoder(IStringDecoder * decoder,EOwnership own=eTakeOwnership)4597 void SetDecoder(IStringDecoder* decoder, EOwnership own = eTakeOwnership)
4598 { m_Decoder.reset(decoder, own); }
4599 /// Get decoder or NULL. Does not affect decoder ownership.
GetDecoder(void)4600 IStringDecoder* GetDecoder(void) { return m_Decoder.get(); }
4601
4602 /// Set string encoder.
4603 ///
4604 /// @param encoder
4605 /// String encoder (Url, Xml etc.)
4606 /// @param own
4607 /// Encoder ownership flag
SetEncoder(IStringEncoder * encoder,EOwnership own=eTakeOwnership)4608 void SetEncoder(IStringEncoder* encoder, EOwnership own = eTakeOwnership)
4609 { m_Encoder.reset(encoder, own); }
4610 /// Get encoder or NULL. Does not affect encoder ownership.
GetEncoder(void)4611 IStringEncoder* GetEncoder(void) { return m_Encoder.get(); }
4612
4613 /// Parse the string.
4614 ///
4615 /// @param str
4616 /// String to parse. The parser assumes the string is formatted like
4617 /// "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and
4618 /// value is passed to the decoder (if not NULL) before storing the pair.
4619 /// @param merge_argsep
4620 /// Flag for merging separators between pairs. By default the separators
4621 /// are merged to prevent pairs where both name and value are empty.
Parse(const CTempString str,NStr::EMergeDelims merge_argsep=NStr::eMergeDelims)4622 void Parse(const CTempString str,
4623 NStr::EMergeDelims merge_argsep = NStr::eMergeDelims)
4624 {
4625 Parse(m_Data, str, m_ArgSep, m_ValSep,
4626 m_Decoder.get(), eNoOwnership, merge_argsep);
4627 }
4628
4629 /// Parse the string using the provided decoder, put data into the
4630 /// container.
4631 ///
4632 /// @param pairs
4633 /// Container to be filled with the parsed name/value pairs
4634 /// @param str
4635 /// String to parse. The parser assumes the string is formatted like
4636 /// "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and
4637 /// value is passed to the decoder (if not NULL) before storing the pair.
4638 /// @param decoder
4639 /// String decoder (Url, Xml etc.)
4640 /// @param own
4641 /// Flag indicating if the decoder must be deleted by the function.
4642 /// @param merge_argsep
4643 /// Flag for merging separators between pairs. By default the separators
4644 /// are merged to prevent pairs where both name and value are empty.
Parse(TStrPairs & pairs,const CTempString str,const CTempString arg_sep,const CTempString val_sep,IStringDecoder * decoder=NULL,EOwnership own=eTakeOwnership,NStr::EMergeDelims merge_argsep=NStr::eMergeDelims)4645 static void Parse(TStrPairs& pairs,
4646 const CTempString str,
4647 const CTempString arg_sep,
4648 const CTempString val_sep,
4649 IStringDecoder* decoder = NULL,
4650 EOwnership own = eTakeOwnership,
4651 NStr::EMergeDelims merge_argsep = NStr::eMergeDelims)
4652 {
4653 AutoPtr<IStringDecoder> decoder_guard(decoder, own);
4654 list<string> lst;
4655 NStr::Split(str, arg_sep, lst, (NStr::TSplitFlags)merge_argsep);
4656 pairs.clear();
4657 ITERATE(list<string>, it, lst) {
4658 string name, val;
4659 NStr::SplitInTwo(*it, val_sep, name, val);
4660 if ( decoder ) {
4661 try {
4662 name = decoder->Decode(name, IStringDecoder::eName);
4663 val = decoder->Decode(val, IStringDecoder::eValue);
4664 }
4665 catch (const CStringException&) {
4666 // Discard all data
4667 pairs.clear();
4668 throw;
4669 }
4670 }
4671 pairs.insert(pairs.end(), TStrPair(name, val));
4672 }
4673 }
4674
4675 /// Merge name-value pairs into a single string using the currently set
4676 /// separators and the provided encoder if any.
Merge(void) const4677 string Merge(void) const
4678 {
4679 return Merge(m_Data, m_ArgSep, m_ValSep,
4680 m_Encoder.get(), eNoOwnership);
4681 }
4682
4683 /// Merge name-value pairs from the provided container, separators
4684 /// and encoder. Delete the encoder if the ownership flag allows.
4685 ///
4686 /// @param pairs
4687 /// Container with the name/value pairs to be merged.
4688 /// @param arg_sep
4689 /// Separator to be inserted between pairs.
4690 /// @param val_sep
4691 /// Separator to be inserted between name and value.
4692 /// @param encoder
4693 /// String encoder (Url, Xml etc.)
4694 /// @param own
4695 /// Flag indicating if the encoder must be deleted by the function.
Merge(const TStrPairs & pairs,const string & arg_sep,const string & val_sep,IStringEncoder * encoder=NULL,EOwnership own=eTakeOwnership)4696 static string Merge(const TStrPairs& pairs,
4697 const string& arg_sep,
4698 const string& val_sep,
4699 IStringEncoder* encoder = NULL,
4700 EOwnership own = eTakeOwnership)
4701 {
4702 AutoPtr<IStringEncoder> encoder_guard(encoder, own);
4703 string ret;
4704 ITERATE(typename TStrPairs, it, pairs) {
4705 if ( !ret.empty() ) {
4706 ret += arg_sep;
4707 }
4708 if ( encoder ) {
4709 ret += encoder->Encode(it->first, IStringEncoder::eName) +
4710 val_sep +
4711 encoder->Encode(it->second, IStringEncoder::eValue);
4712 }
4713 else {
4714 ret += it->first + val_sep + it->second;
4715 }
4716 }
4717 return ret;
4718 }
4719
4720 /// Read data
GetPairs(void) const4721 const TStrPairs& GetPairs(void) const { return m_Data; }
4722 /// Get non-const data
GetPairs(void)4723 TStrPairs& GetPairs(void) { return m_Data; }
4724
4725 private:
4726 string m_ArgSep; // Separator between name+value pairs ("&")
4727 string m_ValSep; // Separator between name and value ("=")
4728 AutoPtr<IStringDecoder> m_Decoder; // String decoder (Url, Xml etc.)
4729 AutoPtr<IStringEncoder> m_Encoder; // String encoder (Url, Xml etc.)
4730 TStrPairs m_Data; // Parsed data
4731 };
4732
4733
4734 typedef vector<pair<string, string> > TStringPairsVector;
4735 typedef CStringPairs<TStringPairsVector> CStringPairsParser;
4736
4737
4738 /////////////////////////////////////////////////////////////////////////////
4739 ///
4740 /// CEncodedString --
4741 ///
4742 /// Class to detect if a string needs to be URL-encoded and hold both
4743 /// encoded and original versions.
4744 ///
4745
4746 class NCBI_XNCBI_EXPORT CEncodedString
4747 {
4748 public:
CEncodedString(void)4749 CEncodedString(void) {}
4750 CEncodedString(const CTempString s,
4751 NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);
4752
4753 /// Set new original string
4754 void SetString(const CTempString s,
4755 NStr::EUrlEncode flag = NStr::eUrlEnc_SkipMarkChars);
4756
4757 /// Check if the original string was encoded.
IsEncoded(void) const4758 bool IsEncoded(void) const { return m_Encoded.get() != 0; }
4759 /// Get the original unencoded string
GetOriginalString(void) const4760 const string& GetOriginalString(void) const { return m_Original; }
4761 /// Get encoded string
GetEncodedString(void) const4762 const string& GetEncodedString(void) const
4763 { return IsEncoded() ? *m_Encoded : m_Original; }
4764
4765 /// Check if the string is empty
IsEmpty(void) const4766 bool IsEmpty(void) const { return m_Original.empty(); }
4767
4768 private:
4769 string m_Original;
4770 unique_ptr<string> m_Encoded;
4771 };
4772
4773
4774 /////////////////////////////////////////////////////////////////////////////
4775 // Predicates
4776 //
4777
4778
4779 /////////////////////////////////////////////////////////////////////////////
4780 ///
4781 /// Define Case-sensitive string comparison methods.
4782 ///
4783 /// Used as arguments to template functions for specifying the type of
4784 /// comparison.
4785
4786 template <typename T>
4787 struct PCase_Generic
4788 {
4789 /// Return difference between "s1" and "s2".
4790 int Compare(const T& s1, const T& s2) const;
4791
4792 /// Return TRUE if s1 < s2.
4793 bool Less(const T& s1, const T& s2) const;
4794
4795 /// Return TRUE if s1 == s2.
4796 bool Equals(const T& s1, const T& s2) const;
4797
4798 /// Return TRUE if s1 < s2.
4799 bool operator()(const T& s1, const T& s2) const;
4800 };
4801
4802 typedef PCase_Generic<string> PCase;
4803 typedef PCase_Generic<const char *> PCase_CStr;
4804
4805
4806
4807 /////////////////////////////////////////////////////////////////////////////
4808 ///
4809 /// Define Case-insensitive string comparison methods.
4810 ///
4811 /// Used as arguments to template functions for specifying the type of
4812 /// comparison.
4813 ///
4814 /// @sa PNocase_Conditional_Generic
4815
4816 template <typename T>
4817 struct PNocase_Generic
4818 {
4819 /// Return difference between "s1" and "s2".
4820 int Compare(const T& s1, const T& s2) const;
4821
4822 /// Return TRUE if s1 < s2.
4823 bool Less(const T& s1, const T& s2) const;
4824
4825 /// Return TRUE if s1 == s2.
4826 bool Equals(const T& s1, const T& s2) const;
4827
4828 /// Return TRUE if s1 < s2 ignoring case.
4829 bool operator()(const T& s1, const T& s2) const;
4830 };
4831
4832 typedef PNocase_Generic<string> PNocase;
4833 typedef PNocase_Generic<const char *> PNocase_CStr;
4834
4835
4836 /////////////////////////////////////////////////////////////////////////////
4837 ///
4838 /// Define Case-insensitive string comparison methods.
4839 /// Case sensitivity can be turned on and off at runtime.
4840 ///
4841 /// Used as arguments to template functions for specifying the type of
4842 /// comparison.
4843 ///
4844 /// @sa PNocase_Generic
4845
4846 template <typename T>
4847 class PNocase_Conditional_Generic
4848 {
4849 public:
4850 /// Construction
4851 PNocase_Conditional_Generic(NStr::ECase case_sens = NStr::eCase);
4852
4853 /// Get comparison type
GetCase() const4854 NStr::ECase GetCase() const { return m_CaseSensitive; }
4855
4856 /// Set comparison type
SetCase(NStr::ECase case_sens)4857 void SetCase(NStr::ECase case_sens) { m_CaseSensitive = case_sens; }
4858
4859 /// Return difference between "s1" and "s2".
4860 int Compare(const T& s1, const T& s2) const;
4861
4862 /// Return TRUE if s1 < s2.
4863 bool Less(const T& s1, const T& s2) const;
4864
4865 /// Return TRUE if s1 == s2.
4866 bool Equals(const T& s1, const T& s2) const;
4867
4868 /// Return TRUE if s1 < s2 ignoring case.
4869 bool operator()(const T& s1, const T& s2) const;
4870 private:
4871 NStr::ECase m_CaseSensitive; ///< case sensitive when TRUE
4872 };
4873
4874 typedef PNocase_Conditional_Generic<string> PNocase_Conditional;
4875 typedef PNocase_Conditional_Generic<const char *> PNocase_Conditional_CStr;
4876
4877
4878 /////////////////////////////////////////////////////////////////////////////
4879 ///
4880 /// PQuickStringLess implements an ordering of strings,
4881 /// that is more efficient than usual lexicographical order.
4882 /// It can be used in cases when no specific order is required,
4883 /// e.g. only simple key lookup is needed.
4884 /// Current implementation first compares lengths of strings,
4885 /// and will compare string data only when lengths are the same.
4886 ///
4887 struct PQuickStringLess
4888 {
operator ()PQuickStringLess4889 bool operator()(const CTempString s1, const CTempString s2) const {
4890 size_t len1 = s1.size(), len2 = s2.size();
4891 return len1 < len2 ||
4892 (len1 == len2 && ::memcmp(s1.data(), s2.data(), len1) < 0);
4893 }
4894 };
4895
4896
4897 /////////////////////////////////////////////////////////////////////////////
4898 // Algorithms
4899 //
4900
4901
4902 /// Check equivalence of arguments using predicate.
/// Ask predicate "pr" whether "x" and "y" are equivalent.
/// The predicate must provide an Equals(x, y) member; its result is
/// returned (converted to bool).
template<class Arg1, class Arg2, class Pred>
inline
bool AStrEquiv(const Arg1& x, const Arg2& y, Pred pr)
{
    const bool equivalent = pr.Equals(x, y);
    return equivalent;
}
4909
4910
4911 /* @} */
4912
4913
4914
4915 /////////////////////////////////////////////////////////////////////////////
4916 //
4917 // IMPLEMENTATION of INLINE functions
4918 //
4919 /////////////////////////////////////////////////////////////////////////////
4920
4921
4922 /////////////////////////////////////////////////////////////////////////////
4923 // CNcbiEmptyString::
4924 //
4925 #if !defined(NCBI_OS_MSWIN) && \
4926 !(defined(NCBI_OS_LINUX) && \
4927 (defined(NCBI_COMPILER_GCC) || defined(NCBI_COMPILER_ANY_CLANG)))
4928 inline
Get(void)4929 const string& CNcbiEmptyString::Get(void)
4930 {
4931 const string* str = m_Str;
4932 return str ? *str: FirstGet();
4933 }
4934
4935 # ifdef HAVE_WSTRING
4936 inline
Get(void)4937 const wstring& CNcbiEmptyWString::Get(void)
4938 {
4939 const wstring* str = m_Str;
4940 return str ? *str: FirstGet();
4941 }
4942 # endif
4943 #endif
4944
4945
4946
4947 /////////////////////////////////////////////////////////////////////////////
4948 // NStr::
4949 //
4950
4951 inline
IntToString(int value,TNumToStringFlags flags,int base)4952 string NStr::IntToString(int value,
4953 TNumToStringFlags flags, int base)
4954 {
4955 string ret;
4956 IntToString(ret, value, flags, base);
4957 return ret;
4958 }
4959
4960 inline
IntToString(unsigned int value,TNumToStringFlags flags,int base)4961 string NStr::IntToString(unsigned int value,
4962 TNumToStringFlags flags, int base)
4963 {
4964 string ret;
4965 IntToString(ret, (int)value, flags, base);
4966 return ret;
4967 }
4968
4969 inline
IntToString(string & out_str,unsigned int value,TNumToStringFlags flags,int base)4970 void NStr::IntToString(string& out_str, unsigned int value,
4971 TNumToStringFlags flags, int base)
4972 {
4973 IntToString(out_str, (int)value, flags, base);
4974 }
4975
4976 inline
UIntToString(unsigned int value,TNumToStringFlags flags,int base)4977 string NStr::UIntToString(unsigned int value,
4978 TNumToStringFlags flags, int base)
4979 {
4980 string ret;
4981 ULongToString(ret, value, flags, base);
4982 return ret;
4983 }
4984
4985 inline
UIntToString(int value,TNumToStringFlags flags,int base)4986 string NStr::UIntToString(int value,
4987 TNumToStringFlags flags, int base)
4988 {
4989 string ret;
4990 UIntToString(ret, (unsigned int)value, flags, base);
4991 return ret;
4992 }
4993
4994 inline
UIntToString(string & out_str,unsigned int value,TNumToStringFlags flags,int base)4995 void NStr::UIntToString(string& out_str, unsigned int value,
4996 TNumToStringFlags flags, int base)
4997 {
4998 ULongToString(out_str, value, flags, base);
4999 }
5000
5001 inline
UIntToString(string & out_str,int value,TNumToStringFlags flags,int base)5002 void NStr::UIntToString(string& out_str, int value,
5003 TNumToStringFlags flags, int base)
5004 {
5005 UIntToString(out_str, (unsigned int)value, flags, base);
5006 }
5007
5008 inline
LongToString(long value,TNumToStringFlags flags,int base)5009 string NStr::LongToString(long value,
5010 TNumToStringFlags flags, int base)
5011 {
5012 string ret;
5013 LongToString(ret, value, flags, base);
5014 return ret;
5015 }
5016
5017 inline
ULongToString(unsigned long value,TNumToStringFlags flags,int base)5018 string NStr::ULongToString(unsigned long value,
5019 TNumToStringFlags flags, int base)
5020 {
5021 string ret;
5022 ULongToString(ret, value, flags, base);
5023 return ret;
5024 }
5025
5026 inline
Int8ToString(Int8 value,TNumToStringFlags flags,int base)5027 string NStr::Int8ToString(Int8 value,
5028 TNumToStringFlags flags, int base)
5029 {
5030 string ret;
5031 NStr::Int8ToString(ret, value, flags, base);
5032 return ret;
5033 }
5034
5035 inline
UInt8ToString(Uint8 value,TNumToStringFlags flags,int base)5036 string NStr::UInt8ToString(Uint8 value,
5037 TNumToStringFlags flags, int base)
5038 {
5039 string ret;
5040 NStr::UInt8ToString(ret, value, flags, base);
5041 return ret;
5042 }
5043
5044 inline
UInt8ToString_DataSize(Uint8 value,TNumToStringFlags flags,unsigned int max_digits)5045 string NStr::UInt8ToString_DataSize(Uint8 value,
5046 TNumToStringFlags flags /* = 0 */,
5047 unsigned int max_digits /* = 3 */)
5048 {
5049 string ret;
5050 NStr::UInt8ToString_DataSize(ret, value, flags, max_digits);
5051 return ret;
5052 }
5053
5054 inline
DoubleToString(double value,int precision,TNumToStringFlags flags)5055 string NStr::DoubleToString(double value, int precision,
5056 TNumToStringFlags flags)
5057 {
5058 string str;
5059 DoubleToString(str, value, precision, flags);
5060 return str;
5061 }
5062
5063 inline
HexChar(char ch)5064 int NStr::HexChar(char ch)
5065 {
5066 unsigned int rc = ch - '0';
5067 if (rc <= 9) {
5068 return rc;
5069 } else {
5070 rc = (ch | ' ') - 'a';
5071 return rc <= 5 ? int(rc + 10) : -1;
5072 }
5073 }
5074
5075 inline
strcmp(const char * s1,const char * s2)5076 int NStr::strcmp(const char* s1, const char* s2)
5077 {
5078 return ::strcmp(s1, s2);
5079 }
5080
5081 inline
strncmp(const char * s1,const char * s2,size_t n)5082 int NStr::strncmp(const char* s1, const char* s2, size_t n)
5083 {
5084 return ::strncmp(s1, s2, n);
5085 }
5086
5087 inline
strcasecmp(const char * s1,const char * s2)5088 int NStr::strcasecmp(const char* s1, const char* s2)
5089 {
5090 #if defined(HAVE_STRICMP)
5091 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400)
5092 return ::_stricmp(s1, s2);
5093 #else
5094 return ::stricmp(s1, s2);
5095 #endif
5096
5097 #elif defined(HAVE_STRCASECMP_LC)
5098 return ::strcasecmp(s1, s2);
5099
5100 #else
5101 int diff = 0;
5102 for ( ;; ++s1, ++s2) {
5103 char c1 = *s1;
5104 // calculate difference
5105 diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2));
5106 // if end of string or different
5107 if (!c1 || diff)
5108 break; // return difference
5109 }
5110 return diff;
5111 #endif
5112 }
5113
5114 inline
strncasecmp(const char * s1,const char * s2,size_t n)5115 int NStr::strncasecmp(const char* s1, const char* s2, size_t n)
5116 {
5117 #if defined(HAVE_STRICMP)
5118 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400)
5119 return ::_strnicmp(s1, s2, n);
5120 #else
5121 return ::strnicmp(s1, s2, n);
5122 #endif
5123
5124 #elif defined(HAVE_STRCASECMP_LC)
5125 return ::strncasecmp(s1, s2, n);
5126
5127 #else
5128 int diff = 0;
5129 for ( ; ; ++s1, ++s2, --n) {
5130 if (n == 0)
5131 return 0;
5132 char c1 = *s1;
5133 // calculate difference
5134 diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2));
5135 // if end of string or different
5136 if (!c1 || diff)
5137 break; // return difference
5138 }
5139 return diff;
5140 #endif
5141 }
5142
5143 inline
strftime(char * s,size_t maxsize,const char * format,const struct tm * timeptr)5144 size_t NStr::strftime(char* s, size_t maxsize, const char* format,
5145 const struct tm* timeptr)
5146 {
5147 string x_format = Replace(format, "%T", "%H:%M:%S");
5148 ReplaceInPlace(x_format, "%D", "%m/%d/%y");
5149 return ::strftime(s, maxsize, x_format.c_str(), timeptr);
5150 }
5151
5152 inline
CompareCase(const char * s1,const char * s2)5153 int NStr::CompareCase(const char* s1, const char* s2)
5154 {
5155 return NStr::strcmp(s1, s2);
5156 }
5157
5158 inline
CompareNocase(const char * s1,const char * s2)5159 int NStr::CompareNocase(const char* s1, const char* s2)
5160 {
5161 return NStr::strcasecmp(s1, s2);
5162 }
5163
5164 inline
Compare(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const char * s2,ECase use_case)5165 int NStr::Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
5166 const char* s2, ECase use_case)
5167 {
5168 return use_case == eCase ? CompareCase(s1.substr(pos, n), s2)
5169 : CompareNocase(s1.substr(pos, n), s2);
5170 }
5171
5172 inline
Compare(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const CTempString s2,ECase use_case)5173 int NStr::Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
5174 const CTempString s2, ECase use_case)
5175 {
5176 return use_case == eCase ? CompareCase(s1.substr(pos, n), s2)
5177 : CompareNocase(s1.substr(pos, n), s2);
5178 }
5179
5180 inline
Compare(const char * s1,const char * s2,ECase use_case)5181 int NStr::Compare(const char* s1, const char* s2, ECase use_case)
5182 {
5183 return use_case == eCase ? CompareCase(s1, s2) : CompareNocase(s1, s2);
5184 }
5185
5186 inline
Compare(const CTempStringEx s1,const CTempStringEx s2,ECase use_case)5187 int NStr::Compare(const CTempStringEx s1, const CTempStringEx s2, ECase use_case)
5188 {
5189 return use_case == eCase ? CompareCase(s1, s2) : CompareNocase(s1, s2);
5190 }
5191
5192 inline
EqualCase(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const char * s2)5193 bool NStr::EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char* s2)
5194 {
5195 return s1.substr(pos, n) == s2;
5196 }
5197
5198 inline
EqualCase(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const CTempString s2)5199 bool NStr::EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const CTempString s2)
5200 {
5201 return s1.substr(pos, n) == s2;
5202 }
5203
5204 inline
EqualCase(const char * s1,const char * s2)5205 bool NStr::EqualCase(const char* s1, const char* s2)
5206 {
5207 size_t n = strlen(s1);
5208 if (n != strlen(s2)) {
5209 return false;
5210 }
5211 return NStr::strncmp(s1, s2, n) == 0;
5212 }
5213
5214 inline
EqualCase(const CTempStringEx s1,const CTempStringEx s2)5215 bool NStr::EqualCase(const CTempStringEx s1, const CTempStringEx s2)
5216 {
5217 return s1 == s2;
5218 }
5219
5220 inline
EqualNocase(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const char * s2)5221 bool NStr::EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char* s2)
5222 {
5223 return CompareNocase(s1.substr(pos, n), s2) == 0;
5224 }
5225
5226 inline
EqualNocase(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const CTempString s2)5227 bool NStr::EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const CTempString s2)
5228 {
5229 return CompareNocase(s1.substr(pos, n), s2) == 0;
5230 }
5231
5232 inline
EqualNocase(const char * s1,const char * s2)5233 bool NStr::EqualNocase(const char* s1, const char* s2)
5234 {
5235 size_t n = strlen(s1);
5236 if (n != strlen(s2)) {
5237 return false;
5238 }
5239 return NStr::strncasecmp(s1, s2, n) == 0;
5240 }
5241
5242 inline
EqualNocase(const CTempStringEx s1,const CTempStringEx s2)5243 bool NStr::EqualNocase(const CTempStringEx s1, const CTempStringEx s2)
5244 {
5245 if (s1.length() != s2.length()) {
5246 return false;
5247 }
5248 return CompareNocase(s1, s2) == 0;
5249 }
5250
5251 inline
Equal(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const char * s2,ECase use_case)5252 bool NStr::Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
5253 const char* s2, ECase use_case)
5254 {
5255 return use_case == eCase ? EqualCase(s1.substr(pos, n), s2)
5256 : EqualNocase(s1.substr(pos, n), s2);
5257 }
5258
5259 inline
Equal(const CTempString s1,SIZE_TYPE pos,SIZE_TYPE n,const CTempString s2,ECase use_case)5260 bool NStr::Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
5261 const CTempString s2, ECase use_case)
5262 {
5263 return use_case == eCase ? EqualCase(s1.substr(pos, n), s2)
5264 : EqualNocase(s1.substr(pos, n), s2);
5265 }
5266
5267 inline
Equal(const char * s1,const char * s2,ECase use_case)5268 bool NStr::Equal(const char* s1, const char* s2, ECase use_case)
5269 {
5270 return use_case == eCase ? EqualCase(s1, s2) : EqualNocase(s1, s2);
5271 }
5272
5273 inline
Equal(const CTempStringEx s1,const CTempStringEx s2,ECase use_case)5274 bool NStr::Equal(const CTempStringEx s1, const CTempStringEx s2, ECase use_case)
5275 {
5276 return use_case == eCase ? EqualCase(s1, s2) : EqualNocase(s1, s2);
5277 }
5278
5279 inline
StartsWith(const CTempString str,const CTempString start,ECase use_case)5280 bool NStr::StartsWith(const CTempString str, const CTempString start, ECase use_case)
5281 {
5282 return str.size() >= start.size() &&
5283 Equal(str.substr(0, start.size()), start, use_case);
5284 }
5285
5286 inline
StartsWith(const CTempString str,char start,ECase use_case)5287 bool NStr::StartsWith(const CTempString str, char start, ECase use_case)
5288 {
5289 return !str.empty() &&
5290 (use_case == eCase ? (str[0] == start)
5291 : (str[0] == start ||
5292 toupper((unsigned char) str[0]) == start ||
5293 tolower((unsigned char) str[0]))
5294 );
5295 }
5296
5297 inline
EndsWith(const CTempString str,const CTempString end,ECase use_case)5298 bool NStr::EndsWith(const CTempString str, const CTempString end, ECase use_case)
5299 {
5300 return str.size() >= end.size() &&
5301 Equal(str.substr(str.size() - end.size(), end.size()), end, use_case);
5302 }
5303
5304 inline
EndsWith(const CTempString str,char end,ECase use_case)5305 bool NStr::EndsWith(const CTempString str, char end, ECase use_case)
5306 {
5307 if (!str.empty()) {
5308 char last = str[str.length() - 1];
5309 return use_case == eCase ? (last == end)
5310 : (last == end ||
5311 toupper((unsigned char) last) == end ||
5312 tolower((unsigned char) last) == end);
5313 }
5314 return false;
5315 }
5316
5317 inline
CommonPrefixSize(const CTempString s1,const CTempString s2)5318 SIZE_TYPE NStr::CommonPrefixSize(const CTempString s1, const CTempString s2)
5319 {
5320 const SIZE_TYPE n = min(s1.length(), s2.length());
5321 for (SIZE_TYPE i = 0; i < n; i++) {
5322 if (s1[i] != s2[i]) {
5323 return i;
5324 }
5325 }
5326 return n;
5327 }
5328
5329 inline
CommonSuffixSize(const CTempString s1,const CTempString s2)5330 SIZE_TYPE NStr::CommonSuffixSize(const CTempString s1, const CTempString s2)
5331 {
5332 const SIZE_TYPE len1 = s1.length();
5333 const SIZE_TYPE len2 = s2.length();
5334 const SIZE_TYPE n = min(len1, len2);
5335 for (SIZE_TYPE i = 1; i <= n; i++) {
5336 if (s1[len1 - i] != s2[len2 - i]) {
5337 return i - 1;
5338 }
5339 }
5340 return n;
5341 }
5342
5343 inline
Find(const CTempString str,const CTempString pattern,SIZE_TYPE start,SIZE_TYPE end,EOccurrence where,ECase use_case)5344 SIZE_TYPE NStr::Find(const CTempString str, const CTempString pattern,
5345 SIZE_TYPE start, SIZE_TYPE end, EOccurrence where,
5346 ECase use_case)
5347 {
5348 SIZE_TYPE pos = Find(CTempString(str, start, end - start), pattern, use_case,
5349 where == eFirst ? eForwardSearch : eReverseSearch, 0);
5350 if (pos == NPOS) {
5351 return NPOS;
5352 }
5353 return pos + start;
5354 }
5355
5356 // @deprecated
5357 inline
FindCase(const CTempString str,const CTempString pattern,SIZE_TYPE start,SIZE_TYPE end,EOccurrence where)5358 SIZE_TYPE NStr::FindCase(const CTempString str, const CTempString pattern,
5359 SIZE_TYPE start, SIZE_TYPE end, EOccurrence where)
5360 {
5361 if (where == eFirst) {
5362 SIZE_TYPE pos = str.find(pattern, start);
5363 return (pos == NPOS || (pos + pattern.length()) > end) ? NPOS : pos;
5364 } else {
5365 SIZE_TYPE pos = str.rfind(pattern, end);
5366 return (pos == NPOS || pos < start) ? NPOS : pos;
5367 }
5368 }
5369
5370 inline
FindCase(const CTempString str,const CTempString pattern)5371 SIZE_TYPE NStr::FindCase(const CTempString str, const CTempString pattern)
5372 {
5373 return Find(str, pattern, eCase);
5374 }
5375
5376 inline
FindCase(const CTempString str,const CTempString pattern,SIZE_TYPE start)5377 SIZE_TYPE NStr::FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start)
5378 {
5379 SIZE_TYPE pos = Find(CTempString(str, start), pattern, eCase);
5380 if (pos == NPOS) {
5381 return NPOS;
5382 }
5383 return pos + start;
5384 }
5385
5386 inline
FindNoCase(const CTempString str,const CTempString pattern)5387 SIZE_TYPE NStr::FindNoCase(const CTempString str, const CTempString pattern)
5388 {
5389 return Find(str, pattern, eNocase);
5390 }
5391
5392 inline
FindNoCase(const CTempString str,const CTempString pattern,SIZE_TYPE start)5393 SIZE_TYPE NStr::FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start)
5394 {
5395 SIZE_TYPE pos = Find(CTempString(str, start), pattern, eNocase);
5396 if (pos == NPOS) {
5397 return NPOS;
5398 }
5399 return pos + start;
5400 }
5401
5402 inline
FindCase(const list<string> & lst,const CTempString val)5403 const string* NStr::FindCase(const list<string>& lst, const CTempString val)
5404 {
5405 return Find(lst, val, eCase);
5406 }
5407
5408 inline
FindNoCase(const list<string> & lst,const CTempString val)5409 const string* NStr::FindNoCase(const list <string>& lst, const CTempString val)
5410 {
5411 return Find(lst, val, eNocase);
5412 }
5413
5414 inline
FindCase(const vector<string> & vec,const CTempString val)5415 const string* NStr::FindCase(const vector <string>& vec, const CTempString val)
5416 {
5417 return Find(vec, val, eCase);
5418 }
5419
5420 inline
FindNoCase(const vector<string> & vec,const CTempString val)5421 const string* NStr::FindNoCase(const vector <string>& vec, const CTempString val)
5422 {
5423 return Find(vec, val, eNocase);
5424 }
5425
5426 template<typename TIterator, typename FTransform>
5427 string
TransformJoin(TIterator from,TIterator to,const CTempString & delim,FTransform fnTransform)5428 NStr::TransformJoin( TIterator from, TIterator to, const CTempString& delim, FTransform fnTransform)
5429 {
5430 if (from == to) {
5431 return kEmptyStr;
5432 }
5433 string result(fnTransform(*from++));
5434 for ( ; from != to; ++from) {
5435 result.append(delim).append(fnTransform(*from));
5436 }
5437 return result;
5438 }
5439
5440 template<typename TIterator>
5441 string
xx_Join(TIterator from,TIterator to,const CTempString & delim)5442 NStr::xx_Join( TIterator from, TIterator to, const CTempString& delim)
5443 {
5444 if (from == to) {
5445 return kEmptyStr;
5446 }
5447 string result(*from++);
5448 size_t sz_all = 0, sz_delim = delim.size();
5449 for ( TIterator f = from; f != to; ++f) {
5450 sz_all += string(*f).size() + sz_delim;
5451 }
5452 result.reserve(result.size() + sz_all);
5453 for ( ; from != to; ++from) {
5454 result.append(delim).append(string(*from));
5455 }
5456 return result;
5457 }
5458
5459 inline
Wrap(const string & str,SIZE_TYPE width,list<string> & arr,NStr::TWrapFlags flags,const string & prefix,const string * prefix1)5460 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr,
5461 NStr::TWrapFlags flags, const string& prefix,
5462 const string* prefix1)
5463 {
5464 return Wrap(str, width, arr, flags, &prefix, prefix1);
5465 }
5466
5467 inline
Wrap(const string & str,SIZE_TYPE width,list<string> & arr,NStr::TWrapFlags flags,const string & prefix,const string & prefix1)5468 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width, list<string>& arr,
5469 NStr::TWrapFlags flags, const string& prefix,
5470 const string& prefix1)
5471 {
5472 return Wrap(str, width, arr, flags, &prefix, &prefix1);
5473 }
5474
5475 inline
WrapList(const list<string> & l,SIZE_TYPE width,const string & delim,list<string> & arr,NStr::TWrapFlags flags,const string & prefix,const string * prefix1)5476 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,
5477 const string& delim, list<string>& arr,
5478 NStr::TWrapFlags flags, const string& prefix,
5479 const string* prefix1)
5480 {
5481 return WrapList(l, width, delim, arr, flags, &prefix, prefix1);
5482 }
5483
5484 inline
WrapList(const list<string> & l,SIZE_TYPE width,const string & delim,list<string> & arr,NStr::TWrapFlags flags,const string & prefix,const string & prefix1)5485 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,
5486 const string& delim, list<string>& arr,
5487 NStr::TWrapFlags flags, const string& prefix,
5488 const string& prefix1)
5489 {
5490 return WrapList(l, width, delim, arr, flags, &prefix, &prefix1);
5491 }
5492
5493 inline
Justify(const CTempString str,SIZE_TYPE width,list<string> & par,const CTempString pfx,const CTempString * pfx1)5494 list<string>& NStr::Justify(const CTempString str, SIZE_TYPE width,
5495 list<string>& par, const CTempString pfx,
5496 const CTempString* pfx1)
5497 {
5498 return Justify(str, width, par, &pfx, pfx1);
5499 }
5500
5501 inline
Justify(const CTempString str,SIZE_TYPE width,list<string> & par,const CTempString pfx,const CTempString pfx1)5502 list<string>& NStr::Justify(const CTempString str, SIZE_TYPE width,
5503 list<string>& par, const CTempString pfx,
5504 const CTempString pfx1)
5505 {
5506 return Justify(str, width, par, &pfx, &pfx1);
5507 }
5508
5509
5510
5511 /////////////////////////////////////////////////////////////////////////////
5512 // CUtf8::
5513 //
5514
GetValidSymbolCount(const CTempString & src)5515 inline SIZE_TYPE CUtf8::GetValidSymbolCount(const CTempString& src) {
5516 CTempString::const_iterator err;
5517 return x_GetValidSymbolCount(src, err);
5518 }
5519
GetValidBytesCount(const CTempString & src)5520 inline SIZE_TYPE CUtf8::GetValidBytesCount(const CTempString& src) {
5521 CTempString::const_iterator err;
5522 x_GetValidSymbolCount(src,err);
5523 return (err-src.begin());
5524 }
Decode(const char * & src)5525 inline TUnicodeSymbol CUtf8::Decode(const char*& src) {
5526 return x_Decode(src);
5527 }
5528 #ifndef NCBI_COMPILER_WORKSHOP
Decode(string::const_iterator & src)5529 inline TUnicodeSymbol CUtf8::Decode(string::const_iterator& src) {
5530 return x_Decode(src);
5531 }
5532 #endif
5533
5534 template <typename TIterator> inline TUnicodeSymbol
x_Decode(TIterator & src)5535 CUtf8::x_Decode(TIterator& src)
5536 {
5537 SIZE_TYPE more=0;
5538 TUnicodeSymbol sym = DecodeFirst(*src,more);
5539 while (more--) {
5540 sym = DecodeNext(sym, *(++src));
5541 }
5542 return sym;
5543 }
5544
5545 template <typename TChar> basic_string<TChar>
x_AsBasicString(const CTempString & str,const TChar * substitute_on_error,EValidate validate)5546 CUtf8::x_AsBasicString(const CTempString& str,
5547 const TChar* substitute_on_error, EValidate validate)
5548 {
5549 if (validate == eValidate) {
5550 x_Validate(str);
5551 }
5552 TUnicodeSymbol max_char = (TUnicodeSymbol)numeric_limits<TChar>::max();
5553 basic_string<TChar> result;
5554 result.reserve(CUtf8::GetSymbolCount(str) + 1);
5555 CTempString::const_iterator src = str.begin();
5556 CTempString::const_iterator to = str.end();
5557 for (; src != to; ++src) {
5558 TUnicodeSymbol ch = Decode(src);
5559 if (ch > max_char) {
5560 if (substitute_on_error) {
5561 result.append(substitute_on_error);
5562 continue;
5563 } else {
5564 NCBI_THROW2(CStringException, eConvert,
5565 "Failed to convert symbol to wide character",
5566 (src - str.begin()));
5567 }
5568 }
5569 result.append(1, (TChar)ch);
5570 }
5571 return result;
5572 }
5573
5574 template <typename TChar> CStringUTF8&
x_Append(CStringUTF8 & u8str,const TChar * src,SIZE_TYPE to)5575 CUtf8::x_Append(CStringUTF8& u8str, const TChar* src, SIZE_TYPE to)
5576 {
5577 const TChar* srcBuf;
5578 SIZE_TYPE needed = 0;
5579 SIZE_TYPE pos=0;
5580
5581 for (pos=0, srcBuf=src;
5582 (to == NPOS) ? (*srcBuf != 0) : (pos<to); ++pos, ++srcBuf) {
5583 needed += x_BytesNeeded( *srcBuf );
5584 }
5585 if ( !needed ) {
5586 return u8str;
5587 }
5588 u8str.reserve(max(u8str.capacity(),u8str.length()+needed+1));
5589 for (pos=0, srcBuf=src;
5590 (to == NPOS) ? (*srcBuf != 0) : (pos<to); ++pos, ++srcBuf) {
5591 x_AppendChar( u8str, *srcBuf );
5592 }
5593 return u8str;
5594 }
5595
5596 inline CStringUTF8
TruncateSpaces(const CTempString & str,NStr::ETrunc side)5597 CUtf8::TruncateSpaces(const CTempString& str, NStr::ETrunc side) {
5598 CStringUTF8 u8;
5599 return x_Append(u8, TruncateSpaces_Unsafe(str,side), eEncoding_UTF8, eNoValidate);
5600 }
5601
// Deprecated CStringUTF8 code lives in the following include
5603 #include <corelib/impl/stringutf8_deprecated.inl>
5604
5605
5606
5607 /////////////////////////////////////////////////////////////////////////////
5608 // PCase_Generic::
5609 //
5610
5611 template <typename T>
5612 inline
Compare(const T & s1,const T & s2) const5613 int PCase_Generic<T>::Compare(const T& s1, const T& s2) const
5614 {
5615 return NStr::Compare(s1, s2, NStr::eCase);
5616 }
5617
5618 template <typename T>
5619 inline
Less(const T & s1,const T & s2) const5620 bool PCase_Generic<T>::Less(const T& s1, const T& s2) const
5621 {
5622 return Compare(s1, s2) < 0;
5623 }
5624
5625 template <typename T>
5626 inline
Equals(const T & s1,const T & s2) const5627 bool PCase_Generic<T>::Equals(const T& s1, const T& s2) const
5628 {
5629 return Compare(s1, s2) == 0;
5630 }
5631
5632 template <typename T>
5633 inline
operator ()(const T & s1,const T & s2) const5634 bool PCase_Generic<T>::operator()(const T& s1, const T& s2) const
5635 {
5636 return Less(s1, s2);
5637 }
5638
5639
5640
5641 ////////////////////////////////////////////////////////////////////////////
5642 // PNocase_Generic<T>::
5643 //
5644
5645
5646 template <typename T>
5647 inline
Compare(const T & s1,const T & s2) const5648 int PNocase_Generic<T>::Compare(const T& s1, const T& s2) const
5649 {
5650 return NStr::Compare(s1, s2, NStr::eNocase);
5651 }
5652
5653 template <typename T>
5654 inline
Less(const T & s1,const T & s2) const5655 bool PNocase_Generic<T>::Less(const T& s1, const T& s2) const
5656 {
5657 return Compare(s1, s2) < 0;
5658 }
5659
5660 template <typename T>
5661 inline
Equals(const T & s1,const T & s2) const5662 bool PNocase_Generic<T>::Equals(const T& s1, const T& s2) const
5663 {
5664 return Compare(s1, s2) == 0;
5665 }
5666
5667 template <typename T>
5668 inline
operator ()(const T & s1,const T & s2) const5669 bool PNocase_Generic<T>::operator()(const T& s1, const T& s2) const
5670 {
5671 return Less(s1, s2);
5672 }
5673
5674 ////////////////////////////////////////////////////////////////////////////
5675 // PNocase_Conditional_Generic<T>::
5676 //
5677
5678 template <typename T>
5679 inline
PNocase_Conditional_Generic(NStr::ECase cs)5680 PNocase_Conditional_Generic<T>::PNocase_Conditional_Generic(NStr::ECase cs)
5681 : m_CaseSensitive(cs)
5682 {}
5683
5684 template <typename T>
5685 inline
Compare(const T & s1,const T & s2) const5686 int PNocase_Conditional_Generic<T>::Compare(const T& s1, const T& s2) const
5687 {
5688 return NStr::Compare(s1, s2, m_CaseSensitive);
5689 }
5690
5691 template <typename T>
5692 inline
Less(const T & s1,const T & s2) const5693 bool PNocase_Conditional_Generic<T>::Less(const T& s1, const T& s2) const
5694 {
5695 return Compare(s1, s2) < 0;
5696 }
5697
5698 template <typename T>
5699 inline
Equals(const T & s1,const T & s2) const5700 bool PNocase_Conditional_Generic<T>::Equals(const T& s1, const T& s2) const
5701 {
5702 return Compare(s1, s2) == 0;
5703 }
5704
5705 template <typename T>
5706 inline
operator ()(const T & s1,const T & s2) const5707 bool PNocase_Conditional_Generic<T>::operator()(const T& s1, const T& s2) const
5708 {
5709 return Less(s1, s2);
5710 }
5711
5712
5713 END_NCBI_NAMESPACE;
5714
5715 #endif /* CORELIB___NCBISTR__HPP */
5716