1 #ifndef CORELIB___STRINGUTF8_DEPRECATED__HPP 2 #define CORELIB___STRINGUTF8_DEPRECATED__HPP 3 4 /* $Id: stringutf8_deprecated.hpp 480130 2015-09-28 12:57:50Z ivanov $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Author: Andrei Gourianov 30 * 31 * 32 */ 33 34 #define STRINGUTF8_DEFINITION 1 35 #define STRINGUTF8_OBSOLETE_STATIC 0 36 37 #if STRINGUTF8_DEFINITION 38 ///////////////////////////////////////////////////////////////////////////// 39 /// 40 /// CStringUTF8 -- 41 /// 42 /// An UTF-8 string. 43 /// Stores character data in UTF-8 encoding form. 44 /// Being initialized, converts source characters into UTF-8. 45 /// Can convert data back into a particular encoding form (non-UTF8) 46 /// Supported encodings: 47 /// ISO 8859-1 (Latin1) 48 /// Microsoft Windows code page 1252 49 /// UCS-2, UCS-4 (no surrogates) 50 51 52 // On MSVC2010, we cannot export CStringUTF8 53 // So, all its methods must be inline 54 #if !defined(NCBI_COMPILER_MSVC) 55 # define __EXPORT_CTOR_STRINGUTF8__ 1 56 #endif 57 //# define __EXPORT_IMPL_STRINGUTF8__ 1 58 //# define __EXPORT_CTOR_STRINGUTF8__ 1 59 60 61 #if defined(__EXPORT_IMPL_STRINGUTF8__) || defined(__EXPORT_CTOR_STRINGUTF8__) 62 # define NCBI_STRINGUTF8_EXPORT NCBI_XNCBI_EXPORT 63 #else 64 # define NCBI_STRINGUTF8_EXPORT 65 #endif 66 67 class NCBI_STRINGUTF8_EXPORT CStringUTF8_DEPRECATED : public string 68 { 69 public: 70 71 /// How to verify the character encoding of the source data 72 enum EValidate { 73 eNoValidate, 74 eValidate 75 }; 76 77 /// How to interpret zeros in the source character buffer - 78 /// as end of string, or as part of the data 79 enum ECharBufferType { 80 eZeroTerminated, ///< Character buffer is zero-terminated 81 eCharBuffer ///< Zeros are part of the data 82 }; 83 CStringUTF8_DEPRECATED(void)84 CStringUTF8_DEPRECATED(void) { 85 } 86 ~CStringUTF8_DEPRECATED(void)87 ~CStringUTF8_DEPRECATED(void) { 88 } 89 90 /// Copy constructor. 91 /// 92 /// @param src 93 /// Source UTF-8 string 94 /// @param validate 95 /// Verify that the source character encoding is really UTF-8 96 CStringUTF8_DEPRECATED(const CStringUTF8_DEPRECATED& src, EValidate validate = eNoValidate); 97 98 /// Constructor from a C/C++ string 99 /// 100 /// @param src 101 /// Source string 102 /// @param encoding 103 /// Character encoding of the source string 104 /// @param validate 105 /// Verify the character encoding of the source 106 /// @deprecated Use utility class CUtf8 instead 107 CStringUTF8_DEPRECATED(const CTempString src); 108 CStringUTF8_DEPRECATED(const char* src ); 109 CStringUTF8_DEPRECATED(const string& src); 110 CStringUTF8_DEPRECATED(const CTempString src, 111 EEncoding encoding, 112 EValidate validate = eNoValidate); 113 CStringUTF8_DEPRECATED(const char* src, 114 EEncoding encoding, 115 EValidate validate = eNoValidate); 116 CStringUTF8_DEPRECATED(const string& src, 117 EEncoding encoding, 118 EValidate validate = eNoValidate); 119 120 /// Constructor from Unicode string 121 /// 122 /// @param src 123 /// Source string 124 /// @deprecated Use utility class CUtf8 instead 125 CStringUTF8_DEPRECATED(const TStringUnicode& src); 126 #if NCBITOOLKIT_USE_LONG_UCS4 127 CStringUTF8_DEPRECATED(const TStringUCS4& src); 128 #endif 129 CStringUTF8_DEPRECATED(const TStringUCS2& src); 130 #if defined(HAVE_WSTRING) 131 CStringUTF8_DEPRECATED(const wstring& src); 132 #endif 133 134 /// Constructor from Unicode character sequence 135 /// 136 /// @param src 137 /// Source zero-terminated character buffer 138 /// @deprecated Use utility class CUtf8 instead 139 CStringUTF8_DEPRECATED(const TUnicodeSymbol* src); 140 #if NCBITOOLKIT_USE_LONG_UCS4 141 CStringUTF8_DEPRECATED(const TCharUCS4* src); 142 #endif 143 CStringUTF8_DEPRECATED(const TCharUCS2* src); 144 #if defined(HAVE_WSTRING) 145 CStringUTF8_DEPRECATED(const wchar_t* src); 146 #endif 147 148 /// Constructor from Unicode character sequence 149 /// 150 /// @param type 151 /// How to interpret zeros in the source character buffer - 152 /// as end of string, or as part of the data 153 /// @param src 154 /// Source character buffer 155 /// @param char_count 156 /// Number of TChars in the buffer 157 /// @deprecated Use utility class CUtf8 instead 158 CStringUTF8_DEPRECATED(ECharBufferType type, 159 const TUnicodeSymbol* src, SIZE_TYPE char_count); 160 #if NCBITOOLKIT_USE_LONG_UCS4 161 CStringUTF8_DEPRECATED(ECharBufferType type, 162 const TCharUCS4* src, SIZE_TYPE char_count); 163 #endif 164 CStringUTF8_DEPRECATED(ECharBufferType type, 165 const TCharUCS2* src, SIZE_TYPE char_count); 166 #if defined(HAVE_WSTRING) 167 CStringUTF8_DEPRECATED(ECharBufferType type, 168 const wchar_t* src, SIZE_TYPE char_count); 169 #endif 170 171 /// Assign UTF8 string 172 CStringUTF8_DEPRECATED& operator= (const CStringUTF8_DEPRECATED& src); 173 174 /// Assign Unicode C++ string 175 /// 176 /// @param src 177 /// Source string 178 /// @deprecated Use utility class CUtf8 instead 179 CStringUTF8_DEPRECATED& operator= (const TStringUnicode& src); 180 #if NCBITOOLKIT_USE_LONG_UCS4 181 CStringUTF8_DEPRECATED& operator= (const TStringUCS4& src); 182 #endif 183 CStringUTF8_DEPRECATED& operator= (const TStringUCS2& src); 184 #if defined(HAVE_WSTRING) 185 CStringUTF8_DEPRECATED& operator= (const wstring& src); 186 #endif 187 188 /// Assign Unicode C string 189 /// 190 /// @param src 191 /// Source zero-terminated character buffer 192 /// @deprecated Use utility class CUtf8 instead 193 CStringUTF8_DEPRECATED& operator= (const TUnicodeSymbol* src); 194 #if NCBITOOLKIT_USE_LONG_UCS4 195 CStringUTF8_DEPRECATED& operator= (const TCharUCS4* src); 196 #endif 197 CStringUTF8_DEPRECATED& operator= (const TCharUCS2* src); 198 #if defined(HAVE_WSTRING) 199 CStringUTF8_DEPRECATED& operator= (const wchar_t* src); 200 #endif 201 202 /// Append UTF8 string 203 CStringUTF8_DEPRECATED& operator+= (const CStringUTF8_DEPRECATED& src); 204 205 /// Append Unicode C++ string 206 /// 207 /// @param src 208 /// Source string 209 /// @deprecated Use utility class CUtf8 instead 210 CStringUTF8_DEPRECATED& operator+= (const TStringUnicode& src); 211 #if NCBITOOLKIT_USE_LONG_UCS4 212 CStringUTF8_DEPRECATED& operator+= (const TStringUCS4& src); 213 #endif 214 CStringUTF8_DEPRECATED& operator+= (const TStringUCS2& src); 215 #if defined(HAVE_WSTRING) 216 CStringUTF8_DEPRECATED& operator+= (const wstring& src); 217 #endif 218 219 /// Append Unicode C string 220 /// 221 /// @param src 222 /// Source zero-terminated character buffer 223 /// @deprecated Use utility class CUtf8 instead 224 CStringUTF8_DEPRECATED& operator+= (const TUnicodeSymbol* src); 225 #if NCBITOOLKIT_USE_LONG_UCS4 226 CStringUTF8_DEPRECATED& operator+= (const TCharUCS4* src); 227 #endif 228 CStringUTF8_DEPRECATED& operator+= (const TCharUCS2* src); 229 #if defined(HAVE_WSTRING) 230 CStringUTF8_DEPRECATED& operator+= (const wchar_t* src); 231 #endif 232 233 /// Assign C/C++ string 234 /// 235 /// @param src 236 /// Source string 237 /// @param encoding 238 /// Character encoding of the source string 239 /// @param validate 240 /// Verify the character encoding of the source 241 /// @deprecated Use utility class CUtf8 instead 242 CStringUTF8_DEPRECATED& Assign(const CTempString src, 243 EEncoding encoding, 244 EValidate validate = eNoValidate); 245 246 /// Assign Unicode C++ string 247 /// 248 /// @param src 249 /// Source string 250 /// @deprecated Use utility class CUtf8 instead 251 CStringUTF8_DEPRECATED& Assign(const TStringUnicode& src); 252 #if NCBITOOLKIT_USE_LONG_UCS4 253 CStringUTF8_DEPRECATED& Assign(const TStringUCS4& src); 254 #endif 255 CStringUTF8_DEPRECATED& Assign(const TStringUCS2& src); 256 #if defined(HAVE_WSTRING) 257 CStringUTF8_DEPRECATED& Assign(const wstring& src); 258 #endif 259 260 /// Assign Unicode C string 261 /// 262 /// @param src 263 /// Source zero-terminated character buffer 264 /// @deprecated Use utility class CUtf8 instead 265 CStringUTF8_DEPRECATED& Assign(const TUnicodeSymbol* src); 266 #if NCBITOOLKIT_USE_LONG_UCS4 267 CStringUTF8_DEPRECATED& Assign(const TCharUCS4* src); 268 #endif 269 CStringUTF8_DEPRECATED& Assign(const TCharUCS2* src); 270 #if defined(HAVE_WSTRING) 271 CStringUTF8_DEPRECATED& Assign(const wchar_t* src); 272 #endif 273 274 /// Assign Unicode C string or character buffer 275 /// 276 /// @param type 277 /// How to interpret zeros in the source character buffer - 278 /// as end of string, or as part of the data 279 /// @param src 280 /// Source character buffer 281 /// @param char_count 282 /// Number of TChars in the buffer 283 /// @deprecated Use utility class CUtf8 instead 284 CStringUTF8_DEPRECATED& Assign(ECharBufferType type, 285 const TUnicodeSymbol* src, SIZE_TYPE char_count); 286 #if NCBITOOLKIT_USE_LONG_UCS4 287 CStringUTF8_DEPRECATED& Assign(ECharBufferType type, 288 const TCharUCS4* src, SIZE_TYPE char_count); 289 #endif 290 CStringUTF8_DEPRECATED& Assign(ECharBufferType type, 291 const TCharUCS2* src, SIZE_TYPE char_count); 292 #if defined(HAVE_WSTRING) 293 CStringUTF8_DEPRECATED& Assign(ECharBufferType type, 294 const wchar_t* src, SIZE_TYPE char_count); 295 #endif 296 297 /// Assign a single character 298 /// 299 /// @param ch 300 /// Character 301 /// @param encoding 302 /// Character encoding 303 /// @deprecated Use utility class CUtf8 instead 304 CStringUTF8_DEPRECATED& Assign(char ch, EEncoding encoding); 305 306 /// Append a C/C++ string 307 /// 308 /// @param src 309 /// Source string 310 /// @param encoding 311 /// Character encoding of the source string 312 /// @param validate 313 /// Verify the character encoding of the source 314 /// @deprecated Use utility class CUtf8 instead 315 CStringUTF8_DEPRECATED& Append(const CTempString src, 316 EEncoding encoding, 317 EValidate validate = eNoValidate); 318 319 /// Append Unicode C++ string 320 /// 321 /// @param src 322 /// Source string 323 /// @deprecated Use utility class CUtf8 instead 324 CStringUTF8_DEPRECATED& Append(const TStringUnicode& src); 325 #if NCBITOOLKIT_USE_LONG_UCS4 326 CStringUTF8_DEPRECATED& Append(const TStringUCS4& src); 327 #endif 328 CStringUTF8_DEPRECATED& Append(const TStringUCS2& src); 329 #if defined(HAVE_WSTRING) 330 CStringUTF8_DEPRECATED& Append(const wstring& src); 331 #endif 332 333 /// Append Unicode C string 334 /// 335 /// @param src 336 /// Source zero-terminated character buffer 337 /// @deprecated Use utility class CUtf8 instead 338 CStringUTF8_DEPRECATED& Append(const TUnicodeSymbol* src); 339 #if NCBITOOLKIT_USE_LONG_UCS4 340 CStringUTF8_DEPRECATED& Append(const TCharUCS4* src); 341 #endif 342 CStringUTF8_DEPRECATED& Append(const TCharUCS2* src); 343 #if defined(HAVE_WSTRING) 344 CStringUTF8_DEPRECATED& Append(const wchar_t* src); 345 #endif 346 347 /// Append Unicode C string or character buffer 348 /// 349 /// @param type 350 /// How to interpret zeros in the source character buffer - 351 /// as end of string, or as part of the data 352 /// @param src 353 /// Source character buffer 354 /// @param char_count 355 /// Number of TChars in the buffer 356 /// @deprecated Use utility class CUtf8 instead 357 CStringUTF8_DEPRECATED& Append(ECharBufferType type, 358 const TUnicodeSymbol* src, SIZE_TYPE char_count); 359 #if NCBITOOLKIT_USE_LONG_UCS4 360 CStringUTF8_DEPRECATED& Append(ECharBufferType type, 361 const TCharUCS4* src, SIZE_TYPE char_count); 362 #endif 363 CStringUTF8_DEPRECATED& Append(ECharBufferType type, 364 const TCharUCS2* src, SIZE_TYPE char_count); 365 #if defined(HAVE_WSTRING) 366 CStringUTF8_DEPRECATED& Append(ECharBufferType type, 367 const wchar_t* src, SIZE_TYPE char_count); 368 #endif 369 370 /// Append single character 371 /// 372 /// @param ch 373 /// Character 374 /// @param encoding 375 /// Character encoding 376 /// @deprecated Use utility class CUtf8 instead 377 CStringUTF8_DEPRECATED& Append(char ch, EEncoding encoding); 378 379 /// Append single Unicode code point 380 /// 381 /// @param ch 382 /// Unicode code point 383 /// @deprecated Use utility class CUtf8 instead 384 CStringUTF8_DEPRECATED& Append(TUnicodeSymbol ch); 385 386 /// Get the number of symbols (code points) in the string 387 /// 388 /// @return 389 /// Number of symbols (code points) 390 /// @deprecated Use utility class CUtf8 instead 391 SIZE_TYPE GetSymbolCount(void) const; 392 393 /// Get the number of symbols (code points) in the string 394 /// 395 /// @return 396 /// Number of symbols (code points) 397 /// @deprecated Use utility class CUtf8 instead 398 #if STRINGUTF8_OBSOLETE_STATIC 399 static SIZE_TYPE GetSymbolCount(const CTempString src); 400 #endif 401 402 /// Get the number of valid UTF-8 symbols (code points) in the buffer 403 /// 404 /// @param src 405 /// Character buffer 406 /// @param buf_size 407 /// The number of bytes in the buffer 408 /// @return 409 /// Number of valid symbols (no exception thrown) 410 /// @deprecated Use utility class CUtf8 instead 411 #if STRINGUTF8_OBSOLETE_STATIC 412 static SIZE_TYPE GetValidSymbolCount(const char* src, SIZE_TYPE buf_size); 413 #endif 414 415 /// Get the number of valid UTF-8 symbols (code points) in the char buffer 416 /// 417 /// @param src 418 /// Zero-terminated character buffer, or string 419 /// @return 420 /// Number of valid symbols (no exception thrown) 421 /// @deprecated Use utility class CUtf8 instead 422 #if STRINGUTF8_OBSOLETE_STATIC 423 static SIZE_TYPE GetValidSymbolCount(const CTempString src); 424 #endif 425 426 /// Get the number of valid UTF-8 bytes (code units) in the buffer 427 /// 428 /// @param src 429 /// Character buffer 430 /// @param buf_size 431 /// The number of bytes in the buffer 432 /// @return 433 /// Number of valid bytes (no exception thrown) 434 /// @deprecated Use utility class CUtf8 instead 435 #if STRINGUTF8_OBSOLETE_STATIC 436 static SIZE_TYPE GetValidBytesCount(const char* src, SIZE_TYPE buf_size); 437 #endif 438 439 /// Get the number of valid UTF-8 bytes (code units) in the char buffer 440 /// 441 /// @param src 442 /// Zero-terminated character buffer, or string 443 /// @return 444 /// Number of valid bytes (no exception thrown) 445 /// @deprecated Use utility class CUtf8 instead 446 #if STRINGUTF8_OBSOLETE_STATIC 447 static SIZE_TYPE GetValidBytesCount(const CTempString src); 448 #endif 449 450 /// Check that the character encoding of the string is valid UTF-8 451 /// 452 /// @return 453 /// Result of the check 454 /// @deprecated Use utility class CUtf8 instead 455 bool IsValid(void) const; 456 457 /// Convert to ISO 8859-1 (Latin1) character representation 458 /// 459 /// Can throw a CStringException if the conversion is impossible 460 /// or the string has invalid UTF-8 encoding. 461 /// @param substitute_on_error 462 /// If the conversion is impossible, append the provided string 463 /// or, if substitute_on_error equals 0, throw the exception 464 /// @deprecated Use utility class CUtf8 instead 465 string AsLatin1(const char* substitute_on_error = 0) const; 466 467 /// Convert the string to a single-byte character representation 468 /// 469 /// Can throw a CStringException if the conversion is impossible 470 /// or the string has invalid UTF-8 encoding. 471 /// @param encoding 472 /// Desired encoding 473 /// @param substitute_on_error 474 /// If the conversion is impossible, append the provided string 475 /// or, if substitute_on_error equals 0, throw the exception 476 /// @return 477 /// C++ string 478 /// @deprecated Use utility class CUtf8 instead 479 string AsSingleByteString(EEncoding encoding, 480 const char* substitute_on_error = 0) const; 481 482 #if defined(HAVE_WSTRING) 483 /// Convert to Unicode (UCS-2 with no surrogates where 484 /// sizeof(wchar_t) == 2 and UCS-4 where sizeof(wchar_t) == 4). 485 /// 486 /// Can throw a CStringException if the conversion is impossible 487 /// or the string has invalid UTF-8 encoding. 488 /// Defined only if wstring is supported by the compiler. 489 /// 490 /// @param substitute_on_error 491 /// If the conversion is impossible, append the provided string 492 /// or, if substitute_on_error equals 0, throw the exception 493 /// @deprecated Use utility class CUtf8 instead 494 wstring AsUnicode(const wchar_t* substitute_on_error = 0) const; 495 #endif // HAVE_WSTRING 496 497 /// Convert to UCS-2 for all platforms 498 /// 499 /// Can throw a CStringException if the conversion is impossible 500 /// or the string has invalid UTF-8 encoding. 501 /// 502 /// @param substitute_on_error 503 /// If the conversion is impossible, append the provided string 504 /// or, if substitute_on_error equals 0, throw the exception 505 /// @deprecated Use utility class CUtf8 instead 506 TStringUCS2 AsUCS2(const TCharUCS2* substitute_on_error = 0) const; 507 508 /// Conversion to Unicode string with any base type we need 509 /// @deprecated Use utility class CUtf8 instead 510 template <typename TChar> 511 basic_string<TChar> AsBasicString(const TChar* substitute_on_error = 0) 512 const; 513 514 /// Conversion to Unicode string with any base type we need 515 /// @deprecated Use utility class CUtf8 instead 516 template <typename TChar> 517 static 518 basic_string<TChar> AsBasicString( 519 const CTempString src, 520 const TChar* substitute_on_error, 521 EValidate validate = eNoValidate); 522 523 /// Conversion to Unicode string with any base type we need 524 /// @deprecated Use utility class CUtf8 instead 525 template <typename TChar> 526 static basic_string<TChar> AsBasicString(const CTempString src); 527 528 /// Guess the encoding of the C/C++ string 529 /// 530 /// It can distinguish between UTF-8, Latin1, and Win1252 only 531 /// @param src 532 /// Source zero-terminated character buffer 533 /// @return 534 /// Encoding 535 /// @deprecated Use utility class CUtf8 instead 536 #if STRINGUTF8_OBSOLETE_STATIC 537 static EEncoding GuessEncoding(const CTempString src); 538 #endif 539 /// Check the encoding of the C/C++ string 540 /// 541 /// Check that the encoding of the source is the same, or 542 /// is compatible with the specified one 543 /// @param src 544 /// Source string 545 /// @param encoding 546 /// Character encoding form to check against 547 /// @return 548 /// Boolean result: encoding is same or compatible 549 /// @deprecated Use utility class CUtf8 instead 550 #if STRINGUTF8_OBSOLETE_STATIC 551 static bool MatchEncoding(const CTempString src, EEncoding encoding); 552 #endif 553 554 /// Give Encoding name as string 555 /// 556 /// NOTE: 557 /// Function throws CStringException on attempt to get name of eEncoding_Unknown 558 /// 559 /// @param encoding 560 /// EEncoding enum 561 /// @return 562 /// Encoding name 563 /// @deprecated Use utility class CUtf8 instead 564 #if STRINGUTF8_OBSOLETE_STATIC 565 static string EncodingToString(EEncoding encoding); 566 #endif 567 568 /// Convert encoding name into EEncoding enum, taking into account synonyms 569 /// as per http://www.iana.org/assignments/character-sets 570 /// 571 /// NOTE: 572 /// Function returns eEncoding_Unknown for unsupported encodings 573 /// 574 /// @param str 575 /// Encoding name 576 /// @return 577 /// EEncoding enum 578 /// @deprecated Use utility class CUtf8 instead 579 #if STRINGUTF8_OBSOLETE_STATIC 580 static EEncoding StringToEncoding(const CTempString str); 581 #endif 582 583 /// Convert encoded character into UTF16 584 /// 585 /// @param ch 586 /// Encoded character 587 /// @param encoding 588 /// Character encoding 589 /// @return 590 /// Code point 591 /// @deprecated Use utility class CUtf8 instead 592 #if STRINGUTF8_OBSOLETE_STATIC 593 static TUnicodeSymbol CharToSymbol(char ch, EEncoding encoding); 594 #endif 595 596 /// Convert Unicode code point into encoded character 597 /// 598 /// @param sym 599 /// Code point 600 /// @param encoding 601 /// Character encoding 602 /// @return 603 /// Encoded character 604 /// @deprecated Use utility class CUtf8 instead 605 #if STRINGUTF8_OBSOLETE_STATIC 606 static char SymbolToChar(TUnicodeSymbol sym, EEncoding encoding); 607 #endif 608 609 /// Convert sequence of UTF8 code units into Unicode code point 610 /// 611 /// @param src 612 /// UTF8 zero-terminated buffer 613 /// @return 614 /// Unicode code point 615 /// @deprecated Use utility class CUtf8 instead 616 #if STRINGUTF8_OBSOLETE_STATIC 617 static TUnicodeSymbol Decode(const char*& src); 618 #ifndef NCBI_COMPILER_WORKSHOP 619 /// @deprecated Use utility class CUtf8 instead 620 static TUnicodeSymbol Decode(string::const_iterator& src); 621 #endif 622 #endif 623 624 /// Determines if a symbol is whitespace 625 /// per http://unicode.org/charts/uca/chart_Whitespace.html 626 /// 627 /// @param chU 628 /// Unicode code point 629 /// @sa 630 /// TruncateSpacesInPlace, TruncateSpaces_Unsafe, TruncateSpaces 631 /// @deprecated Use utility class CUtf8 instead 632 #if STRINGUTF8_OBSOLETE_STATIC 633 static bool IsWhiteSpace(TUnicodeSymbol chU); 634 #endif 635 636 /// Truncate spaces in the string (in-place) 637 /// 638 /// @param side 639 /// Which end of the string to truncate spaces from. Default is to 640 /// truncate spaces from both ends (eTrunc_Both). 641 /// @return 642 /// Reference to itself 643 /// @sa 644 /// IsWhiteSpace, TruncateSpaces_Unsafe, TruncateSpaces 645 /// @deprecated Use utility class CUtf8 instead 646 CStringUTF8_DEPRECATED& TruncateSpacesInPlace(NStr::ETrunc side = NStr::eTrunc_Both); 647 648 /// Truncate spaces in the string 649 /// 650 /// @param str 651 /// source string, in UTF8 encoding 652 /// @param side 653 /// Which end of the string to truncate spaces from. Default is to 654 /// truncate spaces from both ends (eTrunc_Both). 655 /// @attention 656 /// The lifespan of the result string is the same as one of the source. 657 /// So, for example, if the source is temporary string, the result 658 /// will be invalid right away (will point to already released memory). 659 /// @sa 660 /// IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces, CTempString 661 /// @deprecated Use utility class CUtf8 instead 662 #if STRINGUTF8_OBSOLETE_STATIC 663 static CTempString TruncateSpaces_Unsafe 664 (const CTempString str, NStr::ETrunc side = NStr::eTrunc_Both); 665 #endif 666 667 /// Truncate spaces in the string 668 /// 669 /// @param str 670 /// source string, in UTF8 encoding 671 /// @param side 672 /// Which end of the string to truncate spaces from. Default is to 673 /// truncate spaces from both ends (eTrunc_Both). 674 /// @sa 675 /// IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces_Unsafe 676 /// @deprecated Use utility class CUtf8 instead 677 #if STRINGUTF8_OBSOLETE_STATIC 678 static CStringUTF8_DEPRECATED TruncateSpaces(const CTempString str, 679 NStr::ETrunc side = NStr::eTrunc_Both); 680 #endif 681 682 /// Convert first character of UTF8 sequence into Unicode 683 /// 684 /// @param ch 685 /// character 686 /// @param more 687 /// if the character is valid, - how many more characters to expect 688 /// @return 689 /// non-zero, if the character is valid 690 /// @deprecated Use utility class CUtf8 instead 691 #if STRINGUTF8_OBSOLETE_STATIC 692 static TUnicodeSymbol DecodeFirst(char ch, SIZE_TYPE& more); 693 #endif 694 695 /// Convert next character of UTF8 sequence into Unicode 696 /// 697 /// @param ch 698 /// character 699 /// @param chU 700 /// Unicode code point 701 /// @return 702 /// non-zero, if the character is valid 703 /// @deprecated Use utility class CUtf8 instead 704 #if STRINGUTF8_OBSOLETE_STATIC 705 static TUnicodeSymbol DecodeNext(TUnicodeSymbol chU, char ch); 706 #endif 707 708 private: 709 710 void x_Validate(void) const; 711 712 /// Convert Unicode code point into UTF8 and append 713 void x_AppendChar(TUnicodeSymbol ch); 714 /// Convert coded character sequence into UTF8 and append 715 void x_Append(const CTempString src, 716 EEncoding encoding, 717 EValidate validate = eNoValidate); 718 719 /// Convert Unicode character sequence into UTF8 and append 720 /// Sequence can be in UCS-4 (TChar == (U)Int4), UCS-2 (TChar == (U)Int2) 721 /// or in ISO8859-1 (TChar == char) 722 template <typename TIterator> 723 void x_Append(TIterator from, TIterator to); 724 725 template <typename TChar> 726 void x_Append(const TChar* src, SIZE_TYPE to = NPOS, 727 ECharBufferType type = eZeroTerminated); 728 729 template <typename TChar> static 730 basic_string<TChar> x_AsBasicString 731 (const CTempString src, 732 const TChar* substitute_on_error, EValidate validate); 733 734 template <typename TIterator> static 735 TUnicodeSymbol x_Decode(TIterator& src); 736 737 /// Check how many bytes is needed to represent the code point in UTF8 738 static SIZE_TYPE x_BytesNeeded(TUnicodeSymbol ch); 739 /// Check if the character is valid first code unit of UTF8 740 static bool x_EvalFirst(char ch, SIZE_TYPE& more); 741 /// Check if the character is valid non-first code unit of UTF8 742 static bool x_EvalNext(char ch); 743 744 // Template class for better error messages 745 // from unimplemented template methods 746 template<class Type> class CNotImplemented {}; 747 }; 748 #endif //STRINGUTF8_DEFINITION 749 750 #endif /* CORELIB___STRINGUTF8_DEPRECATED__HPP */ 751