1 #ifndef CORELIB___STRINGUTF8_DEPRECATED__HPP
2 #define CORELIB___STRINGUTF8_DEPRECATED__HPP
3 
4 /*  $Id: stringutf8_deprecated.hpp 480130 2015-09-28 12:57:50Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author:  Andrei Gourianov
30  *
31  *
32  */
33 
34 #define  STRINGUTF8_DEFINITION      1
35 #define  STRINGUTF8_OBSOLETE_STATIC 0
36 
37 #if STRINGUTF8_DEFINITION
38 /////////////////////////////////////////////////////////////////////////////
39 ///
40 /// CStringUTF8 --
41 ///
42 ///   An UTF-8 string.
43 ///   Stores character data in UTF-8 encoding form.
44 ///   Being initialized, converts source characters into UTF-8.
45 ///   Can convert data back into a particular encoding form (non-UTF8)
46 ///   Supported encodings:
47 ///      ISO 8859-1 (Latin1)
48 ///      Microsoft Windows code page 1252
49 ///      UCS-2, UCS-4 (no surrogates)
50 
51 
52 // On MSVC2010, we cannot export CStringUTF8
53 // So, all its methods must be inline
54 #if !defined(NCBI_COMPILER_MSVC)
55 #  define __EXPORT_CTOR_STRINGUTF8__ 1
56 #endif
57 //#  define __EXPORT_IMPL_STRINGUTF8__ 1
58 //#  define __EXPORT_CTOR_STRINGUTF8__ 1
59 
60 
61 #if defined(__EXPORT_IMPL_STRINGUTF8__) || defined(__EXPORT_CTOR_STRINGUTF8__)
62 #  define NCBI_STRINGUTF8_EXPORT NCBI_XNCBI_EXPORT
63 #else
64 #  define NCBI_STRINGUTF8_EXPORT
65 #endif
66 
67 class NCBI_STRINGUTF8_EXPORT CStringUTF8_DEPRECATED : public string
68 {
69 public:
70 
71     /// How to verify the character encoding of the source data
72     enum EValidate {
73         eNoValidate,
74         eValidate
75     };
76 
77     /// How to interpret zeros in the source character buffer -
78     /// as end of string, or as part of the data
79     enum ECharBufferType {
80         eZeroTerminated, ///< Character buffer is zero-terminated
81         eCharBuffer      ///< Zeros are part of the data
82     };
83 
CStringUTF8_DEPRECATED(void)84     CStringUTF8_DEPRECATED(void) {
85     }
86 
~CStringUTF8_DEPRECATED(void)87     ~CStringUTF8_DEPRECATED(void) {
88     }
89 
90     /// Copy constructor.
91     ///
92     /// @param src
93     ///   Source UTF-8 string
94     /// @param validate
95     ///   Verify that the source character encoding is really UTF-8
96     CStringUTF8_DEPRECATED(const CStringUTF8_DEPRECATED& src, EValidate validate = eNoValidate);
97 
98     /// Constructor from a C/C++ string
99     ///
100     /// @param src
101     ///   Source string
102     /// @param encoding
103     ///   Character encoding of the source string
104     /// @param validate
105     ///   Verify the character encoding of the source
106     /// @deprecated  Use utility class CUtf8 instead
107     CStringUTF8_DEPRECATED(const CTempString src);
108     CStringUTF8_DEPRECATED(const char* src );
109     CStringUTF8_DEPRECATED(const string& src);
110     CStringUTF8_DEPRECATED(const CTempString src,
111                   EEncoding encoding,
112                   EValidate validate = eNoValidate);
113     CStringUTF8_DEPRECATED(const char* src,
114                 EEncoding encoding,
115                 EValidate validate = eNoValidate);
116     CStringUTF8_DEPRECATED(const string& src,
117                 EEncoding encoding,
118                 EValidate validate = eNoValidate);
119 
120     /// Constructor from Unicode string
121     ///
122     /// @param src
123     ///   Source string
124     /// @deprecated  Use utility class CUtf8 instead
125     CStringUTF8_DEPRECATED(const TStringUnicode& src);
126 #if NCBITOOLKIT_USE_LONG_UCS4
127     CStringUTF8_DEPRECATED(const TStringUCS4&    src);
128 #endif
129     CStringUTF8_DEPRECATED(const TStringUCS2&    src);
130 #if defined(HAVE_WSTRING)
131     CStringUTF8_DEPRECATED(const wstring&        src);
132 #endif
133 
134     /// Constructor from Unicode character sequence
135     ///
136     /// @param src
137     ///   Source zero-terminated character buffer
138     /// @deprecated  Use utility class CUtf8 instead
139     CStringUTF8_DEPRECATED(const TUnicodeSymbol* src);
140 #if NCBITOOLKIT_USE_LONG_UCS4
141     CStringUTF8_DEPRECATED(const TCharUCS4*      src);
142 #endif
143     CStringUTF8_DEPRECATED(const TCharUCS2*      src);
144 #if defined(HAVE_WSTRING)
145     CStringUTF8_DEPRECATED(const wchar_t*        src);
146 #endif
147 
148     /// Constructor from Unicode character sequence
149     ///
150     /// @param type
151     ///   How to interpret zeros in the source character buffer -
152     ///   as end of string, or as part of the data
153     /// @param src
154     ///   Source character buffer
155     /// @param char_count
156     ///   Number of TChars in the buffer
157     /// @deprecated  Use utility class CUtf8 instead
158     CStringUTF8_DEPRECATED(ECharBufferType type,
159                 const TUnicodeSymbol* src, SIZE_TYPE char_count);
160 #if NCBITOOLKIT_USE_LONG_UCS4
161     CStringUTF8_DEPRECATED(ECharBufferType type,
162                 const TCharUCS4*      src, SIZE_TYPE char_count);
163 #endif
164     CStringUTF8_DEPRECATED(ECharBufferType type,
165                 const TCharUCS2*      src, SIZE_TYPE char_count);
166 #if defined(HAVE_WSTRING)
167     CStringUTF8_DEPRECATED(ECharBufferType type,
168                 const wchar_t*        src, SIZE_TYPE char_count);
169 #endif
170 
171     /// Assign UTF8 string
172     CStringUTF8_DEPRECATED& operator= (const CStringUTF8_DEPRECATED&  src);
173 
174     /// Assign Unicode C++ string
175     ///
176     /// @param src
177     ///   Source string
178     /// @deprecated  Use utility class CUtf8 instead
179     CStringUTF8_DEPRECATED& operator= (const TStringUnicode& src);
180 #if NCBITOOLKIT_USE_LONG_UCS4
181     CStringUTF8_DEPRECATED& operator= (const TStringUCS4&    src);
182 #endif
183     CStringUTF8_DEPRECATED& operator= (const TStringUCS2&    src);
184 #if defined(HAVE_WSTRING)
185     CStringUTF8_DEPRECATED& operator= (const wstring&        src);
186 #endif
187 
188     /// Assign Unicode C string
189     ///
190     /// @param src
191     ///   Source zero-terminated character buffer
192     /// @deprecated  Use utility class CUtf8 instead
193     CStringUTF8_DEPRECATED& operator= (const TUnicodeSymbol* src);
194 #if NCBITOOLKIT_USE_LONG_UCS4
195     CStringUTF8_DEPRECATED& operator= (const TCharUCS4*      src);
196 #endif
197     CStringUTF8_DEPRECATED& operator= (const TCharUCS2*      src);
198 #if defined(HAVE_WSTRING)
199     CStringUTF8_DEPRECATED& operator= (const wchar_t*        src);
200 #endif
201 
202     /// Append UTF8 string
203     CStringUTF8_DEPRECATED& operator+= (const CStringUTF8_DEPRECATED& src);
204 
205     /// Append Unicode C++ string
206     ///
207     /// @param src
208     ///   Source string
209     /// @deprecated  Use utility class CUtf8 instead
210     CStringUTF8_DEPRECATED& operator+= (const TStringUnicode& src);
211 #if NCBITOOLKIT_USE_LONG_UCS4
212     CStringUTF8_DEPRECATED& operator+= (const TStringUCS4&    src);
213 #endif
214     CStringUTF8_DEPRECATED& operator+= (const TStringUCS2&    src);
215 #if defined(HAVE_WSTRING)
216     CStringUTF8_DEPRECATED& operator+= (const wstring&        src);
217 #endif
218 
219     /// Append Unicode C string
220     ///
221     /// @param src
222     ///   Source zero-terminated character buffer
223     /// @deprecated  Use utility class CUtf8 instead
224     CStringUTF8_DEPRECATED& operator+= (const TUnicodeSymbol* src);
225 #if NCBITOOLKIT_USE_LONG_UCS4
226     CStringUTF8_DEPRECATED& operator+= (const TCharUCS4*      src);
227 #endif
228     CStringUTF8_DEPRECATED& operator+= (const TCharUCS2*      src);
229 #if defined(HAVE_WSTRING)
230     CStringUTF8_DEPRECATED& operator+= (const wchar_t*        src);
231 #endif
232 
233     /// Assign C/C++ string
234     ///
235     /// @param src
236     ///   Source string
237     /// @param encoding
238     ///   Character encoding of the source string
239     /// @param validate
240     ///   Verify the character encoding of the source
241     /// @deprecated  Use utility class CUtf8 instead
242     CStringUTF8_DEPRECATED& Assign(const CTempString src,
243                         EEncoding        encoding,
244                         EValidate        validate = eNoValidate);
245 
246     /// Assign Unicode C++ string
247     ///
248     /// @param src
249     ///   Source string
250     /// @deprecated  Use utility class CUtf8 instead
251     CStringUTF8_DEPRECATED& Assign(const TStringUnicode& src);
252 #if NCBITOOLKIT_USE_LONG_UCS4
253     CStringUTF8_DEPRECATED& Assign(const TStringUCS4&    src);
254 #endif
255     CStringUTF8_DEPRECATED& Assign(const TStringUCS2&    src);
256 #if defined(HAVE_WSTRING)
257     CStringUTF8_DEPRECATED& Assign(const wstring&        src);
258 #endif
259 
260     /// Assign Unicode C string
261     ///
262     /// @param src
263     ///   Source zero-terminated character buffer
264     /// @deprecated  Use utility class CUtf8 instead
265     CStringUTF8_DEPRECATED& Assign(const TUnicodeSymbol* src);
266 #if NCBITOOLKIT_USE_LONG_UCS4
267     CStringUTF8_DEPRECATED& Assign(const TCharUCS4*      src);
268 #endif
269     CStringUTF8_DEPRECATED& Assign(const TCharUCS2*      src);
270 #if defined(HAVE_WSTRING)
271     CStringUTF8_DEPRECATED& Assign(const wchar_t*        src);
272 #endif
273 
274     /// Assign Unicode C string or character buffer
275     ///
276     /// @param type
277     ///   How to interpret zeros in the source character buffer -
278     ///   as end of string, or as part of the data
279     /// @param src
280     ///   Source character buffer
281     /// @param char_count
282     ///   Number of TChars in the buffer
283     /// @deprecated  Use utility class CUtf8 instead
284     CStringUTF8_DEPRECATED& Assign(ECharBufferType type,
285                         const TUnicodeSymbol* src, SIZE_TYPE char_count);
286 #if NCBITOOLKIT_USE_LONG_UCS4
287     CStringUTF8_DEPRECATED& Assign(ECharBufferType type,
288                         const TCharUCS4*      src, SIZE_TYPE char_count);
289 #endif
290     CStringUTF8_DEPRECATED& Assign(ECharBufferType type,
291                         const TCharUCS2*      src, SIZE_TYPE char_count);
292 #if defined(HAVE_WSTRING)
293     CStringUTF8_DEPRECATED& Assign(ECharBufferType type,
294                         const wchar_t*        src, SIZE_TYPE char_count);
295 #endif
296 
297     /// Assign a single character
298     ///
299     /// @param ch
300     ///   Character
301     /// @param encoding
302     ///   Character encoding
303     /// @deprecated  Use utility class CUtf8 instead
304     CStringUTF8_DEPRECATED& Assign(char ch, EEncoding encoding);
305 
306     /// Append a C/C++ string
307     ///
308     /// @param src
309     ///   Source string
310     /// @param encoding
311     ///   Character encoding of the source string
312     /// @param validate
313     ///   Verify the character encoding of the source
314     /// @deprecated  Use utility class CUtf8 instead
315     CStringUTF8_DEPRECATED& Append(const CTempString src,
316                         EEncoding encoding,
317                         EValidate validate = eNoValidate);
318 
319     /// Append Unicode C++ string
320     ///
321     /// @param src
322     ///   Source string
323     /// @deprecated  Use utility class CUtf8 instead
324     CStringUTF8_DEPRECATED& Append(const TStringUnicode& src);
325 #if NCBITOOLKIT_USE_LONG_UCS4
326     CStringUTF8_DEPRECATED& Append(const TStringUCS4&    src);
327 #endif
328     CStringUTF8_DEPRECATED& Append(const TStringUCS2&    src);
329 #if defined(HAVE_WSTRING)
330     CStringUTF8_DEPRECATED& Append(const wstring&        src);
331 #endif
332 
333     /// Append Unicode C string
334     ///
335     /// @param src
336     ///   Source zero-terminated character buffer
337     /// @deprecated  Use utility class CUtf8 instead
338     CStringUTF8_DEPRECATED& Append(const TUnicodeSymbol* src);
339 #if NCBITOOLKIT_USE_LONG_UCS4
340     CStringUTF8_DEPRECATED& Append(const TCharUCS4*      src);
341 #endif
342     CStringUTF8_DEPRECATED& Append(const TCharUCS2*      src);
343 #if defined(HAVE_WSTRING)
344     CStringUTF8_DEPRECATED& Append(const wchar_t*        src);
345 #endif
346 
347     /// Append Unicode C string or character buffer
348     ///
349     /// @param type
350     ///   How to interpret zeros in the source character buffer -
351     ///   as end of string, or as part of the data
352     /// @param src
353     ///   Source character buffer
354     /// @param char_count
355     ///   Number of TChars in the buffer
356     /// @deprecated  Use utility class CUtf8 instead
357     CStringUTF8_DEPRECATED& Append(ECharBufferType type,
358                         const TUnicodeSymbol* src, SIZE_TYPE char_count);
359 #if NCBITOOLKIT_USE_LONG_UCS4
360     CStringUTF8_DEPRECATED& Append(ECharBufferType type,
361                         const TCharUCS4*      src, SIZE_TYPE char_count);
362 #endif
363     CStringUTF8_DEPRECATED& Append(ECharBufferType type,
364                         const TCharUCS2*      src, SIZE_TYPE char_count);
365 #if defined(HAVE_WSTRING)
366     CStringUTF8_DEPRECATED& Append(ECharBufferType type,
367                         const wchar_t*        src, SIZE_TYPE char_count);
368 #endif
369 
370     /// Append single character
371     ///
372     /// @param ch
373     ///   Character
374     /// @param encoding
375     ///   Character encoding
376     /// @deprecated  Use utility class CUtf8 instead
377     CStringUTF8_DEPRECATED& Append(char ch, EEncoding encoding);
378 
379     /// Append single Unicode code point
380     ///
381     /// @param ch
382     ///   Unicode code point
383     /// @deprecated  Use utility class CUtf8 instead
384     CStringUTF8_DEPRECATED& Append(TUnicodeSymbol ch);
385 
386     /// Get the number of symbols (code points) in the string
387     ///
388     /// @return
389     ///   Number of symbols (code points)
390     /// @deprecated  Use utility class CUtf8 instead
391     SIZE_TYPE GetSymbolCount(void) const;
392 
393     /// Get the number of symbols (code points) in the string
394     ///
395     /// @return
396     ///   Number of symbols (code points)
397     /// @deprecated  Use utility class CUtf8 instead
398 #if  STRINGUTF8_OBSOLETE_STATIC
399     static SIZE_TYPE GetSymbolCount(const CTempString src);
400 #endif
401 
402     /// Get the number of valid UTF-8 symbols (code points) in the buffer
403     ///
404     /// @param src
405     ///   Character buffer
406     /// @param buf_size
407     ///   The number of bytes in the buffer
408     /// @return
409     ///   Number of valid symbols (no exception thrown)
410     /// @deprecated  Use utility class CUtf8 instead
411 #if  STRINGUTF8_OBSOLETE_STATIC
412     static SIZE_TYPE GetValidSymbolCount(const char* src, SIZE_TYPE buf_size);
413 #endif
414 
415     /// Get the number of valid UTF-8 symbols (code points) in the char buffer
416     ///
417     /// @param src
418     ///   Zero-terminated character buffer, or string
419     /// @return
420     ///   Number of valid symbols (no exception thrown)
421     /// @deprecated  Use utility class CUtf8 instead
422 #if  STRINGUTF8_OBSOLETE_STATIC
423     static SIZE_TYPE GetValidSymbolCount(const CTempString src);
424 #endif
425 
426     /// Get the number of valid UTF-8 bytes (code units) in the buffer
427     ///
428     /// @param src
429     ///   Character buffer
430     /// @param buf_size
431     ///   The number of bytes in the buffer
432     /// @return
433     ///   Number of valid bytes (no exception thrown)
434     /// @deprecated  Use utility class CUtf8 instead
435 #if  STRINGUTF8_OBSOLETE_STATIC
436     static SIZE_TYPE GetValidBytesCount(const char* src, SIZE_TYPE buf_size);
437 #endif
438 
439     /// Get the number of valid UTF-8 bytes (code units) in the char buffer
440     ///
441     /// @param src
442     ///   Zero-terminated character buffer, or string
443     /// @return
444     ///   Number of valid bytes (no exception thrown)
445     /// @deprecated  Use utility class CUtf8 instead
446 #if  STRINGUTF8_OBSOLETE_STATIC
447     static SIZE_TYPE GetValidBytesCount(const CTempString src);
448 #endif
449 
450     /// Check that the character encoding of the string is valid UTF-8
451     ///
452     /// @return
453     ///   Result of the check
454     /// @deprecated  Use utility class CUtf8 instead
455     bool IsValid(void) const;
456 
457     /// Convert to ISO 8859-1 (Latin1) character representation
458     ///
459     /// Can throw a CStringException if the conversion is impossible
460     /// or the string has invalid UTF-8 encoding.
461     /// @param substitute_on_error
462     ///   If the conversion is impossible, append the provided string
463     ///   or, if substitute_on_error equals 0, throw the exception
464     /// @deprecated  Use utility class CUtf8 instead
465     string AsLatin1(const char* substitute_on_error = 0) const;
466 
467     /// Convert the string to a single-byte character representation
468     ///
469     /// Can throw a CStringException if the conversion is impossible
470     /// or the string has invalid UTF-8 encoding.
471     /// @param encoding
472     ///   Desired encoding
473     /// @param substitute_on_error
474     ///   If the conversion is impossible, append the provided string
475     ///   or, if substitute_on_error equals 0, throw the exception
476     /// @return
477     ///   C++ string
478     /// @deprecated  Use utility class CUtf8 instead
479     string AsSingleByteString(EEncoding   encoding,
480                               const char* substitute_on_error = 0) const;
481 
482 #if defined(HAVE_WSTRING)
483     /// Convert to Unicode (UCS-2 with no surrogates where
484     /// sizeof(wchar_t) == 2 and UCS-4 where sizeof(wchar_t) == 4).
485     ///
486     /// Can throw a CStringException if the conversion is impossible
487     /// or the string has invalid UTF-8 encoding.
488     /// Defined only if wstring is supported by the compiler.
489     ///
490     /// @param substitute_on_error
491     ///   If the conversion is impossible, append the provided string
492     ///   or, if substitute_on_error equals 0, throw the exception
493     /// @deprecated  Use utility class CUtf8 instead
494     wstring AsUnicode(const wchar_t* substitute_on_error = 0) const;
495 #endif // HAVE_WSTRING
496 
497     /// Convert to UCS-2 for all platforms
498     ///
499     /// Can throw a CStringException if the conversion is impossible
500     /// or the string has invalid UTF-8 encoding.
501     ///
502     /// @param substitute_on_error
503     ///   If the conversion is impossible, append the provided string
504     ///   or, if substitute_on_error equals 0, throw the exception
505     /// @deprecated  Use utility class CUtf8 instead
506     TStringUCS2 AsUCS2(const TCharUCS2* substitute_on_error = 0) const;
507 
508     /// Conversion to Unicode string with any base type we need
509     /// @deprecated  Use utility class CUtf8 instead
510     template <typename TChar>
511     basic_string<TChar> AsBasicString(const TChar* substitute_on_error = 0)
512         const;
513 
514     /// Conversion to Unicode string with any base type we need
515     /// @deprecated  Use utility class CUtf8 instead
516     template <typename TChar>
517     static
518     basic_string<TChar> AsBasicString(
519         const CTempString src,
520         const TChar* substitute_on_error,
521         EValidate validate = eNoValidate);
522 
523     /// Conversion to Unicode string with any base type we need
524     /// @deprecated  Use utility class CUtf8 instead
525     template <typename TChar>
526     static basic_string<TChar> AsBasicString(const CTempString src);
527 
528     /// Guess the encoding of the C/C++ string
529     ///
530     /// It can distinguish between UTF-8, Latin1, and Win1252 only
531     /// @param src
532     ///   Source zero-terminated character buffer
533     /// @return
534     ///   Encoding
535     /// @deprecated  Use utility class CUtf8 instead
536 #if  STRINGUTF8_OBSOLETE_STATIC
537     static EEncoding GuessEncoding(const CTempString src);
538 #endif
539     /// Check the encoding of the C/C++ string
540     ///
541     /// Check that the encoding of the source is the same, or
542     /// is compatible with the specified one
543     /// @param src
544     ///   Source string
545     /// @param encoding
546     ///   Character encoding form to check against
547     /// @return
548     ///   Boolean result: encoding is same or compatible
549     /// @deprecated  Use utility class CUtf8 instead
550 #if  STRINGUTF8_OBSOLETE_STATIC
551     static bool MatchEncoding(const CTempString src, EEncoding encoding);
552 #endif
553 
554     /// Give Encoding name as string
555     ///
556     /// NOTE:
557     ///   Function throws CStringException on attempt to get name of eEncoding_Unknown
558     ///
559     /// @param encoding
560     ///   EEncoding enum
561     /// @return
562     ///   Encoding name
563     /// @deprecated  Use utility class CUtf8 instead
564 #if  STRINGUTF8_OBSOLETE_STATIC
565     static string EncodingToString(EEncoding encoding);
566 #endif
567 
568     /// Convert encoding name into EEncoding enum, taking into account synonyms
569     /// as per  http://www.iana.org/assignments/character-sets
570     ///
571     /// NOTE:
572     ///   Function returns eEncoding_Unknown for unsupported encodings
573     ///
574     /// @param str
575     ///   Encoding name
576     /// @return
577     ///   EEncoding enum
578     /// @deprecated  Use utility class CUtf8 instead
579 #if  STRINGUTF8_OBSOLETE_STATIC
580     static EEncoding StringToEncoding(const CTempString str);
581 #endif
582 
583     /// Convert encoded character into UTF16
584     ///
585     /// @param ch
586     ///   Encoded character
587     /// @param encoding
588     ///   Character encoding
589     /// @return
590     ///   Code point
591     /// @deprecated  Use utility class CUtf8 instead
592 #if  STRINGUTF8_OBSOLETE_STATIC
593     static TUnicodeSymbol CharToSymbol(char ch, EEncoding encoding);
594 #endif
595 
596     /// Convert Unicode code point into encoded character
597     ///
598     /// @param sym
599     ///   Code point
600     /// @param encoding
601     ///   Character encoding
602     /// @return
603     ///   Encoded character
604     /// @deprecated  Use utility class CUtf8 instead
605 #if  STRINGUTF8_OBSOLETE_STATIC
606     static char SymbolToChar(TUnicodeSymbol sym, EEncoding encoding);
607 #endif
608 
609     /// Convert sequence of UTF8 code units into Unicode code point
610     ///
611     /// @param src
612     ///   UTF8 zero-terminated buffer
613     /// @return
614     ///   Unicode code point
615     /// @deprecated  Use utility class CUtf8 instead
616 #if  STRINGUTF8_OBSOLETE_STATIC
617     static TUnicodeSymbol Decode(const char*& src);
618 #ifndef NCBI_COMPILER_WORKSHOP
619     /// @deprecated  Use utility class CUtf8 instead
620     static TUnicodeSymbol Decode(string::const_iterator& src);
621 #endif
622 #endif
623 
624     /// Determines if a symbol is whitespace
625     /// per  http://unicode.org/charts/uca/chart_Whitespace.html
626     ///
627     /// @param chU
628     ///   Unicode code point
629     /// @sa
630     ///   TruncateSpacesInPlace, TruncateSpaces_Unsafe, TruncateSpaces
631     /// @deprecated  Use utility class CUtf8 instead
632 #if  STRINGUTF8_OBSOLETE_STATIC
633     static bool IsWhiteSpace(TUnicodeSymbol chU);
634 #endif
635 
636     /// Truncate spaces in the string (in-place)
637     ///
638     /// @param side
639     ///   Which end of the string to truncate spaces from. Default is to
640     ///   truncate spaces from both ends (eTrunc_Both).
641     /// @return
642     ///   Reference to itself
643     /// @sa
644     ///   IsWhiteSpace, TruncateSpaces_Unsafe, TruncateSpaces
645     /// @deprecated  Use utility class CUtf8 instead
646     CStringUTF8_DEPRECATED& TruncateSpacesInPlace(NStr::ETrunc side = NStr::eTrunc_Both);
647 
648     /// Truncate spaces in the string
649     ///
650     /// @param str
651     ///   source string, in UTF8 encoding
652     /// @param side
653     ///   Which end of the string to truncate spaces from. Default is to
654     ///   truncate spaces from both ends (eTrunc_Both).
655     /// @attention
656     ///   The lifespan of the result string is the same as one of the source.
657     ///   So, for example, if the source is temporary string, the result
658     ///   will be invalid right away (will point to already released memory).
659     /// @sa
660     ///   IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces, CTempString
661     /// @deprecated  Use utility class CUtf8 instead
662 #if  STRINGUTF8_OBSOLETE_STATIC
663     static CTempString TruncateSpaces_Unsafe
664     (const CTempString str, NStr::ETrunc side = NStr::eTrunc_Both);
665 #endif
666 
667     /// Truncate spaces in the string
668     ///
669     /// @param str
670     ///   source string, in UTF8 encoding
671     /// @param side
672     ///   Which end of the string to truncate spaces from. Default is to
673     ///   truncate spaces from both ends (eTrunc_Both).
674     /// @sa
675     ///   IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces_Unsafe
676     /// @deprecated  Use utility class CUtf8 instead
677 #if  STRINGUTF8_OBSOLETE_STATIC
678     static CStringUTF8_DEPRECATED TruncateSpaces(const CTempString str,
679                                       NStr::ETrunc side = NStr::eTrunc_Both);
680 #endif
681 
682     /// Convert first character of UTF8 sequence into Unicode
683     ///
684     /// @param ch
685     ///   character
686     /// @param more
687     ///   if the character is valid, - how many more characters to expect
688     /// @return
689     ///   non-zero, if the character is valid
690     /// @deprecated  Use utility class CUtf8 instead
691 #if  STRINGUTF8_OBSOLETE_STATIC
692     static TUnicodeSymbol  DecodeFirst(char ch, SIZE_TYPE& more);
693 #endif
694 
695     /// Convert next character of UTF8 sequence into Unicode
696     ///
697     /// @param ch
698     ///   character
699     /// @param chU
700     ///   Unicode code point
701     /// @return
702     ///   non-zero, if the character is valid
703     /// @deprecated  Use utility class CUtf8 instead
704 #if  STRINGUTF8_OBSOLETE_STATIC
705     static TUnicodeSymbol  DecodeNext(TUnicodeSymbol chU, char ch);
706 #endif
707 
708 private:
709 
710     void   x_Validate(void) const;
711 
712     /// Convert Unicode code point into UTF8 and append
713     void   x_AppendChar(TUnicodeSymbol ch);
714     /// Convert coded character sequence into UTF8 and append
715     void   x_Append(const CTempString src,
716                     EEncoding encoding,
717                     EValidate validate = eNoValidate);
718 
719     /// Convert Unicode character sequence into UTF8 and append
720     /// Sequence can be in UCS-4 (TChar == (U)Int4), UCS-2 (TChar == (U)Int2)
721     /// or in ISO8859-1 (TChar == char)
722     template <typename TIterator>
723     void x_Append(TIterator from, TIterator to);
724 
725     template <typename TChar>
726     void x_Append(const TChar* src, SIZE_TYPE to = NPOS,
727                   ECharBufferType type = eZeroTerminated);
728 
729     template <typename TChar> static
730     basic_string<TChar> x_AsBasicString
731     (const CTempString src,
732      const TChar* substitute_on_error, EValidate validate);
733 
734     template <typename TIterator> static
735     TUnicodeSymbol x_Decode(TIterator& src);
736 
737     /// Check how many bytes is needed to represent the code point in UTF8
738     static SIZE_TYPE x_BytesNeeded(TUnicodeSymbol ch);
739     /// Check if the character is valid first code unit of UTF8
740     static bool   x_EvalFirst(char ch, SIZE_TYPE& more);
741     /// Check if the character is valid non-first code unit of UTF8
742     static bool   x_EvalNext(char ch);
743 
744     // Template class for better error messages
745     // from unimplemented template methods
746     template<class Type> class CNotImplemented {};
747 };
748 #endif //STRINGUTF8_DEFINITION
749 
750 #endif  /* CORELIB___STRINGUTF8_DEPRECATED__HPP */
751