1 /* $Id: ncbistre.cpp 633612 2021-06-22 17:38:24Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Denis Vakatov
27  *
28  * File Description:
29  *   NCBI C++ stream class wrappers
30  *   Triggering between "new" and "old" C++ stream libraries
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbisys.hpp>
37 #include <corelib/ncbistre.hpp>
38 #include <corelib/stream_utils.hpp>
39 #if defined(NCBI_OS_UNIX)
40 #  include <unistd.h>
41 #endif
42 
43 
44 BEGIN_NCBI_SCOPE
45 
46 
47 #if defined(NCBI_OS_MSWIN) && defined(_UNICODE)
ncbi_Utf8ToWstring(const char * utf8)48 wstring ncbi_Utf8ToWstring(const char *utf8)
49 {
50     return _T_XSTRING(utf8);
51 }
52 #endif
53 
54 
NcbiGetline(CNcbiIstream & is,string & str,const string & delims,SIZE_TYPE * count)55 CNcbiIstream& NcbiGetline(CNcbiIstream& is, string& str, const string& delims,
56                           SIZE_TYPE* count)
57 {
58     str.erase();
59 
60     IOS_BASE::fmtflags f = is.flags();
61     is.unsetf(IOS_BASE::skipws);
62 #ifdef NO_PUBSYNC
63     if ( !is.ipfx(1) ) {
64         is.flags(f);
65         is.setstate(NcbiFailbit);
66         return is;
67     }
68 #else
69     CNcbiIstream::sentry s(is);
70     if ( !s ) {
71         is.flags(f);
72         is.setstate(NcbiFailbit);
73         return is;
74     }
75 #endif //NO_PUBSYNC
76     _ASSERT( is.good() );
77 
78     char buf[1024];
79     SIZE_TYPE pos = 0;
80     SIZE_TYPE size = 0;
81     SIZE_TYPE max_size = str.max_size();
82     SIZE_TYPE delim_count = 0;
83     IOS_BASE::iostate iostate = NcbiGoodbit/*0*/;
84     for (;;) {
85         CT_INT_TYPE ch = is.rdbuf()->sbumpc();
86         if ( CT_EQ_INT_TYPE(ch, CT_EOF) ) {
87             iostate = NcbiEofbit;
88             break;
89         }
90         SIZE_TYPE delim_pos = delims.find(CT_TO_CHAR_TYPE(ch));
91         if (delim_pos != NPOS) {
92             // Special case -- if two different delimiters are back to
93             // back and in the same order as in delims, treat them as
94             // a single delimiter (necessary for correct handling of
95             // DOS/MAC-style CR/LF endings).
96             ch = is.rdbuf()->sgetc();
97             if (!CT_EQ_INT_TYPE(ch, CT_EOF)
98                 &&  delims.find(CT_TO_CHAR_TYPE(ch), delim_pos + 1) != NPOS) {
99                 is.rdbuf()->sbumpc();
100                 delim_count = 2;
101             } else {
102                 delim_count = 1;
103             }
104             break;
105         }
106         if (size == max_size) {
107             CT_INT_TYPE bk = is.rdbuf()->sungetc();
108             iostate = CT_EQ_INT_TYPE(bk, ch) ? NcbiFailbit : NcbiBadbit;
109             break;
110         }
111 
112         buf[pos++] = CT_TO_CHAR_TYPE(ch);
113         if (pos == sizeof(buf)) {
114             str.append(buf, pos);
115             pos  = 0;
116         }
117         size++;
118     }
119     if (pos > 0)
120         str.append(buf, pos);
121     if (count != NULL)
122         *count = size + delim_count;
123 
124 #ifdef NO_PUBSYNC
125     is.isfx();
126 #endif //NO_PUBSYNC
127     is.flags(f);
128     if (iostate) {
129         if (iostate == NcbiEofbit  &&  str.empty())
130             iostate |= NcbiFailbit;
131         is.clear(iostate);
132     }
133     return is;
134 }
135 
136 
NcbiGetline(CNcbiIstream & is,string & str,char delim,SIZE_TYPE * count)137 extern CNcbiIstream& NcbiGetline(CNcbiIstream& is, string& str, char delim,
138                                  SIZE_TYPE* count)
139 {
140 #if   defined(NCBI_USE_OLD_IOSTREAM)
141     return NcbiGetline(is, str, string(1, delim), count);
142 #else
143     str.erase();
144 
145     if ( !is.good() ) {
146         is.setstate(NcbiFailbit);
147         return is;
148     }
149 
150     char buf[1024];
151     SIZE_TYPE size = 0;
152     SIZE_TYPE max_size = str.max_size();
153     do {
154         CT_INT_TYPE nextc = is.get();
155         if (CT_EQ_INT_TYPE(nextc, CT_EOF)
156             ||  CT_EQ_INT_TYPE(nextc, CT_TO_INT_TYPE(delim))) {
157             ++size;
158             break;
159         }
160         if ( !is.unget() )
161             break;
162         if (size == max_size) {
163             is.clear(NcbiFailbit);
164             break;
165         }
166         SIZE_TYPE n = max_size - size;
167         is.get(buf, n < sizeof(buf) ? n : sizeof(buf), delim);
168         n = (size_t) is.gcount();
169         str.append(buf, n);
170         size += n;
171         _ASSERT(size == str.length());
172     } while ( is.good() );
173 #endif
174 
175     if (is.rdstate() == NcbiEofbit  &&  str.empty())
176         is.setstate(NcbiFailbit);
177     if (count != NULL)
178         *count = size;
179     return is;
180 }
181 
182 
// Platform-specific end-of-line sequence:  CR LF when built for MS-Windows
// (NCBI_OS_MSWIN), a lone LF otherwise.  The returned pointer refers to
// a static NUL-terminated string.
const char* Endl(void)
{
    static const char s_Endl[] =
#if defined(NCBI_OS_MSWIN)
        "\r\n";
#else /* assume UNIX-like EOLs */
        "\n";
#endif
    return s_Endl;
}
193 
194 
195 // Get a line taking into account platform-specific of End-Of-Line
NcbiGetlineEOL(CNcbiIstream & is,string & str,SIZE_TYPE * count)196 CNcbiIstream& NcbiGetlineEOL(CNcbiIstream& is, string& str, SIZE_TYPE* count)
197 {
198 #if   defined(NCBI_OS_MSWIN)
199     NcbiGetline(is, str, '\n', count);
200     if (!str.empty()  &&  str[str.length() - 1] == '\r')
201         str.resize(str.length() - 1);
202 #elif defined(NCBI_OS_DARWIN)
203     NcbiGetline(is, str, "\r\n", count);
204 #else /* assume UNIX-like EOLs */
205     NcbiGetline(is, str, '\n', count);
206 #endif //NCBI_OS_...
207     return is;
208 }
209 
210 
NcbiStreamCopy(CNcbiOstream & os,CNcbiIstream & is)211 bool NcbiStreamCopy(CNcbiOstream& os, CNcbiIstream& is)
212 {
213     if (!os.good()  ||  is.bad())
214         return false;
215     if (CT_EQ_INT_TYPE(is.peek(), CT_EOF)) {
216         // NB: C++ Std says nothing about eofbit (27.6.1.3.27)
217         return !is.bad();
218     }
219     os << is.rdbuf();
220     return os.good()  &&  os.flush() ? true : false;
221 }
222 
223 
NcbiStreamCopyThrow(CNcbiOstream & os,CNcbiIstream & is)224 void NcbiStreamCopyThrow(CNcbiOstream& os, CNcbiIstream& is)
225 {
226     bool success = false;
227     try {
228         success = NcbiStreamCopy(os, is);
229     }
230     NCBI_CATCH_ALL("NcbiStreamCopy()");
231     if (!success) {
232         NCBI_THROW(CCoreException, eCore, "NcbiStreamCopy() failed");
233     }
234 }
235 
236 
NcbiStreamToString(string * str,CNcbiIstream & is,size_t pos)237 size_t NcbiStreamToString(string* str, CNcbiIstream& is, size_t pos)
238 {
239     if (!is.good()) {
240         // Can't extract anything
241         if (str)
242             str->resize(pos);
243         is.setstate(NcbiFailbit);
244         return 0;
245     }
246 
247     char   buf[5120];
248     size_t buf_size = sizeof(buf);
249     size_t str_size;
250 
251     if (str) {
252         str_size = pos;
253         if (str->size() < str_size + buf_size)
254             str->resize(str_size + buf_size);
255     } else
256         str_size = pos = 0;
257 
258     do {
259         try {
260             is.read(str ? &(*str)[str_size] : buf, buf_size);
261         } catch (...) {
262             if (str)
263                 str->resize(str_size);
264             throw;
265         }
266         streamsize count = is.gcount();
267         str_size += (size_t) count;
268         if (str) {
269             if ((size_t) count == buf_size) {
270                 if (buf_size < (1UL << 20))
271                     buf_size <<= 1;
272                 str->resize(str_size + buf_size);
273             } else
274                 _ASSERT(!is.good());
275         }
276     } while (is.good());
277 
278     _ASSERT(str_size >= pos);
279     if (str)
280         str->resize(str_size);
281 
282     if (!(str_size -= pos)) {
283         // Nothing extracted
284         is.setstate(NcbiFailbit);
285         return 0;
286     }
287 
288     // NB: istream::read() sets both bits at EOF (27.6.1.3.28)
289     IOS_BASE::iostate iostate = is.rdstate();
290     if (iostate != (NcbiFailbit | NcbiEofbit))
291         return 0;
292     is.clear(iostate & ~NcbiFailbit);
293     return str_size;
294 }
295 
296 
NcbiStreamCompare(CNcbiIstream & is1,CNcbiIstream & is2)297 bool NcbiStreamCompare(CNcbiIstream& is1, CNcbiIstream& is2)
298 {
299     while (is1 && is2) {
300         char c1 = (char)is1.get();
301         char c2 = (char)is2.get();
302         if (c1 != c2) {
303             return false;
304         }
305     }
306     return is1.eof() && is2.eof();
307 }
308 
309 
310 static inline
x_GetChar(CNcbiIstream & is,ECompareTextMode mode,char * buf,size_t buf_size,char * & pos,size_t & sizeleft)311 char x_GetChar(CNcbiIstream& is, ECompareTextMode mode,
312                char* buf, size_t buf_size, char*& pos, size_t& sizeleft)
313 {
314     char c;
315     do {
316         if ( !sizeleft ) {
317             is.read(buf, buf_size);
318             sizeleft = (size_t) is.gcount();
319             pos = buf;
320         }
321         if (sizeleft > 0) {
322             c = *pos++;
323             --sizeleft;
324         } else {
325             return '\0';
326         }
327     } while ( (mode == eCompareText_IgnoreEol
328                &&  (c == '\n'  ||  c == '\r'))  ||
329               (mode == eCompareText_IgnoreWhiteSpace
330                &&  isspace((unsigned char) c)) );
331     return c;
332 }
333 
334 
NcbiStreamCompareText(CNcbiIstream & is1,CNcbiIstream & is2,ECompareTextMode mode,size_t buf_size)335 bool NcbiStreamCompareText(CNcbiIstream& is1, CNcbiIstream& is2,
336                            ECompareTextMode mode, size_t buf_size)
337 {
338     if ( !buf_size ) {
339         buf_size = 4 * 1024;
340     }
341     char*  buf1  = new char[buf_size];
342     char*  buf2  = new char[buf_size];
343     size_t size1 = 0, size2 = 0;
344     char   *pos1 = 0, *pos2 = 0;
345     bool   equal = true;
346     do {
347         char c1 = x_GetChar(is1, mode, buf1, buf_size, pos1, size1);
348         char c2 = x_GetChar(is2, mode, buf2, buf_size, pos2, size2);
349         equal = (c1 == c2);
350         if (!c1  ||  !c2) {
351             break;
352         }
353     } while ( equal );
354     delete[] buf1;
355     delete[] buf2;
356     return equal  &&  is1.eof()  &&  is2.eof();
357 }
358 
359 
NcbiStreamCompareText(CNcbiIstream & is,const string & str,ECompareTextMode mode,size_t buf_size)360 bool NcbiStreamCompareText(CNcbiIstream& is, const string& str,
361                            ECompareTextMode mode, size_t buf_size)
362 {
363     CNcbiIstrstream istr(str);
364     return NcbiStreamCompareText(is, istr, mode, buf_size);
365 }
366 
367 
operator string(void) const368 CNcbiOstrstreamToString::operator string(void) const
369 {
370 #ifdef NCBI_SHUN_OSTRSTREAM
371     return m_Out.str();
372 #else
373     SIZE_TYPE len = (SIZE_TYPE) m_Out.pcount();
374     if ( !len ) {
375         return string();
376     }
377     const char* str = m_Out.str();
378     m_Out.freeze(false);
379     return string(str, len);
380 #endif
381 }
382 
383 
operator <<(CNcbiOstream & out,const CNcbiOstrstreamToString & s)384 CNcbiOstream& operator<<(CNcbiOstream& out, const CNcbiOstrstreamToString& s)
385 {
386 #ifdef NCBI_SHUN_OSTRSTREAM
387     out << s.m_Out.str();
388 #else
389     SIZE_TYPE len = (SIZE_TYPE) s.m_Out.pcount();
390     if ( len ) {
391         const char* str = s.m_Out.str();
392         s.m_Out.freeze(false);
393         out.write(str, len);
394     }
395 #endif
396     return out;
397 }
398 
399 
operator <<(CNcbiOstream & out,CUpcaseStringConverter s)400 CNcbiOstream& operator<<(CNcbiOstream& out, CUpcaseStringConverter s)
401 {
402     ITERATE ( string, c, s.m_String ) {
403         out.put(char(toupper((unsigned char)(*c))));
404     }
405     return out;
406 }
407 
408 
operator <<(CNcbiOstream & out,CLocaseStringConverter s)409 CNcbiOstream& operator<<(CNcbiOstream& out, CLocaseStringConverter s)
410 {
411     ITERATE ( string, c, s.m_String ) {
412         out.put(char(tolower((unsigned char)(*c))));
413     }
414     return out;
415 }
416 
417 
operator <<(CNcbiOstream & out,CUpcaseCharPtrConverter s)418 CNcbiOstream& operator<<(CNcbiOstream& out, CUpcaseCharPtrConverter s)
419 {
420     for ( const char* c = s.m_String; *c; ++c ) {
421         out.put(char(toupper((unsigned char)(*c))));
422     }
423     return out;
424 }
425 
426 
operator <<(CNcbiOstream & out,CLocaseCharPtrConverter s)427 CNcbiOstream& operator<<(CNcbiOstream& out, CLocaseCharPtrConverter s)
428 {
429     for ( const char* c = s.m_String; *c; ++c ) {
430         out.put(char(tolower((unsigned char)(*c))));
431     }
432     return out;
433 }
434 
435 
436 #ifdef NCBI_COMPILER_MSVC
437 #  if _MSC_VER >= 1200  &&  _MSC_VER < 1300
operator <<(CNcbiOstream & out,__int64 val)438 CNcbiOstream& operator<<(CNcbiOstream& out, __int64 val)
439 {
440     return (out << NStr::Int8ToString(val));
441 }
442 #  endif
443 #endif
444 
445 
// Return a printable representation of a single character:
//   - NUL, TAB, VT, BS, CR, FF, BEL, LF, backslash, single and double quotes
//     are returned as two-character C-style escapes (\0, \t, \v, ...);
//   - any other character for which isprint() is false is returned as
//     "\x" followed by two uppercase hexadecimal digits;
//   - everything else is returned as is.
string Printable(char c)
{
    static const char kHex[] = "0123456789ABCDEF";

    const char* escape = 0;
    switch ( c ) {
    case '\0':  escape = "\\0";   break;
    case '\t':  escape = "\\t";   break;
    case '\v':  escape = "\\v";   break;
    case '\b':  escape = "\\b";   break;
    case '\r':  escape = "\\r";   break;
    case '\f':  escape = "\\f";   break;
    case '\a':  escape = "\\a";   break;
    case '\n':  escape = "\\n";   break;
    case '\\':  escape = "\\\\";  break;
    case '\'':  escape = "\\'";   break;
    case '"':   escape = "\\\"";  break;
    default:                      break;
    }
    if ( escape ) {
        return string(escape);
    }

    const unsigned char uc = (unsigned char) c;
    if ( isprint(uc) ) {
        return string(1, c);
    }

    string hex("\\x");
    hex += kHex[uc / 16];
    hex += kHex[uc % 16];
    return hex;
}
474 
475 
// Tell whether "c" is written in an escaped form by s_WritePrintable():
// true for TAB, VT, BS, CR, FF, BEL, LF, backslash, both kinds of quotes,
// and for any character for which isprint() is false.
static inline
bool s_IsQuoted(char c)
{
    switch ( c ) {
    case '\t':
    case '\v':
    case '\b':
    case '\r':
    case '\f':
    case '\a':
    case '\n':
    case '\\':
    case '\'':
    case '"':
        return true;
    default:
        break;
    }
    return !isprint((unsigned char) c);
}
484 
485 
486 static inline
s_WritePrintable(CNcbiOstream & out,char c,char n)487 void s_WritePrintable(CNcbiOstream& out, char c, char n)
488 {
489     switch ( c ) {
490     case '\t':  out.write("\\t",  2);  return;
491     case '\v':  out.write("\\v",  2);  return;
492     case '\b':  out.write("\\b",  2);  return;
493     case '\r':  out.write("\\r",  2);  return;
494     case '\f':  out.write("\\f",  2);  return;
495     case '\a':  out.write("\\a",  2);  return;
496     case '\n':  out.write("\\n",  2);  return;
497     case '\\':  out.write("\\\\", 2);  return;
498     case '\'':  out.write("\\'",  2);  return;
499     case '"':   out.write("\\\"", 2);  return;
500     default:
501         if ( isprint((unsigned char) c) ) {
502             out.put(c);
503             return;
504         }
505         break;
506     }
507 
508     bool full = !s_IsQuoted(n)  &&  '0' <= n  &&  n <= '7' ? true : false;
509     unsigned char v;
510     char octal[4];
511     int k = 1;
512 
513     *octal = '\\';
514     v = (unsigned char)((unsigned char) c >> 6);
515     if (v  ||  full) {
516         octal[k++] = char('0' + v);
517         full = true;
518     }
519     v = ((unsigned char) c >> 3) & 7;
520     if (v  ||  full) {
521         octal[k++] = char('0' + v);
522     }
523     v = (unsigned char) c & 7;
524     octal[k++] = char('0' + v);
525     out.write(octal, k);
526 }
527 
528 
operator <<(CNcbiOstream & out,CPrintableStringConverter s)529 CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableStringConverter s)
530 {
531     size_t size = s.m_String.size();
532     if (size) {
533         const char* data = s.m_String.data();
534         for (size_t i = 0;  i < size - 1;  ++i) {
535             s_WritePrintable(out, data[i], data[i + 1]);
536         }
537         s_WritePrintable(out, data[size - 1], '\0');
538     }
539     return out;
540 }
541 
542 
operator <<(CNcbiOstream & out,CPrintableCharPtrConverter s)543 CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableCharPtrConverter s)
544 {
545     const char* p = s.m_String;
546     char        c = *p;
547     while (c) {
548         char n = *++p;
549         s_WritePrintable(out, c, n);
550         c = n;
551     }
552     return out;
553 }
554 
555 
556 #if defined(NCBI_COMPILER_WORKSHOP)
557 // We have to use two #if's here because KAI C++ cannot handle #if foo == bar
558 #  if (NCBI_COMPILER_VERSION == 530)
559 // The version that ships with the compiler is buggy.
560 // Here's a working (and simpler!) one.
561 template<>
read(char * s,streamsize n)562 istream& istream::read(char *s, streamsize n)
563 {
564     sentry ipfx(*this, 1);
565 
566     try {
567         if (rdbuf()->sgetc() == traits_type::eof()) {
568             // Workaround for bug in sgetn.  *SIGH*.
569             __chcount = 0;
570             setstate(eofbit);
571             return *this;
572         }
573         __chcount = rdbuf()->sgetn(s, n);
574         if (__chcount == 0) {
575             setstate(eofbit);
576         } else if (__chcount < n) {
577             setstate(eofbit | failbit);
578         } else if (!ipfx) {
579             setstate(failbit);
580         }
581     } catch (...) {
582         setstate(badbit | failbit);
583     }
584 
585     return *this;
586 }
587 #  endif  /* NCBI_COMPILER_VERSION == 530 */
588 #endif  /* NCBI_COMPILER_WORKSHOP */
589 
590 
ReadIntoUtf8(CNcbiIstream & input,CStringUTF8 * result,EEncodingForm ef,EReadUnknownNoBOM what_if_no_bom)591 EEncodingForm ReadIntoUtf8(
592     CNcbiIstream&     input,
593     CStringUTF8*      result,
594     EEncodingForm     ef             /* = eEncodingForm_Unknown */,
595     EReadUnknownNoBOM what_if_no_bom /* = eNoBOM_GuessEncoding  */
596 )
597 {
598     EEncodingForm ef_bom = eEncodingForm_Unknown;
599     result->erase();
600     if (!input.good()) {
601         return ef_bom;
602     }
603 
604     const int buf_size = 4096;//2048;//256;
605     char tmp[buf_size+2];
606     Uint2* us = reinterpret_cast<Uint2*>(tmp);
607 
608     // check for Byte Order Mark
609     const int bom_max = 4;
610     memset(tmp,0,bom_max);
611     input.read(tmp,bom_max);
612     int n = (int)input.gcount();
613     {
614         int bom_len=0;
615         Uchar* uc = reinterpret_cast<Uchar*>(tmp);
616         if (n >= 3  &&  uc[0] == 0xEF  &&  uc[1] == 0xBB  &&  uc[2] == 0xBF) {
617             ef_bom = eEncodingForm_Utf8;
618             uc[0] = uc[3];
619             bom_len=3;
620         }
621         else if (n >= 2 && (us[0] == 0xFEFF || us[0] == 0xFFFE)) {
622             if (us[0] == 0xFEFF) {
623                 ef_bom = eEncodingForm_Utf16Native;
624             } else {
625                 ef_bom = eEncodingForm_Utf16Foreign;
626             }
627             us[0] = us[1];
628             bom_len=2;
629         }
630         if (ef == eEncodingForm_Unknown  ||  ef == ef_bom) {
631             ef = ef_bom;
632             n -= bom_len;
633         }
634         // else proceed at user's risk
635     }
636 
637     // keep reading
638     while (n != 0  ||  (input.good()  &&  !input.eof())) {
639 
640         if (n == 0) {
641             input.read(tmp, buf_size);
642             n = (int) input.gcount();
643             result->reserve(max(result->capacity(), result->size() + n));
644         }
645         tmp[n] = '\0';
646 
647         switch (ef) {
648         case eEncodingForm_Utf16Foreign:
649             {
650                 char buf[buf_size];
651                 NcbiSys_swab(tmp, buf, n);
652                 memcpy(tmp, buf, n);
653             }
654             // no break here
655         case eEncodingForm_Utf16Native:
656             {
657                 Uint2* u = us;
658 #if 0
659                 for (n = n/2;  n--;  ++u) {
660                     result->Append(*u);
661                 }
662 #else
663                 *result += CUtf8::AsUTF8(u, n/2);
664 #endif
665             }
666             break;
667         case eEncodingForm_ISO8859_1:
668             //result->Append(tmp,eEncoding_ISO8859_1);
669             *result += CUtf8::AsUTF8(tmp,eEncoding_ISO8859_1);
670             break;
671         case eEncodingForm_Windows_1252:
672             //result->Append(tmp,eEncoding_Windows_1252);
673             *result += CUtf8::AsUTF8(tmp,eEncoding_Windows_1252);
674             break;
675         case eEncodingForm_Utf8:
676             //result->Append(tmp,eEncoding_UTF8);
677             result->append(tmp,n);
678             break;
679         default:
680             if (what_if_no_bom == eNoBOM_GuessEncoding) {
681                 if (n == bom_max) {
682                     input.read(tmp + n, buf_size - n);
683                     n += (int) input.gcount();
684                     result->reserve(max(result->capacity(), result->size() + n));
685                 }
686                 tmp[n] = '\0';
687                 EEncoding enc = CUtf8::GuessEncoding(tmp);
688                 switch (enc) {
689                 default:
690                 case eEncoding_Unknown:
691                     if (CUtf8::GetValidBytesCount( CTempString(tmp, n)) != 0) {
692                         ef = eEncodingForm_Utf8;
693                         //result->Append(tmp, enc);
694                         *result += CUtf8::AsUTF8(tmp, enc);
695                     }
696                     else {
697                         NCBI_THROW(CCoreException, eCore,
698                                 "ReadIntoUtf8: cannot guess text encoding");
699                     }
700                     break;
701                 case eEncoding_UTF8:
702                     ef = eEncodingForm_Utf8;
703                     // no break here
704                 case eEncoding_Ascii:
705                 case eEncoding_ISO8859_1:
706                 case eEncoding_Windows_1252:
707                     //result->Append(tmp, enc);
708                     *result += CUtf8::AsUTF8(tmp,enc);
709                     break;
710                 }
711             } else {
712                 //result->Append(tmp, eEncoding_UTF8);
713                 result->append(tmp, n);
714             }
715             break;
716         }
717         n = 0;
718     }
719     return ef_bom;
720 }
721 
722 
GetTextEncodingForm(CNcbiIstream & input,EBOMDiscard discard_bom)723 EEncodingForm GetTextEncodingForm(CNcbiIstream& input,
724                                   EBOMDiscard   discard_bom)
725 {
726     EEncodingForm ef = eEncodingForm_Unknown;
727     if (input.good()) {
728         const int bom_max = 4;
729         char tmp[bom_max];
730         memset(tmp, 0, bom_max);
731         Uint2* us = reinterpret_cast<Uint2*>(tmp);
732         Uchar* uc = reinterpret_cast<Uchar*>(tmp);
733         input.get(tmp[0]);
734         int n = (int) input.gcount();
735         if (n == 1  &&  (uc[0] == 0xEF  ||  uc[0] == 0xFE  ||  uc[0] == 0xFF)){
736             input.get(tmp[1]);
737             if (input.gcount() == 1) {
738                 ++n;
739                 if (us[0] == 0xFEFF) {
740                     ef = eEncodingForm_Utf16Native;
741                 } else if (us[0] == 0xFFFE) {
742                     ef = eEncodingForm_Utf16Foreign;
743                 } else if (uc[1] == 0xBB) {
744                     input.get(tmp[2]);
745                     if (input.gcount() == 1) {
746                         ++n;
747                         if (uc[2] == 0xBF) {
748                             ef = eEncodingForm_Utf8;
749                         }
750                     }
751                 }
752             }
753         }
754         if (ef == eEncodingForm_Unknown) {
755             if (n > 1) {
756                 CStreamUtils::Pushback(input, tmp, n);
757             } else if (n == 1) {
758                 input.unget();
759             }
760         } else {
761             if (discard_bom == eBOM_Keep) {
762                 CStreamUtils::Pushback(input, tmp, n);
763             }
764         }
765     }
766     return ef;
767 }
768 
operator <<(CNcbiOstream & str,const CByteOrderMark & bom)769 CNcbiOstream& operator<< (CNcbiOstream& str, const CByteOrderMark&  bom)
770 {
771     switch (bom.GetEncodingForm()) {
772     /// Stream has no BOM.
773     default:
774     case eEncodingForm_Unknown:
775     case eEncodingForm_ISO8859_1:
776     case eEncodingForm_Windows_1252:
777         break;
778     case eEncodingForm_Utf8:
779         str << Uint1(0xEF) << Uint1(0xBB) << Uint1(0xBF);
780         break;
781     case eEncodingForm_Utf16Native:
782 #ifdef WORDS_BIGENDIAN
783         str << Uint1(0xFE) << Uint1(0xFF);
784 #else
785         str << Uint1(0xFF) << Uint1(0xFE);
786 #endif
787         break;
788     case eEncodingForm_Utf16Foreign:
789 #ifdef WORDS_BIGENDIAN
790         str << Uint1(0xFF) << Uint1(0xFE);
791 #else
792         str << Uint1(0xFE) << Uint1(0xFF);
793 #endif
794         break;
795     }
796     return str;
797 }
798 
799 
800 #include "ncbi_base64.c"
801 
802 
803 END_NCBI_SCOPE
804 
805 
806 // See in the header why it is outside of NCBI scope (SunPro bug workaround...)
807 
808 #if defined(NCBI_USE_OLD_IOSTREAM)
809 
operator <<(NCBI_NS_NCBI::CNcbiOstream & os,const NCBI_NS_STD::string & str)810 extern NCBI_NS_NCBI::CNcbiOstream& operator<<(NCBI_NS_NCBI::CNcbiOstream& os,
811                                               const NCBI_NS_STD::string& str)
812 {
813     return str.empty() ? os : os << str.c_str();
814 }
815 
816 
operator >>(NCBI_NS_NCBI::CNcbiIstream & is,NCBI_NS_STD::string & str)817 extern NCBI_NS_NCBI::CNcbiIstream& operator>>(NCBI_NS_NCBI::CNcbiIstream& is,
818                                               NCBI_NS_STD::string& str)
819 {
820     int ch;
821     if ( !is.ipfx() )
822         return is;
823 
824     str.erase();
825 
826     SIZE_TYPE end = str.max_size();
827     if ( is.width() )
828         end = (streamsize) end < is.width() ? end : is.width();
829 
830     SIZE_TYPE i = 0;
831     for (ch = is.rdbuf()->sbumpc();
832          ch != EOF  &&  !isspace((unsigned char) ch);
833          ch = is.rdbuf()->sbumpc()) {
834         str.append(1, (char) ch);
835         if (++i == end)
836             break;
837     }
838     if (ch == EOF)
839         is.clear(NcbiEofbit | is.rdstate());
840     if ( !i )
841         is.clear(NcbiFailbit | is.rdstate());
842 
843     is.width(0);
844     return is;
845 }
846 
847 #endif  /* NCBI_USE_OLD_IOSTREAM */
848