1 /* $Id: ncbistre.cpp 633612 2021-06-22 17:38:24Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Denis Vakatov
27 *
28 * File Description:
29 * NCBI C++ stream class wrappers
30 * Triggering between "new" and "old" C++ stream libraries
31 *
32 */
33
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <corelib/ncbisys.hpp>
#include <corelib/ncbistre.hpp>
#include <corelib/stream_utils.hpp>
#include <vector>
#if defined(NCBI_OS_UNIX)
#  include <unistd.h>
#endif
42
43
44 BEGIN_NCBI_SCOPE
45
46
47 #if defined(NCBI_OS_MSWIN) && defined(_UNICODE)
ncbi_Utf8ToWstring(const char * utf8)48 wstring ncbi_Utf8ToWstring(const char *utf8)
49 {
50 return _T_XSTRING(utf8);
51 }
52 #endif
53
54
NcbiGetline(CNcbiIstream & is,string & str,const string & delims,SIZE_TYPE * count)55 CNcbiIstream& NcbiGetline(CNcbiIstream& is, string& str, const string& delims,
56 SIZE_TYPE* count)
57 {
58 str.erase();
59
60 IOS_BASE::fmtflags f = is.flags();
61 is.unsetf(IOS_BASE::skipws);
62 #ifdef NO_PUBSYNC
63 if ( !is.ipfx(1) ) {
64 is.flags(f);
65 is.setstate(NcbiFailbit);
66 return is;
67 }
68 #else
69 CNcbiIstream::sentry s(is);
70 if ( !s ) {
71 is.flags(f);
72 is.setstate(NcbiFailbit);
73 return is;
74 }
75 #endif //NO_PUBSYNC
76 _ASSERT( is.good() );
77
78 char buf[1024];
79 SIZE_TYPE pos = 0;
80 SIZE_TYPE size = 0;
81 SIZE_TYPE max_size = str.max_size();
82 SIZE_TYPE delim_count = 0;
83 IOS_BASE::iostate iostate = NcbiGoodbit/*0*/;
84 for (;;) {
85 CT_INT_TYPE ch = is.rdbuf()->sbumpc();
86 if ( CT_EQ_INT_TYPE(ch, CT_EOF) ) {
87 iostate = NcbiEofbit;
88 break;
89 }
90 SIZE_TYPE delim_pos = delims.find(CT_TO_CHAR_TYPE(ch));
91 if (delim_pos != NPOS) {
92 // Special case -- if two different delimiters are back to
93 // back and in the same order as in delims, treat them as
94 // a single delimiter (necessary for correct handling of
95 // DOS/MAC-style CR/LF endings).
96 ch = is.rdbuf()->sgetc();
97 if (!CT_EQ_INT_TYPE(ch, CT_EOF)
98 && delims.find(CT_TO_CHAR_TYPE(ch), delim_pos + 1) != NPOS) {
99 is.rdbuf()->sbumpc();
100 delim_count = 2;
101 } else {
102 delim_count = 1;
103 }
104 break;
105 }
106 if (size == max_size) {
107 CT_INT_TYPE bk = is.rdbuf()->sungetc();
108 iostate = CT_EQ_INT_TYPE(bk, ch) ? NcbiFailbit : NcbiBadbit;
109 break;
110 }
111
112 buf[pos++] = CT_TO_CHAR_TYPE(ch);
113 if (pos == sizeof(buf)) {
114 str.append(buf, pos);
115 pos = 0;
116 }
117 size++;
118 }
119 if (pos > 0)
120 str.append(buf, pos);
121 if (count != NULL)
122 *count = size + delim_count;
123
124 #ifdef NO_PUBSYNC
125 is.isfx();
126 #endif //NO_PUBSYNC
127 is.flags(f);
128 if (iostate) {
129 if (iostate == NcbiEofbit && str.empty())
130 iostate |= NcbiFailbit;
131 is.clear(iostate);
132 }
133 return is;
134 }
135
136
NcbiGetline(CNcbiIstream & is,string & str,char delim,SIZE_TYPE * count)137 extern CNcbiIstream& NcbiGetline(CNcbiIstream& is, string& str, char delim,
138 SIZE_TYPE* count)
139 {
140 #if defined(NCBI_USE_OLD_IOSTREAM)
141 return NcbiGetline(is, str, string(1, delim), count);
142 #else
143 str.erase();
144
145 if ( !is.good() ) {
146 is.setstate(NcbiFailbit);
147 return is;
148 }
149
150 char buf[1024];
151 SIZE_TYPE size = 0;
152 SIZE_TYPE max_size = str.max_size();
153 do {
154 CT_INT_TYPE nextc = is.get();
155 if (CT_EQ_INT_TYPE(nextc, CT_EOF)
156 || CT_EQ_INT_TYPE(nextc, CT_TO_INT_TYPE(delim))) {
157 ++size;
158 break;
159 }
160 if ( !is.unget() )
161 break;
162 if (size == max_size) {
163 is.clear(NcbiFailbit);
164 break;
165 }
166 SIZE_TYPE n = max_size - size;
167 is.get(buf, n < sizeof(buf) ? n : sizeof(buf), delim);
168 n = (size_t) is.gcount();
169 str.append(buf, n);
170 size += n;
171 _ASSERT(size == str.length());
172 } while ( is.good() );
173 #endif
174
175 if (is.rdstate() == NcbiEofbit && str.empty())
176 is.setstate(NcbiFailbit);
177 if (count != NULL)
178 *count = size;
179 return is;
180 }
181
182
// End-of-line sequence of the platform the library is built for:
// CR/LF when NCBI_OS_MSWIN is defined, a single LF otherwise.
// The returned pointer refers to a static, NUL-terminated array.
const char* Endl(void)
{
    static const char kPlatformEol[] =
#if defined(NCBI_OS_MSWIN)
        "\r\n"
#else /* assume UNIX-like EOLs */
        "\n"
#endif
        ;
    return kPlatformEol;
}
193
194
195 // Get a line taking into account platform-specific of End-Of-Line
NcbiGetlineEOL(CNcbiIstream & is,string & str,SIZE_TYPE * count)196 CNcbiIstream& NcbiGetlineEOL(CNcbiIstream& is, string& str, SIZE_TYPE* count)
197 {
198 #if defined(NCBI_OS_MSWIN)
199 NcbiGetline(is, str, '\n', count);
200 if (!str.empty() && str[str.length() - 1] == '\r')
201 str.resize(str.length() - 1);
202 #elif defined(NCBI_OS_DARWIN)
203 NcbiGetline(is, str, "\r\n", count);
204 #else /* assume UNIX-like EOLs */
205 NcbiGetline(is, str, '\n', count);
206 #endif //NCBI_OS_...
207 return is;
208 }
209
210
NcbiStreamCopy(CNcbiOstream & os,CNcbiIstream & is)211 bool NcbiStreamCopy(CNcbiOstream& os, CNcbiIstream& is)
212 {
213 if (!os.good() || is.bad())
214 return false;
215 if (CT_EQ_INT_TYPE(is.peek(), CT_EOF)) {
216 // NB: C++ Std says nothing about eofbit (27.6.1.3.27)
217 return !is.bad();
218 }
219 os << is.rdbuf();
220 return os.good() && os.flush() ? true : false;
221 }
222
223
NcbiStreamCopyThrow(CNcbiOstream & os,CNcbiIstream & is)224 void NcbiStreamCopyThrow(CNcbiOstream& os, CNcbiIstream& is)
225 {
226 bool success = false;
227 try {
228 success = NcbiStreamCopy(os, is);
229 }
230 NCBI_CATCH_ALL("NcbiStreamCopy()");
231 if (!success) {
232 NCBI_THROW(CCoreException, eCore, "NcbiStreamCopy() failed");
233 }
234 }
235
236
NcbiStreamToString(string * str,CNcbiIstream & is,size_t pos)237 size_t NcbiStreamToString(string* str, CNcbiIstream& is, size_t pos)
238 {
239 if (!is.good()) {
240 // Can't extract anything
241 if (str)
242 str->resize(pos);
243 is.setstate(NcbiFailbit);
244 return 0;
245 }
246
247 char buf[5120];
248 size_t buf_size = sizeof(buf);
249 size_t str_size;
250
251 if (str) {
252 str_size = pos;
253 if (str->size() < str_size + buf_size)
254 str->resize(str_size + buf_size);
255 } else
256 str_size = pos = 0;
257
258 do {
259 try {
260 is.read(str ? &(*str)[str_size] : buf, buf_size);
261 } catch (...) {
262 if (str)
263 str->resize(str_size);
264 throw;
265 }
266 streamsize count = is.gcount();
267 str_size += (size_t) count;
268 if (str) {
269 if ((size_t) count == buf_size) {
270 if (buf_size < (1UL << 20))
271 buf_size <<= 1;
272 str->resize(str_size + buf_size);
273 } else
274 _ASSERT(!is.good());
275 }
276 } while (is.good());
277
278 _ASSERT(str_size >= pos);
279 if (str)
280 str->resize(str_size);
281
282 if (!(str_size -= pos)) {
283 // Nothing extracted
284 is.setstate(NcbiFailbit);
285 return 0;
286 }
287
288 // NB: istream::read() sets both bits at EOF (27.6.1.3.28)
289 IOS_BASE::iostate iostate = is.rdstate();
290 if (iostate != (NcbiFailbit | NcbiEofbit))
291 return 0;
292 is.clear(iostate & ~NcbiFailbit);
293 return str_size;
294 }
295
296
NcbiStreamCompare(CNcbiIstream & is1,CNcbiIstream & is2)297 bool NcbiStreamCompare(CNcbiIstream& is1, CNcbiIstream& is2)
298 {
299 while (is1 && is2) {
300 char c1 = (char)is1.get();
301 char c2 = (char)is2.get();
302 if (c1 != c2) {
303 return false;
304 }
305 }
306 return is1.eof() && is2.eof();
307 }
308
309
310 static inline
x_GetChar(CNcbiIstream & is,ECompareTextMode mode,char * buf,size_t buf_size,char * & pos,size_t & sizeleft)311 char x_GetChar(CNcbiIstream& is, ECompareTextMode mode,
312 char* buf, size_t buf_size, char*& pos, size_t& sizeleft)
313 {
314 char c;
315 do {
316 if ( !sizeleft ) {
317 is.read(buf, buf_size);
318 sizeleft = (size_t) is.gcount();
319 pos = buf;
320 }
321 if (sizeleft > 0) {
322 c = *pos++;
323 --sizeleft;
324 } else {
325 return '\0';
326 }
327 } while ( (mode == eCompareText_IgnoreEol
328 && (c == '\n' || c == '\r')) ||
329 (mode == eCompareText_IgnoreWhiteSpace
330 && isspace((unsigned char) c)) );
331 return c;
332 }
333
334
NcbiStreamCompareText(CNcbiIstream & is1,CNcbiIstream & is2,ECompareTextMode mode,size_t buf_size)335 bool NcbiStreamCompareText(CNcbiIstream& is1, CNcbiIstream& is2,
336 ECompareTextMode mode, size_t buf_size)
337 {
338 if ( !buf_size ) {
339 buf_size = 4 * 1024;
340 }
341 char* buf1 = new char[buf_size];
342 char* buf2 = new char[buf_size];
343 size_t size1 = 0, size2 = 0;
344 char *pos1 = 0, *pos2 = 0;
345 bool equal = true;
346 do {
347 char c1 = x_GetChar(is1, mode, buf1, buf_size, pos1, size1);
348 char c2 = x_GetChar(is2, mode, buf2, buf_size, pos2, size2);
349 equal = (c1 == c2);
350 if (!c1 || !c2) {
351 break;
352 }
353 } while ( equal );
354 delete[] buf1;
355 delete[] buf2;
356 return equal && is1.eof() && is2.eof();
357 }
358
359
NcbiStreamCompareText(CNcbiIstream & is,const string & str,ECompareTextMode mode,size_t buf_size)360 bool NcbiStreamCompareText(CNcbiIstream& is, const string& str,
361 ECompareTextMode mode, size_t buf_size)
362 {
363 CNcbiIstrstream istr(str);
364 return NcbiStreamCompareText(is, istr, mode, buf_size);
365 }
366
367
operator string(void) const368 CNcbiOstrstreamToString::operator string(void) const
369 {
370 #ifdef NCBI_SHUN_OSTRSTREAM
371 return m_Out.str();
372 #else
373 SIZE_TYPE len = (SIZE_TYPE) m_Out.pcount();
374 if ( !len ) {
375 return string();
376 }
377 const char* str = m_Out.str();
378 m_Out.freeze(false);
379 return string(str, len);
380 #endif
381 }
382
383
operator <<(CNcbiOstream & out,const CNcbiOstrstreamToString & s)384 CNcbiOstream& operator<<(CNcbiOstream& out, const CNcbiOstrstreamToString& s)
385 {
386 #ifdef NCBI_SHUN_OSTRSTREAM
387 out << s.m_Out.str();
388 #else
389 SIZE_TYPE len = (SIZE_TYPE) s.m_Out.pcount();
390 if ( len ) {
391 const char* str = s.m_Out.str();
392 s.m_Out.freeze(false);
393 out.write(str, len);
394 }
395 #endif
396 return out;
397 }
398
399
operator <<(CNcbiOstream & out,CUpcaseStringConverter s)400 CNcbiOstream& operator<<(CNcbiOstream& out, CUpcaseStringConverter s)
401 {
402 ITERATE ( string, c, s.m_String ) {
403 out.put(char(toupper((unsigned char)(*c))));
404 }
405 return out;
406 }
407
408
operator <<(CNcbiOstream & out,CLocaseStringConverter s)409 CNcbiOstream& operator<<(CNcbiOstream& out, CLocaseStringConverter s)
410 {
411 ITERATE ( string, c, s.m_String ) {
412 out.put(char(tolower((unsigned char)(*c))));
413 }
414 return out;
415 }
416
417
operator <<(CNcbiOstream & out,CUpcaseCharPtrConverter s)418 CNcbiOstream& operator<<(CNcbiOstream& out, CUpcaseCharPtrConverter s)
419 {
420 for ( const char* c = s.m_String; *c; ++c ) {
421 out.put(char(toupper((unsigned char)(*c))));
422 }
423 return out;
424 }
425
426
operator <<(CNcbiOstream & out,CLocaseCharPtrConverter s)427 CNcbiOstream& operator<<(CNcbiOstream& out, CLocaseCharPtrConverter s)
428 {
429 for ( const char* c = s.m_String; *c; ++c ) {
430 out.put(char(tolower((unsigned char)(*c))));
431 }
432 return out;
433 }
434
435
436 #ifdef NCBI_COMPILER_MSVC
437 # if _MSC_VER >= 1200 && _MSC_VER < 1300
operator <<(CNcbiOstream & out,__int64 val)438 CNcbiOstream& operator<<(CNcbiOstream& out, __int64 val)
439 {
440 return (out << NStr::Int8ToString(val));
441 }
442 # endif
443 #endif
444
445
Printable(char c)446 string Printable(char c)
447 {
448 static const char kHex[] = "0123456789ABCDEF";
449
450 string s;
451 switch ( c ) {
452 case '\0': s += "\\0"; break;
453 case '\t': s += "\\t"; break;
454 case '\v': s += "\\v"; break;
455 case '\b': s += "\\b"; break;
456 case '\r': s += "\\r"; break;
457 case '\f': s += "\\f"; break;
458 case '\a': s += "\\a"; break;
459 case '\n': s += "\\n"; break;
460 case '\\': s += "\\\\"; break;
461 case '\'': s += "\\'"; break;
462 case '"': s += "\\\""; break;
463 default:
464 if ( !isprint((unsigned char) c) ) {
465 s += "\\x";
466 s += kHex[(unsigned char) c / 16];
467 s += kHex[(unsigned char) c % 16];
468 } else
469 s += c;
470 break;
471 }
472 return s;
473 }
474
475
// Tell whether character "c" is written in escaped form by
// s_WritePrintable():  the listed control characters, backslash, both quote
// characters, and anything for which isprint() is false.
static inline
bool s_IsQuoted(char c)
{
    switch ( c ) {
    case '\t':
    case '\v':
    case '\b':
    case '\r':
    case '\f':
    case '\a':
    case '\n':
    case '\\':
    case '\'':
    case '"':
        return true;
    default:
        return !isprint((unsigned char) c);
    }
}
484
485
486 static inline
s_WritePrintable(CNcbiOstream & out,char c,char n)487 void s_WritePrintable(CNcbiOstream& out, char c, char n)
488 {
489 switch ( c ) {
490 case '\t': out.write("\\t", 2); return;
491 case '\v': out.write("\\v", 2); return;
492 case '\b': out.write("\\b", 2); return;
493 case '\r': out.write("\\r", 2); return;
494 case '\f': out.write("\\f", 2); return;
495 case '\a': out.write("\\a", 2); return;
496 case '\n': out.write("\\n", 2); return;
497 case '\\': out.write("\\\\", 2); return;
498 case '\'': out.write("\\'", 2); return;
499 case '"': out.write("\\\"", 2); return;
500 default:
501 if ( isprint((unsigned char) c) ) {
502 out.put(c);
503 return;
504 }
505 break;
506 }
507
508 bool full = !s_IsQuoted(n) && '0' <= n && n <= '7' ? true : false;
509 unsigned char v;
510 char octal[4];
511 int k = 1;
512
513 *octal = '\\';
514 v = (unsigned char)((unsigned char) c >> 6);
515 if (v || full) {
516 octal[k++] = char('0' + v);
517 full = true;
518 }
519 v = ((unsigned char) c >> 3) & 7;
520 if (v || full) {
521 octal[k++] = char('0' + v);
522 }
523 v = (unsigned char) c & 7;
524 octal[k++] = char('0' + v);
525 out.write(octal, k);
526 }
527
528
operator <<(CNcbiOstream & out,CPrintableStringConverter s)529 CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableStringConverter s)
530 {
531 size_t size = s.m_String.size();
532 if (size) {
533 const char* data = s.m_String.data();
534 for (size_t i = 0; i < size - 1; ++i) {
535 s_WritePrintable(out, data[i], data[i + 1]);
536 }
537 s_WritePrintable(out, data[size - 1], '\0');
538 }
539 return out;
540 }
541
542
operator <<(CNcbiOstream & out,CPrintableCharPtrConverter s)543 CNcbiOstream& operator<<(CNcbiOstream& out, CPrintableCharPtrConverter s)
544 {
545 const char* p = s.m_String;
546 char c = *p;
547 while (c) {
548 char n = *++p;
549 s_WritePrintable(out, c, n);
550 c = n;
551 }
552 return out;
553 }
554
555
556 #if defined(NCBI_COMPILER_WORKSHOP)
557 // We have to use two #if's here because KAI C++ cannot handle #if foo == bar
558 # if (NCBI_COMPILER_VERSION == 530)
559 // The version that ships with the compiler is buggy.
560 // Here's a working (and simpler!) one.
561 template<>
read(char * s,streamsize n)562 istream& istream::read(char *s, streamsize n)
563 {
564 sentry ipfx(*this, 1);
565
566 try {
567 if (rdbuf()->sgetc() == traits_type::eof()) {
568 // Workaround for bug in sgetn. *SIGH*.
569 __chcount = 0;
570 setstate(eofbit);
571 return *this;
572 }
573 __chcount = rdbuf()->sgetn(s, n);
574 if (__chcount == 0) {
575 setstate(eofbit);
576 } else if (__chcount < n) {
577 setstate(eofbit | failbit);
578 } else if (!ipfx) {
579 setstate(failbit);
580 }
581 } catch (...) {
582 setstate(badbit | failbit);
583 }
584
585 return *this;
586 }
587 # endif /* NCBI_COMPILER_VERSION == 530 */
588 #endif /* NCBI_COMPILER_WORKSHOP */
589
590
ReadIntoUtf8(CNcbiIstream & input,CStringUTF8 * result,EEncodingForm ef,EReadUnknownNoBOM what_if_no_bom)591 EEncodingForm ReadIntoUtf8(
592 CNcbiIstream& input,
593 CStringUTF8* result,
594 EEncodingForm ef /* = eEncodingForm_Unknown */,
595 EReadUnknownNoBOM what_if_no_bom /* = eNoBOM_GuessEncoding */
596 )
597 {
598 EEncodingForm ef_bom = eEncodingForm_Unknown;
599 result->erase();
600 if (!input.good()) {
601 return ef_bom;
602 }
603
604 const int buf_size = 4096;//2048;//256;
605 char tmp[buf_size+2];
606 Uint2* us = reinterpret_cast<Uint2*>(tmp);
607
608 // check for Byte Order Mark
609 const int bom_max = 4;
610 memset(tmp,0,bom_max);
611 input.read(tmp,bom_max);
612 int n = (int)input.gcount();
613 {
614 int bom_len=0;
615 Uchar* uc = reinterpret_cast<Uchar*>(tmp);
616 if (n >= 3 && uc[0] == 0xEF && uc[1] == 0xBB && uc[2] == 0xBF) {
617 ef_bom = eEncodingForm_Utf8;
618 uc[0] = uc[3];
619 bom_len=3;
620 }
621 else if (n >= 2 && (us[0] == 0xFEFF || us[0] == 0xFFFE)) {
622 if (us[0] == 0xFEFF) {
623 ef_bom = eEncodingForm_Utf16Native;
624 } else {
625 ef_bom = eEncodingForm_Utf16Foreign;
626 }
627 us[0] = us[1];
628 bom_len=2;
629 }
630 if (ef == eEncodingForm_Unknown || ef == ef_bom) {
631 ef = ef_bom;
632 n -= bom_len;
633 }
634 // else proceed at user's risk
635 }
636
637 // keep reading
638 while (n != 0 || (input.good() && !input.eof())) {
639
640 if (n == 0) {
641 input.read(tmp, buf_size);
642 n = (int) input.gcount();
643 result->reserve(max(result->capacity(), result->size() + n));
644 }
645 tmp[n] = '\0';
646
647 switch (ef) {
648 case eEncodingForm_Utf16Foreign:
649 {
650 char buf[buf_size];
651 NcbiSys_swab(tmp, buf, n);
652 memcpy(tmp, buf, n);
653 }
654 // no break here
655 case eEncodingForm_Utf16Native:
656 {
657 Uint2* u = us;
658 #if 0
659 for (n = n/2; n--; ++u) {
660 result->Append(*u);
661 }
662 #else
663 *result += CUtf8::AsUTF8(u, n/2);
664 #endif
665 }
666 break;
667 case eEncodingForm_ISO8859_1:
668 //result->Append(tmp,eEncoding_ISO8859_1);
669 *result += CUtf8::AsUTF8(tmp,eEncoding_ISO8859_1);
670 break;
671 case eEncodingForm_Windows_1252:
672 //result->Append(tmp,eEncoding_Windows_1252);
673 *result += CUtf8::AsUTF8(tmp,eEncoding_Windows_1252);
674 break;
675 case eEncodingForm_Utf8:
676 //result->Append(tmp,eEncoding_UTF8);
677 result->append(tmp,n);
678 break;
679 default:
680 if (what_if_no_bom == eNoBOM_GuessEncoding) {
681 if (n == bom_max) {
682 input.read(tmp + n, buf_size - n);
683 n += (int) input.gcount();
684 result->reserve(max(result->capacity(), result->size() + n));
685 }
686 tmp[n] = '\0';
687 EEncoding enc = CUtf8::GuessEncoding(tmp);
688 switch (enc) {
689 default:
690 case eEncoding_Unknown:
691 if (CUtf8::GetValidBytesCount( CTempString(tmp, n)) != 0) {
692 ef = eEncodingForm_Utf8;
693 //result->Append(tmp, enc);
694 *result += CUtf8::AsUTF8(tmp, enc);
695 }
696 else {
697 NCBI_THROW(CCoreException, eCore,
698 "ReadIntoUtf8: cannot guess text encoding");
699 }
700 break;
701 case eEncoding_UTF8:
702 ef = eEncodingForm_Utf8;
703 // no break here
704 case eEncoding_Ascii:
705 case eEncoding_ISO8859_1:
706 case eEncoding_Windows_1252:
707 //result->Append(tmp, enc);
708 *result += CUtf8::AsUTF8(tmp,enc);
709 break;
710 }
711 } else {
712 //result->Append(tmp, eEncoding_UTF8);
713 result->append(tmp, n);
714 }
715 break;
716 }
717 n = 0;
718 }
719 return ef_bom;
720 }
721
722
GetTextEncodingForm(CNcbiIstream & input,EBOMDiscard discard_bom)723 EEncodingForm GetTextEncodingForm(CNcbiIstream& input,
724 EBOMDiscard discard_bom)
725 {
726 EEncodingForm ef = eEncodingForm_Unknown;
727 if (input.good()) {
728 const int bom_max = 4;
729 char tmp[bom_max];
730 memset(tmp, 0, bom_max);
731 Uint2* us = reinterpret_cast<Uint2*>(tmp);
732 Uchar* uc = reinterpret_cast<Uchar*>(tmp);
733 input.get(tmp[0]);
734 int n = (int) input.gcount();
735 if (n == 1 && (uc[0] == 0xEF || uc[0] == 0xFE || uc[0] == 0xFF)){
736 input.get(tmp[1]);
737 if (input.gcount() == 1) {
738 ++n;
739 if (us[0] == 0xFEFF) {
740 ef = eEncodingForm_Utf16Native;
741 } else if (us[0] == 0xFFFE) {
742 ef = eEncodingForm_Utf16Foreign;
743 } else if (uc[1] == 0xBB) {
744 input.get(tmp[2]);
745 if (input.gcount() == 1) {
746 ++n;
747 if (uc[2] == 0xBF) {
748 ef = eEncodingForm_Utf8;
749 }
750 }
751 }
752 }
753 }
754 if (ef == eEncodingForm_Unknown) {
755 if (n > 1) {
756 CStreamUtils::Pushback(input, tmp, n);
757 } else if (n == 1) {
758 input.unget();
759 }
760 } else {
761 if (discard_bom == eBOM_Keep) {
762 CStreamUtils::Pushback(input, tmp, n);
763 }
764 }
765 }
766 return ef;
767 }
768
operator <<(CNcbiOstream & str,const CByteOrderMark & bom)769 CNcbiOstream& operator<< (CNcbiOstream& str, const CByteOrderMark& bom)
770 {
771 switch (bom.GetEncodingForm()) {
772 /// Stream has no BOM.
773 default:
774 case eEncodingForm_Unknown:
775 case eEncodingForm_ISO8859_1:
776 case eEncodingForm_Windows_1252:
777 break;
778 case eEncodingForm_Utf8:
779 str << Uint1(0xEF) << Uint1(0xBB) << Uint1(0xBF);
780 break;
781 case eEncodingForm_Utf16Native:
782 #ifdef WORDS_BIGENDIAN
783 str << Uint1(0xFE) << Uint1(0xFF);
784 #else
785 str << Uint1(0xFF) << Uint1(0xFE);
786 #endif
787 break;
788 case eEncodingForm_Utf16Foreign:
789 #ifdef WORDS_BIGENDIAN
790 str << Uint1(0xFF) << Uint1(0xFE);
791 #else
792 str << Uint1(0xFE) << Uint1(0xFF);
793 #endif
794 break;
795 }
796 return str;
797 }
798
799
800 #include "ncbi_base64.c"
801
802
803 END_NCBI_SCOPE
804
805
806 // See in the header why it is outside of NCBI scope (SunPro bug workaround...)
807
808 #if defined(NCBI_USE_OLD_IOSTREAM)
809
operator <<(NCBI_NS_NCBI::CNcbiOstream & os,const NCBI_NS_STD::string & str)810 extern NCBI_NS_NCBI::CNcbiOstream& operator<<(NCBI_NS_NCBI::CNcbiOstream& os,
811 const NCBI_NS_STD::string& str)
812 {
813 return str.empty() ? os : os << str.c_str();
814 }
815
816
operator >>(NCBI_NS_NCBI::CNcbiIstream & is,NCBI_NS_STD::string & str)817 extern NCBI_NS_NCBI::CNcbiIstream& operator>>(NCBI_NS_NCBI::CNcbiIstream& is,
818 NCBI_NS_STD::string& str)
819 {
820 int ch;
821 if ( !is.ipfx() )
822 return is;
823
824 str.erase();
825
826 SIZE_TYPE end = str.max_size();
827 if ( is.width() )
828 end = (streamsize) end < is.width() ? end : is.width();
829
830 SIZE_TYPE i = 0;
831 for (ch = is.rdbuf()->sbumpc();
832 ch != EOF && !isspace((unsigned char) ch);
833 ch = is.rdbuf()->sbumpc()) {
834 str.append(1, (char) ch);
835 if (++i == end)
836 break;
837 }
838 if (ch == EOF)
839 is.clear(NcbiEofbit | is.rdstate());
840 if ( !i )
841 is.clear(NcbiFailbit | is.rdstate());
842
843 is.width(0);
844 return is;
845 }
846
847 #endif /* NCBI_USE_OLD_IOSTREAM */
848