1 /* $Id: objistrxml.cpp 604400 2020-03-27 12:53:54Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko
27 *
28 * File Description:
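29 * Implementation of CObjectIStreamXml, the XML-format object input stream (tag and attribute parsing, entity decoding, primitive value readers).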
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/tempstr.hpp>
36 #include <serial/objistrxml.hpp>
37 #include <serial/enumvalues.hpp>
38 #include <serial/objhook.hpp>
39 #include <serial/impl/classinfo.hpp>
40 #include <serial/impl/choice.hpp>
41 #include <serial/impl/ptrinfo.hpp>
42 #include <serial/impl/continfo.hpp>
43 #include <serial/impl/aliasinfo.hpp>
44 #include <serial/impl/memberlist.hpp>
45 #include <serial/impl/memberid.hpp>
46
47 BEGIN_NCBI_SCOPE
48
// Namespace name of the XML Schema instance vocabulary; ReadName() compares
// the namespace bound to the current prefix against it when it meets a
// "nil" attribute.
static const char* s_SchemaInstanceNamespace =
    "http://www.w3.org/2001/XMLSchema-instance";
51
CreateObjectIStreamXml()52 CObjectIStream* CObjectIStream::CreateObjectIStreamXml()
53 {
54 return new CObjectIStreamXml();
55 }
56
CObjectIStreamXml(void)57 CObjectIStreamXml::CObjectIStreamXml(void)
58 : CObjectIStream(eSerial_Xml),
59 m_TagState(eTagOutside), m_Attlist(false),
60 m_StdXml(false), m_Doctype_found(false), m_IsNil(false),
61 m_Encoding( eEncoding_Unknown ),
62 m_StringEncoding( eEncoding_UTF8 ),
63 m_SkipNextTag(false)
64 {
65 m_Utf8Pos = m_Utf8Buf.begin();
66 }
67
CObjectIStreamXml(CNcbiIstream & in,EOwnership deleteIn)68 CObjectIStreamXml::CObjectIStreamXml(CNcbiIstream& in, EOwnership deleteIn)
69 : CObjectIStream(eSerial_Xml),
70 m_TagState(eTagOutside), m_Attlist(false),
71 m_StdXml(false), m_Doctype_found(false), m_IsNil(false),
72 m_Encoding( eEncoding_Unknown ),
73 m_StringEncoding( eEncoding_UTF8 ),
74 m_SkipNextTag(false)
75 {
76 m_Utf8Pos = m_Utf8Buf.begin();
77 Open(in, deleteIn);
78 }
79
~CObjectIStreamXml(void)80 CObjectIStreamXml::~CObjectIStreamXml(void)
81 {
82 }
83
ResetState(void)84 void CObjectIStreamXml::ResetState(void)
85 {
86 CObjectIStream::ResetState();
87 if (GetStackDepth() > 1) {
88 return;
89 }
90 m_TagState = eTagOutside;
91 m_LastTag.clear();
92 m_RejectedTag.clear();
93 m_Attlist = false;
94 m_IsNil = false;;
95 m_LastPrimitive.clear();
96 m_CurrNsPrefix.clear();
97 m_Utf8Buf.clear();
98 m_Utf8Pos = m_Utf8Buf.begin();
99 m_SkipNextTag = false;
100 }
101
GetEncoding(void) const102 EEncoding CObjectIStreamXml::GetEncoding(void) const
103 {
104 return m_Encoding;
105 }
106
SetDefaultStringEncoding(EEncoding enc)107 void CObjectIStreamXml::SetDefaultStringEncoding(EEncoding enc)
108 {
109 m_StringEncoding = enc;
110 }
111
GetDefaultStringEncoding(void) const112 EEncoding CObjectIStreamXml::GetDefaultStringEncoding(void) const
113 {
114 return m_StringEncoding;
115 }
116
EndOfData(void)117 bool CObjectIStreamXml::EndOfData(void)
118 {
119 if (CObjectIStream::EndOfData()) {
120 return true;
121 }
122 try {
123 SkipWSAndComments();
124 } catch (...) {
125 return true;
126 }
127 return false;
128 }
129
GetPosition(void) const130 string CObjectIStreamXml::GetPosition(void) const
131 {
132 return "line "+NStr::SizetToString(m_Input.GetLine());
133 }
134
SetEnforcedStdXml(bool set)135 void CObjectIStreamXml::SetEnforcedStdXml(bool set)
136 {
137 if ( set ) {
138 SetFlags(fFlagEnforcedStdXml);
139 }
140 else {
141 ClearFlags(fFlagEnforcedStdXml);
142 }
143 if (set) {
144 m_StdXml = false;
145 }
146 }
147
148 template<typename Type> inline
x_UseMemberDefault(void)149 Type CObjectIStreamXml::x_UseMemberDefault(void)
150 {
151 return GetMemberDefault() ? CTypeConverter<Type>::Get(GetMemberDefault()) : Type();
152 }
153
// True for ASCII letters and for the single-byte values 0xC0-0xD6,
// 0xD8-0xF6 and 0xF8-0xFF (0xD7 and 0xF7 are excluded).
static inline
bool IsBaseChar(char c)
{
    // compare as unsigned so the ranges read the same whether or not
    // plain char is signed
    const unsigned char u = static_cast<unsigned char>(c);
    if (u >= 'A' && u <= 'Z') {
        return true;
    }
    if (u >= 'a' && u <= 'z') {
        return true;
    }
    if (u >= 0xC0 && u <= 0xD6) {
        return true;
    }
    if (u >= 0xD8 && u <= 0xF6) {
        return true;
    }
    return u >= 0xF8;
}
164
// True for the ASCII decimal digits '0'..'9'.
static inline
bool IsDigit(char c)
{
    return !(c < '0' || c > '9');
}
170
// Placeholder for the "Ideographic" character class: no single-byte
// character is ever classified as ideographic here.
static inline
bool IsIdeographic(char /*unused*/)
{
    const bool ideographic = false;
    return ideographic;
}
176
177 static inline
IsLetter(char c)178 bool IsLetter(char c)
179 {
180 return IsBaseChar(c) || IsIdeographic(c);
181 }
182
183 static inline
IsFirstNameChar(char c)184 bool IsFirstNameChar(char c)
185 {
186 return IsLetter(c) || c == '_' || c == ':';
187 }
188
// Placeholder for the "CombiningChar" character class: no single-byte
// character is ever classified as combining here.
static inline
bool IsCombiningChar(char /*unused*/)
{
    const bool combining = false;
    return combining;
}
194
// The only extender recognized is the byte 0xB7.
static inline
bool IsExtender(char c)
{
    const char kExtender = '\xB7';
    return c == kExtender;
}
200
201 static inline
IsNameChar(char c)202 bool IsNameChar(char c)
203 {
204 return IsFirstNameChar(c) ||
205 IsDigit(c) || c == '.' || c == '-' ||
206 IsCombiningChar(c) || IsExtender(c);
207 }
208
// True for space, tab, line feed and carriage return.
static inline
bool IsWhiteSpace(char c)
{
    switch ( c ) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
214
// True for the characters that can terminate a tag's attribute area:
// '>' and '/'.
static inline
bool IsEndOfTagChar(char c)
{
    switch ( c ) {
    case '>':
    case '/':
        return true;
    default:
        return false;
    }
}
220
SkipWS(void)221 char CObjectIStreamXml::SkipWS(void)
222 {
223 // _ASSERT(InsideTag());
224 for ( ;; ) {
225 char c = m_Input.SkipSpaces();
226 switch ( c ) {
227 case '\t':
228 m_Input.SkipChar();
229 continue;
230 case '\r':
231 case '\n':
232 m_Input.SkipChar();
233 m_Input.SkipEndOfLine(c);
234 continue;
235 default:
236 return c;
237 }
238 }
239 }
240
SkipWSAndComments(void)241 char CObjectIStreamXml::SkipWSAndComments(void)
242 {
243 _ASSERT(OutsideTag());
244 for ( ;; ) {
245 char c = m_Input.SkipSpaces();
246 switch ( c ) {
247 case '\t':
248 m_Input.SkipChar();
249 continue;
250 case '\r':
251 case '\n':
252 m_Input.SkipChar();
253 m_Input.SkipEndOfLine(c);
254 continue;
255 case '<':
256 // http://www.w3.org/TR/REC-xml/#dt-comment
257 if ( m_Input.PeekChar(1) == '!' &&
258 m_Input.PeekChar(2) == '-' &&
259 m_Input.PeekChar(3) == '-' ) {
260 // start of comment
261 m_Input.SkipChars(4);
262 if (m_Input.PeekChar(0) == '-' &&
263 m_Input.PeekChar(1) == '-') {
264 ThrowError(fFormatError,
265 "double-hyphen '--' is not allowed in XML comments");
266 }
267 for ( ;; ) {
268 m_Input.FindChar('-');
269 if ( m_Input.PeekChar(1) == '-' ) {
270 // --
271 if ( m_Input.PeekChar(2) == '>' ) {
272 // -->
273 m_Input.SkipChars(3);
274 break;
275 }
276 else {
277 // --[^>]
278 ThrowError(fFormatError,
279 "double-hyphen '--' is not allowed in XML comments");
280 }
281 }
282 else {
283 // -[^-]
284 m_Input.SkipChars(2);
285 }
286
287 }
288 continue; // skip the next WS or comment
289 }
290 return '<';
291 default:
292 return c;
293 }
294 }
295 }
296
EndTag(void)297 void CObjectIStreamXml::EndTag(void)
298 {
299 char c = SkipWS();
300 if (m_Attlist) {
301 if (c == '=') {
302 m_Input.SkipChar();
303 c = SkipWS();
304 if (c == '\"') {
305 m_Input.SkipChar();
306 return;
307 }
308 }
309 if (c == '\"') {
310 m_Input.SkipChar();
311 m_TagState = eTagInsideOpening;
312 return;
313 }
314 if (c == '/' && m_Input.PeekChar(1) == '>' ) {
315 m_Input.SkipChars(2);
316 m_TagState = eTagInsideOpening;
317 Found_slash_gt();
318 return;
319 }
320 }
321 if ( c != '>' ) {
322 c = ReadUndefinedAttributes();
323 if ( c != '>' ) {
324 ThrowError(fFormatError, "'>' expected");
325 }
326 }
327 m_Input.SkipChar();
328 Found_gt();
329 }
330
EndOpeningTagSelfClosed(void)331 bool CObjectIStreamXml::EndOpeningTagSelfClosed(void)
332 {
333 if (!StackIsEmpty() && TopFrame().GetNotag()) {
334 if (SelfClosedTag()) {
335 return true;
336 }
337 }
338 if( InsideOpeningTag() ) {
339 char c = SkipWS();
340 if (m_Attlist) {
341 return false;
342 }
343 if ( c == '/' && m_Input.PeekChar(1) == '>' ) {
344 // end of self closed tag
345 m_Input.SkipChars(2);
346 Found_slash_gt();
347 return true;
348 }
349
350 if ( c != '>' ) {
351 c = ReadUndefinedAttributes();
352 if ( c == '/' && m_Input.PeekChar(1) == '>' ) {
353 // end of self closed tag
354 m_Input.SkipChars(2);
355 Found_slash_gt();
356 return true;
357 }
358 if ( c != '>' )
359 ThrowError(fFormatError, "end of tag expected");
360 }
361
362 // end of open tag
363 m_Input.SkipChar(); // '>'
364 Found_gt();
365 }
366 return SelfClosedTag();
367 }
368
UseSpecialCaseRead(void)369 bool CObjectIStreamXml::UseSpecialCaseRead(void)
370 {
371 if (ExpectSpecialCase()==0) {
372 return false;
373 }
374 bool empty = !m_Attlist &&
375 ( SelfClosedTag() ||
376 EndOpeningTagSelfClosed() ||
377 (m_Input.PeekChar(0) == '<' && m_Input.PeekChar(1) == '/')
378 );
379 if (empty) {
380 if (m_IsNil && (ExpectSpecialCase() & CObjectIStream::eReadAsNil)!=0) {
381 m_IsNil=false;
382 SetSpecialCaseUsed(CObjectIStream::eReadAsNil);
383 // NCBI_THROW(CSerialException,eNullValue, kEmptyStr);
384 return true;
385 }
386 if ( GetMemberDefault()) {
387 SetSpecialCaseUsed(CObjectIStream::eReadAsDefault);
388 return true;
389 }
390 }
391 return false;
392 }
393
BeginOpeningTag(void)394 char CObjectIStreamXml::BeginOpeningTag(void)
395 {
396 BeginData();
397 // find beginning '<'
398 char c = SkipWSAndComments();
399 if ( c != '<' )
400 ThrowError(fFormatError, "'<' expected");
401 c = m_Input.PeekChar(1);
402 if ( c == '/' )
403 ThrowError(fFormatError, "unexpected '</'");
404 m_Input.SkipChar();
405 Found_lt();
406 return c;
407 }
408
BeginClosingTag(void)409 char CObjectIStreamXml::BeginClosingTag(void)
410 {
411 BeginData();
412 // find beginning '<'
413 char c = SkipWSAndComments();
414 if ( c != '<' || m_Input.PeekChar(1) != '/' )
415 ThrowError(fFormatError, "'</' expected");
416 m_Input.SkipChars(2);
417 Found_lt_slash();
418 return m_Input.PeekChar();
419 }
420
ReadName(char c)421 CTempString CObjectIStreamXml::ReadName(char c)
422 {
423 _ASSERT(InsideTag());
424 if ( !IsFirstNameChar(c) )
425 ThrowError(fFormatError,
426 "Name begins with an invalid character: #"
427 +NStr::UIntToString((unsigned int)c));
428
429 // find end of tag name
430 size_t i = 1, iColon = 0;
431 while ( IsNameChar(c = m_Input.PeekChar(i)) ) {
432 if (!m_Doctype_found && c == ':') {
433 iColon = i+1;
434 }
435 ++i;
436 }
437
438 // save beginning of tag name
439 const char* ptr = m_Input.GetCurrentPos();
440
441 // check end of tag name
442 m_Input.SkipChars(i);
443 if (c == '\n' || c == '\r') {
444 m_Input.SkipChar();
445 m_Input.SkipEndOfLine(c);
446 }
447 m_LastTag = CTempString(ptr+iColon, i-iColon);
448 if (iColon > 1) {
449 string ns_prefix( CTempString(ptr, iColon-1));
450 if (ns_prefix == "xmlns") {
451 string value;
452 ReadAttributeValue(value, true);
453 if (m_LastTag == m_CurrNsPrefix) {
454 size_t depth = GetStackDepth();
455 TTypeInfo type=0;
456 if (depth > 1 && FetchFrameFromTop(1).HasTypeInfo()) {
457 type = FetchFrameFromTop(1).GetTypeInfo();
458 if (type->GetName().empty() &&
459 depth > 3 && FetchFrameFromTop(3).HasTypeInfo()) {
460 type = FetchFrameFromTop(3).GetTypeInfo();
461 }
462 }
463 if (type && type->HasNamespaceName()) {
464 type->SetNamespacePrefix(m_CurrNsPrefix);
465 }
466 }
467 m_NsPrefixToName[m_LastTag] = value;
468 m_NsNameToPrefix[value] = m_LastTag;
469 char ch = SkipWS();
470 return IsEndOfTagChar(ch) ? CTempString() : ReadName(ch);
471 } else if (ns_prefix == "xml") {
472 iColon = 0;
473 } else {
474 m_CurrNsPrefix = ns_prefix;
475 }
476 } else {
477 if (!m_Attlist) {
478 m_CurrNsPrefix.erase();
479 }
480 if (m_Attlist && m_LastTag == "xmlns") {
481 string value;
482 ReadAttributeValue(value, true);
483 if (GetStackDepth() > 1 && FetchFrameFromTop(1).HasTypeInfo()) {
484 TTypeInfo type = FetchFrameFromTop(1).GetTypeInfo();
485 if (type->HasNamespaceName()) {
486 type->SetNamespacePrefix(m_CurrNsPrefix);
487 }
488 }
489 m_NsPrefixToName[m_LastTag] = value;
490 m_NsNameToPrefix[value] = m_LastTag;
491 char ch = SkipWS();
492 return IsEndOfTagChar(ch) ? CTempString() : ReadName(ch);
493 }
494 }
495 #if defined(NCBI_SERIAL_IO_TRACE)
496 cout << ", Read= " << m_LastTag;
497 #endif
498 if (m_Attlist && m_LastTag.size() == 3 &&
499 m_LastTag == "nil" &&
500 (m_NsPrefixToName.find(m_CurrNsPrefix) == m_NsPrefixToName.end() ||
501 NStr::strcmp(m_NsPrefixToName[m_CurrNsPrefix].c_str(),s_SchemaInstanceNamespace)== 0)) {
502 string value;
503 ReadAttributeValue(value, true);
504 m_IsNil = NStr::StringToBool(value);
505 char ch = SkipWS();
506 return IsEndOfTagChar(ch) ? CTempString() : ReadName(ch);
507 }
508 return CTempString(ptr+iColon, i-iColon);
509 }
510
RejectedName(void)511 CTempString CObjectIStreamXml::RejectedName(void)
512 {
513 _ASSERT(!m_RejectedTag.empty());
514 m_LastTag = m_RejectedTag;
515 m_RejectedTag.erase();
516 m_TagState = eTagInsideOpening;
517 #if defined(NCBI_SERIAL_IO_TRACE)
518 cout << ", Redo= " << m_LastTag;
519 #endif
520 return m_LastTag;
521 }
522
SkipAttributeValue(char c)523 void CObjectIStreamXml::SkipAttributeValue(char c)
524 {
525 _ASSERT(InsideOpeningTag());
526 m_Input.SkipChar();
527 m_Input.FindChar(c);
528 m_Input.SkipChar();
529 }
530
SkipQDecl(void)531 void CObjectIStreamXml::SkipQDecl(void)
532 {
533 _ASSERT(InsideOpeningTag());
534 m_Input.SkipChar();
535
536 CTempString tagName;
537 tagName = ReadName( SkipWS());
538 // _ASSERT(tagName == "xml");
539 for (;;) {
540 char ch = SkipWS();
541 if (ch == '?') {
542 break;
543 }
544 tagName = ReadName(ch);
545 string value;
546 ReadAttributeValue(value);
547 if (tagName == "encoding") {
548 if (NStr::CompareNocase(value.c_str(),"UTF-8") == 0) {
549 m_Encoding = eEncoding_UTF8;
550 } else if (NStr::CompareNocase(value.c_str(),"ISO-8859-1") == 0) {
551 m_Encoding = eEncoding_ISO8859_1;
552 } else if (NStr::CompareNocase(value.c_str(),"Windows-1252") == 0) {
553 m_Encoding = eEncoding_Windows_1252;
554 } else {
555 ThrowError(fFormatError, "unsupported encoding: " + value);
556 }
557 break;
558 }
559 }
560 for ( ;; ) {
561 m_Input.FindChar('?');
562 if ( m_Input.PeekChar(1) == '>' ) {
563 // ?>
564 m_Input.SkipChars(2);
565 Found_gt();
566 return;
567 }
568 else
569 m_Input.SkipChar();
570 }
571 }
572
ReadFileHeader(void)573 string CObjectIStreamXml::ReadFileHeader(void)
574 {
575 // check for UTF8 Byte Order Mark (EF BB BF)
576 // http://unicode.org/faq/utf_bom.html#BOM
577 {
578 char c = m_Input.PeekChar();
579 if ((unsigned char)c == 0xEF) {
580 if ((unsigned char)m_Input.PeekChar(1) == 0xBB &&
581 (unsigned char)m_Input.PeekChar(2) == 0xBF) {
582 m_Input.SkipChars(3);
583 m_Encoding = eEncoding_UTF8;
584 }
585 }
586 }
587
588 m_Doctype_found = false;
589 for ( ;; ) {
590 switch ( BeginOpeningTag() ) {
591 case '?':
592 SkipQDecl();
593 break;
594 case '!':
595 {
596 m_Input.SkipChar();
597 CTempString tagName = ReadName(m_Input.PeekChar());
598 if ( tagName == "DOCTYPE" ) {
599 m_Doctype_found = true;
600 ReadName(SkipWS());
601 // skip the rest of !DOCTYPE
602 for ( ;; ) {
603 char c = SkipWS();
604 if ( c == '>' ) {
605 m_Input.SkipChar();
606 Found_gt();
607 break;
608 }
609 else if ( c == '"' || c == '\'' ) {
610 SkipAttributeValue(c);
611 }
612 else {
613 ReadName(c);
614 }
615 }
616 }
617 else {
618 // unknown tag
619 ThrowError(fFormatError,
620 "unknown tag in file header: "+string(tagName));
621 }
622 }
623 break;
624 default:
625 {
626 string typeName = ReadName(m_Input.PeekChar());
627 if (!m_Doctype_found && !StackIsEmpty()) {
628 // verify typename
629 const CObjectStack::TFrame& top = TopFrame();
630 if (top.GetFrameType() == CObjectStackFrame::eFrameNamed &&
631 top.HasTypeInfo()) {
632 const string& tname = top.GetTypeInfo()->GetName();
633 if ( !typeName.empty() && !tname.empty() && typeName != tname ) {
634 string tmp = m_CurrNsPrefix + ":" + typeName;
635 if (tmp == tname) {
636 typeName = tmp;
637 m_LastTag = tmp;
638 m_CurrNsPrefix.erase();
639 m_Doctype_found = true;
640 }
641 }
642 }
643 }
644 UndoClassMember();
645 return typeName;
646 }
647 /*
648 m_Input.UngetChar('<');
649 Back_lt();
650 ThrowError(fFormatError, "unknown DOCTYPE");
651 */
652 }
653 }
654 return NcbiEmptyString;
655 }
656
PeekNextTypeName(void)657 string CObjectIStreamXml::PeekNextTypeName(void)
658 {
659 if (!m_RejectedTag.empty()) {
660 return m_RejectedTag;
661 }
662 string typeName = ReadName(BeginOpeningTag());
663 UndoClassMember();
664 return typeName;
665 }
666
FindFileHeader(bool find_XMLDecl)667 void CObjectIStreamXml::FindFileHeader(bool find_XMLDecl)
668 {
669 char c;
670 for (;;) {
671 c = m_Input.PeekChar();
672 if (c == '<') {
673 if (!find_XMLDecl) {
674 return;
675 }
676 if (m_Input.PeekChar(1) == '?' &&
677 m_Input.PeekChar(2) == 'x' &&
678 m_Input.PeekChar(3) == 'm' &&
679 m_Input.PeekChar(4) == 'l') {
680 return;
681 }
682 }
683 m_Input.SkipChar();
684 }
685 }
686
x_EndTypeNamespace(void)687 void CObjectIStreamXml::x_EndTypeNamespace(void)
688 {
689 if (x_IsStdXml()) {
690 if (TopFrame().HasTypeInfo()) {
691 TTypeInfo type = TopFrame().GetTypeInfo();
692 if (type->HasNamespaceName()) {
693 string nsName = type->GetNamespaceName();
694 string nsPrefix = m_NsNameToPrefix[nsName];
695 // not sure about it - should we erase them or not?
696 // m_NsNameToPrefix.erase(nsName);
697 // m_NsPrefixToName.erase(nsPrefix);
698 }
699 }
700 if (GetStackDepth() <= 2) {
701 m_NsNameToPrefix.clear();
702 m_NsPrefixToName.clear();
703 }
704 }
705 }
706
ReadEscapedChar(char endingChar,bool * encoded)707 int CObjectIStreamXml::ReadEscapedChar(char endingChar, bool* encoded)
708 {
709 char c = m_Input.PeekChar();
710 if (encoded) {
711 *encoded = false;
712 }
713 if ( c == '&' ) {
714 if (encoded) {
715 *encoded = true;
716 }
717 m_Input.SkipChar();
718 const size_t limit = 32;
719 size_t offset = m_Input.PeekFindChar(';', limit);
720 if ( offset >= limit )
721 ThrowError(fFormatError, "entity reference is too long");
722 const char* p = m_Input.GetCurrentPos(); // save entity string pointer
723 m_Input.SkipChars(offset + 1); // skip it
724 if ( offset == 0 )
725 ThrowError(fFormatError, "invalid entity reference");
726 if ( *p == '#' ) {
727 const char* end = p + offset;
728 ++p;
729 // char ref
730 if ( p == end )
731 ThrowError(fFormatError, "invalid char reference");
732 unsigned v = 0;
733 if ( *p == 'x' ) {
734 // hex
735 if ( ++p == end )
736 ThrowError(fFormatError, "invalid char reference");
737 do {
738 c = *p++;
739 if ( c >= '0' && c <= '9' )
740 v = v * 16 + (c - '0');
741 else if ( c >= 'A' && c <='F' )
742 v = v * 16 + (c - 'A' + 0xA);
743 else if ( c >= 'a' && c <='f' )
744 v = v * 16 + (c - 'a' + 0xA);
745 else
746 ThrowError(fFormatError,
747 "invalid symbol in char reference");
748 } while ( p < end );
749 }
750 else {
751 // dec
752 if ( p == end )
753 ThrowError(fFormatError, "invalid char reference");
754 do {
755 c = *p++;
756 if ( c >= '0' && c <= '9' )
757 v = v * 10 + (c - '0');
758 else
759 ThrowError(fFormatError,
760 "invalid symbol in char reference");
761 } while ( p < end );
762 }
763 return v;
764 }
765 else {
766 CTempString e(p, offset);
767 if ( e == "lt" )
768 return '<';
769 if ( e == "gt" )
770 return '>';
771 if ( e == "amp" )
772 return '&';
773 if ( e == "apos" )
774 return '\'';
775 if ( e == "quot" )
776 return '"';
777 ThrowError(fFormatError, "unknown entity name: " + string(e));
778 }
779 }
780 else if ( c == endingChar ) {
781 return -1;
782 }
783 m_Input.SkipChar();
784 return c & 0xFF;
785 }
786
/*
    In XML 1.1, almost all characters are allowed:
        http://www.w3.org/TR/xml11/#NT-Char
    BUT we declare the document as XML 1.0 (see
        CObjectOStreamXml::WriteFileHeader).
    Because of that, some characters are not allowed:
        http://www.w3.org/TR/xml/#charsets

*/
// True for the control codes 0x01..0x1F other than TAB (0x9), LF (0xA)
// and CR (0xD). Zero, negative values and everything from 0x20 up are
// not considered bad.
inline bool BAD_CHAR(int x) {
    if (x <= 0x0 || x >= 0x20) {
        return false;
    }
    switch (x) {
    case 0x9:
    case 0xA:
    case 0xD:
        return false;
    default:
        return true;
    }
}
x_VerifyChar(int x)799 inline int CObjectIStreamXml::x_VerifyChar(int x) {
800 return BAD_CHAR(x) ?
801 ReplaceVisibleChar((char)x, x_FixCharsMethod(), this, kEmptyStr, x_FixCharsSubst()) : x;
802 }
803 inline
ReadEncodedChar(char endingChar,EStringType type,bool & encoded)804 int CObjectIStreamXml::ReadEncodedChar(char endingChar, EStringType type, bool& encoded)
805 {
806 return x_VerifyChar(x_ReadEncodedChar(endingChar,type,encoded));
807 }
808
x_ReadEncodedChar(char endingChar,EStringType type,bool & encoded)809 int CObjectIStreamXml::x_ReadEncodedChar(char endingChar, EStringType type, bool& encoded)
810 {
811 EEncoding enc_out( type == eStringTypeUTF8 ? eEncoding_UTF8 : m_StringEncoding);
812 EEncoding enc_in(m_Encoding == eEncoding_Unknown ? eEncoding_UTF8 : m_Encoding);
813
814 if (enc_out == eEncoding_UTF8 &&
815 !m_Utf8Buf.empty() && m_Utf8Pos != m_Utf8Buf.end()) {
816 if (++m_Utf8Pos != m_Utf8Buf.end()) {
817 return *m_Utf8Pos & 0xFF;
818 } else {
819 m_Utf8Buf.clear();
820 }
821 }
822 int c = ReadEscapedChar(endingChar, &encoded);
823 if (c < 0) {
824 return c;
825 }
826 if (enc_out != eEncoding_Unknown) {
827 if (encoded) {
828 TUnicodeSymbol chU = c;
829 if (enc_out == eEncoding_UTF8) {
830 m_Utf8Buf = CUtf8::AsUTF8( &chU, 1);
831 m_Utf8Pos = m_Utf8Buf.begin();
832 return *m_Utf8Pos & 0xFF;
833 } else {
834 return CUtf8::SymbolToChar( chU, enc_out);
835 }
836 }
837 if (enc_in != enc_out) {
838 if (enc_out != eEncoding_UTF8) {
839 TUnicodeSymbol chU = enc_in == eEncoding_UTF8 ?
840 ReadUtf8Char((char)c) : CUtf8::CharToSymbol((char)c, enc_in);
841 Uint1 ch = CUtf8::SymbolToChar( chU, enc_out);
842 return ch & 0xFF;
843 }
844 if ((c & 0x80) == 0) {
845 return c;
846 }
847 char ch = (char)c;
848 m_Utf8Buf = CUtf8::AsUTF8( CTempString(&ch,1), enc_in);
849 m_Utf8Pos = m_Utf8Buf.begin();
850 return *m_Utf8Pos & 0xFF;
851 }
852 }
853 return c;
854 }
855
ReadUtf8Char(char c)856 TUnicodeSymbol CObjectIStreamXml::ReadUtf8Char(char c)
857 {
858 size_t more = 0;
859 TUnicodeSymbol chU = CUtf8::DecodeFirst(c, more);
860 while (chU && more--) {
861 chU = CUtf8::DecodeNext(chU, m_Input.GetChar());
862 }
863 if (chU == 0) {
864 ThrowError(fInvalidData, "invalid UTF8 string");
865 }
866 return chU;
867 }
868
ReadAttributeName(void)869 CTempString CObjectIStreamXml::ReadAttributeName(void)
870 {
871 if ( OutsideTag() )
872 ThrowError(fFormatError, "attribute expected");
873 return ReadName(SkipWS());
874 }
875
ReadAttributeValue(string & value,bool skipClosing)876 void CObjectIStreamXml::ReadAttributeValue(string& value, bool skipClosing)
877 {
878 if ( SkipWS() != '=' )
879 ThrowError(fFormatError, "'=' expected");
880 m_Input.SkipChar(); // '='
881 char startChar = SkipWS();
882 if ( startChar != '\'' && startChar != '\"' )
883 ThrowError(fFormatError, "attribute value must start with ' or \"");
884 m_Input.SkipChar();
885 bool encoded = false;
886 for ( ;; ) {
887 int c = ReadEncodedChar(startChar,eStringTypeUTF8,encoded);
888 if ( c < 0 )
889 break;
890 if (c != 0) {
891 value += char(c);
892 }
893 }
894 if (!m_Attlist || skipClosing) {
895 m_Input.SkipChar();
896 }
897 }
898
ReadUndefinedAttributes(void)899 char CObjectIStreamXml::ReadUndefinedAttributes(void)
900 {
901 char c;
902 m_Attlist = true;
903 for (;;) {
904 c = SkipWS();
905 if (IsEndOfTagChar(c)) {
906 m_Attlist = false;
907 break;
908 }
909 CTempString tagName = ReadName(c);
910 if (!tagName.empty()) {
911 string value;
912 ReadAttributeValue(value, true);
913 }
914 }
915 return c;
916 }
917
ReadBool(void)918 bool CObjectIStreamXml::ReadBool(void)
919 {
920 CTempString attr;
921 // accept both <a>true</a> and <a value="true"/>
922 // for compatibility with ASN-generated classes
923 string sValue;
924 bool haveattr=false;
925 if (!m_Attlist) {
926 while (HasAttlist()) {
927 attr = ReadAttributeName();
928 if ( attr == "value" ) {
929 ReadAttributeValue(sValue);
930 haveattr = true;
931 continue;
932 }
933 if ( attr == "nil") {
934 m_IsNil = true;
935 }
936 string value;
937 ReadAttributeValue(value);
938 }
939 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
940 return x_UseMemberDefault<bool>();
941 }
942 }
943 if (!haveattr) {
944 ReadWord(sValue);
945 }
946 NStr::TruncateSpacesInPlace(sValue);
947
948 // http://www.w3.org/TR/xmlschema11-2/#boolean
949 bool value;
950 if ( sValue == "true" || sValue == "1")
951 value = true;
952 else {
953 if ( sValue != "false" && sValue != "0") {
954 ThrowError(fFormatError,
955 "'true' or 'false' value expected: "+sValue);
956 }
957 value = false;
958 }
959 if ( !m_Attlist && !EndOpeningTagSelfClosed() && !NextTagIsClosing() )
960 ThrowError(fFormatError, "boolean tag must have empty contents");
961 return value;
962 }
963
ReadChar(void)964 char CObjectIStreamXml::ReadChar(void)
965 {
966 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
967 return x_UseMemberDefault<char>();
968 }
969 BeginData();
970 int c = ReadEscapedChar('<');
971 if ( c < 0 || m_Input.PeekChar() != '<' )
972 ThrowError(fFormatError, "one char tag content expected");
973 return (char)c;
974 }
975
ReadInt4(void)976 Int4 CObjectIStreamXml::ReadInt4(void)
977 {
978 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
979 return x_UseMemberDefault<Int4>();
980 }
981 BeginData();
982 return m_Input.GetInt4();
983 }
984
ReadUint4(void)985 Uint4 CObjectIStreamXml::ReadUint4(void)
986 {
987 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
988 return x_UseMemberDefault<Uint4>();
989 }
990 BeginData();
991 return m_Input.GetUint4();
992 }
993
ReadInt8(void)994 Int8 CObjectIStreamXml::ReadInt8(void)
995 {
996 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
997 return x_UseMemberDefault<Int8>();
998 }
999 BeginData();
1000 return m_Input.GetInt8();
1001 }
1002
ReadUint8(void)1003 Uint8 CObjectIStreamXml::ReadUint8(void)
1004 {
1005 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
1006 return x_UseMemberDefault<Uint8>();
1007 }
1008 BeginData();
1009 return m_Input.GetUint8();
1010 }
1011
ReadDouble(void)1012 double CObjectIStreamXml::ReadDouble(void)
1013 {
1014 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
1015 return x_UseMemberDefault<double>();
1016 }
1017 string s;
1018 ReadWord(s);
1019 char* endptr;
1020 double result = NStr::StringToDoublePosix(s.c_str(), &endptr, NStr::fDecimalPosixFinite);
1021 while (IsWhiteSpace(*endptr)) {
1022 ++endptr;
1023 }
1024 if ( *endptr != 0 )
1025 ThrowError(fFormatError, "invalid float number");
1026 return result;
1027 }
1028
ReadNull(void)1029 void CObjectIStreamXml::ReadNull(void)
1030 {
1031 if ( !EndOpeningTagSelfClosed() && !NextTagIsClosing() )
1032 ThrowError(fFormatError, "empty tag expected");
1033 }
1034
ReadAnyContent(const string & ns_prefix,string & value)1035 bool CObjectIStreamXml::ReadAnyContent(const string& ns_prefix, string& value)
1036 {
1037 if (ThisTagIsSelfClosed()) {
1038 EndSelfClosedTag();
1039 return false;
1040 }
1041 while (!NextTagIsClosing()) {
1042 while (NextIsTag()) {
1043 string tagAny;
1044 tagAny = ReadName(BeginOpeningTag());
1045 value += '<';
1046 value += tagAny;
1047 while (HasAttlist()) {
1048 string attribName = ReadName(SkipWS());
1049 if (attribName.empty()) {
1050 break;
1051 }
1052 if (m_CurrNsPrefix.empty() || m_CurrNsPrefix == ns_prefix) {
1053 value += " ";
1054 value += attribName;
1055 value += "=\"";
1056 string attribValue;
1057 ReadAttributeValue(attribValue, true);
1058 value += attribValue;
1059 value += "\"";
1060 } else {
1061 // skip attrib from different namespaces
1062 string attribValue;
1063 ReadAttributeValue(attribValue, true);
1064 }
1065 }
1066 string value2;
1067 if (ReadAnyContent(ns_prefix, value2)) {
1068 CloseTag(tagAny);
1069 }
1070 if (value2.empty()) {
1071 value += "/>";
1072 } else {
1073 value += '>';
1074 value += value2;
1075 value += "</";
1076 value += tagAny;
1077 value += '>';
1078 }
1079 }
1080 string data;
1081 ReadTagData(data,eStringTypeUTF8);
1082 value += data;
1083 }
1084 return true;
1085 }
1086
ReadAnyContentObject(CAnyContentObject & obj)1087 void CObjectIStreamXml::ReadAnyContentObject(CAnyContentObject& obj)
1088 {
1089 obj.Reset();
1090 string tagName;
1091 if (!m_RejectedTag.empty()) {
1092 tagName = RejectedName();
1093 obj.SetName( tagName);
1094 } else if (!StackIsEmpty() && TopFrame().HasMemberId()) {
1095 obj.SetName( TopFrame().GetMemberId().GetName());
1096 }
1097 string ns_prefix(m_CurrNsPrefix);
1098
1099 BEGIN_OBJECT_FRAME(eFrameOther);
1100 while (HasAttlist()) {
1101 string attribName = ReadName(SkipWS());
1102 if (attribName.empty()) {
1103 break;
1104 }
1105 string value;
1106 ReadAttributeValue(value, true);
1107 if (attribName == "xmlns") {
1108 m_NsPrefixToName[ns_prefix] = value;
1109 m_NsNameToPrefix[value] = ns_prefix;
1110 } else {
1111 obj.AddAttribute( attribName, m_NsPrefixToName[m_CurrNsPrefix],CUtf8::AsUTF8(value,eEncoding_UTF8));
1112 }
1113 }
1114 obj.SetNamespacePrefix(ns_prefix);
1115 obj.SetNamespaceName(m_NsPrefixToName[ns_prefix]);
1116 string value;
1117 if (ReadAnyContent(ns_prefix,value) && !tagName.empty()) {
1118 CloseTag(tagName);
1119 }
1120 obj.SetValue(CUtf8::AsUTF8(value,eEncoding_UTF8));
1121 END_OBJECT_FRAME();
1122 }
1123
SkipAnyContent(void)1124 bool CObjectIStreamXml::SkipAnyContent(void)
1125 {
1126 if (SelfClosedTag() || ThisTagIsSelfClosed()) {
1127 //EndSelfClosedTag();
1128 return true;
1129 }
1130 if ( m_Attlist && InsideOpeningTag() ) {
1131 ReadUndefinedAttributes();
1132 m_Attlist = true;
1133 return true;
1134 }
1135 while (!NextTagIsClosing()) {
1136 while (NextIsTag()) {
1137 string tagName = ReadName(BeginOpeningTag());
1138 if (SkipAnyContent()) {
1139 CloseTag(tagName);
1140 }
1141 }
1142 string data;
1143 ReadTagData(data);
1144 }
1145 return true;
1146 }
1147
SkipAnyContentObject(void)1148 void CObjectIStreamXml::SkipAnyContentObject(void)
1149 {
1150 string tagName;
1151 if (!m_RejectedTag.empty()) {
1152 tagName = RejectedName();
1153 } else if (OutsideTag()) {
1154 tagName = ReadName(BeginOpeningTag());
1155 }
1156 if (SkipAnyContent() && !tagName.empty()) {
1157 CloseTag(tagName);
1158 }
1159 }
1160
ReadBitString(CBitString & obj)1161 void CObjectIStreamXml::ReadBitString(CBitString& obj)
1162 {
1163 obj.clear();
1164 #if BITSTRING_AS_VECTOR
1165 if (EndOpeningTagSelfClosed()) {
1166 return;
1167 }
1168 BeginData();
1169 size_t reserve;
1170 const size_t step=128;
1171 obj.reserve( reserve=step );
1172 for (int c= GetHexChar(); c >= 0; c= GetHexChar()) {
1173 Uint1 byte = c;
1174 for (Uint1 mask= 0x8; mask != 0; mask >>= 1) {
1175 obj.push_back( (byte & mask) != 0 );
1176 if (--reserve == 0) {
1177 obj.reserve(obj.size() + (reserve=step));
1178 }
1179 }
1180 }
1181 obj.reserve(obj.size());
1182 #else
1183 obj.resize(0);
1184 if (EndOpeningTagSelfClosed()) {
1185 return;
1186 }
1187 if (IsCompressed()) {
1188 ReadCompressedBitString(obj);
1189 return;
1190 }
1191 BeginData();
1192 CBitString::size_type len = 0;
1193 for ( ;; ++len) {
1194 char c = m_Input.GetChar();
1195 if (c == '1') {
1196 obj.resize(len+1);
1197 obj.set_bit(len);
1198 } else if (c != '0') {
1199 if (IsWhiteSpace(c)) {
1200 --len;
1201 continue;
1202 }
1203 m_Input.UngetChar(c);
1204 if ( c == '<' )
1205 break;
1206 ThrowError(fFormatError, "invalid char in bit string");
1207 }
1208 }
1209 obj.resize(len);
1210 #endif
1211 }
1212
SkipBitString(void)1213 void CObjectIStreamXml::SkipBitString(void)
1214 {
1215 SkipByteBlock();
1216 }
1217
ReadString(string & str,EStringType type)1218 void CObjectIStreamXml::ReadString(string& str, EStringType type)
1219 {
1220 str.erase();
1221 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
1222 EEncoding enc_in(m_Encoding == eEncoding_Unknown ? eEncoding_UTF8 : m_Encoding);
1223 CStringUTF8 u( CUtf8::AsUTF8(x_UseMemberDefault<string>(),enc_in));
1224 if (type == eStringTypeUTF8 || m_StringEncoding == eEncoding_Unknown) {
1225 str = u;
1226 } else {
1227 str = CUtf8::AsSingleByteString(u,m_StringEncoding);
1228 }
1229 return;
1230 }
1231 if (EndOpeningTagSelfClosed()) {
1232 return;
1233 }
1234 #if 0
1235 if (TopFrame().GetNotag()) {
1236 ReadWord(str, type);
1237 } else {
1238 ReadTagData(str, type);
1239 }
1240 #else
1241 ReadTagData(str, type);
1242 #endif
1243 }
1244
ReadCString(void)1245 char* CObjectIStreamXml::ReadCString(void)
1246 {
1247 if ( EndOpeningTagSelfClosed() ) {
1248 // null pointer string
1249 return 0;
1250 }
1251 string str;
1252 ReadTagData(str);
1253 return NcbiSysChar_strdup(str.c_str());
1254 }
1255
ReadCDSection(string & str)1256 bool CObjectIStreamXml::ReadCDSection(string& str)
1257 // http://www.w3.org/TR/2000/REC-xml-20001006#dt-cdsection
1258 // must begin with <![CDATA[
1259 // must end with ]]>
1260 {
1261 if (m_Input.PeekChar() != '<' || m_Input.PeekChar(1) != '!') {
1262 return false;
1263 }
1264 m_Input.SkipChars(2);
1265 const char* open = "[CDATA[";
1266 for ( ; *open; ++open) {
1267 if (m_Input.PeekChar() != *open) {
1268 ThrowError(fFormatError, "CDATA section expected");
1269 }
1270 m_Input.SkipChar();
1271 }
1272 while ( m_Input.PeekChar(0) != ']' ||
1273 m_Input.PeekChar(1) != ']' ||
1274 m_Input.PeekChar(2) != '>') {
1275 str += m_Input.PeekChar();
1276 m_Input.SkipChar();
1277 }
1278 m_Input.SkipChars(3);
1279 return true;
1280 }
1281
ReadTagData(string & str,EStringType type)1282 void CObjectIStreamXml::ReadTagData(string& str, EStringType type)
1283 /*
1284 White Space Handling:
1285 http://www.w3.org/TR/2000/REC-xml-20001006#sec-white-space
1286
1287 End-of-Line Handling
1288 http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
1289
1290 Attribute-Value Normalization
1291 http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
1292 */
1293 {
1294 BeginData();
1295 bool encoded = false;
1296 bool CR = false;
1297 try {
1298 for ( ;; ) {
1299 int c = ReadEncodedChar(m_Attlist ? '\"' : '<', type, encoded);
1300 if ( c < 0 ) {
1301 if (m_Attlist || !ReadCDSection(str)) {
1302 break;
1303 }
1304 CR = false;
1305 continue;
1306 }
1307 if (c == 0) {
1308 continue;
1309 }
1310 if (CR) {
1311 if (c == '\n') {
1312 CR = false;
1313 } else if (c == '\r') {
1314 c = '\n';
1315 }
1316 } else if (c == '\r') {
1317 CR = true;
1318 continue;
1319 }
1320 if (m_Attlist && !encoded && IsWhiteSpace((char)c)) {
1321 c = ' ';
1322 }
1323 str += (char)c;
1324 // pre-allocate memory for long strings
1325 if ( str.size() > 128 && (double)str.capacity()/((double)str.size()+1.0) < 1.1 ) {
1326 str.reserve(str.size()*2);
1327 }
1328 }
1329 } catch (CEofException&) {
1330 }
1331 str.reserve(str.size());
1332 }
1333
ReadWord(string & str,EStringType type)1334 void CObjectIStreamXml::ReadWord(string& str, EStringType type)
1335 {
1336 BeginData();
1337 bool encoded = false;
1338 SkipWS();
1339 try {
1340 for ( ;; ) {
1341 int c = ReadEncodedChar(m_Attlist ? '\"' : '<', type, encoded);
1342 if ( c < 0 || IsWhiteSpace((char)c)) {
1343 break;
1344 }
1345 if (c != 0) {
1346 str += (char)c;
1347 }
1348 }
1349 } catch (CEofException&) {
1350 }
1351 str.reserve(str.size());
1352 }
1353
ReadEnum(const CEnumeratedTypeValues & values)1354 TEnumValueType CObjectIStreamXml::ReadEnum(const CEnumeratedTypeValues& values)
1355 {
1356 TEnumValueType value;
1357 bool valueonly = m_StdXml;
1358 if (valueonly) {
1359 if (values.IsInteger()) {
1360 value = ReadInt4();
1361 } else {
1362 string str;
1363 ReadString(str);
1364 value = values.FindValue( str);
1365 }
1366 return value;
1367 }
1368 const string& enumName = values.GetName();
1369 if ( !m_SkipNextTag && !enumName.empty() ) {
1370 // global enum
1371 OpenTag(enumName);
1372 _ASSERT(InsideOpeningTag());
1373 }
1374 if ( InsideOpeningTag() ) {
1375 // try to read attribute 'value'
1376 if ( IsEndOfTagChar( SkipWS()) ) {
1377 // no attribute
1378 if ( !values.IsInteger() )
1379 ThrowError(fFormatError, "attribute 'value' expected");
1380 m_Input.SkipChar();
1381 Found_gt();
1382 BeginData();
1383 value = m_Input.GetInt4();
1384 }
1385 else {
1386 if (m_Attlist) {
1387 string valueName;
1388 ReadAttributeValue(valueName);
1389 NStr::TruncateSpacesInPlace(valueName);
1390 value = values.FindValue(valueName);
1391 } else {
1392 CTempString attr;
1393 while (HasAttlist()) {
1394 attr = ReadAttributeName();
1395 if ( attr == "value" ) {
1396 break;
1397 }
1398 string value_tmp;
1399 ReadAttributeValue(value_tmp);
1400 }
1401 if ( attr != "value" ) {
1402 EndOpeningTagSelfClosed();
1403 ThrowError(fMissingValue,"attribute 'value' is missing");
1404 }
1405 string valueName;
1406 ReadAttributeValue(valueName);
1407 NStr::TruncateSpacesInPlace(valueName);
1408 value = values.FindValue(valueName);
1409 if ( !EndOpeningTagSelfClosed() && values.IsInteger() ) {
1410 // read integer value
1411 SkipWSAndComments();
1412 if ( value != m_Input.GetInt4() )
1413 ThrowError(fInvalidData,
1414 "incompatible name and value of named integer");
1415 }
1416 }
1417 }
1418 }
1419 else {
1420 // outside of tag
1421 if ( !values.IsInteger() )
1422 ThrowError(fFormatError, "attribute 'value' expected");
1423 BeginData();
1424 value = m_Input.GetInt4();
1425 }
1426 if ( !m_SkipNextTag && !enumName.empty() ) {
1427 // global enum
1428 CloseTag(enumName);
1429 }
1430 return value;
1431 }
1432
ReadPointerType(void)1433 CObjectIStream::EPointerType CObjectIStreamXml::ReadPointerType(void)
1434 {
1435 if ((ExpectSpecialCase() & CObjectIStream::eReadAsNil)!=0) {
1436 if (m_IsNil) {
1437 m_IsNil=false;
1438 SetSpecialCaseUsed(CObjectIStream::eReadAsNil);
1439 return eNullPointer;
1440 }
1441 }
1442 if ( (!m_SkipNextTag || ExpectSpecialCase()) && !HasAttlist() && ((InsideOpeningTag() && EndOpeningTagSelfClosed()) || SelfClosedTag()) ) {
1443 // self closed tag
1444 return eNullPointer;
1445 }
1446 return eThisPointer;
1447 }
1448
ReadObjectPointer(void)1449 CObjectIStreamXml::TObjectIndex CObjectIStreamXml::ReadObjectPointer(void)
1450 {
1451 ThrowError(fNotImplemented, "Not Implemented");
1452 return 0;
1453 /*
1454 CTempString attr = ReadAttributeName();
1455 if ( attr != "index" )
1456 ThrowError(fIllegalCall, "attribute 'index' expected");
1457 string index;
1458 ReadAttributeValue(index);
1459 EndOpeningTagSelfClosed();
1460 return NStr::StringToInt(index);
1461 */
1462 }
1463
ReadOtherPointer(void)1464 string CObjectIStreamXml::ReadOtherPointer(void)
1465 {
1466 ThrowError(fNotImplemented, "Not Implemented");
1467 return NcbiEmptyString;
1468 }
1469
StartDelayBuffer(void)1470 void CObjectIStreamXml::StartDelayBuffer(void)
1471 {
1472 BeginData();
1473 CObjectIStream::StartDelayBuffer();
1474 if (!m_RejectedTag.empty()) {
1475 m_Input.GetSubSourceCollector()->AddChunk("<", 1);
1476 m_Input.GetSubSourceCollector()->AddChunk(m_RejectedTag.c_str(), m_RejectedTag.size());
1477 }
1478 }
1479
EndDelayBuffer(void)1480 CRef<CByteSource> CObjectIStreamXml::EndDelayBuffer(void)
1481 {
1482 _ASSERT(OutsideTag());
1483 return CObjectIStream::EndDelayBuffer();
1484 }
1485
SkipTagName(CTempString tag,const char * str,size_t length)1486 CTempString CObjectIStreamXml::SkipTagName(CTempString tag,
1487 const char* str, size_t length)
1488 {
1489 if ( tag.size() < length ||
1490 memcmp(tag.data(), str, length) != 0 )
1491 ThrowError(fFormatError, "invalid tag name: "+string(tag));
1492 return CTempString(tag.data() + length, tag.size() - length);
1493 }
1494
SkipStackTagName(CTempString tag,size_t level)1495 CTempString CObjectIStreamXml::SkipStackTagName(CTempString tag,
1496 size_t level)
1497 {
1498 const TFrame& frame = FetchFrameFromTop(level);
1499 switch ( frame.GetFrameType() ) {
1500 case TFrame::eFrameNamed:
1501 case TFrame::eFrameArray:
1502 case TFrame::eFrameClass:
1503 case TFrame::eFrameChoice:
1504 {
1505 const string& name = frame.GetTypeInfo()->GetName();
1506 if ( !name.empty() )
1507 return SkipTagName(tag, name);
1508 else
1509 return SkipStackTagName(tag, level + 1);
1510 }
1511 case TFrame::eFrameClassMember:
1512 case TFrame::eFrameChoiceVariant:
1513 {
1514 tag = SkipStackTagName(tag, level + 1, '_');
1515 return SkipTagName(tag, frame.GetMemberId().GetName());
1516 }
1517 case TFrame::eFrameArrayElement:
1518 {
1519 if (GetStackDepth() > level+1) {
1520 tag = SkipStackTagName(tag, level + 1);
1521 return SkipTagName(tag, "_E");
1522 }
1523 return CTempString();
1524 }
1525 default:
1526 break;
1527 }
1528 ThrowError(fIllegalCall, "illegal frame type");
1529 return tag;
1530 }
1531
SkipStackTagName(CTempString tag,size_t level,char c)1532 CTempString CObjectIStreamXml::SkipStackTagName(CTempString tag,
1533 size_t level, char c)
1534 {
1535 tag = SkipStackTagName(tag, level);
1536 if ( tag.empty() || tag[0] != c )
1537 ThrowError(fFormatError, "invalid tag name: "+string(tag));
1538 return CTempString(tag.data() + 1, tag.size() - 1);
1539 }
1540
OpenTag(const string & e)1541 void CObjectIStreamXml::OpenTag(const string& e)
1542 {
1543 CTempString tagName;
1544 if (m_RejectedTag.empty()) {
1545 tagName = ReadName(BeginOpeningTag());
1546 } else {
1547 tagName = RejectedName();
1548 }
1549 if ( tagName != e )
1550 ThrowError(fFormatError, "tag '"+e+"' expected: "+string(tagName));
1551 }
1552
CloseTag(const string & e)1553 void CObjectIStreamXml::CloseTag(const string& e)
1554 {
1555 if ( SelfClosedTag() ) {
1556 EndSelfClosedTag();
1557 }
1558 else {
1559 CTempString tagName = ReadName(BeginClosingTag());
1560 if ( tagName != e )
1561 ThrowError(fFormatError, "tag '"+e+"' expected: "+string(tagName));
1562 EndClosingTag();
1563 }
1564 }
1565
OpenStackTag(size_t level)1566 void CObjectIStreamXml::OpenStackTag(size_t level)
1567 {
1568 CTempString tagName;
1569 if (m_RejectedTag.empty()) {
1570 tagName = ReadName(BeginOpeningTag());
1571 if (!x_IsStdXml()) {
1572 CTempString rest = SkipStackTagName(tagName, level);
1573 if ( !rest.empty() )
1574 ThrowError(fFormatError,
1575 "unexpected tag: "+string(tagName)+string(rest));
1576 }
1577 } else {
1578 tagName = RejectedName();
1579 }
1580 }
1581
CloseStackTag(size_t level)1582 void CObjectIStreamXml::CloseStackTag(size_t level)
1583 {
1584 if ( SelfClosedTag() ) {
1585 EndSelfClosedTag();
1586 }
1587 else {
1588 if (m_Attlist) {
1589 m_TagState = eTagInsideClosing;
1590 } else {
1591 CTempString tagName = ReadName(BeginClosingTag());
1592 if (!x_IsStdXml()) {
1593 CTempString rest = SkipStackTagName(tagName, level);
1594 if ( !rest.empty() )
1595 ThrowError(fFormatError,
1596 "unexpected tag: "+string(tagName)+string(rest));
1597 }
1598 }
1599 EndClosingTag();
1600 }
1601 }
1602
OpenTagIfNamed(TTypeInfo type)1603 void CObjectIStreamXml::OpenTagIfNamed(TTypeInfo type)
1604 {
1605 if ( !type->GetName().empty() ) {
1606 OpenTag(type->GetName());
1607 }
1608 }
1609
CloseTagIfNamed(TTypeInfo type)1610 void CObjectIStreamXml::CloseTagIfNamed(TTypeInfo type)
1611 {
1612 if ( !type->GetName().empty() )
1613 CloseTag(type->GetName());
1614 }
1615
WillHaveName(TTypeInfo elementType)1616 bool CObjectIStreamXml::WillHaveName(TTypeInfo elementType)
1617 {
1618 while ( elementType->GetName().empty() ) {
1619 if ( elementType->GetTypeFamily() != eTypeFamilyPointer )
1620 return false;
1621 elementType = CTypeConverter<CPointerTypeInfo>::SafeCast(
1622 elementType)->GetPointedType();
1623 }
1624 // found named type
1625 return true;
1626 }
1627
HasAttlist(void)1628 bool CObjectIStreamXml::HasAttlist(void)
1629 {
1630 if (InsideTag()) {
1631 return !IsEndOfTagChar( SkipWS() );
1632 }
1633 return false;
1634 }
1635
NextIsTag(void)1636 bool CObjectIStreamXml::NextIsTag(void)
1637 {
1638 BeginData();
1639 return SkipWSAndComments() == '<' &&
1640 m_Input.PeekChar(1) != '/' &&
1641 m_Input.PeekChar(1) != '!';
1642 }
1643
NextTagIsClosing(void)1644 bool CObjectIStreamXml::NextTagIsClosing(void)
1645 {
1646 BeginData();
1647 return SkipWSAndComments() == '<' && m_Input.PeekChar(1) == '/';
1648 }
1649
ThisTagIsSelfClosed(void)1650 bool CObjectIStreamXml::ThisTagIsSelfClosed(void)
1651 {
1652 if (InsideOpeningTag()) {
1653 return EndOpeningTagSelfClosed();
1654 }
1655 return false;
1656 }
1657
1658
1659 void
BeginContainer(const CContainerTypeInfo * containerType)1660 CObjectIStreamXml::BeginContainer(const CContainerTypeInfo* containerType)
1661 {
1662 if (!m_StdXml) {
1663 if (TopFrame().GetFrameType() == CObjectStackFrame::eFrameArray &&
1664 FetchFrameFromTop(1).GetFrameType() == CObjectStackFrame::eFrameNamed) {
1665 const CClassTypeInfo* clType =
1666 dynamic_cast<const CClassTypeInfo*>(FetchFrameFromTop(1).GetTypeInfo());
1667 if (clType && clType->Implicit()) {
1668 TopFrame().SetNotag();
1669 return;
1670 }
1671 }
1672 OpenTagIfNamed(containerType);
1673 }
1674 }
1675
EndContainer(void)1676 void CObjectIStreamXml::EndContainer(void)
1677 {
1678 if (!m_StdXml && !TopFrame().GetNotag()) {
1679 CloseTagIfNamed(TopFrame().GetTypeInfo());
1680 }
1681 }
1682
BeginContainerElement(TTypeInfo elementType)1683 bool CObjectIStreamXml::BeginContainerElement(TTypeInfo elementType)
1684 {
1685 if (!HasMoreElements(elementType)) {
1686 return false;
1687 }
1688 if ( !WillHaveName(elementType) ) {
1689 BeginArrayElement(elementType);
1690 }
1691 return true;
1692 }
1693
EndContainerElement(void)1694 void CObjectIStreamXml::EndContainerElement(void)
1695 {
1696 if ( !WillHaveName(TopFrame().GetTypeInfo()) ) {
1697 EndArrayElement();
1698 }
1699 }
1700
HasAnyContent(const CClassTypeInfoBase * classType,TMemberIndex pos)1701 TMemberIndex CObjectIStreamXml::HasAnyContent(const CClassTypeInfoBase* classType, TMemberIndex pos)
1702 {
1703 const CItemsInfo& items = classType->GetItems();
1704 TMemberIndex i = (pos != kInvalidMember ? pos : items.FirstIndex());
1705 for (; i <= items.LastIndex(); ++i) {
1706 const CItemInfo* itemInfo = items.GetItemInfo( i );
1707 if (itemInfo->GetId().HasAnyContent()) {
1708 return i;
1709 }
1710 if (itemInfo->GetId().HasNotag()) {
1711 if (itemInfo->GetTypeInfo()->GetTypeFamily() == eTypeFamilyContainer) {
1712 CObjectTypeInfo elem = CObjectTypeInfo(itemInfo->GetTypeInfo()).GetElementType();
1713 if (elem.GetTypeFamily() == eTypeFamilyPointer) {
1714 elem = elem.GetPointedType();
1715 }
1716 if (elem.GetTypeFamily() == eTypeFamilyPrimitive &&
1717 elem.GetPrimitiveValueType() == ePrimitiveValueAny) {
1718 return i;
1719 }
1720 }
1721 }
1722 }
1723 /*
1724 if (items.Size() == 1) {
1725 const CItemInfo* itemInfo = items.GetItemInfo( items.FirstIndex() );
1726 if (itemInfo->GetId().HasNotag()) {
1727 if (itemInfo->GetTypeInfo()->GetTypeFamily() == eTypeFamilyContainer) {
1728 CObjectTypeInfo elem = CObjectTypeInfo(itemInfo->GetTypeInfo()).GetElementType();
1729 if (elem.GetTypeFamily() == eTypeFamilyPointer) {
1730 elem = elem.GetPointedType();
1731 }
1732 if (elem.GetTypeFamily() == eTypeFamilyPrimitive &&
1733 elem.GetPrimitiveValueType() == ePrimitiveValueAny) {
1734 return items.FirstIndex();
1735 }
1736 }
1737 }
1738 }
1739 */
1740 return kInvalidMember;
1741 }
1742
HasMoreElements(TTypeInfo elementType)1743 bool CObjectIStreamXml::HasMoreElements(TTypeInfo elementType)
1744 {
1745 bool no_more=false;
1746 try {
1747 no_more = ThisTagIsSelfClosed() || NextTagIsClosing();
1748 } catch (CEofException&) {
1749 no_more = true;
1750 }
1751 if (no_more) {
1752 m_LastPrimitive.erase();
1753 return false;
1754 }
1755 if (x_IsStdXml()) {
1756 CTempString tagName;
1757 TTypeInfo type = GetRealTypeInfo(elementType);
1758 // this is to handle STL containers of primitive types
1759 if (GetRealTypeFamily(type) == eTypeFamilyPrimitive) {
1760 if (m_SkipNextTag) {
1761 return true;
1762 } else if (!m_RejectedTag.empty()) {
1763 m_LastPrimitive = m_RejectedTag;
1764 return true;
1765 } else {
1766 tagName = ReadName(BeginOpeningTag());
1767 UndoClassMember();
1768 bool res = (m_LastPrimitive.empty() ||
1769 tagName == m_LastPrimitive || tagName == type->GetName() ||
1770 CObjectTypeInfo(type).GetPrimitiveValueType() == ePrimitiveValueAny);
1771 if (!res) {
1772 m_LastPrimitive.erase();
1773 }
1774 return res;
1775 }
1776 }
1777 const CClassTypeInfoBase* classType =
1778 dynamic_cast<const CClassTypeInfoBase*>(type);
1779 const CAliasTypeInfo* aliasType = classType ? NULL :
1780 dynamic_cast<const CAliasTypeInfo*>(type);
1781 if (aliasType && aliasType->IsFullAlias()) {
1782 classType = dynamic_cast<const CClassTypeInfoBase*>(GetRealTypeInfo(aliasType));
1783 }
1784 if (classType || aliasType) {
1785 if (m_RejectedTag.empty()) {
1786 if (!NextIsTag()) {
1787 return true;
1788 }
1789 tagName = ReadName(BeginOpeningTag());
1790 } else {
1791 tagName = RejectedName();
1792 }
1793 UndoClassMember();
1794
1795 if (classType && classType->GetName().empty()) {
1796 return classType->GetItems().FindDeep(tagName) != kInvalidMember ||
1797 HasAnyContent(classType) != kInvalidMember;
1798 }
1799 return (classType && tagName == classType->GetName()) || (aliasType && tagName == aliasType->GetName());
1800 }
1801 }
1802 return true;
1803 }
1804
1805
FindDeep(TTypeInfo type,const CTempString & name) const1806 TMemberIndex CObjectIStreamXml::FindDeep(TTypeInfo type,
1807 const CTempString& name) const
1808 {
1809 for (;;) {
1810 if (type->GetTypeFamily() == eTypeFamilyContainer) {
1811 const CContainerTypeInfo* cont =
1812 dynamic_cast<const CContainerTypeInfo*>(type);
1813 if (cont) {
1814 type = cont->GetElementType();
1815 }
1816 } else if (type->GetTypeFamily() == eTypeFamilyPointer) {
1817 const CPointerTypeInfo* ptr =
1818 dynamic_cast<const CPointerTypeInfo*>(type);
1819 if (ptr) {
1820 type = ptr->GetPointedType();
1821 }
1822 } else {
1823 break;
1824 }
1825 }
1826 const CClassTypeInfoBase* classType =
1827 dynamic_cast<const CClassTypeInfoBase*>(type);
1828 if (classType) {
1829 TMemberIndex i = classType->GetItems().FindDeep(name);
1830 if (i != kInvalidMember) {
1831 return i;
1832 }
1833 }
1834 return kInvalidMember;
1835 }
1836
1837 #ifdef VIRTUAL_MID_LEVEL_IO
ReadContainer(const CContainerTypeInfo * containerType,TObjectPtr containerPtr)1838 void CObjectIStreamXml::ReadContainer(const CContainerTypeInfo* containerType,
1839 TObjectPtr containerPtr)
1840 {
1841 if ( m_StdXml || containerType->GetName().empty() ) {
1842 ReadContainerContents(containerType, containerPtr);
1843 }
1844 else {
1845 BEGIN_OBJECT_FRAME2(eFrameArray, containerType);
1846 OpenTag(containerType);
1847
1848 ReadContainerContents(containerType, containerPtr);
1849
1850 CloseTag(containerType);
1851 END_OBJECT_FRAME();
1852 }
1853 }
1854
SkipContainer(const CContainerTypeInfo * containerType)1855 void CObjectIStreamXml::SkipContainer(const CContainerTypeInfo* containerType)
1856 {
1857 if ( m_StdXml || containerType->GetName().empty() ) {
1858 SkipContainerContents(containerType);
1859 }
1860 else {
1861 BEGIN_OBJECT_FRAME2(eFrameArray, containerType);
1862 OpenTag(containerType);
1863
1864 SkipContainerContents(containerType);
1865
1866 CloseTag(containerType);
1867 END_OBJECT_FRAME();
1868 }
1869 }
1870 #endif
1871
1872
BeginArrayElement(TTypeInfo elementType)1873 void CObjectIStreamXml::BeginArrayElement(TTypeInfo elementType)
1874 {
1875 if (x_IsStdXml()) {
1876 CObjectTypeInfo type(GetRealTypeInfo(elementType));
1877 if (type.GetTypeFamily() != eTypeFamilyPrimitive ||
1878 type.GetPrimitiveValueType() == ePrimitiveValueAny) {
1879 TopFrame().SetNotag();
1880 return;
1881 }
1882 if (m_SkipNextTag && type.GetTypeFamily() == eTypeFamilyPrimitive) {
1883 TopFrame().SetNotag();
1884 return;
1885 }
1886 }
1887 OpenStackTag(0);
1888 }
1889
EndArrayElement(void)1890 void CObjectIStreamXml::EndArrayElement(void)
1891 {
1892 if (TopFrame().GetNotag()) {
1893 TopFrame().SetNotag(false);
1894 } else {
1895 CloseStackTag(0);
1896 }
1897 }
1898
ReadContainerContents(const CContainerTypeInfo * cType,TObjectPtr containerPtr)1899 void CObjectIStreamXml::ReadContainerContents(const CContainerTypeInfo* cType,
1900 TObjectPtr containerPtr)
1901 {
1902 int count = 0;
1903 TTypeInfo elementType = cType->GetElementType();
1904 if ( !WillHaveName(elementType) ) {
1905 BEGIN_OBJECT_FRAME2(eFrameArrayElement, elementType);
1906
1907 CContainerTypeInfo::CIterator iter;
1908 bool old_element = cType->InitIterator(iter, containerPtr);
1909 while ( HasMoreElements(elementType) ) {
1910 BeginArrayElement(elementType);
1911 do {
1912 if ( old_element ) {
1913 elementType->ReadData(*this, cType->GetElementPtr(iter));
1914 old_element = cType->NextElement(iter);
1915 }
1916 else {
1917 cType->AddElement(containerPtr, *this);
1918 }
1919 } while (!m_RejectedTag.empty() &&
1920 FindDeep(elementType,m_RejectedTag) != kInvalidMember);
1921 EndArrayElement();
1922 ++count;
1923 }
1924 if ( old_element ) {
1925 cType->EraseAllElements(iter);
1926 }
1927
1928 END_OBJECT_FRAME();
1929 }
1930 else {
1931 CContainerTypeInfo::CIterator iter;
1932 bool old_element = cType->InitIterator(iter, containerPtr);
1933 while ( HasMoreElements(elementType) ) {
1934 if ( old_element ) {
1935 elementType->ReadData(*this, cType->GetElementPtr(iter));
1936 old_element = cType->NextElement(iter);
1937 }
1938 else {
1939 cType->AddElement(containerPtr, *this);
1940 }
1941 ++count;
1942 }
1943 if ( old_element ) {
1944 cType->EraseAllElements(iter);
1945 }
1946 }
1947 if (count == 0) {
1948 const TFrame& frame = FetchFrameFromTop(0);
1949 if (frame.GetFrameType() == CObjectStackFrame::eFrameNamed) {
1950 const CClassTypeInfo* clType =
1951 dynamic_cast<const CClassTypeInfo*>(frame.GetTypeInfo());
1952 if (clType && clType->Implicit() && clType->IsImplicitNonEmpty()) {
1953 ThrowError(fFormatError, "container is empty");
1954 }
1955 }
1956 }
1957 }
1958
SkipContainerContents(const CContainerTypeInfo * cType)1959 void CObjectIStreamXml::SkipContainerContents(const CContainerTypeInfo* cType)
1960 {
1961 TTypeInfo elementType = cType->GetElementType();
1962 if ( !WillHaveName(elementType) ) {
1963 BEGIN_OBJECT_FRAME2(eFrameArrayElement, elementType);
1964
1965 while ( HasMoreElements(elementType) ) {
1966 BeginArrayElement(elementType);
1967 SkipObject(elementType);
1968 EndArrayElement();
1969 }
1970
1971 END_OBJECT_FRAME();
1972 }
1973 else {
1974 while ( HasMoreElements(elementType) ) {
1975 SkipObject(elementType);
1976 }
1977 }
1978 }
1979
BeginNamedType(TTypeInfo namedTypeInfo)1980 void CObjectIStreamXml::BeginNamedType(TTypeInfo namedTypeInfo)
1981 {
1982 CheckStdXml(namedTypeInfo);
1983 if (m_SkipNextTag || namedTypeInfo->GetName().empty()) {
1984 TopFrame().SetNotag();
1985 m_SkipNextTag = false;
1986 } else {
1987 TTypeInfo realtype = GetRealTypeInfo(namedTypeInfo);
1988 if (realtype->GetTypeFamily() == eTypeFamilyPrimitive &&
1989 GetStackDepth() > 2 && m_StdXml) {
1990 TopFrame().SetNotag();
1991 m_SkipNextTag = false;
1992 return;
1993 }
1994 OpenTag(namedTypeInfo);
1995 }
1996 const CAliasTypeInfo* aliasType =
1997 dynamic_cast<const CAliasTypeInfo*>(namedTypeInfo);
1998 if (aliasType) {
1999 m_SkipNextTag = aliasType->IsFullAlias();
2000 }
2001 else if (m_StdXml) {
2002 const CClassTypeInfo* classType = dynamic_cast<const CClassTypeInfo*>(namedTypeInfo);
2003 m_SkipNextTag = (classType && classType->Implicit());
2004 }
2005 }
2006
EndNamedType(void)2007 void CObjectIStreamXml::EndNamedType(void)
2008 {
2009 m_SkipNextTag = false;
2010 if (TopFrame().GetNotag()) {
2011 TopFrame().SetNotag(false);
2012 return;
2013 }
2014 CloseTag(TopFrame().GetTypeInfo()->GetName());
2015 }
2016
2017 #ifdef VIRTUAL_MID_LEVEL_IO
2018
ReadNamedType(TTypeInfo namedTypeInfo,TTypeInfo typeInfo,TObjectPtr object)2019 void CObjectIStreamXml::ReadNamedType(TTypeInfo namedTypeInfo,
2020 TTypeInfo typeInfo,
2021 TObjectPtr object)
2022 {
2023 BEGIN_OBJECT_FRAME2(eFrameNamed, namedTypeInfo);
2024
2025 BeginNamedType(namedTypeInfo);
2026 ReadObject(object, typeInfo);
2027 EndNamedType();
2028
2029 END_OBJECT_FRAME();
2030 }
2031 #endif
2032
CheckStdXml(TTypeInfo typeinfo)2033 void CObjectIStreamXml::CheckStdXml(TTypeInfo typeinfo)
2034 {
2035 if (typeinfo->GetCodeVersion() > 21600) {
2036 m_StdXml = typeinfo->GetDataSpec() != EDataSpec::eASN;
2037 } else {
2038 const CClassTypeInfo* classType =
2039 dynamic_cast<const CClassTypeInfo*>(typeinfo);
2040 if (classType) {
2041 TMemberIndex first = classType->GetItems().FirstIndex();
2042 m_StdXml = classType->GetItems().GetItemInfo(first)->GetId().HaveNoPrefix();
2043 }
2044 }
2045 }
2046
BeginClass(const CClassTypeInfo * classInfo)2047 void CObjectIStreamXml::BeginClass(const CClassTypeInfo* classInfo)
2048 {
2049 CheckStdXml(classInfo);
2050 if (m_SkipNextTag) {
2051 TopFrame().SetNotag();
2052 m_SkipNextTag = false;
2053 return;
2054 }
2055 if (x_IsStdXml()) {
2056 if (!m_Attlist) {
2057 // if class spec defines no attributes, but there are some - skip them
2058 if (HasAttlist() && !classInfo->GetMemberInfo(
2059 classInfo->GetMembers().FirstIndex())->GetId().IsAttlist()) {
2060 ReadUndefinedAttributes();
2061 }
2062 }
2063 if (m_Attlist || HasAttlist()) {
2064 TopFrame().SetNotag();
2065 } else {
2066 OpenTagIfNamed(classInfo);
2067 }
2068 } else {
2069 OpenTagIfNamed(classInfo);
2070 }
2071 }
2072
EndClass(void)2073 void CObjectIStreamXml::EndClass(void)
2074 {
2075 if (TopFrame().GetNotag()) {
2076 TopFrame().SetNotag(false);
2077 } else {
2078 CloseTagIfNamed(TopFrame().GetTypeInfo());
2079 }
2080 x_EndTypeNamespace();
2081 }
2082
UnexpectedMember(const CTempString & id,const CItemsInfo & items)2083 void CObjectIStreamXml::UnexpectedMember(const CTempString& id,
2084 const CItemsInfo& items)
2085 {
2086 string message =
2087 "\""+string(id)+"\": unexpected member, should be one of: ";
2088 for ( CItemsInfo::CIterator i(items); i.Valid(); ++i ) {
2089 message += '\"' + items.GetItemInfo(i)->GetId().ToString() + "\" ";
2090 }
2091 ThrowError(fFormatError, message);
2092 }
2093
2094 TMemberIndex
BeginClassMember(const CClassTypeInfo * classType)2095 CObjectIStreamXml::BeginClassMember(const CClassTypeInfo* classType)
2096 {
2097 CTempString tagName;
2098 bool more;
2099 do {
2100 more = false;
2101 if (m_RejectedTag.empty()) {
2102 if (m_Attlist && InsideTag()) {
2103 if (HasAttlist()) {
2104 tagName = ReadName(SkipWS());
2105 } else {
2106 return kInvalidMember;
2107 }
2108 } else {
2109 if (!m_Attlist && InsideOpeningTag()) {
2110 TMemberIndex first = classType->GetMembers().FirstIndex();
2111 if (classType->GetMemberInfo(first)->GetId().IsAttlist()) {
2112 m_Attlist = true;
2113 return first;
2114 }
2115 }
2116 m_Attlist = false;
2117 if ( NextTagIsClosing() )
2118 return kInvalidMember;
2119 tagName = ReadName(BeginOpeningTag());
2120 }
2121 } else {
2122 tagName = RejectedName();
2123 }
2124 TMemberIndex ind = classType->GetMembers().Find(tagName);
2125 if ( ind != kInvalidMember ) {
2126 if (x_IsStdXml()) {
2127 const CMemberInfo *mem_info = classType->GetMemberInfo(ind);
2128 ETypeFamily type = GetRealTypeFamily(mem_info->GetTypeInfo());
2129 bool needUndo = false;
2130 if (!GetEnforcedStdXml()) {
2131 needUndo = (type != eTypeFamilyPrimitive);
2132 }
2133 if (needUndo) {
2134 TopFrame().SetNotag();
2135 UndoClassMember();
2136 }
2137 return ind;
2138 }
2139 }
2140 // if it is an attribute list, but the tag is unrecognized - just skip it
2141 if (m_Attlist) {
2142 if (ind == kInvalidMember && tagName.empty()) {
2143 return ind;
2144 }
2145 string value;
2146 ReadAttributeValue(value);
2147 m_Input.SkipChar();
2148 more = true;
2149 }
2150 } while (more);
2151
2152 CTempString id = SkipStackTagName(tagName, 1, '_');
2153 TMemberIndex index = classType->GetMembers().Find(id);
2154 if ( index == kInvalidMember ) {
2155 if (CanSkipUnknownMembers()) {
2156 SetFailFlags(fUnknownValue);
2157 string tag(tagName);
2158 if (SkipAnyContent()) {
2159 CloseTag(tag);
2160 }
2161 return BeginClassMember(classType);
2162 } else {
2163 UnexpectedMember(id, classType->GetMembers());
2164 }
2165 }
2166 return index;
2167 }
2168
// Finds the next member of a class, given that members before 'pos' have
// already been handled.  Returns the index of the member to read, or
// kInvalidMember when the class has no more members in the input.
//
// The member name is obtained from (in this order): the rejected tag, the
// next recognized attribute while an attribute list is being read, or the
// next opening tag.  Several cases return a member without reading any tag
// and mark the top frame as "no tag" (attribute list member, no-tag members,
// members found only through a deep search, any-content members).
// NOTE(review): the function recurses into itself after skipping an unknown
// element; the statement order below matters because most helpers change
// the tag state, m_Attlist or the rejected tag.
2169 TMemberIndex
BeginClassMember(const CClassTypeInfo * classType,TMemberIndex pos)2170 CObjectIStreamXml::BeginClassMember(const CClassTypeInfo* classType,
2171 TMemberIndex pos)
2172 {
2173 CTempString tagName;
2174 TMemberIndex first = classType->GetMembers().FirstIndex();
// No rejected tag pending: the name has to come from the stream.
2175 if (m_RejectedTag.empty()) {
// Reading an attribute list: scan attributes until one is a known member;
// values of unknown attributes are read and dropped.
2176 if (m_Attlist && InsideTag()) {
2177 if (HasAttlist()) {
2178 for (;;) {
2179 char ch = SkipWS();
2180 if (IsEndOfTagChar(ch)) {
2181 return kInvalidMember;
2182 }
2183 tagName = ReadName(ch);
2184 if (!tagName.empty()) {
2185 if (classType->GetMembers().Find(tagName) != kInvalidMember) {
2186 break;
2187 }
2188 string value;
2189 ReadAttributeValue(value, true);
2190 }
2191 }
2192 } else {
2193 return kInvalidMember;
2194 }
2195 } else {
// Not (or no longer) inside an attribute list.
2196 if (!m_Attlist) {
2197 if (pos == first) {
// The first member is the attribute list itself: switch to attribute mode
// and, if the opening tag was already left, step back into it.
2198 if (classType->GetMemberInfo(first)->GetId().IsAttlist()) {
2199 m_Attlist = true;
2200 if (m_TagState == eTagOutside) {
2201 m_Input.UngetChar('>');
2202 m_TagState = eTagInsideOpening;
2203 }
2204 return first;
2205 }
2206 // if class spec defines no attributes, but there are some - skip them
2207 if (HasAttlist()) {
2208 ReadUndefinedAttributes();
2209 }
2210 }
2211 }
// Attribute list just ended on a tag that is not self-closed: the member
// right after the attribute list is returned directly if it has no tag.
2212 if (m_Attlist && !SelfClosedTag()) {
2213 m_Attlist = false;
2214 TMemberIndex ind = first+1;
2215 if (classType->GetMemberInfo(ind)->GetId().HasNotag()) {
2216 TopFrame().SetNotag();
2217 return ind;
2218 }
2219 if ( NextTagIsClosing() )
2220 return kInvalidMember;
2221 /*
2222 if (!NextIsTag()) {
2223 TMemberIndex ind = first+1;
2224 if (classType->GetMemberInfo(ind)->GetId().HasNotag()) {
2225 TopFrame().SetNotag();
2226 return ind;
2227 }
2228 }
2229 */
2230 }
// Self-closed element: only a trailing no-tag member without any-content
// can still be returned; everything else means "no more members".
2231 if ( SelfClosedTag() || ThisTagIsSelfClosed()) {
2232 m_Attlist = false;
2233 TMemberIndex last = classType->GetMembers().LastIndex();
2234 if (pos == last) {
2235 if (classType->GetMemberInfo(pos)->GetId().HasNotag() &&
2236 !classType->GetMemberInfo(pos)->GetId().HasAnyContent()) {
2237 TopFrame().SetNotag();
2238 return pos;
2239 }
2240 }
2241 return kInvalidMember;
2242 }
2243 if ( NextTagIsClosing() )
2244 return kInvalidMember;
// 'pos' still refers to an existing member: a no-tag primitive member is
// returned without reading a tag.
2245 if (pos <= classType->GetItems().LastIndex()) {
2246 const CMemberInfo* mem_info = classType->GetMemberInfo(pos);
2247 if (mem_info->GetId().HasNotag() &&
2248 !mem_info->GetId().HasAnyContent()) {
2249 if (GetRealTypeFamily(mem_info->GetTypeInfo()) == eTypeFamilyPrimitive) {
2250 TopFrame().SetNotag();
2251 return pos;
2252 }
2253 }
2254 } else {
// All members were consumed: optionally skip trailing elements until one
// is recognized by IsKnownElement(), then report the end of the class.
2255 if (CanSkipUnknownMembers()) {
2256 while (NextIsTag()) {
2257 tagName = ReadName(BeginOpeningTag());
2258 UndoClassMember();
2259 if (IsKnownElement(tagName)) {
2260 break;
2261 }
2262 SetFailFlags(fUnknownValue);
2263 SkipAnyContentObject();
2264 }
2265 }
2266 return kInvalidMember;
2267 }
2268 if (!NextIsTag()) {
2269 return kInvalidMember;
2270 }
2271 tagName = ReadName(BeginOpeningTag());
2272 }
2273 } else {
2274 tagName = RejectedName();
2275 }
2276
// Resolve the name: direct lookup first, then a deep lookup.  A deep match
// at or after 'pos' pushes the tag back and is returned as a no-tag member.
2277 TMemberIndex ind = classType->GetMembers().Find(tagName);
2278 if (ind == kInvalidMember) {
2279 ind = classType->GetMembers().FindDeep(tagName);
2280 if (ind != kInvalidMember && ind >= pos) {
2281 TopFrame().SetNotag();
2282 UndoClassMember();
2283 return ind;
2284 }
2285 } else {
2286 const CMemberInfo *mem_info = classType->GetMemberInfo(ind);
2287 if (x_IsStdXml()) {
// Direct match while x_IsStdXml() holds: decide whether the tag just read
// belongs to the member's own content and therefore must be pushed back.
2288 ETypeFamily type = GetRealTypeFamily(mem_info->GetTypeInfo());
2289 bool needUndo = false;
2290 if (GetEnforcedStdXml()) {
2291 if (type == eTypeFamilyContainer) {
2292 TTypeInfo mem_type = GetRealTypeInfo(mem_info->GetTypeInfo());
2293 TTypeInfo elem_type = GetContainerElementTypeInfo(mem_type);
2294 needUndo = (elem_type->GetTypeFamily() == eTypeFamilyPrimitive &&
2295 elem_type->GetName() == mem_type->GetName());
2296 }
2297 } else {
2298 needUndo = mem_info->GetId().HasNotag() || mem_info->GetId().HasAnyContent() || type == eTypeFamilyContainer;
2299 m_SkipNextTag = type != eTypeFamilyPrimitive && type != eTypeFamilyContainer;
2300 }
2301 if (needUndo) {
2302 TopFrame().SetNotag();
2303 UndoClassMember();
2304 }
2305 return ind;
2306 }
2307 }
// No usable match while x_IsStdXml() holds: push the tag back, try an
// any-content member, else skip the unknown element (if allowed) and retry.
2308 if (x_IsStdXml()) {
2309 UndoClassMember();
2310 ind = HasAnyContent(classType,pos);
2311 if (ind != kInvalidMember) {
2312 TopFrame().SetNotag();
2313 return ind;
2314 }
2315 if (CanSkipUnknownMembers() &&
2316 pos <= classType->GetMembers().LastIndex()) {
2317 SetFailFlags(fUnknownValue);
2318 string tag(RejectedName());
2319 if (SkipAnyContent()) {
2320 CloseTag(tag);
2321 }
2322 return BeginClassMember(classType, pos);
2323 }
2324 return kInvalidMember;
2325 }
// Otherwise the member id is what remains of the tag after stripping the
// prefix derived from the frame stack; it is looked up starting at 'pos'.
2326 CTempString id = SkipStackTagName(tagName, 1, '_');
2327 TMemberIndex index = classType->GetMembers().Find(id, pos);
2328 if ( index == kInvalidMember ) {
2329 if (CanSkipUnknownMembers()) {
2330 SetFailFlags(fUnknownValue);
2331 string tag(tagName);
2332 if (SkipAnyContent()) {
2333 CloseTag(tag);
2334 }
2335 return BeginClassMember(classType, pos);
2336 } else {
2337 UnexpectedMember(id, classType->GetMembers());
2338 }
2339 }
2340 return index;
2341 }
2342
EndClassMember(void)2343 void CObjectIStreamXml::EndClassMember(void)
2344 {
2345 m_SkipNextTag = false;
2346 if (TopFrame().GetNotag()) {
2347 TopFrame().SetNotag(false);
2348 } else {
2349 CloseStackTag(0);
2350 }
2351 }
2352
UndoClassMember(void)2353 void CObjectIStreamXml::UndoClassMember(void)
2354 {
2355 if (InsideOpeningTag()) {
2356 m_RejectedTag = m_LastTag;
2357 m_TagState = eTagOutside;
2358 #if defined(NCBI_SERIAL_IO_TRACE)
2359 cout << ", Undo= " << m_LastTag;
2360 #endif
2361 }
2362 }
2363
BeginChoice(const CChoiceTypeInfo * choiceType)2364 void CObjectIStreamXml::BeginChoice(const CChoiceTypeInfo* choiceType)
2365 {
2366 CheckStdXml(choiceType);
2367 if (m_SkipNextTag) {
2368 TopFrame().SetNotag();
2369 m_SkipNextTag = false;
2370 return;
2371 }
2372 OpenTagIfNamed(choiceType);
2373 }
EndChoice(void)2374 void CObjectIStreamXml::EndChoice(void)
2375 {
2376 if (TopFrame().GetNotag()) {
2377 TopFrame().SetNotag(false);
2378 return;
2379 }
2380 CloseTagIfNamed(TopFrame().GetTypeInfo());
2381 x_EndTypeNamespace();
2382 }
2383
BeginChoiceVariant(const CChoiceTypeInfo * choiceType)2384 TMemberIndex CObjectIStreamXml::BeginChoiceVariant(const CChoiceTypeInfo* choiceType)
2385 {
2386 CTempString tagName;
2387 TMemberIndex first = choiceType->GetVariants().FirstIndex();
2388 if (m_RejectedTag.empty()) {
2389 if (!m_Attlist) {
2390 if (choiceType->GetVariantInfo(first)->GetId().IsAttlist()) {
2391 m_Attlist = true;
2392 if (m_TagState == eTagOutside) {
2393 m_Input.UngetChar('>');
2394 m_TagState = eTagInsideOpening;
2395 }
2396 TopFrame().SetNotag();
2397 return first;
2398 }
2399 // if spec defines no attributes, but there are some - skip them
2400 if (HasAttlist()) {
2401 ReadUndefinedAttributes();
2402 }
2403 }
2404 m_Attlist = false;
2405 if ( SelfClosedTag() ) {
2406 return kInvalidMember;
2407 }
2408 if ( NextTagIsClosing() ) {
2409 if (choiceType->MayBeEmpty()) {
2410 return kInvalidMember;
2411 }
2412 TMemberIndex ind = choiceType->GetVariants().FindEmpty();
2413 if (ind != kInvalidMember) {
2414 TopFrame().SetNotag();
2415 }
2416 return ind;
2417 }
2418 if (!NextIsTag()) {
2419 const CItemsInfo& items = choiceType->GetItems();
2420 for (TMemberIndex i = items.FirstIndex(); i <= items.LastIndex(); ++i) {
2421 if (items.GetItemInfo(i)->GetId().HasNotag()) {
2422 if (GetRealTypeFamily(items.GetItemInfo(i)->GetTypeInfo()) == eTypeFamilyPrimitive) {
2423 TopFrame().SetNotag();
2424 return i;
2425 }
2426 }
2427 }
2428
2429 }
2430 tagName = ReadName(BeginOpeningTag());
2431 } else {
2432 tagName = RejectedName();
2433 }
2434 TMemberIndex ind = choiceType->GetVariants().Find(tagName);
2435 if (ind == kInvalidMember) {
2436 ind = choiceType->GetVariants().FindDeep(tagName);
2437 if (ind != kInvalidMember) {
2438 TopFrame().SetNotag();
2439 UndoClassMember();
2440 return ind;
2441 }
2442 } else {
2443 const CVariantInfo *var_info = choiceType->GetVariantInfo(ind);
2444 if (x_IsStdXml()) {
2445 ETypeFamily type = GetRealTypeFamily(var_info->GetTypeInfo());
2446 bool needUndo = false;
2447 if (GetEnforcedStdXml()) {
2448 if (type == eTypeFamilyContainer) {
2449 TTypeInfo var_type = GetRealTypeInfo(var_info->GetTypeInfo());
2450 TTypeInfo elem_type = GetContainerElementTypeInfo(var_type);
2451 needUndo = (elem_type->GetTypeFamily() == eTypeFamilyPrimitive &&
2452 elem_type->GetName() == var_type->GetName());
2453 }
2454 } else {
2455 needUndo = var_info->GetId().HasNotag() || var_info->GetId().HasAnyContent() || type == eTypeFamilyContainer;
2456 m_SkipNextTag = type != eTypeFamilyPrimitive && type != eTypeFamilyContainer;
2457 }
2458 if (needUndo) {
2459 TopFrame().SetNotag();
2460 UndoClassMember();
2461 }
2462 return ind;
2463 }
2464 }
2465 if (x_IsStdXml()) {
2466 UndoClassMember();
2467 UnexpectedMember(tagName, choiceType->GetVariants());
2468 }
2469 CTempString id = SkipStackTagName(tagName, 1, '_');
2470 ind = choiceType->GetVariants().Find(id);
2471 if ( ind == kInvalidMember ) {
2472 if (CanSkipUnknownVariants()) {
2473 SetFailFlags(fUnknownValue);
2474 UndoClassMember();
2475 } else {
2476 UnexpectedMember(tagName, choiceType->GetVariants());
2477 }
2478 }
2479 return ind;
2480 }
2481
EndChoiceVariant(void)2482 void CObjectIStreamXml::EndChoiceVariant(void)
2483 {
2484 m_SkipNextTag = false;
2485 if (TopFrame().GetNotag()) {
2486 TopFrame().SetNotag(false);
2487 } else {
2488 CloseStackTag(0);
2489 }
2490 }
2491
BeginBytes(ByteBlock &)2492 void CObjectIStreamXml::BeginBytes(ByteBlock& )
2493 {
2494 BeginData();
2495 }
2496
GetHexChar(void)2497 int CObjectIStreamXml::GetHexChar(void)
2498 {
2499 char c = m_Input.GetChar();
2500 if ( c >= '0' && c <= '9' ) {
2501 return c - '0';
2502 }
2503 else if ( c >= 'A' && c <= 'Z' ) {
2504 return c - 'A' + 10;
2505 }
2506 else if ( c >= 'a' && c <= 'z' ) {
2507 return c - 'a' + 10;
2508 }
2509 else {
2510 m_Input.UngetChar(c);
2511 if ( c != '<' )
2512 ThrowError(fFormatError, "invalid char in octet string");
2513 }
2514 return -1;
2515 }
2516
GetBase64Char(void)2517 int CObjectIStreamXml::GetBase64Char(void)
2518 {
2519 char c = SkipWS();
2520 if ( IsDigit(c) ||
2521 ( c >= 'A' && c <= 'Z' ) ||
2522 ( c >= 'a' && c <= 'z' ) ||
2523 ( c == '+' || c == '/' || c == '=')) {
2524 return c;
2525 }
2526 else {
2527 if ( c != '<' )
2528 ThrowError(fFormatError, "invalid char in base64Binary data");
2529 }
2530 return -1;
2531 }
2532
ReadBytes(ByteBlock & block,char * dst,size_t length)2533 size_t CObjectIStreamXml::ReadBytes(ByteBlock& block,
2534 char* dst, size_t length)
2535 {
2536 size_t count = 0;
2537 if (IsCompressed()) {
2538 bool end_of_data = false;
2539 const size_t chunk_in = 80;
2540 char src_buf[chunk_in];
2541 size_t bytes_left = length;
2542 size_t src_size, src_read, dst_written;
2543 while (!end_of_data && bytes_left > chunk_in && bytes_left <= length) {
2544 for ( src_size = 0; src_size < chunk_in; ) {
2545 int c = GetBase64Char();
2546 if (c < 0) {
2547 end_of_data = true;
2548 break;
2549 }
2550 /*if (c != '=')*/ {
2551 src_buf[ src_size++ ] = (char)c;
2552 }
2553 m_Input.SkipChar();
2554 }
2555 BASE64_Decode( src_buf, src_size, &src_read,
2556 dst, bytes_left, &dst_written);
2557 if (src_size != src_read) {
2558 ThrowError(fFail, "error decoding base64Binary data");
2559 }
2560 count += dst_written;
2561 bytes_left -= dst_written;
2562 dst += dst_written;
2563 }
2564 if (end_of_data) {
2565 block.EndOfBlock();
2566 }
2567 return count;;
2568 }
2569 while ( length-- > 0 ) {
2570 int c1 = GetHexChar();
2571 if ( c1 < 0 ) {
2572 block.EndOfBlock();
2573 return count;
2574 }
2575 int c2 = GetHexChar();
2576 if ( c2 < 0 ) {
2577 *dst++ = char(c1 << 4);
2578 count++;
2579 block.EndOfBlock();
2580 return count;
2581 }
2582 else {
2583 *dst++ = char((c1 << 4) | c2);
2584 count++;
2585 }
2586 }
2587 return count;
2588 }
2589
BeginChars(CharBlock &)2590 void CObjectIStreamXml::BeginChars(CharBlock& )
2591 {
2592 BeginData();
2593 }
2594
ReadChars(CharBlock & block,char * dst,size_t length)2595 size_t CObjectIStreamXml::ReadChars(CharBlock& block,
2596 char* dst, size_t length)
2597 {
2598 size_t count = 0;
2599 while ( length-- > 0 ) {
2600 char c = m_Input.GetChar();
2601 if (c == '<') {
2602 block.EndOfBlock();
2603 break;
2604 }
2605 *dst++ = c;
2606 count++;
2607 }
2608 return count;
2609 }
2610
SkipBool(void)2611 void CObjectIStreamXml::SkipBool(void)
2612 {
2613 ReadBool();
2614 }
2615
SkipChar(void)2616 void CObjectIStreamXml::SkipChar(void)
2617 {
2618 ReadChar();
2619 }
2620
SkipSNumber(void)2621 void CObjectIStreamXml::SkipSNumber(void)
2622 {
2623 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
2624 return;
2625 }
2626 BeginData();
2627 size_t i;
2628 char c = SkipWSAndComments();
2629 switch ( c ) {
2630 case '+':
2631 case '-':
2632 c = m_Input.PeekChar(1);
2633 // next char
2634 i = 2;
2635 break;
2636 default:
2637 // next char
2638 i = 1;
2639 break;
2640 }
2641 if ( c < '0' || c > '9' ) {
2642 ThrowError(fFormatError, "invalid symbol in number");
2643 }
2644 while ( (c = m_Input.PeekCharNoEOF(i)) >= '0' && c <= '9' ) {
2645 ++i;
2646 }
2647 m_Input.SkipChars(i);
2648 }
2649
SkipUNumber(void)2650 void CObjectIStreamXml::SkipUNumber(void)
2651 {
2652 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
2653 return;
2654 }
2655 BeginData();
2656 size_t i;
2657 char c = SkipWSAndComments();
2658 switch ( c ) {
2659 case '+':
2660 c = m_Input.PeekChar(1);
2661 // next char
2662 i = 2;
2663 break;
2664 default:
2665 // next char
2666 i = 1;
2667 break;
2668 }
2669 if ( c < '0' || c > '9' ) {
2670 ThrowError(fFormatError, "invalid symbol in number");
2671 }
2672 while ( (c = m_Input.PeekCharNoEOF(i)) >= '0' && c <= '9' ) {
2673 ++i;
2674 }
2675 m_Input.SkipChars(i);
2676 }
2677
SkipFNumber(void)2678 void CObjectIStreamXml::SkipFNumber(void)
2679 {
2680 ReadDouble();
2681 }
2682
SkipString(EStringType type)2683 void CObjectIStreamXml::SkipString(EStringType type)
2684 {
2685 if (ExpectSpecialCase()!=0 && UseSpecialCaseRead()) {
2686 return;
2687 }
2688 BeginData();
2689 EEncoding enc = m_Encoding;
2690 if (type == eStringTypeUTF8) {
2691 m_Encoding = eEncoding_ISO8859_1;
2692 }
2693 while ( ReadEscapedChar(m_Attlist ? '\"' : '<') >= 0 )
2694 continue;
2695 m_Encoding = enc;
2696 }
2697
SkipNull(void)2698 void CObjectIStreamXml::SkipNull(void)
2699 {
2700 if ( !EndOpeningTagSelfClosed() )
2701 ThrowError(fFormatError, "empty tag expected");
2702 }
2703
SkipByteBlock(void)2704 void CObjectIStreamXml::SkipByteBlock(void)
2705 {
2706 BeginData();
2707 for ( ;; ) {
2708 char c = m_Input.GetChar();
2709 if ( IsDigit(c) ) {
2710 continue;
2711 }
2712 else if ( c >= 'A' && c <= 'Z' ) {
2713 continue;
2714 }
2715 else if ( c >= 'a' && c <= 'z' ) {
2716 continue;
2717 }
2718 else if ( c == '\r' || c == '\n' ) {
2719 m_Input.SkipEndOfLine(c);
2720 continue;
2721 }
2722 else if ( c == '+' || c == '/' || c == '=' ) {
2723 // to allow base64 byte blocks
2724 continue;
2725 }
2726 else if ( c == '<' ) {
2727 m_Input.UngetChar(c);
2728 break;
2729 }
2730 else {
2731 m_Input.UngetChar(c);
2732 ThrowError(fFormatError, "invalid char in octet string");
2733 }
2734 }
2735 }
2736
2737 END_NCBI_SCOPE
2738