1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/parser/cxfa_simple_parser.h"
8 
9 #include "xfa/fgas/crt/fgas_codepage.h"
10 #include "xfa/fxfa/include/fxfa.h"
11 #include "xfa/fxfa/include/xfa_checksum.h"
12 #include "xfa/fxfa/parser/cxfa_xml_parser.h"
13 #include "xfa/fxfa/parser/xfa_document.h"
14 
15 namespace {
16 
GetDocumentNode(CFDE_XMLDoc * pXMLDoc,FX_BOOL bVerifyWellFormness=FALSE)17 CFDE_XMLNode* GetDocumentNode(CFDE_XMLDoc* pXMLDoc,
18                               FX_BOOL bVerifyWellFormness = FALSE) {
19   if (!pXMLDoc)
20     return nullptr;
21 
22   for (CFDE_XMLNode* pXMLNode =
23            pXMLDoc->GetRoot()->GetNodeItem(CFDE_XMLNode::FirstChild);
24        pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
25     if (pXMLNode->GetType() != FDE_XMLNODE_Element)
26       continue;
27 
28     if (!bVerifyWellFormness)
29       return pXMLNode;
30 
31     for (CFDE_XMLNode* pNextNode =
32              pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling);
33          pNextNode;
34          pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
35       if (pNextNode->GetType() == FDE_XMLNODE_Element)
36         return FALSE;
37     }
38     return pXMLNode;
39   }
40   return nullptr;
41 }
42 
GetElementTagNamespaceURI(CFDE_XMLElement * pElement,CFX_WideString & wsNamespaceURI)43 void GetElementTagNamespaceURI(CFDE_XMLElement* pElement,
44                                CFX_WideString& wsNamespaceURI) {
45   CFX_WideString wsNodeStr;
46   pElement->GetNamespacePrefix(wsNodeStr);
47   if (!XFA_FDEExtension_ResolveNamespaceQualifier(
48           pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) {
49     wsNamespaceURI.clear();
50   }
51 }
52 
MatchNodeName(CFDE_XMLNode * pNode,const CFX_WideStringC & wsLocalTagName,const CFX_WideStringC & wsNamespaceURIPrefix,uint32_t eMatchFlags=XFA_XDPPACKET_FLAGS_NOMATCH)53 FX_BOOL MatchNodeName(CFDE_XMLNode* pNode,
54                       const CFX_WideStringC& wsLocalTagName,
55                       const CFX_WideStringC& wsNamespaceURIPrefix,
56                       uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
57   if (!pNode || pNode->GetType() != FDE_XMLNODE_Element)
58     return FALSE;
59 
60   CFDE_XMLElement* pElement = reinterpret_cast<CFDE_XMLElement*>(pNode);
61   CFX_WideString wsNodeStr;
62   pElement->GetLocalTagName(wsNodeStr);
63   if (wsNodeStr != wsLocalTagName)
64     return FALSE;
65 
66   GetElementTagNamespaceURI(pElement, wsNodeStr);
67   if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
68     return TRUE;
69   if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
70     return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
71            wsNamespaceURIPrefix;
72   }
73   return wsNodeStr == wsNamespaceURIPrefix;
74 }
75 
GetAttributeLocalName(const CFX_WideStringC & wsAttributeName,CFX_WideString & wsLocalAttrName)76 FX_BOOL GetAttributeLocalName(const CFX_WideStringC& wsAttributeName,
77                               CFX_WideString& wsLocalAttrName) {
78   CFX_WideString wsAttrName(wsAttributeName);
79   FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
80   if (iFind < 0) {
81     wsLocalAttrName = wsAttrName;
82     return FALSE;
83   }
84   wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1);
85   return TRUE;
86 }
87 
ResolveAttribute(CFDE_XMLElement * pElement,const CFX_WideStringC & wsAttributeName,CFX_WideString & wsLocalAttrName,CFX_WideString & wsNamespaceURI)88 FX_BOOL ResolveAttribute(CFDE_XMLElement* pElement,
89                          const CFX_WideStringC& wsAttributeName,
90                          CFX_WideString& wsLocalAttrName,
91                          CFX_WideString& wsNamespaceURI) {
92   CFX_WideString wsAttrName(wsAttributeName);
93   CFX_WideString wsNSPrefix;
94   if (GetAttributeLocalName(wsAttributeName, wsLocalAttrName)) {
95     wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() -
96                                  wsLocalAttrName.GetLength() - 1);
97   }
98   if (wsLocalAttrName == FX_WSTRC(L"xmlns") ||
99       wsNSPrefix == FX_WSTRC(L"xmlns") || wsNSPrefix == FX_WSTRC(L"xml")) {
100     return FALSE;
101   }
102   if (!XFA_FDEExtension_ResolveNamespaceQualifier(
103           pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) {
104     wsNamespaceURI.clear();
105     return FALSE;
106   }
107   return TRUE;
108 }
109 
FindAttributeWithNS(CFDE_XMLElement * pElement,const CFX_WideStringC & wsLocalAttributeName,const CFX_WideStringC & wsNamespaceURIPrefix,CFX_WideString & wsValue,FX_BOOL bMatchNSAsPrefix=FALSE)110 FX_BOOL FindAttributeWithNS(CFDE_XMLElement* pElement,
111                             const CFX_WideStringC& wsLocalAttributeName,
112                             const CFX_WideStringC& wsNamespaceURIPrefix,
113                             CFX_WideString& wsValue,
114                             FX_BOOL bMatchNSAsPrefix = FALSE) {
115   if (!pElement)
116     return FALSE;
117 
118   CFX_WideString wsAttrName;
119   CFX_WideString wsAttrValue;
120   CFX_WideString wsAttrNS;
121   for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount;
122        i++) {
123     pElement->GetAttribute(i, wsAttrName, wsAttrValue);
124     FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
125     CFX_WideString wsNSPrefix;
126     if (iFind < 0) {
127       if (wsLocalAttributeName != wsAttrName)
128         continue;
129     } else {
130       if (wsLocalAttributeName !=
131           wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) {
132         continue;
133       }
134       wsNSPrefix = wsAttrName.Left(iFind);
135     }
136     if (!XFA_FDEExtension_ResolveNamespaceQualifier(
137             pElement, wsNSPrefix.AsStringC(), wsAttrNS)) {
138       continue;
139     }
140     if (bMatchNSAsPrefix) {
141       if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
142           wsNamespaceURIPrefix) {
143         continue;
144       }
145     } else {
146       if (wsAttrNS != wsNamespaceURIPrefix)
147         continue;
148     }
149     wsValue = wsAttrValue;
150     return TRUE;
151   }
152   return FALSE;
153 }
154 
GetDataSetsFromXDP(CFDE_XMLNode * pXMLDocumentNode)155 CFDE_XMLNode* GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) {
156   if (MatchNodeName(pXMLDocumentNode,
157                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
158                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
159                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
160     return pXMLDocumentNode;
161   }
162   if (!MatchNodeName(pXMLDocumentNode,
163                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
164                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
165                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
166     return nullptr;
167   }
168   for (CFDE_XMLNode* pDatasetsNode =
169            pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
170        pDatasetsNode;
171        pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
172     if (!MatchNodeName(pDatasetsNode,
173                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
174                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
175                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
176       continue;
177     }
178     return pDatasetsNode;
179   }
180   return nullptr;
181 }
182 
IsStringAllWhitespace(CFX_WideString wsText)183 FX_BOOL IsStringAllWhitespace(CFX_WideString wsText) {
184   wsText.TrimRight(L"\x20\x9\xD\xA");
185   return wsText.IsEmpty();
186 }
187 
ConvertXMLToPlainText(CFDE_XMLElement * pRootXMLNode,CFX_WideString & wsOutput)188 void ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode,
189                            CFX_WideString& wsOutput) {
190   for (CFDE_XMLNode* pXMLChild =
191            pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
192        pXMLChild;
193        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
194     switch (pXMLChild->GetType()) {
195       case FDE_XMLNODE_Element: {
196         CFX_WideString wsTextData;
197         static_cast<CFDE_XMLElement*>(pXMLChild)->GetTextData(wsTextData);
198         wsTextData += FX_WSTRC(L"\n");
199         wsOutput += wsTextData;
200         break;
201       }
202       case FDE_XMLNODE_Text: {
203         CFX_WideString wsText;
204         static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
205         if (IsStringAllWhitespace(wsText))
206           continue;
207 
208         wsOutput = wsText;
209         break;
210       }
211       case FDE_XMLNODE_CharData: {
212         CFX_WideString wsCharData;
213         static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsCharData);
214         if (IsStringAllWhitespace(wsCharData))
215           continue;
216 
217         wsOutput = wsCharData;
218         break;
219       }
220       default:
221         ASSERT(FALSE);
222         break;
223     }
224   }
225 }
226 
227 }  // namespace
228 
XFA_RecognizeRichText(CFDE_XMLElement * pRichTextXMLNode)229 FX_BOOL XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) {
230   if (pRichTextXMLNode) {
231     CFX_WideString wsNamespaceURI;
232     GetElementTagNamespaceURI(pRichTextXMLNode, wsNamespaceURI);
233     if (wsNamespaceURI == FX_WSTRC(L"http://www.w3.org/1999/xhtml"))
234       return TRUE;
235   }
236   return FALSE;
237 }
238 
CXFA_SimpleParser(CXFA_Document * pFactory,bool bDocumentParser)239 CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory,
240                                      bool bDocumentParser)
241     : m_pXMLParser(nullptr),
242       m_pXMLDoc(nullptr),
243       m_pStream(nullptr),
244       m_pFileRead(nullptr),
245       m_pFactory(pFactory),
246       m_pRootNode(nullptr),
247       m_ePacketID(XFA_XDPPACKET_UNKNOWN),
248       m_bDocumentParser(bDocumentParser) {}
249 
~CXFA_SimpleParser()250 CXFA_SimpleParser::~CXFA_SimpleParser() {}
251 
SetFactory(CXFA_Document * pFactory)252 void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
253   m_pFactory = pFactory;
254 }
255 
StartParse(IFX_FileRead * pStream,XFA_XDPPACKET ePacketID)256 int32_t CXFA_SimpleParser::StartParse(IFX_FileRead* pStream,
257                                       XFA_XDPPACKET ePacketID) {
258   CloseParser();
259   m_pFileRead = pStream;
260   m_pStream.reset(IFX_Stream::CreateStream(
261       pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text));
262   if (!m_pStream)
263     return XFA_PARSESTATUS_StreamErr;
264 
265   uint16_t wCodePage = m_pStream->GetCodePage();
266   if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
267       wCodePage != FX_CODEPAGE_UTF8) {
268     m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
269   }
270   m_pXMLDoc.reset(new CFDE_XMLDoc);
271   m_pXMLParser = new CXFA_XMLParser(m_pXMLDoc->GetRoot(), m_pStream.get());
272   if (!m_pXMLDoc->LoadXML(m_pXMLParser))
273     return XFA_PARSESTATUS_StatusErr;
274 
275   m_ePacketID = ePacketID;
276   return XFA_PARSESTATUS_Ready;
277 }
278 
DoParse(IFX_Pause * pPause)279 int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) {
280   if (!m_pXMLDoc || m_ePacketID == XFA_XDPPACKET_UNKNOWN)
281     return XFA_PARSESTATUS_StatusErr;
282 
283   int32_t iRet = m_pXMLDoc->DoLoad(pPause);
284   if (iRet < 0)
285     return XFA_PARSESTATUS_SyntaxErr;
286   if (iRet < 100)
287     return iRet / 2;
288 
289   m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
290   m_pXMLDoc->CloseXML();
291   m_pStream.reset();
292 
293   if (!m_pRootNode)
294     return XFA_PARSESTATUS_StatusErr;
295   return XFA_PARSESTATUS_Done;
296 }
297 
ParseXMLData(const CFX_WideString & wsXML,CFDE_XMLNode * & pXMLNode,IFX_Pause * pPause)298 int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML,
299                                         CFDE_XMLNode*& pXMLNode,
300                                         IFX_Pause* pPause) {
301   CloseParser();
302   pXMLNode = nullptr;
303 
304   std::unique_ptr<IFX_Stream> pStream(XFA_CreateWideTextRead(wsXML));
305   if (!pStream)
306     return XFA_PARSESTATUS_StreamErr;
307 
308   m_pXMLDoc.reset(new CFDE_XMLDoc);
309   CXFA_XMLParser* pParser =
310       new CXFA_XMLParser(m_pXMLDoc->GetRoot(), pStream.get());
311   pParser->m_dwCheckStatus = 0x03;
312   if (!m_pXMLDoc->LoadXML(pParser))
313     return XFA_PARSESTATUS_StatusErr;
314 
315   int32_t iRet = m_pXMLDoc->DoLoad(pPause);
316   if (iRet < 0 || iRet >= 100)
317     m_pXMLDoc->CloseXML();
318   if (iRet < 0)
319     return XFA_PARSESTATUS_SyntaxErr;
320   if (iRet < 100)
321     return iRet / 2;
322 
323   pXMLNode = GetDocumentNode(m_pXMLDoc.get());
324   return XFA_PARSESTATUS_Done;
325 }
326 
ConstructXFANode(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode)327 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
328                                          CFDE_XMLNode* pXMLNode) {
329   XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID();
330   if (ePacketID == XFA_XDPPACKET_Datasets) {
331     if (pXFANode->GetElementType() == XFA_Element::DataValue) {
332       for (CFDE_XMLNode* pXMLChild =
333                pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
334            pXMLChild;
335            pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
336         FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
337         if (eNodeType == FDE_XMLNODE_Instruction)
338           continue;
339 
340         if (eNodeType == FDE_XMLNODE_Element) {
341           CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
342                                                         XFA_Element::DataValue);
343           if (!pXFAChild)
344             return;
345 
346           CFX_WideString wsNodeStr;
347           CFDE_XMLElement* child = static_cast<CFDE_XMLElement*>(pXMLChild);
348           child->GetLocalTagName(wsNodeStr);
349           pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
350           CFX_WideString wsChildValue;
351           XFA_GetPlainTextFromRichText(child, wsChildValue);
352           if (!wsChildValue.IsEmpty())
353             pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue);
354 
355           pXFANode->InsertChild(pXFAChild);
356           pXFAChild->SetXMLMappingNode(pXMLChild);
357           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
358           break;
359         }
360       }
361       m_pRootNode = pXFANode;
362     } else {
363       m_pRootNode = DataLoader(pXFANode, pXMLNode, TRUE);
364     }
365   } else if (pXFANode->IsContentNode()) {
366     ParseContentNode(pXFANode, pXMLNode, ePacketID);
367     m_pRootNode = pXFANode;
368   } else {
369     m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID);
370   }
371 }
372 
GetRootNode() const373 CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
374   return m_pRootNode;
375 }
376 
GetXMLDoc() const377 CFDE_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
378   return m_pXMLDoc.get();
379 }
380 
XFA_FDEExtension_ResolveNamespaceQualifier(CFDE_XMLElement * pNode,const CFX_WideStringC & wsQualifier,CFX_WideString & wsNamespaceURI)381 FX_BOOL XFA_FDEExtension_ResolveNamespaceQualifier(
382     CFDE_XMLElement* pNode,
383     const CFX_WideStringC& wsQualifier,
384     CFX_WideString& wsNamespaceURI) {
385   if (!pNode)
386     return FALSE;
387 
388   CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root);
389   CFX_WideString wsNSAttribute;
390   FX_BOOL bRet = FALSE;
391   if (wsQualifier.IsEmpty()) {
392     wsNSAttribute = FX_WSTRC(L"xmlns");
393     bRet = TRUE;
394   } else {
395     wsNSAttribute = FX_WSTRC(L"xmlns:") + wsQualifier;
396   }
397   for (; pNode != pFakeRoot; pNode = static_cast<CFDE_XMLElement*>(
398                                  pNode->GetNodeItem(CFDE_XMLNode::Parent))) {
399     if (pNode->GetType() != FDE_XMLNODE_Element)
400       continue;
401 
402     if (pNode->HasAttribute(wsNSAttribute.c_str())) {
403       pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI);
404       return TRUE;
405     }
406   }
407   wsNamespaceURI.clear();
408   return bRet;
409 }
410 
ParseAsXDPPacket(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)411 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode,
412                                                XFA_XDPPACKET ePacketID) {
413   switch (ePacketID) {
414     case XFA_XDPPACKET_UNKNOWN:
415       return nullptr;
416     case XFA_XDPPACKET_XDP:
417       return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID);
418     case XFA_XDPPACKET_Config:
419       return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID);
420     case XFA_XDPPACKET_Template:
421     case XFA_XDPPACKET_Form:
422       return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID);
423     case XFA_XDPPACKET_Datasets:
424       return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID);
425     case XFA_XDPPACKET_Xdc:
426       return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID);
427     case XFA_XDPPACKET_LocaleSet:
428     case XFA_XDPPACKET_ConnectionSet:
429     case XFA_XDPPACKET_SourceSet:
430       return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode,
431                                                         ePacketID);
432     default:
433       return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID);
434   }
435 }
436 
ParseAsXDPPacket_XDP(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)437 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
438     CFDE_XMLNode* pXMLDocumentNode,
439     XFA_XDPPACKET ePacketID) {
440   if (!MatchNodeName(pXMLDocumentNode,
441                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
442                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
443                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
444     return nullptr;
445   }
446   CXFA_Node* pXFARootNode =
447       m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Xfa);
448   if (!pXFARootNode)
449     return nullptr;
450 
451   m_pRootNode = pXFARootNode;
452   pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa");
453   {
454     CFDE_XMLElement* pElement = static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
455     int32_t iAttributeCount = pElement->CountAttributes();
456     for (int32_t i = 0; i < iAttributeCount; i++) {
457       CFX_WideString wsAttriName, wsAttriValue;
458       pElement->GetAttribute(i, wsAttriName, wsAttriValue);
459       if (wsAttriName == FX_WSTRC(L"uuid"))
460         pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue);
461       else if (wsAttriName == FX_WSTRC(L"timeStamp"))
462         pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue);
463     }
464   }
465 
466   CFDE_XMLNode* pXMLConfigDOMRoot = nullptr;
467   CXFA_Node* pXFAConfigDOMRoot = nullptr;
468   {
469     for (CFDE_XMLNode* pChildItem =
470              pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
471          pChildItem;
472          pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
473       const XFA_PACKETINFO* pPacketInfo =
474           XFA_GetPacketByIndex(XFA_PACKET_Config);
475       if (!MatchNodeName(pChildItem, pPacketInfo->pName, pPacketInfo->pURI,
476                          pPacketInfo->eFlags)) {
477         continue;
478       }
479       if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
480         return nullptr;
481       }
482       pXMLConfigDOMRoot = pChildItem;
483       pXFAConfigDOMRoot =
484           ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config);
485       pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
486     }
487   }
488 
489   CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr;
490   CFDE_XMLNode* pXMLFormDOMRoot = nullptr;
491   CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr;
492   {
493     for (CFDE_XMLNode* pChildItem =
494              pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
495          pChildItem;
496          pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
497       if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element)
498         continue;
499       if (pChildItem == pXMLConfigDOMRoot)
500         continue;
501 
502       CFDE_XMLElement* pElement =
503           reinterpret_cast<CFDE_XMLElement*>(pChildItem);
504       CFX_WideString wsPacketName;
505       pElement->GetLocalTagName(wsPacketName);
506       const XFA_PACKETINFO* pPacketInfo =
507           XFA_GetPacketByName(wsPacketName.AsStringC());
508       if (pPacketInfo && pPacketInfo->pURI) {
509         if (!MatchNodeName(pElement, pPacketInfo->pName, pPacketInfo->pURI,
510                            pPacketInfo->eFlags)) {
511           pPacketInfo = nullptr;
512         }
513       }
514       XFA_XDPPACKET ePacket =
515           pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER;
516       if (ePacket == XFA_XDPPACKET_XDP)
517         continue;
518       if (ePacket == XFA_XDPPACKET_Datasets) {
519         if (pXMLDatasetsDOMRoot)
520           return nullptr;
521 
522         pXMLDatasetsDOMRoot = pElement;
523       } else if (ePacket == XFA_XDPPACKET_Form) {
524         if (pXMLFormDOMRoot)
525           return nullptr;
526 
527         pXMLFormDOMRoot = pElement;
528       } else if (ePacket == XFA_XDPPACKET_Template) {
529         if (pXMLTemplateDOMRoot) {
530           // Found a duplicate template packet.
531           return nullptr;
532         }
533         CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
534         if (pPacketNode) {
535           pXMLTemplateDOMRoot = pElement;
536           pXFARootNode->InsertChild(pPacketNode);
537         }
538       } else {
539         CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
540         if (pPacketNode) {
541           if (pPacketInfo &&
542               (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
543               pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
544             return nullptr;
545           }
546           pXFARootNode->InsertChild(pPacketNode);
547         }
548       }
549     }
550   }
551 
552   if (!pXMLTemplateDOMRoot) {
553     // No template is found.
554     return nullptr;
555   }
556   if (pXMLDatasetsDOMRoot) {
557     CXFA_Node* pPacketNode =
558         ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets);
559     if (pPacketNode)
560       pXFARootNode->InsertChild(pPacketNode);
561   }
562   if (pXMLFormDOMRoot) {
563     CXFA_Node* pPacketNode =
564         ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form);
565     if (pPacketNode)
566       pXFARootNode->InsertChild(pPacketNode);
567   }
568   pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
569   return pXFARootNode;
570 }
571 
ParseAsXDPPacket_Config(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)572 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
573     CFDE_XMLNode* pXMLDocumentNode,
574     XFA_XDPPACKET ePacketID) {
575   if (!MatchNodeName(pXMLDocumentNode,
576                      XFA_GetPacketByIndex(XFA_PACKET_Config)->pName,
577                      XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI,
578                      XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) {
579     return nullptr;
580   }
581   CXFA_Node* pNode =
582       m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_Element::Config);
583   if (!pNode)
584     return nullptr;
585 
586   pNode->SetCData(XFA_ATTRIBUTE_Name,
587                   XFA_GetPacketByIndex(XFA_PACKET_Config)->pName);
588   if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
589     return nullptr;
590 
591   pNode->SetXMLMappingNode(pXMLDocumentNode);
592   return pNode;
593 }
594 
ParseAsXDPPacket_TemplateForm(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)595 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm(
596     CFDE_XMLNode* pXMLDocumentNode,
597     XFA_XDPPACKET ePacketID) {
598   CXFA_Node* pNode = nullptr;
599   if (ePacketID == XFA_XDPPACKET_Template) {
600     if (MatchNodeName(pXMLDocumentNode,
601                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pName,
602                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI,
603                       XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) {
604       pNode =
605           m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_Element::Template);
606       if (!pNode)
607         return nullptr;
608 
609       pNode->SetCData(XFA_ATTRIBUTE_Name,
610                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pName);
611       if (m_bDocumentParser) {
612         CFX_WideString wsNamespaceURI;
613         CFDE_XMLElement* pXMLDocumentElement =
614             static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
615         pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI);
616         if (wsNamespaceURI.IsEmpty())
617           pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI);
618 
619         pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
620       }
621       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
622         return nullptr;
623     }
624   } else if (ePacketID == XFA_XDPPACKET_Form) {
625     if (MatchNodeName(pXMLDocumentNode,
626                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pName,
627                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI,
628                       XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) {
629       CFDE_XMLElement* pXMLDocumentElement =
630           static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
631       CFX_WideString wsChecksum;
632       pXMLDocumentElement->GetString(L"checksum", wsChecksum);
633       if (wsChecksum.GetLength() != 28 ||
634           m_pXMLParser->m_dwCheckStatus != 0x03) {
635         return nullptr;
636       }
637       std::unique_ptr<CXFA_ChecksumContext> pChecksum(new CXFA_ChecksumContext);
638       pChecksum->StartChecksum();
639       pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
640                                 m_pXMLParser->m_nSize[0]);
641       pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
642                                 m_pXMLParser->m_nSize[1]);
643       pChecksum->FinishChecksum();
644       CFX_ByteString bsCheck = pChecksum->GetChecksum();
645       if (bsCheck != wsChecksum.UTF8Encode())
646         return nullptr;
647 
648       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_Element::Form);
649       if (!pNode)
650         return nullptr;
651 
652       pNode->SetCData(XFA_ATTRIBUTE_Name,
653                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pName);
654       pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC());
655       CXFA_Node* pTemplateRoot =
656           m_pRootNode->GetFirstChildByClass(XFA_Element::Template);
657       CXFA_Node* pTemplateChosen =
658           pTemplateRoot
659               ? pTemplateRoot->GetFirstChildByClass(XFA_Element::Subform)
660               : nullptr;
661       FX_BOOL bUseAttribute = TRUE;
662       if (pTemplateChosen &&
663           pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) !=
664               XFA_ATTRIBUTEENUM_Auto) {
665         bUseAttribute = FALSE;
666       }
667       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute))
668         return nullptr;
669     }
670   }
671   if (pNode)
672     pNode->SetXMLMappingNode(pXMLDocumentNode);
673 
674   return pNode;
675 }
676 
ParseAsXDPPacket_Data(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)677 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
678     CFDE_XMLNode* pXMLDocumentNode,
679     XFA_XDPPACKET ePacketID) {
680   CFDE_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
681   if (pDatasetsXMLNode) {
682     CXFA_Node* pNode =
683         m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataModel);
684     if (!pNode)
685       return nullptr;
686 
687     pNode->SetCData(XFA_ATTRIBUTE_Name,
688                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName);
689     if (!DataLoader(pNode, pDatasetsXMLNode, FALSE))
690       return nullptr;
691 
692     pNode->SetXMLMappingNode(pDatasetsXMLNode);
693     return pNode;
694   }
695 
696   CFDE_XMLNode* pDataXMLNode = nullptr;
697   if (MatchNodeName(pXMLDocumentNode, FX_WSTRC(L"data"),
698                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
699                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
700     static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
701         ->RemoveAttribute(L"xmlns:xfa");
702     pDataXMLNode = pXMLDocumentNode;
703   } else {
704     CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data");
705     CFDE_XMLNode* pParentXMLNode =
706         pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent);
707     if (pParentXMLNode)
708       pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
709 
710     ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element);
711     if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) {
712       static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
713           ->RemoveAttribute(L"xmlns:xfa");
714     }
715     pDataElement->InsertChildNode(pXMLDocumentNode);
716     pDataXMLNode = pDataElement;
717   }
718 
719   if (pDataXMLNode) {
720     CXFA_Node* pNode =
721         m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataGroup);
722     if (!pNode) {
723       if (pDataXMLNode != pXMLDocumentNode)
724         pDataXMLNode->Release();
725       return nullptr;
726     }
727     CFX_WideString wsLocalName;
728     static_cast<CFDE_XMLElement*>(pDataXMLNode)->GetLocalTagName(wsLocalName);
729     pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName);
730     if (!DataLoader(pNode, pDataXMLNode, TRUE))
731       return nullptr;
732 
733     pNode->SetXMLMappingNode(pDataXMLNode);
734     if (pDataXMLNode != pXMLDocumentNode)
735       pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
736     return pNode;
737   }
738   return nullptr;
739 }
740 
ParseAsXDPPacket_LocaleConnectionSourceSet(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)741 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
742     CFDE_XMLNode* pXMLDocumentNode,
743     XFA_XDPPACKET ePacketID) {
744   CXFA_Node* pNode = nullptr;
745   if (ePacketID == XFA_XDPPACKET_LocaleSet) {
746     if (MatchNodeName(pXMLDocumentNode,
747                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName,
748                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI,
749                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) {
750       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet,
751                                      XFA_Element::LocaleSet);
752       if (!pNode)
753         return nullptr;
754 
755       pNode->SetCData(XFA_ATTRIBUTE_Name,
756                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName);
757       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
758         return nullptr;
759     }
760   } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) {
761     if (MatchNodeName(pXMLDocumentNode,
762                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName,
763                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI,
764                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) {
765       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet,
766                                      XFA_Element::ConnectionSet);
767       if (!pNode)
768         return nullptr;
769 
770       pNode->SetCData(XFA_ATTRIBUTE_Name,
771                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName);
772       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
773         return nullptr;
774     }
775   } else if (ePacketID == XFA_XDPPACKET_SourceSet) {
776     if (MatchNodeName(pXMLDocumentNode,
777                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName,
778                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI,
779                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) {
780       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet,
781                                      XFA_Element::SourceSet);
782       if (!pNode)
783         return nullptr;
784 
785       pNode->SetCData(XFA_ATTRIBUTE_Name,
786                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName);
787       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
788         return nullptr;
789     }
790   }
791   if (pNode)
792     pNode->SetXMLMappingNode(pXMLDocumentNode);
793   return pNode;
794 }
795 
ParseAsXDPPacket_Xdc(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)796 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
797     CFDE_XMLNode* pXMLDocumentNode,
798     XFA_XDPPACKET ePacketID) {
799   if (!MatchNodeName(pXMLDocumentNode,
800                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName,
801                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI,
802                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags))
803     return nullptr;
804 
805   CXFA_Node* pNode =
806       m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_Element::Xdc);
807   if (!pNode)
808     return nullptr;
809 
810   pNode->SetCData(XFA_ATTRIBUTE_Name,
811                   XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName);
812   pNode->SetXMLMappingNode(pXMLDocumentNode);
813   return pNode;
814 }
815 
ParseAsXDPPacket_User(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)816 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
817     CFDE_XMLNode* pXMLDocumentNode,
818     XFA_XDPPACKET ePacketID) {
819   CXFA_Node* pNode =
820       m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Packet);
821   if (!pNode)
822     return nullptr;
823 
824   CFX_WideString wsName;
825   static_cast<CFDE_XMLElement*>(pXMLDocumentNode)->GetLocalTagName(wsName);
826   pNode->SetCData(XFA_ATTRIBUTE_Name, wsName);
827   if (!UserPacketLoader(pNode, pXMLDocumentNode))
828     return nullptr;
829 
830   pNode->SetXMLMappingNode(pXMLDocumentNode);
831   return pNode;
832 }
833 
// Loader for user-defined packets. Currently a no-op: nothing is read from
// |pXMLDoc| and |pXFANode| is returned unchanged, so callers that test the
// result for null only fail when they pass in a null |pXFANode|.
CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
                                               CFDE_XMLNode* pXMLDoc) {
  return pXFANode;
}
838 
// Populates |pXFANode| from the children of |pXMLDoc| by delegating to
// ParseDataGroup() with the Datasets packet. |bDoTransform| is currently
// ignored. Always returns |pXFANode|; failures inside ParseDataGroup() are not
// reported to the caller.
CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
                                         CFDE_XMLNode* pXMLDoc,
                                         FX_BOOL bDoTransform) {
  ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets);
  return pXFANode;
}
845 
NormalLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc,XFA_XDPPACKET ePacketID,FX_BOOL bUseAttribute)846 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
847                                            CFDE_XMLNode* pXMLDoc,
848                                            XFA_XDPPACKET ePacketID,
849                                            FX_BOOL bUseAttribute) {
850   FX_BOOL bOneOfPropertyFound = FALSE;
851   for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild);
852        pXMLChild;
853        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
854     switch (pXMLChild->GetType()) {
855       case FDE_XMLNODE_Element: {
856         CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
857         CFX_WideString wsTagName;
858         pXMLElement->GetLocalTagName(wsTagName);
859         XFA_Element eType = XFA_GetElementTypeForName(wsTagName.AsStringC());
860         if (eType == XFA_Element::Unknown)
861           continue;
862 
863         const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement(
864             pXFANode->GetElementType(), eType, ePacketID);
865         if (pPropertyInfo &&
866             ((pPropertyInfo->uFlags &
867               (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) {
868           if (bOneOfPropertyFound)
869             break;
870 
871           bOneOfPropertyFound = TRUE;
872         }
873         CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
874         if (!pXFAChild)
875           return nullptr;
876         if (ePacketID == XFA_XDPPACKET_Config)
877           pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC());
878 
879         FX_BOOL IsNeedValue = TRUE;
880         for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count;
881              i++) {
882           CFX_WideString wsAttrQualifiedName;
883           CFX_WideString wsAttrName;
884           CFX_WideString wsAttrValue;
885           pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue);
886           GetAttributeLocalName(wsAttrQualifiedName.AsStringC(), wsAttrName);
887           if (wsAttrName == FX_WSTRC(L"nil") &&
888               wsAttrValue == FX_WSTRC(L"true")) {
889             IsNeedValue = FALSE;
890           }
891           const XFA_ATTRIBUTEINFO* lpAttrInfo =
892               XFA_GetAttributeByName(wsAttrName.AsStringC());
893           if (!lpAttrInfo)
894             continue;
895 
896           if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name &&
897               lpAttrInfo->eName != XFA_ATTRIBUTE_Save) {
898             continue;
899           }
900           pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC());
901         }
902         pXFANode->InsertChild(pXFAChild);
903         if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
904           if (ePacketID == XFA_XDPPACKET_Config)
905             ParseContentNode(pXFAChild, pXMLElement, ePacketID);
906           else
907             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
908 
909           break;
910         }
911         switch (pXFAChild->GetObjectType()) {
912           case XFA_ObjectType::ContentNode:
913           case XFA_ObjectType::TextNode:
914           case XFA_ObjectType::NodeC:
915           case XFA_ObjectType::NodeV:
916             if (IsNeedValue)
917               ParseContentNode(pXFAChild, pXMLElement, ePacketID);
918             break;
919           default:
920             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
921             break;
922         }
923       } break;
924       case FDE_XMLNODE_Instruction:
925         ParseInstruction(pXFANode, static_cast<CFDE_XMLInstruction*>(pXMLChild),
926                          ePacketID);
927         break;
928       default:
929         break;
930     }
931   }
932   return pXFANode;
933 }
934 
ParseContentNode(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)935 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
936                                          CFDE_XMLNode* pXMLNode,
937                                          XFA_XDPPACKET ePacketID) {
938   XFA_Element element = XFA_Element::Sharptext;
939   if (pXFANode->GetElementType() == XFA_Element::ExData) {
940     CFX_WideStringC wsContentType =
941         pXFANode->GetCData(XFA_ATTRIBUTE_ContentType);
942     if (wsContentType == FX_WSTRC(L"text/html"))
943       element = XFA_Element::SharpxHTML;
944     else if (wsContentType == FX_WSTRC(L"text/xml"))
945       element = XFA_Element::Sharpxml;
946   }
947   if (element == XFA_Element::SharpxHTML)
948     pXFANode->SetXMLMappingNode(pXMLNode);
949 
950   CFX_WideString wsValue;
951   for (CFDE_XMLNode* pXMLChild =
952            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
953        pXMLChild;
954        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
955     FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
956     if (eNodeType == FDE_XMLNODE_Instruction)
957       continue;
958 
959     if (element == XFA_Element::SharpxHTML) {
960       if (eNodeType != FDE_XMLNODE_Element)
961         break;
962 
963       if (XFA_RecognizeRichText(static_cast<CFDE_XMLElement*>(pXMLChild)))
964         XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
965                                      wsValue);
966     } else if (element == XFA_Element::Sharpxml) {
967       if (eNodeType != FDE_XMLNODE_Element)
968         break;
969 
970       ConvertXMLToPlainText(static_cast<CFDE_XMLElement*>(pXMLChild), wsValue);
971     } else {
972       if (eNodeType == FDE_XMLNODE_Element)
973         break;
974       if (eNodeType == FDE_XMLNODE_Text)
975         static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsValue);
976       else if (eNodeType == FDE_XMLNODE_CharData)
977         static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsValue);
978     }
979     break;
980   }
981   if (!wsValue.IsEmpty()) {
982     if (pXFANode->IsContentNode()) {
983       CXFA_Node* pContentRawDataNode =
984           m_pFactory->CreateNode(ePacketID, element);
985       ASSERT(pContentRawDataNode);
986       pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
987       pXFANode->InsertChild(pContentRawDataNode);
988     } else {
989       pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
990     }
991   }
992 }
993 
ParseDataGroup(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)994 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
995                                        CFDE_XMLNode* pXMLNode,
996                                        XFA_XDPPACKET ePacketID) {
997   for (CFDE_XMLNode* pXMLChild =
998            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
999        pXMLChild;
1000        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1001     switch (pXMLChild->GetType()) {
1002       case FDE_XMLNODE_Element: {
1003         CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
1004         {
1005           CFX_WideString wsNamespaceURI;
1006           GetElementTagNamespaceURI(pXMLElement, wsNamespaceURI);
1007           if (wsNamespaceURI ==
1008                   FX_WSTRC(L"http://www.xfa.com/schema/xfa-package/") ||
1009               wsNamespaceURI ==
1010                   FX_WSTRC(L"http://www.xfa.org/schema/xfa-package/") ||
1011               wsNamespaceURI ==
1012                   FX_WSTRC(L"http://www.w3.org/2001/XMLSchema-instance")) {
1013             continue;
1014           }
1015         }
1016 
1017         XFA_Element eNodeType = XFA_Element::DataModel;
1018         if (eNodeType == XFA_Element::DataModel) {
1019           CFX_WideString wsDataNodeAttr;
1020           if (FindAttributeWithNS(
1021                   pXMLElement, FX_WSTRC(L"dataNode"),
1022                   FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/"),
1023                   wsDataNodeAttr)) {
1024             if (wsDataNodeAttr == FX_WSTRC(L"dataGroup"))
1025               eNodeType = XFA_Element::DataGroup;
1026             else if (wsDataNodeAttr == FX_WSTRC(L"dataValue"))
1027               eNodeType = XFA_Element::DataValue;
1028           }
1029         }
1030         CFX_WideString wsContentType;
1031         if (eNodeType == XFA_Element::DataModel) {
1032           if (FindAttributeWithNS(
1033                   pXMLElement, FX_WSTRC(L"contentType"),
1034                   FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/"),
1035                   wsContentType)) {
1036             if (!wsContentType.IsEmpty())
1037               eNodeType = XFA_Element::DataValue;
1038           }
1039         }
1040         if (eNodeType == XFA_Element::DataModel) {
1041           for (CFDE_XMLNode* pXMLDataChild =
1042                    pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild);
1043                pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
1044                                   CFDE_XMLNode::NextSibling)) {
1045             if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) {
1046               if (!XFA_RecognizeRichText(
1047                       static_cast<CFDE_XMLElement*>(pXMLDataChild))) {
1048                 eNodeType = XFA_Element::DataGroup;
1049                 break;
1050               }
1051             }
1052           }
1053         }
1054         if (eNodeType == XFA_Element::DataModel)
1055           eNodeType = XFA_Element::DataValue;
1056 
1057         CXFA_Node* pXFAChild =
1058             m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType);
1059         if (!pXFAChild)
1060           return;
1061 
1062         CFX_WideString wsNodeName;
1063         pXMLElement->GetLocalTagName(wsNodeName);
1064         pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName);
1065         bool bNeedValue = true;
1066         for (int32_t i = 0; i < pXMLElement->CountAttributes(); ++i) {
1067           CFX_WideString wsQualifiedName;
1068           CFX_WideString wsValue;
1069           CFX_WideString wsName;
1070           CFX_WideString wsNS;
1071           pXMLElement->GetAttribute(i, wsQualifiedName, wsValue);
1072           if (!ResolveAttribute(pXMLElement, wsQualifiedName.AsStringC(),
1073                                 wsName, wsNS)) {
1074             continue;
1075           }
1076           if (wsName == FX_WSTRC(L"nil") && wsValue == FX_WSTRC(L"true")) {
1077             bNeedValue = false;
1078             continue;
1079           }
1080           if (wsNS == FX_WSTRC(L"http://www.xfa.com/schema/xfa-package/") ||
1081               wsNS == FX_WSTRC(L"http://www.xfa.org/schema/xfa-package/") ||
1082               wsNS == FX_WSTRC(L"http://www.w3.org/2001/XMLSchema-instance") ||
1083               wsNS == FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/")) {
1084             continue;
1085           }
1086           CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
1087               XFA_XDPPACKET_Datasets, XFA_Element::DataValue);
1088           if (!pXFAMetaData)
1089             return;
1090 
1091           pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsName);
1092           pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsQualifiedName);
1093           pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1094           pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains,
1095                                 XFA_ATTRIBUTEENUM_MetaData);
1096           pXFAChild->InsertChild(pXFAMetaData);
1097           pXFAMetaData->SetXMLMappingNode(pXMLElement);
1098           pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
1099         }
1100 
1101         if (!bNeedValue) {
1102           CFX_WideString wsNilName(L"xsi:nil");
1103           pXMLElement->RemoveAttribute(wsNilName.c_str());
1104         }
1105         pXFANode->InsertChild(pXFAChild);
1106         if (eNodeType == XFA_Element::DataGroup)
1107           ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
1108         else if (bNeedValue)
1109           ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets);
1110 
1111         pXFAChild->SetXMLMappingNode(pXMLElement);
1112         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1113         continue;
1114       }
1115       case FDE_XMLNODE_CharData: {
1116         CFDE_XMLCharData* pXMLCharData =
1117             static_cast<CFDE_XMLCharData*>(pXMLChild);
1118         CFX_WideString wsCharData;
1119         pXMLCharData->GetCharData(wsCharData);
1120         if (IsStringAllWhitespace(wsCharData))
1121           continue;
1122 
1123         CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1124                                                       XFA_Element::DataValue);
1125         if (!pXFAChild)
1126           return;
1127 
1128         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData);
1129         pXFANode->InsertChild(pXFAChild);
1130         pXFAChild->SetXMLMappingNode(pXMLCharData);
1131         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1132         continue;
1133       }
1134       case FDE_XMLNODE_Text: {
1135         CFDE_XMLText* pXMLText = static_cast<CFDE_XMLText*>(pXMLChild);
1136         CFX_WideString wsText;
1137         pXMLText->GetText(wsText);
1138         if (IsStringAllWhitespace(wsText))
1139           continue;
1140 
1141         CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1142                                                       XFA_Element::DataValue);
1143         if (!pXFAChild)
1144           return;
1145 
1146         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText);
1147         pXFANode->InsertChild(pXFAChild);
1148         pXFAChild->SetXMLMappingNode(pXMLText);
1149         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1150         continue;
1151       }
1152       default:
1153         continue;
1154     }
1155   }
1156 }
1157 
ParseDataValue(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)1158 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
1159                                        CFDE_XMLNode* pXMLNode,
1160                                        XFA_XDPPACKET ePacketID) {
1161   CFX_WideTextBuf wsValueTextBuf;
1162   CFX_WideTextBuf wsCurValueTextBuf;
1163   FX_BOOL bMarkAsCompound = FALSE;
1164   CFDE_XMLNode* pXMLCurValueNode = nullptr;
1165   for (CFDE_XMLNode* pXMLChild =
1166            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
1167        pXMLChild;
1168        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1169     FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
1170     if (eNodeType == FDE_XMLNODE_Instruction)
1171       continue;
1172 
1173     CFX_WideString wsText;
1174     if (eNodeType == FDE_XMLNODE_Text) {
1175       static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
1176       if (!pXMLCurValueNode)
1177         pXMLCurValueNode = pXMLChild;
1178 
1179       wsCurValueTextBuf << wsText;
1180     } else if (eNodeType == FDE_XMLNODE_CharData) {
1181       static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsText);
1182       if (!pXMLCurValueNode)
1183         pXMLCurValueNode = pXMLChild;
1184 
1185       wsCurValueTextBuf << wsText;
1186     } else if (XFA_RecognizeRichText(
1187                    static_cast<CFDE_XMLElement*>(pXMLChild))) {
1188       XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
1189                                    wsText);
1190       if (!pXMLCurValueNode)
1191         pXMLCurValueNode = pXMLChild;
1192 
1193       wsCurValueTextBuf << wsText;
1194     } else {
1195       bMarkAsCompound = TRUE;
1196       if (pXMLCurValueNode) {
1197         CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1198         if (!wsCurValue.IsEmpty()) {
1199           CXFA_Node* pXFAChild =
1200               m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1201           if (!pXFAChild)
1202             return;
1203 
1204           pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1205           pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1206           pXFANode->InsertChild(pXFAChild);
1207           pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1208           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1209           wsValueTextBuf << wsCurValue;
1210           wsCurValueTextBuf.Clear();
1211         }
1212         pXMLCurValueNode = nullptr;
1213       }
1214       CXFA_Node* pXFAChild =
1215           m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1216       if (!pXFAChild)
1217         return;
1218 
1219       CFX_WideString wsNodeStr;
1220       static_cast<CFDE_XMLElement*>(pXMLChild)->GetLocalTagName(wsNodeStr);
1221       pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
1222       ParseDataValue(pXFAChild, pXMLChild, ePacketID);
1223       pXFANode->InsertChild(pXFAChild);
1224       pXFAChild->SetXMLMappingNode(pXMLChild);
1225       pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1226       CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value);
1227       wsValueTextBuf << wsCurValue;
1228     }
1229   }
1230   if (pXMLCurValueNode) {
1231     CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1232     if (!wsCurValue.IsEmpty()) {
1233       if (bMarkAsCompound) {
1234         CXFA_Node* pXFAChild =
1235             m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1236         if (!pXFAChild)
1237           return;
1238 
1239         pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1240         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1241         pXFANode->InsertChild(pXFAChild);
1242         pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1243         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1244       }
1245       wsValueTextBuf << wsCurValue;
1246       wsCurValueTextBuf.Clear();
1247     }
1248     pXMLCurValueNode = nullptr;
1249   }
1250   CFX_WideString wsNodeValue = wsValueTextBuf.MakeString();
1251   pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue);
1252 }
1253 
ParseInstruction(CXFA_Node * pXFANode,CFDE_XMLInstruction * pXMLInstruction,XFA_XDPPACKET ePacketID)1254 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
1255                                          CFDE_XMLInstruction* pXMLInstruction,
1256                                          XFA_XDPPACKET ePacketID) {
1257   if (!m_bDocumentParser)
1258     return;
1259 
1260   CFX_WideString wsTargetName;
1261   pXMLInstruction->GetTargetName(wsTargetName);
1262   if (wsTargetName == FX_WSTRC(L"originalXFAVersion")) {
1263     CFX_WideString wsData;
1264     if (pXMLInstruction->GetData(0, wsData) &&
1265         (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) !=
1266          XFA_VERSION_UNKNOWN)) {
1267       wsData.clear();
1268       if (pXMLInstruction->GetData(1, wsData) &&
1269           wsData == FX_WSTRC(L"v2.7-scripting:1")) {
1270         pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, TRUE);
1271       }
1272     }
1273   } else if (wsTargetName == FX_WSTRC(L"acrobat")) {
1274     CFX_WideString wsData;
1275     if (pXMLInstruction->GetData(0, wsData) &&
1276         wsData == FX_WSTRC(L"JavaScript")) {
1277       if (pXMLInstruction->GetData(1, wsData) &&
1278           wsData == FX_WSTRC(L"strictScoping")) {
1279         pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, TRUE);
1280       }
1281     }
1282   }
1283 }
1284 
// Drops the parser's hold on its XML document and then on its input stream by
// resetting m_pXMLDoc and m_pStream, in that order.
// NOTE(review): the document is reset before the stream; presumably the
// document may still reference the stream while it is torn down — confirm
// before reordering.
void CXFA_SimpleParser::CloseParser() {
  m_pXMLDoc.reset();
  m_pStream.reset();
}
1289