1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/fxfa/parser/cxfa_simple_parser.h"
8
9 #include "xfa/fgas/crt/fgas_codepage.h"
10 #include "xfa/fxfa/include/fxfa.h"
11 #include "xfa/fxfa/include/xfa_checksum.h"
12 #include "xfa/fxfa/parser/cxfa_xml_parser.h"
13 #include "xfa/fxfa/parser/xfa_document.h"
14
15 namespace {
16
GetDocumentNode(CFDE_XMLDoc * pXMLDoc,FX_BOOL bVerifyWellFormness=FALSE)17 CFDE_XMLNode* GetDocumentNode(CFDE_XMLDoc* pXMLDoc,
18 FX_BOOL bVerifyWellFormness = FALSE) {
19 if (!pXMLDoc)
20 return nullptr;
21
22 for (CFDE_XMLNode* pXMLNode =
23 pXMLDoc->GetRoot()->GetNodeItem(CFDE_XMLNode::FirstChild);
24 pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
25 if (pXMLNode->GetType() != FDE_XMLNODE_Element)
26 continue;
27
28 if (!bVerifyWellFormness)
29 return pXMLNode;
30
31 for (CFDE_XMLNode* pNextNode =
32 pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling);
33 pNextNode;
34 pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
35 if (pNextNode->GetType() == FDE_XMLNODE_Element)
36 return FALSE;
37 }
38 return pXMLNode;
39 }
40 return nullptr;
41 }
42
GetElementTagNamespaceURI(CFDE_XMLElement * pElement,CFX_WideString & wsNamespaceURI)43 void GetElementTagNamespaceURI(CFDE_XMLElement* pElement,
44 CFX_WideString& wsNamespaceURI) {
45 CFX_WideString wsNodeStr;
46 pElement->GetNamespacePrefix(wsNodeStr);
47 if (!XFA_FDEExtension_ResolveNamespaceQualifier(
48 pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) {
49 wsNamespaceURI.clear();
50 }
51 }
52
MatchNodeName(CFDE_XMLNode * pNode,const CFX_WideStringC & wsLocalTagName,const CFX_WideStringC & wsNamespaceURIPrefix,uint32_t eMatchFlags=XFA_XDPPACKET_FLAGS_NOMATCH)53 FX_BOOL MatchNodeName(CFDE_XMLNode* pNode,
54 const CFX_WideStringC& wsLocalTagName,
55 const CFX_WideStringC& wsNamespaceURIPrefix,
56 uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
57 if (!pNode || pNode->GetType() != FDE_XMLNODE_Element)
58 return FALSE;
59
60 CFDE_XMLElement* pElement = reinterpret_cast<CFDE_XMLElement*>(pNode);
61 CFX_WideString wsNodeStr;
62 pElement->GetLocalTagName(wsNodeStr);
63 if (wsNodeStr != wsLocalTagName)
64 return FALSE;
65
66 GetElementTagNamespaceURI(pElement, wsNodeStr);
67 if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
68 return TRUE;
69 if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
70 return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
71 wsNamespaceURIPrefix;
72 }
73 return wsNodeStr == wsNamespaceURIPrefix;
74 }
75
GetAttributeLocalName(const CFX_WideStringC & wsAttributeName,CFX_WideString & wsLocalAttrName)76 FX_BOOL GetAttributeLocalName(const CFX_WideStringC& wsAttributeName,
77 CFX_WideString& wsLocalAttrName) {
78 CFX_WideString wsAttrName(wsAttributeName);
79 FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
80 if (iFind < 0) {
81 wsLocalAttrName = wsAttrName;
82 return FALSE;
83 }
84 wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1);
85 return TRUE;
86 }
87
ResolveAttribute(CFDE_XMLElement * pElement,const CFX_WideStringC & wsAttributeName,CFX_WideString & wsLocalAttrName,CFX_WideString & wsNamespaceURI)88 FX_BOOL ResolveAttribute(CFDE_XMLElement* pElement,
89 const CFX_WideStringC& wsAttributeName,
90 CFX_WideString& wsLocalAttrName,
91 CFX_WideString& wsNamespaceURI) {
92 CFX_WideString wsAttrName(wsAttributeName);
93 CFX_WideString wsNSPrefix;
94 if (GetAttributeLocalName(wsAttributeName, wsLocalAttrName)) {
95 wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() -
96 wsLocalAttrName.GetLength() - 1);
97 }
98 if (wsLocalAttrName == FX_WSTRC(L"xmlns") ||
99 wsNSPrefix == FX_WSTRC(L"xmlns") || wsNSPrefix == FX_WSTRC(L"xml")) {
100 return FALSE;
101 }
102 if (!XFA_FDEExtension_ResolveNamespaceQualifier(
103 pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) {
104 wsNamespaceURI.clear();
105 return FALSE;
106 }
107 return TRUE;
108 }
109
FindAttributeWithNS(CFDE_XMLElement * pElement,const CFX_WideStringC & wsLocalAttributeName,const CFX_WideStringC & wsNamespaceURIPrefix,CFX_WideString & wsValue,FX_BOOL bMatchNSAsPrefix=FALSE)110 FX_BOOL FindAttributeWithNS(CFDE_XMLElement* pElement,
111 const CFX_WideStringC& wsLocalAttributeName,
112 const CFX_WideStringC& wsNamespaceURIPrefix,
113 CFX_WideString& wsValue,
114 FX_BOOL bMatchNSAsPrefix = FALSE) {
115 if (!pElement)
116 return FALSE;
117
118 CFX_WideString wsAttrName;
119 CFX_WideString wsAttrValue;
120 CFX_WideString wsAttrNS;
121 for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount;
122 i++) {
123 pElement->GetAttribute(i, wsAttrName, wsAttrValue);
124 FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
125 CFX_WideString wsNSPrefix;
126 if (iFind < 0) {
127 if (wsLocalAttributeName != wsAttrName)
128 continue;
129 } else {
130 if (wsLocalAttributeName !=
131 wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) {
132 continue;
133 }
134 wsNSPrefix = wsAttrName.Left(iFind);
135 }
136 if (!XFA_FDEExtension_ResolveNamespaceQualifier(
137 pElement, wsNSPrefix.AsStringC(), wsAttrNS)) {
138 continue;
139 }
140 if (bMatchNSAsPrefix) {
141 if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
142 wsNamespaceURIPrefix) {
143 continue;
144 }
145 } else {
146 if (wsAttrNS != wsNamespaceURIPrefix)
147 continue;
148 }
149 wsValue = wsAttrValue;
150 return TRUE;
151 }
152 return FALSE;
153 }
154
GetDataSetsFromXDP(CFDE_XMLNode * pXMLDocumentNode)155 CFDE_XMLNode* GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) {
156 if (MatchNodeName(pXMLDocumentNode,
157 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
158 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
159 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
160 return pXMLDocumentNode;
161 }
162 if (!MatchNodeName(pXMLDocumentNode,
163 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
164 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
165 XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
166 return nullptr;
167 }
168 for (CFDE_XMLNode* pDatasetsNode =
169 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
170 pDatasetsNode;
171 pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
172 if (!MatchNodeName(pDatasetsNode,
173 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
174 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
175 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
176 continue;
177 }
178 return pDatasetsNode;
179 }
180 return nullptr;
181 }
182
IsStringAllWhitespace(CFX_WideString wsText)183 FX_BOOL IsStringAllWhitespace(CFX_WideString wsText) {
184 wsText.TrimRight(L"\x20\x9\xD\xA");
185 return wsText.IsEmpty();
186 }
187
ConvertXMLToPlainText(CFDE_XMLElement * pRootXMLNode,CFX_WideString & wsOutput)188 void ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode,
189 CFX_WideString& wsOutput) {
190 for (CFDE_XMLNode* pXMLChild =
191 pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
192 pXMLChild;
193 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
194 switch (pXMLChild->GetType()) {
195 case FDE_XMLNODE_Element: {
196 CFX_WideString wsTextData;
197 static_cast<CFDE_XMLElement*>(pXMLChild)->GetTextData(wsTextData);
198 wsTextData += FX_WSTRC(L"\n");
199 wsOutput += wsTextData;
200 break;
201 }
202 case FDE_XMLNODE_Text: {
203 CFX_WideString wsText;
204 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
205 if (IsStringAllWhitespace(wsText))
206 continue;
207
208 wsOutput = wsText;
209 break;
210 }
211 case FDE_XMLNODE_CharData: {
212 CFX_WideString wsCharData;
213 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsCharData);
214 if (IsStringAllWhitespace(wsCharData))
215 continue;
216
217 wsOutput = wsCharData;
218 break;
219 }
220 default:
221 ASSERT(FALSE);
222 break;
223 }
224 }
225 }
226
227 } // namespace
228
XFA_RecognizeRichText(CFDE_XMLElement * pRichTextXMLNode)229 FX_BOOL XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) {
230 if (pRichTextXMLNode) {
231 CFX_WideString wsNamespaceURI;
232 GetElementTagNamespaceURI(pRichTextXMLNode, wsNamespaceURI);
233 if (wsNamespaceURI == FX_WSTRC(L"http://www.w3.org/1999/xhtml"))
234 return TRUE;
235 }
236 return FALSE;
237 }
238
CXFA_SimpleParser(CXFA_Document * pFactory,bool bDocumentParser)239 CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory,
240 bool bDocumentParser)
241 : m_pXMLParser(nullptr),
242 m_pXMLDoc(nullptr),
243 m_pStream(nullptr),
244 m_pFileRead(nullptr),
245 m_pFactory(pFactory),
246 m_pRootNode(nullptr),
247 m_ePacketID(XFA_XDPPACKET_UNKNOWN),
248 m_bDocumentParser(bDocumentParser) {}
249
~CXFA_SimpleParser()250 CXFA_SimpleParser::~CXFA_SimpleParser() {}
251
SetFactory(CXFA_Document * pFactory)252 void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
253 m_pFactory = pFactory;
254 }
255
StartParse(IFX_FileRead * pStream,XFA_XDPPACKET ePacketID)256 int32_t CXFA_SimpleParser::StartParse(IFX_FileRead* pStream,
257 XFA_XDPPACKET ePacketID) {
258 CloseParser();
259 m_pFileRead = pStream;
260 m_pStream.reset(IFX_Stream::CreateStream(
261 pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text));
262 if (!m_pStream)
263 return XFA_PARSESTATUS_StreamErr;
264
265 uint16_t wCodePage = m_pStream->GetCodePage();
266 if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
267 wCodePage != FX_CODEPAGE_UTF8) {
268 m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
269 }
270 m_pXMLDoc.reset(new CFDE_XMLDoc);
271 m_pXMLParser = new CXFA_XMLParser(m_pXMLDoc->GetRoot(), m_pStream.get());
272 if (!m_pXMLDoc->LoadXML(m_pXMLParser))
273 return XFA_PARSESTATUS_StatusErr;
274
275 m_ePacketID = ePacketID;
276 return XFA_PARSESTATUS_Ready;
277 }
278
DoParse(IFX_Pause * pPause)279 int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) {
280 if (!m_pXMLDoc || m_ePacketID == XFA_XDPPACKET_UNKNOWN)
281 return XFA_PARSESTATUS_StatusErr;
282
283 int32_t iRet = m_pXMLDoc->DoLoad(pPause);
284 if (iRet < 0)
285 return XFA_PARSESTATUS_SyntaxErr;
286 if (iRet < 100)
287 return iRet / 2;
288
289 m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
290 m_pXMLDoc->CloseXML();
291 m_pStream.reset();
292
293 if (!m_pRootNode)
294 return XFA_PARSESTATUS_StatusErr;
295 return XFA_PARSESTATUS_Done;
296 }
297
ParseXMLData(const CFX_WideString & wsXML,CFDE_XMLNode * & pXMLNode,IFX_Pause * pPause)298 int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML,
299 CFDE_XMLNode*& pXMLNode,
300 IFX_Pause* pPause) {
301 CloseParser();
302 pXMLNode = nullptr;
303
304 std::unique_ptr<IFX_Stream> pStream(XFA_CreateWideTextRead(wsXML));
305 if (!pStream)
306 return XFA_PARSESTATUS_StreamErr;
307
308 m_pXMLDoc.reset(new CFDE_XMLDoc);
309 CXFA_XMLParser* pParser =
310 new CXFA_XMLParser(m_pXMLDoc->GetRoot(), pStream.get());
311 pParser->m_dwCheckStatus = 0x03;
312 if (!m_pXMLDoc->LoadXML(pParser))
313 return XFA_PARSESTATUS_StatusErr;
314
315 int32_t iRet = m_pXMLDoc->DoLoad(pPause);
316 if (iRet < 0 || iRet >= 100)
317 m_pXMLDoc->CloseXML();
318 if (iRet < 0)
319 return XFA_PARSESTATUS_SyntaxErr;
320 if (iRet < 100)
321 return iRet / 2;
322
323 pXMLNode = GetDocumentNode(m_pXMLDoc.get());
324 return XFA_PARSESTATUS_Done;
325 }
326
ConstructXFANode(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode)327 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
328 CFDE_XMLNode* pXMLNode) {
329 XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID();
330 if (ePacketID == XFA_XDPPACKET_Datasets) {
331 if (pXFANode->GetElementType() == XFA_Element::DataValue) {
332 for (CFDE_XMLNode* pXMLChild =
333 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
334 pXMLChild;
335 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
336 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
337 if (eNodeType == FDE_XMLNODE_Instruction)
338 continue;
339
340 if (eNodeType == FDE_XMLNODE_Element) {
341 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
342 XFA_Element::DataValue);
343 if (!pXFAChild)
344 return;
345
346 CFX_WideString wsNodeStr;
347 CFDE_XMLElement* child = static_cast<CFDE_XMLElement*>(pXMLChild);
348 child->GetLocalTagName(wsNodeStr);
349 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
350 CFX_WideString wsChildValue;
351 XFA_GetPlainTextFromRichText(child, wsChildValue);
352 if (!wsChildValue.IsEmpty())
353 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue);
354
355 pXFANode->InsertChild(pXFAChild);
356 pXFAChild->SetXMLMappingNode(pXMLChild);
357 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
358 break;
359 }
360 }
361 m_pRootNode = pXFANode;
362 } else {
363 m_pRootNode = DataLoader(pXFANode, pXMLNode, TRUE);
364 }
365 } else if (pXFANode->IsContentNode()) {
366 ParseContentNode(pXFANode, pXMLNode, ePacketID);
367 m_pRootNode = pXFANode;
368 } else {
369 m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID);
370 }
371 }
372
GetRootNode() const373 CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
374 return m_pRootNode;
375 }
376
GetXMLDoc() const377 CFDE_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
378 return m_pXMLDoc.get();
379 }
380
XFA_FDEExtension_ResolveNamespaceQualifier(CFDE_XMLElement * pNode,const CFX_WideStringC & wsQualifier,CFX_WideString & wsNamespaceURI)381 FX_BOOL XFA_FDEExtension_ResolveNamespaceQualifier(
382 CFDE_XMLElement* pNode,
383 const CFX_WideStringC& wsQualifier,
384 CFX_WideString& wsNamespaceURI) {
385 if (!pNode)
386 return FALSE;
387
388 CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root);
389 CFX_WideString wsNSAttribute;
390 FX_BOOL bRet = FALSE;
391 if (wsQualifier.IsEmpty()) {
392 wsNSAttribute = FX_WSTRC(L"xmlns");
393 bRet = TRUE;
394 } else {
395 wsNSAttribute = FX_WSTRC(L"xmlns:") + wsQualifier;
396 }
397 for (; pNode != pFakeRoot; pNode = static_cast<CFDE_XMLElement*>(
398 pNode->GetNodeItem(CFDE_XMLNode::Parent))) {
399 if (pNode->GetType() != FDE_XMLNODE_Element)
400 continue;
401
402 if (pNode->HasAttribute(wsNSAttribute.c_str())) {
403 pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI);
404 return TRUE;
405 }
406 }
407 wsNamespaceURI.clear();
408 return bRet;
409 }
410
ParseAsXDPPacket(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)411 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode,
412 XFA_XDPPACKET ePacketID) {
413 switch (ePacketID) {
414 case XFA_XDPPACKET_UNKNOWN:
415 return nullptr;
416 case XFA_XDPPACKET_XDP:
417 return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID);
418 case XFA_XDPPACKET_Config:
419 return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID);
420 case XFA_XDPPACKET_Template:
421 case XFA_XDPPACKET_Form:
422 return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID);
423 case XFA_XDPPACKET_Datasets:
424 return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID);
425 case XFA_XDPPACKET_Xdc:
426 return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID);
427 case XFA_XDPPACKET_LocaleSet:
428 case XFA_XDPPACKET_ConnectionSet:
429 case XFA_XDPPACKET_SourceSet:
430 return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode,
431 ePacketID);
432 default:
433 return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID);
434 }
435 }
436
ParseAsXDPPacket_XDP(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)437 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
438 CFDE_XMLNode* pXMLDocumentNode,
439 XFA_XDPPACKET ePacketID) {
440 if (!MatchNodeName(pXMLDocumentNode,
441 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
442 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
443 XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
444 return nullptr;
445 }
446 CXFA_Node* pXFARootNode =
447 m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Xfa);
448 if (!pXFARootNode)
449 return nullptr;
450
451 m_pRootNode = pXFARootNode;
452 pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa");
453 {
454 CFDE_XMLElement* pElement = static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
455 int32_t iAttributeCount = pElement->CountAttributes();
456 for (int32_t i = 0; i < iAttributeCount; i++) {
457 CFX_WideString wsAttriName, wsAttriValue;
458 pElement->GetAttribute(i, wsAttriName, wsAttriValue);
459 if (wsAttriName == FX_WSTRC(L"uuid"))
460 pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue);
461 else if (wsAttriName == FX_WSTRC(L"timeStamp"))
462 pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue);
463 }
464 }
465
466 CFDE_XMLNode* pXMLConfigDOMRoot = nullptr;
467 CXFA_Node* pXFAConfigDOMRoot = nullptr;
468 {
469 for (CFDE_XMLNode* pChildItem =
470 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
471 pChildItem;
472 pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
473 const XFA_PACKETINFO* pPacketInfo =
474 XFA_GetPacketByIndex(XFA_PACKET_Config);
475 if (!MatchNodeName(pChildItem, pPacketInfo->pName, pPacketInfo->pURI,
476 pPacketInfo->eFlags)) {
477 continue;
478 }
479 if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
480 return nullptr;
481 }
482 pXMLConfigDOMRoot = pChildItem;
483 pXFAConfigDOMRoot =
484 ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config);
485 pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
486 }
487 }
488
489 CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr;
490 CFDE_XMLNode* pXMLFormDOMRoot = nullptr;
491 CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr;
492 {
493 for (CFDE_XMLNode* pChildItem =
494 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
495 pChildItem;
496 pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
497 if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element)
498 continue;
499 if (pChildItem == pXMLConfigDOMRoot)
500 continue;
501
502 CFDE_XMLElement* pElement =
503 reinterpret_cast<CFDE_XMLElement*>(pChildItem);
504 CFX_WideString wsPacketName;
505 pElement->GetLocalTagName(wsPacketName);
506 const XFA_PACKETINFO* pPacketInfo =
507 XFA_GetPacketByName(wsPacketName.AsStringC());
508 if (pPacketInfo && pPacketInfo->pURI) {
509 if (!MatchNodeName(pElement, pPacketInfo->pName, pPacketInfo->pURI,
510 pPacketInfo->eFlags)) {
511 pPacketInfo = nullptr;
512 }
513 }
514 XFA_XDPPACKET ePacket =
515 pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER;
516 if (ePacket == XFA_XDPPACKET_XDP)
517 continue;
518 if (ePacket == XFA_XDPPACKET_Datasets) {
519 if (pXMLDatasetsDOMRoot)
520 return nullptr;
521
522 pXMLDatasetsDOMRoot = pElement;
523 } else if (ePacket == XFA_XDPPACKET_Form) {
524 if (pXMLFormDOMRoot)
525 return nullptr;
526
527 pXMLFormDOMRoot = pElement;
528 } else if (ePacket == XFA_XDPPACKET_Template) {
529 if (pXMLTemplateDOMRoot) {
530 // Found a duplicate template packet.
531 return nullptr;
532 }
533 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
534 if (pPacketNode) {
535 pXMLTemplateDOMRoot = pElement;
536 pXFARootNode->InsertChild(pPacketNode);
537 }
538 } else {
539 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
540 if (pPacketNode) {
541 if (pPacketInfo &&
542 (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
543 pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
544 return nullptr;
545 }
546 pXFARootNode->InsertChild(pPacketNode);
547 }
548 }
549 }
550 }
551
552 if (!pXMLTemplateDOMRoot) {
553 // No template is found.
554 return nullptr;
555 }
556 if (pXMLDatasetsDOMRoot) {
557 CXFA_Node* pPacketNode =
558 ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets);
559 if (pPacketNode)
560 pXFARootNode->InsertChild(pPacketNode);
561 }
562 if (pXMLFormDOMRoot) {
563 CXFA_Node* pPacketNode =
564 ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form);
565 if (pPacketNode)
566 pXFARootNode->InsertChild(pPacketNode);
567 }
568 pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
569 return pXFARootNode;
570 }
571
ParseAsXDPPacket_Config(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)572 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
573 CFDE_XMLNode* pXMLDocumentNode,
574 XFA_XDPPACKET ePacketID) {
575 if (!MatchNodeName(pXMLDocumentNode,
576 XFA_GetPacketByIndex(XFA_PACKET_Config)->pName,
577 XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI,
578 XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) {
579 return nullptr;
580 }
581 CXFA_Node* pNode =
582 m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_Element::Config);
583 if (!pNode)
584 return nullptr;
585
586 pNode->SetCData(XFA_ATTRIBUTE_Name,
587 XFA_GetPacketByIndex(XFA_PACKET_Config)->pName);
588 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
589 return nullptr;
590
591 pNode->SetXMLMappingNode(pXMLDocumentNode);
592 return pNode;
593 }
594
ParseAsXDPPacket_TemplateForm(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)595 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm(
596 CFDE_XMLNode* pXMLDocumentNode,
597 XFA_XDPPACKET ePacketID) {
598 CXFA_Node* pNode = nullptr;
599 if (ePacketID == XFA_XDPPACKET_Template) {
600 if (MatchNodeName(pXMLDocumentNode,
601 XFA_GetPacketByIndex(XFA_PACKET_Template)->pName,
602 XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI,
603 XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) {
604 pNode =
605 m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_Element::Template);
606 if (!pNode)
607 return nullptr;
608
609 pNode->SetCData(XFA_ATTRIBUTE_Name,
610 XFA_GetPacketByIndex(XFA_PACKET_Template)->pName);
611 if (m_bDocumentParser) {
612 CFX_WideString wsNamespaceURI;
613 CFDE_XMLElement* pXMLDocumentElement =
614 static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
615 pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI);
616 if (wsNamespaceURI.IsEmpty())
617 pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI);
618
619 pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
620 }
621 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
622 return nullptr;
623 }
624 } else if (ePacketID == XFA_XDPPACKET_Form) {
625 if (MatchNodeName(pXMLDocumentNode,
626 XFA_GetPacketByIndex(XFA_PACKET_Form)->pName,
627 XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI,
628 XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) {
629 CFDE_XMLElement* pXMLDocumentElement =
630 static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
631 CFX_WideString wsChecksum;
632 pXMLDocumentElement->GetString(L"checksum", wsChecksum);
633 if (wsChecksum.GetLength() != 28 ||
634 m_pXMLParser->m_dwCheckStatus != 0x03) {
635 return nullptr;
636 }
637 std::unique_ptr<CXFA_ChecksumContext> pChecksum(new CXFA_ChecksumContext);
638 pChecksum->StartChecksum();
639 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
640 m_pXMLParser->m_nSize[0]);
641 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
642 m_pXMLParser->m_nSize[1]);
643 pChecksum->FinishChecksum();
644 CFX_ByteString bsCheck = pChecksum->GetChecksum();
645 if (bsCheck != wsChecksum.UTF8Encode())
646 return nullptr;
647
648 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_Element::Form);
649 if (!pNode)
650 return nullptr;
651
652 pNode->SetCData(XFA_ATTRIBUTE_Name,
653 XFA_GetPacketByIndex(XFA_PACKET_Form)->pName);
654 pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC());
655 CXFA_Node* pTemplateRoot =
656 m_pRootNode->GetFirstChildByClass(XFA_Element::Template);
657 CXFA_Node* pTemplateChosen =
658 pTemplateRoot
659 ? pTemplateRoot->GetFirstChildByClass(XFA_Element::Subform)
660 : nullptr;
661 FX_BOOL bUseAttribute = TRUE;
662 if (pTemplateChosen &&
663 pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) !=
664 XFA_ATTRIBUTEENUM_Auto) {
665 bUseAttribute = FALSE;
666 }
667 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute))
668 return nullptr;
669 }
670 }
671 if (pNode)
672 pNode->SetXMLMappingNode(pXMLDocumentNode);
673
674 return pNode;
675 }
676
ParseAsXDPPacket_Data(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)677 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
678 CFDE_XMLNode* pXMLDocumentNode,
679 XFA_XDPPACKET ePacketID) {
680 CFDE_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
681 if (pDatasetsXMLNode) {
682 CXFA_Node* pNode =
683 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataModel);
684 if (!pNode)
685 return nullptr;
686
687 pNode->SetCData(XFA_ATTRIBUTE_Name,
688 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName);
689 if (!DataLoader(pNode, pDatasetsXMLNode, FALSE))
690 return nullptr;
691
692 pNode->SetXMLMappingNode(pDatasetsXMLNode);
693 return pNode;
694 }
695
696 CFDE_XMLNode* pDataXMLNode = nullptr;
697 if (MatchNodeName(pXMLDocumentNode, FX_WSTRC(L"data"),
698 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
699 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
700 static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
701 ->RemoveAttribute(L"xmlns:xfa");
702 pDataXMLNode = pXMLDocumentNode;
703 } else {
704 CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data");
705 CFDE_XMLNode* pParentXMLNode =
706 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent);
707 if (pParentXMLNode)
708 pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
709
710 ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element);
711 if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) {
712 static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
713 ->RemoveAttribute(L"xmlns:xfa");
714 }
715 pDataElement->InsertChildNode(pXMLDocumentNode);
716 pDataXMLNode = pDataElement;
717 }
718
719 if (pDataXMLNode) {
720 CXFA_Node* pNode =
721 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataGroup);
722 if (!pNode) {
723 if (pDataXMLNode != pXMLDocumentNode)
724 pDataXMLNode->Release();
725 return nullptr;
726 }
727 CFX_WideString wsLocalName;
728 static_cast<CFDE_XMLElement*>(pDataXMLNode)->GetLocalTagName(wsLocalName);
729 pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName);
730 if (!DataLoader(pNode, pDataXMLNode, TRUE))
731 return nullptr;
732
733 pNode->SetXMLMappingNode(pDataXMLNode);
734 if (pDataXMLNode != pXMLDocumentNode)
735 pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
736 return pNode;
737 }
738 return nullptr;
739 }
740
ParseAsXDPPacket_LocaleConnectionSourceSet(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)741 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
742 CFDE_XMLNode* pXMLDocumentNode,
743 XFA_XDPPACKET ePacketID) {
744 CXFA_Node* pNode = nullptr;
745 if (ePacketID == XFA_XDPPACKET_LocaleSet) {
746 if (MatchNodeName(pXMLDocumentNode,
747 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName,
748 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI,
749 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) {
750 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet,
751 XFA_Element::LocaleSet);
752 if (!pNode)
753 return nullptr;
754
755 pNode->SetCData(XFA_ATTRIBUTE_Name,
756 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName);
757 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
758 return nullptr;
759 }
760 } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) {
761 if (MatchNodeName(pXMLDocumentNode,
762 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName,
763 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI,
764 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) {
765 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet,
766 XFA_Element::ConnectionSet);
767 if (!pNode)
768 return nullptr;
769
770 pNode->SetCData(XFA_ATTRIBUTE_Name,
771 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName);
772 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
773 return nullptr;
774 }
775 } else if (ePacketID == XFA_XDPPACKET_SourceSet) {
776 if (MatchNodeName(pXMLDocumentNode,
777 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName,
778 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI,
779 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) {
780 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet,
781 XFA_Element::SourceSet);
782 if (!pNode)
783 return nullptr;
784
785 pNode->SetCData(XFA_ATTRIBUTE_Name,
786 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName);
787 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID))
788 return nullptr;
789 }
790 }
791 if (pNode)
792 pNode->SetXMLMappingNode(pXMLDocumentNode);
793 return pNode;
794 }
795
ParseAsXDPPacket_Xdc(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)796 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
797 CFDE_XMLNode* pXMLDocumentNode,
798 XFA_XDPPACKET ePacketID) {
799 if (!MatchNodeName(pXMLDocumentNode,
800 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName,
801 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI,
802 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags))
803 return nullptr;
804
805 CXFA_Node* pNode =
806 m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_Element::Xdc);
807 if (!pNode)
808 return nullptr;
809
810 pNode->SetCData(XFA_ATTRIBUTE_Name,
811 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName);
812 pNode->SetXMLMappingNode(pXMLDocumentNode);
813 return pNode;
814 }
815
ParseAsXDPPacket_User(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)816 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
817 CFDE_XMLNode* pXMLDocumentNode,
818 XFA_XDPPACKET ePacketID) {
819 CXFA_Node* pNode =
820 m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Packet);
821 if (!pNode)
822 return nullptr;
823
824 CFX_WideString wsName;
825 static_cast<CFDE_XMLElement*>(pXMLDocumentNode)->GetLocalTagName(wsName);
826 pNode->SetCData(XFA_ATTRIBUTE_Name, wsName);
827 if (!UserPacketLoader(pNode, pXMLDocumentNode))
828 return nullptr;
829
830 pNode->SetXMLMappingNode(pXMLDocumentNode);
831 return pNode;
832 }
833
UserPacketLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc)834 CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
835 CFDE_XMLNode* pXMLDoc) {
836 return pXFANode;
837 }
838
DataLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc,FX_BOOL bDoTransform)839 CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
840 CFDE_XMLNode* pXMLDoc,
841 FX_BOOL bDoTransform) {
842 ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets);
843 return pXFANode;
844 }
845
NormalLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc,XFA_XDPPACKET ePacketID,FX_BOOL bUseAttribute)846 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
847 CFDE_XMLNode* pXMLDoc,
848 XFA_XDPPACKET ePacketID,
849 FX_BOOL bUseAttribute) {
850 FX_BOOL bOneOfPropertyFound = FALSE;
851 for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild);
852 pXMLChild;
853 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
854 switch (pXMLChild->GetType()) {
855 case FDE_XMLNODE_Element: {
856 CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
857 CFX_WideString wsTagName;
858 pXMLElement->GetLocalTagName(wsTagName);
859 XFA_Element eType = XFA_GetElementTypeForName(wsTagName.AsStringC());
860 if (eType == XFA_Element::Unknown)
861 continue;
862
863 const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement(
864 pXFANode->GetElementType(), eType, ePacketID);
865 if (pPropertyInfo &&
866 ((pPropertyInfo->uFlags &
867 (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) {
868 if (bOneOfPropertyFound)
869 break;
870
871 bOneOfPropertyFound = TRUE;
872 }
873 CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
874 if (!pXFAChild)
875 return nullptr;
876 if (ePacketID == XFA_XDPPACKET_Config)
877 pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC());
878
879 FX_BOOL IsNeedValue = TRUE;
880 for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count;
881 i++) {
882 CFX_WideString wsAttrQualifiedName;
883 CFX_WideString wsAttrName;
884 CFX_WideString wsAttrValue;
885 pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue);
886 GetAttributeLocalName(wsAttrQualifiedName.AsStringC(), wsAttrName);
887 if (wsAttrName == FX_WSTRC(L"nil") &&
888 wsAttrValue == FX_WSTRC(L"true")) {
889 IsNeedValue = FALSE;
890 }
891 const XFA_ATTRIBUTEINFO* lpAttrInfo =
892 XFA_GetAttributeByName(wsAttrName.AsStringC());
893 if (!lpAttrInfo)
894 continue;
895
896 if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name &&
897 lpAttrInfo->eName != XFA_ATTRIBUTE_Save) {
898 continue;
899 }
900 pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC());
901 }
902 pXFANode->InsertChild(pXFAChild);
903 if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
904 if (ePacketID == XFA_XDPPACKET_Config)
905 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
906 else
907 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
908
909 break;
910 }
911 switch (pXFAChild->GetObjectType()) {
912 case XFA_ObjectType::ContentNode:
913 case XFA_ObjectType::TextNode:
914 case XFA_ObjectType::NodeC:
915 case XFA_ObjectType::NodeV:
916 if (IsNeedValue)
917 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
918 break;
919 default:
920 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
921 break;
922 }
923 } break;
924 case FDE_XMLNODE_Instruction:
925 ParseInstruction(pXFANode, static_cast<CFDE_XMLInstruction*>(pXMLChild),
926 ePacketID);
927 break;
928 default:
929 break;
930 }
931 }
932 return pXFANode;
933 }
934
ParseContentNode(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)935 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
936 CFDE_XMLNode* pXMLNode,
937 XFA_XDPPACKET ePacketID) {
938 XFA_Element element = XFA_Element::Sharptext;
939 if (pXFANode->GetElementType() == XFA_Element::ExData) {
940 CFX_WideStringC wsContentType =
941 pXFANode->GetCData(XFA_ATTRIBUTE_ContentType);
942 if (wsContentType == FX_WSTRC(L"text/html"))
943 element = XFA_Element::SharpxHTML;
944 else if (wsContentType == FX_WSTRC(L"text/xml"))
945 element = XFA_Element::Sharpxml;
946 }
947 if (element == XFA_Element::SharpxHTML)
948 pXFANode->SetXMLMappingNode(pXMLNode);
949
950 CFX_WideString wsValue;
951 for (CFDE_XMLNode* pXMLChild =
952 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
953 pXMLChild;
954 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
955 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
956 if (eNodeType == FDE_XMLNODE_Instruction)
957 continue;
958
959 if (element == XFA_Element::SharpxHTML) {
960 if (eNodeType != FDE_XMLNODE_Element)
961 break;
962
963 if (XFA_RecognizeRichText(static_cast<CFDE_XMLElement*>(pXMLChild)))
964 XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
965 wsValue);
966 } else if (element == XFA_Element::Sharpxml) {
967 if (eNodeType != FDE_XMLNODE_Element)
968 break;
969
970 ConvertXMLToPlainText(static_cast<CFDE_XMLElement*>(pXMLChild), wsValue);
971 } else {
972 if (eNodeType == FDE_XMLNODE_Element)
973 break;
974 if (eNodeType == FDE_XMLNODE_Text)
975 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsValue);
976 else if (eNodeType == FDE_XMLNODE_CharData)
977 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsValue);
978 }
979 break;
980 }
981 if (!wsValue.IsEmpty()) {
982 if (pXFANode->IsContentNode()) {
983 CXFA_Node* pContentRawDataNode =
984 m_pFactory->CreateNode(ePacketID, element);
985 ASSERT(pContentRawDataNode);
986 pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
987 pXFANode->InsertChild(pContentRawDataNode);
988 } else {
989 pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
990 }
991 }
992 }
993
ParseDataGroup(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)994 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
995 CFDE_XMLNode* pXMLNode,
996 XFA_XDPPACKET ePacketID) {
997 for (CFDE_XMLNode* pXMLChild =
998 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
999 pXMLChild;
1000 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1001 switch (pXMLChild->GetType()) {
1002 case FDE_XMLNODE_Element: {
1003 CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
1004 {
1005 CFX_WideString wsNamespaceURI;
1006 GetElementTagNamespaceURI(pXMLElement, wsNamespaceURI);
1007 if (wsNamespaceURI ==
1008 FX_WSTRC(L"http://www.xfa.com/schema/xfa-package/") ||
1009 wsNamespaceURI ==
1010 FX_WSTRC(L"http://www.xfa.org/schema/xfa-package/") ||
1011 wsNamespaceURI ==
1012 FX_WSTRC(L"http://www.w3.org/2001/XMLSchema-instance")) {
1013 continue;
1014 }
1015 }
1016
1017 XFA_Element eNodeType = XFA_Element::DataModel;
1018 if (eNodeType == XFA_Element::DataModel) {
1019 CFX_WideString wsDataNodeAttr;
1020 if (FindAttributeWithNS(
1021 pXMLElement, FX_WSTRC(L"dataNode"),
1022 FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/"),
1023 wsDataNodeAttr)) {
1024 if (wsDataNodeAttr == FX_WSTRC(L"dataGroup"))
1025 eNodeType = XFA_Element::DataGroup;
1026 else if (wsDataNodeAttr == FX_WSTRC(L"dataValue"))
1027 eNodeType = XFA_Element::DataValue;
1028 }
1029 }
1030 CFX_WideString wsContentType;
1031 if (eNodeType == XFA_Element::DataModel) {
1032 if (FindAttributeWithNS(
1033 pXMLElement, FX_WSTRC(L"contentType"),
1034 FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/"),
1035 wsContentType)) {
1036 if (!wsContentType.IsEmpty())
1037 eNodeType = XFA_Element::DataValue;
1038 }
1039 }
1040 if (eNodeType == XFA_Element::DataModel) {
1041 for (CFDE_XMLNode* pXMLDataChild =
1042 pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild);
1043 pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
1044 CFDE_XMLNode::NextSibling)) {
1045 if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) {
1046 if (!XFA_RecognizeRichText(
1047 static_cast<CFDE_XMLElement*>(pXMLDataChild))) {
1048 eNodeType = XFA_Element::DataGroup;
1049 break;
1050 }
1051 }
1052 }
1053 }
1054 if (eNodeType == XFA_Element::DataModel)
1055 eNodeType = XFA_Element::DataValue;
1056
1057 CXFA_Node* pXFAChild =
1058 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType);
1059 if (!pXFAChild)
1060 return;
1061
1062 CFX_WideString wsNodeName;
1063 pXMLElement->GetLocalTagName(wsNodeName);
1064 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName);
1065 bool bNeedValue = true;
1066 for (int32_t i = 0; i < pXMLElement->CountAttributes(); ++i) {
1067 CFX_WideString wsQualifiedName;
1068 CFX_WideString wsValue;
1069 CFX_WideString wsName;
1070 CFX_WideString wsNS;
1071 pXMLElement->GetAttribute(i, wsQualifiedName, wsValue);
1072 if (!ResolveAttribute(pXMLElement, wsQualifiedName.AsStringC(),
1073 wsName, wsNS)) {
1074 continue;
1075 }
1076 if (wsName == FX_WSTRC(L"nil") && wsValue == FX_WSTRC(L"true")) {
1077 bNeedValue = false;
1078 continue;
1079 }
1080 if (wsNS == FX_WSTRC(L"http://www.xfa.com/schema/xfa-package/") ||
1081 wsNS == FX_WSTRC(L"http://www.xfa.org/schema/xfa-package/") ||
1082 wsNS == FX_WSTRC(L"http://www.w3.org/2001/XMLSchema-instance") ||
1083 wsNS == FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/")) {
1084 continue;
1085 }
1086 CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
1087 XFA_XDPPACKET_Datasets, XFA_Element::DataValue);
1088 if (!pXFAMetaData)
1089 return;
1090
1091 pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsName);
1092 pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsQualifiedName);
1093 pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1094 pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains,
1095 XFA_ATTRIBUTEENUM_MetaData);
1096 pXFAChild->InsertChild(pXFAMetaData);
1097 pXFAMetaData->SetXMLMappingNode(pXMLElement);
1098 pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
1099 }
1100
1101 if (!bNeedValue) {
1102 CFX_WideString wsNilName(L"xsi:nil");
1103 pXMLElement->RemoveAttribute(wsNilName.c_str());
1104 }
1105 pXFANode->InsertChild(pXFAChild);
1106 if (eNodeType == XFA_Element::DataGroup)
1107 ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
1108 else if (bNeedValue)
1109 ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets);
1110
1111 pXFAChild->SetXMLMappingNode(pXMLElement);
1112 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1113 continue;
1114 }
1115 case FDE_XMLNODE_CharData: {
1116 CFDE_XMLCharData* pXMLCharData =
1117 static_cast<CFDE_XMLCharData*>(pXMLChild);
1118 CFX_WideString wsCharData;
1119 pXMLCharData->GetCharData(wsCharData);
1120 if (IsStringAllWhitespace(wsCharData))
1121 continue;
1122
1123 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1124 XFA_Element::DataValue);
1125 if (!pXFAChild)
1126 return;
1127
1128 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData);
1129 pXFANode->InsertChild(pXFAChild);
1130 pXFAChild->SetXMLMappingNode(pXMLCharData);
1131 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1132 continue;
1133 }
1134 case FDE_XMLNODE_Text: {
1135 CFDE_XMLText* pXMLText = static_cast<CFDE_XMLText*>(pXMLChild);
1136 CFX_WideString wsText;
1137 pXMLText->GetText(wsText);
1138 if (IsStringAllWhitespace(wsText))
1139 continue;
1140
1141 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1142 XFA_Element::DataValue);
1143 if (!pXFAChild)
1144 return;
1145
1146 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText);
1147 pXFANode->InsertChild(pXFAChild);
1148 pXFAChild->SetXMLMappingNode(pXMLText);
1149 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1150 continue;
1151 }
1152 default:
1153 continue;
1154 }
1155 }
1156 }
1157
ParseDataValue(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)1158 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
1159 CFDE_XMLNode* pXMLNode,
1160 XFA_XDPPACKET ePacketID) {
1161 CFX_WideTextBuf wsValueTextBuf;
1162 CFX_WideTextBuf wsCurValueTextBuf;
1163 FX_BOOL bMarkAsCompound = FALSE;
1164 CFDE_XMLNode* pXMLCurValueNode = nullptr;
1165 for (CFDE_XMLNode* pXMLChild =
1166 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
1167 pXMLChild;
1168 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1169 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
1170 if (eNodeType == FDE_XMLNODE_Instruction)
1171 continue;
1172
1173 CFX_WideString wsText;
1174 if (eNodeType == FDE_XMLNODE_Text) {
1175 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
1176 if (!pXMLCurValueNode)
1177 pXMLCurValueNode = pXMLChild;
1178
1179 wsCurValueTextBuf << wsText;
1180 } else if (eNodeType == FDE_XMLNODE_CharData) {
1181 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsText);
1182 if (!pXMLCurValueNode)
1183 pXMLCurValueNode = pXMLChild;
1184
1185 wsCurValueTextBuf << wsText;
1186 } else if (XFA_RecognizeRichText(
1187 static_cast<CFDE_XMLElement*>(pXMLChild))) {
1188 XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
1189 wsText);
1190 if (!pXMLCurValueNode)
1191 pXMLCurValueNode = pXMLChild;
1192
1193 wsCurValueTextBuf << wsText;
1194 } else {
1195 bMarkAsCompound = TRUE;
1196 if (pXMLCurValueNode) {
1197 CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1198 if (!wsCurValue.IsEmpty()) {
1199 CXFA_Node* pXFAChild =
1200 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1201 if (!pXFAChild)
1202 return;
1203
1204 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1205 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1206 pXFANode->InsertChild(pXFAChild);
1207 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1208 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1209 wsValueTextBuf << wsCurValue;
1210 wsCurValueTextBuf.Clear();
1211 }
1212 pXMLCurValueNode = nullptr;
1213 }
1214 CXFA_Node* pXFAChild =
1215 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1216 if (!pXFAChild)
1217 return;
1218
1219 CFX_WideString wsNodeStr;
1220 static_cast<CFDE_XMLElement*>(pXMLChild)->GetLocalTagName(wsNodeStr);
1221 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
1222 ParseDataValue(pXFAChild, pXMLChild, ePacketID);
1223 pXFANode->InsertChild(pXFAChild);
1224 pXFAChild->SetXMLMappingNode(pXMLChild);
1225 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1226 CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value);
1227 wsValueTextBuf << wsCurValue;
1228 }
1229 }
1230 if (pXMLCurValueNode) {
1231 CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1232 if (!wsCurValue.IsEmpty()) {
1233 if (bMarkAsCompound) {
1234 CXFA_Node* pXFAChild =
1235 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1236 if (!pXFAChild)
1237 return;
1238
1239 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1240 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1241 pXFANode->InsertChild(pXFAChild);
1242 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1243 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1244 }
1245 wsValueTextBuf << wsCurValue;
1246 wsCurValueTextBuf.Clear();
1247 }
1248 pXMLCurValueNode = nullptr;
1249 }
1250 CFX_WideString wsNodeValue = wsValueTextBuf.MakeString();
1251 pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue);
1252 }
1253
ParseInstruction(CXFA_Node * pXFANode,CFDE_XMLInstruction * pXMLInstruction,XFA_XDPPACKET ePacketID)1254 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
1255 CFDE_XMLInstruction* pXMLInstruction,
1256 XFA_XDPPACKET ePacketID) {
1257 if (!m_bDocumentParser)
1258 return;
1259
1260 CFX_WideString wsTargetName;
1261 pXMLInstruction->GetTargetName(wsTargetName);
1262 if (wsTargetName == FX_WSTRC(L"originalXFAVersion")) {
1263 CFX_WideString wsData;
1264 if (pXMLInstruction->GetData(0, wsData) &&
1265 (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) !=
1266 XFA_VERSION_UNKNOWN)) {
1267 wsData.clear();
1268 if (pXMLInstruction->GetData(1, wsData) &&
1269 wsData == FX_WSTRC(L"v2.7-scripting:1")) {
1270 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, TRUE);
1271 }
1272 }
1273 } else if (wsTargetName == FX_WSTRC(L"acrobat")) {
1274 CFX_WideString wsData;
1275 if (pXMLInstruction->GetData(0, wsData) &&
1276 wsData == FX_WSTRC(L"JavaScript")) {
1277 if (pXMLInstruction->GetData(1, wsData) &&
1278 wsData == FX_WSTRC(L"strictScoping")) {
1279 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, TRUE);
1280 }
1281 }
1282 }
1283 }
1284
CloseParser()1285 void CXFA_SimpleParser::CloseParser() {
1286 m_pXMLDoc.reset();
1287 m_pStream.reset();
1288 }
1289