1 /**
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 /*
21  * XSEC
22  *
23  * TXFMXPath := Class that performs XPath transforms
24  *
25  * $Id: TXFMXPath.cpp 1894293 2021-10-15 14:14:50Z scantor $
26  *
27  */
28 
29 
30 #include <xsec/dsig/DSIGConstants.hpp>
31 #include <xsec/framework/XSECError.hpp>
32 #include <xsec/transformers/TXFMXPath.hpp>
33 #include <xsec/transformers/TXFMParser.hpp>
34 
35 #ifdef XSEC_HAVE_XALAN
36 
37 #include "../utils/XSECDOMUtils.hpp"
38 
39 #if defined(_MSC_VER)
40 #	pragma warning(disable: 4267)
41 #endif
42 
43 #include <xalanc/XPath/XObjectFactoryDefault.hpp>
44 #include <xalanc/XPath/XPathExecutionContextDefault.hpp>
45 
46 #if defined(_MSC_VER)
47 #	pragma warning(default: 4267)
48 #endif
49 
50 // If this isn't defined, we're on Xalan 1.12+ and require modern C++
51 #ifndef XALAN_USING_XALAN
52 # define XALAN_USING_XALAN(NAME) using xalanc :: NAME;
53 #endif
54 
55 // Xalan namespace usage
56 XALAN_USING_XALAN(XPathProcessorImpl)
XALAN_USING_XALAN(XercesDOMSupport)57 XALAN_USING_XALAN(XercesDOMSupport)
58 XALAN_USING_XALAN(XercesParserLiaison)
59 XALAN_USING_XALAN(XercesDocumentWrapper)
60 XALAN_USING_XALAN(XercesWrapperNavigator)
61 XALAN_USING_XALAN(XPathEvaluator)
62 XALAN_USING_XALAN(XPathFactoryDefault)
63 XALAN_USING_XALAN(XPathConstructionContextDefault)
64 XALAN_USING_XALAN(XalanDocument)
65 XALAN_USING_XALAN(XalanNode)
66 XALAN_USING_XALAN(XalanDOMChar)
67 XALAN_USING_XALAN(XPathEnvSupportDefault)
68 XALAN_USING_XALAN(XObjectFactoryDefault)
69 XALAN_USING_XALAN(XPathExecutionContextDefault)
70 XALAN_USING_XALAN(ElementPrefixResolverProxy)
71 XALAN_USING_XALAN(XPath)
72 XALAN_USING_XALAN(NodeRefListBase)
73 XALAN_USING_XALAN(XSLTResultTarget)
74 XALAN_USING_XALAN(XSLException)
75 
76 #endif
77 
78 XERCES_CPP_NAMESPACE_USE
79 
80 #ifdef XSEC_HAVE_XPATH
81 
82 #include <iostream>
83 
84 #define KLUDGE_PREFIX "berindsig"
85 
86 // Helper function
87 
88 void setXPathNS(DOMDocument *d,
89 		DOMNamedNodeMap *xAtts,
90 		XSECXPathNodeList &addedNodes,
91 		XSECSafeBufferFormatter *formatter,
92 		XSECNameSpaceExpander * nse) {
93 
94 	// if set then set the name spaces in the attribute list else clear them
95 
96 	DOMElement * e = d->getDocumentElement();
97 
98 	if (e == NULL) {
99 
100 		throw XSECException(XSECException::XPathError, "Element node not found in Document");
101 
102 	}
103 
104 	if (xAtts != 0) {
105 
106 		int xAttsCount = xAtts->getLength();
107 
108 		// Check all is OK with the Xalan Document and first element
109 
110 		if (d == NULL) {
111 
112 			throw XSECException(XSECException::XPathError, "Attempt to define XPath Name Space before setInput called");
113 
114 		}
115 
116 		// Run through each attribute looking for name spaces
117 		const XMLCh *xpName;
118 		safeBuffer xpNameSB;
119 		const XMLCh *xpLocalName;
120 		const XMLCh *xpValue;
121 
122 		for (int xCounter = 0; xCounter < xAttsCount; ++xCounter) {
123 
124 			if (nse == NULL || !nse->nodeWasAdded(xAtts->item(xCounter))) {
125 
126 				xpName = xAtts->item(xCounter)->getNodeName();
127 				xpNameSB << (*formatter << xpName);
128 
129 				if (xpNameSB.sbStrncmp("xmlns", 5) == 0) {
130 
131 					// Check whether a node of this name already exists
132 					xpLocalName = xAtts->item(xCounter)->getLocalName();
133 					xpValue = xAtts->item(xCounter)->getNodeValue();
134 					if (e->hasAttributeNS(DSIGConstants::s_unicodeStrURIXMLNS, xpLocalName) == false) {
135 
136 						// Nope
137 
138 						e->setAttributeNS(DSIGConstants::s_unicodeStrURIXMLNS, xpName, xpValue);
139 						addedNodes.addNode(e->getAttributeNodeNS(DSIGConstants::s_unicodeStrURIXMLNS, xpLocalName));
140 					}
141 
142 				}
143 
144 			}
145 
146 		}
147 
148 	}
149 
150 	// Insert the kludge namespace
151 	safeBuffer k("xmlns:");
152 	k.sbStrcatIn(KLUDGE_PREFIX);
153 
154 	e->setAttributeNS(DSIGConstants::s_unicodeStrURIXMLNS,
155 					 MAKE_UNICODE_STRING(k.rawCharBuffer()),
156 					 DSIGConstants::s_unicodeStrURIDSIG);
157 }
158 
clearXPathNS(DOMDocument * d,XSECXPathNodeList & toRemove,XSECSafeBufferFormatter * formatter,XSECNameSpaceExpander * nse)159 void clearXPathNS(DOMDocument *d,
160 				  XSECXPathNodeList &toRemove,
161 				  XSECSafeBufferFormatter *formatter,
162 				  XSECNameSpaceExpander * nse) {
163 
164 	// Clear the XPath name spaces in the document element attribute list
165 
166 	DOMElement * e = d->getDocumentElement();
167 
168 	if (e == NULL) {
169 
170 		throw XSECException(XSECException::XPathError, "Element node not found in Document");
171 
172 	}
173 
174 	// Run through each node in the added nodes
175 
176 	const DOMNode * r = toRemove.getFirstNode();
177 	while (r != NULL) {
178 		e->removeAttributeNS(DSIGConstants::s_unicodeStrURIXMLNS,
179 					r->getLocalName());
180 		r = toRemove.getNextNode();
181 	}
182 
183 	e->removeAttributeNS(DSIGConstants::s_unicodeStrURIXMLNS,
184 					 MAKE_UNICODE_STRING(KLUDGE_PREFIX));
185 
186 }
187 
TXFMXPath(DOMDocument * doc)188 TXFMXPath::TXFMXPath(DOMDocument *doc) :
189 	TXFMBase(doc) {
190 
191 	document = NULL;
192 	XPathAtts = NULL;
193 
194 	// Formatter is used for handling attribute name space inputs
195 
196 	XSECnew(formatter, XSECSafeBufferFormatter("UTF-8",XMLFormatter::NoEscapes,
197 												XMLFormatter::UnRep_CharRef));
198 
199 }
200 
~TXFMXPath()201 TXFMXPath::~TXFMXPath() {
202 
203 	if (formatter != NULL)
204 		delete formatter;
205 
206 }
207 
setNameSpace(DOMNamedNodeMap * xpAtts)208 void TXFMXPath::setNameSpace(DOMNamedNodeMap *xpAtts) {
209 
210 	// A name space needs to be set on the document
211 
212 	XPathAtts = xpAtts;
213 
214 }
215 
216 // Methods to set the inputs
217 
setInput(TXFMBase * newInput)218 void TXFMXPath::setInput(TXFMBase *newInput) {
219 
220 	if (newInput->getOutputType() == TXFMBase::BYTE_STREAM) {
221 
222 		//throw XSECException(XSECException::TransformInputOutputFail, "C14n canonicalisation transform requires DOM_NODES input");
223 		// Need to parse into DOM_NODES
224 		TXFMParser * parser;
225 		XSECnew(parser, TXFMParser(mp_expansionDoc));
226 		try{
227 			parser->setInput(newInput);
228 		}
229 		catch (...) {
230 			delete parser;
231 			input = newInput;
232 			throw;
233 		}
234 
235 		input = parser;
236 		parser->expandNameSpaces();
237 	}
238 	else
239 		input = newInput;
240 
241 	// Set up for the new document
242 	document = input->getDocument();
243 
244 	// Expand if necessary
245 	this->expandNameSpaces();
246 
247 	keepComments = input->getCommentsStatus();
248 
249 }
250 
/**
 * Report whether a character is anything other than an ASCII letter.
 *
 * @param c Character to classify
 * @return false for 'a'-'z' and 'A'-'Z', true for every other value
 */
bool separator(unsigned char c) {

	const bool lowerCase = ('a' <= c && c <= 'z');
	const bool upperCase = ('A' <= c && c <= 'Z');

	return !(lowerCase || upperCase);

}
262 
findHereNodeFromXalan(XercesWrapperNavigator * xwn,XalanNode * n,DOMNode * h)263 XalanNode * findHereNodeFromXalan(XercesWrapperNavigator * xwn, XalanNode * n, DOMNode *h) {
264 
265 	const DOMNode * m = xwn->mapNode(n);
266 	const XalanNode * ret;
267 
268 	if (m == h)
269 		return n;
270 
271 	// Not this one - check the children
272 
273 	XalanNode * c = n->getFirstChild();
274 
275 	while (c != 0) {
276 		ret = findHereNodeFromXalan(xwn, c, h);
277 		if (ret != 0)
278 			return (XalanNode *) ret;
279 		c = c->getNextSibling();
280 	}
281 
282 	return 0;
283 }
284 
285 
286 
evaluateExpr(DOMNode * h,safeBuffer inexpr)287 void TXFMXPath::evaluateExpr(DOMNode *h, safeBuffer inexpr) {
288 
289 	// Temporarily add any necessary name spaces into the document
290 
291 	XSECXPathNodeList addedNodes;
292 	setXPathNS(document, XPathAtts, addedNodes, formatter, mp_nse);
293 
294 	XPathProcessorImpl	xppi;					// The processor
295 	XercesParserLiaison xpl;
296 	XercesDOMSupport	xds(xpl);
297 	XPathEvaluator		xpe;
298 	XPathFactoryDefault xpf;
299 	XPathConstructionContextDefault xpcc;
300 
301 	XalanDocument		* xd;
302 	XalanNode			* contextNode;
303 
304 	// Xalan can throw exceptions in all functions, so do one broad catch point.
305 
306 	try {
307 
308 		// Map to Xalan
309 		xd = xpl.createDocument(document);
310 
311 		// For performing mapping
312 		XercesDocumentWrapper *xdw = xpl.mapDocumentToWrapper(xd);
313 		XercesWrapperNavigator xwn(xdw);
314 
315 		// Map the "here" node - but only if part of current document
316 
317 		XalanNode * hereNode = NULL;
318 
319 		if (h->getOwnerDocument() == document) {
320 
321 			hereNode = xwn.mapNode(h);
322 
323 			if (hereNode == NULL) {
324 
325 				hereNode = findHereNodeFromXalan(&xwn, xd, h);
326 
327 				if (hereNode == NULL) {
328 
329 					throw XSECException(XSECException::XPathError,
330 					   "Unable to find here node in Xalan Wrapper map");
331 				}
332 
333 			}
334 		}
335 
336 		// Now work out what we have to set up in the new processing
337 
338 		TXFMBase::nodeType inputType = input->getNodeType();
339 
340 		XalanDOMString cd;		// For the moment assume the root is the context
341 
342 		const XalanDOMChar * cexpr;
343 
344 		safeBuffer contextExpr;
345 
346 		switch (inputType) {
347 
348 		case DOM_NODE_DOCUMENT :
349 		case DOM_NODE_XPATH_NODESET :
350 			// do XPath over the whole document and, if the input was an
351 			// XPath Nodeset, then later intersect the result with the input nodelist
352 			cd = XalanDOMString("/");		// Root node
353 			cexpr = cd.c_str();
354 
355 			// The context node is the "root" node
356 			contextNode =
357 				xpe.selectSingleNode(
358 				xds,
359 				xd,
360 				cexpr,
361 				xd->getDocumentElement());
362 
363 			break;
364 
365 		case DOM_NODE_DOCUMENT_FRAGMENT :
366 			{
367 
368 				// Need to map the DOM_Node that we are given from the input to the appropriate XalanNode
369 
370 				// Create the XPath expression to find the node
371 
372 				if (input->getFragmentId() != NULL) {
373 
374 					contextExpr.sbTranscodeIn("//descendant-or-self::node()[attribute::Id='");
375 					contextExpr.sbXMLChCat(input->getFragmentId());
376 					contextExpr.sbXMLChCat("']");
377 
378 					// Map the node
379 
380 					contextNode =
381 						xpe.selectSingleNode(
382 						xds,
383 						xd,
384 						contextExpr.rawXMLChBuffer(), //XalanDOMString((char *) contextExpr.rawBuffer()).c_str(),
385 						xd->getDocumentElement());
386 
387 
388 					if (contextNode == NULL) {
389 						// Last Ditch
390 						contextNode = xwn.mapNode(input->getFragmentNode());
391 
392 					}
393 
394 				}
395 				else
396 					contextNode = xwn.mapNode(input->getFragmentNode());
397 
398 				if (contextNode == NULL) {
399 
400 					// Something wrong
401 					throw XSECException(XSECException::XPathError, "Error mapping context node");
402 
403 				}
404 
405 				break;
406 			}
407 
408 		default :
409 
410 			throw XSECException(XSECException::XPathError);	// Should never get here
411 
412 		}
413 
414 		safeBuffer str;
415 		XPathEnvSupportDefault xpesd;
416 		XObjectFactoryDefault			xof;
417 		XPathExecutionContextDefault	xpec(xpesd, xds, xof);
418 
419 		ElementPrefixResolverProxy pr(xd->getDocumentElement(), xpesd, xds);
420 
421 		// Work around the fact that the XPath implementation is designed for XSLT, so does
422 		// not allow here() as a NCName.
423 
424 		// THIS IS A KLUDGE AND SHOULD BE DONE BETTER
425 
426 		safeBuffer k(KLUDGE_PREFIX);
427 		k.sbStrcatIn(":");
428 
429 		XMLSSize_t offset = inexpr.sbStrstr("here()");
430 
431 		while (offset >= 0) {
432 
433 			if (offset == 0 || offset == 1 ||
434 				(!(inexpr[offset - 1] == ':' && inexpr[offset - 2] != ':') &&
435 				separator(inexpr[offset - 1]))) {
436 
437 				inexpr.sbStrinsIn(k.rawCharBuffer(), offset);
438 
439 			}
440 
441 			offset = inexpr.sbOffsetStrstr("here()", offset + 11);
442 
443 		}
444 
445 		// Install the External function in the Environment handler
446 
447 		if (hereNode != NULL) {
448 
449 			xpesd.installExternalFunctionLocal(XalanDOMString(URI_ID_DSIG), XalanDOMString("here"), DSIGXPathHere(hereNode));
450 
451 		}
452 
453 		str.sbStrcpyIn("(descendant-or-self::node() | descendant-or-self::node()/attribute::* | descendant-or-self::node()/namespace::*)[");
454 		str.sbStrcatIn(inexpr);
455 		str.sbStrcatIn("]");
456 
457 		XPath * xp = xpf.create();
458 
459 		XalanDOMString Xexpr((char *) str.rawBuffer());
460 		xppi.initXPath(*xp, xpcc, Xexpr, pr);
461 
462 		// Now resolve
463 
464 		XObjectPtr xObj = xp->execute(contextNode, pr, xpec);
465 
466 		// Now map to a list that others can use (naieve list at this time)
467 
468 		const NodeRefListBase&	lst = xObj->nodeset();
469 
470 		int size = (int) lst.getLength();
471 		const DOMNode *item;
472 
473 		for (int i = 0; i < size; ++ i) {
474 
475 			if (lst.item(i) == xd)
476 				m_XPathMap.addNode(document);
477 			else {
478 				item = xwn.mapNode(lst.item(i));
479 				m_XPathMap.addNode(item);
480 			}
481 		}
482 
483 		if (inputType == DOM_NODE_XPATH_NODESET) {
484 			//the input list was a XPATH nodeset, so we must intersect the
485 			// results of the XPath processing done above with the input nodeset
486 			m_XPathMap.intersect(input->getXPathNodeList());
487 		}
488 	}
489 
490 	catch (const XSLException &e) {
491 
492 		safeBuffer msg;
493 
494 		// Whatever happens - fix any changes to the original document
495 		clearXPathNS(document, addedNodes, formatter, mp_nse);
496 
497 		// Collate the exception message into an XSEC message.
498 		msg.sbTranscodeIn("Xalan Exception : ");
499 		msg.sbXMLChCat(e.getType());
500 		msg.sbXMLChCat(" caught.  Message : ");
501 		msg.sbXMLChCat(e.getMessage().c_str());
502 
503 		throw XSECException(XSECException::XPathError,
504 			msg.rawXMLChBuffer());
505 	}
506 
507 	clearXPathNS(document, addedNodes, formatter, mp_nse);
508 
509 }
510 
evaluateEnvelope(DOMNode * t)511 void TXFMXPath::evaluateEnvelope(DOMNode *t) {
512 
513 	// A special case where the XPath expression is already known
514 
515 	if (document == NULL) {
516 
517 		throw XSECException(XSECException::XPathError,
518 		   "Attempt to define XPath Name Space before setInput called");
519 
520 	}
521 
522 	DOMElement * e = document->getDocumentElement();
523 
524 	if (e == NULL) {
525 
526 		throw XSECException(XSECException::XPathError,
527               "Element node not found in Document");
528 
529 	}
530 
531 	// Set the xmlns:dsig="http://www.w3.org/2000/09/xmldsig#"
532 
533 	e->setAttributeNS(DSIGConstants::s_unicodeStrURIXMLNS, MAKE_UNICODE_STRING("xmlns:dsig"), DSIGConstants::s_unicodeStrURIDSIG);
534 
535 
536 	// Evaluate
537 
538 	evaluateExpr(t, XPATH_EXPR_ENVELOPE);
539 
540 	// Now we are done, remove the namespace
541 
542 	e->removeAttributeNS(DSIGConstants::s_unicodeStrURIXMLNS, MAKE_UNICODE_STRING("dsig"));
543 
544 }
545 
// Methods to get transform output type and input requirement
547 
getInputType(void) const548 TXFMBase::ioType TXFMXPath::getInputType(void) const {
549 
550 	return TXFMBase::DOM_NODES;
551 
552 }
getOutputType(void) const553 TXFMBase::ioType TXFMXPath::getOutputType(void) const {
554 
555 	return TXFMBase::DOM_NODES;
556 
557 }
558 
getNodeType(void) const559 TXFMBase::nodeType TXFMXPath::getNodeType(void) const {
560 
561 	return TXFMBase::DOM_NODE_XPATH_NODESET;
562 
563 }
564 
565 // Methods to get output data
566 
readBytes(XMLByte * const toFill,unsigned int maxToFill)567 unsigned int TXFMXPath::readBytes(XMLByte * const toFill, unsigned int maxToFill) {
568 
569 	return 0;
570 
571 }
572 
getDocument() const573 DOMDocument *TXFMXPath::getDocument() const {
574 
575 	return document;
576 
577 }
578 
579 #endif /* XSEC_HAVE_XPATH */
580