1 /*
2 # PostgreSQL Database Modeler (pgModeler)
3 #
4 # Copyright 2006-2020 - Raphael Araújo e Silva <raphael@pgmodeler.io>
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation version 3.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # The complete text of GPLv3 is at LICENSE file on source code root directory.
16 # Also, you can get the complete GNU General Public License at <http://www.gnu.org/licenses/>
17 */
18 
19 #include "xmlparser.h"
20 #include <QUrl>
21 
22 const QString XmlParser::CharAmp("&amp;");
23 const QString XmlParser::CharLt("&lt;");
24 const QString XmlParser::CharGt("&gt;");
25 const QString XmlParser::CharQuot("&quot;");
26 const QString XmlParser::CharApos("&apos;");
27 const QString XmlParser::CdataStart("<![CDATA[");
28 const QString XmlParser::CdataEnd("]]>");
29 const QString XmlParser::CommentStart("<!--");
30 const QString XmlParser::CommentEnd("-->");
31 
XmlParser()32 XmlParser::XmlParser()
33 {
34 	root_elem=nullptr;
35 	curr_elem=nullptr;
36 	xml_doc=nullptr;
37 	curr_line = 0;
38 	xmlInitParser();
39 }
40 
~XmlParser()41 XmlParser::~XmlParser()
42 {
43 	restartParser();
44 	xmlCleanupParser();
45 }
46 
removeDTD()47 void XmlParser::removeDTD()
48 {
49 	int pos1=-1, pos2=-1, pos3=-1, len;
50 
51 	if(!xml_buffer.isEmpty())
52 	{
53 		/* Removes the current DTD from document.
54 		 If the user attempts to manipulate the structure of
55 		 document damaging its integrity. */
56 		pos1=xml_buffer.indexOf(QLatin1String("<!DOCTYPE"));
57 		pos2=xml_buffer.indexOf(QLatin1String("]>\n"));
58 		pos3=xml_buffer.indexOf(QLatin1String("\">\n"));
59 		if(pos1 >=0 && (pos2 >=0 || pos3 >= 0))
60 		{
61 			len=((pos2 > pos3) ? (pos2-pos1)+3 :  (pos3-pos2)+3);
62 			xml_buffer.replace(pos1,len,"");
63 		}
64 	}
65 }
66 
loadXMLFile(const QString & filename)67 void XmlParser::loadXMLFile(const QString &filename)
68 {
69 	try
70 	{
71 		QFile input;
72 		QString buffer;
73 
74 		if(!filename.isEmpty())
75 		{
76 			//Opens a file stream using the file name
77 			input.setFileName(filename);
78 			input.open(QFile::ReadOnly);
79 
80 			//Case the file opening was not sucessful
81 			if(!input.isOpen())
82 			{
83 				throw Exception(Exception::getErrorMessage(ErrorCode::FileDirectoryNotAccessed).arg(filename),
84 								ErrorCode::FileDirectoryNotAccessed,__PRETTY_FUNCTION__,__FILE__,__LINE__);
85 			}
86 
87 			buffer=input.readAll();
88 			input.close();
89 
90 			xml_doc_filename=filename;
91 			loadXMLBuffer(buffer);
92 		}
93 	}
94 	catch(Exception &e)
95 	{
96 		throw Exception(e.getErrorMessage(), e.getErrorCode(), __PRETTY_FUNCTION__,__FILE__,__LINE__, &e);
97 	}
98 }
99 
loadXMLBuffer(const QString & xml_buf)100 void XmlParser::loadXMLBuffer(const QString &xml_buf)
101 {
102 	try
103 	{
104 		int pos1=-1, pos2=-1, tam=0;
105 
106 		if(xml_buf.isEmpty())
107 			throw Exception(ErrorCode::AsgEmptyXMLBuffer,__PRETTY_FUNCTION__,__FILE__,__LINE__);
108 
109 		pos1=xml_buf.indexOf(QLatin1String("<?xml"));
110 		pos2=xml_buf.indexOf(QLatin1String("?>"));
111 		xml_buffer=xml_buf;
112 
113 		if(pos1 >= 0 && pos2 >= 0)
114 		{
115 			tam=(pos2-pos1)+3;
116 			xml_decl=xml_buffer.mid(pos1, tam);
117 			xml_buffer.replace(pos1,tam,"");
118 		}
119 		else
120 			xml_decl=QString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
121 
122 		removeDTD();
123 		readBuffer();
124 	}
125 	catch(Exception &e)
126 	{
127 		throw Exception(e.getErrorMessage(), e.getErrorCode(), __PRETTY_FUNCTION__,__FILE__,__LINE__, &e);
128 	}
129 }
130 
setDTDFile(const QString & dtd_file,const QString & dtd_name)131 void XmlParser::setDTDFile(const QString &dtd_file, const QString &dtd_name)
132 {
133 	QString fmt_dtd_file;
134 
135 	if(dtd_file.isEmpty())
136 		throw Exception(ErrorCode::AsgEmptyDTDFile,__PRETTY_FUNCTION__,__FILE__,__LINE__);
137 
138 	if(dtd_name.isEmpty())
139 		throw Exception(ErrorCode::AsgEmptyDTDName,__PRETTY_FUNCTION__,__FILE__,__LINE__);
140 
141 #ifndef Q_OS_WIN
142 	fmt_dtd_file=QString("file://");
143 #else
144 	fmt_dtd_file=QString("file:///");
145 #endif
146 
147 	//Formats the dtd file path to URL style (converting to percentage format the non reserved chars)
148 	fmt_dtd_file=QUrl::toPercentEncoding(QFileInfo(dtd_file).absoluteFilePath(), "/:");
149 	dtd_decl=QString("<!DOCTYPE ") + dtd_name +
150 			 QString(" SYSTEM ") +
151 			 QString("\"") +
152 			 fmt_dtd_file + QString("\">\n");
153 }
154 
readBuffer()155 void XmlParser::readBuffer()
156 {
157 	QByteArray buffer;
158 	QString msg, file;
159 	xmlError *xml_error=nullptr;
160 	int parser_opt;
161 
162 	if(!xml_buffer.isEmpty())
163 	{
164 		//Inserts the XML declaration
165 		buffer+=xml_decl;
166 
167 		//Configures the parser, initially, to not validate the document against the dtd
168 		parser_opt=( XML_PARSE_NOBLANKS | XML_PARSE_NONET | XML_PARSE_NOENT | XML_PARSE_BIG_LINES);
169 
170 		//If the dtd declarions is setup
171 		if(!dtd_decl.isEmpty())
172 		{
173 			//Inserts the default software DTD declarion into XML buffer
174 			buffer+=dtd_decl;
175 
176 			//Now configures the parser to validate the buffer against the DTD
177 			parser_opt=(parser_opt | XML_PARSE_DTDLOAD | XML_PARSE_DTDVALID);
178 		}
179 
180 		buffer+=xml_buffer;
181 
182 		//Create an xml document from the buffer
183 		xml_doc=xmlReadMemory(buffer.data(), buffer.size(),	nullptr, nullptr, parser_opt);
184 
185 		//In case the document criation fails, gets the last xml parser error
186 		xml_error=xmlGetLastError();
187 
188 		//If some error is set
189 		if(xml_error)
190 		{
191 			//Formats the error
192 			msg=xml_error->message;
193 			file=xml_error->file;
194 			if(!file.isEmpty()) file=QString("(%1)").arg(file);
195 			msg.replace("\n"," ");
196 
197 			//Restarts the parser
198 			if(xml_doc) restartParser();
199 
200 			//Raise an exception with the error massege from the parser xml
201 			throw Exception(Exception::getErrorMessage(ErrorCode::LibXMLError)
202 							.arg(xml_error->line).arg(xml_error->int2).arg(msg).arg(file),
203 							ErrorCode::LibXMLError,__PRETTY_FUNCTION__,__FILE__,__LINE__);
204 		}
205 
206 		//Gets the referênce to the root element on the document
207 		root_elem=curr_elem=xmlDocGetRootElement(xml_doc);
208 	}
209 }
210 
savePosition()211 void XmlParser::savePosition()
212 {
213 	if(!root_elem)
214 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
215 
216 	elems_stack.push(curr_elem);
217 }
218 
restorePosition()219 void XmlParser::restorePosition()
220 {
221 	if(!root_elem)
222 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
223 
224 	if(elems_stack.empty())
225 		curr_elem=root_elem;
226 	else
227 	{
228 		curr_elem=elems_stack.top();
229 		elems_stack.pop();
230 	}
231 }
232 
restorePosition(const xmlNode * elem)233 void XmlParser::restorePosition(const xmlNode *elem)
234 {
235 	if(!elem)
236 		throw Exception(ErrorCode::OprNotAllocatedElement,__PRETTY_FUNCTION__,__FILE__,__LINE__);
237 	else if(elem->doc!=xml_doc)
238 		throw Exception(ErrorCode::OprInexistentElement,__PRETTY_FUNCTION__,__FILE__,__LINE__);
239 
240 	restartNavigation();
241 	curr_elem=const_cast<xmlNode *>(elem);
242 }
243 
restartNavigation()244 void XmlParser::restartNavigation()
245 {
246 	if(!root_elem)
247 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
248 
249 	curr_elem=root_elem;
250 
251 	while(!elems_stack.empty())
252 		elems_stack.pop();
253 }
254 
restartParser()255 void XmlParser::restartParser()
256 {
257 	root_elem=curr_elem=nullptr;
258 	curr_line = 0;
259 
260 	if(xml_doc)
261 	{
262 		xmlFreeDoc(xml_doc);
263 		xml_doc=nullptr;
264 	}
265 	dtd_decl=xml_buffer=xml_decl="";
266 
267 	while(!elems_stack.empty())
268 		elems_stack.pop();
269 
270 	xml_doc_filename="";
271 	xmlResetLastError();
272 }
273 
accessElement(unsigned elem_type)274 bool XmlParser::accessElement(unsigned elem_type)
275 {
276 	bool has_elem;
277 	xmlNode *elems[4];
278 
279 	if(!root_elem)
280 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
281 
282 	elems[RootElement]=curr_elem->parent;
283 	elems[ChildElement]=curr_elem->children;
284 	elems[NextElement]=curr_elem->next;
285 	elems[PreviousElement]=curr_elem->prev;
286 
287 	/* Checks whether the current element has the element that
288 		is to  be accessed. The flag 'has_elem' is also used
289 		on the method return to indicate if the element has been
290 		accessed or not. */
291 	has_elem=hasElement(elem_type);
292 
293 	if(has_elem)
294 	{
295 		curr_elem=elems[elem_type];
296 
297 		/* NOTE: Due to XML2 implementation big line numbers are stored in the psvi
298 		 * attribute so we need to convert the void* to char and convert it back to integer value */
299 		if(curr_elem->line == 65535 && curr_elem->next && curr_elem->next->psvi != nullptr)
300 		{
301 			char hex_value[10] = "";
302 			int aux_line = 0;
303 
304 			sprintf(hex_value, "%p", curr_elem->next->psvi);
305 			aux_line = static_cast<int>(strtol(hex_value, nullptr, 16));
306 
307 			if(curr_line < aux_line)
308 				curr_line = aux_line;
309 		}
310 		else if(curr_elem->line > curr_line)
311 			curr_line = curr_elem->line;
312 	}
313 
314 	return has_elem;
315 }
316 
hasElement(unsigned elem_type,xmlElementType xml_node_type)317 bool XmlParser::hasElement(unsigned elem_type, xmlElementType xml_node_type)
318 {
319 	if(!root_elem)
320 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
321 
322 	if(elem_type==RootElement)
323 		/* Returns the verification if the current element has a parent.
324 		 The element must be different from the root, because the root element
325 		 is not connected to a parent */
326 		return (curr_elem!=root_elem && curr_elem->parent!=nullptr &&
327 						(xml_node_type==0 || (xml_node_type!=0 && curr_elem->parent->type==xml_node_type)));
328 	else if(elem_type==ChildElement)
329 		//Returns the verification if the current element has children
330 		return (curr_elem->children!=nullptr &&
331 						(xml_node_type==0 || (xml_node_type!=0 && curr_elem->children->type==xml_node_type)));
332 	else if(elem_type==NextElement)
333 		return (curr_elem->next!=nullptr &&
334 						(xml_node_type==0 || (xml_node_type!=0 && curr_elem->next->type==xml_node_type)));
335 	else
336 		/* The second comparison in the expression is made for the root element
337 		 because libxml2 places the previous element as the root itself */
338 		return (curr_elem->prev!=nullptr && curr_elem->prev!=root_elem &&
339 															(xml_node_type==0 || (xml_node_type!=0 && curr_elem->prev->type==xml_node_type)));
340 }
341 
hasAttributes()342 bool XmlParser::hasAttributes()
343 {
344 	if(!root_elem)
345 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
346 
347 	return (curr_elem->properties != nullptr);
348 }
349 
getElementContent()350 QString XmlParser::getElementContent()
351 {
352 	if(!root_elem)
353 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
354 
355 	/* If the current element has  <![CDATA[]]> node returns the content of the CDATA instead
356 	of return the content of the element itself */
357 	if(curr_elem->next && curr_elem->next->type == XML_CDATA_SECTION_NODE)
358 		return QString(reinterpret_cast<char *>(curr_elem->next->content));
359 	else
360 		//Return the content of the element when is not a CDATA node
361 		return QString(reinterpret_cast<char *>(curr_elem->content));
362 }
363 
getElementName()364 QString XmlParser::getElementName()
365 {
366 	if(!root_elem)
367 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
368 
369 	return QString(reinterpret_cast<const char *>(curr_elem->name));
370 }
371 
getElementType()372 xmlElementType XmlParser::getElementType()
373 {
374 	if(!root_elem)
375 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
376 
377 	return curr_elem->type;
378 }
379 
getCurrentElement()380 const xmlNode *XmlParser::getCurrentElement()
381 {
382 	return curr_elem;
383 }
384 
getElementAttributes(attribs_map & attributes)385 void XmlParser::getElementAttributes(attribs_map &attributes)
386 {
387 	xmlAttr *elem_attribs=nullptr;
388 	QString attrib, value;
389 
390 	if(!root_elem)
391 		throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
392 
393 	//Always clears the passed attributes maps
394 	attributes.clear();
395 
396 	//Gets the references to the element properties
397 	elem_attribs=curr_elem->properties;
398 
399 	while(elem_attribs)
400 	{
401 		//Gets the attribute name
402 		attrib=QString(reinterpret_cast<const char *>(elem_attribs->name));
403 		//Gets the attribute value
404 		value=QString(reinterpret_cast<char *>(elem_attribs->children->content));
405 
406 		/* Assigns to the attribute map in the index specified by the
407 		 attribute name the obtained value */
408 		attributes[attrib]=value;
409 
410 		//Step to the next element attribute
411 		elem_attribs=elem_attribs->next;
412 	}
413 }
414 
getLoadedFilename()415 QString XmlParser::getLoadedFilename()
416 {
417 	return xml_doc_filename;
418 }
419 
getXMLBuffer()420 QString XmlParser::getXMLBuffer()
421 {
422 	return xml_buffer;
423 }
424 
getCurrentBufferLine()425 int XmlParser::getCurrentBufferLine()
426 {
427 	if(curr_elem)
428 		return curr_line;
429 	else
430 		return 0;
431 }
432 
getBufferLineCount()433 int XmlParser::getBufferLineCount()
434 {
435 	if(xml_doc)
436 	{
437 		/* To get the very last line of the document is necessary to call
438 		the last element of the last because xml_doc->last->line stores the
439 		last line of the root element.
440 
441 		NOTE: Due to XML2 implementation big line numbers are stored in the psvi
442 		attribute so we need to convert the void* to char and convert it back to integer value */
443 		if(xml_doc->last->last->line == 65535 && xml_doc->last->last->psvi != nullptr)
444 		{
445 			char hex_value[10] = "";
446 			sprintf(hex_value, "%p", xml_doc->last->last->psvi);
447 			return static_cast<int>(strtol(hex_value, nullptr, 16));
448 		}
449 
450 		return xml_doc->last->last->line;
451 	}
452 	else
453 		return 0;
454 }
455 
convertCharsToXMLEntities(QString buf)456 QString XmlParser::convertCharsToXMLEntities(QString buf)
457 {
458 	QTextStream ts(&buf);
459 	QRegExp attr_regexp=QRegExp("([a-z]|\\-)+( )*(=\\\")"),
460 			attr_end_regexp=QRegExp("(\\\")(( )|(\\t)|(\\n)|((\\/\\>)|(\\>)))");
461 	int attr_start=0, attr_end=0, count=0, cdata_start = -1,
462 			cdata_end = -1, start = -1, end = -1, pos = 0;
463 	QString value, fmt_buf, lin;
464 
465 	while(!ts.atEnd())
466 	{
467 		lin = ts.readLine();
468 		lin += "\n";
469 
470 		// Ignoring the xml header
471 		if(lin.indexOf("<?xml") >= 0)
472 		{
473 			fmt_buf += lin;
474 			continue;
475 		}
476 
477 		// Checking if the current line has at least one attribute in form (attr="value")
478 		attr_start = -1;
479 		attr_start = attr_regexp.indexIn(lin);
480 
481 		if(attr_start >= 0)
482 		{
483 			/* Checking the presence of <![[CDATA ]]> tag in the current line.
484 			 * In case of finding it we need to perform specific operation to avoid
485 			 * replacing contents within that tag */
486 			cdata_start = lin.indexOf(CdataStart);
487 			cdata_end = lin.indexOf(CdataEnd);
488 			start = min<int>(cdata_start, cdata_end);
489 			end = max<int>(cdata_start, cdata_end);
490 
491 			do
492 			{
493 				attr_end = attr_end_regexp.indexIn(lin, attr_start + attr_regexp.matchedLength());
494 
495 				if(attr_start >= 0 && attr_end >= 0 &&
496 					 //CDATA absent in the current line
497 					 ((start < 0 && end < 0) ||
498 						//The attribute is at left of the CDATA tag
499 						(start >= 0 && attr_start < start && attr_end < start) ||
500 						//The attribute is at right of the CDATA tag
501 						(end >= 0 && attr_start > end && attr_end > end)))
502 				{
503 					// Calculates the initial position where the value to be retrived is (in that case rigth after attrib=")
504 					pos = attr_start + attr_regexp.matchedLength();
505 					count = attr_end - pos;
506 					value = lin.mid(pos, count);
507 				}
508 				else
509 					break;
510 
511 				/* If the extracted value has one of the expected special chars
512 				 * in order to perform the replacemnt to xml entities */
513 				if(value.contains(QRegExp("(&|\\<|\\>|\")")))
514 				{
515 					if(!value.contains(CharQuot) && !value.contains(CharLt) &&
516 						 !value.contains(CharGt) && !value.contains(CharAmp) &&
517 						 !value.contains(CharApos) && value.contains('&'))
518 							value.replace('&', CharAmp);
519 
520 						value.replace('"', CharQuot);
521 						value.replace('<', CharLt);
522 						value.replace('>', CharGt);
523 
524 					//Puts in the original XML definition the modified string
525 					lin.replace(pos, count, value);
526 				}
527 
528 				// Moving the position to the next attribute in the line (if existent)
529 				pos += value.length() + 1;
530 				attr_start = attr_regexp.indexIn(lin, pos);
531 				value.clear();
532 			}
533 			while(attr_start >=0 && attr_start < lin.size());
534 		}
535 
536 		fmt_buf += lin;
537 		lin.clear();
538 	}
539 
540 	return fmt_buf;
541 }
542