1 /*
2 # PostgreSQL Database Modeler (pgModeler)
3 #
4 # Copyright 2006-2020 - Raphael Araújo e Silva <raphael@pgmodeler.io>
5 #
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation version 3.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # The complete text of GPLv3 is at LICENSE file on source code root directory.
16 # Also, you can get the complete GNU General Public License at <http://www.gnu.org/licenses/>
17 */
18
19 #include "xmlparser.h"
20 #include <QUrl>
21
22 const QString XmlParser::CharAmp("&");
23 const QString XmlParser::CharLt("<");
24 const QString XmlParser::CharGt(">");
25 const QString XmlParser::CharQuot(""");
26 const QString XmlParser::CharApos("'");
27 const QString XmlParser::CdataStart("<![CDATA[");
28 const QString XmlParser::CdataEnd("]]>");
29 const QString XmlParser::CommentStart("<!--");
30 const QString XmlParser::CommentEnd("-->");
31
XmlParser()32 XmlParser::XmlParser()
33 {
34 root_elem=nullptr;
35 curr_elem=nullptr;
36 xml_doc=nullptr;
37 curr_line = 0;
38 xmlInitParser();
39 }
40
~XmlParser()41 XmlParser::~XmlParser()
42 {
43 restartParser();
44 xmlCleanupParser();
45 }
46
removeDTD()47 void XmlParser::removeDTD()
48 {
49 int pos1=-1, pos2=-1, pos3=-1, len;
50
51 if(!xml_buffer.isEmpty())
52 {
53 /* Removes the current DTD from document.
54 If the user attempts to manipulate the structure of
55 document damaging its integrity. */
56 pos1=xml_buffer.indexOf(QLatin1String("<!DOCTYPE"));
57 pos2=xml_buffer.indexOf(QLatin1String("]>\n"));
58 pos3=xml_buffer.indexOf(QLatin1String("\">\n"));
59 if(pos1 >=0 && (pos2 >=0 || pos3 >= 0))
60 {
61 len=((pos2 > pos3) ? (pos2-pos1)+3 : (pos3-pos2)+3);
62 xml_buffer.replace(pos1,len,"");
63 }
64 }
65 }
66
loadXMLFile(const QString & filename)67 void XmlParser::loadXMLFile(const QString &filename)
68 {
69 try
70 {
71 QFile input;
72 QString buffer;
73
74 if(!filename.isEmpty())
75 {
76 //Opens a file stream using the file name
77 input.setFileName(filename);
78 input.open(QFile::ReadOnly);
79
80 //Case the file opening was not sucessful
81 if(!input.isOpen())
82 {
83 throw Exception(Exception::getErrorMessage(ErrorCode::FileDirectoryNotAccessed).arg(filename),
84 ErrorCode::FileDirectoryNotAccessed,__PRETTY_FUNCTION__,__FILE__,__LINE__);
85 }
86
87 buffer=input.readAll();
88 input.close();
89
90 xml_doc_filename=filename;
91 loadXMLBuffer(buffer);
92 }
93 }
94 catch(Exception &e)
95 {
96 throw Exception(e.getErrorMessage(), e.getErrorCode(), __PRETTY_FUNCTION__,__FILE__,__LINE__, &e);
97 }
98 }
99
loadXMLBuffer(const QString & xml_buf)100 void XmlParser::loadXMLBuffer(const QString &xml_buf)
101 {
102 try
103 {
104 int pos1=-1, pos2=-1, tam=0;
105
106 if(xml_buf.isEmpty())
107 throw Exception(ErrorCode::AsgEmptyXMLBuffer,__PRETTY_FUNCTION__,__FILE__,__LINE__);
108
109 pos1=xml_buf.indexOf(QLatin1String("<?xml"));
110 pos2=xml_buf.indexOf(QLatin1String("?>"));
111 xml_buffer=xml_buf;
112
113 if(pos1 >= 0 && pos2 >= 0)
114 {
115 tam=(pos2-pos1)+3;
116 xml_decl=xml_buffer.mid(pos1, tam);
117 xml_buffer.replace(pos1,tam,"");
118 }
119 else
120 xml_decl=QString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
121
122 removeDTD();
123 readBuffer();
124 }
125 catch(Exception &e)
126 {
127 throw Exception(e.getErrorMessage(), e.getErrorCode(), __PRETTY_FUNCTION__,__FILE__,__LINE__, &e);
128 }
129 }
130
setDTDFile(const QString & dtd_file,const QString & dtd_name)131 void XmlParser::setDTDFile(const QString &dtd_file, const QString &dtd_name)
132 {
133 QString fmt_dtd_file;
134
135 if(dtd_file.isEmpty())
136 throw Exception(ErrorCode::AsgEmptyDTDFile,__PRETTY_FUNCTION__,__FILE__,__LINE__);
137
138 if(dtd_name.isEmpty())
139 throw Exception(ErrorCode::AsgEmptyDTDName,__PRETTY_FUNCTION__,__FILE__,__LINE__);
140
141 #ifndef Q_OS_WIN
142 fmt_dtd_file=QString("file://");
143 #else
144 fmt_dtd_file=QString("file:///");
145 #endif
146
147 //Formats the dtd file path to URL style (converting to percentage format the non reserved chars)
148 fmt_dtd_file=QUrl::toPercentEncoding(QFileInfo(dtd_file).absoluteFilePath(), "/:");
149 dtd_decl=QString("<!DOCTYPE ") + dtd_name +
150 QString(" SYSTEM ") +
151 QString("\"") +
152 fmt_dtd_file + QString("\">\n");
153 }
154
readBuffer()155 void XmlParser::readBuffer()
156 {
157 QByteArray buffer;
158 QString msg, file;
159 xmlError *xml_error=nullptr;
160 int parser_opt;
161
162 if(!xml_buffer.isEmpty())
163 {
164 //Inserts the XML declaration
165 buffer+=xml_decl;
166
167 //Configures the parser, initially, to not validate the document against the dtd
168 parser_opt=( XML_PARSE_NOBLANKS | XML_PARSE_NONET | XML_PARSE_NOENT | XML_PARSE_BIG_LINES);
169
170 //If the dtd declarions is setup
171 if(!dtd_decl.isEmpty())
172 {
173 //Inserts the default software DTD declarion into XML buffer
174 buffer+=dtd_decl;
175
176 //Now configures the parser to validate the buffer against the DTD
177 parser_opt=(parser_opt | XML_PARSE_DTDLOAD | XML_PARSE_DTDVALID);
178 }
179
180 buffer+=xml_buffer;
181
182 //Create an xml document from the buffer
183 xml_doc=xmlReadMemory(buffer.data(), buffer.size(), nullptr, nullptr, parser_opt);
184
185 //In case the document criation fails, gets the last xml parser error
186 xml_error=xmlGetLastError();
187
188 //If some error is set
189 if(xml_error)
190 {
191 //Formats the error
192 msg=xml_error->message;
193 file=xml_error->file;
194 if(!file.isEmpty()) file=QString("(%1)").arg(file);
195 msg.replace("\n"," ");
196
197 //Restarts the parser
198 if(xml_doc) restartParser();
199
200 //Raise an exception with the error massege from the parser xml
201 throw Exception(Exception::getErrorMessage(ErrorCode::LibXMLError)
202 .arg(xml_error->line).arg(xml_error->int2).arg(msg).arg(file),
203 ErrorCode::LibXMLError,__PRETTY_FUNCTION__,__FILE__,__LINE__);
204 }
205
206 //Gets the referênce to the root element on the document
207 root_elem=curr_elem=xmlDocGetRootElement(xml_doc);
208 }
209 }
210
savePosition()211 void XmlParser::savePosition()
212 {
213 if(!root_elem)
214 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
215
216 elems_stack.push(curr_elem);
217 }
218
restorePosition()219 void XmlParser::restorePosition()
220 {
221 if(!root_elem)
222 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
223
224 if(elems_stack.empty())
225 curr_elem=root_elem;
226 else
227 {
228 curr_elem=elems_stack.top();
229 elems_stack.pop();
230 }
231 }
232
restorePosition(const xmlNode * elem)233 void XmlParser::restorePosition(const xmlNode *elem)
234 {
235 if(!elem)
236 throw Exception(ErrorCode::OprNotAllocatedElement,__PRETTY_FUNCTION__,__FILE__,__LINE__);
237 else if(elem->doc!=xml_doc)
238 throw Exception(ErrorCode::OprInexistentElement,__PRETTY_FUNCTION__,__FILE__,__LINE__);
239
240 restartNavigation();
241 curr_elem=const_cast<xmlNode *>(elem);
242 }
243
restartNavigation()244 void XmlParser::restartNavigation()
245 {
246 if(!root_elem)
247 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
248
249 curr_elem=root_elem;
250
251 while(!elems_stack.empty())
252 elems_stack.pop();
253 }
254
restartParser()255 void XmlParser::restartParser()
256 {
257 root_elem=curr_elem=nullptr;
258 curr_line = 0;
259
260 if(xml_doc)
261 {
262 xmlFreeDoc(xml_doc);
263 xml_doc=nullptr;
264 }
265 dtd_decl=xml_buffer=xml_decl="";
266
267 while(!elems_stack.empty())
268 elems_stack.pop();
269
270 xml_doc_filename="";
271 xmlResetLastError();
272 }
273
accessElement(unsigned elem_type)274 bool XmlParser::accessElement(unsigned elem_type)
275 {
276 bool has_elem;
277 xmlNode *elems[4];
278
279 if(!root_elem)
280 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
281
282 elems[RootElement]=curr_elem->parent;
283 elems[ChildElement]=curr_elem->children;
284 elems[NextElement]=curr_elem->next;
285 elems[PreviousElement]=curr_elem->prev;
286
287 /* Checks whether the current element has the element that
288 is to be accessed. The flag 'has_elem' is also used
289 on the method return to indicate if the element has been
290 accessed or not. */
291 has_elem=hasElement(elem_type);
292
293 if(has_elem)
294 {
295 curr_elem=elems[elem_type];
296
297 /* NOTE: Due to XML2 implementation big line numbers are stored in the psvi
298 * attribute so we need to convert the void* to char and convert it back to integer value */
299 if(curr_elem->line == 65535 && curr_elem->next && curr_elem->next->psvi != nullptr)
300 {
301 char hex_value[10] = "";
302 int aux_line = 0;
303
304 sprintf(hex_value, "%p", curr_elem->next->psvi);
305 aux_line = static_cast<int>(strtol(hex_value, nullptr, 16));
306
307 if(curr_line < aux_line)
308 curr_line = aux_line;
309 }
310 else if(curr_elem->line > curr_line)
311 curr_line = curr_elem->line;
312 }
313
314 return has_elem;
315 }
316
hasElement(unsigned elem_type,xmlElementType xml_node_type)317 bool XmlParser::hasElement(unsigned elem_type, xmlElementType xml_node_type)
318 {
319 if(!root_elem)
320 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
321
322 if(elem_type==RootElement)
323 /* Returns the verification if the current element has a parent.
324 The element must be different from the root, because the root element
325 is not connected to a parent */
326 return (curr_elem!=root_elem && curr_elem->parent!=nullptr &&
327 (xml_node_type==0 || (xml_node_type!=0 && curr_elem->parent->type==xml_node_type)));
328 else if(elem_type==ChildElement)
329 //Returns the verification if the current element has children
330 return (curr_elem->children!=nullptr &&
331 (xml_node_type==0 || (xml_node_type!=0 && curr_elem->children->type==xml_node_type)));
332 else if(elem_type==NextElement)
333 return (curr_elem->next!=nullptr &&
334 (xml_node_type==0 || (xml_node_type!=0 && curr_elem->next->type==xml_node_type)));
335 else
336 /* The second comparison in the expression is made for the root element
337 because libxml2 places the previous element as the root itself */
338 return (curr_elem->prev!=nullptr && curr_elem->prev!=root_elem &&
339 (xml_node_type==0 || (xml_node_type!=0 && curr_elem->prev->type==xml_node_type)));
340 }
341
hasAttributes()342 bool XmlParser::hasAttributes()
343 {
344 if(!root_elem)
345 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
346
347 return (curr_elem->properties != nullptr);
348 }
349
getElementContent()350 QString XmlParser::getElementContent()
351 {
352 if(!root_elem)
353 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
354
355 /* If the current element has <![CDATA[]]> node returns the content of the CDATA instead
356 of return the content of the element itself */
357 if(curr_elem->next && curr_elem->next->type == XML_CDATA_SECTION_NODE)
358 return QString(reinterpret_cast<char *>(curr_elem->next->content));
359 else
360 //Return the content of the element when is not a CDATA node
361 return QString(reinterpret_cast<char *>(curr_elem->content));
362 }
363
getElementName()364 QString XmlParser::getElementName()
365 {
366 if(!root_elem)
367 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
368
369 return QString(reinterpret_cast<const char *>(curr_elem->name));
370 }
371
getElementType()372 xmlElementType XmlParser::getElementType()
373 {
374 if(!root_elem)
375 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
376
377 return curr_elem->type;
378 }
379
getCurrentElement()380 const xmlNode *XmlParser::getCurrentElement()
381 {
382 return curr_elem;
383 }
384
getElementAttributes(attribs_map & attributes)385 void XmlParser::getElementAttributes(attribs_map &attributes)
386 {
387 xmlAttr *elem_attribs=nullptr;
388 QString attrib, value;
389
390 if(!root_elem)
391 throw Exception(ErrorCode::OprNotAllocatedElementTree,__PRETTY_FUNCTION__,__FILE__,__LINE__);
392
393 //Always clears the passed attributes maps
394 attributes.clear();
395
396 //Gets the references to the element properties
397 elem_attribs=curr_elem->properties;
398
399 while(elem_attribs)
400 {
401 //Gets the attribute name
402 attrib=QString(reinterpret_cast<const char *>(elem_attribs->name));
403 //Gets the attribute value
404 value=QString(reinterpret_cast<char *>(elem_attribs->children->content));
405
406 /* Assigns to the attribute map in the index specified by the
407 attribute name the obtained value */
408 attributes[attrib]=value;
409
410 //Step to the next element attribute
411 elem_attribs=elem_attribs->next;
412 }
413 }
414
getLoadedFilename()415 QString XmlParser::getLoadedFilename()
416 {
417 return xml_doc_filename;
418 }
419
getXMLBuffer()420 QString XmlParser::getXMLBuffer()
421 {
422 return xml_buffer;
423 }
424
getCurrentBufferLine()425 int XmlParser::getCurrentBufferLine()
426 {
427 if(curr_elem)
428 return curr_line;
429 else
430 return 0;
431 }
432
getBufferLineCount()433 int XmlParser::getBufferLineCount()
434 {
435 if(xml_doc)
436 {
437 /* To get the very last line of the document is necessary to call
438 the last element of the last because xml_doc->last->line stores the
439 last line of the root element.
440
441 NOTE: Due to XML2 implementation big line numbers are stored in the psvi
442 attribute so we need to convert the void* to char and convert it back to integer value */
443 if(xml_doc->last->last->line == 65535 && xml_doc->last->last->psvi != nullptr)
444 {
445 char hex_value[10] = "";
446 sprintf(hex_value, "%p", xml_doc->last->last->psvi);
447 return static_cast<int>(strtol(hex_value, nullptr, 16));
448 }
449
450 return xml_doc->last->last->line;
451 }
452 else
453 return 0;
454 }
455
convertCharsToXMLEntities(QString buf)456 QString XmlParser::convertCharsToXMLEntities(QString buf)
457 {
458 QTextStream ts(&buf);
459 QRegExp attr_regexp=QRegExp("([a-z]|\\-)+( )*(=\\\")"),
460 attr_end_regexp=QRegExp("(\\\")(( )|(\\t)|(\\n)|((\\/\\>)|(\\>)))");
461 int attr_start=0, attr_end=0, count=0, cdata_start = -1,
462 cdata_end = -1, start = -1, end = -1, pos = 0;
463 QString value, fmt_buf, lin;
464
465 while(!ts.atEnd())
466 {
467 lin = ts.readLine();
468 lin += "\n";
469
470 // Ignoring the xml header
471 if(lin.indexOf("<?xml") >= 0)
472 {
473 fmt_buf += lin;
474 continue;
475 }
476
477 // Checking if the current line has at least one attribute in form (attr="value")
478 attr_start = -1;
479 attr_start = attr_regexp.indexIn(lin);
480
481 if(attr_start >= 0)
482 {
483 /* Checking the presence of <![[CDATA ]]> tag in the current line.
484 * In case of finding it we need to perform specific operation to avoid
485 * replacing contents within that tag */
486 cdata_start = lin.indexOf(CdataStart);
487 cdata_end = lin.indexOf(CdataEnd);
488 start = min<int>(cdata_start, cdata_end);
489 end = max<int>(cdata_start, cdata_end);
490
491 do
492 {
493 attr_end = attr_end_regexp.indexIn(lin, attr_start + attr_regexp.matchedLength());
494
495 if(attr_start >= 0 && attr_end >= 0 &&
496 //CDATA absent in the current line
497 ((start < 0 && end < 0) ||
498 //The attribute is at left of the CDATA tag
499 (start >= 0 && attr_start < start && attr_end < start) ||
500 //The attribute is at right of the CDATA tag
501 (end >= 0 && attr_start > end && attr_end > end)))
502 {
503 // Calculates the initial position where the value to be retrived is (in that case rigth after attrib=")
504 pos = attr_start + attr_regexp.matchedLength();
505 count = attr_end - pos;
506 value = lin.mid(pos, count);
507 }
508 else
509 break;
510
511 /* If the extracted value has one of the expected special chars
512 * in order to perform the replacemnt to xml entities */
513 if(value.contains(QRegExp("(&|\\<|\\>|\")")))
514 {
515 if(!value.contains(CharQuot) && !value.contains(CharLt) &&
516 !value.contains(CharGt) && !value.contains(CharAmp) &&
517 !value.contains(CharApos) && value.contains('&'))
518 value.replace('&', CharAmp);
519
520 value.replace('"', CharQuot);
521 value.replace('<', CharLt);
522 value.replace('>', CharGt);
523
524 //Puts in the original XML definition the modified string
525 lin.replace(pos, count, value);
526 }
527
528 // Moving the position to the next attribute in the line (if existent)
529 pos += value.length() + 1;
530 attr_start = attr_regexp.indexIn(lin, pos);
531 value.clear();
532 }
533 while(attr_start >=0 && attr_start < lin.size());
534 }
535
536 fmt_buf += lin;
537 lin.clear();
538 }
539
540 return fmt_buf;
541 }
542