1 /***************************************************************************
2     pseudoDtd.cpp
3     copyright           : (C) 2001-2002 by Daniel Naber
4     email               : daniel.naber@t-online.de
5  ***************************************************************************/
6 
7 /***************************************************************************
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License
10  as published by the Free Software Foundation; either version 2
11  of the License, or ( at your option ) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
21  ***************************************************************************/
22 
23 #include "pseudo_dtd.h"
24 
25 #include <QRegExp>
26 
27 #include <KLocalizedString>
28 #include <KMessageBox>
29 
PseudoDTD()30 PseudoDTD::PseudoDTD()
31 {
32     // "SGML support" only means case-insensivity, because HTML is case-insensitive up to version 4:
33     m_sgmlSupport = true; // TODO: make this an run-time option ( maybe automatically set )
34 }
35 
~PseudoDTD()36 PseudoDTD::~PseudoDTD()
37 {
38 }
39 
analyzeDTD(QString & metaDtdUrl,QString & metaDtd)40 void PseudoDTD::analyzeDTD(QString &metaDtdUrl, QString &metaDtd)
41 {
42     QDomDocument doc(QStringLiteral("dtdIn_xml"));
43     if (!doc.setContent(metaDtd)) {
44         KMessageBox::error(nullptr,
45                            i18n("The file '%1' could not be parsed. "
46                                 "Please check that the file is well-formed XML.",
47                                 metaDtdUrl),
48                            i18n("XML Plugin Error"));
49         return;
50     }
51 
52     if (doc.doctype().name() != QLatin1String("dtd")) {
53         KMessageBox::error(nullptr,
54                            i18n("The file '%1' is not in the expected format. "
55                                 "Please check that the file is of this type:\n"
56                                 "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
57                                 "You can produce such files with dtdparse. "
58                                 "See the Kate Plugin documentation for more information.",
59                                 metaDtdUrl),
60                            i18n("XML Plugin Error"));
61         return;
62     }
63 
64     uint listLength = 0;
65     listLength += doc.elementsByTagName(QStringLiteral("entity")).count();
66     listLength += doc.elementsByTagName(QStringLiteral("element")).count();
67     // count this twice, as it will be iterated twice ( TODO: optimize that? ):
68     listLength += doc.elementsByTagName(QStringLiteral("attlist")).count() * 2;
69 
70     QProgressDialog progress(i18n("Analyzing meta DTD..."), i18n("Cancel"), 0, listLength);
71     progress.setMinimumDuration(400);
72     progress.setValue(0);
73 
74     // Get information from meta DTD and put it in Qt data structures for fast access:
75     if (!parseEntities(&doc, &progress)) {
76         return;
77     }
78 
79     if (!parseElements(&doc, &progress)) {
80         return;
81     }
82 
83     if (!parseAttributes(&doc, &progress)) {
84         return;
85     }
86 
87     if (!parseAttributeValues(&doc, &progress)) {
88         return;
89     }
90 
91     progress.setValue(listLength); // just to make sure the dialog disappears
92 }
93 
94 // ========================================================================
95 // DOM stuff:
96 
97 /**
98  * Iterate through the XML to get a mapping which sub-elements are allowed for
99  * all elements.
100  */
parseElements(QDomDocument * doc,QProgressDialog * progress)101 bool PseudoDTD::parseElements(QDomDocument *doc, QProgressDialog *progress)
102 {
103     m_elementsList.clear();
104     // We only display a list, i.e. we pretend that the content model is just
105     // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element,
106     // which would otherwise display some elements twice.
107     QMap<QString, bool> subelementList; // the bool is not used
108 
109     QDomNodeList list = doc->elementsByTagName(QStringLiteral("element"));
110     uint listLength = list.count(); // speedup (really! )
111 
112     for (uint i = 0; i < listLength; i++) {
113         if (progress->wasCanceled()) {
114             return false;
115         }
116 
117         progress->setValue(progress->value() + 1);
118         // FIXME!:
119         // qApp->processEvents();
120 
121         subelementList.clear();
122         QDomNode node = list.item(i);
123         QDomElement elem = node.toElement();
124 
125         if (!elem.isNull()) {
126             // Enter the expanded content model, which may also include stuff not allowed.
127             // We do not care if it's a <sequence-group> or whatever.
128             QDomNodeList contentModelList = elem.elementsByTagName(QStringLiteral("content-model-expanded"));
129             QDomNode contentModelNode = contentModelList.item(0);
130             QDomElement contentModelElem = contentModelNode.toElement();
131             if (!contentModelElem.isNull()) {
132                 // check for <pcdata/>:
133                 QDomNodeList pcdataList = contentModelElem.elementsByTagName(QStringLiteral("pcdata"));
134 
135                 // check for other sub elements:
136                 QDomNodeList subList = contentModelElem.elementsByTagName(QStringLiteral("element-name"));
137                 uint subListLength = subList.count();
138                 for (uint l = 0; l < subListLength; l++) {
139                     QDomNode subNode = subList.item(l);
140                     QDomElement subElem = subNode.toElement();
141                     if (!subElem.isNull()) {
142                         subelementList[subElem.attribute(QStringLiteral("name"))] = true;
143                     }
144                 }
145 
146                 // anders: check if this is an EMPTY element, and put "__EMPTY" in the
147                 // sub list, so that we can insert tags in empty form if required.
148                 QDomNodeList emptyList = elem.elementsByTagName(QStringLiteral("empty"));
149                 if (emptyList.count()) {
150                     subelementList[QStringLiteral("__EMPTY")] = true;
151                 }
152             }
153 
154             // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
155             // in the HTML 4.01 Strict DTD):
156             QDomNodeList exclusionsList = elem.elementsByTagName(QStringLiteral("exclusions"));
157             if (exclusionsList.length() > 0) {
158                 // sometimes there are no exclusions ( e.g. in XML DTDs there are never exclusions )
159                 QDomNode exclusionsNode = exclusionsList.item(0);
160                 QDomElement exclusionsElem = exclusionsNode.toElement();
161                 if (!exclusionsElem.isNull()) {
162                     QDomNodeList subList = exclusionsElem.elementsByTagName(QStringLiteral("element-name"));
163                     uint subListLength = subList.count();
164                     for (uint l = 0; l < subListLength; l++) {
165                         QDomNode subNode = subList.item(l);
166                         QDomElement subElem = subNode.toElement();
167                         if (!subElem.isNull()) {
168                             QMap<QString, bool>::Iterator it = subelementList.find(subElem.attribute(QStringLiteral("name")));
169                             if (it != subelementList.end()) {
170                                 subelementList.erase(it);
171                             }
172                         }
173                     }
174                 }
175             }
176 
177             // turn the map into a list:
178             QStringList subelementListTmp;
179             QMap<QString, bool>::Iterator it;
180             for (it = subelementList.begin(); it != subelementList.end(); ++it) {
181                 subelementListTmp.append(it.key());
182             }
183 
184             m_elementsList.insert(elem.attribute(QStringLiteral("name")), subelementListTmp);
185         }
186 
187     } // end iteration over all <element> nodes
188     return true;
189 }
190 
191 /**
192  * Check which elements are allowed inside a parent element. This returns
193  * a list of allowed elements, but it doesn't care about order or if only a certain
194  * number of occurrences is allowed.
195  */
allowedElements(const QString & parentElement)196 QStringList PseudoDTD::allowedElements(const QString &parentElement)
197 {
198     if (m_sgmlSupport) {
199         // find the matching element, ignoring case:
200         QMap<QString, QStringList>::Iterator it;
201         for (it = m_elementsList.begin(); it != m_elementsList.end(); ++it) {
202             if (it.key().compare(parentElement, Qt::CaseInsensitive) == 0) {
203                 return it.value();
204             }
205         }
206     } else if (m_elementsList.contains(parentElement)) {
207         return m_elementsList[parentElement];
208     }
209 
210     return QStringList();
211 }
212 
213 /**
214  * Iterate through the XML to get a mapping which attributes are allowed inside
215  * all elements.
216  */
parseAttributes(QDomDocument * doc,QProgressDialog * progress)217 bool PseudoDTD::parseAttributes(QDomDocument *doc, QProgressDialog *progress)
218 {
219     m_attributesList.clear();
220     //   QStringList allowedAttributes;
221     QDomNodeList list = doc->elementsByTagName(QStringLiteral("attlist"));
222     uint listLength = list.count();
223 
224     for (uint i = 0; i < listLength; i++) {
225         if (progress->wasCanceled()) {
226             return false;
227         }
228 
229         progress->setValue(progress->value() + 1);
230         // FIXME!!
231         // qApp->processEvents();
232 
233         ElementAttributes attrs;
234         QDomNode node = list.item(i);
235         QDomElement elem = node.toElement();
236         if (!elem.isNull()) {
237             QDomNodeList attributeList = elem.elementsByTagName(QStringLiteral("attribute"));
238             uint attributeListLength = attributeList.count();
239             for (uint l = 0; l < attributeListLength; l++) {
240                 QDomNode attributeNode = attributeList.item(l);
241                 QDomElement attributeElem = attributeNode.toElement();
242 
243                 if (!attributeElem.isNull()) {
244                     if (attributeElem.attribute(QStringLiteral("type")) == QLatin1String("#REQUIRED")) {
245                         attrs.requiredAttributes.append(attributeElem.attribute(QStringLiteral("name")));
246                     } else {
247                         attrs.optionalAttributes.append(attributeElem.attribute(QStringLiteral("name")));
248                     }
249                 }
250             }
251             m_attributesList.insert(elem.attribute(QStringLiteral("name")), attrs);
252         }
253     }
254 
255     return true;
256 }
257 
258 /** Check which attributes are allowed for an element.
259  */
allowedAttributes(const QString & element)260 QStringList PseudoDTD::allowedAttributes(const QString &element)
261 {
262     if (m_sgmlSupport) {
263         // find the matching element, ignoring case:
264         QMap<QString, ElementAttributes>::Iterator it;
265         for (it = m_attributesList.begin(); it != m_attributesList.end(); ++it) {
266             if (it.key().compare(element, Qt::CaseInsensitive) == 0) {
267                 return it.value().optionalAttributes + it.value().requiredAttributes;
268             }
269         }
270     } else if (m_attributesList.contains(element)) {
271         return m_attributesList[element].optionalAttributes + m_attributesList[element].requiredAttributes;
272     }
273 
274     return QStringList();
275 }
276 
requiredAttributes(const QString & element) const277 QStringList PseudoDTD::requiredAttributes(const QString &element) const
278 {
279     if (m_sgmlSupport) {
280         QMap<QString, ElementAttributes>::ConstIterator it;
281         for (it = m_attributesList.begin(); it != m_attributesList.end(); ++it) {
282             if (it.key().compare(element, Qt::CaseInsensitive) == 0) {
283                 return it.value().requiredAttributes;
284             }
285         }
286     } else if (m_attributesList.contains(element)) {
287         return m_attributesList[element].requiredAttributes;
288     }
289 
290     return QStringList();
291 }
292 
293 /**
294  * Iterate through the XML to get a mapping which attribute values are allowed
295  * for all attributes inside all elements.
296  */
parseAttributeValues(QDomDocument * doc,QProgressDialog * progress)297 bool PseudoDTD::parseAttributeValues(QDomDocument *doc, QProgressDialog *progress)
298 {
299     m_attributevaluesList.clear(); // 1 element : n possible attributes
300     QMap<QString, QStringList> attributevaluesTmp; // 1 attribute : n possible values
301     QDomNodeList list = doc->elementsByTagName(QStringLiteral("attlist"));
302     uint listLength = list.count();
303 
304     for (uint i = 0; i < listLength; i++) {
305         if (progress->wasCanceled()) {
306             return false;
307         }
308 
309         progress->setValue(progress->value() + 1);
310         // FIXME!
311         // qApp->processEvents();
312 
313         attributevaluesTmp.clear();
314         QDomNode node = list.item(i);
315         QDomElement elem = node.toElement();
316         if (!elem.isNull()) {
317             // Enter the list of <attribute>:
318             QDomNodeList attributeList = elem.elementsByTagName(QStringLiteral("attribute"));
319             uint attributeListLength = attributeList.count();
320             for (uint l = 0; l < attributeListLength; l++) {
321                 QDomNode attributeNode = attributeList.item(l);
322                 QDomElement attributeElem = attributeNode.toElement();
323                 if (!attributeElem.isNull()) {
324                     QString value = attributeElem.attribute(QStringLiteral("value"));
325                     attributevaluesTmp.insert(attributeElem.attribute(QStringLiteral("name")), value.split(QChar(' ')));
326                 }
327             }
328             m_attributevaluesList.insert(elem.attribute(QStringLiteral("name")), attributevaluesTmp);
329         }
330     }
331     return true;
332 }
333 
334 /**
335  * Check which attributes values are allowed for an attribute in an element
336  * (the element is necessary because e.g. "href" inside <a> could be different
337  * to an "href" inside <link>):
338  */
attributeValues(const QString & element,const QString & attribute)339 QStringList PseudoDTD::attributeValues(const QString &element, const QString &attribute)
340 {
341     // Direct access would be faster than iteration of course but not always correct,
342     // because we need to be case-insensitive.
343     if (m_sgmlSupport) {
344         // first find the matching element, ignoring case:
345         QMap<QString, QMap<QString, QStringList>>::Iterator it;
346         for (it = m_attributevaluesList.begin(); it != m_attributevaluesList.end(); ++it) {
347             if (it.key().compare(element, Qt::CaseInsensitive) == 0) {
348                 QMap<QString, QStringList> attrVals = it.value();
349                 QMap<QString, QStringList>::Iterator itV;
350                 // then find the matching attribute for that element, ignoring case:
351                 for (itV = attrVals.begin(); itV != attrVals.end(); ++itV) {
352                     if (itV.key().compare(attribute, Qt::CaseInsensitive) == 0) {
353                         return (itV.value());
354                     }
355                 }
356             }
357         }
358     } else if (m_attributevaluesList.contains(element)) {
359         QMap<QString, QStringList> attrVals = m_attributevaluesList[element];
360         if (attrVals.contains(attribute)) {
361             return attrVals[attribute];
362         }
363     }
364 
365     // no predefined values available:
366     return QStringList();
367 }
368 
369 /**
370  * Iterate through the XML to get a mapping of all entity names and their expanded
371  * version, e.g. nbsp => &#160;. Parameter entities are ignored.
372  */
parseEntities(QDomDocument * doc,QProgressDialog * progress)373 bool PseudoDTD::parseEntities(QDomDocument *doc, QProgressDialog *progress)
374 {
375     m_entityList.clear();
376     QDomNodeList list = doc->elementsByTagName(QStringLiteral("entity"));
377     uint listLength = list.count();
378 
379     for (uint i = 0; i < listLength; i++) {
380         if (progress->wasCanceled()) {
381             return false;
382         }
383 
384         progress->setValue(progress->value() + 1);
385         // FIXME!!
386         // qApp->processEvents();
387         QDomNode node = list.item(i);
388         QDomElement elem = node.toElement();
389         if (!elem.isNull() && elem.attribute(QStringLiteral("type")) != QLatin1String("param")) {
390             // TODO: what's cdata <-> gen ?
391             QDomNodeList expandedList = elem.elementsByTagName(QStringLiteral("text-expanded"));
392             QDomNode expandedNode = expandedList.item(0);
393             QDomElement expandedElem = expandedNode.toElement();
394             if (!expandedElem.isNull()) {
395                 QString exp = expandedElem.text();
396                 // TODO: support more than one &#...; in the expanded text
397                 /* TODO include do this when the unicode font problem is solved:
398                 if( exp.contains(QRegularExpression("^&#x[a-zA-Z0-9]+;$")) ) {
399                 // hexadecimal numbers, e.g. "&#x236;"
400                 uint end = exp.find( ";" );
401                 exp = exp.mid( 3, end-3 );
402                 exp = QChar();
403                 } else if( exp.contains(QRegularExpression("^&#[0-9]+;$")) ) {
404                 // decimal numbers, e.g. "&#236;"
405                 uint end = exp.find( ";" );
406                 exp = exp.mid( 2, end-2 );
407                 exp = QChar( exp.toInt() );
408                 }
409                 */
410                 m_entityList.insert(elem.attribute(QStringLiteral("name")), exp);
411             } else {
412                 m_entityList.insert(elem.attribute(QStringLiteral("name")), QString());
413             }
414         }
415     }
416     return true;
417 }
418 
419 /**
420  * Get a list of all ( non-parameter ) entities that start with a certain string.
421  */
entities(const QString & start)422 QStringList PseudoDTD::entities(const QString &start)
423 {
424     QStringList entities;
425     QMap<QString, QString>::Iterator it;
426     for (it = m_entityList.begin(); it != m_entityList.end(); ++it) {
427         if ((*it).startsWith(start)) {
428             const QString &str = it.key();
429             /* TODO: show entities as unicode character
430             if( !it.data().isEmpty() ) {
431             //str += " -- " + it.data();
432             QRegExp re( "&#(\\d+);" );
433             if( re.search(it.data()) != -1 ) {
434             uint ch = re.cap( 1).toUInt();
435             str += " -- " + QChar( ch).decomposition();
436             }
437             //qDebug() << "#" << it.data();
438             }
439             */
440             entities.append(str);
441             // TODO: later use a table view
442         }
443     }
444     return entities;
445 }
446 
447 // kate: space-indent on; indent-width 4; replace-tabs on; mixed-indent off;
448