1 /***************************************************************************
2 pseudoDtd.cpp
3 copyright : (C) 2001-2002 by Daniel Naber
4 email : daniel.naber@t-online.de
5 ***************************************************************************/
6
7 /***************************************************************************
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2
11 of the License, or ( at your option ) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 ***************************************************************************/
22
23 #include "pseudo_dtd.h"
24
25 #include <QRegExp>
26
27 #include <KLocalizedString>
28 #include <KMessageBox>
29
PseudoDTD()30 PseudoDTD::PseudoDTD()
31 {
32 // "SGML support" only means case-insensivity, because HTML is case-insensitive up to version 4:
33 m_sgmlSupport = true; // TODO: make this an run-time option ( maybe automatically set )
34 }
35
~PseudoDTD()36 PseudoDTD::~PseudoDTD()
37 {
38 }
39
analyzeDTD(QString & metaDtdUrl,QString & metaDtd)40 void PseudoDTD::analyzeDTD(QString &metaDtdUrl, QString &metaDtd)
41 {
42 QDomDocument doc(QStringLiteral("dtdIn_xml"));
43 if (!doc.setContent(metaDtd)) {
44 KMessageBox::error(nullptr,
45 i18n("The file '%1' could not be parsed. "
46 "Please check that the file is well-formed XML.",
47 metaDtdUrl),
48 i18n("XML Plugin Error"));
49 return;
50 }
51
52 if (doc.doctype().name() != QLatin1String("dtd")) {
53 KMessageBox::error(nullptr,
54 i18n("The file '%1' is not in the expected format. "
55 "Please check that the file is of this type:\n"
56 "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
57 "You can produce such files with dtdparse. "
58 "See the Kate Plugin documentation for more information.",
59 metaDtdUrl),
60 i18n("XML Plugin Error"));
61 return;
62 }
63
64 uint listLength = 0;
65 listLength += doc.elementsByTagName(QStringLiteral("entity")).count();
66 listLength += doc.elementsByTagName(QStringLiteral("element")).count();
67 // count this twice, as it will be iterated twice ( TODO: optimize that? ):
68 listLength += doc.elementsByTagName(QStringLiteral("attlist")).count() * 2;
69
70 QProgressDialog progress(i18n("Analyzing meta DTD..."), i18n("Cancel"), 0, listLength);
71 progress.setMinimumDuration(400);
72 progress.setValue(0);
73
74 // Get information from meta DTD and put it in Qt data structures for fast access:
75 if (!parseEntities(&doc, &progress)) {
76 return;
77 }
78
79 if (!parseElements(&doc, &progress)) {
80 return;
81 }
82
83 if (!parseAttributes(&doc, &progress)) {
84 return;
85 }
86
87 if (!parseAttributeValues(&doc, &progress)) {
88 return;
89 }
90
91 progress.setValue(listLength); // just to make sure the dialog disappears
92 }
93
94 // ========================================================================
95 // DOM stuff:
96
97 /**
98 * Iterate through the XML to get a mapping which sub-elements are allowed for
99 * all elements.
100 */
parseElements(QDomDocument * doc,QProgressDialog * progress)101 bool PseudoDTD::parseElements(QDomDocument *doc, QProgressDialog *progress)
102 {
103 m_elementsList.clear();
104 // We only display a list, i.e. we pretend that the content model is just
105 // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element,
106 // which would otherwise display some elements twice.
107 QMap<QString, bool> subelementList; // the bool is not used
108
109 QDomNodeList list = doc->elementsByTagName(QStringLiteral("element"));
110 uint listLength = list.count(); // speedup (really! )
111
112 for (uint i = 0; i < listLength; i++) {
113 if (progress->wasCanceled()) {
114 return false;
115 }
116
117 progress->setValue(progress->value() + 1);
118 // FIXME!:
119 // qApp->processEvents();
120
121 subelementList.clear();
122 QDomNode node = list.item(i);
123 QDomElement elem = node.toElement();
124
125 if (!elem.isNull()) {
126 // Enter the expanded content model, which may also include stuff not allowed.
127 // We do not care if it's a <sequence-group> or whatever.
128 QDomNodeList contentModelList = elem.elementsByTagName(QStringLiteral("content-model-expanded"));
129 QDomNode contentModelNode = contentModelList.item(0);
130 QDomElement contentModelElem = contentModelNode.toElement();
131 if (!contentModelElem.isNull()) {
132 // check for <pcdata/>:
133 QDomNodeList pcdataList = contentModelElem.elementsByTagName(QStringLiteral("pcdata"));
134
135 // check for other sub elements:
136 QDomNodeList subList = contentModelElem.elementsByTagName(QStringLiteral("element-name"));
137 uint subListLength = subList.count();
138 for (uint l = 0; l < subListLength; l++) {
139 QDomNode subNode = subList.item(l);
140 QDomElement subElem = subNode.toElement();
141 if (!subElem.isNull()) {
142 subelementList[subElem.attribute(QStringLiteral("name"))] = true;
143 }
144 }
145
146 // anders: check if this is an EMPTY element, and put "__EMPTY" in the
147 // sub list, so that we can insert tags in empty form if required.
148 QDomNodeList emptyList = elem.elementsByTagName(QStringLiteral("empty"));
149 if (emptyList.count()) {
150 subelementList[QStringLiteral("__EMPTY")] = true;
151 }
152 }
153
154 // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
155 // in the HTML 4.01 Strict DTD):
156 QDomNodeList exclusionsList = elem.elementsByTagName(QStringLiteral("exclusions"));
157 if (exclusionsList.length() > 0) {
158 // sometimes there are no exclusions ( e.g. in XML DTDs there are never exclusions )
159 QDomNode exclusionsNode = exclusionsList.item(0);
160 QDomElement exclusionsElem = exclusionsNode.toElement();
161 if (!exclusionsElem.isNull()) {
162 QDomNodeList subList = exclusionsElem.elementsByTagName(QStringLiteral("element-name"));
163 uint subListLength = subList.count();
164 for (uint l = 0; l < subListLength; l++) {
165 QDomNode subNode = subList.item(l);
166 QDomElement subElem = subNode.toElement();
167 if (!subElem.isNull()) {
168 QMap<QString, bool>::Iterator it = subelementList.find(subElem.attribute(QStringLiteral("name")));
169 if (it != subelementList.end()) {
170 subelementList.erase(it);
171 }
172 }
173 }
174 }
175 }
176
177 // turn the map into a list:
178 QStringList subelementListTmp;
179 QMap<QString, bool>::Iterator it;
180 for (it = subelementList.begin(); it != subelementList.end(); ++it) {
181 subelementListTmp.append(it.key());
182 }
183
184 m_elementsList.insert(elem.attribute(QStringLiteral("name")), subelementListTmp);
185 }
186
187 } // end iteration over all <element> nodes
188 return true;
189 }
190
191 /**
192 * Check which elements are allowed inside a parent element. This returns
193 * a list of allowed elements, but it doesn't care about order or if only a certain
194 * number of occurrences is allowed.
195 */
allowedElements(const QString & parentElement)196 QStringList PseudoDTD::allowedElements(const QString &parentElement)
197 {
198 if (m_sgmlSupport) {
199 // find the matching element, ignoring case:
200 QMap<QString, QStringList>::Iterator it;
201 for (it = m_elementsList.begin(); it != m_elementsList.end(); ++it) {
202 if (it.key().compare(parentElement, Qt::CaseInsensitive) == 0) {
203 return it.value();
204 }
205 }
206 } else if (m_elementsList.contains(parentElement)) {
207 return m_elementsList[parentElement];
208 }
209
210 return QStringList();
211 }
212
213 /**
214 * Iterate through the XML to get a mapping which attributes are allowed inside
215 * all elements.
216 */
parseAttributes(QDomDocument * doc,QProgressDialog * progress)217 bool PseudoDTD::parseAttributes(QDomDocument *doc, QProgressDialog *progress)
218 {
219 m_attributesList.clear();
220 // QStringList allowedAttributes;
221 QDomNodeList list = doc->elementsByTagName(QStringLiteral("attlist"));
222 uint listLength = list.count();
223
224 for (uint i = 0; i < listLength; i++) {
225 if (progress->wasCanceled()) {
226 return false;
227 }
228
229 progress->setValue(progress->value() + 1);
230 // FIXME!!
231 // qApp->processEvents();
232
233 ElementAttributes attrs;
234 QDomNode node = list.item(i);
235 QDomElement elem = node.toElement();
236 if (!elem.isNull()) {
237 QDomNodeList attributeList = elem.elementsByTagName(QStringLiteral("attribute"));
238 uint attributeListLength = attributeList.count();
239 for (uint l = 0; l < attributeListLength; l++) {
240 QDomNode attributeNode = attributeList.item(l);
241 QDomElement attributeElem = attributeNode.toElement();
242
243 if (!attributeElem.isNull()) {
244 if (attributeElem.attribute(QStringLiteral("type")) == QLatin1String("#REQUIRED")) {
245 attrs.requiredAttributes.append(attributeElem.attribute(QStringLiteral("name")));
246 } else {
247 attrs.optionalAttributes.append(attributeElem.attribute(QStringLiteral("name")));
248 }
249 }
250 }
251 m_attributesList.insert(elem.attribute(QStringLiteral("name")), attrs);
252 }
253 }
254
255 return true;
256 }
257
258 /** Check which attributes are allowed for an element.
259 */
allowedAttributes(const QString & element)260 QStringList PseudoDTD::allowedAttributes(const QString &element)
261 {
262 if (m_sgmlSupport) {
263 // find the matching element, ignoring case:
264 QMap<QString, ElementAttributes>::Iterator it;
265 for (it = m_attributesList.begin(); it != m_attributesList.end(); ++it) {
266 if (it.key().compare(element, Qt::CaseInsensitive) == 0) {
267 return it.value().optionalAttributes + it.value().requiredAttributes;
268 }
269 }
270 } else if (m_attributesList.contains(element)) {
271 return m_attributesList[element].optionalAttributes + m_attributesList[element].requiredAttributes;
272 }
273
274 return QStringList();
275 }
276
requiredAttributes(const QString & element) const277 QStringList PseudoDTD::requiredAttributes(const QString &element) const
278 {
279 if (m_sgmlSupport) {
280 QMap<QString, ElementAttributes>::ConstIterator it;
281 for (it = m_attributesList.begin(); it != m_attributesList.end(); ++it) {
282 if (it.key().compare(element, Qt::CaseInsensitive) == 0) {
283 return it.value().requiredAttributes;
284 }
285 }
286 } else if (m_attributesList.contains(element)) {
287 return m_attributesList[element].requiredAttributes;
288 }
289
290 return QStringList();
291 }
292
293 /**
294 * Iterate through the XML to get a mapping which attribute values are allowed
295 * for all attributes inside all elements.
296 */
parseAttributeValues(QDomDocument * doc,QProgressDialog * progress)297 bool PseudoDTD::parseAttributeValues(QDomDocument *doc, QProgressDialog *progress)
298 {
299 m_attributevaluesList.clear(); // 1 element : n possible attributes
300 QMap<QString, QStringList> attributevaluesTmp; // 1 attribute : n possible values
301 QDomNodeList list = doc->elementsByTagName(QStringLiteral("attlist"));
302 uint listLength = list.count();
303
304 for (uint i = 0; i < listLength; i++) {
305 if (progress->wasCanceled()) {
306 return false;
307 }
308
309 progress->setValue(progress->value() + 1);
310 // FIXME!
311 // qApp->processEvents();
312
313 attributevaluesTmp.clear();
314 QDomNode node = list.item(i);
315 QDomElement elem = node.toElement();
316 if (!elem.isNull()) {
317 // Enter the list of <attribute>:
318 QDomNodeList attributeList = elem.elementsByTagName(QStringLiteral("attribute"));
319 uint attributeListLength = attributeList.count();
320 for (uint l = 0; l < attributeListLength; l++) {
321 QDomNode attributeNode = attributeList.item(l);
322 QDomElement attributeElem = attributeNode.toElement();
323 if (!attributeElem.isNull()) {
324 QString value = attributeElem.attribute(QStringLiteral("value"));
325 attributevaluesTmp.insert(attributeElem.attribute(QStringLiteral("name")), value.split(QChar(' ')));
326 }
327 }
328 m_attributevaluesList.insert(elem.attribute(QStringLiteral("name")), attributevaluesTmp);
329 }
330 }
331 return true;
332 }
333
334 /**
335 * Check which attributes values are allowed for an attribute in an element
336 * (the element is necessary because e.g. "href" inside <a> could be different
337 * to an "href" inside <link>):
338 */
attributeValues(const QString & element,const QString & attribute)339 QStringList PseudoDTD::attributeValues(const QString &element, const QString &attribute)
340 {
341 // Direct access would be faster than iteration of course but not always correct,
342 // because we need to be case-insensitive.
343 if (m_sgmlSupport) {
344 // first find the matching element, ignoring case:
345 QMap<QString, QMap<QString, QStringList>>::Iterator it;
346 for (it = m_attributevaluesList.begin(); it != m_attributevaluesList.end(); ++it) {
347 if (it.key().compare(element, Qt::CaseInsensitive) == 0) {
348 QMap<QString, QStringList> attrVals = it.value();
349 QMap<QString, QStringList>::Iterator itV;
350 // then find the matching attribute for that element, ignoring case:
351 for (itV = attrVals.begin(); itV != attrVals.end(); ++itV) {
352 if (itV.key().compare(attribute, Qt::CaseInsensitive) == 0) {
353 return (itV.value());
354 }
355 }
356 }
357 }
358 } else if (m_attributevaluesList.contains(element)) {
359 QMap<QString, QStringList> attrVals = m_attributevaluesList[element];
360 if (attrVals.contains(attribute)) {
361 return attrVals[attribute];
362 }
363 }
364
365 // no predefined values available:
366 return QStringList();
367 }
368
369 /**
370 * Iterate through the XML to get a mapping of all entity names and their expanded
371 * version, e.g. nbsp =>  . Parameter entities are ignored.
372 */
parseEntities(QDomDocument * doc,QProgressDialog * progress)373 bool PseudoDTD::parseEntities(QDomDocument *doc, QProgressDialog *progress)
374 {
375 m_entityList.clear();
376 QDomNodeList list = doc->elementsByTagName(QStringLiteral("entity"));
377 uint listLength = list.count();
378
379 for (uint i = 0; i < listLength; i++) {
380 if (progress->wasCanceled()) {
381 return false;
382 }
383
384 progress->setValue(progress->value() + 1);
385 // FIXME!!
386 // qApp->processEvents();
387 QDomNode node = list.item(i);
388 QDomElement elem = node.toElement();
389 if (!elem.isNull() && elem.attribute(QStringLiteral("type")) != QLatin1String("param")) {
390 // TODO: what's cdata <-> gen ?
391 QDomNodeList expandedList = elem.elementsByTagName(QStringLiteral("text-expanded"));
392 QDomNode expandedNode = expandedList.item(0);
393 QDomElement expandedElem = expandedNode.toElement();
394 if (!expandedElem.isNull()) {
395 QString exp = expandedElem.text();
396 // TODO: support more than one &#...; in the expanded text
397 /* TODO include do this when the unicode font problem is solved:
398 if( exp.contains(QRegularExpression("^&#x[a-zA-Z0-9]+;$")) ) {
399 // hexadecimal numbers, e.g. "ȶ"
400 uint end = exp.find( ";" );
401 exp = exp.mid( 3, end-3 );
402 exp = QChar();
403 } else if( exp.contains(QRegularExpression("^&#[0-9]+;$")) ) {
404 // decimal numbers, e.g. "ì"
405 uint end = exp.find( ";" );
406 exp = exp.mid( 2, end-2 );
407 exp = QChar( exp.toInt() );
408 }
409 */
410 m_entityList.insert(elem.attribute(QStringLiteral("name")), exp);
411 } else {
412 m_entityList.insert(elem.attribute(QStringLiteral("name")), QString());
413 }
414 }
415 }
416 return true;
417 }
418
419 /**
420 * Get a list of all ( non-parameter ) entities that start with a certain string.
421 */
entities(const QString & start)422 QStringList PseudoDTD::entities(const QString &start)
423 {
424 QStringList entities;
425 QMap<QString, QString>::Iterator it;
426 for (it = m_entityList.begin(); it != m_entityList.end(); ++it) {
427 if ((*it).startsWith(start)) {
428 const QString &str = it.key();
429 /* TODO: show entities as unicode character
430 if( !it.data().isEmpty() ) {
431 //str += " -- " + it.data();
432 QRegExp re( "&#(\\d+);" );
433 if( re.search(it.data()) != -1 ) {
434 uint ch = re.cap( 1).toUInt();
435 str += " -- " + QChar( ch).decomposition();
436 }
437 //qDebug() << "#" << it.data();
438 }
439 */
440 entities.append(str);
441 // TODO: later use a table view
442 }
443 }
444 return entities;
445 }
446
447 // kate: space-indent on; indent-width 4; replace-tabs on; mixed-indent off;
448