1 /* This file is part of the KDE project
2    Copyright (C) 2004 David Faure <faure@kde.org>
3    Copyright (C) 2007 Thomas Zander <zander@kde.org>
4 
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public
7    License as published by the Free Software Foundation; either
8    version 2 of the License, or (at your option) any later version.
9 
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public License
16    along with this library; see the file COPYING.LIB.  If not, write to
17    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19 */
20 
21 #include "KoXmlWriter.h"
22 
23 #include <StoreDebug.h>
24 #include <QByteArray>
25 #include <QStack>
26 #include <float.h>
27 
// Size of the indentation buffer, which holds one leading '\n' followed by spaces.
static const int s_indentBufferLength = 100;
// Size of the per-writer buffer that escapeForXML() fills before it falls
// back to a heap allocation.
static const int s_escapeBufferLen = 10000;
30 
31 class Q_DECL_HIDDEN KoXmlWriter::Private
32 {
33 public:
Private(QIODevice * dev_,int indentLevel=0)34     Private(QIODevice* dev_, int indentLevel = 0) : dev(dev_), baseIndentLevel(indentLevel) {}
~Private()35     ~Private() {
36         delete[] indentBuffer;
37         delete[] escapeBuffer;
38         //TODO: look at if we must delete "dev". For me we must delete it otherwise we will leak it
39     }
40 
41     QIODevice* dev;
42     QStack<Tag> tags;
43     int baseIndentLevel;
44 
45     char* indentBuffer; // maybe make it static, but then it needs a K_GLOBAL_STATIC
46     // and would eat 1K all the time... Maybe refcount it :)
47     char* escapeBuffer; // can't really be static if we want to be thread-safe
48 };
49 
KoXmlWriter(QIODevice * dev,int indentLevel)50 KoXmlWriter::KoXmlWriter(QIODevice* dev, int indentLevel)
51         : d(new Private(dev, indentLevel))
52 {
53     init();
54 }
55 
init()56 void KoXmlWriter::init()
57 {
58     d->indentBuffer = new char[ s_indentBufferLength ];
59     memset(d->indentBuffer, ' ', s_indentBufferLength);
60     *d->indentBuffer = '\n'; // write newline before indentation, in one go
61 
62     d->escapeBuffer = new char[s_escapeBufferLen];
63     if (!d->dev->isOpen())
64         d->dev->open(QIODevice::WriteOnly);
65 }
66 
~KoXmlWriter()67 KoXmlWriter::~KoXmlWriter()
68 {
69     delete d;
70 }
71 
startDocument(const char * rootElemName,const char * publicId,const char * systemId)72 void KoXmlWriter::startDocument(const char* rootElemName, const char* publicId, const char* systemId)
73 {
74     Q_ASSERT(d->tags.isEmpty());
75     writeCString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
76     // There isn't much point in a doctype if there's no DTD to refer to
77     // (I'm told that files that are validated by a RelaxNG schema cannot refer to the schema)
78     if (publicId) {
79         writeCString("<!DOCTYPE ");
80         writeCString(rootElemName);
81         writeCString(" PUBLIC \"");
82         writeCString(publicId);
83         writeCString("\" \"");
84         writeCString(systemId);
85         writeCString("\"");
86         writeCString(">\n");
87     }
88 }
89 
endDocument()90 void KoXmlWriter::endDocument()
91 {
92     // just to do exactly like QDom does (newline at end of file).
93     writeChar('\n');
94     Q_ASSERT(d->tags.isEmpty());
95 }
96 
97 // returns the value of indentInside of the parent
prepareForChild()98 bool KoXmlWriter::prepareForChild()
99 {
100     if (!d->tags.isEmpty()) {
101         Tag& parent = d->tags.top();
102         if (!parent.hasChildren) {
103             closeStartElement(parent);
104             parent.hasChildren = true;
105             parent.lastChildIsText = false;
106         }
107         if (parent.indentInside) {
108             writeIndent();
109         }
110         return parent.indentInside;
111     }
112     return true;
113 }
114 
prepareForTextNode()115 void KoXmlWriter::prepareForTextNode()
116 {
117     if (d->tags.isEmpty())
118         return;
119     Tag& parent = d->tags.top();
120     if (!parent.hasChildren) {
121         closeStartElement(parent);
122         parent.hasChildren = true;
123         parent.lastChildIsText = true;
124     }
125 }
126 
startElement(const char * tagName,bool indentInside)127 void KoXmlWriter::startElement(const char* tagName, bool indentInside)
128 {
129     Q_ASSERT(tagName != 0);
130 
131     // Tell parent that it has children
132     bool parentIndent = prepareForChild();
133 
134     d->tags.push(Tag(tagName, parentIndent && indentInside));
135     writeChar('<');
136     writeCString(tagName);
137     //kDebug(s_area) << tagName;
138 }
139 
addCompleteElement(const char * cstr)140 void KoXmlWriter::addCompleteElement(const char* cstr)
141 {
142     prepareForChild();
143     writeCString(cstr);
144 }
145 
146 
addCompleteElement(QIODevice * indev)147 void KoXmlWriter::addCompleteElement(QIODevice* indev)
148 {
149     prepareForChild();
150     const bool wasOpen = indev->isOpen();
151     // Always (re)open the device in readonly mode, it might be
152     // already open but for writing, and we need to rewind.
153     const bool openOk = indev->open(QIODevice::ReadOnly);
154     Q_ASSERT(openOk);
155     if (!openOk) {
156         warnStore << "Failed to re-open the device! wasOpen=" << wasOpen;
157         return;
158     }
159 
160     static const int MAX_CHUNK_SIZE = 8 * 1024; // 8 KB
161     QByteArray buffer;
162     buffer.resize(MAX_CHUNK_SIZE);
163     while (!indev->atEnd()) {
164         qint64 len = indev->read(buffer.data(), buffer.size());
165         if (len <= 0)   // e.g. on error
166             break;
167         d->dev->write(buffer.data(), len);
168     }
169     if (!wasOpen) {
170         // Restore initial state
171         indev->close();
172     }
173 }
174 
endElement()175 void KoXmlWriter::endElement()
176 {
177     if (d->tags.isEmpty())
178         warnStore << "EndElement() was called more times than startElement(). "
179                      "The generated XML will be invalid! "
180                      "Please report this bug (by saving the document to another format...)" << endl;
181 
182     Tag tag = d->tags.pop();
183 
184     if (!tag.hasChildren) {
185         writeCString("/>");
186     } else {
187         if (tag.indentInside && !tag.lastChildIsText) {
188             writeIndent();
189         }
190         writeCString("</");
191         Q_ASSERT(tag.tagName != 0);
192         writeCString(tag.tagName);
193         writeChar('>');
194     }
195 }
196 
addTextNode(const QByteArray & cstr)197 void KoXmlWriter::addTextNode(const QByteArray& cstr)
198 {
199     // Same as the const char* version below, but here we know the size
200     prepareForTextNode();
201     char* escaped = escapeForXML(cstr.constData(), cstr.size());
202     writeCString(escaped);
203     if (escaped != d->escapeBuffer)
204         delete[] escaped;
205 }
206 
addTextNode(const char * cstr)207 void KoXmlWriter::addTextNode(const char* cstr)
208 {
209     prepareForTextNode();
210     char* escaped = escapeForXML(cstr, -1);
211     writeCString(escaped);
212     if (escaped != d->escapeBuffer)
213         delete[] escaped;
214 }
215 
addProcessingInstruction(const char * cstr)216 void KoXmlWriter::addProcessingInstruction(const char* cstr)
217 {
218     prepareForTextNode();
219     writeCString("<?");
220     addTextNode(cstr);
221     writeCString("?>");
222 }
223 
addAttribute(const char * attrName,const QByteArray & value)224 void KoXmlWriter::addAttribute(const char* attrName, const QByteArray& value)
225 {
226     // Same as the const char* one, but here we know the size
227     writeChar(' ');
228     writeCString(attrName);
229     writeCString("=\"");
230     char* escaped = escapeForXML(value.constData(), value.size());
231     writeCString(escaped);
232     if (escaped != d->escapeBuffer)
233         delete[] escaped;
234     writeChar('"');
235 }
236 
addAttribute(const char * attrName,const char * value)237 void KoXmlWriter::addAttribute(const char* attrName, const char* value)
238 {
239     writeChar(' ');
240     writeCString(attrName);
241     writeCString("=\"");
242     char* escaped = escapeForXML(value, -1);
243     writeCString(escaped);
244     if (escaped != d->escapeBuffer)
245         delete[] escaped;
246     writeChar('"');
247 }
248 
addAttribute(const char * attrName,double value)249 void KoXmlWriter::addAttribute(const char* attrName, double value)
250 {
251     QByteArray str;
252     str.setNum(value, 'f', 11);
253     addAttribute(attrName, str.data());
254 }
255 
addAttribute(const char * attrName,float value)256 void KoXmlWriter::addAttribute(const char* attrName, float value)
257 {
258     QByteArray str;
259     str.setNum(value, 'f', FLT_DIG);
260     addAttribute(attrName, str.data());
261 }
262 
addAttributePt(const char * attrName,double value)263 void KoXmlWriter::addAttributePt(const char* attrName, double value)
264 {
265     QByteArray str;
266     str.setNum(value, 'f', 11);
267     str += "pt";
268     addAttribute(attrName, str.data());
269 }
270 
addAttributePt(const char * attrName,float value)271 void KoXmlWriter::addAttributePt(const char* attrName, float value)
272 {
273     QByteArray str;
274     str.setNum(value, 'f', FLT_DIG);
275     str += "pt";
276     addAttribute(attrName, str.data());
277 }
278 
writeIndent()279 void KoXmlWriter::writeIndent()
280 {
281     // +1 because of the leading '\n'
282     d->dev->write(d->indentBuffer, qMin(indentLevel() + 1,
283                                         s_indentBufferLength));
284 }
285 
writeString(const QString & str)286 void KoXmlWriter::writeString(const QString& str)
287 {
288     // cachegrind says .utf8() is where most of the time is spent
289     const QByteArray cstr = str.toUtf8();
290     d->dev->write(cstr);
291 }
292 
293 // In case of a reallocation (ret value != d->buffer), the caller owns the return value,
294 // it must delete it (with [])
escapeForXML(const char * source,int length=-1) const295 char* KoXmlWriter::escapeForXML(const char* source, int length = -1) const
296 {
297     // we're going to be pessimistic on char length; so lets make the outputLength less
298     // the amount one char can take: 6
299     char* destBoundary = d->escapeBuffer + s_escapeBufferLen - 6;
300     char* destination = d->escapeBuffer;
301     char* output = d->escapeBuffer;
302     const char* src = source; // src moves, source remains
303     for (;;) {
304         if (destination >= destBoundary) {
305             // When we come to realize that our escaped string is going to
306             // be bigger than the escape buffer (this shouldn't happen very often...),
307             // we drop the idea of using it, and we allocate a bigger buffer.
308             // Note that this if() can only be hit once per call to the method.
309             if (length == -1)
310                 length = qstrlen(source);   // expensive...
311             uint newLength = length * 6 + 1; // worst case. 6 is due to &quot; and &apos;
312             char* buffer = new char[ newLength ];
313             destBoundary = buffer + newLength;
314             uint amountOfCharsAlreadyCopied = destination - d->escapeBuffer;
315             memcpy(buffer, d->escapeBuffer, amountOfCharsAlreadyCopied);
316             output = buffer;
317             destination = buffer + amountOfCharsAlreadyCopied;
318         }
319         switch (*src) {
320         case 60: // <
321             memcpy(destination, "&lt;", 4);
322             destination += 4;
323             break;
324         case 62: // >
325             memcpy(destination, "&gt;", 4);
326             destination += 4;
327             break;
328         case 34: // "
329             memcpy(destination, "&quot;", 6);
330             destination += 6;
331             break;
332 #if 0 // needed?
333         case 39: // '
334             memcpy(destination, "&apos;", 6);
335             destination += 6;
336             break;
337 #endif
338         case 38: // &
339             memcpy(destination, "&amp;", 5);
340             destination += 5;
341             break;
342         case 0:
343             *destination = '\0';
344             return output;
345         // Control codes accepted in XML 1.0 documents.
346         case 9:
347         case 10:
348         case 13:
349             *destination++ = *src++;
350             continue;
351         default:
352             // Don't add control codes not accepted in XML 1.0 documents.
353             if (*src > 0 && *src < 32) {
354                 ++src;
355             } else {
356                 *destination++ = *src++;
357             }
358             continue;
359         }
360         ++src;
361     }
362     // NOTREACHED (see case 0)
363     return output;
364 }
365 
addManifestEntry(const QString & fullPath,const QString & mediaType)366 void KoXmlWriter::addManifestEntry(const QString& fullPath, const QString& mediaType)
367 {
368     startElement("manifest:file-entry");
369     addAttribute("manifest:media-type", mediaType);
370     addAttribute("manifest:full-path", fullPath);
371     endElement();
372 }
373 
addConfigItem(const QString & configName,const QString & value)374 void KoXmlWriter::addConfigItem(const QString & configName, const QString& value)
375 {
376     startElement("config:config-item");
377     addAttribute("config:name", configName);
378     addAttribute("config:type",  "string");
379     addTextNode(value);
380     endElement();
381 }
382 
addConfigItem(const QString & configName,bool value)383 void KoXmlWriter::addConfigItem(const QString & configName, bool value)
384 {
385     startElement("config:config-item");
386     addAttribute("config:name", configName);
387     addAttribute("config:type",  "boolean");
388     addTextNode(value ? "true" : "false");
389     endElement();
390 }
391 
addConfigItem(const QString & configName,int value)392 void KoXmlWriter::addConfigItem(const QString & configName, int value)
393 {
394     startElement("config:config-item");
395     addAttribute("config:name", configName);
396     addAttribute("config:type",  "int");
397     addTextNode(QString::number(value));
398     endElement();
399 }
400 
addConfigItem(const QString & configName,double value)401 void KoXmlWriter::addConfigItem(const QString & configName, double value)
402 {
403     startElement("config:config-item");
404     addAttribute("config:name", configName);
405     addAttribute("config:type", "double");
406     addTextNode(QString::number(value));
407     endElement();
408 }
409 
addConfigItem(const QString & configName,float value)410 void KoXmlWriter::addConfigItem(const QString & configName, float value)
411 {
412     startElement("config:config-item");
413     addAttribute("config:name", configName);
414     addAttribute("config:type", "double");
415     addTextNode(QString::number(value));
416     endElement();
417 }
418 
addConfigItem(const QString & configName,long value)419 void KoXmlWriter::addConfigItem(const QString & configName, long value)
420 {
421     startElement("config:config-item");
422     addAttribute("config:name", configName);
423     addAttribute("config:type", "long");
424     addTextNode(QString::number(value));
425     endElement();
426 }
427 
addConfigItem(const QString & configName,short value)428 void KoXmlWriter::addConfigItem(const QString & configName, short value)
429 {
430     startElement("config:config-item");
431     addAttribute("config:name", configName);
432     addAttribute("config:type", "short");
433     addTextNode(QString::number(value));
434     endElement();
435 }
436 
addTextSpan(const QString & text)437 void KoXmlWriter::addTextSpan(const QString& text)
438 {
439     QMap<int, int> tabCache;
440     addTextSpan(text, tabCache);
441 }
442 
addTextSpan(const QString & text,const QMap<int,int> & tabCache)443 void KoXmlWriter::addTextSpan(const QString& text, const QMap<int, int>& tabCache)
444 {
445     int len = text.length();
446     int nrSpaces = 0; // number of consecutive spaces
447     bool leadingSpace = false;
448     QString str;
449     str.reserve(len);
450 
451     // Accumulate chars either in str or in nrSpaces (for spaces).
452     // Flush str when writing a subelement (for spaces or for another reason)
453     // Flush nrSpaces when encountering two or more consecutive spaces
454     for (int i = 0; i < len ; ++i) {
455         QChar ch = text[i];
456         ushort unicode = ch.unicode();
457         if (unicode == ' ') {
458             if (i == 0)
459                 leadingSpace = true;
460             ++nrSpaces;
461         } else {
462             if (nrSpaces > 0) {
463                 // For the first space we use ' '.
464                 // "it is good practice to use (text:s) for the second and all following SPACE
465                 // characters in a sequence." (per the ODF spec)
466                 // however, per the HTML spec, "authors should not rely on user agents to render
467                 // white space immediately after a start tag or immediately before an end tag"
468                 // (and both we and OO.o ignore leading spaces in <text:p> or <text:h> elements...)
469                 if (!leadingSpace) {
470                     str += ' ';
471                     --nrSpaces;
472                 }
473                 if (nrSpaces > 0) {   // there are more spaces
474                     if (!str.isEmpty())
475                         addTextNode(str);
476                     str.clear();
477                     startElement("text:s");
478                     if (nrSpaces > 1)   // it's 1 by default
479                         addAttribute("text:c", nrSpaces);
480                     endElement();
481                 }
482             }
483             nrSpaces = 0;
484             leadingSpace = false;
485 
486             switch (unicode) {
487             case '\t':
488                 if (!str.isEmpty())
489                     addTextNode(str);
490                 str.clear();
491                 startElement("text:tab");
492                 if (tabCache.contains(i))
493                     addAttribute("text:tab-ref", tabCache[i] + 1);
494                 endElement();
495                 break;
496             // gracefully handle \f form feed in text input.
497             // otherwise the xml will not be valid.
498             // \f can be added e.g. in ascii import filter.
499             case '\f':
500             case '\n':
501             case QChar::LineSeparator:
502                 if (!str.isEmpty())
503                     addTextNode(str);
504                 str.clear();
505                 startElement("text:line-break");
506                 endElement();
507                 break;
508             default:
509                 // don't add stuff that is not allowed in xml. The stuff we need we have already handled above
510                 if (ch.unicode() >= 0x20) {
511                     str += text[i];
512                 }
513                 break;
514             }
515         }
516     }
517     // either we still have text in str or we have spaces in nrSpaces
518     if (!str.isEmpty()) {
519         addTextNode(str);
520     }
521     if (nrSpaces > 0) {   // there are more spaces
522         startElement("text:s");
523         if (nrSpaces > 1)   // it's 1 by default
524             addAttribute("text:c", nrSpaces);
525         endElement();
526     }
527 }
528 
device() const529 QIODevice *KoXmlWriter::device() const
530 {
531     return d->dev;
532 }
533 
indentLevel() const534 int KoXmlWriter::indentLevel() const
535 {
536     return d->tags.size() + d->baseIndentLevel;
537 }
538 
tagHierarchy() const539 QList<const char*> KoXmlWriter::tagHierarchy() const
540 {
541     QList<const char*> answer;
542     foreach(const Tag & tag, d->tags)
543         answer.append(tag.tagName);
544 
545     return answer;
546 }
547 
toString() const548 QString KoXmlWriter::toString() const
549 {
550     Q_ASSERT(!d->dev->isSequential());
551     if (d->dev->isSequential())
552         return QString();
553     bool wasOpen = d->dev->isOpen();
554     qint64 oldPos = -1;
555     if (wasOpen) {
556         oldPos = d->dev->pos();
557         if (oldPos > 0)
558             d->dev->seek(0);
559     } else {
560         const bool openOk = d->dev->open(QIODevice::ReadOnly);
561         Q_ASSERT(openOk);
562         if (!openOk)
563             return QString();
564     }
565     QString s = QString::fromUtf8(d->dev->readAll());
566     if (wasOpen)
567         d->dev->seek(oldPos);
568     else
569         d->dev->close();
570     return s;
571 }
572