1 /* This file is part of the KDE project
2 Copyright (C) 2004 David Faure <faure@kde.org>
3 Copyright (C) 2007 Thomas Zander <zander@kde.org>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public License
16 along with this library; see the file COPYING.LIB. If not, write to
17 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 */
20
21 #include "KoXmlWriter.h"
22
23 #include <StoreDebug.h>
24 #include <QByteArray>
25 #include <QStack>
26 #include <float.h>
27
// Size, in bytes, of the per-writer indentation buffer allocated in
// KoXmlWriter::init(): one leading '\n' followed by spaces.
static const int s_indentBufferLength = 100;

// Size, in bytes, of the per-writer scratch buffer that escapeForXML()
// fills before falling back to a heap allocation.
static const int s_escapeBufferLen = 10000;
30
31 class Q_DECL_HIDDEN KoXmlWriter::Private
32 {
33 public:
Private(QIODevice * dev_,int indentLevel=0)34 Private(QIODevice* dev_, int indentLevel = 0) : dev(dev_), baseIndentLevel(indentLevel) {}
~Private()35 ~Private() {
36 delete[] indentBuffer;
37 delete[] escapeBuffer;
38 //TODO: look at if we must delete "dev". For me we must delete it otherwise we will leak it
39 }
40
41 QIODevice* dev;
42 QStack<Tag> tags;
43 int baseIndentLevel;
44
45 char* indentBuffer; // maybe make it static, but then it needs a K_GLOBAL_STATIC
46 // and would eat 1K all the time... Maybe refcount it :)
47 char* escapeBuffer; // can't really be static if we want to be thread-safe
48 };
49
KoXmlWriter(QIODevice * dev,int indentLevel)50 KoXmlWriter::KoXmlWriter(QIODevice* dev, int indentLevel)
51 : d(new Private(dev, indentLevel))
52 {
53 init();
54 }
55
init()56 void KoXmlWriter::init()
57 {
58 d->indentBuffer = new char[ s_indentBufferLength ];
59 memset(d->indentBuffer, ' ', s_indentBufferLength);
60 *d->indentBuffer = '\n'; // write newline before indentation, in one go
61
62 d->escapeBuffer = new char[s_escapeBufferLen];
63 if (!d->dev->isOpen())
64 d->dev->open(QIODevice::WriteOnly);
65 }
66
~KoXmlWriter()67 KoXmlWriter::~KoXmlWriter()
68 {
69 delete d;
70 }
71
startDocument(const char * rootElemName,const char * publicId,const char * systemId)72 void KoXmlWriter::startDocument(const char* rootElemName, const char* publicId, const char* systemId)
73 {
74 Q_ASSERT(d->tags.isEmpty());
75 writeCString("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
76 // There isn't much point in a doctype if there's no DTD to refer to
77 // (I'm told that files that are validated by a RelaxNG schema cannot refer to the schema)
78 if (publicId) {
79 writeCString("<!DOCTYPE ");
80 writeCString(rootElemName);
81 writeCString(" PUBLIC \"");
82 writeCString(publicId);
83 writeCString("\" \"");
84 writeCString(systemId);
85 writeCString("\"");
86 writeCString(">\n");
87 }
88 }
89
endDocument()90 void KoXmlWriter::endDocument()
91 {
92 // just to do exactly like QDom does (newline at end of file).
93 writeChar('\n');
94 Q_ASSERT(d->tags.isEmpty());
95 }
96
97 // returns the value of indentInside of the parent
prepareForChild()98 bool KoXmlWriter::prepareForChild()
99 {
100 if (!d->tags.isEmpty()) {
101 Tag& parent = d->tags.top();
102 if (!parent.hasChildren) {
103 closeStartElement(parent);
104 parent.hasChildren = true;
105 parent.lastChildIsText = false;
106 }
107 if (parent.indentInside) {
108 writeIndent();
109 }
110 return parent.indentInside;
111 }
112 return true;
113 }
114
prepareForTextNode()115 void KoXmlWriter::prepareForTextNode()
116 {
117 if (d->tags.isEmpty())
118 return;
119 Tag& parent = d->tags.top();
120 if (!parent.hasChildren) {
121 closeStartElement(parent);
122 parent.hasChildren = true;
123 parent.lastChildIsText = true;
124 }
125 }
126
startElement(const char * tagName,bool indentInside)127 void KoXmlWriter::startElement(const char* tagName, bool indentInside)
128 {
129 Q_ASSERT(tagName != 0);
130
131 // Tell parent that it has children
132 bool parentIndent = prepareForChild();
133
134 d->tags.push(Tag(tagName, parentIndent && indentInside));
135 writeChar('<');
136 writeCString(tagName);
137 //kDebug(s_area) << tagName;
138 }
139
addCompleteElement(const char * cstr)140 void KoXmlWriter::addCompleteElement(const char* cstr)
141 {
142 prepareForChild();
143 writeCString(cstr);
144 }
145
146
addCompleteElement(QIODevice * indev)147 void KoXmlWriter::addCompleteElement(QIODevice* indev)
148 {
149 prepareForChild();
150 const bool wasOpen = indev->isOpen();
151 // Always (re)open the device in readonly mode, it might be
152 // already open but for writing, and we need to rewind.
153 const bool openOk = indev->open(QIODevice::ReadOnly);
154 Q_ASSERT(openOk);
155 if (!openOk) {
156 warnStore << "Failed to re-open the device! wasOpen=" << wasOpen;
157 return;
158 }
159
160 static const int MAX_CHUNK_SIZE = 8 * 1024; // 8 KB
161 QByteArray buffer;
162 buffer.resize(MAX_CHUNK_SIZE);
163 while (!indev->atEnd()) {
164 qint64 len = indev->read(buffer.data(), buffer.size());
165 if (len <= 0) // e.g. on error
166 break;
167 d->dev->write(buffer.data(), len);
168 }
169 if (!wasOpen) {
170 // Restore initial state
171 indev->close();
172 }
173 }
174
endElement()175 void KoXmlWriter::endElement()
176 {
177 if (d->tags.isEmpty())
178 warnStore << "EndElement() was called more times than startElement(). "
179 "The generated XML will be invalid! "
180 "Please report this bug (by saving the document to another format...)" << endl;
181
182 Tag tag = d->tags.pop();
183
184 if (!tag.hasChildren) {
185 writeCString("/>");
186 } else {
187 if (tag.indentInside && !tag.lastChildIsText) {
188 writeIndent();
189 }
190 writeCString("</");
191 Q_ASSERT(tag.tagName != 0);
192 writeCString(tag.tagName);
193 writeChar('>');
194 }
195 }
196
addTextNode(const QByteArray & cstr)197 void KoXmlWriter::addTextNode(const QByteArray& cstr)
198 {
199 // Same as the const char* version below, but here we know the size
200 prepareForTextNode();
201 char* escaped = escapeForXML(cstr.constData(), cstr.size());
202 writeCString(escaped);
203 if (escaped != d->escapeBuffer)
204 delete[] escaped;
205 }
206
addTextNode(const char * cstr)207 void KoXmlWriter::addTextNode(const char* cstr)
208 {
209 prepareForTextNode();
210 char* escaped = escapeForXML(cstr, -1);
211 writeCString(escaped);
212 if (escaped != d->escapeBuffer)
213 delete[] escaped;
214 }
215
addProcessingInstruction(const char * cstr)216 void KoXmlWriter::addProcessingInstruction(const char* cstr)
217 {
218 prepareForTextNode();
219 writeCString("<?");
220 addTextNode(cstr);
221 writeCString("?>");
222 }
223
addAttribute(const char * attrName,const QByteArray & value)224 void KoXmlWriter::addAttribute(const char* attrName, const QByteArray& value)
225 {
226 // Same as the const char* one, but here we know the size
227 writeChar(' ');
228 writeCString(attrName);
229 writeCString("=\"");
230 char* escaped = escapeForXML(value.constData(), value.size());
231 writeCString(escaped);
232 if (escaped != d->escapeBuffer)
233 delete[] escaped;
234 writeChar('"');
235 }
236
addAttribute(const char * attrName,const char * value)237 void KoXmlWriter::addAttribute(const char* attrName, const char* value)
238 {
239 writeChar(' ');
240 writeCString(attrName);
241 writeCString("=\"");
242 char* escaped = escapeForXML(value, -1);
243 writeCString(escaped);
244 if (escaped != d->escapeBuffer)
245 delete[] escaped;
246 writeChar('"');
247 }
248
addAttribute(const char * attrName,double value)249 void KoXmlWriter::addAttribute(const char* attrName, double value)
250 {
251 QByteArray str;
252 str.setNum(value, 'f', 11);
253 addAttribute(attrName, str.data());
254 }
255
addAttribute(const char * attrName,float value)256 void KoXmlWriter::addAttribute(const char* attrName, float value)
257 {
258 QByteArray str;
259 str.setNum(value, 'f', FLT_DIG);
260 addAttribute(attrName, str.data());
261 }
262
addAttributePt(const char * attrName,double value)263 void KoXmlWriter::addAttributePt(const char* attrName, double value)
264 {
265 QByteArray str;
266 str.setNum(value, 'f', 11);
267 str += "pt";
268 addAttribute(attrName, str.data());
269 }
270
addAttributePt(const char * attrName,float value)271 void KoXmlWriter::addAttributePt(const char* attrName, float value)
272 {
273 QByteArray str;
274 str.setNum(value, 'f', FLT_DIG);
275 str += "pt";
276 addAttribute(attrName, str.data());
277 }
278
writeIndent()279 void KoXmlWriter::writeIndent()
280 {
281 // +1 because of the leading '\n'
282 d->dev->write(d->indentBuffer, qMin(indentLevel() + 1,
283 s_indentBufferLength));
284 }
285
writeString(const QString & str)286 void KoXmlWriter::writeString(const QString& str)
287 {
288 // cachegrind says .utf8() is where most of the time is spent
289 const QByteArray cstr = str.toUtf8();
290 d->dev->write(cstr);
291 }
292
293 // In case of a reallocation (ret value != d->buffer), the caller owns the return value,
294 // it must delete it (with [])
escapeForXML(const char * source,int length=-1) const295 char* KoXmlWriter::escapeForXML(const char* source, int length = -1) const
296 {
297 // we're going to be pessimistic on char length; so lets make the outputLength less
298 // the amount one char can take: 6
299 char* destBoundary = d->escapeBuffer + s_escapeBufferLen - 6;
300 char* destination = d->escapeBuffer;
301 char* output = d->escapeBuffer;
302 const char* src = source; // src moves, source remains
303 for (;;) {
304 if (destination >= destBoundary) {
305 // When we come to realize that our escaped string is going to
306 // be bigger than the escape buffer (this shouldn't happen very often...),
307 // we drop the idea of using it, and we allocate a bigger buffer.
308 // Note that this if() can only be hit once per call to the method.
309 if (length == -1)
310 length = qstrlen(source); // expensive...
311 uint newLength = length * 6 + 1; // worst case. 6 is due to " and '
312 char* buffer = new char[ newLength ];
313 destBoundary = buffer + newLength;
314 uint amountOfCharsAlreadyCopied = destination - d->escapeBuffer;
315 memcpy(buffer, d->escapeBuffer, amountOfCharsAlreadyCopied);
316 output = buffer;
317 destination = buffer + amountOfCharsAlreadyCopied;
318 }
319 switch (*src) {
320 case 60: // <
321 memcpy(destination, "<", 4);
322 destination += 4;
323 break;
324 case 62: // >
325 memcpy(destination, ">", 4);
326 destination += 4;
327 break;
328 case 34: // "
329 memcpy(destination, """, 6);
330 destination += 6;
331 break;
332 #if 0 // needed?
333 case 39: // '
334 memcpy(destination, "'", 6);
335 destination += 6;
336 break;
337 #endif
338 case 38: // &
339 memcpy(destination, "&", 5);
340 destination += 5;
341 break;
342 case 0:
343 *destination = '\0';
344 return output;
345 // Control codes accepted in XML 1.0 documents.
346 case 9:
347 case 10:
348 case 13:
349 *destination++ = *src++;
350 continue;
351 default:
352 // Don't add control codes not accepted in XML 1.0 documents.
353 if (*src > 0 && *src < 32) {
354 ++src;
355 } else {
356 *destination++ = *src++;
357 }
358 continue;
359 }
360 ++src;
361 }
362 // NOTREACHED (see case 0)
363 return output;
364 }
365
addManifestEntry(const QString & fullPath,const QString & mediaType)366 void KoXmlWriter::addManifestEntry(const QString& fullPath, const QString& mediaType)
367 {
368 startElement("manifest:file-entry");
369 addAttribute("manifest:media-type", mediaType);
370 addAttribute("manifest:full-path", fullPath);
371 endElement();
372 }
373
addConfigItem(const QString & configName,const QString & value)374 void KoXmlWriter::addConfigItem(const QString & configName, const QString& value)
375 {
376 startElement("config:config-item");
377 addAttribute("config:name", configName);
378 addAttribute("config:type", "string");
379 addTextNode(value);
380 endElement();
381 }
382
addConfigItem(const QString & configName,bool value)383 void KoXmlWriter::addConfigItem(const QString & configName, bool value)
384 {
385 startElement("config:config-item");
386 addAttribute("config:name", configName);
387 addAttribute("config:type", "boolean");
388 addTextNode(value ? "true" : "false");
389 endElement();
390 }
391
addConfigItem(const QString & configName,int value)392 void KoXmlWriter::addConfigItem(const QString & configName, int value)
393 {
394 startElement("config:config-item");
395 addAttribute("config:name", configName);
396 addAttribute("config:type", "int");
397 addTextNode(QString::number(value));
398 endElement();
399 }
400
addConfigItem(const QString & configName,double value)401 void KoXmlWriter::addConfigItem(const QString & configName, double value)
402 {
403 startElement("config:config-item");
404 addAttribute("config:name", configName);
405 addAttribute("config:type", "double");
406 addTextNode(QString::number(value));
407 endElement();
408 }
409
addConfigItem(const QString & configName,float value)410 void KoXmlWriter::addConfigItem(const QString & configName, float value)
411 {
412 startElement("config:config-item");
413 addAttribute("config:name", configName);
414 addAttribute("config:type", "double");
415 addTextNode(QString::number(value));
416 endElement();
417 }
418
addConfigItem(const QString & configName,long value)419 void KoXmlWriter::addConfigItem(const QString & configName, long value)
420 {
421 startElement("config:config-item");
422 addAttribute("config:name", configName);
423 addAttribute("config:type", "long");
424 addTextNode(QString::number(value));
425 endElement();
426 }
427
addConfigItem(const QString & configName,short value)428 void KoXmlWriter::addConfigItem(const QString & configName, short value)
429 {
430 startElement("config:config-item");
431 addAttribute("config:name", configName);
432 addAttribute("config:type", "short");
433 addTextNode(QString::number(value));
434 endElement();
435 }
436
addTextSpan(const QString & text)437 void KoXmlWriter::addTextSpan(const QString& text)
438 {
439 QMap<int, int> tabCache;
440 addTextSpan(text, tabCache);
441 }
442
addTextSpan(const QString & text,const QMap<int,int> & tabCache)443 void KoXmlWriter::addTextSpan(const QString& text, const QMap<int, int>& tabCache)
444 {
445 int len = text.length();
446 int nrSpaces = 0; // number of consecutive spaces
447 bool leadingSpace = false;
448 QString str;
449 str.reserve(len);
450
451 // Accumulate chars either in str or in nrSpaces (for spaces).
452 // Flush str when writing a subelement (for spaces or for another reason)
453 // Flush nrSpaces when encountering two or more consecutive spaces
454 for (int i = 0; i < len ; ++i) {
455 QChar ch = text[i];
456 ushort unicode = ch.unicode();
457 if (unicode == ' ') {
458 if (i == 0)
459 leadingSpace = true;
460 ++nrSpaces;
461 } else {
462 if (nrSpaces > 0) {
463 // For the first space we use ' '.
464 // "it is good practice to use (text:s) for the second and all following SPACE
465 // characters in a sequence." (per the ODF spec)
466 // however, per the HTML spec, "authors should not rely on user agents to render
467 // white space immediately after a start tag or immediately before an end tag"
468 // (and both we and OO.o ignore leading spaces in <text:p> or <text:h> elements...)
469 if (!leadingSpace) {
470 str += ' ';
471 --nrSpaces;
472 }
473 if (nrSpaces > 0) { // there are more spaces
474 if (!str.isEmpty())
475 addTextNode(str);
476 str.clear();
477 startElement("text:s");
478 if (nrSpaces > 1) // it's 1 by default
479 addAttribute("text:c", nrSpaces);
480 endElement();
481 }
482 }
483 nrSpaces = 0;
484 leadingSpace = false;
485
486 switch (unicode) {
487 case '\t':
488 if (!str.isEmpty())
489 addTextNode(str);
490 str.clear();
491 startElement("text:tab");
492 if (tabCache.contains(i))
493 addAttribute("text:tab-ref", tabCache[i] + 1);
494 endElement();
495 break;
496 // gracefully handle \f form feed in text input.
497 // otherwise the xml will not be valid.
498 // \f can be added e.g. in ascii import filter.
499 case '\f':
500 case '\n':
501 case QChar::LineSeparator:
502 if (!str.isEmpty())
503 addTextNode(str);
504 str.clear();
505 startElement("text:line-break");
506 endElement();
507 break;
508 default:
509 // don't add stuff that is not allowed in xml. The stuff we need we have already handled above
510 if (ch.unicode() >= 0x20) {
511 str += text[i];
512 }
513 break;
514 }
515 }
516 }
517 // either we still have text in str or we have spaces in nrSpaces
518 if (!str.isEmpty()) {
519 addTextNode(str);
520 }
521 if (nrSpaces > 0) { // there are more spaces
522 startElement("text:s");
523 if (nrSpaces > 1) // it's 1 by default
524 addAttribute("text:c", nrSpaces);
525 endElement();
526 }
527 }
528
device() const529 QIODevice *KoXmlWriter::device() const
530 {
531 return d->dev;
532 }
533
indentLevel() const534 int KoXmlWriter::indentLevel() const
535 {
536 return d->tags.size() + d->baseIndentLevel;
537 }
538
tagHierarchy() const539 QList<const char*> KoXmlWriter::tagHierarchy() const
540 {
541 QList<const char*> answer;
542 foreach(const Tag & tag, d->tags)
543 answer.append(tag.tagName);
544
545 return answer;
546 }
547
toString() const548 QString KoXmlWriter::toString() const
549 {
550 Q_ASSERT(!d->dev->isSequential());
551 if (d->dev->isSequential())
552 return QString();
553 bool wasOpen = d->dev->isOpen();
554 qint64 oldPos = -1;
555 if (wasOpen) {
556 oldPos = d->dev->pos();
557 if (oldPos > 0)
558 d->dev->seek(0);
559 } else {
560 const bool openOk = d->dev->open(QIODevice::ReadOnly);
561 Q_ASSERT(openOk);
562 if (!openOk)
563 return QString();
564 }
565 QString s = QString::fromUtf8(d->dev->readAll());
566 if (wasOpen)
567 d->dev->seek(oldPos);
568 else
569 d->dev->close();
570 return s;
571 }
572