1 /**********************************************************************
2
3 Audacity: A Digital Audio Editor
4
5 XMLWriter.cpp
6
7 Leland Lucius
8
9 *******************************************************************//**
10
11 \class XMLWriter
12 \brief Base class for XMLFileWriter and XMLStringWriter that provides
13 the general functionality for creating XML in UTF8 encoding.
14
15 *//****************************************************************//**
16
17 \class XMLFileWriter
18 \brief Wrapper to output XML data to files.
19
20 *//****************************************************************//**
21
22 \class XMLStringWriter
23 \brief Wrapper to output XML data to strings.
24
25 *//*******************************************************************/
26
27 #include "XMLWriter.h"
28
29 #include <wx/defs.h>
30 #include <wx/ffile.h>
31 #include <wx/intl.h>
32
33 #include <cstring>
34
35 #include "ToChars.h"
36
37 #include "InconsistencyException.h"
38
// Table for XML encoding compatibility with expat decoding, covering the 32
// control codes 0x00..0x1F. A non-zero entry marks a character that may be
// written to the XML output; a zero entry marks one that must be dropped.
// Only TAB (0x09), LF (0x0A) and CR (0x0D) are marked compatible.
// The table is read-only, hence constexpr.
// See wxWidgets-2.8.12/src/expat/lib/xmltok_impl.h
// and wxWidgets-2.8.12/src/expat/lib/asciitab.h
static constexpr int charXMLCompatiblity[] =
{

   /* 0x00 */ 0, 0, 0, 0,
   /* 0x04 */ 0, 0, 0, 0,
   /* 0x08 */ 0, 1, 1, 0,
   /* 0x0C */ 0, 1, 0, 0,
   /* 0x10 */ 0, 0, 0, 0,
   /* 0x14 */ 0, 0, 0, 0,
   /* 0x18 */ 0, 0, 0, 0,
   /* 0x1C */ 0, 0, 0, 0,
};
54
55 // These are used by XMLEsc to handle surrogate pairs and filter invalid characters outside the ASCII range.
56 #define MIN_HIGH_SURROGATE static_cast<wxUChar>(0xD800)
57 #define MAX_HIGH_SURROGATE static_cast<wxUChar>(0xDBFF)
58 #define MIN_LOW_SURROGATE static_cast<wxUChar>(0xDC00)
59 #define MAX_LOW_SURROGATE static_cast<wxUChar>(0xDFFF)
60
61 // Unicode defines other noncharacters, but only these two are invalid in XML.
62 #define NONCHARACTER_FFFE static_cast<wxUChar>(0xFFFE)
63 #define NONCHARACTER_FFFF static_cast<wxUChar>(0xFFFF)
64
65
66 ///
67 /// XMLWriter base class
68 ///
XMLWriter()69 XMLWriter::XMLWriter()
70 {
71 mDepth = 0;
72 mInTag = false;
73 mHasKids.push_back(false);
74 }
75
~XMLWriter()76 XMLWriter::~XMLWriter()
77 {
78 }
79
StartTag(const wxString & name)80 void XMLWriter::StartTag(const wxString &name)
81 // may throw
82 {
83 int i;
84
85 if (mInTag) {
86 Write(wxT(">\n"));
87 mInTag = false;
88 }
89
90 for (i = 0; i < mDepth; i++) {
91 Write(wxT("\t"));
92 }
93
94 Write(wxString::Format(wxT("<%s"), name));
95
96 mTagstack.insert(mTagstack.begin(), name);
97 mHasKids[0] = true;
98 mHasKids.insert(mHasKids.begin(), false);
99 mDepth++;
100 mInTag = true;
101 }
102
EndTag(const wxString & name)103 void XMLWriter::EndTag(const wxString &name)
104 // may throw
105 {
106 int i;
107
108 if (mTagstack.size() > 0) {
109 if (mTagstack[0] == name) {
110 if (mHasKids[1]) { // There will always be at least 2 at this point
111 if (mInTag) {
112 Write(wxT("/>\n"));
113 }
114 else {
115 for (i = 0; i < mDepth - 1; i++) {
116 Write(wxT("\t"));
117 }
118 Write(wxString::Format(wxT("</%s>\n"), name));
119 }
120 }
121 else {
122 Write(wxT(">\n"));
123 }
124 mTagstack.erase( mTagstack.begin() );
125 mHasKids.erase(mHasKids.begin());
126 }
127 }
128
129 mDepth--;
130 mInTag = false;
131 }
132
WriteAttr(const wxString & name,const wxString & value)133 void XMLWriter::WriteAttr(const wxString &name, const wxString &value)
134 // may throw from Write()
135 {
136 Write(wxString::Format(wxT(" %s=\"%s\""),
137 name,
138 XMLEsc(value)));
139 }
140
WriteAttr(const wxString & name,const wxChar * value)141 void XMLWriter::WriteAttr(const wxString &name, const wxChar *value)
142 // may throw from Write()
143 {
144 WriteAttr(name, wxString(value));
145 }
146
WriteAttr(const wxString & name,int value)147 void XMLWriter::WriteAttr(const wxString &name, int value)
148 // may throw from Write()
149 {
150 Write(wxString::Format(wxT(" %s=\"%d\""),
151 name,
152 value));
153 }
154
WriteAttr(const wxString & name,bool value)155 void XMLWriter::WriteAttr(const wxString &name, bool value)
156 // may throw from Write()
157 {
158 Write(wxString::Format(wxT(" %s=\"%d\""),
159 name,
160 value));
161 }
162
WriteAttr(const wxString & name,long value)163 void XMLWriter::WriteAttr(const wxString &name, long value)
164 // may throw from Write()
165 {
166 Write(wxString::Format(wxT(" %s=\"%ld\""),
167 name,
168 value));
169 }
170
WriteAttr(const wxString & name,long long value)171 void XMLWriter::WriteAttr(const wxString &name, long long value)
172 // may throw from Write()
173 {
174 Write(wxString::Format(wxT(" %s=\"%lld\""),
175 name,
176 value));
177 }
178
WriteAttr(const wxString & name,size_t value)179 void XMLWriter::WriteAttr(const wxString &name, size_t value)
180 // may throw from Write()
181 {
182 Write(wxString::Format(wxT(" %s=\"%lld\""),
183 name,
184 (long long) value));
185 }
186
WriteAttr(const wxString & name,float value,int digits)187 void XMLWriter::WriteAttr(const wxString &name, float value, int digits)
188 // may throw from Write()
189 {
190 Write(wxString::Format(wxT(" %s=\"%s\""),
191 name,
192 Internat::ToString(value, digits)));
193 }
194
WriteAttr(const wxString & name,double value,int digits)195 void XMLWriter::WriteAttr(const wxString &name, double value, int digits)
196 // may throw from Write()
197 {
198 Write(wxString::Format(wxT(" %s=\"%s\""),
199 name,
200 Internat::ToString(value, digits)));
201 }
202
WriteData(const wxString & value)203 void XMLWriter::WriteData(const wxString &value)
204 // may throw from Write()
205 {
206 int i;
207
208 for (i = 0; i < mDepth; i++) {
209 Write(wxT("\t"));
210 }
211
212 Write(XMLEsc(value));
213 }
214
WriteSubTree(const wxString & value)215 void XMLWriter::WriteSubTree(const wxString &value)
216 // may throw from Write()
217 {
218 if (mInTag) {
219 Write(wxT(">\n"));
220 mInTag = false;
221 mHasKids[0] = true;
222 }
223
224 Write(value);
225 }
226
227 // See http://www.w3.org/TR/REC-xml for reference
XMLEsc(const wxString & s)228 wxString XMLWriter::XMLEsc(const wxString & s)
229 {
230 wxString result;
231 int len = s.length();
232
233 for(int i=0; i<len; i++) {
234 wxUChar c = s.GetChar(i);
235
236 switch (c) {
237 case wxT('\''):
238 result += wxT("'");
239 break;
240
241 case wxT('"'):
242 result += wxT(""");
243 break;
244
245 case wxT('&'):
246 result += wxT("&");
247 break;
248
249 case wxT('<'):
250 result += wxT("<");
251 break;
252
253 case wxT('>'):
254 result += wxT(">");
255 break;
256
257 default:
258 if (sizeof(c) == 2 && c >= MIN_HIGH_SURROGATE && c <= MAX_HIGH_SURROGATE && i < len - 1) {
259 // If wxUChar is 2 bytes, then supplementary characters (those greater than U+FFFF) are represented
260 // with a high surrogate (U+D800..U+DBFF) followed by a low surrogate (U+DC00..U+DFFF).
261 // Handle those here.
262 wxUChar c2 = s.GetChar(++i);
263 if (c2 >= MIN_LOW_SURROGATE && c2 <= MAX_LOW_SURROGATE) {
264 // Surrogate pair found; simply add it to the output string.
265 result += c;
266 result += c2;
267 }
268 else {
269 // That high surrogate isn't paired, so ignore it.
270 i--;
271 }
272 }
273 else if (!wxIsprint(c)) {
274 //ignore several characters such ase eot (0x04) and stx (0x02) because it makes expat parser bail
275 //see xmltok.c in expat checkCharRefNumber() to see how expat bails on these chars.
276 //also see wxWidgets-2.8.12/src/expat/lib/asciitab.h to see which characters are nonxml compatible
277 //post decode (we can still encode '&' and '<' with this table, but it prevents us from encoding eot)
278 //everything is compatible past ascii 0x20 except for surrogates and the noncharacters U+FFFE and U+FFFF,
279 //so we don't check the compatibility table higher than this.
280 if((c> 0x1F || charXMLCompatiblity[c]!=0) &&
281 (c < MIN_HIGH_SURROGATE || c > MAX_LOW_SURROGATE) &&
282 c != NONCHARACTER_FFFE && c != NONCHARACTER_FFFF)
283 result += wxString::Format(wxT("&#x%04x;"), c);
284 }
285 else {
286 result += c;
287 }
288 break;
289 }
290 }
291
292 return result;
293 }
294
295 ///
296 /// XMLFileWriter class
297 ///
XMLFileWriter(const FilePath & outputPath,const TranslatableString & caption,bool keepBackup)298 XMLFileWriter::XMLFileWriter(
299 const FilePath &outputPath, const TranslatableString &caption, bool keepBackup )
300 : mOutputPath{ outputPath }
301 , mCaption{ caption }
302 , mKeepBackup{ keepBackup }
303 // may throw
304 {
305 auto tempPath = wxFileName::CreateTempFileName( outputPath );
306 if (!wxFFile::Open(tempPath, wxT("wb")) || !IsOpened())
307 ThrowException( outputPath, mCaption );
308
309 if (mKeepBackup) {
310 int index = 0;
311 wxString backupName;
312
313 do {
314 wxFileName outputFn{ mOutputPath };
315 index++;
316 mBackupName =
317 outputFn.GetPath() + wxFILE_SEP_PATH +
318 outputFn.GetName() + wxT("_bak") +
319 wxString::Format(wxT("%d"), index) + wxT(".") +
320 outputFn.GetExt();
321 } while( ::wxFileExists( mBackupName ) );
322
323 // Open the backup file to be sure we can write it and reserve it
324 // until committing
325 if (! mBackupFile.Open( mBackupName, "wb" ) || ! mBackupFile.IsOpened() )
326 ThrowException( mBackupName, mCaption );
327 }
328 }
329
330
~XMLFileWriter()331 XMLFileWriter::~XMLFileWriter()
332 {
333 // Don't let a destructor throw!
334 GuardedCall( [&] {
335 if (!mCommitted) {
336 auto fileName = GetName();
337 if ( IsOpened() )
338 CloseWithoutEndingTags();
339 ::wxRemoveFile( fileName );
340 }
341 } );
342 }
343
Commit()344 void XMLFileWriter::Commit()
345 // may throw
346 {
347 PreCommit();
348 PostCommit();
349 }
350
PreCommit()351 void XMLFileWriter::PreCommit()
352 // may throw
353 {
354 while (mTagstack.size()) {
355 EndTag(mTagstack[0]);
356 }
357
358 CloseWithoutEndingTags();
359 }
360
PostCommit()361 void XMLFileWriter::PostCommit()
362 // may throw
363 {
364 FilePath tempPath = GetName();
365 if (mKeepBackup) {
366 if (! mBackupFile.Close() ||
367 ! wxRenameFile( mOutputPath, mBackupName ) )
368 ThrowException( mBackupName, mCaption );
369 }
370 else {
371 if ( wxFileName::FileExists( mOutputPath ) &&
372 ! wxRemoveFile( mOutputPath ) )
373 ThrowException( mOutputPath, mCaption );
374 }
375
376 // Now we have vacated the file at the output path and are committed.
377 // But not completely finished with steps of the commit operation.
378 // If this step fails, we haven't lost the successfully written data,
379 // but just failed to put it in the right place.
380 if (! wxRenameFile( tempPath, mOutputPath ) )
381 throw FileException{
382 FileException::Cause::Rename, tempPath, mCaption, mOutputPath
383 };
384
385 mCommitted = true;
386 }
387
CloseWithoutEndingTags()388 void XMLFileWriter::CloseWithoutEndingTags()
389 // may throw
390 {
391 // Before closing, we first flush it, because if Flush() fails because of a
392 // "disk full" condition, we can still at least try to close the file.
393 if (!wxFFile::Flush())
394 {
395 wxFFile::Close();
396 ThrowException( GetName(), mCaption );
397 }
398
399 // Note that this should never fail if flushing worked.
400 if (!wxFFile::Close())
401 ThrowException( GetName(), mCaption );
402 }
403
Write(const wxString & data)404 void XMLFileWriter::Write(const wxString &data)
405 // may throw
406 {
407 if (!wxFFile::Write(data, wxConvUTF8) || Error())
408 {
409 // When writing fails, we try to close the file before throwing the
410 // exception, so it can at least be deleted.
411 wxFFile::Close();
412 ThrowException( GetName(), mCaption );
413 }
414 }
415
416 ///
417 /// XMLStringWriter class
418 ///
XMLStringWriter(size_t initialSize)419 XMLStringWriter::XMLStringWriter(size_t initialSize)
420 {
421 if (initialSize)
422 {
423 reserve(initialSize);
424 }
425 }
426
~XMLStringWriter()427 XMLStringWriter::~XMLStringWriter()
428 {
429 }
430
Write(const wxString & data)431 void XMLStringWriter::Write(const wxString &data)
432 {
433 Append(data);
434 }
435
StartTag(const std::string_view & name)436 void XMLUtf8BufferWriter::StartTag(const std::string_view& name)
437 {
438 if (mInTag)
439 Write(">");
440
441 Write("<");
442 Write(name);
443
444 mInTag = true;
445 }
446
EndTag(const std::string_view & name)447 void XMLUtf8BufferWriter::EndTag(const std::string_view& name)
448 {
449 if (mInTag)
450 {
451 Write("/>");
452 mInTag = false;
453 }
454 else
455 {
456 Write("</");
457 Write(name);
458 Write(">");
459 }
460 }
461
WriteAttr(const std::string_view & name,const Identifier & value)462 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, const Identifier& value)
463 {
464 const wxScopedCharBuffer utf8Value = value.GET().utf8_str();
465
466 WriteAttr(name, { utf8Value.data(), utf8Value.length() });
467 }
468
WriteAttr(const std::string_view & name,const std::string_view & value)469 void XMLUtf8BufferWriter::WriteAttr(
470 const std::string_view& name, const std::string_view& value)
471 {
472 assert(mInTag);
473
474 Write(" ");
475 Write(name);
476 Write("=\"");
477 WriteEscaped(value);
478 Write("\"");
479 }
480
WriteAttr(const std::string_view & name,int value)481 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, int value)
482 {
483 WriteAttr(name, static_cast<long long>(value));
484 }
485
WriteAttr(const std::string_view & name,bool value)486 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, bool value)
487 {
488 WriteAttr(name, static_cast<long long>(value));
489 }
490
WriteAttr(const std::string_view & name,long value)491 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, long value)
492 {
493 // long can be int or long long. Assume the longest!
494 WriteAttr(name, static_cast<long long>(value));
495 }
496
WriteAttr(const std::string_view & name,long long value)497 void XMLUtf8BufferWriter::WriteAttr(
498 const std::string_view& name, long long value)
499 {
500 // -9223372036854775807 is the worst case
501 constexpr size_t bufferSize = 21;
502 char buffer[bufferSize];
503
504 const auto result = ToChars(buffer, buffer + bufferSize, value);
505
506 if (result.ec != std::errc())
507 THROW_INCONSISTENCY_EXCEPTION;
508
509 WriteAttr(name, std::string_view(buffer, result.ptr - buffer));
510 }
511
WriteAttr(const std::string_view & name,size_t value)512 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, size_t value)
513 {
514 // Well, that maintains the original behavior
515 WriteAttr(name, static_cast<long long>(value));
516 }
517
WriteAttr(const std::string_view & name,float value,int digits)518 void XMLUtf8BufferWriter::WriteAttr(
519 const std::string_view& name, float value, int digits /*= -1*/)
520 {
521 constexpr size_t bufferSize = std::numeric_limits<float>::max_digits10 +
522 5 + // No constexpr log2 yet! example - e-308
523 3; // Dot, sign an 0 separator
524
525 char buffer[bufferSize];
526
527 const auto result = ToChars(buffer, buffer + bufferSize, value, digits);
528
529 if (result.ec != std::errc())
530 THROW_INCONSISTENCY_EXCEPTION;
531
532 WriteAttr(name, std::string_view(buffer, result.ptr - buffer));
533 }
534
WriteAttr(const std::string_view & name,double value,int digits)535 void XMLUtf8BufferWriter::WriteAttr(
536 const std::string_view& name, double value, int digits /*= -1*/)
537 {
538 constexpr size_t bufferSize = std::numeric_limits<double>::max_digits10 +
539 5 + // No constexpr log2 yet!
540 3; // Dot, sign an 0 separator
541
542 char buffer[bufferSize];
543
544 const auto result = ToChars(buffer, buffer + bufferSize, value, digits);
545
546 if (result.ec != std::errc())
547 THROW_INCONSISTENCY_EXCEPTION;
548
549 WriteAttr(name, std::string_view(buffer, result.ptr - buffer));
550 }
551
WriteData(const std::string_view & value)552 void XMLUtf8BufferWriter::WriteData(const std::string_view& value)
553 {
554 if (mInTag)
555 {
556 Write(">");
557 mInTag = false;
558 }
559
560 WriteEscaped(value);
561 }
562
WriteSubTree(const std::string_view & value)563 void XMLUtf8BufferWriter::WriteSubTree(const std::string_view& value)
564 {
565 if (mInTag)
566 {
567 Write(">");
568 mInTag = false;
569 }
570
571 Write(value);
572 }
573
Write(const std::string_view & value)574 void XMLUtf8BufferWriter::Write(const std::string_view& value)
575 {
576 mStream.AppendData(value.data(), value.length());
577 }
578
ConsumeResult()579 MemoryStream XMLUtf8BufferWriter::ConsumeResult()
580 {
581 return std::move(mStream);
582 }
583
WriteEscaped(const std::string_view & value)584 void XMLUtf8BufferWriter::WriteEscaped(const std::string_view& value)
585 {
586 for (auto c : value)
587 {
588 switch (c)
589 {
590 case wxT('\''):
591 Write("'");
592 break;
593
594 case wxT('"'):
595 Write(""");
596 break;
597
598 case wxT('&'):
599 Write("&");
600 break;
601
602 case wxT('<'):
603 Write("<");
604 break;
605
606 case wxT('>'):
607 Write(">");
608 break;
609 default:
610 if (static_cast<uint8_t>(c) > 0x1F || charXMLCompatiblity[c] != 0)
611 mStream.AppendByte(c);
612 }
613 }
614 }
615