1 /**********************************************************************
2 
3   Audacity: A Digital Audio Editor
4 
5   XMLWriter.cpp
6 
7   Leland Lucius
8 
9 *******************************************************************//**
10 
11 \class XMLWriter
12 \brief Base class for XMLFileWriter and XMLStringWriter that provides
13 the general functionality for creating XML in UTF8 encoding.
14 
15 *//****************************************************************//**
16 
17 \class XMLFileWriter
18 \brief Wrapper to output XML data to files.
19 
20 *//****************************************************************//**
21 
22 \class XMLStringWriter
23 \brief Wrapper to output XML data to strings.
24 
25 *//*******************************************************************/
26 
27 #include "XMLWriter.h"
28 
29 #include <wx/defs.h>
30 #include <wx/ffile.h>
31 #include <wx/intl.h>
32 
33 #include <cstring>
34 
35 #include "ToChars.h"
36 
37 #include "InconsistencyException.h"
38 
39 //table for xml encoding compatibility with expat decoding
40 //see wxWidgets-2.8.12/src/expat/lib/xmltok_impl.h
41 //and wxWidgets-2.8.12/src/expat/lib/asciitab.h
// Indexed by character code for the control range 0x00..0x1F only.
// A non-zero entry means the character may be emitted; a zero entry means
// the writers in this file drop it. Only TAB (0x09), LF (0x0A) and CR (0x0D)
// are marked as compatible. Callers must not index past 0x1F.
static int charXMLCompatiblity[] =
  {

/* 0x00 */ 0, 0, 0, 0,
/* 0x04 */ 0, 0, 0, 0,
/* 0x08 */ 0, 1, 1, 0,
/* 0x0C */ 0, 1, 0, 0,
/* 0x10 */ 0, 0, 0, 0,
/* 0x14 */ 0, 0, 0, 0,
/* 0x18 */ 0, 0, 0, 0,
/* 0x1C */ 0, 0, 0, 0,
  };
54 
// These are used by XMLEsc to handle surrogate pairs and filter invalid characters outside the ASCII range.
// High surrogates span U+D800..U+DBFF and low surrogates span U+DC00..U+DFFF (both ends inclusive).
#define MIN_HIGH_SURROGATE static_cast<wxUChar>(0xD800)
#define MAX_HIGH_SURROGATE static_cast<wxUChar>(0xDBFF)
#define MIN_LOW_SURROGATE static_cast<wxUChar>(0xDC00)
#define MAX_LOW_SURROGATE static_cast<wxUChar>(0xDFFF)

// Unicode defines other noncharacters, but only these two are invalid in XML.
// XMLEsc drops them instead of writing a character reference.
#define NONCHARACTER_FFFE static_cast<wxUChar>(0xFFFE)
#define NONCHARACTER_FFFF static_cast<wxUChar>(0xFFFF)
64 
65 
66 ///
67 /// XMLWriter base class
68 ///
XMLWriter()69 XMLWriter::XMLWriter()
70 {
71    mDepth = 0;
72    mInTag = false;
73    mHasKids.push_back(false);
74 }
75 
~XMLWriter()76 XMLWriter::~XMLWriter()
77 {
78 }
79 
StartTag(const wxString & name)80 void XMLWriter::StartTag(const wxString &name)
81 // may throw
82 {
83    int i;
84 
85    if (mInTag) {
86       Write(wxT(">\n"));
87       mInTag = false;
88    }
89 
90    for (i = 0; i < mDepth; i++) {
91       Write(wxT("\t"));
92    }
93 
94    Write(wxString::Format(wxT("<%s"), name));
95 
96    mTagstack.insert(mTagstack.begin(), name);
97    mHasKids[0] = true;
98    mHasKids.insert(mHasKids.begin(), false);
99    mDepth++;
100    mInTag = true;
101 }
102 
EndTag(const wxString & name)103 void XMLWriter::EndTag(const wxString &name)
104 // may throw
105 {
106    int i;
107 
108    if (mTagstack.size() > 0) {
109       if (mTagstack[0] == name) {
110          if (mHasKids[1]) {  // There will always be at least 2 at this point
111             if (mInTag) {
112                Write(wxT("/>\n"));
113             }
114             else {
115                for (i = 0; i < mDepth - 1; i++) {
116                   Write(wxT("\t"));
117                }
118                Write(wxString::Format(wxT("</%s>\n"), name));
119             }
120          }
121          else {
122             Write(wxT(">\n"));
123          }
124          mTagstack.erase( mTagstack.begin() );
125          mHasKids.erase(mHasKids.begin());
126       }
127    }
128 
129    mDepth--;
130    mInTag = false;
131 }
132 
WriteAttr(const wxString & name,const wxString & value)133 void XMLWriter::WriteAttr(const wxString &name, const wxString &value)
134 // may throw from Write()
135 {
136    Write(wxString::Format(wxT(" %s=\"%s\""),
137       name,
138       XMLEsc(value)));
139 }
140 
WriteAttr(const wxString & name,const wxChar * value)141 void XMLWriter::WriteAttr(const wxString &name, const wxChar *value)
142 // may throw from Write()
143 {
144    WriteAttr(name, wxString(value));
145 }
146 
WriteAttr(const wxString & name,int value)147 void XMLWriter::WriteAttr(const wxString &name, int value)
148 // may throw from Write()
149 {
150    Write(wxString::Format(wxT(" %s=\"%d\""),
151       name,
152       value));
153 }
154 
WriteAttr(const wxString & name,bool value)155 void XMLWriter::WriteAttr(const wxString &name, bool value)
156 // may throw from Write()
157 {
158    Write(wxString::Format(wxT(" %s=\"%d\""),
159       name,
160       value));
161 }
162 
WriteAttr(const wxString & name,long value)163 void XMLWriter::WriteAttr(const wxString &name, long value)
164 // may throw from Write()
165 {
166    Write(wxString::Format(wxT(" %s=\"%ld\""),
167       name,
168       value));
169 }
170 
WriteAttr(const wxString & name,long long value)171 void XMLWriter::WriteAttr(const wxString &name, long long value)
172 // may throw from Write()
173 {
174    Write(wxString::Format(wxT(" %s=\"%lld\""),
175       name,
176       value));
177 }
178 
WriteAttr(const wxString & name,size_t value)179 void XMLWriter::WriteAttr(const wxString &name, size_t value)
180 // may throw from Write()
181 {
182    Write(wxString::Format(wxT(" %s=\"%lld\""),
183       name,
184       (long long) value));
185 }
186 
WriteAttr(const wxString & name,float value,int digits)187 void XMLWriter::WriteAttr(const wxString &name, float value, int digits)
188 // may throw from Write()
189 {
190    Write(wxString::Format(wxT(" %s=\"%s\""),
191       name,
192       Internat::ToString(value, digits)));
193 }
194 
WriteAttr(const wxString & name,double value,int digits)195 void XMLWriter::WriteAttr(const wxString &name, double value, int digits)
196 // may throw from Write()
197 {
198    Write(wxString::Format(wxT(" %s=\"%s\""),
199       name,
200       Internat::ToString(value, digits)));
201 }
202 
WriteData(const wxString & value)203 void XMLWriter::WriteData(const wxString &value)
204 // may throw from Write()
205 {
206    int i;
207 
208    for (i = 0; i < mDepth; i++) {
209       Write(wxT("\t"));
210    }
211 
212    Write(XMLEsc(value));
213 }
214 
WriteSubTree(const wxString & value)215 void XMLWriter::WriteSubTree(const wxString &value)
216 // may throw from Write()
217 {
218    if (mInTag) {
219       Write(wxT(">\n"));
220       mInTag = false;
221       mHasKids[0] = true;
222    }
223 
224    Write(value);
225 }
226 
227 // See http://www.w3.org/TR/REC-xml for reference
// Returns a copy of `s` made safe for use in XML attribute values and
// character data:
//  - the five markup characters ' " & < > become predefined entities;
//  - a valid surrogate pair (only when wxUChar is 2 bytes wide) is copied as is;
//  - other characters that wxIsprint() rejects become "&#x....;" references,
//    except control characters the compatibility table disallows, surrogates,
//    and U+FFFE / U+FFFF, which are dropped;
//  - everything else is copied unchanged.
wxString XMLWriter::XMLEsc(const wxString & s)
{
   wxString result;
   int len = s.length();

   for(int i=0; i<len; i++) {
      wxUChar c = s.GetChar(i);

      switch (c) {
         case wxT('\''):
            result += wxT("&apos;");
         break;

         case wxT('"'):
            result += wxT("&quot;");
         break;

         case wxT('&'):
            result += wxT("&amp;");
         break;

         case wxT('<'):
            result += wxT("&lt;");
         break;

         case wxT('>'):
            result += wxT("&gt;");
         break;

         default:
            // A high surrogate in the last position cannot start a pair, so it
            // is left to the branch below, which drops it.
            if (sizeof(c) == 2 && c >= MIN_HIGH_SURROGATE && c <= MAX_HIGH_SURROGATE && i < len - 1) {
               // If wxUChar is 2 bytes, then supplementary characters (those greater than U+FFFF) are represented
               // with a high surrogate (U+D800..U+DBFF) followed by a low surrogate (U+DC00..U+DFFF).
               // Handle those here.
               wxUChar c2 = s.GetChar(++i);
               if (c2 >= MIN_LOW_SURROGATE && c2 <= MAX_LOW_SURROGATE) {
                  // Surrogate pair found; simply add it to the output string.
                  result += c;
                  result += c2;
               }
               else {
                  // That high surrogate isn't paired, so ignore it.
                  // Step back so the following character is processed on its
                  // own by the next loop iteration.
                  i--;
               }
            }
            else if (!wxIsprint(c)) {
               // Ignore several characters such as EOT (0x04) and STX (0x02) because they make the expat parser bail.
               // See checkCharRefNumber() in expat's xmltok.c to see how expat bails on these chars.
               // Also see wxWidgets-2.8.12/src/expat/lib/asciitab.h to see which characters are not XML compatible
               // post decode (we can still encode '&' and '<' with this table, but it prevents us from encoding EOT).
               // Everything is compatible past ASCII 0x20 except for surrogates and the noncharacters U+FFFE and U+FFFF,
               // so we don't check the compatibility table higher than this.
               if((c> 0x1F || charXMLCompatiblity[c]!=0) &&
                     (c < MIN_HIGH_SURROGATE || c > MAX_LOW_SURROGATE) &&
                     c != NONCHARACTER_FFFE && c != NONCHARACTER_FFFF)
                  result += wxString::Format(wxT("&#x%04x;"), c);
            }
            else {
               result += c;
            }
         break;
      }
   }

   return result;
}
294 
295 ///
296 /// XMLFileWriter class
297 ///
XMLFileWriter(const FilePath & outputPath,const TranslatableString & caption,bool keepBackup)298 XMLFileWriter::XMLFileWriter(
299    const FilePath &outputPath, const TranslatableString &caption, bool keepBackup )
300    : mOutputPath{ outputPath }
301    , mCaption{ caption }
302    , mKeepBackup{ keepBackup }
303 // may throw
304 {
305    auto tempPath = wxFileName::CreateTempFileName( outputPath );
306    if (!wxFFile::Open(tempPath, wxT("wb")) || !IsOpened())
307       ThrowException( outputPath, mCaption );
308 
309    if (mKeepBackup) {
310       int index = 0;
311       wxString backupName;
312 
313       do {
314          wxFileName outputFn{ mOutputPath };
315          index++;
316          mBackupName =
317          outputFn.GetPath() + wxFILE_SEP_PATH +
318          outputFn.GetName() + wxT("_bak") +
319          wxString::Format(wxT("%d"), index) + wxT(".") +
320          outputFn.GetExt();
321       } while( ::wxFileExists( mBackupName ) );
322 
323       // Open the backup file to be sure we can write it and reserve it
324       // until committing
325       if (! mBackupFile.Open( mBackupName, "wb" ) || ! mBackupFile.IsOpened() )
326          ThrowException( mBackupName, mCaption );
327    }
328 }
329 
330 
~XMLFileWriter()331 XMLFileWriter::~XMLFileWriter()
332 {
333    // Don't let a destructor throw!
334    GuardedCall( [&] {
335       if (!mCommitted) {
336          auto fileName = GetName();
337          if ( IsOpened() )
338             CloseWithoutEndingTags();
339          ::wxRemoveFile( fileName );
340       }
341    } );
342 }
343 
// Completes the write in two steps; either step may throw.
void XMLFileWriter::Commit()
// may throw
{
   // Close all open tags, then flush and close the temporary file.
   PreCommit();
   // Back up or remove the old output file and move the temporary file into place.
   PostCommit();
}
350 
PreCommit()351 void XMLFileWriter::PreCommit()
352 // may throw
353 {
354    while (mTagstack.size()) {
355       EndTag(mTagstack[0]);
356    }
357 
358    CloseWithoutEndingTags();
359 }
360 
PostCommit()361 void XMLFileWriter::PostCommit()
362 // may throw
363 {
364    FilePath tempPath = GetName();
365    if (mKeepBackup) {
366       if (! mBackupFile.Close() ||
367           ! wxRenameFile( mOutputPath, mBackupName ) )
368          ThrowException( mBackupName, mCaption );
369    }
370    else {
371       if ( wxFileName::FileExists( mOutputPath ) &&
372            ! wxRemoveFile( mOutputPath ) )
373          ThrowException( mOutputPath, mCaption );
374    }
375 
376    // Now we have vacated the file at the output path and are committed.
377    // But not completely finished with steps of the commit operation.
378    // If this step fails, we haven't lost the successfully written data,
379    // but just failed to put it in the right place.
380    if (! wxRenameFile( tempPath, mOutputPath ) )
381       throw FileException{
382          FileException::Cause::Rename, tempPath, mCaption, mOutputPath
383       };
384 
385    mCommitted = true;
386 }
387 
CloseWithoutEndingTags()388 void XMLFileWriter::CloseWithoutEndingTags()
389 // may throw
390 {
391    // Before closing, we first flush it, because if Flush() fails because of a
392    // "disk full" condition, we can still at least try to close the file.
393    if (!wxFFile::Flush())
394    {
395       wxFFile::Close();
396       ThrowException( GetName(), mCaption );
397    }
398 
399    // Note that this should never fail if flushing worked.
400    if (!wxFFile::Close())
401       ThrowException( GetName(), mCaption );
402 }
403 
Write(const wxString & data)404 void XMLFileWriter::Write(const wxString &data)
405 // may throw
406 {
407    if (!wxFFile::Write(data, wxConvUTF8) || Error())
408    {
409       // When writing fails, we try to close the file before throwing the
410       // exception, so it can at least be deleted.
411       wxFFile::Close();
412       ThrowException( GetName(), mCaption );
413    }
414 }
415 
416 ///
417 /// XMLStringWriter class
418 ///
XMLStringWriter(size_t initialSize)419 XMLStringWriter::XMLStringWriter(size_t initialSize)
420 {
421    if (initialSize)
422    {
423       reserve(initialSize);
424    }
425 }
426 
~XMLStringWriter()427 XMLStringWriter::~XMLStringWriter()
428 {
429 }
430 
// Output sink of the string writer: adds `data` to the end of the text
// collected so far.
void XMLStringWriter::Write(const wxString &data)
{
   // Append() is not defined in this file -- presumably inherited from the
   // string base class; confirm in XMLWriter.h.
   Append(data);
}
435 
StartTag(const std::string_view & name)436 void XMLUtf8BufferWriter::StartTag(const std::string_view& name)
437 {
438    if (mInTag)
439       Write(">");
440 
441    Write("<");
442    Write(name);
443 
444    mInTag = true;
445 }
446 
EndTag(const std::string_view & name)447 void XMLUtf8BufferWriter::EndTag(const std::string_view& name)
448 {
449    if (mInTag)
450    {
451       Write("/>");
452       mInTag = false;
453    }
454    else
455    {
456       Write("</");
457       Write(name);
458       Write(">");
459    }
460 }
461 
WriteAttr(const std::string_view & name,const Identifier & value)462 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, const Identifier& value)
463 {
464    const wxScopedCharBuffer utf8Value = value.GET().utf8_str();
465 
466    WriteAttr(name, { utf8Value.data(), utf8Value.length() });
467 }
468 
WriteAttr(const std::string_view & name,const std::string_view & value)469 void XMLUtf8BufferWriter::WriteAttr(
470    const std::string_view& name, const std::string_view& value)
471 {
472    assert(mInTag);
473 
474    Write(" ");
475    Write(name);
476    Write("=\"");
477    WriteEscaped(value);
478    Write("\"");
479 }
480 
WriteAttr(const std::string_view & name,int value)481 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, int value)
482 {
483    WriteAttr(name, static_cast<long long>(value));
484 }
485 
WriteAttr(const std::string_view & name,bool value)486 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, bool value)
487 {
488    WriteAttr(name, static_cast<long long>(value));
489 }
490 
WriteAttr(const std::string_view & name,long value)491 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, long value)
492 {
493    // long can be int or long long. Assume the longest!
494    WriteAttr(name, static_cast<long long>(value));
495 }
496 
WriteAttr(const std::string_view & name,long long value)497 void XMLUtf8BufferWriter::WriteAttr(
498    const std::string_view& name, long long value)
499 {
500    // -9223372036854775807 is the worst case
501    constexpr size_t bufferSize = 21;
502    char buffer[bufferSize];
503 
504    const auto result = ToChars(buffer, buffer + bufferSize, value);
505 
506    if (result.ec != std::errc())
507       THROW_INCONSISTENCY_EXCEPTION;
508 
509    WriteAttr(name, std::string_view(buffer, result.ptr - buffer));
510 }
511 
WriteAttr(const std::string_view & name,size_t value)512 void XMLUtf8BufferWriter::WriteAttr(const std::string_view& name, size_t value)
513 {
514    // Well, that maintains the original behavior
515    WriteAttr(name, static_cast<long long>(value));
516 }
517 
WriteAttr(const std::string_view & name,float value,int digits)518 void XMLUtf8BufferWriter::WriteAttr(
519    const std::string_view& name, float value, int digits /*= -1*/)
520 {
521    constexpr size_t bufferSize = std::numeric_limits<float>::max_digits10 +
522                                  5 + // No constexpr log2 yet! example - e-308
523                                  3; // Dot, sign an 0 separator
524 
525    char buffer[bufferSize];
526 
527    const auto result = ToChars(buffer, buffer + bufferSize, value, digits);
528 
529    if (result.ec != std::errc())
530       THROW_INCONSISTENCY_EXCEPTION;
531 
532    WriteAttr(name, std::string_view(buffer, result.ptr - buffer));
533 }
534 
WriteAttr(const std::string_view & name,double value,int digits)535 void XMLUtf8BufferWriter::WriteAttr(
536    const std::string_view& name, double value, int digits /*= -1*/)
537 {
538    constexpr size_t bufferSize = std::numeric_limits<double>::max_digits10 +
539                                  5 + // No constexpr log2 yet!
540                                  3;  // Dot, sign an 0 separator
541 
542    char buffer[bufferSize];
543 
544    const auto result = ToChars(buffer, buffer + bufferSize, value, digits);
545 
546    if (result.ec != std::errc())
547       THROW_INCONSISTENCY_EXCEPTION;
548 
549    WriteAttr(name, std::string_view(buffer, result.ptr - buffer));
550 }
551 
WriteData(const std::string_view & value)552 void XMLUtf8BufferWriter::WriteData(const std::string_view& value)
553 {
554    if (mInTag)
555    {
556       Write(">");
557       mInTag = false;
558    }
559 
560    WriteEscaped(value);
561 }
562 
WriteSubTree(const std::string_view & value)563 void XMLUtf8BufferWriter::WriteSubTree(const std::string_view& value)
564 {
565    if (mInTag)
566    {
567       Write(">");
568       mInTag = false;
569    }
570 
571    Write(value);
572 }
573 
Write(const std::string_view & value)574 void XMLUtf8BufferWriter::Write(const std::string_view& value)
575 {
576    mStream.AppendData(value.data(), value.length());
577 }
578 
ConsumeResult()579 MemoryStream XMLUtf8BufferWriter::ConsumeResult()
580 {
581    return std::move(mStream);
582 }
583 
WriteEscaped(const std::string_view & value)584 void XMLUtf8BufferWriter::WriteEscaped(const std::string_view& value)
585 {
586    for (auto c : value)
587    {
588       switch (c)
589       {
590       case wxT('\''):
591          Write("&apos;");
592          break;
593 
594       case wxT('"'):
595          Write("&quot;");
596          break;
597 
598       case wxT('&'):
599          Write("&amp;");
600          break;
601 
602       case wxT('<'):
603          Write("&lt;");
604          break;
605 
606       case wxT('>'):
607          Write("&gt;");
608          break;
609       default:
610          if (static_cast<uint8_t>(c) > 0x1F || charXMLCompatiblity[c] != 0)
611             mStream.AppendByte(c);
612       }
613    }
614 }
615