1 /**********************************************************************
2
3 Audacity: A Digital Audio Editor
4 Audacity(R) is copyright (c) 1999-2010 Audacity Team.
5 License: GPL v2. See License.txt.
6
7 ProjectSerializer.cpp
8
9 *******************************************************************//**
10
11 \class ProjectSerializer
12 \brief a class used to (de)serialize the project catalog
13
14 *//********************************************************************/
15
16
17 #include "ProjectSerializer.h"
18
#include <algorithm>
#include <codecvt>
#include <cstdint>
#include <deque>
#include <locale>
#include <mutex>
#include <stdexcept>

#include <wx/ustring.h>
26
27 #include <wx/log.h>
28
29 #include "BufferedStreamReader.h"
30
31 ///
32 /// ProjectSerializer class
33 ///
34
35 // Simple "binary xml" format used exclusively for project documents.
36 //
37 // It is not intended that the user view or modify the file.
38 //
39 // It IS intended that very little work be done during auto save, so numbers
40 // and strings are written in their native format. They will be converted
41 // during recovery.
42 //
43 // The file has 3 main sections:
44 //
45 // character size 1 (UTF-8), 2 (UTF-16) or 4 (UTF-32)
46 // name dictionary dictionary of all names used in the document
47 // data fields the "encoded" XML document
48 //
49 // If a subtree is added, it will be preceded with FT_Push to tell the decoder
50 // to preserve the active dictionary. The decoder will then restore the
51 // dictionary when an FT_Pop is encountered. Nesting is unlimited.
52 //
53 // To save space, each name (attribute or element) encountered is stored in
54 // the name dictionary and replaced with the assigned 2-byte identifier.
55 //
56 // All strings are in native unicode format, 2-byte or 4-byte.
57 //
// All name "lengths" are written as 2-byte unsigned values, but names are
// asserted to be no longer than SHRT_MAX (32767) bytes.
// All string/data "lengths" are 4-byte signed.
60
// Type byte that introduces every record in the serialized stream.
//
// The enumerator values themselves are written to the stream and switched on
// when decoding, so the order of the existing enumerators must not change.
// The trailing comment on each enumerator lists the fields of that record in
// the order they appear in the stream.
enum FieldTypes
{
   FT_CharSize,   // type, value (a single byte: sizeof the character type)
   FT_StartTag,   // type, ID
   FT_EndTag,     // type, ID
   FT_String,     // type, ID, string length, string
   FT_Int,        // type, ID, value
   FT_Bool,       // type, ID, value
   FT_Long,       // type, ID, value
   FT_LongLong,   // type, ID, value
   FT_SizeT,      // type, ID, value
   FT_Float,      // type, ID, value, digits
   FT_Double,     // type, ID, value, digits
   FT_Data,       // type, string length, string
   FT_Raw,        // type, string length, string
   FT_Push,       // type only
   FT_Pop,        // type only
   FT_Name        // type, ID, name length, name
};
80
// Static so that the dict can be reused each time.
//
// If entries get added later, like when an envelope node (for example)
// is written and then the envelope is later removed, the dict will still
// contain the envelope name, but that's not a problem.

// Maps each name seen so far to the identifier assigned to it.
NameMap ProjectSerializer::mNames;
// Serialized dictionary: the FT_CharSize header followed by one FT_Name
// record per entry of mNames.
MemoryStream ProjectSerializer::mDict;
89
FailureMessage(const FilePath &)90 TranslatableString ProjectSerializer::FailureMessage( const FilePath &/*filePath*/ )
91 {
92 return
93 XO("This recovery file was saved by Audacity 2.3.0 or before.\n"
94 "You need to run that version of Audacity to recover the project." );
95 }
96
97 namespace
98 {
99 // Aliases for the FIXED-WIDTH integer types that are used in the file
100 // format.
101
102 // Chosen so that among the four build types (32 bit Windows, 64
103 // bit Windows, 64 bit Mac clang, Linux g++) presently done (3.0.0
104 // development), we use the narrowest width of the type on any of them, so
105 // that anything saved on one build will be read back identically on all
106 // builds. (Although this means that very large values on some systems might
107 // be saved and then read back with loss.)
108
109 // In fact the only types for which this matters are long (only 32 bits on
110 // 32 and 64 bit Windows) and size_t (only 32 bits on 32 bit Windows).
111
using UShort = std::uint16_t;    // Name identifiers and name lengths
using Int = std::int32_t;        // int values (and, below, lengths and digits)

using Long = std::int32_t;       // To save long values
using ULong = std::uint32_t;     // To save size_t values

using LongLong = std::int64_t;   // To save long long values
119
// Reports whether this machine stores the least significant byte first.
// Only a 32-bit probe is inspected; the same byte order is assumed for every
// other integer width.
bool IsLittleEndian()
{
   const std::uint32_t probe = 1u;
   const auto* const lowestAddress =
      reinterpret_cast<const unsigned char*>(&probe);
   // On a little-endian machine the byte holding the 1 comes first.
   return *lowestAddress != 0;
}
127 // In C++20 this could be
128 // constexpr bool IsLittleEndian = (std::endian::native == std::endian::little);
129 // static_assert( IsLittleEndian || (std::endian::native == std::endian::big),
130 // "Oh no! I'm mixed-endian!" );
131
132 // Functions that can read and write native integer types to a canonicalized
133 // little-endian file format. (We don't bother to do the same for floating
134 // point numbers.)
135
136 // Write native little-endian to little-endian file format
137 template <typename Number>
WriteLittleEndian(MemoryStream & out,Number value)138 void WriteLittleEndian(MemoryStream& out, Number value)
139 {
140 out.AppendData(&value, sizeof(value));
141 }
142
143 // Write native big-endian to little-endian file format
WriteBigEndian(MemoryStream & out,Number value)144 template <typename Number> void WriteBigEndian(MemoryStream& out, Number value)
145 {
146 auto begin = static_cast<unsigned char*>(static_cast<void*>(&value));
147 std::reverse(begin, begin + sizeof(value));
148 out.AppendData(&value, sizeof(value));
149 }
150
151 // Read little-endian file format to native little-endian
ReadLittleEndian(BufferedStreamReader & in)152 template <typename Number> Number ReadLittleEndian(BufferedStreamReader& in)
153 {
154 Number result;
155 in.ReadValue(result);
156 return result;
157 }
158
159 // Read little-endian file format to native big-endian
ReadBigEndian(BufferedStreamReader & in)160 template <typename Number> Number ReadBigEndian(BufferedStreamReader& in)
161 {
162 Number result;
163 in.ReadValue(result);
164 auto begin = static_cast<unsigned char*>(static_cast<void*>(&result));
165 std::reverse(begin, begin + sizeof(result));
166 return result;
167 }
168
// Choose between implementations!
//
// Each of these is a pointer to the little-endian or the byte-swapping
// implementation for one fixed-width type, selected by calling
// IsLittleEndian() when the variable is initialized.  The rest of the file
// calls them like ordinary functions.

// Writers: (MemoryStream& out, value)
static const auto WriteUShort =
   IsLittleEndian() ? &WriteLittleEndian<UShort> : &WriteBigEndian<UShort>;
static const auto WriteInt =
   IsLittleEndian() ? &WriteLittleEndian<Int> : &WriteBigEndian<Int>;
static const auto WriteLong =
   IsLittleEndian() ? &WriteLittleEndian<Long> : &WriteBigEndian<Long>;
static const auto WriteULong =
   IsLittleEndian() ? &WriteLittleEndian<ULong> : &WriteBigEndian<ULong>;
static const auto WriteLongLong =
   IsLittleEndian() ? &WriteLittleEndian<LongLong> : &WriteBigEndian<LongLong>;

// Readers: (BufferedStreamReader& in) -> value
static const auto ReadUShort =
   IsLittleEndian() ? &ReadLittleEndian<UShort> : &ReadBigEndian<UShort>;
static const auto ReadInt =
   IsLittleEndian() ? &ReadLittleEndian<Int> : &ReadBigEndian<Int>;
static const auto ReadLong =
   IsLittleEndian() ? &ReadLittleEndian<Long> : &ReadBigEndian<Long>;
static const auto ReadULong =
   IsLittleEndian() ? &ReadLittleEndian<ULong> : &ReadBigEndian<ULong>;
static const auto ReadLongLong =
   IsLittleEndian() ? &ReadLittleEndian<LongLong> : &ReadBigEndian<LongLong>;
191
// Functions to read and write certain lengths -- maybe we will change
// our choices for widths or signedness?

// Byte length of a string, data or raw field (a signed 32-bit value).
using Length = Int; // Instead, as wide as size_t?
static const auto WriteLength = WriteInt;
static const auto ReadLength = ReadInt;

// The "digits" value stored after float and double attribute values.
using Digits = Int; // Instead, just an unsigned char?
static const auto WriteDigits = WriteInt;
static const auto ReadDigits = ReadInt;
202
203 class XMLTagHandlerAdapter final
204 {
205 public:
XMLTagHandlerAdapter(XMLTagHandler * handler)206 explicit XMLTagHandlerAdapter(XMLTagHandler* handler) noexcept
207 : mBaseHandler(handler)
208 {
209 }
210
EmitStartTag(const std::string_view & name)211 void EmitStartTag(const std::string_view& name)
212 {
213 if (mInTag)
214 EmitStartTag();
215
216 mCurrentTagName = name;
217 mInTag = true;
218 }
219
EndTag(const std::string_view & name)220 void EndTag(const std::string_view& name)
221 {
222 if (mInTag)
223 EmitStartTag();
224
225 if (XMLTagHandler* const handler = mHandlers.back())
226 handler->HandleXMLEndTag(name);
227
228 mHandlers.pop_back();
229 }
230
WriteAttr(const std::string_view & name,std::string value)231 void WriteAttr(const std::string_view& name, std::string value)
232 {
233 assert(mInTag);
234
235 if (!mInTag)
236 return;
237
238 mAttributes.emplace_back(name, CacheString(std::move(value)));
239 }
240
WriteAttr(const std::string_view & name,T value)241 template <typename T> void WriteAttr(const std::string_view& name, T value)
242 {
243 assert(mInTag);
244
245 if (!mInTag)
246 return;
247
248 mAttributes.emplace_back(name, XMLAttributeValueView(value));
249 }
250
WriteData(std::string value)251 void WriteData(std::string value)
252 {
253 if (mInTag)
254 EmitStartTag();
255
256 if (XMLTagHandler* const handler = mHandlers.back())
257 handler->HandleXMLContent(CacheString(std::move(value)));
258 }
259
WriteRaw(std::string)260 void WriteRaw(std::string)
261 {
262 // This method is intentionally left empty.
263 // The only data that is serialized by FT_Raw
264 // is the boilerplate code like <?xml > and <!DOCTYPE>
265 // which are ignored
266 }
267
Finalize()268 bool Finalize()
269 {
270 if (mInTag)
271 {
272 EmitStartTag();
273 EndTag(mCurrentTagName);
274 }
275
276 return mBaseHandler != nullptr;
277 }
278
279 private:
EmitStartTag()280 void EmitStartTag()
281 {
282 if (mHandlers.empty())
283 {
284 mHandlers.push_back(mBaseHandler);
285 }
286 else
287 {
288 if (XMLTagHandler* const handler = mHandlers.back())
289 mHandlers.push_back(handler->HandleXMLChild(mCurrentTagName));
290 else
291 mHandlers.push_back(NULL);
292 }
293
294 if (XMLTagHandler*& handler = mHandlers.back())
295 {
296 if (!handler->HandleXMLTag(mCurrentTagName, mAttributes))
297 {
298 handler = nullptr;
299
300 if (mHandlers.size() == 1)
301 mBaseHandler = nullptr;
302 }
303 }
304
305 mStringsCache.clear();
306 mAttributes.clear();
307 mInTag = false;
308 }
309
CacheString(std::string string)310 std::string_view CacheString(std::string string)
311 {
312 mStringsCache.emplace_back(std::move(string));
313 return mStringsCache.back();
314 }
315
316 XMLTagHandler* mBaseHandler;
317
318 std::vector<XMLTagHandler*> mHandlers;
319
320 std::string_view mCurrentTagName;
321
322 std::deque<std::string> mStringsCache;
323 AttributesList mAttributes;
324
325 bool mInTag { false };
326 };
327
328 // template<typename BaseCharType>
329 // std::string FastStringConvertFromAscii(const BaseCharType* begin, const BaseCharType* end)
330 // {
331 //
332 // }
333
// Converts a buffer of fixed-width code units to a UTF-8 std::string.
//
// BaseCharType selects the source encoding by its size: 2-byte units are
// decoded as UTF-16 (surrogate pairs included), wider units as one code point
// per unit.
//
// bytes       start of the buffer, in native byte order
// bytesCount  size of the buffer in BYTES; expected to be a multiple of
//             sizeof(BaseCharType), a trailing partial unit is ignored
//
// Returns the UTF-8 text; an empty string for an empty, null or
// negative-sized buffer.
// Throws std::range_error (from std::wstring_convert) if the input is not
// validly encoded, e.g. contains an unpaired surrogate.
template<typename BaseCharType>
std::string FastStringConvert(const void* bytes, int bytesCount)
{
   constexpr int charSize = sizeof(BaseCharType);

   assert(bytesCount % charSize == 0);

   // Nothing to convert; also keeps a negative count from producing an
   // inverted range below.
   if (bytes == nullptr || bytesCount < charSize)
      return {};

   const auto begin = static_cast<const BaseCharType*>(bytes);
   const auto end = begin + bytesCount / charSize;

   // Fast path: pure ASCII needs no transcoding, each unit is one byte.
   const bool isAscii = std::all_of(
      begin, end,
      [](BaseCharType c)
      { return static_cast<std::make_unsigned_t<BaseCharType>>(c) < 0x80; });

   if (isAscii)
      return std::string(begin, end);

   if constexpr (charSize == 2)
   {
      // codecvt_utf8<char16_t> only understands UCS-2 and fails on surrogate
      // pairs; codecvt_utf8_utf16 is the UTF-16 <-> UTF-8 facet.
      return std::wstring_convert<
                std::codecvt_utf8_utf16<BaseCharType>, BaseCharType>()
         .to_bytes(begin, end);
   }
   else
   {
      return std::wstring_convert<
                std::codecvt_utf8<BaseCharType>, BaseCharType>()
         .to_bytes(begin, end);
   }
}
355 } // namespace
356
ProjectSerializer(size_t allocSize)357 ProjectSerializer::ProjectSerializer(size_t allocSize)
358 {
359 static std::once_flag flag;
360 std::call_once(flag, []{
361 // Just once per run, store header information in the unique static
362 // dictionary that will be written into each project that is saved.
363 // Store the size of "wxStringCharType" so we can convert during recovery
364 // in case the file is used on a system with a different character size.
365 char size = sizeof(wxStringCharType);
366 mDict.AppendByte(FT_CharSize);
367 mDict.AppendData(&size, 1);
368 });
369
370 mDictChanged = false;
371 }
372
~ProjectSerializer()373 ProjectSerializer::~ProjectSerializer()
374 {
375 }
376
// Appends an FT_StartTag record for the element `name`: the type byte
// followed by the name's dictionary identifier (see WriteName).
void ProjectSerializer::StartTag(const wxString & name)
{
   mBuffer.AppendByte(FT_StartTag);
   WriteName(name);
}
382
// Appends an FT_EndTag record for the element `name`: the type byte
// followed by the name's dictionary identifier (see WriteName).
void ProjectSerializer::EndTag(const wxString & name)
{
   mBuffer.AppendByte(FT_EndTag);
   WriteName(name);
}
388
WriteAttr(const wxString & name,const wxChar * value)389 void ProjectSerializer::WriteAttr(const wxString & name, const wxChar *value)
390 {
391 WriteAttr(name, wxString(value));
392 }
393
WriteAttr(const wxString & name,const wxString & value)394 void ProjectSerializer::WriteAttr(const wxString & name, const wxString & value)
395 {
396 mBuffer.AppendByte(FT_String);
397 WriteName(name);
398
399 const Length len = value.length() * sizeof(wxStringCharType);
400 WriteLength( mBuffer, len );
401 mBuffer.AppendData(value.wx_str(), len);
402 }
403
WriteAttr(const wxString & name,int value)404 void ProjectSerializer::WriteAttr(const wxString & name, int value)
405 {
406 mBuffer.AppendByte(FT_Int);
407 WriteName(name);
408
409 WriteInt( mBuffer, value );
410 }
411
// Appends a bool attribute: FT_Bool, the name identifier, then the value as
// a single byte.
void ProjectSerializer::WriteAttr(const wxString & name, bool value)
{
   mBuffer.AppendByte(FT_Bool);
   WriteName(name);

   mBuffer.AppendByte(value);
}
419
WriteAttr(const wxString & name,long value)420 void ProjectSerializer::WriteAttr(const wxString & name, long value)
421 {
422 mBuffer.AppendByte(FT_Long);
423 WriteName(name);
424
425 WriteLong( mBuffer, value );
426 }
427
WriteAttr(const wxString & name,long long value)428 void ProjectSerializer::WriteAttr(const wxString & name, long long value)
429 {
430 mBuffer.AppendByte(FT_LongLong);
431 WriteName(name);
432
433 WriteLongLong( mBuffer, value );
434 }
435
WriteAttr(const wxString & name,size_t value)436 void ProjectSerializer::WriteAttr(const wxString & name, size_t value)
437 {
438 mBuffer.AppendByte(FT_SizeT);
439 WriteName(name);
440
441 WriteULong( mBuffer, value );
442 }
443
WriteAttr(const wxString & name,float value,int digits)444 void ProjectSerializer::WriteAttr(const wxString & name, float value, int digits)
445 {
446 mBuffer.AppendByte(FT_Float);
447 WriteName(name);
448
449 mBuffer.AppendData(&value, sizeof(value));
450 WriteDigits( mBuffer, digits );
451 }
452
WriteAttr(const wxString & name,double value,int digits)453 void ProjectSerializer::WriteAttr(const wxString & name, double value, int digits)
454 {
455 mBuffer.AppendByte(FT_Double);
456 WriteName(name);
457
458 mBuffer.AppendData(&value, sizeof(value));
459 WriteDigits( mBuffer, digits );
460 }
461
WriteData(const wxString & value)462 void ProjectSerializer::WriteData(const wxString & value)
463 {
464 mBuffer.AppendByte(FT_Data);
465
466 Length len = value.length() * sizeof(wxStringCharType);
467 WriteLength( mBuffer, len );
468 mBuffer.AppendData(value.wx_str(), len);
469 }
470
Write(const wxString & value)471 void ProjectSerializer::Write(const wxString & value)
472 {
473 mBuffer.AppendByte(FT_Raw);
474 Length len = value.length() * sizeof(wxStringCharType);
475 WriteLength( mBuffer, len );
476 mBuffer.AppendData(value.wx_str(), len);
477 }
478
WriteName(const wxString & name)479 void ProjectSerializer::WriteName(const wxString & name)
480 {
481 wxASSERT(name.length() * sizeof(wxStringCharType) <= SHRT_MAX);
482 UShort id;
483
484 auto nameiter = mNames.find(name);
485 if (nameiter != mNames.end())
486 {
487 id = nameiter->second;
488 }
489 else
490 {
491 // mNames is static. This appends each name to static mDict only once
492 // in each run.
493 UShort len = name.length() * sizeof(wxStringCharType);
494
495 id = mNames.size();
496 mNames[name] = id;
497
498 mDict.AppendByte(FT_Name);
499 WriteUShort( mDict, id );
500 WriteUShort( mDict, len );
501 mDict.AppendData(name.wx_str(), len);
502
503 mDictChanged = true;
504 }
505
506 WriteUShort( mBuffer, id );
507 }
508
// Returns the static name dictionary stream (mDict), which is shared by all
// serializers.
const MemoryStream &ProjectSerializer::GetDict() const
{
   return mDict;
}
513
// Returns the stream of records written through this serializer (mBuffer).
const MemoryStream& ProjectSerializer::GetData() const
{
   return mBuffer;
}
518
// Returns true if nothing has been written to this serializer's data buffer.
bool ProjectSerializer::IsEmpty() const
{
   return mBuffer.GetSize() == 0;
}
523
// Returns the value of mDictChanged, the flag that WriteName sets when it
// appends a new name to the dictionary.
bool ProjectSerializer::DictChanged() const
{
   return mDictChanged;
}
528
529 // See ProjectFileIO::LoadProject() for explanation of the blockids arg
Decode(BufferedStreamReader & in,XMLTagHandler * handler)530 bool ProjectSerializer::Decode(BufferedStreamReader& in, XMLTagHandler* handler)
531 {
532 if (handler == nullptr)
533 return false;
534
535 XMLTagHandlerAdapter adapter(handler);
536
537 std::vector<char> bytes;
538 IdMap mIds;
539 std::vector<IdMap> mIdStack;
540 char mCharSize = 0;
541
542 mIds.clear();
543
544 struct Error{}; // exception type for short-range try/catch
545 auto Lookup = [&mIds]( UShort id ) -> std::string_view
546 {
547 auto iter = mIds.find( id );
548 if (iter == mIds.end())
549 {
550 throw Error{};
551 }
552
553 return iter->second;
554 };
555
556 int64_t stringsCount = 0;
557 int64_t stringsLength = 0;
558
559 auto ReadString = [&mCharSize, &in, &bytes, &stringsCount, &stringsLength](int len) -> std::string
560 {
561 bytes.reserve( len );
562 auto data = bytes.data();
563 in.Read( data, len );
564
565 stringsCount++;
566 stringsLength += len;
567
568 switch (mCharSize)
569 {
570 case 1:
571 return std::string(bytes.data(), len);
572
573 case 2:
574 return FastStringConvert<char16_t>(bytes.data(), len);
575
576 case 4:
577 return FastStringConvert<char32_t>(bytes.data(), len);
578
579 default:
580 wxASSERT_MSG(false, wxT("Characters size not 1, 2, or 4"));
581 break;
582 }
583
584 return {};
585 };
586
587 try
588 {
589 while (!in.Eof())
590 {
591 UShort id;
592
593 switch (in.GetC())
594 {
595 case FT_Push:
596 {
597 mIdStack.push_back(mIds);
598 mIds.clear();
599 }
600 break;
601
602 case FT_Pop:
603 {
604 mIds = mIdStack.back();
605 mIdStack.pop_back();
606 }
607 break;
608
609 case FT_Name:
610 {
611 id = ReadUShort( in );
612 auto len = ReadUShort( in );
613 mIds[id] = ReadString(len);
614 }
615 break;
616
617 case FT_StartTag:
618 {
619 id = ReadUShort( in );
620
621 adapter.EmitStartTag(Lookup(id));
622 }
623 break;
624
625 case FT_EndTag:
626 {
627 id = ReadUShort( in );
628
629 adapter.EndTag(Lookup(id));
630 }
631 break;
632
633 case FT_String:
634 {
635 id = ReadUShort( in );
636 int len = ReadLength( in );
637
638 adapter.WriteAttr(Lookup(id), ReadString(len));
639 }
640 break;
641
642 case FT_Float:
643 {
644 float val;
645
646 id = ReadUShort( in );
647 in.Read(&val, sizeof(val));
648 /* int dig = */ReadDigits(in);
649
650 adapter.WriteAttr(Lookup(id), val);
651 }
652 break;
653
654 case FT_Double:
655 {
656 double val;
657
658 id = ReadUShort( in );
659 in.Read(&val, sizeof(val));
660 /*int dig = */ReadDigits(in);
661
662 adapter.WriteAttr(Lookup(id), val);
663 }
664 break;
665
666 case FT_Int:
667 {
668 id = ReadUShort( in );
669 int val = ReadInt( in );
670
671 adapter.WriteAttr(Lookup(id), val);
672 }
673 break;
674
675 case FT_Bool:
676 {
677 unsigned char val;
678
679 id = ReadUShort( in );
680 in.Read(&val, 1);
681
682 adapter.WriteAttr(Lookup(id), val);
683 }
684 break;
685
686 case FT_Long:
687 {
688 id = ReadUShort( in );
689 long val = ReadLong( in );
690
691 adapter.WriteAttr(Lookup(id), val);
692 }
693 break;
694
695 case FT_LongLong:
696 {
697 id = ReadUShort( in );
698 long long val = ReadLongLong( in );
699 adapter.WriteAttr(Lookup(id), val);
700 }
701 break;
702
703 case FT_SizeT:
704 {
705 id = ReadUShort( in );
706 size_t val = ReadULong( in );
707
708 adapter.WriteAttr(Lookup(id), val);
709 }
710 break;
711
712 case FT_Data:
713 {
714 int len = ReadLength( in );
715 adapter.WriteData(ReadString(len));
716 }
717 break;
718
719 case FT_Raw:
720 {
721 int len = ReadLength( in );
722 adapter.WriteRaw(ReadString(len));
723 }
724 break;
725
726 case FT_CharSize:
727 {
728 in.Read(&mCharSize, 1);
729 }
730 break;
731
732 default:
733 wxASSERT(true);
734 break;
735 }
736 }
737 }
738 catch( const Error& )
739 {
740 // Document was corrupt, or platform differences in size or endianness
741 // were not well canonicalized
742 return false;
743 }
744
745 wxLogInfo(
746 "Loaded %lld string %f Kb in size", stringsCount, stringsLength / 1024.0);
747
748 return adapter.Finalize();
749 }
750