1 /*
2 Copyright (c) 2010 Peter "Corsix" Cawley
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy of
5 this software and associated documentation files (the "Software"), to deal in
6 the Software without restriction, including without limitation the rights to
7 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8 of the Software, and to permit persons to whom the Software is furnished to do
9 so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all
12 copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 SOFTWARE.
21 */
22 
#include "iso_fs.h"

#include <algorithm>
#include <array>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <memory>
#include <stdexcept>
#include <vector>

#include "th.h"
37 
38 namespace {
39 
/// Type codes compared against the first byte of a volume descriptor.
enum iso_volume_descriptor_type : uint8_t {
  // Type codes that are not listed are either reserved for future use or
  // of no interest to this reader.
  vdt_primary_volume = 1,
  vdt_terminator = 255,
};
46 
/// Bits of the flag byte of a directory table entry. The flag byte is a
/// bitmask, so several of these may be set at once.
enum iso_dir_ent_flag : uint8_t {
  def_hidden = 1u << 0,        // bit 0
  def_directory = 1u << 1,     // bit 1: the entry is a directory
  def_multi_extent = 1u << 7,  // bit 7
};
53 
/// Byte offset, from the start of a file entry, of the 32-bit sector at
/// which the file data is stored.
constexpr ptrdiff_t file_sector_offset{2};

/// Byte offset, from the start of a file entry, of the 32-bit length of the
/// file data.
constexpr ptrdiff_t file_data_length_offset{10};

/// Byte offset, from the start of a file entry, of the file flags
/// (e.g. directory vs file).
constexpr ptrdiff_t file_flags_offset{25};

/// Byte offset, from the start of a file entry, of the byte that stores the
/// length of the filename.
constexpr ptrdiff_t filename_length_offset{32};

/// Byte offset, from the start of a file entry, of the first byte of the
/// filename or directory identifier.
constexpr ptrdiff_t filename_offset{33};

/// The smallest size a valid file entry may declare. It accounts for every
/// fixed header field and for padding to an even length.
constexpr uint8_t minimum_file_entry_size{34};

/// Deepest directory nesting that is searched. The formal limit in the
/// specification is 8; the larger value allows for loose implementations.
constexpr int max_directory_depth{16};

/// Reasonably unique name of a file from Theme Hospital, used as the
/// indication that the right directory has been found.
constexpr const char* vblk_0_filename{"VBLK-0.TAB"};

/// Sector sizes can vary, but they must be powers of two and at least 2048.
constexpr size_t min_sector_size{2048};

/// Byte offset of the sector size within the primary volume descriptor.
constexpr size_t sector_size_offset{128};

/// Byte offset of the root directory entry within the primary volume
/// descriptor.
constexpr ptrdiff_t root_directory_offset{156};

/// The root directory entry has a fixed size.
constexpr size_t root_directory_entry_size{34};

/// ISO 9660 reserves a 32kb area at the beginning of the image (e.g. for
/// boot information); the filesystem starts at this sector.
constexpr uint32_t first_filesystem_sector{16};
101 
/// Cuts a file identifier down to its file name.
/// A file identifier has the form `filename;file id`. When a `;` occurs in
/// the first iLength bytes of sIdent, iLength is set to the number of bytes
/// in front of the first one; otherwise iLength is left as it was.
void trim_file_id(const uint8_t* sIdent, uint8_t& iLength) {
  const uint8_t* const ident_end = sIdent + iLength;
  const uint8_t* const separator =
      std::find(sIdent, ident_end, static_cast<uint8_t>(';'));
  if (separator != ident_end) {
    iLength = static_cast<uint8_t>(separator - sIdent);
  }
}
112 
/// Map one character to the normalized filename form that conforms to ISO
/// filename limitations: `_` becomes `-`, the letters `a`..`z` become upper
/// case, and every other character is returned unchanged.
char normalise(char c) {
  if (c == '_') {
    return '-';
  }
  const bool is_lower_case = c >= 'a' && c <= 'z';
  return is_lower_case ? static_cast<char>(c - ('a' - 'A')) : c;
}
124 
125 /// Convert length bytes from the start pointer to a normalized filename
126 /// string. All ASCII letters are converted to upper case, and `_` to `-`.
normalise(const uint8_t * start,size_t length)127 std::string normalise(const uint8_t* start, size_t length) {
128   std::string ret;
129   const uint8_t* p = start;
130   for (size_t i = 0; i < length; i++) {
131     ret.push_back(normalise(static_cast<char>(*p)));
132     ++p;
133   }
134   return ret;
135 }
136 
137 /// Convert c string to normalized filename string. All ASCII letters are
138 /// converted to upper case, and `_` to `-`.
normalise(const char * str)139 std::string normalise(const char* str) {
140   std::string ret;
141   const char* p = str;
142   while (*p != '\0') {
143     ret.push_back(normalise(*p));
144     ++p;
145   }
146   return ret;
147 }
148 
149 /// A file entry from the directory table
150 class iso_file_entry {
151  public:
152   /// Construct dummy entry.
153   iso_file_entry() = default;
154 
155   /// Construct entry from the given memory location.
156   /// The first byte is the size of the entry. Other useful headers are read
157   /// from their offsets from that location.
158   ///
159   /// \param Pointer to first byte of file entry in directory table.
iso_file_entry(const uint8_t * b)160   iso_file_entry(const uint8_t* b) {
161     uint8_t size = *b;
162     if (size < minimum_file_entry_size) {
163       throw std::runtime_error("size specified for file entry is too small.");
164     }
165 
166     uint8_t filename_length = b[filename_length_offset];
167     if (filename_length + filename_offset > size) {
168       throw std::runtime_error("size specified for file entry is too small.");
169     }
170     trim_file_id(b + filename_offset, filename_length);
171     filename = normalise(b + filename_offset, filename_length);
172 
173     data_sector = bytes_to_uint32_le(b + file_sector_offset);
174     data_length = bytes_to_uint32_le(b + file_data_length_offset);
175     flags = b[file_flags_offset];
176   }
177 
178   /// Logical location of the data for this file in the ISO image.
179   uint32_t data_sector;
180 
181   /// The length of the data for this file.
182   uint32_t data_length;
183 
184   /// Flags that indicate whether this entry is a file or directory, along
185   /// with other properties.
186   ///
187   /// \see iso_dir_ent_flag
188   uint8_t flags;
189 
190   /// The filename of this entry.
191   std::string filename;
192 };
193 
194 /**
195  * Input iterator (forward only, read only) for an ISO 9660 directory table
196  * stored in a byte buffer.
197  */
198 class iso_directory_iterator final {
199   using iterator_category = std::input_iterator_tag;
200   using value_type = const iso_file_entry;
201   using difference_type = ptrdiff_t;
202   using pointer = const iso_file_entry*;
203   using reference = const iso_file_entry&;
204 
205  public:
206   iso_directory_iterator() = delete;
207 
208   /**
209    * Initialize an iterator for the directory table in the memory region
210    * defined by by begin and end. This iterator is aware of its container
211    * and will throw an exception if an attempt is made to access it out of
212    * range.
213    *
214    * \param begin pointer to the first byte of the directory table.
215    * \param end pointer to the first byte following the directory table.
216    */
iso_directory_iterator(const uint8_t * begin,const uint8_t * end)217   iso_directory_iterator(const uint8_t* begin, const uint8_t* end) {
218     directory_ptr = begin;
219     end_ptr = end;
220     if (directory_ptr >= end_ptr) {
221       // dummy value, not accessible.
222       entry = iso_file_entry();
223     } else {
224       entry = iso_file_entry(begin);
225     }
226   }
227 
228   /**
229    * Copy the given iso_directory_iterator
230    */
iso_directory_iterator(iso_directory_iterator & it)231   iso_directory_iterator(iso_directory_iterator& it) {
232     directory_ptr = it.directory_ptr;
233     end_ptr = it.end_ptr;
234     entry = it.entry;
235   }
236 
237   /**
238    * Move the given iso_directory_iterator
239    */
iso_directory_iterator(iso_directory_iterator && it)240   iso_directory_iterator(iso_directory_iterator&& it) noexcept {
241     directory_ptr = it.directory_ptr;
242     end_ptr = it.end_ptr;
243     entry = std::move(it.entry);
244     it.directory_ptr = nullptr;
245     it.end_ptr = nullptr;
246     it.entry = iso_file_entry();
247   }
248 
249   ~iso_directory_iterator() = default;
250 
251   /**
252    * Determine whether two iso_directory_iterators point to the same table
253    * entry.
254    */
operator ==(const iso_directory_iterator & rhs) const255   bool operator==(const iso_directory_iterator& rhs) const {
256     return (this->directory_ptr == rhs.directory_ptr);
257   }
258 
259   /**
260    * Determine whether to iso_directory_iterators do not point to the same
261    * table entry.
262    */
operator !=(const iso_directory_iterator & rhs) const263   bool operator!=(const iso_directory_iterator& rhs) const {
264     return !((*this) == rhs);
265   }
266 
267   /**
268    * Get the file entry pointed to by the iterator.
269    */
operator *() const270   reference operator*() const {
271     if (directory_ptr >= end_ptr) {
272       throw std::out_of_range("iso directory iterator is past end of input");
273     }
274     return entry;
275   }
276 
277   /**
278    * Assign this iterator the value of another iterator by copy
279    */
280   iso_directory_iterator& operator=(iso_directory_iterator& rhs) = default;
281 
282   /**
283    * Assign this iterator the value of another iterator by move
284    */
operator =(iso_directory_iterator && rhs)285   iso_directory_iterator& operator=(iso_directory_iterator&& rhs) noexcept {
286     directory_ptr = rhs.directory_ptr;
287     end_ptr = rhs.end_ptr;
288     entry = std::move(rhs.entry);
289     rhs.directory_ptr = nullptr;
290     rhs.end_ptr = nullptr;
291     rhs.entry = {};
292     return *this;
293   }
294 
295   /**
296    * Advance this iterator to the next file entry in the directory table,
297    * returning the result.
298    * In cases where advancing the iterator would read past the end of the
299    * directory table, an exception is thrown and the iterator is not
300    * advanced.
301    */
operator ++()302   iso_directory_iterator& operator++() {
303     if (directory_ptr >= end_ptr) {
304       throw std::out_of_range(
305           "Cannot advance iso directory iterator past end of input");
306     }
307 
308     const uint8_t* new_dir_ptr = directory_ptr + *directory_ptr;
309     while (new_dir_ptr < end_ptr && *new_dir_ptr == 0) {
310       ++new_dir_ptr;
311     }
312 
313     // Catch a malformed directory entry where the size would extend past
314     // the end of the table.
315     if (new_dir_ptr < end_ptr && new_dir_ptr + *new_dir_ptr > end_ptr) {
316       throw std::runtime_error(
317           "The last directory entry was larger than the defined "
318           "table region.");
319     }
320 
321     if (new_dir_ptr < end_ptr) {
322       entry = iso_file_entry(new_dir_ptr);
323     } else {
324       entry = iso_file_entry();
325     }
326     directory_ptr = new_dir_ptr;
327     return *this;
328   }
329 
330   /**
331    * Advance this iterator to the next file entry in the directory table,
332    * returning a copy of the old iterator.
333    */
operator ++(int)334   iso_directory_iterator operator++(int) {
335     iso_directory_iterator old(*this);
336     ++(*this);
337     return old;
338   }
339 
340  private:
341   /// Pointer to the current entry.
342   const uint8_t* directory_ptr;
343 
344   /// Pointer to the end of the directory table.
345   const uint8_t* end_ptr;
346 
347   /// Current entry.
348   iso_file_entry entry;
349 };
350 
351 }  // namespace
352 
iso_filesystem()353 iso_filesystem::iso_filesystem()
354     : raw_file(nullptr), error(nullptr), files(), path_seperator('\\') {}
355 
~iso_filesystem()356 iso_filesystem::~iso_filesystem() { clear(); }
357 
clear()358 void iso_filesystem::clear() {
359   delete[] error;
360   error = nullptr;
361   files.clear();
362   if (raw_file) {
363     std::fclose(raw_file);
364     raw_file = nullptr;
365   }
366 }
367 
set_path_separator(char cSeparator)368 void iso_filesystem::set_path_separator(char cSeparator) {
369   path_seperator = cSeparator;
370 }
371 
initialise(const char * path)372 bool iso_filesystem::initialise(const char* path) {
373   clear();
374   FILE* f = std::fopen(path, "rb");
375   if (!f) {
376     set_error("Failed to open ISO file");
377     return false;
378   }
379   raw_file = f;
380 
381   // Until we know better, assume that sectors are 2048 bytes.
382   sector_size = min_sector_size;
383 
384   // The first 16 sectors are reserved for bootable media.
385   // Volume descriptor records follow this, with one record per sector.
386   for (uint32_t iSector = first_filesystem_sector; seek_to_sector(iSector);
387        ++iSector) {
388     uint8_t aBuffer[root_directory_offset + root_directory_entry_size];
389     if (!read_data(sizeof(aBuffer), aBuffer)) {
390       break;
391     }
392     // CD001 is a standard identifier, \x01 is a version number
393     if (std::memcmp(aBuffer + 1, "CD001\x01", 6) == 0) {
394       if (aBuffer[0] == vdt_primary_volume) {
395         sector_size = bytes_to_uint16_le(aBuffer + sector_size_offset);
396         try {
397           find_hosp_directory(aBuffer + root_directory_offset,
398                               root_directory_entry_size, 0);
399           if (files.empty()) {
400             set_error(
401                 "Could not find Theme Hospital data "
402                 "directory.");
403             return false;
404           } else {
405             return true;
406           }
407         } catch (const std::exception& ex) {
408           set_error(ex.what());
409           return false;
410         }
411       } else if (aBuffer[0] == vdt_terminator) {
412         break;
413       }
414     }
415   }
416   set_error("Could not find primary volume descriptor.");
417   return false;
418 }
419 
file_metadata_less(const file_metadata & lhs,const file_metadata & rhs)420 bool iso_filesystem::file_metadata_less(const file_metadata& lhs,
421                                         const file_metadata& rhs) {
422   return lhs.path < rhs.path;
423 }
424 
find_hosp_directory(const uint8_t * pDirEnt,int iDirEntsSize,int iLevel)425 int iso_filesystem::find_hosp_directory(const uint8_t* pDirEnt,
426                                         int iDirEntsSize, int iLevel) {
427   // Sanity check
428   // Apart from at the root level, directory record arrays must take up whole
429   // sectors, whose sizes are powers of two and at least 2048.
430   // The formal limit on directory depth is 8, so hitting 16 is insane.
431   if ((iLevel != 0 && (iDirEntsSize & (min_sector_size - 1)) != 0) ||
432       iLevel > max_directory_depth)
433     return 0;
434 
435   std::unique_ptr<uint8_t[]> pBuffer(nullptr);
436   uint32_t iBufferSize = 0;
437   iso_directory_iterator dir_iter(pDirEnt, pDirEnt + iDirEntsSize);
438   iso_directory_iterator end_iter(pDirEnt + iDirEntsSize,
439                                   pDirEnt + iDirEntsSize);
440   for (; dir_iter != end_iter; ++dir_iter) {
441     const iso_file_entry& ent = *dir_iter;
442     if (ent.flags & def_directory) {
443       // The names "\x00" and "\x01" are used for the current directory
444       // the parent directory respectively. We only want to visit these
445       // when at the root level.
446       if (iLevel == 0 || !(ent.filename == std::string(1, '\x00') ||
447                            ent.filename == std::string(1, '\x01'))) {
448         if (ent.data_length > iBufferSize) {
449           iBufferSize = ent.data_length;
450           pBuffer = std::make_unique<uint8_t[]>(iBufferSize);
451         }
452         if (seek_to_sector(ent.data_sector) &&
453             read_data(ent.data_length, pBuffer.get())) {
454           int iFoundLevel =
455               find_hosp_directory(pBuffer.get(), ent.data_length, iLevel + 1);
456           if (iFoundLevel != 0) {
457             if (iFoundLevel == 2) {
458               build_file_lookup_table(ent.data_sector, ent.data_length,
459                                       std::string(""));
460             }
461             return iFoundLevel + 1;
462           }
463         }
464       }
465     } else {
466       // Look for VBLK-0.TAB to serve as indication that we've found the
467       // Theme Hospital data.
468       if (ent.filename == vblk_0_filename) {
469         return 1;
470       }
471     }
472   }
473 
474   return 0;
475 }
476 
build_file_lookup_table(uint32_t iSector,int iDirEntsSize,const std::string & prefix)477 void iso_filesystem::build_file_lookup_table(uint32_t iSector, int iDirEntsSize,
478                                              const std::string& prefix) {
479   // Sanity check
480   // Apart from at the root level, directory record arrays must take up whole
481   // sectors, whose sizes are powers of two and at least 2048.
482   // Path lengths shouldn't exceed 256 either (or at least not for the files
483   // which we're interested in).
484   if ((prefix.size() != 0 && (iDirEntsSize & 0x7FF)) || (prefix.size() > 256))
485     return;
486 
487   uint8_t* pBuffer = new uint8_t[iDirEntsSize];
488   if (!seek_to_sector(iSector) || !read_data(iDirEntsSize, pBuffer)) {
489     delete[] pBuffer;
490     return;
491   }
492 
493   uint8_t* pDirEnt = pBuffer;
494   iso_directory_iterator dir_iter(pDirEnt, pDirEnt + iDirEntsSize);
495   iso_directory_iterator end_iter(pDirEnt + iDirEntsSize,
496                                   pDirEnt + iDirEntsSize);
497   for (; dir_iter != end_iter; ++dir_iter) {
498     const iso_file_entry& ent = *dir_iter;
499     std::string path;
500     if (prefix.empty()) {
501       path = ent.filename;
502     } else {
503       path = prefix + path_seperator + ent.filename;
504     }
505 
506     if (ent.flags & def_directory) {
507       // None of the directories which we're interested in have length 1.
508       // This also avoids the dummy "current" and "parent" directories.
509       if (ent.filename.size() > 1) {
510         build_file_lookup_table(ent.data_sector, ent.data_length, path);
511       }
512     } else {
513       file_metadata file{};
514       file.path = std::move(path);
515       file.sector = ent.data_sector;
516       file.size = ent.data_length;
517       files.push_back(file);
518     }
519   }
520   delete[] pBuffer;
521 
522   if (prefix.size() == 0) {
523     // The lookup table will be ordered by the underlying ordering of the
524     // disk. we want it sorted by the path for ease of lookup.
525     std::sort(files.begin(), files.end(), file_metadata_less);
526   }
527 }
528 
visit_directory_files(const char * sPath,void (* fnCallback)(void *,const char *,const char *),void * pCallbackData) const529 void iso_filesystem::visit_directory_files(
530     const char* sPath, void (*fnCallback)(void*, const char*, const char*),
531     void* pCallbackData) const {
532   std::string normalised_path = normalise(sPath);
533 
534   // Inefficient (better would be to binary search for first and last files
535   // which begin with sPath), but who cares - this isn't called often
536   for (const file_metadata& file : files) {
537     if (normalised_path.size() < file.path.size() &&
538         std::equal(normalised_path.begin(), normalised_path.end(),
539                    file.path.begin())) {
540       size_t filename_pos = normalised_path.size();
541       if (file.path.at(normalised_path.size()) == path_seperator) {
542         ++filename_pos;
543       }
544       std::string filename(file.path.substr(filename_pos));
545 
546       if (filename.find(path_seperator) == filename.npos) {
547         fnCallback(pCallbackData, filename.c_str(), file.path.c_str());
548       }
549     }
550   }
551 }
552 
find_file(const char * sPath) const553 iso_filesystem::file_handle iso_filesystem::find_file(const char* sPath) const {
554   std::string normalised_path = normalise(sPath);
555 
556   // Standard binary search over sorted list of files
557   int iLower = 0;
558   int iUpper = static_cast<int>(files.size());
559   while (iLower != iUpper) {
560     int iMid = (iLower + iUpper) / 2;
561     int iComp = normalised_path.compare(files[iMid].path);
562     if (iComp == 0) {
563       return iMid + 1;
564     } else if (iComp < 0) {
565       iUpper = iMid;
566     } else {
567       iLower = iMid + 1;
568     }
569   }
570   return 0;
571 }
572 
get_file_size(file_handle iFile) const573 uint32_t iso_filesystem::get_file_size(file_handle iFile) const {
574   if (iFile <= 0 || static_cast<size_t>(iFile) > files.size())
575     return 0;
576   else
577     return files[iFile - 1].size;
578 }
579 
get_file_data(file_handle iFile,uint8_t * pBuffer)580 bool iso_filesystem::get_file_data(file_handle iFile, uint8_t* pBuffer) {
581   if (iFile <= 0 || static_cast<size_t>(iFile) > files.size()) {
582     set_error("Invalid file handle.");
583     return false;
584   } else {
585     return seek_to_sector(files[iFile - 1].sector) &&
586            read_data(files[iFile - 1].size, pBuffer);
587   }
588 }
589 
get_error() const590 const char* iso_filesystem::get_error() const { return error; }
591 
seek_to_sector(uint32_t iSector)592 bool iso_filesystem::seek_to_sector(uint32_t iSector) {
593   if (!raw_file) {
594     set_error("No raw file.");
595     return false;
596   }
597   int res =
598       std::fseek(raw_file, sector_size * static_cast<long>(iSector), SEEK_SET);
599   if (res == 0) {
600     return true;
601   } else {
602     set_error("Unable to seek to sector %i.", static_cast<int>(iSector));
603     return false;
604   }
605 }
606 
read_data(uint32_t iByteCount,uint8_t * pBuffer)607 bool iso_filesystem::read_data(uint32_t iByteCount, uint8_t* pBuffer) {
608   if (!raw_file) {
609     set_error("No raw file.");
610     return false;
611   }
612   if (std::fread(pBuffer, 1, iByteCount, raw_file) == iByteCount)
613     return true;
614   else {
615     set_error("Unable to read %i bytes.", static_cast<int>(iByteCount));
616     return false;
617   }
618 }
619 
set_error(const char * sFormat,...)620 void iso_filesystem::set_error(const char* sFormat, ...) {
621   if (error == nullptr) {
622     // None of the errors which we generate will be longer than 1024.
623     error = new char[1024];
624   }
625   va_list a;
626   va_start(a, sFormat);
627   // The valist test is buggy in this environment
628   // (https://bugs.llvm.org/show_bug.cgi?id=41311)
629   // NOLINTNEXTLINE(clang-analyzer-valist.Uninitialized)
630   std::vsnprintf(error, 1024, sFormat, a);
631   va_end(a);
632 }
633