1 // Archive/WimIn.h
2 
3 #ifndef __ARCHIVE_WIM_IN_H
4 #define __ARCHIVE_WIM_IN_H
5 
6 #include "../../../../C/Alloc.h"
7 
8 #include "../../../Common/MyBuffer.h"
9 #include "../../../Common/MyXml.h"
10 
11 #include "../../../Windows/PropVariant.h"
12 
13 #include "../../Compress/CopyCoder.h"
14 #include "../../Compress/LzmsDecoder.h"
15 #include "../../Compress/LzxDecoder.h"
16 
17 #include "../IArchive.h"
18 
19 namespace NArchive {
20 namespace NWim {
21 
22 /*
23 WIM versions:
24 hexVer : headerSize : ver
25  : 1.07.01 - 1.08.01 : Longhorn.4001-4015 - another header, no signature, CAB compression
26 10900 : 60 : 1.09 : Longhorn.4029-4039 (2003)
27 10A00 : 60 : 1.10 : Longhorn.4083 (2004) image starting from 1
28 10B00 : ?? : 1.11 : ??
29 10C00 : 74 : 1.12 : Longhorn.4093 - VistaBeta1.5112 (2005) - (Multi-Part, SHA1)
30 10D00 : D0 : 1.13 : VistaBeta2 - Win10, (NumImages, BootIndex, IntegrityResource)
31 00E00 : D0 : 0.14 : LZMS, solid, esd, dism
32 */
33 
// Sizes of the fixed-length part of a directory entry. They equal the hex
// offsets of the variable-length name field in the DIRENTRY_OLD / DIRENTRY
// layouts documented in the comment that follows.
const unsigned kDirRecordSizeOld = 0x3E; // 62:  DIRENTRY_OLD
const unsigned kDirRecordSize = 0x66;    // 102: DIRENTRY
36 
37 /*
  There is an error in the WIM specification regarding the dwReparseTag, dwReparseReserved and liHardLink fields.
39 
40   Correct DIRENTRY structure:
41   {
42     hex offset
43      0    UInt64  Len;
44      8    UInt32  Attrib;
45      C    UInt32  SecurityId;
46 
47     10    UInt64  SubdirOffset; // = 0 for files
48 
49     18    UInt64  unused1; // = 0?
50     20    UInt64  unused2; // = 0?
51 
52     28    UInt64  CTime;
53     30    UInt64  ATime;
54     38    UInt64  MTime;
55 
56     40    Byte    Sha1[20];
57 
58     54    UInt32  Unknown1; // is it 0 always?
59 
60 
61     union
62     {
63     58    UInt64  NtNodeId;
64         {
65     58    UInt32  ReparseTag;
66     5C    UInt32  ReparseFlags; // is it 0 always? Check with new imagex.
67         }
68     }
69 
70     60    UInt16  Streams;
71 
72     62    UInt16  ShortNameLen;
73     64    UInt16  FileNameLen;
74 
75     66    UInt16  Name[];
76           UInt16  ShortName[];
77   }
78 
79   // DIRENTRY for WIM_VERSION <= 1.10
80   DIRENTRY_OLD structure:
81   {
82     hex offset
83      0    UInt64  Len;
84      8    UInt32  Attrib;
85      C    UInt32  SecurityId;
86 
87     union
88     {
89     10    UInt64  SubdirOffset; //
90 
91     10    UInt32  OldWimFileId; // used for files in old WIMs
92     14    UInt32  OldWimFileId_Reserved; // = 0
93     }
94 
95     18    UInt64  CTime;
96     20    UInt64  ATime;
97     28    UInt64  MTime;
98 
99     30    UInt64  Unknown; // NtNodeId ?
100 
101     38    UInt16  Streams;
102     3A    UInt16  ShortNameLen;
103     3C    UInt16  FileNameLen;
104     3E    UInt16  FileName[];
105           UInt16  ShortName[];
106   }
107 
108   ALT_STREAM structure:
109   {
110     hex offset
111      0    UInt64  Len;
112      8    UInt64  Unused;
113     10    Byte    Sha1[20];
114     24    UInt16  FileNameLen;
115     26    UInt16  FileName[];
116   }
117 
118   ALT_STREAM_OLD structure:
119   {
120     hex offset
121      0    UInt64  Len;
122      8    UInt64  StreamId; // 32-bit value
123     10    UInt16  FileNameLen;
124     12    UInt16  FileName[];
125   }
126 
  If an item is a file (not a directory) and it has alternate streams,
  the Streams array contains an additional ALT_STREAM item for the main "unnamed" stream.
129 
130 */
131 
132 
133 namespace NResourceFlags
134 {
135   // const Byte kFree = 1 << 0;
136   const Byte kMetadata = 1 << 1;
137   const Byte kCompressed = 1 << 2;
138   // const Byte kSpanned = 1 << 3;
139   const Byte kSolid = 1 << 4;
140 }
141 
142 const UInt64 k_SolidBig_Resource_Marker = (UInt64)1 << 32;
143 
144 struct CResource
145 {
146   UInt64 PackSize;
147   UInt64 Offset;
148   UInt64 UnpackSize;
149   Byte Flags;
150   bool KeepSolid;
151   int SolidIndex;
152 
ClearCResource153   void Clear()
154   {
155     PackSize = 0;
156     Offset = 0;
157     UnpackSize = 0;
158     Flags = 0;
159     KeepSolid = false;
160     SolidIndex = -1;
161   }
162 
GetEndLimitCResource163   UInt64 GetEndLimit() const { return Offset + PackSize; }
164   void Parse(const Byte *p);
ParseAndUpdatePhySizeCResource165   void ParseAndUpdatePhySize(const Byte *p, UInt64 &phySize)
166   {
167     Parse(p);
168     UInt64 v = GetEndLimit();
169     if (phySize < v)
170       phySize = v;
171   }
172 
173   void WriteTo(Byte *p) const;
174 
IsMetadataCResource175   bool IsMetadata() const { return (Flags & NResourceFlags::kMetadata) != 0; }
IsCompressedCResource176   bool IsCompressed() const { return (Flags & NResourceFlags::kCompressed) != 0; }
IsSolidCResource177   bool IsSolid() const { return (Flags & NResourceFlags::kSolid) != 0; }
IsSolidBigCResource178   bool IsSolidBig() const { return IsSolid() && UnpackSize == k_SolidBig_Resource_Marker; }
IsSolidSmallCResource179   bool IsSolidSmall() const { return IsSolid() && UnpackSize == 0; }
180 
IsEmptyCResource181   bool IsEmpty() const { return (UnpackSize == 0); }
182 };
183 
184 
185 struct CSolid
186 {
187   unsigned StreamIndex;
188   // unsigned NumRefs;
189   int FirstSmallStream;
190 
191   UInt64 SolidOffset;
192 
193   UInt64 UnpackSize;
194   int Method;
195   int ChunkSizeBits;
196 
197   UInt64 HeadersSize;
198   // size_t NumChunks;
199   CObjArray<UInt64> Chunks; // [NumChunks + 1] (start offset)
200 
GetChunkPackSizeCSolid201   UInt64 GetChunkPackSize(size_t chunkIndex) const { return Chunks[chunkIndex + 1] - Chunks[chunkIndex]; }
202 
CSolidCSolid203   CSolid():
204       FirstSmallStream(-1),
205       // NumRefs(0),
206       Method(-1)
207       {}
208 };
209 
210 
211 namespace NHeaderFlags
212 {
213   const UInt32 kCompression  = 1 << 1;
214   const UInt32 kReadOnly     = 1 << 2;
215   const UInt32 kSpanned      = 1 << 3;
216   const UInt32 kResourceOnly = 1 << 4;
217   const UInt32 kMetadataOnly = 1 << 5;
218   const UInt32 kWriteInProgress = 1 << 6;
219   const UInt32 kReparsePointFixup = 1 << 7;
220 
221   const UInt32 kXPRESS       = (UInt32)1 << 17;
222   const UInt32 kLZX          = (UInt32)1 << 18;
223   const UInt32 kLZMS         = (UInt32)1 << 19;
224   const UInt32 kXPRESS2      = (UInt32)1 << 21; // XPRESS with nonstandard chunk size ?
225 
226   const UInt32 kMethodMask   = 0xFFFE0000;
227 }
228 
229 
230 namespace NMethod
231 {
232   const UInt32 kXPRESS = 1;
233   const UInt32 kLZX    = 2;
234   const UInt32 kLZMS   = 3;
235 }
236 
237 
238 const UInt32 k_Version_NonSolid = 0x10D00;
239 const UInt32 k_Version_Solid = 0xE00;
240 
241 const unsigned kHeaderSizeMax = 0xD0;
242 const unsigned kSignatureSize = 8;
243 extern const Byte kSignature[kSignatureSize];
244 
245 const unsigned kChunkSizeBits = 15;
246 const UInt32 kChunkSize = (UInt32)1 << kChunkSizeBits;
247 
248 
249 struct CHeader
250 {
251   UInt32 Version;
252   UInt32 Flags;
253   UInt32 ChunkSize;
254   unsigned ChunkSizeBits;
255   Byte Guid[16];
256   UInt16 PartNumber;
257   UInt16 NumParts;
258   UInt32 NumImages;
259   UInt32 BootIndex;
260 
261   bool _IsOldVersion; // 1.10-
262   bool _IsNewVersion; // 1.13+ or 0.14
263 
264   CResource OffsetResource;
265   CResource XmlResource;
266   CResource MetadataResource;
267   CResource IntegrityResource;
268 
269   void SetDefaultFields(bool useLZX);
270 
271   void WriteTo(Byte *p) const;
272   HRESULT Parse(const Byte *p, UInt64 &phySize);
273 
IsCompressedCHeader274   bool IsCompressed() const { return (Flags & NHeaderFlags::kCompression) != 0; }
275 
IsSupportedCHeader276   bool IsSupported() const
277   {
278     return (!IsCompressed()
279         || (Flags & NHeaderFlags::kLZX) != 0
280         || (Flags & NHeaderFlags::kXPRESS) != 0
281         || (Flags & NHeaderFlags::kLZMS) != 0
282         || (Flags & NHeaderFlags::kXPRESS2) != 0);
283   }
284 
GetMethodCHeader285   unsigned GetMethod() const
286   {
287     if (!IsCompressed())
288       return 0;
289     UInt32 mask = (Flags & NHeaderFlags::kMethodMask);
290     if (mask == 0) return 0;
291     if (mask == NHeaderFlags::kXPRESS) return NMethod::kXPRESS;
292     if (mask == NHeaderFlags::kLZX) return NMethod::kLZX;
293     if (mask == NHeaderFlags::kLZMS) return NMethod::kLZMS;
294     if (mask == NHeaderFlags::kXPRESS2) return NMethod::kXPRESS;
295     return mask;
296   }
297 
IsOldVersionCHeader298   bool IsOldVersion() const { return _IsOldVersion; }
IsNewVersionCHeader299   bool IsNewVersion() const { return _IsNewVersion; }
IsSolidVersionCHeader300   bool IsSolidVersion() const { return (Version == k_Version_Solid); }
301 
AreFromOnArchiveCHeader302   bool AreFromOnArchive(const CHeader &h)
303   {
304     return (memcmp(Guid, h.Guid, sizeof(Guid)) == 0) && (h.NumParts == NumParts);
305   }
306 };
307 
308 
309 const unsigned kHashSize = 20;
310 
IsEmptySha(const Byte * data)311 inline bool IsEmptySha(const Byte *data)
312 {
313   for (unsigned i = 0; i < kHashSize; i++)
314     if (data[i] != 0)
315       return false;
316   return true;
317 }
318 
319 const unsigned kStreamInfoSize = 24 + 2 + 4 + kHashSize;
320 
321 struct CStreamInfo
322 {
323   CResource Resource;
324   UInt16 PartNumber;      // for NEW WIM format, we set it to 1 for OLD WIM format
325   UInt32 RefCount;
326   UInt32 Id;              // for OLD WIM format
327   Byte Hash[kHashSize];
328 
IsEmptyHashCStreamInfo329   bool IsEmptyHash() const { return IsEmptySha(Hash); }
330 
331   void WriteTo(Byte *p) const;
332 };
333 
334 
// One item of the archive database. All index fields use -1 for "none".
struct CItem
{
  size_t Offset;
  int IndexInSorted;
  int StreamIndex;
  int Parent;
  int ImageIndex; // -1 means that file is unreferenced in Images (deleted item?)
  bool IsDir;
  bool IsAltStream;

  // True if the item is referenced from an image (ImageIndex >= 0).
  bool HasMetadata() const { return ImageIndex >= 0; }

  // Every field is initialized, so that HasMetadata() and Offset are well
  // defined for a default-constructed item. ImageIndex defaults to the
  // documented "unreferenced" value (-1).
  CItem():
    Offset(0),
    IndexInSorted(-1),
    StreamIndex(-1),
    Parent(-1),
    ImageIndex(-1),
    IsDir(false),
    IsAltStream(false)
    {}
};
355 
356 struct CImage
357 {
358   CByteBuffer Meta;
359   CRecordVector<UInt32> SecurOffsets;
360   unsigned StartItem;
361   unsigned NumItems;
362   unsigned NumEmptyRootItems;
363   int VirtualRootIndex; // index in CDatabase::VirtualRoots[]
364   UString RootName;
365   CByteBuffer RootNameBuf;
366 
CImageCImage367   CImage(): VirtualRootIndex(-1) {}
368 };
369 
370 
371 struct CImageInfo
372 {
373   bool CTimeDefined;
374   bool MTimeDefined;
375   bool NameDefined;
376   bool IndexDefined;
377 
378   FILETIME CTime;
379   FILETIME MTime;
380   UString Name;
381 
382   UInt64 DirCount;
383   UInt64 FileCount;
384   UInt32 Index;
385 
386   int ItemIndexInXml;
387 
GetTotalFilesAndDirsCImageInfo388   UInt64 GetTotalFilesAndDirs() const { return DirCount + FileCount; }
389 
CImageInfoCImageInfo390   CImageInfo(): CTimeDefined(false), MTimeDefined(false), NameDefined(false),
391       IndexDefined(false), ItemIndexInXml(-1) {}
392   void Parse(const CXmlItem &item);
393 };
394 
395 
396 struct CWimXml
397 {
398   CByteBuffer Data;
399   CXml Xml;
400 
401   UInt16 VolIndex;
402   CObjectVector<CImageInfo> Images;
403 
404   UString FileName;
405   bool IsEncrypted;
406 
GetTotalFilesAndDirsCWimXml407   UInt64 GetTotalFilesAndDirs() const
408   {
409     UInt64 sum = 0;
410     FOR_VECTOR (i, Images)
411       sum += Images[i].GetTotalFilesAndDirs();
412     return sum;
413   }
414 
415   void ToUnicode(UString &s);
416   bool Parse();
417 
CWimXmlCWimXml418   CWimXml(): IsEncrypted(false) {}
419 };
420 
421 
422 struct CVolume
423 {
424   CHeader Header;
425   CMyComPtr<IInStream> Stream;
426 };
427 
428 
429 class CDatabase
430 {
431   Byte *DirData;
432   size_t DirSize;
433   size_t DirProcessed;
434   size_t DirStartOffset;
435   IArchiveOpenCallback *OpenCallback;
436 
437   HRESULT ParseDirItem(size_t pos, int parent);
438   HRESULT ParseImageDirs(CByteBuffer &buf, int parent);
439 
440 public:
441   CRecordVector<CStreamInfo> DataStreams;
442   CRecordVector<CStreamInfo> MetaStreams;
443 
444   CObjectVector<CSolid> Solids;
445 
446   CRecordVector<CItem> Items;
447   CObjectVector<CByteBuffer> ReparseItems;
448   CIntVector ItemToReparse; // from index_in_Items to index_in_ReparseItems
449                             // -1 means no reparse;
450 
451   CObjectVector<CImage> Images;
452 
453   bool IsOldVersion9;
454   bool IsOldVersion;
455   bool ThereAreDeletedStreams;
456   bool ThereAreAltStreams;
457   bool RefCountError;
458   bool HeadersError;
459 
GetStartImageIndex()460   bool GetStartImageIndex() const { return IsOldVersion9 ? 0 : 1; }
GetDirAlignMask()461   unsigned GetDirAlignMask() const { return IsOldVersion9 ? 3 : 7; }
462 
463   // User Items can contain all images or just one image from all.
464   CUIntVector SortedItems;
465   int IndexOfUserImage;    // -1 : if more than one images was filled to Sorted Items
466 
467   unsigned NumExcludededItems;
468   int ExludedItem;          // -1 : if there are no exclude items
469   CUIntVector VirtualRoots; // we use them for old 1.10 WIM archives
470 
ThereIsError()471   bool ThereIsError() const { return RefCountError || HeadersError; }
472 
GetNumUserItemsInImage(unsigned imageIndex)473   unsigned GetNumUserItemsInImage(unsigned imageIndex) const
474   {
475     if (IndexOfUserImage >= 0 && imageIndex != (unsigned)IndexOfUserImage)
476       return 0;
477     if (imageIndex >= Images.Size())
478       return 0;
479     return Images[imageIndex].NumItems - NumExcludededItems;
480   }
481 
482   bool ItemHasStream(const CItem &item) const;
483 
Get_UnpackSize_of_Resource(const CResource & r)484   UInt64 Get_UnpackSize_of_Resource(const CResource &r) const
485   {
486     if (!r.IsSolid())
487       return r.UnpackSize;
488     if (r.IsSolidSmall())
489       return r.PackSize;
490     if (r.IsSolidBig() && r.SolidIndex >= 0)
491       return Solids[(unsigned)r.SolidIndex].UnpackSize;
492     return 0;
493   }
494 
Get_PackSize_of_Resource(unsigned streamIndex)495   UInt64 Get_PackSize_of_Resource(unsigned streamIndex) const
496   {
497     const CResource &r = DataStreams[streamIndex].Resource;
498     if (!r.IsSolidSmall())
499       return r.PackSize;
500     if (r.SolidIndex >= 0)
501     {
502       const CSolid &ss = Solids[(unsigned)r.SolidIndex];
503       if (ss.FirstSmallStream == (int)streamIndex)
504         return DataStreams[ss.StreamIndex].Resource.PackSize;
505     }
506     return 0;
507   }
508 
GetUnpackSize()509   UInt64 GetUnpackSize() const
510   {
511     UInt64 res = 0;
512     FOR_VECTOR (i, DataStreams)
513       res += DataStreams[i].Resource.UnpackSize;
514     return res;
515   }
516 
GetPackSize()517   UInt64 GetPackSize() const
518   {
519     UInt64 res = 0;
520     FOR_VECTOR (i, DataStreams)
521       res += DataStreams[i].Resource.PackSize;
522     return res;
523   }
524 
Clear()525   void Clear()
526   {
527     DataStreams.Clear();
528     MetaStreams.Clear();
529     Solids.Clear();
530 
531     Items.Clear();
532     ReparseItems.Clear();
533     ItemToReparse.Clear();
534 
535     SortedItems.Clear();
536 
537     Images.Clear();
538     VirtualRoots.Clear();
539 
540     IsOldVersion = false;
541     ThereAreDeletedStreams = false;
542     ThereAreAltStreams = false;
543     RefCountError = false;
544     HeadersError = false;
545   }
546 
CDatabase()547   CDatabase():
548     RefCountError(false),
549     HeadersError(false)
550     {}
551 
552   void GetShortName(unsigned index, NWindows::NCOM::CPropVariant &res) const;
553   void GetItemName(unsigned index1, NWindows::NCOM::CPropVariant &res) const;
554   void GetItemPath(unsigned index, bool showImageNumber, NWindows::NCOM::CPropVariant &res) const;
555 
556   HRESULT OpenXml(IInStream *inStream, const CHeader &h, CByteBuffer &xml);
557   HRESULT Open(IInStream *inStream, const CHeader &h, unsigned numItemsReserve, IArchiveOpenCallback *openCallback);
558   HRESULT FillAndCheck(const CObjectVector<CVolume> &volumes);
559 
560   /*
561     imageIndex showImageNumber NumImages
562          *        true           *       Show Image_Number
563         -1           *          >1       Show Image_Number
564         -1        false          1       Don't show Image_Number
565          N        false          *       Don't show Image_Number
566   */
567   HRESULT GenerateSortedItems(int imageIndex, bool showImageNumber);
568 
569   HRESULT ExtractReparseStreams(const CObjectVector<CVolume> &volumes, IArchiveOpenCallback *openCallback);
570 };
571 
572 HRESULT ReadHeader(IInStream *inStream, CHeader &header, UInt64 &phySize);
573 
574 
575 struct CMidBuf
576 {
577   Byte *Data;
578   size_t _size;
579 
CMidBufCMidBuf580   CMidBuf(): Data(NULL), _size(0) {}
581 
EnsureCapacityCMidBuf582   void EnsureCapacity(size_t size)
583   {
584     if (size > _size)
585     {
586       ::MidFree(Data);
587       _size = 0;
588       Data = (Byte *)::MidAlloc(size);
589       if (Data)
590         _size = size;
591     }
592   }
593 
~CMidBufCMidBuf594   ~CMidBuf() { ::MidFree(Data); }
595 };
596 
597 
598 class CUnpacker
599 {
600   NCompress::CCopyCoder *copyCoderSpec;
601   CMyComPtr<ICompressCoder> copyCoder;
602 
603   NCompress::NLzx::CDecoder *lzxDecoderSpec;
604   CMyComPtr<IUnknown> lzxDecoder;
605 
606   NCompress::NLzms::CDecoder *lzmsDecoder;
607 
608   CByteBuffer sizesBuf;
609 
610   CMidBuf packBuf;
611   CMidBuf unpackBuf;
612 
613   // solid resource
614   int _solidIndex;
615   size_t _unpackedChunkIndex;
616 
617   HRESULT UnpackChunk(
618       ISequentialInStream *inStream,
619       unsigned method, unsigned chunkSizeBits,
620       size_t inSize, size_t outSize,
621       ISequentialOutStream *outStream);
622 
623   HRESULT Unpack2(
624       IInStream *inStream,
625       const CResource &res,
626       const CHeader &header,
627       const CDatabase *db,
628       ISequentialOutStream *outStream,
629       ICompressProgressInfo *progress);
630 
631 public:
632   UInt64 TotalPacked;
633 
CUnpacker()634   CUnpacker():
635       lzmsDecoder(NULL),
636       _solidIndex(-1),
637       _unpackedChunkIndex(0),
638       TotalPacked(0)
639       {}
640   ~CUnpacker();
641 
642   HRESULT Unpack(
643       IInStream *inStream,
644       const CResource &res,
645       const CHeader &header,
646       const CDatabase *db,
647       ISequentialOutStream *outStream,
648       ICompressProgressInfo *progress,
649       Byte *digest);
650 
651   HRESULT UnpackData(IInStream *inStream,
652       const CResource &resource, const CHeader &header,
653       const CDatabase *db,
654       CByteBuffer &buf, Byte *digest);
655 };
656 
657 }}
658 
659 #endif
660