1 // Archive/WimIn.h
2
3 #ifndef __ARCHIVE_WIM_IN_H
4 #define __ARCHIVE_WIM_IN_H
5
6 #include "../../../../C/Alloc.h"
7
8 #include "../../../Common/MyBuffer.h"
9 #include "../../../Common/MyXml.h"
10
11 #include "../../../Windows/PropVariant.h"
12
13 #include "../../Compress/CopyCoder.h"
14 #include "../../Compress/LzmsDecoder.h"
15 #include "../../Compress/LzxDecoder.h"
16
17 #include "../IArchive.h"
18
19 namespace NArchive {
20 namespace NWim {
21
22 /*
23 WIM versions:
24 hexVer : headerSize : ver
25 : 1.07.01 - 1.08.01 : Longhorn.4001-4015 - another header, no signature, CAB compression
26 10900 : 60 : 1.09 : Longhorn.4029-4039 (2003)
27 10A00 : 60 : 1.10 : Longhorn.4083 (2004) image starting from 1
28 10B00 : ?? : 1.11 : ??
29 10C00 : 74 : 1.12 : Longhorn.4093 - VistaBeta1.5112 (2005) - (Multi-Part, SHA1)
30 10D00 : D0 : 1.13 : VistaBeta2 - Win10, (NumImages, BootIndex, IntegrityResource)
31 00E00 : D0 : 0.14 : LZMS, solid, esd, dism
32 */
33
// Size in bytes of the fixed part of a directory record, i.e. the offset at
// which the variable-length name begins:
//   62  = 0x3E : FileName offset in DIRENTRY_OLD (WIM_VERSION <= 1.10)
//   102 = 0x66 : Name offset in DIRENTRY
// (both layouts are described in the comment below).
const unsigned kDirRecordSizeOld = 62;
const unsigned kDirRecordSize = 102;
36
37 /*
There is an error in the WIM specification regarding the dwReparseTag, dwReparseReserved and liHardLink fields.
39
40 Correct DIRENTRY structure:
41 {
42 hex offset
43 0 UInt64 Len;
44 8 UInt32 Attrib;
45 C UInt32 SecurityId;
46
47 10 UInt64 SubdirOffset; // = 0 for files
48
49 18 UInt64 unused1; // = 0?
50 20 UInt64 unused2; // = 0?
51
52 28 UInt64 CTime;
53 30 UInt64 ATime;
54 38 UInt64 MTime;
55
56 40 Byte Sha1[20];
57
58 54 UInt32 Unknown1; // is it 0 always?
59
60
61 union
62 {
63 58 UInt64 NtNodeId;
64 {
65 58 UInt32 ReparseTag;
66 5C UInt32 ReparseFlags; // is it 0 always? Check with new imagex.
67 }
68 }
69
70 60 UInt16 Streams;
71
72 62 UInt16 ShortNameLen;
73 64 UInt16 FileNameLen;
74
75 66 UInt16 Name[];
76 UInt16 ShortName[];
77 }
78
79 // DIRENTRY for WIM_VERSION <= 1.10
80 DIRENTRY_OLD structure:
81 {
82 hex offset
83 0 UInt64 Len;
84 8 UInt32 Attrib;
85 C UInt32 SecurityId;
86
87 union
88 {
89 10 UInt64 SubdirOffset; //
90
91 10 UInt32 OldWimFileId; // used for files in old WIMs
92 14 UInt32 OldWimFileId_Reserved; // = 0
93 }
94
95 18 UInt64 CTime;
96 20 UInt64 ATime;
97 28 UInt64 MTime;
98
99 30 UInt64 Unknown; // NtNodeId ?
100
101 38 UInt16 Streams;
102 3A UInt16 ShortNameLen;
103 3C UInt16 FileNameLen;
104 3E UInt16 FileName[];
105 UInt16 ShortName[];
106 }
107
108 ALT_STREAM structure:
109 {
110 hex offset
111 0 UInt64 Len;
112 8 UInt64 Unused;
113 10 Byte Sha1[20];
114 24 UInt16 FileNameLen;
115 26 UInt16 FileName[];
116 }
117
118 ALT_STREAM_OLD structure:
119 {
120 hex offset
121 0 UInt64 Len;
122 8 UInt64 StreamId; // 32-bit value
123 10 UInt16 FileNameLen;
124 12 UInt16 FileName[];
125 }
126
If an item is a file (not a directory) and it has alternate streams,
the Streams array contains an additional ALT_STREAM item for the main "unnamed" stream.
129
130 */
131
132
// Bit flags stored in CResource::Flags.
// The commented-out values are bits that this header does not use.
namespace NResourceFlags
{
  // const Byte kFree = 1 << 0;
  const Byte kMetadata = 1 << 1;    // tested by CResource::IsMetadata()
  const Byte kCompressed = 1 << 2;  // tested by CResource::IsCompressed()
  // const Byte kSpanned = 1 << 3;
  const Byte kSolid = 1 << 4;       // tested by CResource::IsSolid()
}
141
// UnpackSize value (2^32) that, together with the kSolid flag, makes
// CResource::IsSolidBig() return true.
const UInt64 k_SolidBig_Resource_Marker = (UInt64)1 << 32;
143
144 struct CResource
145 {
146 UInt64 PackSize;
147 UInt64 Offset;
148 UInt64 UnpackSize;
149 Byte Flags;
150 bool KeepSolid;
151 int SolidIndex;
152
ClearCResource153 void Clear()
154 {
155 PackSize = 0;
156 Offset = 0;
157 UnpackSize = 0;
158 Flags = 0;
159 KeepSolid = false;
160 SolidIndex = -1;
161 }
162
GetEndLimitCResource163 UInt64 GetEndLimit() const { return Offset + PackSize; }
164 void Parse(const Byte *p);
ParseAndUpdatePhySizeCResource165 void ParseAndUpdatePhySize(const Byte *p, UInt64 &phySize)
166 {
167 Parse(p);
168 UInt64 v = GetEndLimit();
169 if (phySize < v)
170 phySize = v;
171 }
172
173 void WriteTo(Byte *p) const;
174
IsMetadataCResource175 bool IsMetadata() const { return (Flags & NResourceFlags::kMetadata) != 0; }
IsCompressedCResource176 bool IsCompressed() const { return (Flags & NResourceFlags::kCompressed) != 0; }
IsSolidCResource177 bool IsSolid() const { return (Flags & NResourceFlags::kSolid) != 0; }
IsSolidBigCResource178 bool IsSolidBig() const { return IsSolid() && UnpackSize == k_SolidBig_Resource_Marker; }
IsSolidSmallCResource179 bool IsSolidSmall() const { return IsSolid() && UnpackSize == 0; }
180
IsEmptyCResource181 bool IsEmpty() const { return (UnpackSize == 0); }
182 };
183
184
185 struct CSolid
186 {
187 unsigned StreamIndex;
188 // unsigned NumRefs;
189 int FirstSmallStream;
190
191 UInt64 SolidOffset;
192
193 UInt64 UnpackSize;
194 int Method;
195 int ChunkSizeBits;
196
197 UInt64 HeadersSize;
198 // size_t NumChunks;
199 CObjArray<UInt64> Chunks; // [NumChunks + 1] (start offset)
200
GetChunkPackSizeCSolid201 UInt64 GetChunkPackSize(size_t chunkIndex) const { return Chunks[chunkIndex + 1] - Chunks[chunkIndex]; }
202
CSolidCSolid203 CSolid():
204 FirstSmallStream(-1),
205 // NumRefs(0),
206 Method(-1)
207 {}
208 };
209
210
// Bit flags stored in CHeader::Flags.
namespace NHeaderFlags
{
  const UInt32 kCompression = 1 << 1;   // tested by CHeader::IsCompressed()
  const UInt32 kReadOnly = 1 << 2;
  const UInt32 kSpanned = 1 << 3;
  const UInt32 kResourceOnly = 1 << 4;
  const UInt32 kMetadataOnly = 1 << 5;
  const UInt32 kWriteInProgress = 1 << 6;
  const UInt32 kReparsePointFixup = 1 << 7;

  // Compression method bits; CHeader::GetMethod() maps them to NMethod values.
  const UInt32 kXPRESS = (UInt32)1 << 17;
  const UInt32 kLZX = (UInt32)1 << 18;
  const UInt32 kLZMS = (UInt32)1 << 19;
  const UInt32 kXPRESS2 = (UInt32)1 << 21; // XPRESS with nonstandard chunk size ?

  // Selects bits 17..31, the range that contains all method bits above.
  const UInt32 kMethodMask = 0xFFFE0000;
}
228
229
// Compression method identifiers returned by CHeader::GetMethod().
// GetMethod() returns 0 when the archive is not compressed or no method bit is set.
namespace NMethod
{
  const UInt32 kXPRESS = 1;
  const UInt32 kLZX = 2;
  const UInt32 kLZMS = 3;
}
236
237
// Version numbers (see the "WIM versions" table at the top of this file):
// 0x10D00 is version 1.13, 0xE00 is version 0.14 (LZMS, solid, esd).
const UInt32 k_Version_NonSolid = 0x10D00;
const UInt32 k_Version_Solid = 0xE00;

// 0xD0 is the largest header size listed in the "WIM versions" table.
const unsigned kHeaderSizeMax = 0xD0;
const unsigned kSignatureSize = 8;
// The signature bytes are defined outside this header.
extern const Byte kSignature[kSignatureSize];

// Chunk size constants: 1 << 15 = 32 KiB.
const unsigned kChunkSizeBits = 15;
const UInt32 kChunkSize = (UInt32)1 << kChunkSizeBits;
247
248
249 struct CHeader
250 {
251 UInt32 Version;
252 UInt32 Flags;
253 UInt32 ChunkSize;
254 unsigned ChunkSizeBits;
255 Byte Guid[16];
256 UInt16 PartNumber;
257 UInt16 NumParts;
258 UInt32 NumImages;
259 UInt32 BootIndex;
260
261 bool _IsOldVersion; // 1.10-
262 bool _IsNewVersion; // 1.13+ or 0.14
263
264 CResource OffsetResource;
265 CResource XmlResource;
266 CResource MetadataResource;
267 CResource IntegrityResource;
268
269 void SetDefaultFields(bool useLZX);
270
271 void WriteTo(Byte *p) const;
272 HRESULT Parse(const Byte *p, UInt64 &phySize);
273
IsCompressedCHeader274 bool IsCompressed() const { return (Flags & NHeaderFlags::kCompression) != 0; }
275
IsSupportedCHeader276 bool IsSupported() const
277 {
278 return (!IsCompressed()
279 || (Flags & NHeaderFlags::kLZX) != 0
280 || (Flags & NHeaderFlags::kXPRESS) != 0
281 || (Flags & NHeaderFlags::kLZMS) != 0
282 || (Flags & NHeaderFlags::kXPRESS2) != 0);
283 }
284
GetMethodCHeader285 unsigned GetMethod() const
286 {
287 if (!IsCompressed())
288 return 0;
289 UInt32 mask = (Flags & NHeaderFlags::kMethodMask);
290 if (mask == 0) return 0;
291 if (mask == NHeaderFlags::kXPRESS) return NMethod::kXPRESS;
292 if (mask == NHeaderFlags::kLZX) return NMethod::kLZX;
293 if (mask == NHeaderFlags::kLZMS) return NMethod::kLZMS;
294 if (mask == NHeaderFlags::kXPRESS2) return NMethod::kXPRESS;
295 return mask;
296 }
297
IsOldVersionCHeader298 bool IsOldVersion() const { return _IsOldVersion; }
IsNewVersionCHeader299 bool IsNewVersion() const { return _IsNewVersion; }
IsSolidVersionCHeader300 bool IsSolidVersion() const { return (Version == k_Version_Solid); }
301
AreFromOnArchiveCHeader302 bool AreFromOnArchive(const CHeader &h)
303 {
304 return (memcmp(Guid, h.Guid, sizeof(Guid)) == 0) && (h.NumParts == NumParts);
305 }
306 };
307
308
// Size in bytes of a hash value; matches the Sha1[20] fields in the
// structure layouts described at the top of this file.
const unsigned kHashSize = 20;
310
IsEmptySha(const Byte * data)311 inline bool IsEmptySha(const Byte *data)
312 {
313 for (unsigned i = 0; i < kHashSize; i++)
314 if (data[i] != 0)
315 return false;
316 return true;
317 }
318
// Size in bytes of a serialized stream record: 24 + 2 + 4 + kHashSize.
// NOTE(review): presumably 24 bytes of resource header, 2 bytes of part number
// and 4 bytes of reference count, followed by the hash - confirm against
// CStreamInfo::WriteTo.
const unsigned kStreamInfoSize = 24 + 2 + 4 + kHashSize;
320
// One stream record: the resource that holds the data plus its bookkeeping fields.
struct CStreamInfo
{
  CResource Resource;
  UInt16 PartNumber; // for NEW WIM format, we set it to 1 for OLD WIM format
  UInt32 RefCount;
  UInt32 Id; // for OLD WIM format
  Byte Hash[kHashSize];

  // True if every byte of Hash is zero.
  bool IsEmptyHash() const { return IsEmptySha(Hash); }

  // Serializes the record to (p); defined outside this header.
  void WriteTo(Byte *p) const;
};
333
334
// One item of the archive.
struct CItem
{
  size_t Offset;
  int IndexInSorted;
  int StreamIndex;
  int Parent;
  int ImageIndex; // -1 means that file is unreferenced in Images (deleted item?)
  bool IsDir;
  bool IsAltStream;

  // True if the item is referenced from an image (ImageIndex is not negative).
  bool HasMetadata() const { return ImageIndex >= 0; }

  // All fields get a defined value. Offset and ImageIndex were previously
  // left uninitialized, so HasMetadata() on a freshly constructed item read
  // an indeterminate value; ImageIndex now starts at the documented
  // "unreferenced" value (-1).
  CItem():
      Offset(0),
      IndexInSorted(-1),
      StreamIndex(-1),
      Parent(-1),
      ImageIndex(-1),
      IsDir(false),
      IsAltStream(false)
    {}
};
355
356 struct CImage
357 {
358 CByteBuffer Meta;
359 CRecordVector<UInt32> SecurOffsets;
360 unsigned StartItem;
361 unsigned NumItems;
362 unsigned NumEmptyRootItems;
363 int VirtualRootIndex; // index in CDatabase::VirtualRoots[]
364 UString RootName;
365 CByteBuffer RootNameBuf;
366
CImageCImage367 CImage(): VirtualRootIndex(-1) {}
368 };
369
370
371 struct CImageInfo
372 {
373 bool CTimeDefined;
374 bool MTimeDefined;
375 bool NameDefined;
376 bool IndexDefined;
377
378 FILETIME CTime;
379 FILETIME MTime;
380 UString Name;
381
382 UInt64 DirCount;
383 UInt64 FileCount;
384 UInt32 Index;
385
386 int ItemIndexInXml;
387
GetTotalFilesAndDirsCImageInfo388 UInt64 GetTotalFilesAndDirs() const { return DirCount + FileCount; }
389
CImageInfoCImageInfo390 CImageInfo(): CTimeDefined(false), MTimeDefined(false), NameDefined(false),
391 IndexDefined(false), ItemIndexInXml(-1) {}
392 void Parse(const CXmlItem &item);
393 };
394
395
396 struct CWimXml
397 {
398 CByteBuffer Data;
399 CXml Xml;
400
401 UInt16 VolIndex;
402 CObjectVector<CImageInfo> Images;
403
404 UString FileName;
405 bool IsEncrypted;
406
GetTotalFilesAndDirsCWimXml407 UInt64 GetTotalFilesAndDirs() const
408 {
409 UInt64 sum = 0;
410 FOR_VECTOR (i, Images)
411 sum += Images[i].GetTotalFilesAndDirs();
412 return sum;
413 }
414
415 void ToUnicode(UString &s);
416 bool Parse();
417
CWimXmlCWimXml418 CWimXml(): IsEncrypted(false) {}
419 };
420
421
// One volume of an archive: its header and the input stream it is read from.
struct CVolume
{
  CHeader Header;
  CMyComPtr<IInStream> Stream;
};
427
428
429 class CDatabase
430 {
431 Byte *DirData;
432 size_t DirSize;
433 size_t DirProcessed;
434 size_t DirStartOffset;
435 IArchiveOpenCallback *OpenCallback;
436
437 HRESULT ParseDirItem(size_t pos, int parent);
438 HRESULT ParseImageDirs(CByteBuffer &buf, int parent);
439
440 public:
441 CRecordVector<CStreamInfo> DataStreams;
442 CRecordVector<CStreamInfo> MetaStreams;
443
444 CObjectVector<CSolid> Solids;
445
446 CRecordVector<CItem> Items;
447 CObjectVector<CByteBuffer> ReparseItems;
448 CIntVector ItemToReparse; // from index_in_Items to index_in_ReparseItems
449 // -1 means no reparse;
450
451 CObjectVector<CImage> Images;
452
453 bool IsOldVersion9;
454 bool IsOldVersion;
455 bool ThereAreDeletedStreams;
456 bool ThereAreAltStreams;
457 bool RefCountError;
458 bool HeadersError;
459
GetStartImageIndex()460 bool GetStartImageIndex() const { return IsOldVersion9 ? 0 : 1; }
GetDirAlignMask()461 unsigned GetDirAlignMask() const { return IsOldVersion9 ? 3 : 7; }
462
463 // User Items can contain all images or just one image from all.
464 CUIntVector SortedItems;
465 int IndexOfUserImage; // -1 : if more than one images was filled to Sorted Items
466
467 unsigned NumExcludededItems;
468 int ExludedItem; // -1 : if there are no exclude items
469 CUIntVector VirtualRoots; // we use them for old 1.10 WIM archives
470
ThereIsError()471 bool ThereIsError() const { return RefCountError || HeadersError; }
472
GetNumUserItemsInImage(unsigned imageIndex)473 unsigned GetNumUserItemsInImage(unsigned imageIndex) const
474 {
475 if (IndexOfUserImage >= 0 && imageIndex != (unsigned)IndexOfUserImage)
476 return 0;
477 if (imageIndex >= Images.Size())
478 return 0;
479 return Images[imageIndex].NumItems - NumExcludededItems;
480 }
481
482 bool ItemHasStream(const CItem &item) const;
483
Get_UnpackSize_of_Resource(const CResource & r)484 UInt64 Get_UnpackSize_of_Resource(const CResource &r) const
485 {
486 if (!r.IsSolid())
487 return r.UnpackSize;
488 if (r.IsSolidSmall())
489 return r.PackSize;
490 if (r.IsSolidBig() && r.SolidIndex >= 0)
491 return Solids[(unsigned)r.SolidIndex].UnpackSize;
492 return 0;
493 }
494
Get_PackSize_of_Resource(unsigned streamIndex)495 UInt64 Get_PackSize_of_Resource(unsigned streamIndex) const
496 {
497 const CResource &r = DataStreams[streamIndex].Resource;
498 if (!r.IsSolidSmall())
499 return r.PackSize;
500 if (r.SolidIndex >= 0)
501 {
502 const CSolid &ss = Solids[(unsigned)r.SolidIndex];
503 if (ss.FirstSmallStream == (int)streamIndex)
504 return DataStreams[ss.StreamIndex].Resource.PackSize;
505 }
506 return 0;
507 }
508
GetUnpackSize()509 UInt64 GetUnpackSize() const
510 {
511 UInt64 res = 0;
512 FOR_VECTOR (i, DataStreams)
513 res += DataStreams[i].Resource.UnpackSize;
514 return res;
515 }
516
GetPackSize()517 UInt64 GetPackSize() const
518 {
519 UInt64 res = 0;
520 FOR_VECTOR (i, DataStreams)
521 res += DataStreams[i].Resource.PackSize;
522 return res;
523 }
524
Clear()525 void Clear()
526 {
527 DataStreams.Clear();
528 MetaStreams.Clear();
529 Solids.Clear();
530
531 Items.Clear();
532 ReparseItems.Clear();
533 ItemToReparse.Clear();
534
535 SortedItems.Clear();
536
537 Images.Clear();
538 VirtualRoots.Clear();
539
540 IsOldVersion = false;
541 ThereAreDeletedStreams = false;
542 ThereAreAltStreams = false;
543 RefCountError = false;
544 HeadersError = false;
545 }
546
CDatabase()547 CDatabase():
548 RefCountError(false),
549 HeadersError(false)
550 {}
551
552 void GetShortName(unsigned index, NWindows::NCOM::CPropVariant &res) const;
553 void GetItemName(unsigned index1, NWindows::NCOM::CPropVariant &res) const;
554 void GetItemPath(unsigned index, bool showImageNumber, NWindows::NCOM::CPropVariant &res) const;
555
556 HRESULT OpenXml(IInStream *inStream, const CHeader &h, CByteBuffer &xml);
557 HRESULT Open(IInStream *inStream, const CHeader &h, unsigned numItemsReserve, IArchiveOpenCallback *openCallback);
558 HRESULT FillAndCheck(const CObjectVector<CVolume> &volumes);
559
560 /*
561 imageIndex showImageNumber NumImages
562 * true * Show Image_Number
563 -1 * >1 Show Image_Number
564 -1 false 1 Don't show Image_Number
565 N false * Don't show Image_Number
566 */
567 HRESULT GenerateSortedItems(int imageIndex, bool showImageNumber);
568
569 HRESULT ExtractReparseStreams(const CObjectVector<CVolume> &volumes, IArchiveOpenCallback *openCallback);
570 };
571
// Reads a WIM header from (inStream) into (header); defined outside this header.
// NOTE(review): (phySize) is presumably updated the same way as in
// CHeader::Parse / CResource::ParseAndUpdatePhySize - confirm in the implementation.
HRESULT ReadHeader(IInStream *inStream, CHeader &header, UInt64 &phySize);
573
574
575 struct CMidBuf
576 {
577 Byte *Data;
578 size_t _size;
579
CMidBufCMidBuf580 CMidBuf(): Data(NULL), _size(0) {}
581
EnsureCapacityCMidBuf582 void EnsureCapacity(size_t size)
583 {
584 if (size > _size)
585 {
586 ::MidFree(Data);
587 _size = 0;
588 Data = (Byte *)::MidAlloc(size);
589 if (Data)
590 _size = size;
591 }
592 }
593
~CMidBufCMidBuf594 ~CMidBuf() { ::MidFree(Data); }
595 };
596
597
598 class CUnpacker
599 {
600 NCompress::CCopyCoder *copyCoderSpec;
601 CMyComPtr<ICompressCoder> copyCoder;
602
603 NCompress::NLzx::CDecoder *lzxDecoderSpec;
604 CMyComPtr<IUnknown> lzxDecoder;
605
606 NCompress::NLzms::CDecoder *lzmsDecoder;
607
608 CByteBuffer sizesBuf;
609
610 CMidBuf packBuf;
611 CMidBuf unpackBuf;
612
613 // solid resource
614 int _solidIndex;
615 size_t _unpackedChunkIndex;
616
617 HRESULT UnpackChunk(
618 ISequentialInStream *inStream,
619 unsigned method, unsigned chunkSizeBits,
620 size_t inSize, size_t outSize,
621 ISequentialOutStream *outStream);
622
623 HRESULT Unpack2(
624 IInStream *inStream,
625 const CResource &res,
626 const CHeader &header,
627 const CDatabase *db,
628 ISequentialOutStream *outStream,
629 ICompressProgressInfo *progress);
630
631 public:
632 UInt64 TotalPacked;
633
CUnpacker()634 CUnpacker():
635 lzmsDecoder(NULL),
636 _solidIndex(-1),
637 _unpackedChunkIndex(0),
638 TotalPacked(0)
639 {}
640 ~CUnpacker();
641
642 HRESULT Unpack(
643 IInStream *inStream,
644 const CResource &res,
645 const CHeader &header,
646 const CDatabase *db,
647 ISequentialOutStream *outStream,
648 ICompressProgressInfo *progress,
649 Byte *digest);
650
651 HRESULT UnpackData(IInStream *inStream,
652 const CResource &resource, const CHeader &header,
653 const CDatabase *db,
654 CByteBuffer &buf, Byte *digest);
655 };
656
657 }}
658
659 #endif
660