1 #define UNP_READ_SIZE_MT        0x400000
2 #define UNP_BLOCKS_PER_THREAD          2
3 
4 
5 struct UnpackThreadDataList
6 {
7   UnpackThreadData *D;
8   uint BlockCount;
9 };
10 
11 
THREAD_PROC(UnpackDecodeThread)12 THREAD_PROC(UnpackDecodeThread)
13 {
14   UnpackThreadDataList *DL=(UnpackThreadDataList *)Data;
15   for (uint I=0;I<DL->BlockCount;I++)
16     DL->D->UnpackPtr->UnpackDecode(DL->D[I]);
17 }
18 
19 
InitMT()20 void Unpack::InitMT()
21 {
22   if (ReadBufMT==NULL)
23   {
24     // Even getbits32 can read up to 3 additional bytes after current
25     // and our block header and table reading code can look much further.
26     // Let's allocate the additional space here, so we do not need to check
27     // bounds for every bit field access.
28     const size_t Overflow=1024;
29 
30     ReadBufMT=new byte[UNP_READ_SIZE_MT+Overflow];
31     memset(ReadBufMT,0,UNP_READ_SIZE_MT+Overflow);
32   }
33   if (UnpThreadData==NULL)
34   {
35     uint MaxItems=MaxUserThreads*UNP_BLOCKS_PER_THREAD;
36     UnpThreadData=new UnpackThreadData[MaxItems];
37     memset(UnpThreadData,0,sizeof(UnpackThreadData)*MaxItems);
38 
39     for (uint I=0;I<MaxItems;I++)
40     {
41       UnpackThreadData *CurData=UnpThreadData+I;
42       if (CurData->Decoded==NULL)
43       {
44         // Typical number of items in RAR blocks does not exceed 0x4000.
45         CurData->DecodedAllocated=0x4100;
46         // It will be freed in the object destructor, not in this file.
47         CurData->Decoded=(UnpackDecodedItem *)malloc(CurData->DecodedAllocated*sizeof(UnpackDecodedItem));
48         if (CurData->Decoded==NULL)
49           ErrHandler.MemoryError();
50       }
51     }
52   }
53 }
54 
55 
Unpack5MT(bool Solid)56 void Unpack::Unpack5MT(bool Solid)
57 {
58   InitMT();
59   UnpInitData(Solid);
60 
61   for (uint I=0;I<MaxUserThreads*UNP_BLOCKS_PER_THREAD;I++)
62   {
63     UnpackThreadData *CurData=UnpThreadData+I;
64     CurData->LargeBlock=false;
65     CurData->Incomplete=false;
66   }
67 
68   UnpThreadData[0].BlockHeader=BlockHeader;
69   UnpThreadData[0].BlockTables=BlockTables;
70   uint LastBlockNum=0;
71 
72   int DataSize=0;
73   int BlockStart=0;
74 
75 
76   // 'true' if we found a block too large for multithreaded extraction,
77   // so we switched to single threaded mode until the end of file.
78   // Large blocks could cause too high memory use in multithreaded mode.
79   bool LargeBlock=false;
80 
81   bool Done=false;
82   while (!Done)
83   {
84     // Data amount, which is guaranteed to fit block header and tables,
85     // so we can safely read them without additional checks.
86     const int TooSmallToProcess=1024;
87 
88     int ReadSize=UnpIO->UnpRead(ReadBufMT+DataSize,(UNP_READ_SIZE_MT-DataSize)&~0xf);
89     if (ReadSize<0)
90       break;
91     DataSize+=ReadSize;
92     if (DataSize==0)
93       break;
94 
95     // First read chunk can be small if we are near the end of volume
96     // and we want it to fit block header and tables.
97     if (ReadSize>0 && DataSize<TooSmallToProcess)
98       continue;
99 
100     while (BlockStart<DataSize && !Done)
101     {
102       uint BlockNumber=0,BlockNumberMT=0;
103       while (BlockNumber<MaxUserThreads*UNP_BLOCKS_PER_THREAD)
104       {
105         UnpackThreadData *CurData=UnpThreadData+BlockNumber;
106         LastBlockNum=BlockNumber;
107         CurData->UnpackPtr=this;
108 
109         // 'Incomplete' thread is present. This is a thread processing block
110         // in the end of buffer, split between two read operations.
111         if (CurData->Incomplete)
112           CurData->DataSize=DataSize;
113         else
114         {
115           CurData->Inp.SetExternalBuffer(ReadBufMT+BlockStart);
116           CurData->Inp.InitBitInput();
117           CurData->DataSize=DataSize-BlockStart;
118           if (CurData->DataSize==0)
119             break;
120           CurData->DamagedData=false;
121           CurData->HeaderRead=false;
122           CurData->TableRead=false;
123         }
124 
125         // We should not use 'last block in file' block flag here unless
126         // we'll check the block size, because even if block is last in file,
127         // it can exceed the current buffer and require more reading.
128         CurData->NoDataLeft=(ReadSize==0);
129 
130         CurData->Incomplete=false;
131         CurData->ThreadNumber=BlockNumber;
132 
133         if (!CurData->HeaderRead)
134         {
135           CurData->HeaderRead=true;
136           if (!ReadBlockHeader(CurData->Inp,CurData->BlockHeader) ||
137               !CurData->BlockHeader.TablePresent && !TablesRead5)
138           {
139             Done=true;
140             break;
141           }
142           TablesRead5=true;
143         }
144 
145         // To prevent too high memory use we switch to single threaded mode
146         // if block exceeds this size. Typically RAR blocks do not exceed
147         // 64 KB, so this protection should not affect most of valid archives.
148         const int LargeBlockSize=0x20000;
149         if (LargeBlock || CurData->BlockHeader.BlockSize>LargeBlockSize)
150           LargeBlock=CurData->LargeBlock=true;
151         else
152           BlockNumberMT++; // Number of normal blocks processed in MT mode.
153 
154         BlockStart+=CurData->BlockHeader.HeaderSize+CurData->BlockHeader.BlockSize;
155 
156         BlockNumber++;
157 
158         int DataLeft=DataSize-BlockStart;
159         if (DataLeft>=0 && CurData->BlockHeader.LastBlockInFile)
160           break;
161 
162         // For second and following threads we move smaller blocks to buffer
163         // start to ensure that we have enough data to fit block header
164         // and tables.
165         if (DataLeft<TooSmallToProcess)
166           break;
167       }
168 
169 //#undef USE_THREADS
170       UnpackThreadDataList UTDArray[MaxPoolThreads];
171       uint UTDArrayPos=0;
172 
173       uint MaxBlockPerThread=BlockNumberMT/MaxUserThreads;
174       if (BlockNumberMT%MaxUserThreads!=0)
175         MaxBlockPerThread++;
176 
177       // Decode all normal blocks until the first 'large' if any.
178       for (uint CurBlock=0;CurBlock<BlockNumberMT;CurBlock+=MaxBlockPerThread)
179       {
180         UnpackThreadDataList *UTD=UTDArray+UTDArrayPos++;
181         UTD->D=UnpThreadData+CurBlock;
182         UTD->BlockCount=Min(MaxBlockPerThread,BlockNumberMT-CurBlock);
183 
184 #ifdef USE_THREADS
185         if (BlockNumber==1)
186           UnpackDecode(*UTD->D);
187         else
188           UnpThreadPool->AddTask(UnpackDecodeThread,(void*)UTD);
189 #else
190         for (uint I=0;I<UTD->BlockCount;I++)
191           UnpackDecode(UTD->D[I]);
192 #endif
193       }
194 
195       if (BlockNumber==0)
196         break;
197 
198 #ifdef USE_THREADS
199       UnpThreadPool->WaitDone();
200 #endif
201 
202       bool IncompleteThread=false;
203 
204       for (uint Block=0;Block<BlockNumber;Block++)
205       {
206         UnpackThreadData *CurData=UnpThreadData+Block;
207         if (!CurData->LargeBlock && !ProcessDecoded(*CurData) ||
208             CurData->LargeBlock && !UnpackLargeBlock(*CurData) ||
209             CurData->DamagedData)
210         {
211           Done=true;
212           break;
213         }
214         if (CurData->Incomplete)
215         {
216           int BufPos=int(CurData->Inp.InBuf+CurData->Inp.InAddr-ReadBufMT);
217           if (DataSize<=BufPos) // Thread exceeded input buffer boundary.
218           {
219             Done=true;
220             break;
221           }
222           IncompleteThread=true;
223           memmove(ReadBufMT,ReadBufMT+BufPos,DataSize-BufPos);
224           CurData->BlockHeader.BlockSize-=CurData->Inp.InAddr-CurData->BlockHeader.BlockStart;
225           CurData->BlockHeader.HeaderSize=0;
226           CurData->BlockHeader.BlockStart=0;
227           CurData->Inp.InBuf=ReadBufMT;
228           CurData->Inp.InAddr=0;
229 
230           if (Block!=0)
231           {
232             // Move the incomplete thread entry to the first position,
233             // so we'll start processing from it. Preserve the original
234             // buffer for decoded data.
235             UnpackDecodedItem *Decoded=UnpThreadData[0].Decoded;
236             uint DecodedAllocated=UnpThreadData[0].DecodedAllocated;
237             UnpThreadData[0]=*CurData;
238             UnpThreadData[0].Decoded=Decoded;
239             UnpThreadData[0].DecodedAllocated=DecodedAllocated;
240             CurData->Incomplete=false;
241           }
242 
243           BlockStart=0;
244           DataSize-=BufPos;
245           break;
246         }
247         else
248           if (CurData->BlockHeader.LastBlockInFile)
249           {
250             Done=true;
251             break;
252           }
253       }
254 
255       if (IncompleteThread || Done)
256         break; // Current buffer is done, read more data or quit.
257       else
258       {
259         int DataLeft=DataSize-BlockStart;
260         if (DataLeft<TooSmallToProcess)
261         {
262           if (DataLeft<0) // Invalid data, must not happen in valid archive.
263           {
264             Done=true;
265             break;
266           }
267 
268           // If we do not have incomplete thread and have some data
269           // in the end of buffer, too small for single thread,
270           // let's move it to beginning of next buffer.
271           if (DataLeft>0)
272             memmove(ReadBufMT,ReadBufMT+BlockStart,DataLeft);
273           DataSize=DataLeft;
274           BlockStart=0;
275           break; // Current buffer is done, try to read more data.
276         }
277       }
278     }
279   }
280   UnpPtr&=MaxWinMask; // ProcessDecoded and maybe others can leave UnpPtr > MaxWinMask here.
281   UnpWriteBuf();
282 
283   BlockHeader=UnpThreadData[LastBlockNum].BlockHeader;
284   BlockTables=UnpThreadData[LastBlockNum].BlockTables;
285 }
286 
287 
288 // Decode Huffman block and save decoded data to memory.
UnpackDecode(UnpackThreadData & D)289 void Unpack::UnpackDecode(UnpackThreadData &D)
290 {
291   if (!D.TableRead)
292   {
293     D.TableRead=true;
294     if (!ReadTables(D.Inp,D.BlockHeader,D.BlockTables))
295     {
296       D.DamagedData=true;
297       return;
298     }
299   }
300 
301   if (D.Inp.InAddr>D.BlockHeader.HeaderSize+D.BlockHeader.BlockSize)
302   {
303     D.DamagedData=true;
304     return;
305   }
306 
307   D.DecodedSize=0;
308   int BlockBorder=D.BlockHeader.BlockStart+D.BlockHeader.BlockSize-1;
309 
310   // Reserve enough space even for filter entry.
311   int DataBorder=D.DataSize-16;
312   int ReadBorder=Min(BlockBorder,DataBorder);
313 
314   while (true)
315   {
316     if (D.Inp.InAddr>=ReadBorder)
317     {
318       if (D.Inp.InAddr>BlockBorder || D.Inp.InAddr==BlockBorder &&
319           D.Inp.InBit>=D.BlockHeader.BlockBitSize)
320         break;
321 
322       // If we do not have any more data in file to read, we must process
323       // what we have until last byte. Otherwise we can return and append
324       // more data to unprocessed few bytes.
325       if ((D.Inp.InAddr>=DataBorder) && !D.NoDataLeft || D.Inp.InAddr>=D.DataSize)
326       {
327         D.Incomplete=true;
328         break;
329       }
330     }
331     if (D.DecodedSize>D.DecodedAllocated-8) // Filter can use several slots.
332     {
333       D.DecodedAllocated=D.DecodedAllocated*2;
334       void *Decoded=realloc(D.Decoded,D.DecodedAllocated*sizeof(UnpackDecodedItem));
335       if (Decoded==NULL)
336         ErrHandler.MemoryError(); // D.Decoded will be freed in the destructor.
337       D.Decoded=(UnpackDecodedItem *)Decoded;
338     }
339 
340     UnpackDecodedItem *CurItem=D.Decoded+D.DecodedSize++;
341 
342     uint MainSlot=DecodeNumber(D.Inp,&D.BlockTables.LD);
343     if (MainSlot<256)
344     {
345       if (D.DecodedSize>1)
346       {
347         UnpackDecodedItem *PrevItem=CurItem-1;
348         if (PrevItem->Type==UNPDT_LITERAL && PrevItem->Length<3)
349         {
350           PrevItem->Length++;
351           PrevItem->Literal[PrevItem->Length]=(byte)MainSlot;
352           D.DecodedSize--;
353           continue;
354         }
355       }
356       CurItem->Type=UNPDT_LITERAL;
357       CurItem->Literal[0]=(byte)MainSlot;
358       CurItem->Length=0;
359       continue;
360     }
361     if (MainSlot>=262)
362     {
363       uint Length=SlotToLength(D.Inp,MainSlot-262);
364 
365       uint DBits,Distance=1,DistSlot=DecodeNumber(D.Inp,&D.BlockTables.DD);
366       if (DistSlot<4)
367       {
368         DBits=0;
369         Distance+=DistSlot;
370       }
371       else
372       {
373         DBits=DistSlot/2 - 1;
374         Distance+=(2 | (DistSlot & 1)) << DBits;
375       }
376 
377       if (DBits>0)
378       {
379         if (DBits>=4)
380         {
381           if (DBits>4)
382           {
383             Distance+=((D.Inp.getbits32()>>(36-DBits))<<4);
384             D.Inp.addbits(DBits-4);
385           }
386           uint LowDist=DecodeNumber(D.Inp,&D.BlockTables.LDD);
387           Distance+=LowDist;
388         }
389         else
390         {
391           Distance+=D.Inp.getbits32()>>(32-DBits);
392           D.Inp.addbits(DBits);
393         }
394       }
395 
396       if (Distance>0x100)
397       {
398         Length++;
399         if (Distance>0x2000)
400         {
401           Length++;
402           if (Distance>0x40000)
403             Length++;
404         }
405       }
406 
407       CurItem->Type=UNPDT_MATCH;
408       CurItem->Length=(ushort)Length;
409       CurItem->Distance=Distance;
410       continue;
411     }
412     if (MainSlot==256)
413     {
414       UnpackFilter Filter;
415       ReadFilter(D.Inp,Filter);
416 
417       CurItem->Type=UNPDT_FILTER;
418       CurItem->Length=Filter.Type;
419       CurItem->Distance=Filter.BlockStart;
420 
421       CurItem=D.Decoded+D.DecodedSize++;
422 
423       CurItem->Type=UNPDT_FILTER;
424       CurItem->Length=Filter.Channels;
425       CurItem->Distance=Filter.BlockLength;
426 
427       continue;
428     }
429     if (MainSlot==257)
430     {
431       CurItem->Type=UNPDT_FULLREP;
432       continue;
433     }
434     if (MainSlot<262)
435     {
436       CurItem->Type=UNPDT_REP;
437       CurItem->Distance=MainSlot-258;
438       uint LengthSlot=DecodeNumber(D.Inp,&D.BlockTables.RD);
439       uint Length=SlotToLength(D.Inp,LengthSlot);
440       CurItem->Length=(ushort)Length;
441       continue;
442     }
443   }
444 }
445 
446 
447 // Process decoded Huffman block data.
ProcessDecoded(UnpackThreadData & D)448 bool Unpack::ProcessDecoded(UnpackThreadData &D)
449 {
450   UnpackDecodedItem *Item=D.Decoded,*Border=D.Decoded+D.DecodedSize;
451   while (Item<Border)
452   {
453     UnpPtr&=MaxWinMask;
454     if (((WriteBorder-UnpPtr) & MaxWinMask)<MAX_INC_LZ_MATCH && WriteBorder!=UnpPtr)
455     {
456       UnpWriteBuf();
457       if (WrittenFileSize>DestUnpSize)
458         return false;
459     }
460 
461     if (Item->Type==UNPDT_LITERAL)
462     {
463 #if defined(LITTLE_ENDIAN) && defined(ALLOW_MISALIGNED)
464       if (Item->Length==3 && UnpPtr<MaxWinSize-4)
465       {
466         *(uint32 *)(Window+UnpPtr)=*(uint32 *)Item->Literal;
467         UnpPtr+=4;
468       }
469       else
470 #endif
471         for (uint I=0;I<=Item->Length;I++)
472           Window[UnpPtr++ & MaxWinMask]=Item->Literal[I];
473     }
474     else
475       if (Item->Type==UNPDT_MATCH)
476       {
477         InsertOldDist(Item->Distance);
478         LastLength=Item->Length;
479         CopyString(Item->Length,Item->Distance);
480       }
481       else
482         if (Item->Type==UNPDT_REP)
483         {
484           uint Distance=OldDist[Item->Distance];
485           for (uint I=Item->Distance;I>0;I--)
486             OldDist[I]=OldDist[I-1];
487           OldDist[0]=Distance;
488           LastLength=Item->Length;
489           CopyString(Item->Length,Distance);
490         }
491         else
492           if (Item->Type==UNPDT_FULLREP)
493           {
494             if (LastLength!=0)
495               CopyString(LastLength,OldDist[0]);
496           }
497           else
498             if (Item->Type==UNPDT_FILTER)
499             {
500               UnpackFilter Filter;
501 
502               Filter.Type=(byte)Item->Length;
503               Filter.BlockStart=Item->Distance;
504 
505               Item++;
506 
507               Filter.Channels=(byte)Item->Length;
508               Filter.BlockLength=Item->Distance;
509 
510               AddFilter(Filter);
511             }
512     Item++;
513   }
514   return true;
515 }
516 
517 
518 // For large blocks we decode and process in same function in single threaded
519 // mode, so we do not need to store intermediate data in memory.
UnpackLargeBlock(UnpackThreadData & D)520 bool Unpack::UnpackLargeBlock(UnpackThreadData &D)
521 {
522   if (!D.TableRead)
523   {
524     D.TableRead=true;
525     if (!ReadTables(D.Inp,D.BlockHeader,D.BlockTables))
526     {
527       D.DamagedData=true;
528       return false;
529     }
530   }
531 
532   if (D.Inp.InAddr>D.BlockHeader.HeaderSize+D.BlockHeader.BlockSize)
533   {
534     D.DamagedData=true;
535     return false;
536   }
537 
538   int BlockBorder=D.BlockHeader.BlockStart+D.BlockHeader.BlockSize-1;
539 
540   // Reserve enough space even for filter entry.
541   int DataBorder=D.DataSize-16;
542   int ReadBorder=Min(BlockBorder,DataBorder);
543 
544   while (true)
545   {
546     UnpPtr&=MaxWinMask;
547     if (D.Inp.InAddr>=ReadBorder)
548     {
549       if (D.Inp.InAddr>BlockBorder || D.Inp.InAddr==BlockBorder &&
550           D.Inp.InBit>=D.BlockHeader.BlockBitSize)
551         break;
552 
553       // If we do not have any more data in file to read, we must process
554       // what we have until last byte. Otherwise we can return and append
555       // more data to unprocessed few bytes.
556       if ((D.Inp.InAddr>=DataBorder) && !D.NoDataLeft || D.Inp.InAddr>=D.DataSize)
557       {
558         D.Incomplete=true;
559         break;
560       }
561     }
562     if (((WriteBorder-UnpPtr) & MaxWinMask)<MAX_INC_LZ_MATCH && WriteBorder!=UnpPtr)
563     {
564       UnpWriteBuf();
565       if (WrittenFileSize>DestUnpSize)
566         return false;
567     }
568 
569     uint MainSlot=DecodeNumber(D.Inp,&D.BlockTables.LD);
570     if (MainSlot<256)
571     {
572       Window[UnpPtr++]=(byte)MainSlot;
573       continue;
574     }
575     if (MainSlot>=262)
576     {
577       uint Length=SlotToLength(D.Inp,MainSlot-262);
578 
579       uint DBits,Distance=1,DistSlot=DecodeNumber(D.Inp,&D.BlockTables.DD);
580       if (DistSlot<4)
581       {
582         DBits=0;
583         Distance+=DistSlot;
584       }
585       else
586       {
587         DBits=DistSlot/2 - 1;
588         Distance+=(2 | (DistSlot & 1)) << DBits;
589       }
590 
591       if (DBits>0)
592       {
593         if (DBits>=4)
594         {
595           if (DBits>4)
596           {
597             Distance+=((D.Inp.getbits32()>>(36-DBits))<<4);
598             D.Inp.addbits(DBits-4);
599           }
600           uint LowDist=DecodeNumber(D.Inp,&D.BlockTables.LDD);
601           Distance+=LowDist;
602         }
603         else
604         {
605           Distance+=D.Inp.getbits32()>>(32-DBits);
606           D.Inp.addbits(DBits);
607         }
608       }
609 
610       if (Distance>0x100)
611       {
612         Length++;
613         if (Distance>0x2000)
614         {
615           Length++;
616           if (Distance>0x40000)
617             Length++;
618         }
619       }
620 
621       InsertOldDist(Distance);
622       LastLength=Length;
623       CopyString(Length,Distance);
624       continue;
625     }
626     if (MainSlot==256)
627     {
628       UnpackFilter Filter;
629       if (!ReadFilter(D.Inp,Filter) || !AddFilter(Filter))
630         break;
631       continue;
632     }
633     if (MainSlot==257)
634     {
635       if (LastLength!=0)
636         CopyString(LastLength,OldDist[0]);
637       continue;
638     }
639     if (MainSlot<262)
640     {
641       uint DistNum=MainSlot-258;
642       uint Distance=OldDist[DistNum];
643       for (uint I=DistNum;I>0;I--)
644         OldDist[I]=OldDist[I-1];
645       OldDist[0]=Distance;
646 
647       uint LengthSlot=DecodeNumber(D.Inp,&D.BlockTables.RD);
648       uint Length=SlotToLength(D.Inp,LengthSlot);
649       LastLength=Length;
650       CopyString(Length,Distance);
651       continue;
652     }
653   }
654   return true;
655 }
656