1 /* LzmaSpec.cpp -- LZMA Reference Decoder
2 2015-06-14 : Igor Pavlov : Public domain */
3 
4 // This code implements LZMA file decoding according to LZMA specification.
5 // This code is not optimized for speed.
6 
7 #include <stdio.h>
8 
9 #ifdef _MSC_VER
10   #pragma warning(disable : 4710) // function not inlined
11   #pragma warning(disable : 4996) // This function or variable may be unsafe
12 #endif
13 
14 typedef unsigned char Byte;
15 typedef unsigned short UInt16;
16 
17 #ifdef _LZMA_UINT32_IS_ULONG
18   typedef unsigned long UInt32;
19 #else
20   typedef unsigned int UInt32;
21 #endif
22 
23 #if defined(_MSC_VER) || defined(__BORLANDC__)
24   typedef unsigned __int64 UInt64;
25 #else
26   typedef unsigned long long int UInt64;
27 #endif
28 
29 
30 struct CInputStream
31 {
32   FILE *File;
33   UInt64 Processed;
34 
InitCInputStream35   void Init() { Processed = 0; }
36 
ReadByteCInputStream37   Byte ReadByte()
38   {
39     int c = getc(File);
40     if (c < 0)
41       throw "Unexpected end of file";
42     Processed++;
43     return (Byte)c;
44   }
45 };
46 
47 
48 struct COutStream
49 {
50   FILE *File;
51   UInt64 Processed;
52 
InitCOutStream53   void Init() { Processed = 0; }
54 
WriteByteCOutStream55   void WriteByte(Byte b)
56   {
57     if (putc(b, File) == EOF)
58       throw "File writing error";
59     Processed++;
60   }
61 };
62 
63 
64 class COutWindow
65 {
66   Byte *Buf;
67   UInt32 Pos;
68   UInt32 Size;
69   bool IsFull;
70 
71 public:
72   unsigned TotalPos;
73   COutStream OutStream;
74 
COutWindow()75   COutWindow(): Buf(NULL) {}
~COutWindow()76   ~COutWindow() { delete []Buf; }
77 
Create(UInt32 dictSize)78   void Create(UInt32 dictSize)
79   {
80     Buf = new Byte[dictSize];
81     Pos = 0;
82     Size = dictSize;
83     IsFull = false;
84     TotalPos = 0;
85   }
86 
PutByte(Byte b)87   void PutByte(Byte b)
88   {
89     TotalPos++;
90     Buf[Pos++] = b;
91     if (Pos == Size)
92     {
93       Pos = 0;
94       IsFull = true;
95     }
96     OutStream.WriteByte(b);
97   }
98 
GetByte(UInt32 dist) const99   Byte GetByte(UInt32 dist) const
100   {
101     return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
102   }
103 
CopyMatch(UInt32 dist,unsigned len)104   void CopyMatch(UInt32 dist, unsigned len)
105   {
106     for (; len > 0; len--)
107       PutByte(GetByte(dist));
108   }
109 
CheckDistance(UInt32 dist) const110   bool CheckDistance(UInt32 dist) const
111   {
112     return dist <= Pos || IsFull;
113   }
114 
IsEmpty() const115   bool IsEmpty() const
116   {
117     return Pos == 0 && !IsFull;
118   }
119 };
120 
121 
122 #define kNumBitModelTotalBits 11
123 #define kNumMoveBits 5
124 
125 typedef UInt16 CProb;
126 
127 #define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
128 
129 #define INIT_PROBS(p) \
130  { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
131 
132 class CRangeDecoder
133 {
134   UInt32 Range;
135   UInt32 Code;
136 
137   void Normalize();
138 
139 public:
140 
141   CInputStream *InStream;
142   bool Corrupted;
143 
144   bool Init();
IsFinishedOK() const145   bool IsFinishedOK() const { return Code == 0; }
146 
147   UInt32 DecodeDirectBits(unsigned numBits);
148   unsigned DecodeBit(CProb *prob);
149 };
150 
Init()151 bool CRangeDecoder::Init()
152 {
153   Corrupted = false;
154   Range = 0xFFFFFFFF;
155   Code = 0;
156 
157   Byte b = InStream->ReadByte();
158 
159   for (int i = 0; i < 4; i++)
160     Code = (Code << 8) | InStream->ReadByte();
161 
162   if (b != 0 || Code == Range)
163     Corrupted = true;
164   return b == 0;
165 }
166 
167 #define kTopValue ((UInt32)1 << 24)
168 
Normalize()169 void CRangeDecoder::Normalize()
170 {
171   if (Range < kTopValue)
172   {
173     Range <<= 8;
174     Code = (Code << 8) | InStream->ReadByte();
175   }
176 }
177 
DecodeDirectBits(unsigned numBits)178 UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
179 {
180   UInt32 res = 0;
181   do
182   {
183     Range >>= 1;
184     Code -= Range;
185     UInt32 t = 0 - ((UInt32)Code >> 31);
186     Code += Range & t;
187 
188     if (Code == Range)
189       Corrupted = true;
190 
191     Normalize();
192     res <<= 1;
193     res += t + 1;
194   }
195   while (--numBits);
196   return res;
197 }
198 
DecodeBit(CProb * prob)199 unsigned CRangeDecoder::DecodeBit(CProb *prob)
200 {
201   unsigned v = *prob;
202   UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
203   unsigned symbol;
204   if (Code < bound)
205   {
206     v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
207     Range = bound;
208     symbol = 0;
209   }
210   else
211   {
212     v -= v >> kNumMoveBits;
213     Code -= bound;
214     Range -= bound;
215     symbol = 1;
216   }
217   *prob = (CProb)v;
218   Normalize();
219   return symbol;
220 }
221 
222 
BitTreeReverseDecode(CProb * probs,unsigned numBits,CRangeDecoder * rc)223 unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
224 {
225   unsigned m = 1;
226   unsigned symbol = 0;
227   for (unsigned i = 0; i < numBits; i++)
228   {
229     unsigned bit = rc->DecodeBit(&probs[m]);
230     m <<= 1;
231     m += bit;
232     symbol |= (bit << i);
233   }
234   return symbol;
235 }
236 
237 template <unsigned NumBits>
238 class CBitTreeDecoder
239 {
240   CProb Probs[(unsigned)1 << NumBits];
241 
242 public:
243 
Init()244   void Init()
245   {
246     INIT_PROBS(Probs);
247   }
248 
Decode(CRangeDecoder * rc)249   unsigned Decode(CRangeDecoder *rc)
250   {
251     unsigned m = 1;
252     for (unsigned i = 0; i < NumBits; i++)
253       m = (m << 1) + rc->DecodeBit(&Probs[m]);
254     return m - ((unsigned)1 << NumBits);
255   }
256 
ReverseDecode(CRangeDecoder * rc)257   unsigned ReverseDecode(CRangeDecoder *rc)
258   {
259     return BitTreeReverseDecode(Probs, NumBits, rc);
260   }
261 };
262 
263 #define kNumPosBitsMax 4
264 
265 #define kNumStates 12
266 #define kNumLenToPosStates 4
267 #define kNumAlignBits 4
268 #define kStartPosModelIndex 4
269 #define kEndPosModelIndex 14
270 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
271 #define kMatchMinLen 2
272 
273 class CLenDecoder
274 {
275   CProb Choice;
276   CProb Choice2;
277   CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
278   CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
279   CBitTreeDecoder<8> HighCoder;
280 
281 public:
282 
Init()283   void Init()
284   {
285     Choice = PROB_INIT_VAL;
286     Choice2 = PROB_INIT_VAL;
287     HighCoder.Init();
288     for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
289     {
290       LowCoder[i].Init();
291       MidCoder[i].Init();
292     }
293   }
294 
Decode(CRangeDecoder * rc,unsigned posState)295   unsigned Decode(CRangeDecoder *rc, unsigned posState)
296   {
297     if (rc->DecodeBit(&Choice) == 0)
298       return LowCoder[posState].Decode(rc);
299     if (rc->DecodeBit(&Choice2) == 0)
300       return 8 + MidCoder[posState].Decode(rc);
301     return 16 + HighCoder.Decode(rc);
302   }
303 };
304 
UpdateState_Literal(unsigned state)305 unsigned UpdateState_Literal(unsigned state)
306 {
307   if (state < 4) return 0;
308   else if (state < 10) return state - 3;
309   else return state - 6;
310 }
UpdateState_Match(unsigned state)311 unsigned UpdateState_Match   (unsigned state) { return state < 7 ? 7 : 10; }
UpdateState_Rep(unsigned state)312 unsigned UpdateState_Rep     (unsigned state) { return state < 7 ? 8 : 11; }
UpdateState_ShortRep(unsigned state)313 unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
314 
315 #define LZMA_DIC_MIN (1 << 12)
316 
317 class CLzmaDecoder
318 {
319 public:
320   CRangeDecoder RangeDec;
321   COutWindow OutWindow;
322 
323   bool markerIsMandatory;
324   unsigned lc, pb, lp;
325   UInt32 dictSize;
326   UInt32 dictSizeInProperties;
327 
DecodeProperties(const Byte * properties)328   void DecodeProperties(const Byte *properties)
329   {
330     unsigned d = properties[0];
331     if (d >= (9 * 5 * 5))
332       throw "Incorrect LZMA properties";
333     lc = d % 9;
334     d /= 9;
335     pb = d / 5;
336     lp = d % 5;
337     dictSizeInProperties = 0;
338     for (int i = 0; i < 4; i++)
339       dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
340     dictSize = dictSizeInProperties;
341     if (dictSize < LZMA_DIC_MIN)
342       dictSize = LZMA_DIC_MIN;
343   }
344 
CLzmaDecoder()345   CLzmaDecoder(): LitProbs(NULL) {}
~CLzmaDecoder()346   ~CLzmaDecoder() { delete []LitProbs; }
347 
Create()348   void Create()
349   {
350     OutWindow.Create(dictSize);
351     CreateLiterals();
352   }
353 
354   int Decode(bool unpackSizeDefined, UInt64 unpackSize);
355 
356 private:
357 
358   CProb *LitProbs;
359 
CreateLiterals()360   void CreateLiterals()
361   {
362     LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
363   }
364 
InitLiterals()365   void InitLiterals()
366   {
367     UInt32 num = (UInt32)0x300 << (lc + lp);
368     for (UInt32 i = 0; i < num; i++)
369       LitProbs[i] = PROB_INIT_VAL;
370   }
371 
DecodeLiteral(unsigned state,UInt32 rep0)372   void DecodeLiteral(unsigned state, UInt32 rep0)
373   {
374     unsigned prevByte = 0;
375     if (!OutWindow.IsEmpty())
376       prevByte = OutWindow.GetByte(1);
377 
378     unsigned symbol = 1;
379     unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
380     CProb *probs = &LitProbs[(UInt32)0x300 * litState];
381 
382     if (state >= 7)
383     {
384       unsigned matchByte = OutWindow.GetByte(rep0 + 1);
385       do
386       {
387         unsigned matchBit = (matchByte >> 7) & 1;
388         matchByte <<= 1;
389         unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
390         symbol = (symbol << 1) | bit;
391         if (matchBit != bit)
392           break;
393       }
394       while (symbol < 0x100);
395     }
396     while (symbol < 0x100)
397       symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
398     OutWindow.PutByte((Byte)(symbol - 0x100));
399   }
400 
401   CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
402   CBitTreeDecoder<kNumAlignBits> AlignDecoder;
403   CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
404 
InitDist()405   void InitDist()
406   {
407     for (unsigned i = 0; i < kNumLenToPosStates; i++)
408       PosSlotDecoder[i].Init();
409     AlignDecoder.Init();
410     INIT_PROBS(PosDecoders);
411   }
412 
DecodeDistance(unsigned len)413   unsigned DecodeDistance(unsigned len)
414   {
415     unsigned lenState = len;
416     if (lenState > kNumLenToPosStates - 1)
417       lenState = kNumLenToPosStates - 1;
418 
419     unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
420     if (posSlot < 4)
421       return posSlot;
422 
423     unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
424     UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
425     if (posSlot < kEndPosModelIndex)
426       dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
427     else
428     {
429       dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
430       dist += AlignDecoder.ReverseDecode(&RangeDec);
431     }
432     return dist;
433   }
434 
435   CProb IsMatch[kNumStates << kNumPosBitsMax];
436   CProb IsRep[kNumStates];
437   CProb IsRepG0[kNumStates];
438   CProb IsRepG1[kNumStates];
439   CProb IsRepG2[kNumStates];
440   CProb IsRep0Long[kNumStates << kNumPosBitsMax];
441 
442   CLenDecoder LenDecoder;
443   CLenDecoder RepLenDecoder;
444 
Init()445   void Init()
446   {
447     InitLiterals();
448     InitDist();
449 
450     INIT_PROBS(IsMatch);
451     INIT_PROBS(IsRep);
452     INIT_PROBS(IsRepG0);
453     INIT_PROBS(IsRepG1);
454     INIT_PROBS(IsRepG2);
455     INIT_PROBS(IsRep0Long);
456 
457     LenDecoder.Init();
458     RepLenDecoder.Init();
459   }
460 };
461 
462 
463 #define LZMA_RES_ERROR                   0
464 #define LZMA_RES_FINISHED_WITH_MARKER    1
465 #define LZMA_RES_FINISHED_WITHOUT_MARKER 2
466 
Decode(bool unpackSizeDefined,UInt64 unpackSize)467 int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
468 {
469   if (!RangeDec.Init())
470     return LZMA_RES_ERROR;
471 
472   Init();
473 
474   UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
475   unsigned state = 0;
476 
477   for (;;)
478   {
479     if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
480       if (RangeDec.IsFinishedOK())
481         return LZMA_RES_FINISHED_WITHOUT_MARKER;
482 
483     unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
484 
485     if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
486     {
487       if (unpackSizeDefined && unpackSize == 0)
488         return LZMA_RES_ERROR;
489       DecodeLiteral(state, rep0);
490       state = UpdateState_Literal(state);
491       unpackSize--;
492       continue;
493     }
494 
495     unsigned len;
496 
497     if (RangeDec.DecodeBit(&IsRep[state]) != 0)
498     {
499       if (unpackSizeDefined && unpackSize == 0)
500         return LZMA_RES_ERROR;
501       if (OutWindow.IsEmpty())
502         return LZMA_RES_ERROR;
503       if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
504       {
505         if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
506         {
507           state = UpdateState_ShortRep(state);
508           OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
509           unpackSize--;
510           continue;
511         }
512       }
513       else
514       {
515         UInt32 dist;
516         if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
517           dist = rep1;
518         else
519         {
520           if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
521             dist = rep2;
522           else
523           {
524             dist = rep3;
525             rep3 = rep2;
526           }
527           rep2 = rep1;
528         }
529         rep1 = rep0;
530         rep0 = dist;
531       }
532       len = RepLenDecoder.Decode(&RangeDec, posState);
533       state = UpdateState_Rep(state);
534     }
535     else
536     {
537       rep3 = rep2;
538       rep2 = rep1;
539       rep1 = rep0;
540       len = LenDecoder.Decode(&RangeDec, posState);
541       state = UpdateState_Match(state);
542       rep0 = DecodeDistance(len);
543       if (rep0 == 0xFFFFFFFF)
544         return RangeDec.IsFinishedOK() ?
545             LZMA_RES_FINISHED_WITH_MARKER :
546             LZMA_RES_ERROR;
547 
548       if (unpackSizeDefined && unpackSize == 0)
549         return LZMA_RES_ERROR;
550       if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
551         return LZMA_RES_ERROR;
552     }
553     len += kMatchMinLen;
554     bool isError = false;
555     if (unpackSizeDefined && unpackSize < len)
556     {
557       len = (unsigned)unpackSize;
558       isError = true;
559     }
560     OutWindow.CopyMatch(rep0 + 1, len);
561     unpackSize -= len;
562     if (isError)
563       return LZMA_RES_ERROR;
564   }
565 }
566 
Print(const char * s)567 static void Print(const char *s)
568 {
569   fputs(s, stdout);
570 }
571 
PrintError(const char * s)572 static void PrintError(const char *s)
573 {
574   fputs(s, stderr);
575 }
576 
577 
578 #define CONVERT_INT_TO_STR(charType, tempSize) \
579 
ConvertUInt64ToString(UInt64 val,char * s)580 void ConvertUInt64ToString(UInt64 val, char *s)
581 {
582   char temp[32];
583   unsigned i = 0;
584   while (val >= 10)
585   {
586     temp[i++] = (char)('0' + (unsigned)(val % 10));
587     val /= 10;
588   }
589   *s++ = (char)('0' + (unsigned)val);
590   while (i != 0)
591   {
592     i--;
593     *s++ = temp[i];
594   }
595   *s = 0;
596 }
597 
PrintUInt64(const char * title,UInt64 v)598 void PrintUInt64(const char *title, UInt64 v)
599 {
600   Print(title);
601   Print(" : ");
602   char s[32];
603   ConvertUInt64ToString(v, s);
604   Print(s);
605   Print(" bytes \n");
606 }
607 
main2(int numArgs,const char * args[])608 int main2(int numArgs, const char *args[])
609 {
610   Print("\nLZMA Reference Decoder 15.00 : Igor Pavlov : Public domain : 2015-04-16\n");
611   if (numArgs == 1)
612     Print("\nUse: lzmaSpec a.lzma outFile");
613 
614   if (numArgs != 3)
615     throw "you must specify two parameters";
616 
617   CInputStream inStream;
618   inStream.File = fopen(args[1], "rb");
619   inStream.Init();
620   if (inStream.File == 0)
621     throw "Can't open input file";
622 
623   CLzmaDecoder lzmaDecoder;
624   lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+");
625   lzmaDecoder.OutWindow.OutStream.Init();
626   if (inStream.File == 0)
627     throw "Can't open output file";
628 
629   Byte header[13];
630   int i;
631   for (i = 0; i < 13; i++)
632     header[i] = inStream.ReadByte();
633 
634   lzmaDecoder.DecodeProperties(header);
635 
636   printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
637   printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
638   printf("\nDictionary Size for decoding  = %u", lzmaDecoder.dictSize);
639 
640   UInt64 unpackSize = 0;
641   bool unpackSizeDefined = false;
642   for (i = 0; i < 8; i++)
643   {
644     Byte b = header[5 + i];
645     if (b != 0xFF)
646       unpackSizeDefined = true;
647     unpackSize |= (UInt64)b << (8 * i);
648   }
649 
650   lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
651 
652   Print("\n");
653   if (unpackSizeDefined)
654     PrintUInt64("Uncompressed Size", unpackSize);
655   else
656     Print("End marker is expected\n");
657   lzmaDecoder.RangeDec.InStream = &inStream;
658 
659   Print("\n");
660 
661   lzmaDecoder.Create();
662 
663   int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
664 
665   PrintUInt64("Read    ", inStream.Processed);
666   PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
667 
668   if (res == LZMA_RES_ERROR)
669     throw "LZMA decoding error";
670   else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
671     Print("Finished without end marker");
672   else if (res == LZMA_RES_FINISHED_WITH_MARKER)
673   {
674     if (unpackSizeDefined)
675     {
676       if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
677         throw "Finished with end marker before than specified size";
678       Print("Warning: ");
679     }
680     Print("Finished with end marker");
681   }
682   else
683     throw "Internal Error";
684 
685   Print("\n");
686 
687   if (lzmaDecoder.RangeDec.Corrupted)
688   {
689     Print("\nWarning: LZMA stream is corrupted\n");
690   }
691 
692   return 0;
693 }
694 
695 
696 int
697   #ifdef _MSC_VER
698     __cdecl
699   #endif
main(int numArgs,const char * args[])700 main(int numArgs, const char *args[])
701 {
702   try { return main2(numArgs, args); }
703   catch (const char *s)
704   {
705     PrintError("\nError:\n");
706     PrintError(s);
707     PrintError("\n");
708     return 1;
709   }
710   catch(...)
711   {
712     PrintError("\nError\n");
713     return 1;
714   }
715 }
716