1 /* Lzma2Dec.c -- LZMA2 Decoder
2 2021-02-09 : Igor Pavlov : Public domain */
3 
4 /* #define SHOW_DEBUG_INFO */
5 
6 #include "Precomp.h"
7 
8 #ifdef SHOW_DEBUG_INFO
9 #include <stdio.h>
10 #endif
11 
12 #include <string.h>
13 
14 #include "Lzma2Dec.h"
15 
16 /*
17 00000000  -  End of data
18 00000001 U U  -  Uncompressed, reset dic, need reset state and set new prop
19 00000010 U U  -  Uncompressed, no reset
20 100uuuuu U U P P  -  LZMA, no reset
21 101uuuuu U U P P  -  LZMA, reset state
22 110uuuuu U U P P S  -  LZMA, reset state + set new prop
23 111uuuuu U U P P S  -  LZMA, reset state + set new prop, reset dic
24 
25   u, U - Unpack Size
26   P - Pack Size
27   S - Props
28 */
29 
/* Control byte of an uncompressed chunk that also resets the dictionary. */
#define LZMA2_CONTROL_COPY_RESET_DIC 1

/* Non-zero when bit 7 of the stored control byte is clear (chunk is not LZMA-coded). */
#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & 0x80) == 0)

/* Largest accepted value of (lc + lp) in a chunk props byte. */
#define LZMA2_LCLP_MAX 4

/* Dictionary size for a prop value: (2 | (prop & 1)) << (prop / 2 + 11). */
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << (11 + (p) / 2))
36 
/* PRF(x) expands to x when SHOW_DEBUG_INFO is defined, and to nothing otherwise. */
#ifndef SHOW_DEBUG_INFO
#define PRF(x)
#else
#define PRF(x) x
#endif
42 
/*
  Parser states of the LZMA2 decoder.
  The header states are visited one input byte at a time; which of them
  a chunk passes through depends on its control byte.
*/
typedef enum
{
  LZMA2_STATE_CONTROL = 0,    /* expecting a control byte */
  LZMA2_STATE_UNPACK0 = 1,    /* expecting bits 8..15 of (unpack size - 1) */
  LZMA2_STATE_UNPACK1 = 2,    /* expecting bits 0..7 of (unpack size - 1) */
  LZMA2_STATE_PACK0 = 3,      /* expecting bits 8..15 of (pack size - 1) */
  LZMA2_STATE_PACK1 = 4,      /* expecting bits 0..7 of (pack size - 1) */
  LZMA2_STATE_PROP = 5,       /* expecting the lc/lp/pb props byte */
  LZMA2_STATE_DATA = 6,       /* chunk header complete, payload not started */
  LZMA2_STATE_DATA_CONT = 7,  /* inside the chunk payload */
  LZMA2_STATE_FINISHED = 8,   /* control byte 0 (end of data) was read */
  LZMA2_STATE_ERROR = 9       /* stream is invalid */
} ELzma2State;
56 
Lzma2Dec_GetOldProps(Byte prop,Byte * props)57 static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
58 {
59   UInt32 dicSize;
60   if (prop > 40)
61     return SZ_ERROR_UNSUPPORTED;
62   dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop);
63   props[0] = (Byte)LZMA2_LCLP_MAX;
64   props[1] = (Byte)(dicSize);
65   props[2] = (Byte)(dicSize >> 8);
66   props[3] = (Byte)(dicSize >> 16);
67   props[4] = (Byte)(dicSize >> 24);
68   return SZ_OK;
69 }
70 
Lzma2Dec_AllocateProbs(CLzma2Dec * p,Byte prop,ISzAllocPtr alloc)71 SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
72 {
73   Byte props[LZMA_PROPS_SIZE];
74   RINOK(Lzma2Dec_GetOldProps(prop, props));
75   return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
76 }
77 
Lzma2Dec_Allocate(CLzma2Dec * p,Byte prop,ISzAllocPtr alloc)78 SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
79 {
80   Byte props[LZMA_PROPS_SIZE];
81   RINOK(Lzma2Dec_GetOldProps(prop, props));
82   return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
83 }
84 
/*
  Resets the LZMA2 parser to the start of a stream and re-initializes
  the embedded LZMA decoder.

  needInitLevel = 0xE0 makes the control-byte check reject any first chunk
  other than an LZMA chunk with control >= 0xE0 or an uncompressed chunk
  with dictionary reset (control == 1).
*/
void Lzma2Dec_Init(CLzma2Dec *p)
{
  p->unpackSize = 0;
  p->isExtraMode = False;
  p->needInitLevel = 0xE0;
  p->state = LZMA2_STATE_CONTROL;

  // a full init is done here; resetting only p->decoder.dicPos would be a lighter alternative
  LzmaDec_Init(&p->decoder);
}
95 
96 // ELzma2State
/*
  Feeds one chunk-header byte (b) to the parser and returns the next state
  (an ELzma2State value). The caller stores the result in p->state.

  Depending on the current state the byte is interpreted as the control byte,
  one half of (unpack size - 1), one half of (pack size - 1), or the props byte.
  LZMA2_STATE_ERROR is returned for invalid header bytes and for any state
  that does not expect a header byte.
*/
static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
{
  switch (p->state)
  {
    case LZMA2_STATE_CONTROL:
    {
      p->isExtraMode = False;
      p->control = b;
      PRF(printf("\n %8X", (unsigned)p->decoder.dicPos));
      PRF(printf(" %02X", (unsigned)b));

      if (b == 0)
        return LZMA2_STATE_FINISHED;

      if (!LZMA2_IS_UNCOMPRESSED_STATE(p))
      {
        // LZMA chunk: the control byte must request at least the pending reset level
        if (b < p->needInitLevel)
          return LZMA2_STATE_ERROR;
        p->needInitLevel = 0;
        // low 5 bits of control are bits 16..20 of (unpack size - 1)
        p->unpackSize = (UInt32)(b & 0x1F) << 16;
        return LZMA2_STATE_UNPACK0;
      }

      if (b == LZMA2_CONTROL_COPY_RESET_DIC)
      {
        // dictionary is reset by this chunk; state and props are still required later
        p->needInitLevel = 0xC0;
        return LZMA2_STATE_UNPACK0;
      }

      // only control == 2 is left as valid, and it needs a dictionary reset to have happened
      if (b > 2 || p->needInitLevel == 0xE0)
        return LZMA2_STATE_ERROR;
      return LZMA2_STATE_UNPACK0;
    }

    case LZMA2_STATE_UNPACK0:
    {
      p->unpackSize |= (UInt32)b << 8;
      return LZMA2_STATE_UNPACK1;
    }

    case LZMA2_STATE_UNPACK1:
    {
      // stored value is (size - 1)
      p->unpackSize = (p->unpackSize | (UInt32)b) + 1;
      PRF(printf(" %7u", (unsigned)p->unpackSize));
      if (LZMA2_IS_UNCOMPRESSED_STATE(p))
        return LZMA2_STATE_DATA;
      return LZMA2_STATE_PACK0;
    }

    case LZMA2_STATE_PACK0:
    {
      p->packSize = (UInt32)b << 8;
      return LZMA2_STATE_PACK1;
    }

    case LZMA2_STATE_PACK1:
    {
      // stored value is (size - 1)
      p->packSize = (p->packSize | (UInt32)b) + 1;
      PRF(printf(" %5u", (unsigned)p->packSize));
      // bit 6 of control: a props byte follows
      if (p->control & 0x40)
        return LZMA2_STATE_PROP;
      return LZMA2_STATE_DATA;
    }

    case LZMA2_STATE_PROP:
    {
      // b = (pb * 5 + lp) * 9 + lc
      unsigned lc, lp, rest;

      if (b >= (9 * 5 * 5))
        return LZMA2_STATE_ERROR;

      lc = (unsigned)b % 9;
      rest = (unsigned)b / 9;
      p->decoder.prop.pb = (Byte)(rest / 5);
      lp = rest % 5;

      if (lc + lp > LZMA2_LCLP_MAX)
        return LZMA2_STATE_ERROR;

      p->decoder.prop.lc = (Byte)lc;
      p->decoder.prop.lp = (Byte)lp;
      return LZMA2_STATE_DATA;
    }

    default:
      return LZMA2_STATE_ERROR;
  }
}
163 
/*
  Appends (size) raw bytes from (src) to the dictionary at dicPos and
  updates the decoder position counters as if the bytes had been decoded.

  checkDicSize is set to prop.dicSize the first time the copied data
  reaches or passes the dictionary size (it is only changed while it is 0).
*/
static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
{
  Byte *dest = p->dic + p->dicPos;

  memcpy(dest, src, size);
  p->dicPos += size;

  if (p->checkDicSize == 0)
  {
    if (p->prop.dicSize - p->processedPos <= size)
      p->checkDicSize = p->prop.dicSize;
  }

  p->processedPos += (UInt32)size;
}
172 
173 void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
174 
175 
Lzma2Dec_DecodeToDic(CLzma2Dec * p,SizeT dicLimit,const Byte * src,SizeT * srcLen,ELzmaFinishMode finishMode,ELzmaStatus * status)176 SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
177     const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
178 {
179   SizeT inSize = *srcLen;
180   *srcLen = 0;
181   *status = LZMA_STATUS_NOT_SPECIFIED;
182 
183   while (p->state != LZMA2_STATE_ERROR)
184   {
185     SizeT dicPos;
186 
187     if (p->state == LZMA2_STATE_FINISHED)
188     {
189       *status = LZMA_STATUS_FINISHED_WITH_MARK;
190       return SZ_OK;
191     }
192 
193     dicPos = p->decoder.dicPos;
194 
195     if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)
196     {
197       *status = LZMA_STATUS_NOT_FINISHED;
198       return SZ_OK;
199     }
200 
201     if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
202     {
203       if (*srcLen == inSize)
204       {
205         *status = LZMA_STATUS_NEEDS_MORE_INPUT;
206         return SZ_OK;
207       }
208       (*srcLen)++;
209       p->state = Lzma2Dec_UpdateState(p, *src++);
210       if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED)
211         break;
212       continue;
213     }
214 
215     {
216       SizeT inCur = inSize - *srcLen;
217       SizeT outCur = dicLimit - dicPos;
218       ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
219 
220       if (outCur >= p->unpackSize)
221       {
222         outCur = (SizeT)p->unpackSize;
223         curFinishMode = LZMA_FINISH_END;
224       }
225 
226       if (LZMA2_IS_UNCOMPRESSED_STATE(p))
227       {
228         if (inCur == 0)
229         {
230           *status = LZMA_STATUS_NEEDS_MORE_INPUT;
231           return SZ_OK;
232         }
233 
234         if (p->state == LZMA2_STATE_DATA)
235         {
236           BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
237           LzmaDec_InitDicAndState(&p->decoder, initDic, False);
238         }
239 
240         if (inCur > outCur)
241           inCur = outCur;
242         if (inCur == 0)
243           break;
244 
245         LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur);
246 
247         src += inCur;
248         *srcLen += inCur;
249         p->unpackSize -= (UInt32)inCur;
250         p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
251       }
252       else
253       {
254         SRes res;
255 
256         if (p->state == LZMA2_STATE_DATA)
257         {
258           BoolInt initDic = (p->control >= 0xE0);
259           BoolInt initState = (p->control >= 0xA0);
260           LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
261           p->state = LZMA2_STATE_DATA_CONT;
262         }
263 
264         if (inCur > p->packSize)
265           inCur = (SizeT)p->packSize;
266 
267         res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status);
268 
269         src += inCur;
270         *srcLen += inCur;
271         p->packSize -= (UInt32)inCur;
272         outCur = p->decoder.dicPos - dicPos;
273         p->unpackSize -= (UInt32)outCur;
274 
275         if (res != 0)
276           break;
277 
278         if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
279         {
280           if (p->packSize == 0)
281             break;
282           return SZ_OK;
283         }
284 
285         if (inCur == 0 && outCur == 0)
286         {
287           if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
288               || p->unpackSize != 0
289               || p->packSize != 0)
290             break;
291           p->state = LZMA2_STATE_CONTROL;
292         }
293 
294         *status = LZMA_STATUS_NOT_SPECIFIED;
295       }
296     }
297   }
298 
299   *status = LZMA_STATUS_NOT_SPECIFIED;
300   p->state = LZMA2_STATE_ERROR;
301   return SZ_ERROR_DATA;
302 }
303 
304 
305 
306 
Lzma2Dec_Parse(CLzma2Dec * p,SizeT outSize,const Byte * src,SizeT * srcLen,int checkFinishBlock)307 ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
308     SizeT outSize,
309     const Byte *src, SizeT *srcLen,
310     int checkFinishBlock)
311 {
312   SizeT inSize = *srcLen;
313   *srcLen = 0;
314 
315   while (p->state != LZMA2_STATE_ERROR)
316   {
317     if (p->state == LZMA2_STATE_FINISHED)
318       return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK;
319 
320     if (outSize == 0 && !checkFinishBlock)
321       return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
322 
323     if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
324     {
325       if (*srcLen == inSize)
326         return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
327       (*srcLen)++;
328 
329       p->state = Lzma2Dec_UpdateState(p, *src++);
330 
331       if (p->state == LZMA2_STATE_UNPACK0)
332       {
333         // if (p->decoder.dicPos != 0)
334         if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0)
335           return LZMA2_PARSE_STATUS_NEW_BLOCK;
336         // if (outSize == 0) return LZMA_STATUS_NOT_FINISHED;
337       }
338 
339       // The following code can be commented.
340       // It's not big problem, if we read additional input bytes.
341       // It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state.
342 
343       if (outSize == 0 && p->state != LZMA2_STATE_FINISHED)
344       {
345         // checkFinishBlock is true. So we expect that block must be finished,
346         // We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here
347         // break;
348         return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
349       }
350 
351       if (p->state == LZMA2_STATE_DATA)
352         return LZMA2_PARSE_STATUS_NEW_CHUNK;
353 
354       continue;
355     }
356 
357     if (outSize == 0)
358       return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
359 
360     {
361       SizeT inCur = inSize - *srcLen;
362 
363       if (LZMA2_IS_UNCOMPRESSED_STATE(p))
364       {
365         if (inCur == 0)
366           return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
367         if (inCur > p->unpackSize)
368           inCur = p->unpackSize;
369         if (inCur > outSize)
370           inCur = outSize;
371         p->decoder.dicPos += inCur;
372         src += inCur;
373         *srcLen += inCur;
374         outSize -= inCur;
375         p->unpackSize -= (UInt32)inCur;
376         p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
377       }
378       else
379       {
380         p->isExtraMode = True;
381 
382         if (inCur == 0)
383         {
384           if (p->packSize != 0)
385             return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
386         }
387         else if (p->state == LZMA2_STATE_DATA)
388         {
389           p->state = LZMA2_STATE_DATA_CONT;
390           if (*src != 0)
391           {
392             // first byte of lzma chunk must be Zero
393             *srcLen += 1;
394             p->packSize--;
395             break;
396           }
397         }
398 
399         if (inCur > p->packSize)
400           inCur = (SizeT)p->packSize;
401 
402         src += inCur;
403         *srcLen += inCur;
404         p->packSize -= (UInt32)inCur;
405 
406         if (p->packSize == 0)
407         {
408           SizeT rem = outSize;
409           if (rem > p->unpackSize)
410             rem = p->unpackSize;
411           p->decoder.dicPos += rem;
412           p->unpackSize -= (UInt32)rem;
413           outSize -= rem;
414           if (p->unpackSize == 0)
415             p->state = LZMA2_STATE_CONTROL;
416         }
417       }
418     }
419   }
420 
421   p->state = LZMA2_STATE_ERROR;
422   return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED;
423 }
424 
425 
426 
427 
Lzma2Dec_DecodeToBuf(CLzma2Dec * p,Byte * dest,SizeT * destLen,const Byte * src,SizeT * srcLen,ELzmaFinishMode finishMode,ELzmaStatus * status)428 SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
429 {
430   SizeT outSize = *destLen, inSize = *srcLen;
431   *srcLen = *destLen = 0;
432 
433   for (;;)
434   {
435     SizeT inCur = inSize, outCur, dicPos;
436     ELzmaFinishMode curFinishMode;
437     SRes res;
438 
439     if (p->decoder.dicPos == p->decoder.dicBufSize)
440       p->decoder.dicPos = 0;
441     dicPos = p->decoder.dicPos;
442     curFinishMode = LZMA_FINISH_ANY;
443     outCur = p->decoder.dicBufSize - dicPos;
444 
445     if (outCur >= outSize)
446     {
447       outCur = outSize;
448       curFinishMode = finishMode;
449     }
450 
451     res = Lzma2Dec_DecodeToDic(p, dicPos + outCur, src, &inCur, curFinishMode, status);
452 
453     src += inCur;
454     inSize -= inCur;
455     *srcLen += inCur;
456     outCur = p->decoder.dicPos - dicPos;
457     memcpy(dest, p->decoder.dic + dicPos, outCur);
458     dest += outCur;
459     outSize -= outCur;
460     *destLen += outCur;
461     if (res != 0)
462       return res;
463     if (outCur == 0 || outSize == 0)
464       return SZ_OK;
465   }
466 }
467 
468 
Lzma2Decode(Byte * dest,SizeT * destLen,const Byte * src,SizeT * srcLen,Byte prop,ELzmaFinishMode finishMode,ELzmaStatus * status,ISzAllocPtr alloc)469 SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
470     Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc)
471 {
472   CLzma2Dec p;
473   SRes res;
474   SizeT outSize = *destLen, inSize = *srcLen;
475   *destLen = *srcLen = 0;
476   *status = LZMA_STATUS_NOT_SPECIFIED;
477   Lzma2Dec_Construct(&p);
478   RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc));
479   p.decoder.dic = dest;
480   p.decoder.dicBufSize = outSize;
481   Lzma2Dec_Init(&p);
482   *srcLen = inSize;
483   res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
484   *destLen = p.decoder.dicPos;
485   if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
486     res = SZ_ERROR_INPUT_EOF;
487   Lzma2Dec_FreeProbs(&p, alloc);
488   return res;
489 }
490