1 /****************************************************************************
2  *  This file is part of PPMd project                                       *
3  *  Written and distributed to public domain by Dmitry Shkarin 1997,        *
4  *  1999-2000                                                               *
5  *  Contents: model description and encoding/decoding routines              *
6  ****************************************************************************/
7 
8 static const int MAX_O=64; /* maximum allowed model order */
9 const uint TOP=1 << 24, BOT=1 << 15;
10 
11 template <class T>
_PPMD_SWAP(T & t1,T & t2)12 inline void _PPMD_SWAP(T& t1,T& t2) { T tmp=t1; t1=t2; t2=tmp; }
13 
14 
createChild(ModelPPM * Model,RARPPM_STATE * pStats,RARPPM_STATE & FirstState)15 inline RARPPM_CONTEXT* RARPPM_CONTEXT::createChild(ModelPPM *Model,RARPPM_STATE* pStats,
16                                              RARPPM_STATE& FirstState)
17 {
18   RARPPM_CONTEXT* pc = (RARPPM_CONTEXT*) Model->SubAlloc.AllocContext();
19   if ( pc )
20   {
21     pc->NumStats=1;
22     pc->OneState=FirstState;
23     pc->Suffix=this;
24     pStats->Successor=pc;
25   }
26   return pc;
27 }
28 
29 
ModelPPM()30 ModelPPM::ModelPPM()
31 {
32   MinContext=NULL;
33   MaxContext=NULL;
34   MedContext=NULL;
35 }
36 
37 
RestartModelRare()38 void ModelPPM::RestartModelRare()
39 {
40   int i, k, m;
41   memset(CharMask,0,sizeof(CharMask));
42   SubAlloc.InitSubAllocator();
43   InitRL=-(MaxOrder < 12 ? MaxOrder:12)-1;
44   MinContext = MaxContext = (RARPPM_CONTEXT*) SubAlloc.AllocContext();
45   if (MinContext == NULL)
46     throw std::bad_alloc();
47   MinContext->Suffix=NULL;
48   OrderFall=MaxOrder;
49   MinContext->U.SummFreq=(MinContext->NumStats=256)+1;
50   FoundState=MinContext->U.Stats=(RARPPM_STATE*)SubAlloc.AllocUnits(256/2);
51   if (FoundState == NULL)
52     throw std::bad_alloc();
53   for (RunLength=InitRL, PrevSuccess=i=0;i < 256;i++)
54   {
55     MinContext->U.Stats[i].Symbol=i;
56     MinContext->U.Stats[i].Freq=1;
57     MinContext->U.Stats[i].Successor=NULL;
58   }
59 
60   static const ushort InitBinEsc[]={
61     0x3CDD,0x1F3F,0x59BF,0x48F3,0x64A1,0x5ABC,0x6632,0x6051
62   };
63 
64   for (i=0;i < 128;i++)
65     for (k=0;k < 8;k++)
66       for (m=0;m < 64;m += 8)
67         BinSumm[i][k+m]=BIN_SCALE-InitBinEsc[k]/(i+2);
68   for (i=0;i < 25;i++)
69     for (k=0;k < 16;k++)
70       SEE2Cont[i][k].init(5*i+10);
71 }
72 
73 
StartModelRare(int MaxOrder)74 void ModelPPM::StartModelRare(int MaxOrder)
75 {
76   int i, k, m ,Step;
77   EscCount=1;
78 /*
79   if (MaxOrder < 2)
80   {
81     memset(CharMask,0,sizeof(CharMask));
82     OrderFall=ModelPPM::MaxOrder;
83     MinContext=MaxContext;
84     while (MinContext->Suffix != NULL)
85     {
86       MinContext=MinContext->Suffix;
87       OrderFall--;
88     }
89     FoundState=MinContext->U.Stats;
90     MinContext=MaxContext;
91   }
92   else
93 */
94   {
95     ModelPPM::MaxOrder=MaxOrder;
96     RestartModelRare();
97     NS2BSIndx[0]=2*0;
98     NS2BSIndx[1]=2*1;
99     memset(NS2BSIndx+2,2*2,9);
100     memset(NS2BSIndx+11,2*3,256-11);
101     for (i=0;i < 3;i++)
102       NS2Indx[i]=i;
103     for (m=i, k=Step=1;i < 256;i++)
104     {
105       NS2Indx[i]=m;
106       if ( !--k )
107       {
108         k = ++Step;
109         m++;
110       }
111     }
112     memset(HB2Flag,0,0x40);
113     memset(HB2Flag+0x40,0x08,0x100-0x40);
114     DummySEE2Cont.Shift=PERIOD_BITS;
115   }
116 }
117 
118 
rescale(ModelPPM * Model)119 void RARPPM_CONTEXT::rescale(ModelPPM *Model)
120 {
121   int OldNS=NumStats, i=NumStats-1, Adder, EscFreq;
122   RARPPM_STATE* p1, * p;
123   for (p=Model->FoundState;p != U.Stats;p--)
124     _PPMD_SWAP(p[0],p[-1]);
125   U.Stats->Freq += 4;
126   U.SummFreq += 4;
127   EscFreq=U.SummFreq-p->Freq;
128   Adder=(Model->OrderFall != 0);
129   U.SummFreq = (p->Freq=(p->Freq+Adder) >> 1);
130   do
131   {
132     EscFreq -= (++p)->Freq;
133     U.SummFreq += (p->Freq=(p->Freq+Adder) >> 1);
134     if (p[0].Freq > p[-1].Freq)
135     {
136       RARPPM_STATE tmp=*(p1=p);
137       do
138       {
139         p1[0]=p1[-1];
140       } while (--p1 != U.Stats && tmp.Freq > p1[-1].Freq);
141       *p1=tmp;
142     }
143   } while ( --i );
144   if (p->Freq == 0)
145   {
146     do
147     {
148       i++;
149     } while ((--p)->Freq == 0);
150     EscFreq += i;
151     if ((NumStats -= i) == 1)
152     {
153       RARPPM_STATE tmp=*U.Stats;
154       do
155       {
156         tmp.Freq-=(tmp.Freq >> 1);
157         EscFreq>>=1;
158       } while (EscFreq > 1);
159       Model->SubAlloc.FreeUnits(U.Stats,(OldNS+1) >> 1);
160       *(Model->FoundState=&OneState)=tmp;  return;
161     }
162   }
163   U.SummFreq += (EscFreq -= (EscFreq >> 1));
164   int n0=(OldNS+1) >> 1, n1=(NumStats+1) >> 1;
165   if (n0 != n1)
166     U.Stats = (RARPPM_STATE*) Model->SubAlloc.ShrinkUnits(U.Stats,n0,n1);
167   Model->FoundState=U.Stats;
168 }
169 
170 
CreateSuccessors(bool Skip,RARPPM_STATE * p1)171 inline RARPPM_CONTEXT* ModelPPM::CreateSuccessors(bool Skip,RARPPM_STATE* p1)
172 {
173   RARPPM_STATE UpState;
174   RARPPM_CONTEXT* pc=MinContext, * UpBranch=FoundState->Successor;
175   RARPPM_STATE * p, * ps[MAX_O], ** pps=ps;
176   if ( !Skip )
177   {
178     *pps++ = FoundState;
179     if ( !pc->Suffix )
180       goto NO_LOOP;
181   }
182   if ( p1 )
183   {
184     p=p1;
185     pc=pc->Suffix;
186     goto LOOP_ENTRY;
187   }
188   do
189   {
190     pc=pc->Suffix;
191     if (pc->NumStats != 1)
192     {
193       if ((p=pc->U.Stats)->Symbol != FoundState->Symbol)
194         do
195         {
196           p++;
197         } while (p->Symbol != FoundState->Symbol);
198     }
199     else
200       p=&(pc->OneState);
201 LOOP_ENTRY:
202     if (p->Successor != UpBranch)
203     {
204       pc=p->Successor;
205       break;
206 
207     }
208     // We ensure that PPM order input parameter does not exceed MAX_O (64),
209     // so we do not really need this check and added it for extra safety.
210     // See CVE-2017-17969 for details.
211     if (pps>=ps+ASIZE(ps))
212       return NULL;
213 
214     *pps++ = p;
215   } while ( pc->Suffix );
216 NO_LOOP:
217   if (pps == ps)
218     return pc;
219   UpState.Symbol=*(byte*) UpBranch;
220   UpState.Successor=(RARPPM_CONTEXT*) (((byte*) UpBranch)+1);
221   if (pc->NumStats != 1)
222   {
223     if ((byte*) pc <= SubAlloc.pText)
224       return(NULL);
225     if ((p=pc->U.Stats)->Symbol != UpState.Symbol)
226     do
227     {
228       p++;
229     } while (p->Symbol != UpState.Symbol);
230     uint cf=p->Freq-1;
231     uint s0=pc->U.SummFreq-pc->NumStats-cf;
232     UpState.Freq=1+((2*cf <= s0)?(5*cf > s0):((2*cf+3*s0-1)/(2*s0)));
233   }
234   else
235     UpState.Freq=pc->OneState.Freq;
236   do
237   {
238     pc = pc->createChild(this,*--pps,UpState);
239     if ( !pc )
240       return NULL;
241   } while (pps != ps);
242   return pc;
243 }
244 
245 
UpdateModel()246 inline void ModelPPM::UpdateModel()
247 {
248   RARPPM_STATE fs = *FoundState, *p = NULL;
249   RARPPM_CONTEXT *pc, *Successor;
250   uint ns1, ns, cf, sf, s0;
251   if (fs.Freq < MAX_FREQ/4 && (pc=MinContext->Suffix) != NULL)
252   {
253     if (pc->NumStats != 1)
254     {
255       if ((p=pc->U.Stats)->Symbol != fs.Symbol)
256       {
257         do
258         {
259           p++;
260         } while (p->Symbol != fs.Symbol);
261         if (p[0].Freq >= p[-1].Freq)
262         {
263           _PPMD_SWAP(p[0],p[-1]);
264           p--;
265         }
266       }
267       if (p->Freq < MAX_FREQ-9)
268       {
269         p->Freq += 2;
270         pc->U.SummFreq += 2;
271       }
272     }
273     else
274     {
275       p=&(pc->OneState);
276       p->Freq += (p->Freq < 32);
277     }
278   }
279   if ( !OrderFall )
280   {
281     MinContext=MaxContext=FoundState->Successor=CreateSuccessors(TRUE,p);
282     if ( !MinContext )
283       goto RESTART_MODEL;
284     return;
285   }
286   *SubAlloc.pText++ = fs.Symbol;
287   Successor = (RARPPM_CONTEXT*) SubAlloc.pText;
288   if (SubAlloc.pText >= SubAlloc.FakeUnitsStart)
289     goto RESTART_MODEL;
290   if ( fs.Successor )
291   {
292     if ((byte*) fs.Successor <= SubAlloc.pText &&
293         (fs.Successor=CreateSuccessors(FALSE,p)) == NULL)
294       goto RESTART_MODEL;
295     if ( !--OrderFall )
296     {
297       Successor=fs.Successor;
298       SubAlloc.pText -= (MaxContext != MinContext);
299     }
300   }
301   else
302   {
303     FoundState->Successor=Successor;
304     fs.Successor=MinContext;
305   }
306   s0=MinContext->U.SummFreq-(ns=MinContext->NumStats)-(fs.Freq-1);
307   for (pc=MaxContext;pc != MinContext;pc=pc->Suffix)
308   {
309     if ((ns1=pc->NumStats) != 1)
310     {
311       if ((ns1 & 1) == 0)
312       {
313         pc->U.Stats=(RARPPM_STATE*) SubAlloc.ExpandUnits(pc->U.Stats,ns1 >> 1);
314         if ( !pc->U.Stats )
315           goto RESTART_MODEL;
316       }
317       pc->U.SummFreq += (2*ns1 < ns)+2*((4*ns1 <= ns) & (pc->U.SummFreq <= 8*ns1));
318     }
319     else
320     {
321       p=(RARPPM_STATE*) SubAlloc.AllocUnits(1);
322       if ( !p )
323         goto RESTART_MODEL;
324       *p=pc->OneState;
325       pc->U.Stats=p;
326       if (p->Freq < MAX_FREQ/4-1)
327         p->Freq += p->Freq;
328       else
329         p->Freq  = MAX_FREQ-4;
330       pc->U.SummFreq=p->Freq+InitEsc+(ns > 3);
331     }
332     cf=2*fs.Freq*(pc->U.SummFreq+6);
333     sf=s0+pc->U.SummFreq;
334     if (cf < 6*sf)
335     {
336       cf=1+(cf > sf)+(cf >= 4*sf);
337       pc->U.SummFreq += 3;
338     }
339     else
340     {
341       cf=4+(cf >= 9*sf)+(cf >= 12*sf)+(cf >= 15*sf);
342       pc->U.SummFreq += cf;
343     }
344     p=pc->U.Stats+ns1;
345     p->Successor=Successor;
346     p->Symbol = fs.Symbol;
347     p->Freq = cf;
348     pc->NumStats=++ns1;
349   }
350   MaxContext=MinContext=fs.Successor;
351   return;
352 RESTART_MODEL:
353   RestartModelRare();
354   EscCount=0;
355 }
356 
357 
358 // Tabulated escapes for exponential symbol distribution
359 static const byte ExpEscape[16]={ 25,14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
360 #define GET_MEAN(SUMM,SHIFT,ROUND) ((SUMM+(1 << (SHIFT-ROUND))) >> (SHIFT))
361 
362 
363 
decodeBinSymbol(ModelPPM * Model)364 inline void RARPPM_CONTEXT::decodeBinSymbol(ModelPPM *Model)
365 {
366   RARPPM_STATE& rs=OneState;
367   Model->HiBitsFlag=Model->HB2Flag[Model->FoundState->Symbol];
368   ushort& bs=Model->BinSumm[rs.Freq-1][Model->PrevSuccess+
369            Model->NS2BSIndx[Suffix->NumStats-1]+
370            Model->HiBitsFlag+2*Model->HB2Flag[rs.Symbol]+
371            ((Model->RunLength >> 26) & 0x20)];
372   if (Model->Coder.GetCurrentShiftCount(TOT_BITS) < bs)
373   {
374     Model->FoundState=&rs;
375     rs.Freq += (rs.Freq < 128);
376     Model->Coder.SubRange.LowCount=0;
377     Model->Coder.SubRange.HighCount=bs;
378     bs = GET_SHORT16(bs+INTERVAL-GET_MEAN(bs,PERIOD_BITS,2));
379     Model->PrevSuccess=1;
380     Model->RunLength++;
381   }
382   else
383   {
384     Model->Coder.SubRange.LowCount=bs;
385     bs = GET_SHORT16(bs-GET_MEAN(bs,PERIOD_BITS,2));
386     Model->Coder.SubRange.HighCount=BIN_SCALE;
387     Model->InitEsc=ExpEscape[bs >> 10];
388     Model->NumMasked=1;
389     Model->CharMask[rs.Symbol]=Model->EscCount;
390     Model->PrevSuccess=0;
391     Model->FoundState=NULL;
392   }
393 }
394 
395 
update1(ModelPPM * Model,RARPPM_STATE * p)396 inline void RARPPM_CONTEXT::update1(ModelPPM *Model,RARPPM_STATE* p)
397 {
398   (Model->FoundState=p)->Freq += 4;
399   U.SummFreq += 4;
400   if (p[0].Freq > p[-1].Freq)
401   {
402     _PPMD_SWAP(p[0],p[-1]);
403     Model->FoundState=--p;
404     if (p->Freq > MAX_FREQ)
405       rescale(Model);
406   }
407 }
408 
409 
410 
411 
decodeSymbol1(ModelPPM * Model)412 inline bool RARPPM_CONTEXT::decodeSymbol1(ModelPPM *Model)
413 {
414   Model->Coder.SubRange.scale=U.SummFreq;
415   RARPPM_STATE* p=U.Stats;
416   int i, HiCnt;
417   int count=Model->Coder.GetCurrentCount();
418   if (count>=(int)Model->Coder.SubRange.scale)
419     return(false);
420   if (count < (HiCnt=p->Freq))
421   {
422     Model->PrevSuccess=(2*(Model->Coder.SubRange.HighCount=HiCnt) > Model->Coder.SubRange.scale);
423     Model->RunLength += Model->PrevSuccess;
424     (Model->FoundState=p)->Freq=(HiCnt += 4);
425     U.SummFreq += 4;
426     if (HiCnt > MAX_FREQ)
427       rescale(Model);
428     Model->Coder.SubRange.LowCount=0;
429     return(true);
430   }
431   else
432     if (Model->FoundState==NULL)
433       return(false);
434   Model->PrevSuccess=0;
435   i=NumStats-1;
436   while ((HiCnt += (++p)->Freq) <= count)
437     if (--i == 0)
438     {
439       Model->HiBitsFlag=Model->HB2Flag[Model->FoundState->Symbol];
440       Model->Coder.SubRange.LowCount=HiCnt;
441       Model->CharMask[p->Symbol]=Model->EscCount;
442       i=(Model->NumMasked=NumStats)-1;
443       Model->FoundState=NULL;
444       do
445       {
446         Model->CharMask[(--p)->Symbol]=Model->EscCount;
447       } while ( --i );
448       Model->Coder.SubRange.HighCount=Model->Coder.SubRange.scale;
449       return(true);
450     }
451   Model->Coder.SubRange.LowCount=(Model->Coder.SubRange.HighCount=HiCnt)-p->Freq;
452   update1(Model,p);
453   return(true);
454 }
455 
456 
update2(ModelPPM * Model,RARPPM_STATE * p)457 inline void RARPPM_CONTEXT::update2(ModelPPM *Model,RARPPM_STATE* p)
458 {
459   (Model->FoundState=p)->Freq += 4;
460   U.SummFreq += 4;
461   if (p->Freq > MAX_FREQ)
462     rescale(Model);
463   Model->EscCount++;
464   Model->RunLength=Model->InitRL;
465 }
466 
467 
makeEscFreq2(ModelPPM * Model,int Diff)468 inline RARPPM_SEE2_CONTEXT* RARPPM_CONTEXT::makeEscFreq2(ModelPPM *Model,int Diff)
469 {
470   RARPPM_SEE2_CONTEXT* psee2c;
471   if (NumStats != 256)
472   {
473     psee2c=Model->SEE2Cont[Model->NS2Indx[Diff-1]]+
474            (Diff < Suffix->NumStats-NumStats)+
475            2*(U.SummFreq < 11*NumStats)+4*(Model->NumMasked > Diff)+
476            Model->HiBitsFlag;
477     Model->Coder.SubRange.scale=psee2c->getMean();
478   }
479   else
480   {
481     psee2c=&Model->DummySEE2Cont;
482     Model->Coder.SubRange.scale=1;
483   }
484   return psee2c;
485 }
486 
487 
488 
489 
decodeSymbol2(ModelPPM * Model)490 inline bool RARPPM_CONTEXT::decodeSymbol2(ModelPPM *Model)
491 {
492   int count, HiCnt, i=NumStats-Model->NumMasked;
493   RARPPM_SEE2_CONTEXT* psee2c=makeEscFreq2(Model,i);
494   RARPPM_STATE* ps[256], ** pps=ps, * p=U.Stats-1;
495   HiCnt=0;
496   do
497   {
498     do
499     {
500       p++;
501     } while (Model->CharMask[p->Symbol] == Model->EscCount);
502     HiCnt += p->Freq;
503 
504     // We do not reuse PPMd coder in unstable state, so we do not really need
505     // this check and added it for extra safety. See CVE-2017-17969 for details.
506     if (pps>=ps+ASIZE(ps))
507       return false;
508 
509     *pps++ = p;
510   } while ( --i );
511   Model->Coder.SubRange.scale += HiCnt;
512   count=Model->Coder.GetCurrentCount();
513   if (count>=(int)Model->Coder.SubRange.scale)
514     return(false);
515   p=*(pps=ps);
516   if (count < HiCnt)
517   {
518     HiCnt=0;
519     while ((HiCnt += p->Freq) <= count)
520     {
521       pps++;
522       if (pps>=ps+ASIZE(ps)) // Extra safety check.
523         return false;
524       p=*pps;
525     }
526     Model->Coder.SubRange.LowCount = (Model->Coder.SubRange.HighCount=HiCnt)-p->Freq;
527     psee2c->update();
528     update2(Model,p);
529   }
530   else
531   {
532     Model->Coder.SubRange.LowCount=HiCnt;
533     Model->Coder.SubRange.HighCount=Model->Coder.SubRange.scale;
534     i=NumStats-Model->NumMasked;
535     pps--;
536     do
537     {
538       pps++;
539       if (pps>=ps+ASIZE(ps)) // Extra safety check.
540         return false;
541       Model->CharMask[(*pps)->Symbol]=Model->EscCount;
542     } while ( --i );
543     psee2c->Summ += Model->Coder.SubRange.scale;
544     Model->NumMasked = NumStats;
545   }
546   return true;
547 }
548 
549 
ClearMask()550 inline void ModelPPM::ClearMask()
551 {
552   EscCount=1;
553   memset(CharMask,0,sizeof(CharMask));
554 }
555 
556 
557 
558 
559 // reset PPM variables after data error allowing safe resuming
560 // of further data processing
CleanUp()561 void ModelPPM::CleanUp()
562 {
563   SubAlloc.StopSubAllocator();
564   SubAlloc.StartSubAllocator(1);
565   StartModelRare(2);
566 }
567 
568 
DecodeInit(Unpack * UnpackRead,int & EscChar)569 bool ModelPPM::DecodeInit(Unpack *UnpackRead,int &EscChar)
570 {
571   int MaxOrder=UnpackRead->GetChar();
572   bool Reset=(MaxOrder & 0x20)!=0;
573 
574   int MaxMB;
575   if (Reset)
576     MaxMB=UnpackRead->GetChar();
577   else
578     if (SubAlloc.GetAllocatedMemory()==0)
579       return(false);
580   if (MaxOrder & 0x40)
581     EscChar=UnpackRead->GetChar();
582   Coder.InitDecoder(UnpackRead);
583   if (Reset)
584   {
585     MaxOrder=(MaxOrder & 0x1f)+1;
586     if (MaxOrder>16)
587       MaxOrder=16+(MaxOrder-16)*3;
588     if (MaxOrder==1)
589     {
590       SubAlloc.StopSubAllocator();
591       return(false);
592     }
593     SubAlloc.StartSubAllocator(MaxMB+1);
594     StartModelRare(MaxOrder);
595   }
596   return(MinContext!=NULL);
597 }
598 
599 
DecodeChar()600 int ModelPPM::DecodeChar()
601 {
602   if ((byte*)MinContext <= SubAlloc.pText || (byte*)MinContext>SubAlloc.HeapEnd)
603     return(-1);
604   if (MinContext->NumStats != 1)
605   {
606     if ((byte*)MinContext->U.Stats <= SubAlloc.pText || (byte*)MinContext->U.Stats>SubAlloc.HeapEnd)
607       return(-1);
608     if (!MinContext->decodeSymbol1(this))
609       return(-1);
610   }
611   else
612     MinContext->decodeBinSymbol(this);
613   Coder.Decode();
614   while ( !FoundState )
615   {
616     ARI_DEC_NORMALIZE(Coder.code,Coder.low,Coder.range,Coder.UnpackRead);
617     do
618     {
619       OrderFall++;
620       MinContext=MinContext->Suffix;
621       if ((byte*)MinContext <= SubAlloc.pText || (byte*)MinContext>SubAlloc.HeapEnd)
622         return(-1);
623     } while (MinContext->NumStats == NumMasked);
624     if (!MinContext->decodeSymbol2(this))
625       return(-1);
626     Coder.Decode();
627   }
628   int Symbol=FoundState->Symbol;
629   if (!OrderFall && (byte*) FoundState->Successor > SubAlloc.pText)
630     MinContext=MaxContext=FoundState->Successor;
631   else
632   {
633     UpdateModel();
634     if (EscCount == 0)
635       ClearMask();
636   }
637   ARI_DEC_NORMALIZE(Coder.code,Coder.low,Coder.range,Coder.UnpackRead);
638   return(Symbol);
639 }
640