1 /****************************************************************************
2 * This file is part of PPMd project *
3 * Written and distributed to public domain by Dmitry Shkarin 1997, *
4 * 1999-2000 *
5 * Contents: model description and encoding/decoding routines *
6 ****************************************************************************/
7
8 static const int MAX_O=64; /* maximum allowed model order */
9 const uint TOP=1 << 24, BOT=1 << 15;
10
11 template <class T>
_PPMD_SWAP(T & t1,T & t2)12 inline void _PPMD_SWAP(T& t1,T& t2) { T tmp=t1; t1=t2; t2=tmp; }
13
14
createChild(ModelPPM * Model,RARPPM_STATE * pStats,RARPPM_STATE & FirstState)15 inline RARPPM_CONTEXT* RARPPM_CONTEXT::createChild(ModelPPM *Model,RARPPM_STATE* pStats,
16 RARPPM_STATE& FirstState)
17 {
18 RARPPM_CONTEXT* pc = (RARPPM_CONTEXT*) Model->SubAlloc.AllocContext();
19 if ( pc )
20 {
21 pc->NumStats=1;
22 pc->OneState=FirstState;
23 pc->Suffix=this;
24 pStats->Successor=pc;
25 }
26 return pc;
27 }
28
29
ModelPPM()30 ModelPPM::ModelPPM()
31 {
32 MinContext=NULL;
33 MaxContext=NULL;
34 MedContext=NULL;
35 }
36
37
RestartModelRare()38 void ModelPPM::RestartModelRare()
39 {
40 int i, k, m;
41 memset(CharMask,0,sizeof(CharMask));
42 SubAlloc.InitSubAllocator();
43 InitRL=-(MaxOrder < 12 ? MaxOrder:12)-1;
44 MinContext = MaxContext = (RARPPM_CONTEXT*) SubAlloc.AllocContext();
45 if (MinContext == NULL)
46 throw std::bad_alloc();
47 MinContext->Suffix=NULL;
48 OrderFall=MaxOrder;
49 MinContext->U.SummFreq=(MinContext->NumStats=256)+1;
50 FoundState=MinContext->U.Stats=(RARPPM_STATE*)SubAlloc.AllocUnits(256/2);
51 if (FoundState == NULL)
52 throw std::bad_alloc();
53 for (RunLength=InitRL, PrevSuccess=i=0;i < 256;i++)
54 {
55 MinContext->U.Stats[i].Symbol=i;
56 MinContext->U.Stats[i].Freq=1;
57 MinContext->U.Stats[i].Successor=NULL;
58 }
59
60 static const ushort InitBinEsc[]={
61 0x3CDD,0x1F3F,0x59BF,0x48F3,0x64A1,0x5ABC,0x6632,0x6051
62 };
63
64 for (i=0;i < 128;i++)
65 for (k=0;k < 8;k++)
66 for (m=0;m < 64;m += 8)
67 BinSumm[i][k+m]=BIN_SCALE-InitBinEsc[k]/(i+2);
68 for (i=0;i < 25;i++)
69 for (k=0;k < 16;k++)
70 SEE2Cont[i][k].init(5*i+10);
71 }
72
73
StartModelRare(int MaxOrder)74 void ModelPPM::StartModelRare(int MaxOrder)
75 {
76 int i, k, m ,Step;
77 EscCount=1;
78 /*
79 if (MaxOrder < 2)
80 {
81 memset(CharMask,0,sizeof(CharMask));
82 OrderFall=ModelPPM::MaxOrder;
83 MinContext=MaxContext;
84 while (MinContext->Suffix != NULL)
85 {
86 MinContext=MinContext->Suffix;
87 OrderFall--;
88 }
89 FoundState=MinContext->U.Stats;
90 MinContext=MaxContext;
91 }
92 else
93 */
94 {
95 ModelPPM::MaxOrder=MaxOrder;
96 RestartModelRare();
97 NS2BSIndx[0]=2*0;
98 NS2BSIndx[1]=2*1;
99 memset(NS2BSIndx+2,2*2,9);
100 memset(NS2BSIndx+11,2*3,256-11);
101 for (i=0;i < 3;i++)
102 NS2Indx[i]=i;
103 for (m=i, k=Step=1;i < 256;i++)
104 {
105 NS2Indx[i]=m;
106 if ( !--k )
107 {
108 k = ++Step;
109 m++;
110 }
111 }
112 memset(HB2Flag,0,0x40);
113 memset(HB2Flag+0x40,0x08,0x100-0x40);
114 DummySEE2Cont.Shift=PERIOD_BITS;
115 }
116 }
117
118
rescale(ModelPPM * Model)119 void RARPPM_CONTEXT::rescale(ModelPPM *Model)
120 {
121 int OldNS=NumStats, i=NumStats-1, Adder, EscFreq;
122 RARPPM_STATE* p1, * p;
123 for (p=Model->FoundState;p != U.Stats;p--)
124 _PPMD_SWAP(p[0],p[-1]);
125 U.Stats->Freq += 4;
126 U.SummFreq += 4;
127 EscFreq=U.SummFreq-p->Freq;
128 Adder=(Model->OrderFall != 0);
129 U.SummFreq = (p->Freq=(p->Freq+Adder) >> 1);
130 do
131 {
132 EscFreq -= (++p)->Freq;
133 U.SummFreq += (p->Freq=(p->Freq+Adder) >> 1);
134 if (p[0].Freq > p[-1].Freq)
135 {
136 RARPPM_STATE tmp=*(p1=p);
137 do
138 {
139 p1[0]=p1[-1];
140 } while (--p1 != U.Stats && tmp.Freq > p1[-1].Freq);
141 *p1=tmp;
142 }
143 } while ( --i );
144 if (p->Freq == 0)
145 {
146 do
147 {
148 i++;
149 } while ((--p)->Freq == 0);
150 EscFreq += i;
151 if ((NumStats -= i) == 1)
152 {
153 RARPPM_STATE tmp=*U.Stats;
154 do
155 {
156 tmp.Freq-=(tmp.Freq >> 1);
157 EscFreq>>=1;
158 } while (EscFreq > 1);
159 Model->SubAlloc.FreeUnits(U.Stats,(OldNS+1) >> 1);
160 *(Model->FoundState=&OneState)=tmp; return;
161 }
162 }
163 U.SummFreq += (EscFreq -= (EscFreq >> 1));
164 int n0=(OldNS+1) >> 1, n1=(NumStats+1) >> 1;
165 if (n0 != n1)
166 U.Stats = (RARPPM_STATE*) Model->SubAlloc.ShrinkUnits(U.Stats,n0,n1);
167 Model->FoundState=U.Stats;
168 }
169
170
CreateSuccessors(bool Skip,RARPPM_STATE * p1)171 inline RARPPM_CONTEXT* ModelPPM::CreateSuccessors(bool Skip,RARPPM_STATE* p1)
172 {
173 RARPPM_STATE UpState;
174 RARPPM_CONTEXT* pc=MinContext, * UpBranch=FoundState->Successor;
175 RARPPM_STATE * p, * ps[MAX_O], ** pps=ps;
176 if ( !Skip )
177 {
178 *pps++ = FoundState;
179 if ( !pc->Suffix )
180 goto NO_LOOP;
181 }
182 if ( p1 )
183 {
184 p=p1;
185 pc=pc->Suffix;
186 goto LOOP_ENTRY;
187 }
188 do
189 {
190 pc=pc->Suffix;
191 if (pc->NumStats != 1)
192 {
193 if ((p=pc->U.Stats)->Symbol != FoundState->Symbol)
194 do
195 {
196 p++;
197 } while (p->Symbol != FoundState->Symbol);
198 }
199 else
200 p=&(pc->OneState);
201 LOOP_ENTRY:
202 if (p->Successor != UpBranch)
203 {
204 pc=p->Successor;
205 break;
206
207 }
208 // We ensure that PPM order input parameter does not exceed MAX_O (64),
209 // so we do not really need this check and added it for extra safety.
210 // See CVE-2017-17969 for details.
211 if (pps>=ps+ASIZE(ps))
212 return NULL;
213
214 *pps++ = p;
215 } while ( pc->Suffix );
216 NO_LOOP:
217 if (pps == ps)
218 return pc;
219 UpState.Symbol=*(byte*) UpBranch;
220 UpState.Successor=(RARPPM_CONTEXT*) (((byte*) UpBranch)+1);
221 if (pc->NumStats != 1)
222 {
223 if ((byte*) pc <= SubAlloc.pText)
224 return(NULL);
225 if ((p=pc->U.Stats)->Symbol != UpState.Symbol)
226 do
227 {
228 p++;
229 } while (p->Symbol != UpState.Symbol);
230 uint cf=p->Freq-1;
231 uint s0=pc->U.SummFreq-pc->NumStats-cf;
232 UpState.Freq=1+((2*cf <= s0)?(5*cf > s0):((2*cf+3*s0-1)/(2*s0)));
233 }
234 else
235 UpState.Freq=pc->OneState.Freq;
236 do
237 {
238 pc = pc->createChild(this,*--pps,UpState);
239 if ( !pc )
240 return NULL;
241 } while (pps != ps);
242 return pc;
243 }
244
245
UpdateModel()246 inline void ModelPPM::UpdateModel()
247 {
248 RARPPM_STATE fs = *FoundState, *p = NULL;
249 RARPPM_CONTEXT *pc, *Successor;
250 uint ns1, ns, cf, sf, s0;
251 if (fs.Freq < MAX_FREQ/4 && (pc=MinContext->Suffix) != NULL)
252 {
253 if (pc->NumStats != 1)
254 {
255 if ((p=pc->U.Stats)->Symbol != fs.Symbol)
256 {
257 do
258 {
259 p++;
260 } while (p->Symbol != fs.Symbol);
261 if (p[0].Freq >= p[-1].Freq)
262 {
263 _PPMD_SWAP(p[0],p[-1]);
264 p--;
265 }
266 }
267 if (p->Freq < MAX_FREQ-9)
268 {
269 p->Freq += 2;
270 pc->U.SummFreq += 2;
271 }
272 }
273 else
274 {
275 p=&(pc->OneState);
276 p->Freq += (p->Freq < 32);
277 }
278 }
279 if ( !OrderFall )
280 {
281 MinContext=MaxContext=FoundState->Successor=CreateSuccessors(TRUE,p);
282 if ( !MinContext )
283 goto RESTART_MODEL;
284 return;
285 }
286 *SubAlloc.pText++ = fs.Symbol;
287 Successor = (RARPPM_CONTEXT*) SubAlloc.pText;
288 if (SubAlloc.pText >= SubAlloc.FakeUnitsStart)
289 goto RESTART_MODEL;
290 if ( fs.Successor )
291 {
292 if ((byte*) fs.Successor <= SubAlloc.pText &&
293 (fs.Successor=CreateSuccessors(FALSE,p)) == NULL)
294 goto RESTART_MODEL;
295 if ( !--OrderFall )
296 {
297 Successor=fs.Successor;
298 SubAlloc.pText -= (MaxContext != MinContext);
299 }
300 }
301 else
302 {
303 FoundState->Successor=Successor;
304 fs.Successor=MinContext;
305 }
306 s0=MinContext->U.SummFreq-(ns=MinContext->NumStats)-(fs.Freq-1);
307 for (pc=MaxContext;pc != MinContext;pc=pc->Suffix)
308 {
309 if ((ns1=pc->NumStats) != 1)
310 {
311 if ((ns1 & 1) == 0)
312 {
313 pc->U.Stats=(RARPPM_STATE*) SubAlloc.ExpandUnits(pc->U.Stats,ns1 >> 1);
314 if ( !pc->U.Stats )
315 goto RESTART_MODEL;
316 }
317 pc->U.SummFreq += (2*ns1 < ns)+2*((4*ns1 <= ns) & (pc->U.SummFreq <= 8*ns1));
318 }
319 else
320 {
321 p=(RARPPM_STATE*) SubAlloc.AllocUnits(1);
322 if ( !p )
323 goto RESTART_MODEL;
324 *p=pc->OneState;
325 pc->U.Stats=p;
326 if (p->Freq < MAX_FREQ/4-1)
327 p->Freq += p->Freq;
328 else
329 p->Freq = MAX_FREQ-4;
330 pc->U.SummFreq=p->Freq+InitEsc+(ns > 3);
331 }
332 cf=2*fs.Freq*(pc->U.SummFreq+6);
333 sf=s0+pc->U.SummFreq;
334 if (cf < 6*sf)
335 {
336 cf=1+(cf > sf)+(cf >= 4*sf);
337 pc->U.SummFreq += 3;
338 }
339 else
340 {
341 cf=4+(cf >= 9*sf)+(cf >= 12*sf)+(cf >= 15*sf);
342 pc->U.SummFreq += cf;
343 }
344 p=pc->U.Stats+ns1;
345 p->Successor=Successor;
346 p->Symbol = fs.Symbol;
347 p->Freq = cf;
348 pc->NumStats=++ns1;
349 }
350 MaxContext=MinContext=fs.Successor;
351 return;
352 RESTART_MODEL:
353 RestartModelRare();
354 EscCount=0;
355 }
356
357
358 // Tabulated escapes for exponential symbol distribution
359 static const byte ExpEscape[16]={ 25,14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
360 #define GET_MEAN(SUMM,SHIFT,ROUND) ((SUMM+(1 << (SHIFT-ROUND))) >> (SHIFT))
361
362
363
decodeBinSymbol(ModelPPM * Model)364 inline void RARPPM_CONTEXT::decodeBinSymbol(ModelPPM *Model)
365 {
366 RARPPM_STATE& rs=OneState;
367 Model->HiBitsFlag=Model->HB2Flag[Model->FoundState->Symbol];
368 ushort& bs=Model->BinSumm[rs.Freq-1][Model->PrevSuccess+
369 Model->NS2BSIndx[Suffix->NumStats-1]+
370 Model->HiBitsFlag+2*Model->HB2Flag[rs.Symbol]+
371 ((Model->RunLength >> 26) & 0x20)];
372 if (Model->Coder.GetCurrentShiftCount(TOT_BITS) < bs)
373 {
374 Model->FoundState=&rs;
375 rs.Freq += (rs.Freq < 128);
376 Model->Coder.SubRange.LowCount=0;
377 Model->Coder.SubRange.HighCount=bs;
378 bs = GET_SHORT16(bs+INTERVAL-GET_MEAN(bs,PERIOD_BITS,2));
379 Model->PrevSuccess=1;
380 Model->RunLength++;
381 }
382 else
383 {
384 Model->Coder.SubRange.LowCount=bs;
385 bs = GET_SHORT16(bs-GET_MEAN(bs,PERIOD_BITS,2));
386 Model->Coder.SubRange.HighCount=BIN_SCALE;
387 Model->InitEsc=ExpEscape[bs >> 10];
388 Model->NumMasked=1;
389 Model->CharMask[rs.Symbol]=Model->EscCount;
390 Model->PrevSuccess=0;
391 Model->FoundState=NULL;
392 }
393 }
394
395
update1(ModelPPM * Model,RARPPM_STATE * p)396 inline void RARPPM_CONTEXT::update1(ModelPPM *Model,RARPPM_STATE* p)
397 {
398 (Model->FoundState=p)->Freq += 4;
399 U.SummFreq += 4;
400 if (p[0].Freq > p[-1].Freq)
401 {
402 _PPMD_SWAP(p[0],p[-1]);
403 Model->FoundState=--p;
404 if (p->Freq > MAX_FREQ)
405 rescale(Model);
406 }
407 }
408
409
410
411
decodeSymbol1(ModelPPM * Model)412 inline bool RARPPM_CONTEXT::decodeSymbol1(ModelPPM *Model)
413 {
414 Model->Coder.SubRange.scale=U.SummFreq;
415 RARPPM_STATE* p=U.Stats;
416 int i, HiCnt;
417 int count=Model->Coder.GetCurrentCount();
418 if (count>=(int)Model->Coder.SubRange.scale)
419 return(false);
420 if (count < (HiCnt=p->Freq))
421 {
422 Model->PrevSuccess=(2*(Model->Coder.SubRange.HighCount=HiCnt) > Model->Coder.SubRange.scale);
423 Model->RunLength += Model->PrevSuccess;
424 (Model->FoundState=p)->Freq=(HiCnt += 4);
425 U.SummFreq += 4;
426 if (HiCnt > MAX_FREQ)
427 rescale(Model);
428 Model->Coder.SubRange.LowCount=0;
429 return(true);
430 }
431 else
432 if (Model->FoundState==NULL)
433 return(false);
434 Model->PrevSuccess=0;
435 i=NumStats-1;
436 while ((HiCnt += (++p)->Freq) <= count)
437 if (--i == 0)
438 {
439 Model->HiBitsFlag=Model->HB2Flag[Model->FoundState->Symbol];
440 Model->Coder.SubRange.LowCount=HiCnt;
441 Model->CharMask[p->Symbol]=Model->EscCount;
442 i=(Model->NumMasked=NumStats)-1;
443 Model->FoundState=NULL;
444 do
445 {
446 Model->CharMask[(--p)->Symbol]=Model->EscCount;
447 } while ( --i );
448 Model->Coder.SubRange.HighCount=Model->Coder.SubRange.scale;
449 return(true);
450 }
451 Model->Coder.SubRange.LowCount=(Model->Coder.SubRange.HighCount=HiCnt)-p->Freq;
452 update1(Model,p);
453 return(true);
454 }
455
456
update2(ModelPPM * Model,RARPPM_STATE * p)457 inline void RARPPM_CONTEXT::update2(ModelPPM *Model,RARPPM_STATE* p)
458 {
459 (Model->FoundState=p)->Freq += 4;
460 U.SummFreq += 4;
461 if (p->Freq > MAX_FREQ)
462 rescale(Model);
463 Model->EscCount++;
464 Model->RunLength=Model->InitRL;
465 }
466
467
makeEscFreq2(ModelPPM * Model,int Diff)468 inline RARPPM_SEE2_CONTEXT* RARPPM_CONTEXT::makeEscFreq2(ModelPPM *Model,int Diff)
469 {
470 RARPPM_SEE2_CONTEXT* psee2c;
471 if (NumStats != 256)
472 {
473 psee2c=Model->SEE2Cont[Model->NS2Indx[Diff-1]]+
474 (Diff < Suffix->NumStats-NumStats)+
475 2*(U.SummFreq < 11*NumStats)+4*(Model->NumMasked > Diff)+
476 Model->HiBitsFlag;
477 Model->Coder.SubRange.scale=psee2c->getMean();
478 }
479 else
480 {
481 psee2c=&Model->DummySEE2Cont;
482 Model->Coder.SubRange.scale=1;
483 }
484 return psee2c;
485 }
486
487
488
489
decodeSymbol2(ModelPPM * Model)490 inline bool RARPPM_CONTEXT::decodeSymbol2(ModelPPM *Model)
491 {
492 int count, HiCnt, i=NumStats-Model->NumMasked;
493 RARPPM_SEE2_CONTEXT* psee2c=makeEscFreq2(Model,i);
494 RARPPM_STATE* ps[256], ** pps=ps, * p=U.Stats-1;
495 HiCnt=0;
496 do
497 {
498 do
499 {
500 p++;
501 } while (Model->CharMask[p->Symbol] == Model->EscCount);
502 HiCnt += p->Freq;
503
504 // We do not reuse PPMd coder in unstable state, so we do not really need
505 // this check and added it for extra safety. See CVE-2017-17969 for details.
506 if (pps>=ps+ASIZE(ps))
507 return false;
508
509 *pps++ = p;
510 } while ( --i );
511 Model->Coder.SubRange.scale += HiCnt;
512 count=Model->Coder.GetCurrentCount();
513 if (count>=(int)Model->Coder.SubRange.scale)
514 return(false);
515 p=*(pps=ps);
516 if (count < HiCnt)
517 {
518 HiCnt=0;
519 while ((HiCnt += p->Freq) <= count)
520 {
521 pps++;
522 if (pps>=ps+ASIZE(ps)) // Extra safety check.
523 return false;
524 p=*pps;
525 }
526 Model->Coder.SubRange.LowCount = (Model->Coder.SubRange.HighCount=HiCnt)-p->Freq;
527 psee2c->update();
528 update2(Model,p);
529 }
530 else
531 {
532 Model->Coder.SubRange.LowCount=HiCnt;
533 Model->Coder.SubRange.HighCount=Model->Coder.SubRange.scale;
534 i=NumStats-Model->NumMasked;
535 pps--;
536 do
537 {
538 pps++;
539 if (pps>=ps+ASIZE(ps)) // Extra safety check.
540 return false;
541 Model->CharMask[(*pps)->Symbol]=Model->EscCount;
542 } while ( --i );
543 psee2c->Summ += Model->Coder.SubRange.scale;
544 Model->NumMasked = NumStats;
545 }
546 return true;
547 }
548
549
ClearMask()550 inline void ModelPPM::ClearMask()
551 {
552 EscCount=1;
553 memset(CharMask,0,sizeof(CharMask));
554 }
555
556
557
558
559 // reset PPM variables after data error allowing safe resuming
560 // of further data processing
CleanUp()561 void ModelPPM::CleanUp()
562 {
563 SubAlloc.StopSubAllocator();
564 SubAlloc.StartSubAllocator(1);
565 StartModelRare(2);
566 }
567
568
DecodeInit(Unpack * UnpackRead,int & EscChar)569 bool ModelPPM::DecodeInit(Unpack *UnpackRead,int &EscChar)
570 {
571 int MaxOrder=UnpackRead->GetChar();
572 bool Reset=(MaxOrder & 0x20)!=0;
573
574 int MaxMB;
575 if (Reset)
576 MaxMB=UnpackRead->GetChar();
577 else
578 if (SubAlloc.GetAllocatedMemory()==0)
579 return(false);
580 if (MaxOrder & 0x40)
581 EscChar=UnpackRead->GetChar();
582 Coder.InitDecoder(UnpackRead);
583 if (Reset)
584 {
585 MaxOrder=(MaxOrder & 0x1f)+1;
586 if (MaxOrder>16)
587 MaxOrder=16+(MaxOrder-16)*3;
588 if (MaxOrder==1)
589 {
590 SubAlloc.StopSubAllocator();
591 return(false);
592 }
593 SubAlloc.StartSubAllocator(MaxMB+1);
594 StartModelRare(MaxOrder);
595 }
596 return(MinContext!=NULL);
597 }
598
599
DecodeChar()600 int ModelPPM::DecodeChar()
601 {
602 if ((byte*)MinContext <= SubAlloc.pText || (byte*)MinContext>SubAlloc.HeapEnd)
603 return(-1);
604 if (MinContext->NumStats != 1)
605 {
606 if ((byte*)MinContext->U.Stats <= SubAlloc.pText || (byte*)MinContext->U.Stats>SubAlloc.HeapEnd)
607 return(-1);
608 if (!MinContext->decodeSymbol1(this))
609 return(-1);
610 }
611 else
612 MinContext->decodeBinSymbol(this);
613 Coder.Decode();
614 while ( !FoundState )
615 {
616 ARI_DEC_NORMALIZE(Coder.code,Coder.low,Coder.range,Coder.UnpackRead);
617 do
618 {
619 OrderFall++;
620 MinContext=MinContext->Suffix;
621 if ((byte*)MinContext <= SubAlloc.pText || (byte*)MinContext>SubAlloc.HeapEnd)
622 return(-1);
623 } while (MinContext->NumStats == NumMasked);
624 if (!MinContext->decodeSymbol2(this))
625 return(-1);
626 Coder.Decode();
627 }
628 int Symbol=FoundState->Symbol;
629 if (!OrderFall && (byte*) FoundState->Successor > SubAlloc.pText)
630 MinContext=MaxContext=FoundState->Successor;
631 else
632 {
633 UpdateModel();
634 if (EscCount == 0)
635 ClearMask();
636 }
637 ARI_DEC_NORMALIZE(Coder.code,Coder.low,Coder.range,Coder.UnpackRead);
638 return(Symbol);
639 }
640