1 // DeflateEncoder.cpp
2 
3 #include "StdAfx.h"
4 
5 #include "../../../C/Alloc.h"
6 #include "../../../C/HuffEnc.h"
7 
8 #include "../../Common/ComTry.h"
9 
10 #include "../Common/CWrappers.h"
11 
12 #include "DeflateEncoder.h"
13 
14 #undef NO_INLINE
15 
16 #ifdef _MSC_VER
17 #define NO_INLINE MY_NO_INLINE
18 #else
19 #define NO_INLINE
20 #endif
21 
22 namespace NCompress {
23 namespace NDeflate {
24 namespace NEncoder {
25 
26 static const unsigned kNumDivPassesMax = 10; // [0, 16); ratio/speed/ram tradeoff; use big value for better compression ratio.
27 static const UInt32 kNumTables = (1 << kNumDivPassesMax);
28 
29 static const UInt32 kFixedHuffmanCodeBlockSizeMax = (1 << 8); // [0, (1 << 32)); ratio/speed tradeoff; use big value for better compression ratio.
30 static const UInt32 kDivideCodeBlockSizeMin = (1 << 7); // [1, (1 << 32)); ratio/speed tradeoff; use small value for better compression ratio.
31 static const UInt32 kDivideBlockSizeMin = (1 << 6); // [1, (1 << 32)); ratio/speed tradeoff; use small value for better compression ratio.
32 
33 static const UInt32 kMaxUncompressedBlockSize = ((1 << 16) - 1) * 1; // [1, (1 << 32))
34 static const UInt32 kMatchArraySize = kMaxUncompressedBlockSize * 10; // [kMatchMaxLen * 2, (1 << 32))
35 static const UInt32 kMatchArrayLimit = kMatchArraySize - kMatchMaxLen * 4 * sizeof(UInt16);
36 static const UInt32 kBlockUncompressedSizeThreshold = kMaxUncompressedBlockSize -
37     kMatchMaxLen - kNumOpts;
38 
39 // static const unsigned kMaxCodeBitLength = 11;
40 static const unsigned kMaxLevelBitLength = 7;
41 
42 static const Byte kNoLiteralStatPrice = 11;
43 static const Byte kNoLenStatPrice = 11;
44 static const Byte kNoPosStatPrice = 6;
45 
46 static Byte g_LenSlots[kNumLenSymbolsMax];
47 
48 #define kNumLogBits 9    // do not change it
49 static Byte g_FastPos[1 << kNumLogBits];
50 
51 class CFastPosInit
52 {
53 public:
CFastPosInit()54   CFastPosInit()
55   {
56     unsigned i;
57     for (i = 0; i < kNumLenSlots; i++)
58     {
59       unsigned c = kLenStart32[i];
60       unsigned j = 1 << kLenDirectBits32[i];
61       for (unsigned k = 0; k < j; k++, c++)
62         g_LenSlots[c] = (Byte)i;
63     }
64 
65     const unsigned kFastSlots = kNumLogBits * 2;
66     unsigned c = 0;
67     for (Byte slotFast = 0; slotFast < kFastSlots; slotFast++)
68     {
69       UInt32 k = (1 << kDistDirectBits[slotFast]);
70       for (UInt32 j = 0; j < k; j++, c++)
71         g_FastPos[c] = slotFast;
72     }
73   }
74 };
75 
76 static CFastPosInit g_FastPosInit;
77 
GetPosSlot(UInt32 pos)78 inline UInt32 GetPosSlot(UInt32 pos)
79 {
80   /*
81   if (pos < 0x200)
82     return g_FastPos[pos];
83   return g_FastPos[pos >> 8] + 16;
84   */
85   // const unsigned zz = (pos < ((UInt32)1 << (kNumLogBits))) ? 0 : 8;
86   /*
87   const unsigned zz = (kNumLogBits - 1) &
88       ((UInt32)0 - (((((UInt32)1 << kNumLogBits) - 1) - pos) >> 31));
89   */
90   const unsigned zz = (kNumLogBits - 1) &
91       (((((UInt32)1 << kNumLogBits) - 1) - pos) >> (31 - 3));
92   return g_FastPos[pos >> zz] + (zz * 2);
93 }
94 
95 
Normalize()96 void CEncProps::Normalize()
97 {
98   int level = Level;
99   if (level < 0) level = 5;
100   Level = level;
101   if (algo < 0) algo = (level < 5 ? 0 : 1);
102   if (fb < 0) fb = (level < 7 ? 32 : (level < 9 ? 64 : 128));
103   if (btMode < 0) btMode = (algo == 0 ? 0 : 1);
104   if (mc == 0) mc = (16 + ((unsigned)fb >> 1));
105   if (numPasses == (UInt32)(Int32)-1) numPasses = (level < 7 ? 1 : (level < 9 ? 3 : 10));
106 }
107 
SetProps(const CEncProps * props2)108 void CCoder::SetProps(const CEncProps *props2)
109 {
110   CEncProps props = *props2;
111   props.Normalize();
112 
113   m_MatchFinderCycles = props.mc;
114   {
115     unsigned fb = (unsigned)props.fb;
116     if (fb < kMatchMinLen)
117       fb = kMatchMinLen;
118     if (fb > m_MatchMaxLen)
119       fb = m_MatchMaxLen;
120     m_NumFastBytes = fb;
121   }
122   _fastMode = (props.algo == 0);
123   _btMode = (props.btMode != 0);
124 
125   m_NumDivPasses = props.numPasses;
126   if (m_NumDivPasses == 0)
127     m_NumDivPasses = 1;
128   if (m_NumDivPasses == 1)
129     m_NumPasses = 1;
130   else if (m_NumDivPasses <= kNumDivPassesMax)
131     m_NumPasses = 2;
132   else
133   {
134     m_NumPasses = 2 + (m_NumDivPasses - kNumDivPassesMax);
135     m_NumDivPasses = kNumDivPassesMax;
136   }
137 }
138 
CCoder(bool deflate64Mode)139 CCoder::CCoder(bool deflate64Mode):
140   m_Values(NULL),
141   m_OnePosMatchesMemory(NULL),
142   m_DistanceMemory(NULL),
143   m_Created(false),
144   m_Deflate64Mode(deflate64Mode),
145   m_Tables(NULL)
146 {
147   m_MatchMaxLen = deflate64Mode ? kMatchMaxLen64 : kMatchMaxLen32;
148   m_NumLenCombinations = deflate64Mode ? kNumLenSymbols64 : kNumLenSymbols32;
149   m_LenStart = deflate64Mode ? kLenStart64 : kLenStart32;
150   m_LenDirectBits = deflate64Mode ? kLenDirectBits64 : kLenDirectBits32;
151   {
152     CEncProps props;
153     SetProps(&props);
154   }
155   MatchFinder_Construct(&_lzInWindow);
156 }
157 
Create()158 HRESULT CCoder::Create()
159 {
160   // COM_TRY_BEGIN
161   if (m_Values == 0)
162   {
163     m_Values = (CCodeValue *)MyAlloc((kMaxUncompressedBlockSize) * sizeof(CCodeValue));
164     if (m_Values == 0)
165       return E_OUTOFMEMORY;
166   }
167   if (m_Tables == 0)
168   {
169     m_Tables = (CTables *)MyAlloc((kNumTables) * sizeof(CTables));
170     if (m_Tables == 0)
171       return E_OUTOFMEMORY;
172   }
173 
174   if (m_IsMultiPass)
175   {
176     if (m_OnePosMatchesMemory == 0)
177     {
178       m_OnePosMatchesMemory = (UInt16 *)::MidAlloc(kMatchArraySize * sizeof(UInt16));
179       if (m_OnePosMatchesMemory == 0)
180         return E_OUTOFMEMORY;
181     }
182   }
183   else
184   {
185     if (m_DistanceMemory == 0)
186     {
187       m_DistanceMemory = (UInt16 *)MyAlloc((kMatchMaxLen + 2) * 2 * sizeof(UInt16));
188       if (m_DistanceMemory == 0)
189         return E_OUTOFMEMORY;
190       m_MatchDistances = m_DistanceMemory;
191     }
192   }
193 
194   if (!m_Created)
195   {
196     _lzInWindow.btMode = (Byte)(_btMode ? 1 : 0);
197     _lzInWindow.numHashBytes = 3;
198     if (!MatchFinder_Create(&_lzInWindow,
199         m_Deflate64Mode ? kHistorySize64 : kHistorySize32,
200         kNumOpts + kMaxUncompressedBlockSize,
201         m_NumFastBytes, m_MatchMaxLen - m_NumFastBytes, &g_Alloc))
202       return E_OUTOFMEMORY;
203     if (!m_OutStream.Create(1 << 20))
204       return E_OUTOFMEMORY;
205   }
206   if (m_MatchFinderCycles != 0)
207     _lzInWindow.cutValue = m_MatchFinderCycles;
208   m_Created = true;
209   return S_OK;
210   // COM_TRY_END
211 }
212 
BaseSetEncoderProperties2(const PROPID * propIDs,const PROPVARIANT * coderProps,UInt32 numProps)213 HRESULT CCoder::BaseSetEncoderProperties2(const PROPID *propIDs, const PROPVARIANT *coderProps, UInt32 numProps)
214 {
215   CEncProps props;
216   for (UInt32 i = 0; i < numProps; i++)
217   {
218     const PROPVARIANT &prop = coderProps[i];
219     PROPID propID = propIDs[i];
220     if (propID >= NCoderPropID::kReduceSize)
221       continue;
222     if (prop.vt != VT_UI4)
223       return E_INVALIDARG;
224     UInt32 v = (UInt32)prop.ulVal;
225     switch (propID)
226     {
227       case NCoderPropID::kNumPasses: props.numPasses = v; break;
228       case NCoderPropID::kNumFastBytes: props.fb = (int)v; break;
229       case NCoderPropID::kMatchFinderCycles: props.mc = v; break;
230       case NCoderPropID::kAlgorithm: props.algo = (int)v; break;
231       case NCoderPropID::kLevel: props.Level = (int)v; break;
232       case NCoderPropID::kNumThreads: break;
233       default: return E_INVALIDARG;
234     }
235   }
236   SetProps(&props);
237   return S_OK;
238 }
239 
Free()240 void CCoder::Free()
241 {
242   ::MidFree(m_OnePosMatchesMemory); m_OnePosMatchesMemory = 0;
243   ::MyFree(m_DistanceMemory); m_DistanceMemory = 0;
244   ::MyFree(m_Values); m_Values = 0;
245   ::MyFree(m_Tables); m_Tables = 0;
246 }
247 
~CCoder()248 CCoder::~CCoder()
249 {
250   Free();
251   MatchFinder_Free(&_lzInWindow, &g_Alloc);
252 }
253 
GetMatches()254 NO_INLINE void CCoder::GetMatches()
255 {
256   if (m_IsMultiPass)
257   {
258     m_MatchDistances = m_OnePosMatchesMemory + m_Pos;
259     if (m_SecondPass)
260     {
261       m_Pos += *m_MatchDistances + 1;
262       return;
263     }
264   }
265 
266   UInt32 distanceTmp[kMatchMaxLen * 2 + 3];
267 
268   const UInt32 numPairs = (UInt32)((_btMode ?
269       Bt3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp):
270       Hc3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp)) - distanceTmp);
271 
272   *m_MatchDistances = (UInt16)numPairs;
273 
274   if (numPairs != 0)
275   {
276     UInt32 i;
277     for (i = 0; i < numPairs; i += 2)
278     {
279       m_MatchDistances[(size_t)i + 1] = (UInt16)distanceTmp[i];
280       m_MatchDistances[(size_t)i + 2] = (UInt16)distanceTmp[(size_t)i + 1];
281     }
282     UInt32 len = distanceTmp[(size_t)numPairs - 2];
283     if (len == m_NumFastBytes && m_NumFastBytes != m_MatchMaxLen)
284     {
285       UInt32 numAvail = Inline_MatchFinder_GetNumAvailableBytes(&_lzInWindow) + 1;
286       const Byte *pby = Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow) - 1;
287       const Byte *pby2 = pby - (distanceTmp[(size_t)numPairs - 1] + 1);
288       if (numAvail > m_MatchMaxLen)
289         numAvail = m_MatchMaxLen;
290       for (; len < numAvail && pby[len] == pby2[len]; len++);
291       m_MatchDistances[(size_t)i - 1] = (UInt16)len;
292     }
293   }
294   if (m_IsMultiPass)
295     m_Pos += numPairs + 1;
296   if (!m_SecondPass)
297     m_AdditionalOffset++;
298 }
299 
MovePos(UInt32 num)300 void CCoder::MovePos(UInt32 num)
301 {
302   if (!m_SecondPass && num > 0)
303   {
304     if (_btMode)
305       Bt3Zip_MatchFinder_Skip(&_lzInWindow, num);
306     else
307       Hc3Zip_MatchFinder_Skip(&_lzInWindow, num);
308     m_AdditionalOffset += num;
309   }
310 }
311 
312 static const UInt32 kIfinityPrice = 0xFFFFFFF;
313 
Backward(UInt32 & backRes,UInt32 cur)314 NO_INLINE UInt32 CCoder::Backward(UInt32 &backRes, UInt32 cur)
315 {
316   m_OptimumEndIndex = cur;
317   UInt32 posMem = m_Optimum[cur].PosPrev;
318   UInt16 backMem = m_Optimum[cur].BackPrev;
319   do
320   {
321     UInt32 posPrev = posMem;
322     UInt16 backCur = backMem;
323     backMem = m_Optimum[posPrev].BackPrev;
324     posMem = m_Optimum[posPrev].PosPrev;
325     m_Optimum[posPrev].BackPrev = backCur;
326     m_Optimum[posPrev].PosPrev = (UInt16)cur;
327     cur = posPrev;
328   }
329   while (cur > 0);
330   backRes = m_Optimum[0].BackPrev;
331   m_OptimumCurrentIndex = m_Optimum[0].PosPrev;
332   return m_OptimumCurrentIndex;
333 }
334 
GetOptimal(UInt32 & backRes)335 NO_INLINE UInt32 CCoder::GetOptimal(UInt32 &backRes)
336 {
337   if (m_OptimumEndIndex != m_OptimumCurrentIndex)
338   {
339     UInt32 len = m_Optimum[m_OptimumCurrentIndex].PosPrev - m_OptimumCurrentIndex;
340     backRes = m_Optimum[m_OptimumCurrentIndex].BackPrev;
341     m_OptimumCurrentIndex = m_Optimum[m_OptimumCurrentIndex].PosPrev;
342     return len;
343   }
344   m_OptimumCurrentIndex = m_OptimumEndIndex = 0;
345 
346   GetMatches();
347 
348   UInt32 lenEnd;
349   {
350     const UInt32 numDistancePairs = m_MatchDistances[0];
351     if (numDistancePairs == 0)
352       return 1;
353     const UInt16 *matchDistances = m_MatchDistances + 1;
354     lenEnd = matchDistances[(size_t)numDistancePairs - 2];
355 
356     if (lenEnd > m_NumFastBytes)
357     {
358       backRes = matchDistances[(size_t)numDistancePairs - 1];
359       MovePos(lenEnd - 1);
360       return lenEnd;
361     }
362 
363     m_Optimum[1].Price = m_LiteralPrices[*(Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow) - m_AdditionalOffset)];
364     m_Optimum[1].PosPrev = 0;
365 
366     m_Optimum[2].Price = kIfinityPrice;
367     m_Optimum[2].PosPrev = 1;
368 
369     UInt32 offs = 0;
370 
371     for (UInt32 i = kMatchMinLen; i <= lenEnd; i++)
372     {
373       UInt32 distance = matchDistances[(size_t)offs + 1];
374       m_Optimum[i].PosPrev = 0;
375       m_Optimum[i].BackPrev = (UInt16)distance;
376       m_Optimum[i].Price = m_LenPrices[(size_t)i - kMatchMinLen] + m_PosPrices[GetPosSlot(distance)];
377       if (i == matchDistances[offs])
378         offs += 2;
379     }
380   }
381 
382   UInt32 cur = 0;
383 
384   for (;;)
385   {
386     ++cur;
387     if (cur == lenEnd || cur == kNumOptsBase || m_Pos >= kMatchArrayLimit)
388       return Backward(backRes, cur);
389     GetMatches();
390     const UInt16 *matchDistances = m_MatchDistances + 1;
391     const UInt32 numDistancePairs = m_MatchDistances[0];
392     UInt32 newLen = 0;
393     if (numDistancePairs != 0)
394     {
395       newLen = matchDistances[(size_t)numDistancePairs - 2];
396       if (newLen > m_NumFastBytes)
397       {
398         UInt32 len = Backward(backRes, cur);
399         m_Optimum[cur].BackPrev = matchDistances[(size_t)numDistancePairs - 1];
400         m_OptimumEndIndex = cur + newLen;
401         m_Optimum[cur].PosPrev = (UInt16)m_OptimumEndIndex;
402         MovePos(newLen - 1);
403         return len;
404       }
405     }
406     UInt32 curPrice = m_Optimum[cur].Price;
407     {
408       const UInt32 curAnd1Price = curPrice + m_LiteralPrices[*(Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow) + cur - m_AdditionalOffset)];
409       COptimal &optimum = m_Optimum[(size_t)cur + 1];
410       if (curAnd1Price < optimum.Price)
411       {
412         optimum.Price = curAnd1Price;
413         optimum.PosPrev = (UInt16)cur;
414       }
415     }
416     if (numDistancePairs == 0)
417       continue;
418     while (lenEnd < cur + newLen)
419       m_Optimum[++lenEnd].Price = kIfinityPrice;
420     UInt32 offs = 0;
421     UInt32 distance = matchDistances[(size_t)offs + 1];
422     curPrice += m_PosPrices[GetPosSlot(distance)];
423     for (UInt32 lenTest = kMatchMinLen; ; lenTest++)
424     {
425       UInt32 curAndLenPrice = curPrice + m_LenPrices[(size_t)lenTest - kMatchMinLen];
426       COptimal &optimum = m_Optimum[cur + lenTest];
427       if (curAndLenPrice < optimum.Price)
428       {
429         optimum.Price = curAndLenPrice;
430         optimum.PosPrev = (UInt16)cur;
431         optimum.BackPrev = (UInt16)distance;
432       }
433       if (lenTest == matchDistances[offs])
434       {
435         offs += 2;
436         if (offs == numDistancePairs)
437           break;
438         curPrice -= m_PosPrices[GetPosSlot(distance)];
439         distance = matchDistances[(size_t)offs + 1];
440         curPrice += m_PosPrices[GetPosSlot(distance)];
441       }
442     }
443   }
444 }
445 
GetOptimalFast(UInt32 & backRes)446 UInt32 CCoder::GetOptimalFast(UInt32 &backRes)
447 {
448   GetMatches();
449   UInt32 numDistancePairs = m_MatchDistances[0];
450   if (numDistancePairs == 0)
451     return 1;
452   UInt32 lenMain = m_MatchDistances[(size_t)numDistancePairs - 1];
453   backRes = m_MatchDistances[numDistancePairs];
454   MovePos(lenMain - 1);
455   return lenMain;
456 }
457 
InitStructures()458 void CTables::InitStructures()
459 {
460   UInt32 i;
461   for (i = 0; i < 256; i++)
462     litLenLevels[i] = 8;
463   litLenLevels[i++] = 13;
464   for (;i < kFixedMainTableSize; i++)
465     litLenLevels[i] = 5;
466   for (i = 0; i < kFixedDistTableSize; i++)
467     distLevels[i] = 5;
468 }
469 
LevelTableDummy(const Byte * levels,unsigned numLevels,UInt32 * freqs)470 NO_INLINE void CCoder::LevelTableDummy(const Byte *levels, unsigned numLevels, UInt32 *freqs)
471 {
472   unsigned prevLen = 0xFF;
473   unsigned nextLen = levels[0];
474   unsigned count = 0;
475   unsigned maxCount = 7;
476   unsigned minCount = 4;
477 
478   if (nextLen == 0)
479   {
480     maxCount = 138;
481     minCount = 3;
482   }
483 
484   for (unsigned n = 0; n < numLevels; n++)
485   {
486     unsigned curLen = nextLen;
487     nextLen = (n < numLevels - 1) ? levels[(size_t)n + 1] : 0xFF;
488     count++;
489     if (count < maxCount && curLen == nextLen)
490       continue;
491 
492     if (count < minCount)
493       freqs[curLen] += (UInt32)count;
494     else if (curLen != 0)
495     {
496       if (curLen != prevLen)
497       {
498         freqs[curLen]++;
499         count--;
500       }
501       freqs[kTableLevelRepNumber]++;
502     }
503     else if (count <= 10)
504       freqs[kTableLevel0Number]++;
505     else
506       freqs[kTableLevel0Number2]++;
507 
508     count = 0;
509     prevLen = curLen;
510 
511     if (nextLen == 0)
512     {
513       maxCount = 138;
514       minCount = 3;
515     }
516     else if (curLen == nextLen)
517     {
518       maxCount = 6;
519       minCount = 3;
520     }
521     else
522     {
523       maxCount = 7;
524       minCount = 4;
525     }
526   }
527 }
528 
WriteBits(UInt32 value,unsigned numBits)529 NO_INLINE void CCoder::WriteBits(UInt32 value, unsigned numBits)
530 {
531   m_OutStream.WriteBits(value, numBits);
532 }
533 
534 #define WRITE_HF2(codes, lens, i) m_OutStream.WriteBits(codes[i], lens[i])
535 #define WRITE_HF(i) WriteBits(codes[i], lens[i])
536 
LevelTableCode(const Byte * levels,unsigned numLevels,const Byte * lens,const UInt32 * codes)537 NO_INLINE void CCoder::LevelTableCode(const Byte *levels, unsigned numLevels, const Byte *lens, const UInt32 *codes)
538 {
539   unsigned prevLen = 0xFF;
540   unsigned nextLen = levels[0];
541   unsigned count = 0;
542   unsigned maxCount = 7;
543   unsigned minCount = 4;
544 
545   if (nextLen == 0)
546   {
547     maxCount = 138;
548     minCount = 3;
549   }
550 
551   for (unsigned n = 0; n < numLevels; n++)
552   {
553     unsigned curLen = nextLen;
554     nextLen = (n < numLevels - 1) ? levels[(size_t)n + 1] : 0xFF;
555     count++;
556     if (count < maxCount && curLen == nextLen)
557       continue;
558 
559     if (count < minCount)
560       for (unsigned i = 0; i < count; i++)
561         WRITE_HF(curLen);
562     else if (curLen != 0)
563     {
564       if (curLen != prevLen)
565       {
566         WRITE_HF(curLen);
567         count--;
568       }
569       WRITE_HF(kTableLevelRepNumber);
570       WriteBits(count - 3, 2);
571     }
572     else if (count <= 10)
573     {
574       WRITE_HF(kTableLevel0Number);
575       WriteBits(count - 3, 3);
576     }
577     else
578     {
579       WRITE_HF(kTableLevel0Number2);
580       WriteBits(count - 11, 7);
581     }
582 
583     count = 0;
584     prevLen = curLen;
585 
586     if (nextLen == 0)
587     {
588       maxCount = 138;
589       minCount = 3;
590     }
591     else if (curLen == nextLen)
592     {
593       maxCount = 6;
594       minCount = 3;
595     }
596     else
597     {
598       maxCount = 7;
599       minCount = 4;
600     }
601   }
602 }
603 
MakeTables(unsigned maxHuffLen)604 NO_INLINE void CCoder::MakeTables(unsigned maxHuffLen)
605 {
606   Huffman_Generate(mainFreqs, mainCodes, m_NewLevels.litLenLevels, kFixedMainTableSize, maxHuffLen);
607   Huffman_Generate(distFreqs, distCodes, m_NewLevels.distLevels, kDistTableSize64, maxHuffLen);
608 }
609 
Huffman_GetPrice(const UInt32 * freqs,const Byte * lens,UInt32 num)610 static NO_INLINE UInt32 Huffman_GetPrice(const UInt32 *freqs, const Byte *lens, UInt32 num)
611 {
612   UInt32 price = 0;
613   UInt32 i;
614   for (i = 0; i < num; i++)
615     price += lens[i] * freqs[i];
616   return price;
617 }
618 
Huffman_GetPrice_Spec(const UInt32 * freqs,const Byte * lens,UInt32 num,const Byte * extraBits,UInt32 extraBase)619 static NO_INLINE UInt32 Huffman_GetPrice_Spec(const UInt32 *freqs, const Byte *lens, UInt32 num, const Byte *extraBits, UInt32 extraBase)
620 {
621   return Huffman_GetPrice(freqs, lens, num) +
622     Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
623 }
624 
GetLzBlockPrice() const625 NO_INLINE UInt32 CCoder::GetLzBlockPrice() const
626 {
627   return
628     Huffman_GetPrice_Spec(mainFreqs, m_NewLevels.litLenLevels, kFixedMainTableSize, m_LenDirectBits, kSymbolMatch) +
629     Huffman_GetPrice_Spec(distFreqs, m_NewLevels.distLevels, kDistTableSize64, kDistDirectBits, 0);
630 }
631 
TryBlock()632 NO_INLINE void CCoder::TryBlock()
633 {
634   memset(mainFreqs, 0, sizeof(mainFreqs));
635   memset(distFreqs, 0, sizeof(distFreqs));
636 
637   m_ValueIndex = 0;
638   UInt32 blockSize = BlockSizeRes;
639   BlockSizeRes = 0;
640   for (;;)
641   {
642     if (m_OptimumCurrentIndex == m_OptimumEndIndex)
643     {
644       if (m_Pos >= kMatchArrayLimit
645           || BlockSizeRes >= blockSize
646           || (!m_SecondPass && ((Inline_MatchFinder_GetNumAvailableBytes(&_lzInWindow) == 0) || m_ValueIndex >= m_ValueBlockSize)))
647         break;
648     }
649     UInt32 pos;
650     UInt32 len;
651     if (_fastMode)
652       len = GetOptimalFast(pos);
653     else
654       len = GetOptimal(pos);
655     CCodeValue &codeValue = m_Values[m_ValueIndex++];
656     if (len >= kMatchMinLen)
657     {
658       UInt32 newLen = len - kMatchMinLen;
659       codeValue.Len = (UInt16)newLen;
660       mainFreqs[kSymbolMatch + (size_t)g_LenSlots[newLen]]++;
661       codeValue.Pos = (UInt16)pos;
662       distFreqs[GetPosSlot(pos)]++;
663     }
664     else
665     {
666       Byte b = *(Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow) - m_AdditionalOffset);
667       mainFreqs[b]++;
668       codeValue.SetAsLiteral();
669       codeValue.Pos = b;
670     }
671     m_AdditionalOffset -= len;
672     BlockSizeRes += len;
673   }
674   mainFreqs[kSymbolEndOfBlock]++;
675   m_AdditionalOffset += BlockSizeRes;
676   m_SecondPass = true;
677 }
678 
SetPrices(const CLevels & levels)679 NO_INLINE void CCoder::SetPrices(const CLevels &levels)
680 {
681   if (_fastMode)
682     return;
683   UInt32 i;
684   for (i = 0; i < 256; i++)
685   {
686     Byte price = levels.litLenLevels[i];
687     m_LiteralPrices[i] = ((price != 0) ? price : kNoLiteralStatPrice);
688   }
689 
690   for (i = 0; i < m_NumLenCombinations; i++)
691   {
692     UInt32 slot = g_LenSlots[i];
693     Byte price = levels.litLenLevels[kSymbolMatch + (size_t)slot];
694     m_LenPrices[i] = (Byte)(((price != 0) ? price : kNoLenStatPrice) + m_LenDirectBits[slot]);
695   }
696 
697   for (i = 0; i < kDistTableSize64; i++)
698   {
699     Byte price = levels.distLevels[i];
700     m_PosPrices[i] = (Byte)(((price != 0) ? price: kNoPosStatPrice) + kDistDirectBits[i]);
701   }
702 }
703 
Huffman_ReverseBits(UInt32 * codes,const Byte * lens,UInt32 num)704 static NO_INLINE void Huffman_ReverseBits(UInt32 *codes, const Byte *lens, UInt32 num)
705 {
706   for (UInt32 i = 0; i < num; i++)
707   {
708     UInt32 x = codes[i];
709     x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
710     x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
711     x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
712     codes[i] = (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - lens[i]);
713   }
714 }
715 
WriteBlock()716 NO_INLINE void CCoder::WriteBlock()
717 {
718   Huffman_ReverseBits(mainCodes, m_NewLevels.litLenLevels, kFixedMainTableSize);
719   Huffman_ReverseBits(distCodes, m_NewLevels.distLevels, kDistTableSize64);
720 
721   for (UInt32 i = 0; i < m_ValueIndex; i++)
722   {
723     const CCodeValue &codeValue = m_Values[i];
724     if (codeValue.IsLiteral())
725       WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, codeValue.Pos);
726     else
727     {
728       UInt32 len = codeValue.Len;
729       UInt32 lenSlot = g_LenSlots[len];
730       WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, kSymbolMatch + lenSlot);
731       m_OutStream.WriteBits(len - m_LenStart[lenSlot], m_LenDirectBits[lenSlot]);
732       UInt32 dist = codeValue.Pos;
733       UInt32 posSlot = GetPosSlot(dist);
734       WRITE_HF2(distCodes, m_NewLevels.distLevels, posSlot);
735       m_OutStream.WriteBits(dist - kDistStart[posSlot], kDistDirectBits[posSlot]);
736     }
737   }
738   WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, kSymbolEndOfBlock);
739 }
740 
GetStorePrice(UInt32 blockSize,unsigned bitPosition)741 static UInt32 GetStorePrice(UInt32 blockSize, unsigned bitPosition)
742 {
743   UInt32 price = 0;
744   do
745   {
746     UInt32 nextBitPosition = (bitPosition + kFinalBlockFieldSize + kBlockTypeFieldSize) & 7;
747     unsigned numBitsForAlign = nextBitPosition > 0 ? (8 - nextBitPosition): 0;
748     UInt32 curBlockSize = (blockSize < (1 << 16)) ? blockSize : (1 << 16) - 1;
749     price += kFinalBlockFieldSize + kBlockTypeFieldSize + numBitsForAlign + (2 + 2) * 8 + curBlockSize * 8;
750     bitPosition = 0;
751     blockSize -= curBlockSize;
752   }
753   while (blockSize != 0);
754   return price;
755 }
756 
WriteStoreBlock(UInt32 blockSize,UInt32 additionalOffset,bool finalBlock)757 void CCoder::WriteStoreBlock(UInt32 blockSize, UInt32 additionalOffset, bool finalBlock)
758 {
759   do
760   {
761     UInt32 curBlockSize = (blockSize < (1 << 16)) ? blockSize : (1 << 16) - 1;
762     blockSize -= curBlockSize;
763     WriteBits((finalBlock && (blockSize == 0) ? NFinalBlockField::kFinalBlock: NFinalBlockField::kNotFinalBlock), kFinalBlockFieldSize);
764     WriteBits(NBlockType::kStored, kBlockTypeFieldSize);
765     m_OutStream.FlushByte();
766     WriteBits((UInt16)curBlockSize, kStoredBlockLengthFieldSize);
767     WriteBits((UInt16)~curBlockSize, kStoredBlockLengthFieldSize);
768     const Byte *data = Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow)- additionalOffset;
769     for (UInt32 i = 0; i < curBlockSize; i++)
770       m_OutStream.WriteByte(data[i]);
771     additionalOffset -= curBlockSize;
772   }
773   while (blockSize != 0);
774 }
775 
TryDynBlock(unsigned tableIndex,UInt32 numPasses)776 NO_INLINE UInt32 CCoder::TryDynBlock(unsigned tableIndex, UInt32 numPasses)
777 {
778   CTables &t = m_Tables[tableIndex];
779   BlockSizeRes = t.BlockSizeRes;
780   UInt32 posTemp = t.m_Pos;
781   SetPrices(t);
782 
783   for (UInt32 p = 0; p < numPasses; p++)
784   {
785     m_Pos = posTemp;
786     TryBlock();
787     unsigned numHuffBits =
788         (m_ValueIndex > 18000 ? 12 :
789         (m_ValueIndex >  7000 ? 11 :
790         (m_ValueIndex >  2000 ? 10 : 9)));
791     MakeTables(numHuffBits);
792     SetPrices(m_NewLevels);
793   }
794 
795   (CLevels &)t = m_NewLevels;
796 
797   m_NumLitLenLevels = kMainTableSize;
798   while (m_NumLitLenLevels > kNumLitLenCodesMin && m_NewLevels.litLenLevels[(size_t)m_NumLitLenLevels - 1] == 0)
799     m_NumLitLenLevels--;
800 
801   m_NumDistLevels = kDistTableSize64;
802   while (m_NumDistLevels > kNumDistCodesMin && m_NewLevels.distLevels[(size_t)m_NumDistLevels - 1] == 0)
803     m_NumDistLevels--;
804 
805   UInt32 levelFreqs[kLevelTableSize];
806   memset(levelFreqs, 0, sizeof(levelFreqs));
807 
808   LevelTableDummy(m_NewLevels.litLenLevels, m_NumLitLenLevels, levelFreqs);
809   LevelTableDummy(m_NewLevels.distLevels, m_NumDistLevels, levelFreqs);
810 
811   Huffman_Generate(levelFreqs, levelCodes, levelLens, kLevelTableSize, kMaxLevelBitLength);
812 
813   m_NumLevelCodes = kNumLevelCodesMin;
814   for (UInt32 i = 0; i < kLevelTableSize; i++)
815   {
816     Byte level = levelLens[kCodeLengthAlphabetOrder[i]];
817     if (level > 0 && i >= m_NumLevelCodes)
818       m_NumLevelCodes = i + 1;
819     m_LevelLevels[i] = level;
820   }
821 
822   return GetLzBlockPrice() +
823       Huffman_GetPrice_Spec(levelFreqs, levelLens, kLevelTableSize, kLevelDirectBits, kTableDirectLevels) +
824       kNumLenCodesFieldSize + kNumDistCodesFieldSize + kNumLevelCodesFieldSize +
825       m_NumLevelCodes * kLevelFieldSize + kFinalBlockFieldSize + kBlockTypeFieldSize;
826 }
827 
TryFixedBlock(unsigned tableIndex)828 NO_INLINE UInt32 CCoder::TryFixedBlock(unsigned tableIndex)
829 {
830   CTables &t = m_Tables[tableIndex];
831   BlockSizeRes = t.BlockSizeRes;
832   m_Pos = t.m_Pos;
833   m_NewLevels.SetFixedLevels();
834   SetPrices(m_NewLevels);
835   TryBlock();
836   return kFinalBlockFieldSize + kBlockTypeFieldSize + GetLzBlockPrice();
837 }
838 
GetBlockPrice(unsigned tableIndex,unsigned numDivPasses)839 NO_INLINE UInt32 CCoder::GetBlockPrice(unsigned tableIndex, unsigned numDivPasses)
840 {
841   CTables &t = m_Tables[tableIndex];
842   t.StaticMode = false;
843   UInt32 price = TryDynBlock(tableIndex, m_NumPasses);
844   t.BlockSizeRes = BlockSizeRes;
845   UInt32 numValues = m_ValueIndex;
846   UInt32 posTemp = m_Pos;
847   UInt32 additionalOffsetEnd = m_AdditionalOffset;
848 
849   if (m_CheckStatic && m_ValueIndex <= kFixedHuffmanCodeBlockSizeMax)
850   {
851     const UInt32 fixedPrice = TryFixedBlock(tableIndex);
852     t.StaticMode = (fixedPrice < price);
853     if (t.StaticMode)
854       price = fixedPrice;
855   }
856 
857   const UInt32 storePrice = GetStorePrice(BlockSizeRes, 0); // bitPosition
858   t.StoreMode = (storePrice <= price);
859   if (t.StoreMode)
860     price = storePrice;
861 
862   t.UseSubBlocks = false;
863 
864   if (numDivPasses > 1 && numValues >= kDivideCodeBlockSizeMin)
865   {
866     CTables &t0 = m_Tables[(tableIndex << 1)];
867     (CLevels &)t0 = t;
868     t0.BlockSizeRes = t.BlockSizeRes >> 1;
869     t0.m_Pos = t.m_Pos;
870     UInt32 subPrice = GetBlockPrice((tableIndex << 1), numDivPasses - 1);
871 
872     UInt32 blockSize2 = t.BlockSizeRes - t0.BlockSizeRes;
873     if (t0.BlockSizeRes >= kDivideBlockSizeMin && blockSize2 >= kDivideBlockSizeMin)
874     {
875       CTables &t1 = m_Tables[(tableIndex << 1) + 1];
876       (CLevels &)t1 = t;
877       t1.BlockSizeRes = blockSize2;
878       t1.m_Pos = m_Pos;
879       m_AdditionalOffset -= t0.BlockSizeRes;
880       subPrice += GetBlockPrice((tableIndex << 1) + 1, numDivPasses - 1);
881       t.UseSubBlocks = (subPrice < price);
882       if (t.UseSubBlocks)
883         price = subPrice;
884     }
885   }
886 
887   m_AdditionalOffset = additionalOffsetEnd;
888   m_Pos = posTemp;
889   return price;
890 }
891 
CodeBlock(unsigned tableIndex,bool finalBlock)892 void CCoder::CodeBlock(unsigned tableIndex, bool finalBlock)
893 {
894   CTables &t = m_Tables[tableIndex];
895   if (t.UseSubBlocks)
896   {
897     CodeBlock((tableIndex << 1), false);
898     CodeBlock((tableIndex << 1) + 1, finalBlock);
899   }
900   else
901   {
902     if (t.StoreMode)
903       WriteStoreBlock(t.BlockSizeRes, m_AdditionalOffset, finalBlock);
904     else
905     {
906       WriteBits((finalBlock ? NFinalBlockField::kFinalBlock: NFinalBlockField::kNotFinalBlock), kFinalBlockFieldSize);
907       if (t.StaticMode)
908       {
909         WriteBits(NBlockType::kFixedHuffman, kBlockTypeFieldSize);
910         TryFixedBlock(tableIndex);
911         unsigned i;
912         const unsigned kMaxStaticHuffLen = 9;
913         for (i = 0; i < kFixedMainTableSize; i++)
914           mainFreqs[i] = (UInt32)1 << (kMaxStaticHuffLen - m_NewLevels.litLenLevels[i]);
915         for (i = 0; i < kFixedDistTableSize; i++)
916           distFreqs[i] = (UInt32)1 << (kMaxStaticHuffLen - m_NewLevels.distLevels[i]);
917         MakeTables(kMaxStaticHuffLen);
918       }
919       else
920       {
921         if (m_NumDivPasses > 1 || m_CheckStatic)
922           TryDynBlock(tableIndex, 1);
923         WriteBits(NBlockType::kDynamicHuffman, kBlockTypeFieldSize);
924         WriteBits(m_NumLitLenLevels - kNumLitLenCodesMin, kNumLenCodesFieldSize);
925         WriteBits(m_NumDistLevels - kNumDistCodesMin, kNumDistCodesFieldSize);
926         WriteBits(m_NumLevelCodes - kNumLevelCodesMin, kNumLevelCodesFieldSize);
927 
928         for (UInt32 i = 0; i < m_NumLevelCodes; i++)
929           WriteBits(m_LevelLevels[i], kLevelFieldSize);
930 
931         Huffman_ReverseBits(levelCodes, levelLens, kLevelTableSize);
932         LevelTableCode(m_NewLevels.litLenLevels, m_NumLitLenLevels, levelLens, levelCodes);
933         LevelTableCode(m_NewLevels.distLevels, m_NumDistLevels, levelLens, levelCodes);
934       }
935       WriteBlock();
936     }
937     m_AdditionalOffset -= t.BlockSizeRes;
938   }
939 }
940 
941 
CodeReal(ISequentialInStream * inStream,ISequentialOutStream * outStream,const UInt64 *,const UInt64 *,ICompressProgressInfo * progress)942 HRESULT CCoder::CodeReal(ISequentialInStream *inStream, ISequentialOutStream *outStream,
943     const UInt64 * /* inSize */ , const UInt64 * /* outSize */ , ICompressProgressInfo *progress)
944 {
945   m_CheckStatic = (m_NumPasses != 1 || m_NumDivPasses != 1);
946   m_IsMultiPass = (m_CheckStatic || (m_NumPasses != 1 || m_NumDivPasses != 1));
947 
948   RINOK(Create());
949 
950   m_ValueBlockSize = (7 << 10) + (1 << 12) * m_NumDivPasses;
951 
952   UInt64 nowPos = 0;
953 
954   CSeqInStreamWrap _seqInStream;
955 
956   _seqInStream.Init(inStream);
957 
958   _lzInWindow.stream = &_seqInStream.vt;
959 
960   MatchFinder_Init(&_lzInWindow);
961   m_OutStream.SetStream(outStream);
962   m_OutStream.Init();
963 
964   m_OptimumEndIndex = m_OptimumCurrentIndex = 0;
965 
966   CTables &t = m_Tables[1];
967   t.m_Pos = 0;
968   t.InitStructures();
969 
970   m_AdditionalOffset = 0;
971   do
972   {
973     t.BlockSizeRes = kBlockUncompressedSizeThreshold;
974     m_SecondPass = false;
975     GetBlockPrice(1, m_NumDivPasses);
976     CodeBlock(1, Inline_MatchFinder_GetNumAvailableBytes(&_lzInWindow) == 0);
977     nowPos += m_Tables[1].BlockSizeRes;
978     if (progress != NULL)
979     {
980       UInt64 packSize = m_OutStream.GetProcessedSize();
981       RINOK(progress->SetRatioInfo(&nowPos, &packSize));
982     }
983   }
984   while (Inline_MatchFinder_GetNumAvailableBytes(&_lzInWindow) != 0);
985 
986   if (_seqInStream.Res != S_OK)
987     return _seqInStream.Res;
988 
989   if (_lzInWindow.result != SZ_OK)
990     return SResToHRESULT(_lzInWindow.result);
991   return m_OutStream.Flush();
992 }
993 
BaseCode(ISequentialInStream * inStream,ISequentialOutStream * outStream,const UInt64 * inSize,const UInt64 * outSize,ICompressProgressInfo * progress)994 HRESULT CCoder::BaseCode(ISequentialInStream *inStream, ISequentialOutStream *outStream,
995     const UInt64 *inSize, const UInt64 *outSize, ICompressProgressInfo *progress)
996 {
997   try { return CodeReal(inStream, outStream, inSize, outSize, progress); }
998   catch(const COutBufferException &e) { return e.ErrorCode; }
999   catch(...) { return E_FAIL; }
1000 }
1001 
Code(ISequentialInStream * inStream,ISequentialOutStream * outStream,const UInt64 * inSize,const UInt64 * outSize,ICompressProgressInfo * progress)1002 STDMETHODIMP CCOMCoder::Code(ISequentialInStream *inStream, ISequentialOutStream *outStream,
1003     const UInt64 *inSize, const UInt64 *outSize, ICompressProgressInfo *progress)
1004   { return BaseCode(inStream, outStream, inSize, outSize, progress); }
1005 
SetCoderProperties(const PROPID * propIDs,const PROPVARIANT * props,UInt32 numProps)1006 STDMETHODIMP CCOMCoder::SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps)
1007   { return BaseSetEncoderProperties2(propIDs, props, numProps); }
1008 
Code(ISequentialInStream * inStream,ISequentialOutStream * outStream,const UInt64 * inSize,const UInt64 * outSize,ICompressProgressInfo * progress)1009 STDMETHODIMP CCOMCoder64::Code(ISequentialInStream *inStream, ISequentialOutStream *outStream,
1010     const UInt64 *inSize, const UInt64 *outSize, ICompressProgressInfo *progress)
1011   { return BaseCode(inStream, outStream, inSize, outSize, progress); }
1012 
SetCoderProperties(const PROPID * propIDs,const PROPVARIANT * props,UInt32 numProps)1013 STDMETHODIMP CCOMCoder64::SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps)
1014   { return BaseSetEncoderProperties2(propIDs, props, numProps); }
1015 
1016 }}}
1017