/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2021-12-21 : Igor Pavlov : Public domain */

#include "Precomp.h"

// #include <stdio.h>

#include "CpuArch.h"

#include "LzHash.h"
#include "LzFindMt.h"

// #define LOG_ITERS

// #define LOG_THREAD

#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif

#ifdef LOG_ITERS
#include <stdio.h>
extern UInt64 g_NumIters_Tree;
extern UInt64 g_NumIters_Loop;
extern UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif

#define kMtHashBlockSize ((UInt32)1 << 17)
#define kMtHashNumBlocks (1 << 1)

#define GET_HASH_BLOCK_OFFSET(i)  (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)

#define kMtBtBlockSize ((UInt32)1 << 16)
#define kMtBtNumBlocks (1 << 4)

#define GET_BT_BLOCK_OFFSET(i)  (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
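
/* A worked example of the circular block indexing above (illustration only):
   kMtHashNumBlocks == 2, so a hash block index (i) alternates between two buffer slots:
     GET_HASH_BLOCK_OFFSET(0) == 0, GET_HASH_BLOCK_OFFSET(1) == kMtHashBlockSize,
     GET_HASH_BLOCK_OFFSET(2) == 0, ...
   kMtBtNumBlocks == 16, so BT block indexes wrap every 16 blocks:
     GET_BT_BLOCK_OFFSET(17) == 1 * (size_t)kMtBtBlockSize */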

/*
  HASH functions:
  We use raw 8/16 bits from a[1] and a[2],
  xored with crc(a[0]) and crc(a[3]).
  We check a[0] and a[3] only; we don't need to compare a[1] and a[2] in matches.
  Our crc() function provides one-to-one correspondence for low 8-bit values:
    (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
*/
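
/* Illustration (a sketch, not part of the build): the 4-byte head value used by
   the GetHeads4 variants below is, in effect,
     h = (crc[a[0]] ^ (a[1] | ((UInt32)a[2] << 8)) ^ (crc[a[3]] << kLzHash_CrcShift_1)) & hashMask;
   If two positions share a hash cell and their a[0] and a[3] bytes compare equal,
   the crc terms cancel, so the a[1]/a[2] bits covered by hashMask must be equal too;
   that is why the match check can skip comparing a[1] and a[2]. */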

#define MF(mt) ((mt)->MatchFinder)
#define MF_CRC (p->crc)

// #define MF(mt) (&(mt)->MatchFinder)
// #define MF_CRC (p->MatchFinder.crc)

#define MT_HASH2_CALC \
  h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);

#define MT_HASH3_CALC { \
  UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

/*
#define MT_HASH3_CALC__NO_2 { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

#define __MT_HASH4_CALC { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  temp ^= ((UInt32)cur[2] << 8); \
  h3 = temp & (kHash3Size - 1); \
  h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
  // (kHash4Size - 1);
*/


MY_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
  p->affinity = 0;
  p->wasCreated = False;
  p->csWasInitialized = False;
  p->csWasEntered = False;
  Thread_Construct(&p->thread);
  Event_Construct(&p->canStart);
  Event_Construct(&p->wasStopped);
  Semaphore_Construct(&p->freeSemaphore);
  Semaphore_Construct(&p->filledSemaphore);
}


#define DEBUG_BUFFER_LOCK   // define it to debug lock state

#ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h>
#define BUFFER_MUST_BE_LOCKED(p)    if (!(p)->csWasEntered) exit(1);
#define BUFFER_MUST_BE_UNLOCKED(p)  if ( (p)->csWasEntered) exit(1);
#else
#define BUFFER_MUST_BE_LOCKED(p)
#define BUFFER_MUST_BE_UNLOCKED(p)
#endif

#define LOCK_BUFFER(p) { \
    BUFFER_MUST_BE_UNLOCKED(p); \
    CriticalSection_Enter(&(p)->cs); \
    (p)->csWasEntered = True; }

#define UNLOCK_BUFFER(p) { \
    BUFFER_MUST_BE_LOCKED(p); \
    CriticalSection_Leave(&(p)->cs); \
    (p)->csWasEntered = False; }


MY_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
  UInt32 numBlocks = 0;
  if (p->needStart)
  {
    BUFFER_MUST_BE_UNLOCKED(p)
    p->numProcessedBlocks = 1;
    p->needStart = False;
    p->stopWriting = False;
    p->exit = False;
    Event_Reset(&p->wasStopped);
    Event_Set(&p->canStart);
  }
  else
  {
    UNLOCK_BUFFER(p)
    // we free current block
    numBlocks = p->numProcessedBlocks++;
    Semaphore_Release1(&p->freeSemaphore);
  }

  // buffer is UNLOCKED here
  Semaphore_Wait(&p->filledSemaphore);
  LOCK_BUFFER(p);
  return numBlocks;
}
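
/* Protocol sketch (illustration only, assuming a producer thread and this consumer):

     producer loop:                          consumer (MtSync_GetNextBlock):
       Semaphore_Wait(freeSemaphore)           Semaphore_Release1(freeSemaphore)  // return old block
       fill block (i)                          Semaphore_Wait(filledSemaphore)    // wait for next block
       Semaphore_Release1(filledSemaphore)     read block under (cs) lock

   (freeSemaphore) starts at the number of blocks and (filledSemaphore) at 0,
   so the producer can run at most (numBlocks) blocks ahead of the consumer. */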


/* if the Writing (Processing) thread was started, we must call MtSync_StopWriting() */

MY_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
  if (!Thread_WasCreated(&p->thread) || p->needStart)
    return;

    PRF(printf("\nMtSync_StopWriting %p\n", p));

  if (p->csWasEntered)
  {
    /* we don't use buffer in this thread after StopWriting().
       So we UNLOCK buffer.
       And we restore default UNLOCKED state for stopped thread */
    UNLOCK_BUFFER(p)
  }

  /* We send the (p->stopWriting) message and release freeSemaphore
     to free the current block.
     So the thread will see (p->stopWriting) at some
     iteration after Wait(freeSemaphore).
     The thread doesn't need to fill all available free blocks,
     so we can get a fast thread stop.
  */

  p->stopWriting = True;
  Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!

    PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
  Event_Wait(&p->wasStopped);
    PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finished\n", p));

  /* 21.03 : we don't restore semaphore counters here.
     We will recreate and reinit semaphores on next start */

  p->needStart = True;
}
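
/* Stop handshake timeline (illustration only):
     caller:     p->stopWriting = True; Release(freeSemaphore); Wait(wasStopped);
     MT thread:  ...; Wait(freeSemaphore); sees (stopWriting) and breaks; Set(wasStopped);
     caller:     wakes up and sets (needStart), so the next GetNextBlock() restarts the loop. */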


MY_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
    PRF(printf("\nMtSync_Destruct %p\n", p));

  if (Thread_WasCreated(&p->thread))
  {
    /* we want thread to be in Stopped state before sending EXIT command.
       note: stop(btSync) will stop (hashSync) also */
    MtSync_StopWriting(p);
    /* thread is in Stopped state here : (p->needStart == true) */
    p->exit = True;
    // if (p->needStart)  // it's (true)
    Event_Set(&p->canStart);  // we send EXIT command to thread
    Thread_Wait_Close(&p->thread);  // we wait for the thread to finish
  }

  if (p->csWasInitialized)
  {
    CriticalSection_Delete(&p->cs);
    p->csWasInitialized = False;
  }
  p->csWasEntered = False;

  Event_Close(&p->canStart);
  Event_Close(&p->wasStopped);
  Semaphore_Close(&p->freeSemaphore);
  Semaphore_Close(&p->filledSemaphore);

  p->wasCreated = False;
}


// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
// we want to get real system error codes here instead of SZ_ERROR_THREAD
#define RINOK_THREAD(x) RINOK(x)


// call it before each new file (when a new start is required):
MY_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{
  WRes wres;
  // BUFFER_MUST_BE_UNLOCKED(p)
  if (!p->needStart || p->csWasEntered)
    return SZ_ERROR_FAIL;
  wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
  if (wres == 0)
    wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}
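
/* Example (illustration only): for the hash stream, MtSync_Init() is called with
   numBlocks == kMtHashNumBlocks (2): freeSemaphore starts at 2 (both blocks writable),
   filledSemaphore at 0 (nothing to read yet); both use (numBlocks) as maximum count. */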


static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  WRes wres;

  if (p->wasCreated)
    return SZ_OK;

  RINOK_THREAD(CriticalSection_Init(&p->cs));
  p->csWasInitialized = True;
  p->csWasEntered = False;

  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));

  p->needStart = True;
  p->exit = True;  /* p->exit is not used before the (canStart) event.
     But in case of some unexpected code failure we will get a fast exit from the thread */

  // return ERROR_TOO_MANY_POSTS; // for debug
  // return EINVAL; // for debug

  if (p->affinity != 0)
    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
  else
    wres = Thread_Create(&p->thread, startAddress, obj);

  RINOK_THREAD(wres);
  p->wasCreated = True;
  return SZ_OK;
}


MY_NO_INLINE
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
  if (wres == 0)
    return 0;
  MtSync_Destruct(p);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


// ---------- HASH THREAD ----------

#define kMtMaxValForNormalize 0xFFFFFFFF
// #define kMtMaxValForNormalize ((1 << 21)) // for debug
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses

#ifdef MY_CPU_LE_UNALIGN
  #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
#else
  #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
#endif

#define GetHeads_DECL(name) \
    static void GetHeads ## name(const Byte *p, UInt32 pos, \
      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)

#define GetHeads_LOOP(v) \
    for (; numHeads != 0; numHeads--) { \
      const UInt32 value = (v); \
      p++; \
      *heads++ = pos - hash[value]; \
      hash[value] = pos++; }

#define DEF_GetHeads2(name, v, action) \
    GetHeads_DECL(name) { action \
    GetHeads_LOOP(v) }

#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
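
/* For reference (illustration only), DEF_GetHeads(3, ...) below expands to roughly:

   static void GetHeads3(const Byte *p, UInt32 pos, UInt32 *hash, UInt32 hashMask,
       UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
   {
     for (; numHeads != 0; numHeads--)
     {
       const UInt32 value = (crc[p[0]] ^ GetUi16(p + 1)) & hashMask;
       p++;
       *heads++ = pos - hash[value];  // head = distance to previous position with this hash
       hash[value] = pos++;           // hash table now points at the current position
     }
   }
*/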

DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3,  (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
// BT3 is not good for crc collisions for big hashMask values.

/*
GetHeads_DECL(3b)
{
  UNUSED_VAR(hashMask);
  UNUSED_VAR(crc);
  {
  const Byte *pLim = p + numHeads;
  if (numHeads == 0)
    return;
  pLim--;
  while (p < pLim)
  {
    UInt32 v1 = GetUi32(p);
    UInt32 v0 = v1 & 0xFFFFFF;
    UInt32 h0, h1;
    p += 2;
    v1 >>= 8;
    h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
    h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
    heads += 2;
  }
  if (p == pLim)
  {
    UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
    *heads = pos - hash[v0];
    hash[v0] = pos;
  }
  }
}
*/

/*
GetHeads_DECL(4)
{
  unsigned sh = 0;
  UNUSED_VAR(crc)
  while ((hashMask & 0x80000000) == 0)
  {
    hashMask <<= 1;
    sh++;
  }
  GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
}
#define GetHeads4b GetHeads4
*/

#define USE_GetHeads_LOCAL_CRC

#ifdef USE_GetHeads_LOCAL_CRC

GetHeads_DECL(4)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      // crc1[i] = rotlFixed(v, 8) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
}

GetHeads_DECL(4b)
{
  UInt32 crc0[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
      crc0[i] = crc[i] & hashMask;
  }
  GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
}

GetHeads_DECL(5)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  UInt32 crc2[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
}

GetHeads_DECL(5b)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
}

#else

DEF_GetHeads(4,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
DEF_GetHeads(5,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)

#endif


static void HashThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->hashSync;
    PRF(printf("\nHashThreadFunc\n"));

  for (;;)
  {
    UInt32 blockIndex = 0;
      PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
    Event_Wait(&p->canStart);
      PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
    if (p->exit)
    {
      PRF(printf("\nHashThreadFunc : exit \n"));
      return;
    }

    MatchFinder_Init_HighHash(MF(mt));

    for (;;)
    {
      PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));

      {
        CMatchFinder *mf = MF(mt);
        if (MatchFinder_NeedMove(mf))
        {
          CriticalSection_Enter(&mt->btSync.cs);
          CriticalSection_Enter(&mt->hashSync.cs);
          {
            const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
            ptrdiff_t offset;
            MatchFinder_MoveBlock(mf);
            offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
            mt->pointerToCurPos -= offset;
            mt->buffer -= offset;
          }
          CriticalSection_Leave(&mt->hashSync.cs);
          CriticalSection_Leave(&mt->btSync.cs);
          continue;
        }

        Semaphore_Wait(&p->freeSemaphore);

        if (p->exit) // exit is unexpected here, but we check it for some failure cases
          return;

        // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
        if (p->stopWriting)
          break;

        MatchFinder_ReadIfRequired(mf);
        {
          UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
          UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
          heads[0] = 2;
          heads[1] = num;

          /* heads[1] contains the number of avail bytes:
             if (avail < mf->numHashBytes) :
             {
               it means that the stream was finished.
               HASH_THREAD and BT_THREAD must move the position by heads[1] (avail) bytes.
               HASH_THREAD doesn't stop,
               HASH_THREAD fills only the header (2 numbers) for all next blocks:
               {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
             }
             else
             {
               HASH_THREAD and BT_THREAD must move the position by (heads[0] - 2) bytes;
             }
          */
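
          /* Example block layout (illustration only): for a full block,
             heads[0] == 2 + num, heads[1] == avail, and heads[2 .. 2 + num - 1]
             hold, for each of (num) consecutive positions, the distance
             (pos - previous position with the same hash value). */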

          if (num >= mf->numHashBytes)
          {
            num = num - mf->numHashBytes + 1;
            if (num > kMtHashBlockSize - 2)
              num = kMtHashBlockSize - 2;

            if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
            {
              const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
              Inline_MatchFinder_ReduceOffsets(mf, subValue);
              MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
            }

            heads[0] = 2 + num;
            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
          }

          mf->pos += num;  // wrap over zero is allowed at the end of stream
          mf->buffer += num;
        }
      }

      Semaphore_Release1(&p->filledSemaphore);
    } // for() processing end

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  } // for() thread end
}




// ---------- BT THREAD ----------

/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
   Here we define the fixed offset of (p->pos) from (p->cyclicBufferPos) */
#define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug

#define MFMT_GM_INLINE

#ifdef MFMT_GM_INLINE

/*
  we use size_t for (pos) instead of UInt32
  to eliminate the "movsx" bug in the old MSVC x64 compiler.
*/


UInt32 * MY_FAST_CALL  GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
    UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
    UInt32 *posRes);

#endif


static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
  UInt32 numProcessed = 0;
  UInt32 curPos = 2;

  /* GetMatchesSpec() functions don't create (len = 1) entries
     in [len, dist] match pairs, if (p->numHashBytes >= 2)
     Also we suppose here that (matchMaxLen >= 2).
     So the following code for (reserve) is not required
     UInt32 reserve = (p->matchMaxLen * 2);
     const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
     if (reserve < kNumHashBytes_Max - 1)
        reserve = kNumHashBytes_Max - 1;
     const UInt32 limit = kMtBtBlockSize - (reserve);
  */

  const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);

  d[1] = p->hashNumAvail;

  if (p->failure_BT)
  {
    // printf("\n == 1 BtGetMatches() p->failure_BT\n");
    d[0] = 0;
    // d[1] = 0;
    return;
  }

  while (curPos < limit)
  {
    if (p->hashBufPos == p->hashBufPosLimit)
    {
      // MatchFinderMt_GetNextBlock_Hash(p);
      UInt32 avail;
      {
        const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
        const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
        const UInt32 *h = p->hashBuf + k;
        avail = h[1];
        p->hashBufPosLimit = k + h[0];
        p->hashNumAvail = avail;
        p->hashBufPos = k + 2;
      }

      {
        /* we must prevent UInt32 overflow of the total avail value,
           if avail was increased with the new hash block */
        UInt32 availSum = numProcessed + avail;
        if (availSum < numProcessed)
          availSum = (UInt32)(Int32)-1;
        d[1] = availSum;
      }

      if (avail >= p->numHashBytes)
        continue;

      // if (p->hashBufPos != p->hashBufPosLimit) exit(1);

      /* (avail < p->numHashBytes)
         It means that the stream was finished.
         And (avail) is the number of remaining bytes;
         we fill (d) for (avail) bytes for LZ_THREAD (receiver),
         but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */

      /* here we suppose that we have enough space:
         (kMtBtBlockSize - curPos >= p->hashNumAvail) */
      p->hashNumAvail = 0;
      d[0] = curPos + avail;
      d += curPos;
      for (; avail != 0; avail--)
        *d++ = 0;
      return;
    }
    {
      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
      UInt32 pos = p->pos;
      UInt32 cyclicBufferPos = p->cyclicBufferPos;
      UInt32 lenLimit = p->matchMaxLen;
      if (lenLimit >= p->hashNumAvail)
        lenLimit = p->hashNumAvail;
      {
        UInt32 size2 = p->hashNumAvail - lenLimit + 1;
        if (size2 < size)
          size = size2;
        size2 = p->cyclicBufferSize - cyclicBufferPos;
        if (size2 < size)
          size = size2;
      }

      if (pos > (UInt32)kMtMaxValForNormalize - size)
      {
        const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
        pos -= subValue;
        p->pos = pos;
        MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
      }

      #ifndef MFMT_GM_INLINE
      while (curPos < limit && size-- != 0)
      {
        UInt32 *startDistances = d + curPos;
        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
            pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
            startDistances + 1, p->numHashBytes - 1) - startDistances);
        *startDistances = num - 1;
        curPos += num;
        cyclicBufferPos++;
        pos++;
        p->buffer++;
      }
      #else
      {
        UInt32 posRes = pos;
        const UInt32 *d_end;
        {
          d_end = GetMatchesSpecN_2(
              p->buffer + lenLimit - 1,
              pos, p->buffer, p->son, p->cutValue, d + curPos,
              p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
              d + limit, p->hashBuf + p->hashBufPos + size,
              cyclicBufferPos, p->cyclicBufferSize,
              &posRes);
        }
        {
          if (!d_end)
          {
            // printf("\n == 2 BtGetMatches() p->failure_BT\n");
            // internal data failure
            p->failure_BT = True;
            d[0] = 0;
            // d[1] = 0;
            return;
          }
        }
        curPos = (UInt32)(d_end - d);
        {
          const UInt32 processed = posRes - pos;
          pos = posRes;
          p->hashBufPos += processed;
          cyclicBufferPos += processed;
          p->buffer += processed;
        }
      }
      #endif

      {
        const UInt32 processed = pos - p->pos;
        numProcessed += processed;
        p->hashNumAvail -= processed;
        p->pos = pos;
      }
      if (cyclicBufferPos == p->cyclicBufferSize)
        cyclicBufferPos = 0;
      p->cyclicBufferPos = cyclicBufferPos;
    }
  }

  d[0] = curPos;
}
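
/* BT block layout produced above (illustration only):
     d[0] = number of UInt32 items used in the block (including this 2-item header)
     d[1] = number of available bytes (clamped against UInt32 overflow)
     then, for each processed position:
       numItems, (len1, dist1), (len2, dist2), ...   // numItems counts UInt32 items, i.e. 2 * pairs
   The LZ thread walks the block with: next_record = cur + cur[0] + 1. */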


static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
  CMtSync *sync = &p->hashSync;

  BUFFER_MUST_BE_UNLOCKED(sync)

  if (!sync->needStart)
  {
    LOCK_BUFFER(sync)
  }

  BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));

  /* We suppose that we have called GetNextBlock() from the start.
     So the buffer is LOCKED */

  UNLOCK_BUFFER(sync)
}


MY_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->btSync;
  for (;;)
  {
    UInt32 blockIndex = 0;
    Event_Wait(&p->canStart);

    for (;;)
    {
        PRF(printf("  BT thread block = %d  pos = %d\n", (unsigned)blockIndex, mt->pos));
      /* (p->exit == true) is possible after (p->canStart) at the first loop iteration
         and is unexpected after more Wait(freeSemaphore) iterations */
      if (p->exit)
        return;

      Semaphore_Wait(&p->freeSemaphore);

      // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
      if (p->stopWriting)
        break;

      BtFillBlock(mt, blockIndex++);

      Semaphore_Release1(&p->filledSemaphore);
    }

    // we stop HASH_THREAD here
    MtSync_StopWriting(&mt->hashSync);

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  }
}


void MatchFinderMt_Construct(CMatchFinderMt *p)
{
  p->hashBuf = NULL;
  MtSync_Construct(&p->hashSync);
  MtSync_Construct(&p->btSync);
}

static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  ISzAlloc_Free(alloc, p->hashBuf);
  p->hashBuf = NULL;
}

void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  /*
     HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
     So we must be sure that HASH_THREAD will not use CriticalSection(s)
     after deleting CriticalSection here.

     we call ReleaseStream(p)
       that calls StopWriting(btSync)
         that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
     After StopWriting() it's safe to destruct MtSync(s) in any order */

  MatchFinderMt_ReleaseStream(p);

  MtSync_Destruct(&p->btSync);
  MtSync_Destruct(&p->hashSync);

  LOG_ITER(
  printf("\nTree %9d * %7d iter = %9d = sum  :  bytes = %9d\n",
      (UInt32)(g_NumIters_Tree / 1000),
      (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
      (UInt32)(g_NumIters_Loop / 1000),
      (UInt32)(g_NumIters_Bytes / 1000)
      ));

  MatchFinderMt_FreeMem(p, alloc);
}


#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)


static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p);  return 0; }
static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
  /* the dummy buffer is written and then tested, apparently to adjust this
     thread's stack usage and to keep the compiler from removing it */
  Byte allocaDummy[0x180];
  unsigned i = 0;
  for (i = 0; i < 16; i++)
    allocaDummy[i] = (Byte)0;
  if (allocaDummy[0] == 0)
    BtThreadFunc((CMatchFinderMt *)p);
  return 0;
}


SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
  CMatchFinder *mf = MF(p);
  p->historySize = historySize;
  if (kMtBtBlockSize <= matchMaxLen * 4)
    return SZ_ERROR_PARAM;
  if (!p->hashBuf)
  {
    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
    if (!p->hashBuf)
      return SZ_ERROR_MEM;
    p->btBuf = p->hashBuf + kHashBufferSize;
  }
  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
  keepAddBufferAfter += kMtHashBlockSize;
  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
    return SZ_ERROR_MEM;

  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));
  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));
  return SZ_OK;
}


SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
{
  RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));
  return MtSync_Init(&p->btSync, kMtBtNumBlocks);
}


static void MatchFinderMt_Init(CMatchFinderMt *p)
{
  CMatchFinder *mf = MF(p);

  p->btBufPos =
  p->btBufPosLimit = NULL;
  p->hashBufPos =
  p->hashBufPosLimit = 0;
  p->hashNumAvail = 0; // 21.03

  p->failure_BT = False;

  /* Init without data reading. We don't want to read data in this thread */
  MatchFinder_Init_4(mf);

  MatchFinder_Init_LowHash(mf);

  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
  p->btNumAvailBytes = 0;
  p->failure_LZ_BT = False;
  // p->failure_LZ_LZ = False;

  p->lzPos =
      1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  p->hash = mf->hash;
  p->fixedHashSize = mf->fixedHashSize;
  // p->hash4Mask = mf->hash4Mask;
  p->crc = mf->crc;
  // memcpy(p->crc, mf->crc, sizeof(mf->crc));

  p->son = mf->son;
  p->matchMaxLen = mf->matchMaxLen;
  p->numHashBytes = mf->numHashBytes;

  /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
  // mf->streamPos = mf->pos = 1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  /* we must init (p->pos = mf->pos) for BT, because
     BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
  p->pos = mf->pos; // do not change it

  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
  p->cyclicBufferSize = mf->cyclicBufferSize;
  p->buffer = mf->buffer;
  p->cutValue = mf->cutValue;
  // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}


/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
  // Sleep(1); // for debug
  MtSync_StopWriting(&p->btSync);
  // Sleep(200); // for debug
  /* p->MatchFinder->ReleaseStream(); */
}


MY_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
  if (p->failure_LZ_BT)
    p->btBufPos = p->failureBuf;
  else
  {
    const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
    const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
    {
      const UInt32 numItems = bt[0];
      p->btBufPosLimit = bt + numItems;
      p->btNumAvailBytes = bt[1];
      p->btBufPos = bt + 2;
      if (numItems < 2 || numItems > kMtBtBlockSize)
      {
        p->failureBuf[0] = 0;
        p->btBufPos = p->failureBuf;
        p->btBufPosLimit = p->failureBuf + 1;
        p->failure_LZ_BT = True;
        // p->btNumAvailBytes = 0;
        /* we don't want to decrease AvailBytes that was loaded before;
           that could be unexpected for the code that has already loaded another value */
      }
    }

    if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
    {
      /* we don't check (lzPos) against the exact avail bytes in (btBuf).
         (fixedHashSize) is small, so normalization is fast */
      const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
      p->lzPos -= subValue;
      MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
    }
  }
  return p->btNumAvailBytes;
}



static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
  return p->pointerToCurPos;
}


#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);


static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
  if (p->btBufPos != p->btBufPosLimit)
    return p->btNumAvailBytes;
  return MatchFinderMt_GetNextBlock_Bt(p);
}


// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True;  return d; }
#define CHECK_FAILURE_LZ(_match_, _pos_)

static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, c2;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH2_CALC

  c2 = hash[h2];
  hash[h2] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 2;
      *d++ = m - c2 - 1;
    }
  }

  return d;
}
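
/* Output format shared by the MixMatches* functions (illustration only):
   (d) receives flat (len, dist) pairs; e.g. after MixMatches2 finds a 2-byte match:
     d[0] = 2;           // match length
     d[1] = m - c2 - 1;  // distance - 1, where (m) is lzPos and (c2) the stored position
   The caller appends the BinTree pairs after these hash pairs, so lengths increase. */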

static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      d[1] = m - c2 - 1;
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
      {
        d[0] = 3;
        return d + 2;
      }
      d[0] = 2;
      d += 2;
    }
  }

  if (c3 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c3, m)
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 3;
      *d++ = m - c3 - 1;
    }
  }

  return d;
}


#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;

/*
static
UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  UInt32 matchMinPos;
  UInt32 avail = p->btNumAvailBytes - 1;
  p->btBufPos = btLim;

  {
    p->btNumAvailBytes = avail;

    #define BT_HASH_BYTES_MAX 5

    matchMinPos = p->lzPos;

    if (len != 0)
      matchMinPos -= bt[1];
    else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
    {
      INCREASE_LZ_POS
      return d;
    }
    else
    {
      const UInt32 hs = p->historySize;
      if (matchMinPos > hs)
        matchMinPos -= hs;
      else
        matchMinPos = 1;
    }
  }

  for (;;)
  {

  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      d[0] = 3;
      d += 2;
      break;
    }
    // else
    {
      d[0] = 2;
      d += 2;
    }
  }
  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    *d++ = 3;
    *d++ = m - c3 - 1;
  }
  break;
  }

  if (len != 0)
  {
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (bt != btLim);
  }
  INCREASE_LZ_POS
  return d;
}
*/


static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC
  // MT_HASH4_CALC
  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];
  // c4 = (hash + kFix4HashSize)[h4];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;
  // (hash + kFix4HashSize)[h4] = m;

  #define _USE_H2

  #ifdef _USE_H2
  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
      // return d + 2;

      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
      {
        d[0] = 4;
        return d + 2;
      }
      d[0] = 3;
      d += 2;

      #ifdef _USE_H4
      if (c4 >= matchMinPos)
        if (
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
          )
      {
        *d++ = 4;
        *d++ = m - c4 - 1;
      }
      #endif
      return d;
    }
    d[0] = 2;
    d += 2;
  }
  #endif

  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c3 - 1;
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
    {
      d[0] = 4;
      return d + 2;
    }
    d[0] = 3;
    d += 2;
  }

  #ifdef _USE_H4
  if (c4 >= matchMinPos)
    if (
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
      )
    {
      *d++ = 4;
      *d++ = m - c4 - 1;
    }
  #endif

  return d;
}


static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  p->btBufPos = btLim;
  p->btNumAvailBytes--;
  INCREASE_LZ_POS
  {
    while (bt != btLim)
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
  }
  return d;
}



static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  UInt32 len = *bt++;
  const UInt32 avail = p->btNumAvailBytes - 1;
  p->btNumAvailBytes = avail;
  p->btBufPos = bt + len;
  if (len == 0)
  {
    #define BT_HASH_BYTES_MAX 5
    if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
    {
      UInt32 m = p->lzPos;
      if (m > p->historySize)
        m -= p->historySize;
      else
        m = 1;
      d = p->MixMatchesFunc(p, m, d);
    }
  }
  else
  {
    /*
      first match pair from BinTree: (match_len, match_dist),
      (match_len >= numHashBytes).
      MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
    */
    d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
    // if (d) // check for failure
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (len -= 2);
  }
  INCREASE_LZ_POS
  return d;
}

#define SKIP_HEADER2_MT  do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
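
/* For reference (illustration only), MatchFinderMt2_Skip below expands to roughly:

   static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
   {
     do
     {
       if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
       if (p->btNumAvailBytes-- >= 2)
       {
         const Byte *cur = p->pointerToCurPos;
         UInt32 *hash = p->hash;
         UInt32 h2;
         h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
         hash[h2] = p->lzPos;
       }
       p->lzPos++; p->pointerToCurPos++;
       p->btBufPos += (size_t)*p->btBufPos + 1;  // skip one BT record: count + items
     }
     while (--num != 0);
   }
*/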

static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER2_MT { p->btNumAvailBytes--;
  SKIP_FOOTER_MT
}

static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(2)
      UInt32 h2;
      MT_HASH2_CALC
      hash[h2] = p->lzPos;
  SKIP_FOOTER_MT
}

static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(3)
      UInt32 h2, h3;
      MT_HASH3_CALC
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}

/*
// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.

static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(4)
      UInt32 h2, h3; // h4
      MT_HASH3_CALC
      // MT_HASH4_CALC
      // (hash + kFix4HashSize)[h4] =
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}
*/

void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;

  switch (MF(p)->numHashBytes)
  {
    case 2:
      p->GetHeadsFunc = GetHeads2;
      p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
      break;
    case 3:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
      break;
    case 4:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;

      // it's a fast inline version of GetMatches()
      // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;

      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
      break;
    default:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
      vTable->Skip =
          (Mf_Skip_Func)MatchFinderMt3_Skip;
          // (Mf_Skip_Func)MatchFinderMt4_Skip;
      break;
  }
}
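
/* Usage sketch (illustration only, not part of the build), following the call
   order used by the LZMA encoder; the helper name, history size, matchMaxLen,
   and the (d) buffer size are assumptions made for this sketch:

   static SRes UseMtMatchFinder(CMatchFinderMt *mt, CMatchFinder *base,
       ISeqInStream *stream, ISzAllocPtr alloc)
   {
     IMatchFinder2 vt;
     UInt32 d[128];  // [len, dist] pair buffer; the size here is a guess
     MatchFinder_Construct(base);
     MatchFinderMt_Construct(mt);
     mt->MatchFinder = base;  // MF(mt) resolves to this pointer
     base->stream = stream;
     RINOK(MatchFinderMt_Create(mt, (UInt32)1 << 24, 0, 32, 0, alloc));
     MatchFinderMt_CreateVTable(mt, &vt);
     RINOK(MatchFinderMt_InitMt(mt));  // create/init semaphores; call before vt.Init
     vt.Init((void *)mt);
     while (vt.GetNumAvailableBytes((void *)mt) != 0)
     {
       const UInt32 numItems = (UInt32)(vt.GetMatches((void *)mt, d) - d);
       // ... consume (numItems / 2) [len, dist] pairs; vt.Skip((void *)mt, n) to skip ...
     }
     MatchFinderMt_ReleaseStream(mt);  // stops the worker threads
     MatchFinderMt_Destruct(mt, alloc);
     MatchFinder_Free(base, alloc);
     return SZ_OK;
   }
*/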