1eb7fbc25SPierre Schweitzer /*
206042735SVincent Franchomme * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3eb7fbc25SPierre Schweitzer * All rights reserved.
4eb7fbc25SPierre Schweitzer *
5eb7fbc25SPierre Schweitzer * This source code is licensed under both the BSD-style license (found in the
6eb7fbc25SPierre Schweitzer * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7eb7fbc25SPierre Schweitzer * in the COPYING file in the root directory of this source tree).
8eb7fbc25SPierre Schweitzer * You may select, at your option, one of the above-listed licenses.
9eb7fbc25SPierre Schweitzer */
10eb7fbc25SPierre Schweitzer
11eb7fbc25SPierre Schweitzer #include "zstd_compress_internal.h"
12eb7fbc25SPierre Schweitzer #include "zstd_lazy.h"
13eb7fbc25SPierre Schweitzer
14eb7fbc25SPierre Schweitzer
15eb7fbc25SPierre Schweitzer /*-*************************************
16eb7fbc25SPierre Schweitzer * Binary Tree search
17eb7fbc25SPierre Schweitzer ***************************************/
18eb7fbc25SPierre Schweitzer
19eb7fbc25SPierre Schweitzer static void
ZSTD_updateDUBT(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * iend,U32 mls)20eb7fbc25SPierre Schweitzer ZSTD_updateDUBT(ZSTD_matchState_t* ms,
21eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* iend,
22eb7fbc25SPierre Schweitzer U32 mls)
23eb7fbc25SPierre Schweitzer {
24eb7fbc25SPierre Schweitzer const ZSTD_compressionParameters* const cParams = &ms->cParams;
25eb7fbc25SPierre Schweitzer U32* const hashTable = ms->hashTable;
26eb7fbc25SPierre Schweitzer U32 const hashLog = cParams->hashLog;
27eb7fbc25SPierre Schweitzer
28eb7fbc25SPierre Schweitzer U32* const bt = ms->chainTable;
29eb7fbc25SPierre Schweitzer U32 const btLog = cParams->chainLog - 1;
30eb7fbc25SPierre Schweitzer U32 const btMask = (1 << btLog) - 1;
31eb7fbc25SPierre Schweitzer
32eb7fbc25SPierre Schweitzer const BYTE* const base = ms->window.base;
33eb7fbc25SPierre Schweitzer U32 const target = (U32)(ip - base);
34eb7fbc25SPierre Schweitzer U32 idx = ms->nextToUpdate;
35eb7fbc25SPierre Schweitzer
36eb7fbc25SPierre Schweitzer if (idx != target)
37eb7fbc25SPierre Schweitzer DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
38eb7fbc25SPierre Schweitzer idx, target, ms->window.dictLimit);
39eb7fbc25SPierre Schweitzer assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
40eb7fbc25SPierre Schweitzer (void)iend;
41eb7fbc25SPierre Schweitzer
42eb7fbc25SPierre Schweitzer assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
43eb7fbc25SPierre Schweitzer for ( ; idx < target ; idx++) {
44eb7fbc25SPierre Schweitzer size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
45eb7fbc25SPierre Schweitzer U32 const matchIndex = hashTable[h];
46eb7fbc25SPierre Schweitzer
47eb7fbc25SPierre Schweitzer U32* const nextCandidatePtr = bt + 2*(idx&btMask);
48eb7fbc25SPierre Schweitzer U32* const sortMarkPtr = nextCandidatePtr + 1;
49eb7fbc25SPierre Schweitzer
50eb7fbc25SPierre Schweitzer DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
51eb7fbc25SPierre Schweitzer hashTable[h] = idx; /* Update Hash Table */
52eb7fbc25SPierre Schweitzer *nextCandidatePtr = matchIndex; /* update BT like a chain */
53eb7fbc25SPierre Schweitzer *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
54eb7fbc25SPierre Schweitzer }
55eb7fbc25SPierre Schweitzer ms->nextToUpdate = target;
56eb7fbc25SPierre Schweitzer }
57eb7fbc25SPierre Schweitzer
58eb7fbc25SPierre Schweitzer
59eb7fbc25SPierre Schweitzer /** ZSTD_insertDUBT1() :
60eb7fbc25SPierre Schweitzer * sort one already inserted but unsorted position
61eb7fbc25SPierre Schweitzer * assumption : current >= btlow == (current - btmask)
62eb7fbc25SPierre Schweitzer * doesn't fail */
63eb7fbc25SPierre Schweitzer static void
ZSTD_insertDUBT1(ZSTD_matchState_t * ms,U32 current,const BYTE * inputEnd,U32 nbCompares,U32 btLow,const ZSTD_dictMode_e dictMode)64eb7fbc25SPierre Schweitzer ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
65eb7fbc25SPierre Schweitzer U32 current, const BYTE* inputEnd,
6606042735SVincent Franchomme U32 nbCompares, U32 btLow,
6706042735SVincent Franchomme const ZSTD_dictMode_e dictMode)
68eb7fbc25SPierre Schweitzer {
69eb7fbc25SPierre Schweitzer const ZSTD_compressionParameters* const cParams = &ms->cParams;
70eb7fbc25SPierre Schweitzer U32* const bt = ms->chainTable;
71eb7fbc25SPierre Schweitzer U32 const btLog = cParams->chainLog - 1;
72eb7fbc25SPierre Schweitzer U32 const btMask = (1 << btLog) - 1;
73eb7fbc25SPierre Schweitzer size_t commonLengthSmaller=0, commonLengthLarger=0;
74eb7fbc25SPierre Schweitzer const BYTE* const base = ms->window.base;
75eb7fbc25SPierre Schweitzer const BYTE* const dictBase = ms->window.dictBase;
76eb7fbc25SPierre Schweitzer const U32 dictLimit = ms->window.dictLimit;
77eb7fbc25SPierre Schweitzer const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
78eb7fbc25SPierre Schweitzer const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
79eb7fbc25SPierre Schweitzer const BYTE* const dictEnd = dictBase + dictLimit;
80eb7fbc25SPierre Schweitzer const BYTE* const prefixStart = base + dictLimit;
81eb7fbc25SPierre Schweitzer const BYTE* match;
82eb7fbc25SPierre Schweitzer U32* smallerPtr = bt + 2*(current&btMask);
83eb7fbc25SPierre Schweitzer U32* largerPtr = smallerPtr + 1;
8406042735SVincent Franchomme U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85eb7fbc25SPierre Schweitzer U32 dummy32; /* to be nullified at the end */
8606042735SVincent Franchomme U32 const windowValid = ms->window.lowLimit;
8706042735SVincent Franchomme U32 const maxDistance = 1U << cParams->windowLog;
8806042735SVincent Franchomme U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
8906042735SVincent Franchomme
90eb7fbc25SPierre Schweitzer
91eb7fbc25SPierre Schweitzer DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
92eb7fbc25SPierre Schweitzer current, dictLimit, windowLow);
93eb7fbc25SPierre Schweitzer assert(current >= btLow);
94eb7fbc25SPierre Schweitzer assert(ip < iend); /* condition for ZSTD_count */
95eb7fbc25SPierre Schweitzer
96eb7fbc25SPierre Schweitzer while (nbCompares-- && (matchIndex > windowLow)) {
97eb7fbc25SPierre Schweitzer U32* const nextPtr = bt + 2*(matchIndex & btMask);
98eb7fbc25SPierre Schweitzer size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
99eb7fbc25SPierre Schweitzer assert(matchIndex < current);
10006042735SVincent Franchomme /* note : all candidates are now supposed sorted,
10106042735SVincent Franchomme * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
10206042735SVincent Franchomme * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
103eb7fbc25SPierre Schweitzer
104eb7fbc25SPierre Schweitzer if ( (dictMode != ZSTD_extDict)
105eb7fbc25SPierre Schweitzer || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
106eb7fbc25SPierre Schweitzer || (current < dictLimit) /* both in extDict */) {
107eb7fbc25SPierre Schweitzer const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
108eb7fbc25SPierre Schweitzer || (matchIndex+matchLength >= dictLimit)) ?
109eb7fbc25SPierre Schweitzer base : dictBase;
110eb7fbc25SPierre Schweitzer assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
111eb7fbc25SPierre Schweitzer || (current < dictLimit) );
112eb7fbc25SPierre Schweitzer match = mBase + matchIndex;
113eb7fbc25SPierre Schweitzer matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
114eb7fbc25SPierre Schweitzer } else {
115eb7fbc25SPierre Schweitzer match = dictBase + matchIndex;
116eb7fbc25SPierre Schweitzer matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
117eb7fbc25SPierre Schweitzer if (matchIndex+matchLength >= dictLimit)
11806042735SVincent Franchomme match = base + matchIndex; /* preparation for next read of match[matchLength] */
119eb7fbc25SPierre Schweitzer }
120eb7fbc25SPierre Schweitzer
121eb7fbc25SPierre Schweitzer DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
122eb7fbc25SPierre Schweitzer current, matchIndex, (U32)matchLength);
123eb7fbc25SPierre Schweitzer
124eb7fbc25SPierre Schweitzer if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
125eb7fbc25SPierre Schweitzer break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
126eb7fbc25SPierre Schweitzer }
127eb7fbc25SPierre Schweitzer
128eb7fbc25SPierre Schweitzer if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
129eb7fbc25SPierre Schweitzer /* match is smaller than current */
130eb7fbc25SPierre Schweitzer *smallerPtr = matchIndex; /* update smaller idx */
131eb7fbc25SPierre Schweitzer commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
132eb7fbc25SPierre Schweitzer if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
133eb7fbc25SPierre Schweitzer DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
134eb7fbc25SPierre Schweitzer matchIndex, btLow, nextPtr[1]);
135eb7fbc25SPierre Schweitzer smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
136eb7fbc25SPierre Schweitzer matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
137eb7fbc25SPierre Schweitzer } else {
138eb7fbc25SPierre Schweitzer /* match is larger than current */
139eb7fbc25SPierre Schweitzer *largerPtr = matchIndex;
140eb7fbc25SPierre Schweitzer commonLengthLarger = matchLength;
141eb7fbc25SPierre Schweitzer if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
142eb7fbc25SPierre Schweitzer DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
143eb7fbc25SPierre Schweitzer matchIndex, btLow, nextPtr[0]);
144eb7fbc25SPierre Schweitzer largerPtr = nextPtr;
145eb7fbc25SPierre Schweitzer matchIndex = nextPtr[0];
146eb7fbc25SPierre Schweitzer } }
147eb7fbc25SPierre Schweitzer
148eb7fbc25SPierre Schweitzer *smallerPtr = *largerPtr = 0;
149eb7fbc25SPierre Schweitzer }
150eb7fbc25SPierre Schweitzer
151eb7fbc25SPierre Schweitzer
152eb7fbc25SPierre Schweitzer static size_t
ZSTD_DUBT_findBetterDictMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,size_t * offsetPtr,size_t bestLength,U32 nbCompares,U32 const mls,const ZSTD_dictMode_e dictMode)153eb7fbc25SPierre Schweitzer ZSTD_DUBT_findBetterDictMatch (
154eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
155eb7fbc25SPierre Schweitzer const BYTE* const ip, const BYTE* const iend,
156eb7fbc25SPierre Schweitzer size_t* offsetPtr,
157eb7fbc25SPierre Schweitzer size_t bestLength,
158eb7fbc25SPierre Schweitzer U32 nbCompares,
159eb7fbc25SPierre Schweitzer U32 const mls,
160eb7fbc25SPierre Schweitzer const ZSTD_dictMode_e dictMode)
161eb7fbc25SPierre Schweitzer {
162eb7fbc25SPierre Schweitzer const ZSTD_matchState_t * const dms = ms->dictMatchState;
163eb7fbc25SPierre Schweitzer const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
164eb7fbc25SPierre Schweitzer const U32 * const dictHashTable = dms->hashTable;
165eb7fbc25SPierre Schweitzer U32 const hashLog = dmsCParams->hashLog;
166eb7fbc25SPierre Schweitzer size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
167eb7fbc25SPierre Schweitzer U32 dictMatchIndex = dictHashTable[h];
168eb7fbc25SPierre Schweitzer
169eb7fbc25SPierre Schweitzer const BYTE* const base = ms->window.base;
170eb7fbc25SPierre Schweitzer const BYTE* const prefixStart = base + ms->window.dictLimit;
171eb7fbc25SPierre Schweitzer U32 const current = (U32)(ip-base);
172eb7fbc25SPierre Schweitzer const BYTE* const dictBase = dms->window.base;
173eb7fbc25SPierre Schweitzer const BYTE* const dictEnd = dms->window.nextSrc;
174eb7fbc25SPierre Schweitzer U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
175eb7fbc25SPierre Schweitzer U32 const dictLowLimit = dms->window.lowLimit;
176eb7fbc25SPierre Schweitzer U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
177eb7fbc25SPierre Schweitzer
178eb7fbc25SPierre Schweitzer U32* const dictBt = dms->chainTable;
179eb7fbc25SPierre Schweitzer U32 const btLog = dmsCParams->chainLog - 1;
180eb7fbc25SPierre Schweitzer U32 const btMask = (1 << btLog) - 1;
181eb7fbc25SPierre Schweitzer U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
182eb7fbc25SPierre Schweitzer
183eb7fbc25SPierre Schweitzer size_t commonLengthSmaller=0, commonLengthLarger=0;
184eb7fbc25SPierre Schweitzer
185eb7fbc25SPierre Schweitzer (void)dictMode;
186eb7fbc25SPierre Schweitzer assert(dictMode == ZSTD_dictMatchState);
187eb7fbc25SPierre Schweitzer
188eb7fbc25SPierre Schweitzer while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
189eb7fbc25SPierre Schweitzer U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
190eb7fbc25SPierre Schweitzer size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
191eb7fbc25SPierre Schweitzer const BYTE* match = dictBase + dictMatchIndex;
192eb7fbc25SPierre Schweitzer matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
193eb7fbc25SPierre Schweitzer if (dictMatchIndex+matchLength >= dictHighLimit)
194eb7fbc25SPierre Schweitzer match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
195eb7fbc25SPierre Schweitzer
196eb7fbc25SPierre Schweitzer if (matchLength > bestLength) {
197eb7fbc25SPierre Schweitzer U32 matchIndex = dictMatchIndex + dictIndexDelta;
198eb7fbc25SPierre Schweitzer if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
199eb7fbc25SPierre Schweitzer DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
200eb7fbc25SPierre Schweitzer current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
201eb7fbc25SPierre Schweitzer bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
202eb7fbc25SPierre Schweitzer }
203eb7fbc25SPierre Schweitzer if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
204eb7fbc25SPierre Schweitzer break; /* drop, to guarantee consistency (miss a little bit of compression) */
205eb7fbc25SPierre Schweitzer }
206eb7fbc25SPierre Schweitzer }
207eb7fbc25SPierre Schweitzer
208eb7fbc25SPierre Schweitzer if (match[matchLength] < ip[matchLength]) {
209eb7fbc25SPierre Schweitzer if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
210eb7fbc25SPierre Schweitzer commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
211eb7fbc25SPierre Schweitzer dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
212eb7fbc25SPierre Schweitzer } else {
213eb7fbc25SPierre Schweitzer /* match is larger than current */
214eb7fbc25SPierre Schweitzer if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
215eb7fbc25SPierre Schweitzer commonLengthLarger = matchLength;
216eb7fbc25SPierre Schweitzer dictMatchIndex = nextPtr[0];
217eb7fbc25SPierre Schweitzer }
218eb7fbc25SPierre Schweitzer }
219eb7fbc25SPierre Schweitzer
220eb7fbc25SPierre Schweitzer if (bestLength >= MINMATCH) {
221eb7fbc25SPierre Schweitzer U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
222eb7fbc25SPierre Schweitzer DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
223eb7fbc25SPierre Schweitzer current, (U32)bestLength, (U32)*offsetPtr, mIndex);
224eb7fbc25SPierre Schweitzer }
225eb7fbc25SPierre Schweitzer return bestLength;
226eb7fbc25SPierre Schweitzer
227eb7fbc25SPierre Schweitzer }
228eb7fbc25SPierre Schweitzer
229eb7fbc25SPierre Schweitzer
230eb7fbc25SPierre Schweitzer static size_t
ZSTD_DUBT_findBestMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iend,size_t * offsetPtr,U32 const mls,const ZSTD_dictMode_e dictMode)231eb7fbc25SPierre Schweitzer ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
232eb7fbc25SPierre Schweitzer const BYTE* const ip, const BYTE* const iend,
233eb7fbc25SPierre Schweitzer size_t* offsetPtr,
234eb7fbc25SPierre Schweitzer U32 const mls,
235eb7fbc25SPierre Schweitzer const ZSTD_dictMode_e dictMode)
236eb7fbc25SPierre Schweitzer {
237eb7fbc25SPierre Schweitzer const ZSTD_compressionParameters* const cParams = &ms->cParams;
238eb7fbc25SPierre Schweitzer U32* const hashTable = ms->hashTable;
239eb7fbc25SPierre Schweitzer U32 const hashLog = cParams->hashLog;
240eb7fbc25SPierre Schweitzer size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
241eb7fbc25SPierre Schweitzer U32 matchIndex = hashTable[h];
242eb7fbc25SPierre Schweitzer
243eb7fbc25SPierre Schweitzer const BYTE* const base = ms->window.base;
244eb7fbc25SPierre Schweitzer U32 const current = (U32)(ip-base);
24506042735SVincent Franchomme U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
246eb7fbc25SPierre Schweitzer
247eb7fbc25SPierre Schweitzer U32* const bt = ms->chainTable;
248eb7fbc25SPierre Schweitzer U32 const btLog = cParams->chainLog - 1;
249eb7fbc25SPierre Schweitzer U32 const btMask = (1 << btLog) - 1;
250eb7fbc25SPierre Schweitzer U32 const btLow = (btMask >= current) ? 0 : current - btMask;
251eb7fbc25SPierre Schweitzer U32 const unsortLimit = MAX(btLow, windowLow);
252eb7fbc25SPierre Schweitzer
253eb7fbc25SPierre Schweitzer U32* nextCandidate = bt + 2*(matchIndex&btMask);
254eb7fbc25SPierre Schweitzer U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
255eb7fbc25SPierre Schweitzer U32 nbCompares = 1U << cParams->searchLog;
256eb7fbc25SPierre Schweitzer U32 nbCandidates = nbCompares;
257eb7fbc25SPierre Schweitzer U32 previousCandidate = 0;
258eb7fbc25SPierre Schweitzer
259eb7fbc25SPierre Schweitzer DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
260eb7fbc25SPierre Schweitzer assert(ip <= iend-8); /* required for h calculation */
261eb7fbc25SPierre Schweitzer
262eb7fbc25SPierre Schweitzer /* reach end of unsorted candidates list */
263eb7fbc25SPierre Schweitzer while ( (matchIndex > unsortLimit)
264eb7fbc25SPierre Schweitzer && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
265eb7fbc25SPierre Schweitzer && (nbCandidates > 1) ) {
266eb7fbc25SPierre Schweitzer DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
267eb7fbc25SPierre Schweitzer matchIndex);
26806042735SVincent Franchomme *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
269eb7fbc25SPierre Schweitzer previousCandidate = matchIndex;
270eb7fbc25SPierre Schweitzer matchIndex = *nextCandidate;
271eb7fbc25SPierre Schweitzer nextCandidate = bt + 2*(matchIndex&btMask);
272eb7fbc25SPierre Schweitzer unsortedMark = bt + 2*(matchIndex&btMask) + 1;
273eb7fbc25SPierre Schweitzer nbCandidates --;
274eb7fbc25SPierre Schweitzer }
275eb7fbc25SPierre Schweitzer
27606042735SVincent Franchomme /* nullify last candidate if it's still unsorted
27706042735SVincent Franchomme * simplification, detrimental to compression ratio, beneficial for speed */
278eb7fbc25SPierre Schweitzer if ( (matchIndex > unsortLimit)
279eb7fbc25SPierre Schweitzer && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
280eb7fbc25SPierre Schweitzer DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
281eb7fbc25SPierre Schweitzer matchIndex);
28206042735SVincent Franchomme *nextCandidate = *unsortedMark = 0;
283eb7fbc25SPierre Schweitzer }
284eb7fbc25SPierre Schweitzer
285eb7fbc25SPierre Schweitzer /* batch sort stacked candidates */
286eb7fbc25SPierre Schweitzer matchIndex = previousCandidate;
287eb7fbc25SPierre Schweitzer while (matchIndex) { /* will end on matchIndex == 0 */
288eb7fbc25SPierre Schweitzer U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
289eb7fbc25SPierre Schweitzer U32 const nextCandidateIdx = *nextCandidateIdxPtr;
290eb7fbc25SPierre Schweitzer ZSTD_insertDUBT1(ms, matchIndex, iend,
291eb7fbc25SPierre Schweitzer nbCandidates, unsortLimit, dictMode);
292eb7fbc25SPierre Schweitzer matchIndex = nextCandidateIdx;
293eb7fbc25SPierre Schweitzer nbCandidates++;
294eb7fbc25SPierre Schweitzer }
295eb7fbc25SPierre Schweitzer
296eb7fbc25SPierre Schweitzer /* find longest match */
297eb7fbc25SPierre Schweitzer { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
298eb7fbc25SPierre Schweitzer const BYTE* const dictBase = ms->window.dictBase;
299eb7fbc25SPierre Schweitzer const U32 dictLimit = ms->window.dictLimit;
300eb7fbc25SPierre Schweitzer const BYTE* const dictEnd = dictBase + dictLimit;
301eb7fbc25SPierre Schweitzer const BYTE* const prefixStart = base + dictLimit;
302eb7fbc25SPierre Schweitzer U32* smallerPtr = bt + 2*(current&btMask);
303eb7fbc25SPierre Schweitzer U32* largerPtr = bt + 2*(current&btMask) + 1;
304eb7fbc25SPierre Schweitzer U32 matchEndIdx = current + 8 + 1;
305eb7fbc25SPierre Schweitzer U32 dummy32; /* to be nullified at the end */
306eb7fbc25SPierre Schweitzer size_t bestLength = 0;
307eb7fbc25SPierre Schweitzer
308eb7fbc25SPierre Schweitzer matchIndex = hashTable[h];
309eb7fbc25SPierre Schweitzer hashTable[h] = current; /* Update Hash Table */
310eb7fbc25SPierre Schweitzer
311eb7fbc25SPierre Schweitzer while (nbCompares-- && (matchIndex > windowLow)) {
312eb7fbc25SPierre Schweitzer U32* const nextPtr = bt + 2*(matchIndex & btMask);
313eb7fbc25SPierre Schweitzer size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
314eb7fbc25SPierre Schweitzer const BYTE* match;
315eb7fbc25SPierre Schweitzer
316eb7fbc25SPierre Schweitzer if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
317eb7fbc25SPierre Schweitzer match = base + matchIndex;
318eb7fbc25SPierre Schweitzer matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
319eb7fbc25SPierre Schweitzer } else {
320eb7fbc25SPierre Schweitzer match = dictBase + matchIndex;
321eb7fbc25SPierre Schweitzer matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
322eb7fbc25SPierre Schweitzer if (matchIndex+matchLength >= dictLimit)
323eb7fbc25SPierre Schweitzer match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
324eb7fbc25SPierre Schweitzer }
325eb7fbc25SPierre Schweitzer
326eb7fbc25SPierre Schweitzer if (matchLength > bestLength) {
327eb7fbc25SPierre Schweitzer if (matchLength > matchEndIdx - matchIndex)
328eb7fbc25SPierre Schweitzer matchEndIdx = matchIndex + (U32)matchLength;
329eb7fbc25SPierre Schweitzer if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
330eb7fbc25SPierre Schweitzer bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
331eb7fbc25SPierre Schweitzer if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
332eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState) {
333eb7fbc25SPierre Schweitzer nbCompares = 0; /* in addition to avoiding checking any
334eb7fbc25SPierre Schweitzer * further in this loop, make sure we
335eb7fbc25SPierre Schweitzer * skip checking in the dictionary. */
336eb7fbc25SPierre Schweitzer }
337eb7fbc25SPierre Schweitzer break; /* drop, to guarantee consistency (miss a little bit of compression) */
338eb7fbc25SPierre Schweitzer }
339eb7fbc25SPierre Schweitzer }
340eb7fbc25SPierre Schweitzer
341eb7fbc25SPierre Schweitzer if (match[matchLength] < ip[matchLength]) {
342eb7fbc25SPierre Schweitzer /* match is smaller than current */
343eb7fbc25SPierre Schweitzer *smallerPtr = matchIndex; /* update smaller idx */
344eb7fbc25SPierre Schweitzer commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
345eb7fbc25SPierre Schweitzer if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
346eb7fbc25SPierre Schweitzer smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
347eb7fbc25SPierre Schweitzer matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
348eb7fbc25SPierre Schweitzer } else {
349eb7fbc25SPierre Schweitzer /* match is larger than current */
350eb7fbc25SPierre Schweitzer *largerPtr = matchIndex;
351eb7fbc25SPierre Schweitzer commonLengthLarger = matchLength;
352eb7fbc25SPierre Schweitzer if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
353eb7fbc25SPierre Schweitzer largerPtr = nextPtr;
354eb7fbc25SPierre Schweitzer matchIndex = nextPtr[0];
355eb7fbc25SPierre Schweitzer } }
356eb7fbc25SPierre Schweitzer
357eb7fbc25SPierre Schweitzer *smallerPtr = *largerPtr = 0;
358eb7fbc25SPierre Schweitzer
359eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState && nbCompares) {
360eb7fbc25SPierre Schweitzer bestLength = ZSTD_DUBT_findBetterDictMatch(
361eb7fbc25SPierre Schweitzer ms, ip, iend,
362eb7fbc25SPierre Schweitzer offsetPtr, bestLength, nbCompares,
363eb7fbc25SPierre Schweitzer mls, dictMode);
364eb7fbc25SPierre Schweitzer }
365eb7fbc25SPierre Schweitzer
366eb7fbc25SPierre Schweitzer assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
367eb7fbc25SPierre Schweitzer ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
368eb7fbc25SPierre Schweitzer if (bestLength >= MINMATCH) {
369eb7fbc25SPierre Schweitzer U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
370eb7fbc25SPierre Schweitzer DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
371eb7fbc25SPierre Schweitzer current, (U32)bestLength, (U32)*offsetPtr, mIndex);
372eb7fbc25SPierre Schweitzer }
373eb7fbc25SPierre Schweitzer return bestLength;
374eb7fbc25SPierre Schweitzer }
375eb7fbc25SPierre Schweitzer }
376eb7fbc25SPierre Schweitzer
377eb7fbc25SPierre Schweitzer
378eb7fbc25SPierre Schweitzer /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
379eb7fbc25SPierre Schweitzer FORCE_INLINE_TEMPLATE size_t
ZSTD_BtFindBestMatch(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iLimit,size_t * offsetPtr,const U32 mls,const ZSTD_dictMode_e dictMode)380eb7fbc25SPierre Schweitzer ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
381eb7fbc25SPierre Schweitzer const BYTE* const ip, const BYTE* const iLimit,
382eb7fbc25SPierre Schweitzer size_t* offsetPtr,
383eb7fbc25SPierre Schweitzer const U32 mls /* template */,
384eb7fbc25SPierre Schweitzer const ZSTD_dictMode_e dictMode)
385eb7fbc25SPierre Schweitzer {
386eb7fbc25SPierre Schweitzer DEBUGLOG(7, "ZSTD_BtFindBestMatch");
387eb7fbc25SPierre Schweitzer if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
388eb7fbc25SPierre Schweitzer ZSTD_updateDUBT(ms, ip, iLimit, mls);
389eb7fbc25SPierre Schweitzer return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
390eb7fbc25SPierre Schweitzer }
391eb7fbc25SPierre Schweitzer
392eb7fbc25SPierre Schweitzer
393eb7fbc25SPierre Schweitzer static size_t
ZSTD_BtFindBestMatch_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)394eb7fbc25SPierre Schweitzer ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
395eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* const iLimit,
396eb7fbc25SPierre Schweitzer size_t* offsetPtr)
397eb7fbc25SPierre Schweitzer {
39806042735SVincent Franchomme switch(ms->cParams.minMatch)
399eb7fbc25SPierre Schweitzer {
400eb7fbc25SPierre Schweitzer default : /* includes case 3 */
401eb7fbc25SPierre Schweitzer case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
402eb7fbc25SPierre Schweitzer case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
403eb7fbc25SPierre Schweitzer case 7 :
404eb7fbc25SPierre Schweitzer case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
405eb7fbc25SPierre Schweitzer }
406eb7fbc25SPierre Schweitzer }
407eb7fbc25SPierre Schweitzer
408eb7fbc25SPierre Schweitzer
ZSTD_BtFindBestMatch_dictMatchState_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)409eb7fbc25SPierre Schweitzer static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
410eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
411eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* const iLimit,
412eb7fbc25SPierre Schweitzer size_t* offsetPtr)
413eb7fbc25SPierre Schweitzer {
41406042735SVincent Franchomme switch(ms->cParams.minMatch)
415eb7fbc25SPierre Schweitzer {
416eb7fbc25SPierre Schweitzer default : /* includes case 3 */
417eb7fbc25SPierre Schweitzer case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
418eb7fbc25SPierre Schweitzer case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
419eb7fbc25SPierre Schweitzer case 7 :
420eb7fbc25SPierre Schweitzer case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
421eb7fbc25SPierre Schweitzer }
422eb7fbc25SPierre Schweitzer }
423eb7fbc25SPierre Schweitzer
424eb7fbc25SPierre Schweitzer
ZSTD_BtFindBestMatch_extDict_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)425eb7fbc25SPierre Schweitzer static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
426eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
427eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* const iLimit,
428eb7fbc25SPierre Schweitzer size_t* offsetPtr)
429eb7fbc25SPierre Schweitzer {
43006042735SVincent Franchomme switch(ms->cParams.minMatch)
431eb7fbc25SPierre Schweitzer {
432eb7fbc25SPierre Schweitzer default : /* includes case 3 */
433eb7fbc25SPierre Schweitzer case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
434eb7fbc25SPierre Schweitzer case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
435eb7fbc25SPierre Schweitzer case 7 :
436eb7fbc25SPierre Schweitzer case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
437eb7fbc25SPierre Schweitzer }
438eb7fbc25SPierre Schweitzer }
439eb7fbc25SPierre Schweitzer
440eb7fbc25SPierre Schweitzer
441eb7fbc25SPierre Schweitzer
442eb7fbc25SPierre Schweitzer /* *********************************
443eb7fbc25SPierre Schweitzer * Hash Chain
444eb7fbc25SPierre Schweitzer ***********************************/
44506042735SVincent Franchomme #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
446eb7fbc25SPierre Schweitzer
447eb7fbc25SPierre Schweitzer /* Update chains up to ip (excluded)
448eb7fbc25SPierre Schweitzer Assumption : always within prefix (i.e. not within extDict) */
ZSTD_insertAndFindFirstIndex_internal(ZSTD_matchState_t * ms,const ZSTD_compressionParameters * const cParams,const BYTE * ip,U32 const mls)449eb7fbc25SPierre Schweitzer static U32 ZSTD_insertAndFindFirstIndex_internal(
450eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
451eb7fbc25SPierre Schweitzer const ZSTD_compressionParameters* const cParams,
452eb7fbc25SPierre Schweitzer const BYTE* ip, U32 const mls)
453eb7fbc25SPierre Schweitzer {
454eb7fbc25SPierre Schweitzer U32* const hashTable = ms->hashTable;
455eb7fbc25SPierre Schweitzer const U32 hashLog = cParams->hashLog;
456eb7fbc25SPierre Schweitzer U32* const chainTable = ms->chainTable;
457eb7fbc25SPierre Schweitzer const U32 chainMask = (1 << cParams->chainLog) - 1;
458eb7fbc25SPierre Schweitzer const BYTE* const base = ms->window.base;
459eb7fbc25SPierre Schweitzer const U32 target = (U32)(ip - base);
460eb7fbc25SPierre Schweitzer U32 idx = ms->nextToUpdate;
461eb7fbc25SPierre Schweitzer
462eb7fbc25SPierre Schweitzer while(idx < target) { /* catch up */
463eb7fbc25SPierre Schweitzer size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
464eb7fbc25SPierre Schweitzer NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
465eb7fbc25SPierre Schweitzer hashTable[h] = idx;
466eb7fbc25SPierre Schweitzer idx++;
467eb7fbc25SPierre Schweitzer }
468eb7fbc25SPierre Schweitzer
469eb7fbc25SPierre Schweitzer ms->nextToUpdate = target;
470eb7fbc25SPierre Schweitzer return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
471eb7fbc25SPierre Schweitzer }
472eb7fbc25SPierre Schweitzer
ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t * ms,const BYTE * ip)473eb7fbc25SPierre Schweitzer U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
474eb7fbc25SPierre Schweitzer const ZSTD_compressionParameters* const cParams = &ms->cParams;
47506042735SVincent Franchomme return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
476eb7fbc25SPierre Schweitzer }
477eb7fbc25SPierre Schweitzer
478eb7fbc25SPierre Schweitzer
479eb7fbc25SPierre Schweitzer /* inlining is important to hardwire a hot branch (template emulation) */
480eb7fbc25SPierre Schweitzer FORCE_INLINE_TEMPLATE
ZSTD_HcFindBestMatch_generic(ZSTD_matchState_t * ms,const BYTE * const ip,const BYTE * const iLimit,size_t * offsetPtr,const U32 mls,const ZSTD_dictMode_e dictMode)481eb7fbc25SPierre Schweitzer size_t ZSTD_HcFindBestMatch_generic (
482eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
483eb7fbc25SPierre Schweitzer const BYTE* const ip, const BYTE* const iLimit,
484eb7fbc25SPierre Schweitzer size_t* offsetPtr,
485eb7fbc25SPierre Schweitzer const U32 mls, const ZSTD_dictMode_e dictMode)
486eb7fbc25SPierre Schweitzer {
487eb7fbc25SPierre Schweitzer const ZSTD_compressionParameters* const cParams = &ms->cParams;
488eb7fbc25SPierre Schweitzer U32* const chainTable = ms->chainTable;
489eb7fbc25SPierre Schweitzer const U32 chainSize = (1 << cParams->chainLog);
490eb7fbc25SPierre Schweitzer const U32 chainMask = chainSize-1;
491eb7fbc25SPierre Schweitzer const BYTE* const base = ms->window.base;
492eb7fbc25SPierre Schweitzer const BYTE* const dictBase = ms->window.dictBase;
493eb7fbc25SPierre Schweitzer const U32 dictLimit = ms->window.dictLimit;
494eb7fbc25SPierre Schweitzer const BYTE* const prefixStart = base + dictLimit;
495eb7fbc25SPierre Schweitzer const BYTE* const dictEnd = dictBase + dictLimit;
496eb7fbc25SPierre Schweitzer const U32 current = (U32)(ip-base);
49706042735SVincent Franchomme const U32 maxDistance = 1U << cParams->windowLog;
49806042735SVincent Franchomme const U32 lowestValid = ms->window.lowLimit;
49906042735SVincent Franchomme const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
50006042735SVincent Franchomme const U32 isDictionary = (ms->loadedDictEnd != 0);
50106042735SVincent Franchomme const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
502eb7fbc25SPierre Schweitzer const U32 minChain = current > chainSize ? current - chainSize : 0;
503eb7fbc25SPierre Schweitzer U32 nbAttempts = 1U << cParams->searchLog;
504eb7fbc25SPierre Schweitzer size_t ml=4-1;
505eb7fbc25SPierre Schweitzer
506eb7fbc25SPierre Schweitzer /* HC4 match finder */
507eb7fbc25SPierre Schweitzer U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
508eb7fbc25SPierre Schweitzer
509eb7fbc25SPierre Schweitzer for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
510eb7fbc25SPierre Schweitzer size_t currentMl=0;
511eb7fbc25SPierre Schweitzer if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
512eb7fbc25SPierre Schweitzer const BYTE* const match = base + matchIndex;
51306042735SVincent Franchomme assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
514eb7fbc25SPierre Schweitzer if (match[ml] == ip[ml]) /* potentially better */
515eb7fbc25SPierre Schweitzer currentMl = ZSTD_count(ip, match, iLimit);
516eb7fbc25SPierre Schweitzer } else {
517eb7fbc25SPierre Schweitzer const BYTE* const match = dictBase + matchIndex;
518eb7fbc25SPierre Schweitzer assert(match+4 <= dictEnd);
519eb7fbc25SPierre Schweitzer if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
520eb7fbc25SPierre Schweitzer currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
521eb7fbc25SPierre Schweitzer }
522eb7fbc25SPierre Schweitzer
523eb7fbc25SPierre Schweitzer /* save best solution */
524eb7fbc25SPierre Schweitzer if (currentMl > ml) {
525eb7fbc25SPierre Schweitzer ml = currentMl;
526eb7fbc25SPierre Schweitzer *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
527eb7fbc25SPierre Schweitzer if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
528eb7fbc25SPierre Schweitzer }
529eb7fbc25SPierre Schweitzer
530eb7fbc25SPierre Schweitzer if (matchIndex <= minChain) break;
531eb7fbc25SPierre Schweitzer matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
532eb7fbc25SPierre Schweitzer }
533eb7fbc25SPierre Schweitzer
534eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState) {
535eb7fbc25SPierre Schweitzer const ZSTD_matchState_t* const dms = ms->dictMatchState;
536eb7fbc25SPierre Schweitzer const U32* const dmsChainTable = dms->chainTable;
537eb7fbc25SPierre Schweitzer const U32 dmsChainSize = (1 << dms->cParams.chainLog);
538eb7fbc25SPierre Schweitzer const U32 dmsChainMask = dmsChainSize - 1;
539eb7fbc25SPierre Schweitzer const U32 dmsLowestIndex = dms->window.dictLimit;
540eb7fbc25SPierre Schweitzer const BYTE* const dmsBase = dms->window.base;
541eb7fbc25SPierre Schweitzer const BYTE* const dmsEnd = dms->window.nextSrc;
542eb7fbc25SPierre Schweitzer const U32 dmsSize = (U32)(dmsEnd - dmsBase);
543eb7fbc25SPierre Schweitzer const U32 dmsIndexDelta = dictLimit - dmsSize;
544eb7fbc25SPierre Schweitzer const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
545eb7fbc25SPierre Schweitzer
546eb7fbc25SPierre Schweitzer matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
547eb7fbc25SPierre Schweitzer
548eb7fbc25SPierre Schweitzer for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
549eb7fbc25SPierre Schweitzer size_t currentMl=0;
550eb7fbc25SPierre Schweitzer const BYTE* const match = dmsBase + matchIndex;
551eb7fbc25SPierre Schweitzer assert(match+4 <= dmsEnd);
552eb7fbc25SPierre Schweitzer if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
553eb7fbc25SPierre Schweitzer currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
554eb7fbc25SPierre Schweitzer
555eb7fbc25SPierre Schweitzer /* save best solution */
556eb7fbc25SPierre Schweitzer if (currentMl > ml) {
557eb7fbc25SPierre Schweitzer ml = currentMl;
558eb7fbc25SPierre Schweitzer *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
559eb7fbc25SPierre Schweitzer if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
560eb7fbc25SPierre Schweitzer }
561eb7fbc25SPierre Schweitzer
562eb7fbc25SPierre Schweitzer if (matchIndex <= dmsMinChain) break;
563eb7fbc25SPierre Schweitzer matchIndex = dmsChainTable[matchIndex & dmsChainMask];
564eb7fbc25SPierre Schweitzer }
565eb7fbc25SPierre Schweitzer }
566eb7fbc25SPierre Schweitzer
567eb7fbc25SPierre Schweitzer return ml;
568eb7fbc25SPierre Schweitzer }
569eb7fbc25SPierre Schweitzer
570eb7fbc25SPierre Schweitzer
ZSTD_HcFindBestMatch_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)571eb7fbc25SPierre Schweitzer FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
572eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
573eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* const iLimit,
574eb7fbc25SPierre Schweitzer size_t* offsetPtr)
575eb7fbc25SPierre Schweitzer {
57606042735SVincent Franchomme switch(ms->cParams.minMatch)
577eb7fbc25SPierre Schweitzer {
578eb7fbc25SPierre Schweitzer default : /* includes case 3 */
579eb7fbc25SPierre Schweitzer case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
580eb7fbc25SPierre Schweitzer case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
581eb7fbc25SPierre Schweitzer case 7 :
582eb7fbc25SPierre Schweitzer case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
583eb7fbc25SPierre Schweitzer }
584eb7fbc25SPierre Schweitzer }
585eb7fbc25SPierre Schweitzer
586eb7fbc25SPierre Schweitzer
ZSTD_HcFindBestMatch_dictMatchState_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)587eb7fbc25SPierre Schweitzer static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
588eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
589eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* const iLimit,
590eb7fbc25SPierre Schweitzer size_t* offsetPtr)
591eb7fbc25SPierre Schweitzer {
59206042735SVincent Franchomme switch(ms->cParams.minMatch)
593eb7fbc25SPierre Schweitzer {
594eb7fbc25SPierre Schweitzer default : /* includes case 3 */
595eb7fbc25SPierre Schweitzer case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
596eb7fbc25SPierre Schweitzer case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
597eb7fbc25SPierre Schweitzer case 7 :
598eb7fbc25SPierre Schweitzer case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
599eb7fbc25SPierre Schweitzer }
600eb7fbc25SPierre Schweitzer }
601eb7fbc25SPierre Schweitzer
602eb7fbc25SPierre Schweitzer
ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_matchState_t * ms,const BYTE * ip,const BYTE * const iLimit,size_t * offsetPtr)603eb7fbc25SPierre Schweitzer FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
604eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
605eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* const iLimit,
606eb7fbc25SPierre Schweitzer size_t* offsetPtr)
607eb7fbc25SPierre Schweitzer {
60806042735SVincent Franchomme switch(ms->cParams.minMatch)
609eb7fbc25SPierre Schweitzer {
610eb7fbc25SPierre Schweitzer default : /* includes case 3 */
611eb7fbc25SPierre Schweitzer case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
612eb7fbc25SPierre Schweitzer case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
613eb7fbc25SPierre Schweitzer case 7 :
614eb7fbc25SPierre Schweitzer case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
615eb7fbc25SPierre Schweitzer }
616eb7fbc25SPierre Schweitzer }
617eb7fbc25SPierre Schweitzer
618eb7fbc25SPierre Schweitzer
619eb7fbc25SPierre Schweitzer /* *******************************
620eb7fbc25SPierre Schweitzer * Common parser - lazy strategy
621eb7fbc25SPierre Schweitzer *********************************/
62206042735SVincent Franchomme typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
62306042735SVincent Franchomme
624*f5556fdcSVincent Franchomme size_t
ZSTD_compressBlock_lazy_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const searchMethod_e searchMethod,const U32 depth,ZSTD_dictMode_e const dictMode)62506042735SVincent Franchomme ZSTD_compressBlock_lazy_generic(
626eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore,
627eb7fbc25SPierre Schweitzer U32 rep[ZSTD_REP_NUM],
628eb7fbc25SPierre Schweitzer const void* src, size_t srcSize,
62906042735SVincent Franchomme const searchMethod_e searchMethod, const U32 depth,
630eb7fbc25SPierre Schweitzer ZSTD_dictMode_e const dictMode)
631eb7fbc25SPierre Schweitzer {
632eb7fbc25SPierre Schweitzer const BYTE* const istart = (const BYTE*)src;
633eb7fbc25SPierre Schweitzer const BYTE* ip = istart;
634eb7fbc25SPierre Schweitzer const BYTE* anchor = istart;
635eb7fbc25SPierre Schweitzer const BYTE* const iend = istart + srcSize;
636eb7fbc25SPierre Schweitzer const BYTE* const ilimit = iend - 8;
637eb7fbc25SPierre Schweitzer const BYTE* const base = ms->window.base;
638eb7fbc25SPierre Schweitzer const U32 prefixLowestIndex = ms->window.dictLimit;
639eb7fbc25SPierre Schweitzer const BYTE* const prefixLowest = base + prefixLowestIndex;
640eb7fbc25SPierre Schweitzer
641eb7fbc25SPierre Schweitzer typedef size_t (*searchMax_f)(
642eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
643eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
644eb7fbc25SPierre Schweitzer searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
64506042735SVincent Franchomme (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
64606042735SVincent Franchomme : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
64706042735SVincent Franchomme (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
64806042735SVincent Franchomme : ZSTD_HcFindBestMatch_selectMLS);
649eb7fbc25SPierre Schweitzer U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
650eb7fbc25SPierre Schweitzer
651eb7fbc25SPierre Schweitzer const ZSTD_matchState_t* const dms = ms->dictMatchState;
652eb7fbc25SPierre Schweitzer const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
653eb7fbc25SPierre Schweitzer dms->window.dictLimit : 0;
654eb7fbc25SPierre Schweitzer const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
655eb7fbc25SPierre Schweitzer dms->window.base : NULL;
656eb7fbc25SPierre Schweitzer const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
657eb7fbc25SPierre Schweitzer dictBase + dictLowestIndex : NULL;
658eb7fbc25SPierre Schweitzer const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
659eb7fbc25SPierre Schweitzer dms->window.nextSrc : NULL;
660eb7fbc25SPierre Schweitzer const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
661eb7fbc25SPierre Schweitzer prefixLowestIndex - (U32)(dictEnd - dictBase) :
662eb7fbc25SPierre Schweitzer 0;
66306042735SVincent Franchomme const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
66406042735SVincent Franchomme
66506042735SVincent Franchomme DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
666eb7fbc25SPierre Schweitzer
667eb7fbc25SPierre Schweitzer /* init */
668eb7fbc25SPierre Schweitzer ip += (dictAndPrefixLength == 0);
669eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_noDict) {
67006042735SVincent Franchomme U32 const current = (U32)(ip - base);
67106042735SVincent Franchomme U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
67206042735SVincent Franchomme U32 const maxRep = current - windowLow;
673eb7fbc25SPierre Schweitzer if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
674eb7fbc25SPierre Schweitzer if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
675eb7fbc25SPierre Schweitzer }
676eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState) {
677eb7fbc25SPierre Schweitzer /* dictMatchState repCode checks don't currently handle repCode == 0
678eb7fbc25SPierre Schweitzer * disabling. */
679eb7fbc25SPierre Schweitzer assert(offset_1 <= dictAndPrefixLength);
680eb7fbc25SPierre Schweitzer assert(offset_2 <= dictAndPrefixLength);
681eb7fbc25SPierre Schweitzer }
682eb7fbc25SPierre Schweitzer
683eb7fbc25SPierre Schweitzer /* Match Loop */
68406042735SVincent Franchomme #if defined(__GNUC__) && defined(__x86_64__)
68506042735SVincent Franchomme /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
68606042735SVincent Franchomme * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
68706042735SVincent Franchomme */
68806042735SVincent Franchomme __asm__(".p2align 5");
68906042735SVincent Franchomme #endif
690eb7fbc25SPierre Schweitzer while (ip < ilimit) {
691eb7fbc25SPierre Schweitzer size_t matchLength=0;
692eb7fbc25SPierre Schweitzer size_t offset=0;
693eb7fbc25SPierre Schweitzer const BYTE* start=ip+1;
694eb7fbc25SPierre Schweitzer
695eb7fbc25SPierre Schweitzer /* check repCode */
696eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState) {
697eb7fbc25SPierre Schweitzer const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
698eb7fbc25SPierre Schweitzer const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
699eb7fbc25SPierre Schweitzer && repIndex < prefixLowestIndex) ?
700eb7fbc25SPierre Schweitzer dictBase + (repIndex - dictIndexDelta) :
701eb7fbc25SPierre Schweitzer base + repIndex;
702eb7fbc25SPierre Schweitzer if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
703eb7fbc25SPierre Schweitzer && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
704eb7fbc25SPierre Schweitzer const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
705eb7fbc25SPierre Schweitzer matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
706eb7fbc25SPierre Schweitzer if (depth==0) goto _storeSequence;
707eb7fbc25SPierre Schweitzer }
708eb7fbc25SPierre Schweitzer }
709eb7fbc25SPierre Schweitzer if ( dictMode == ZSTD_noDict
710eb7fbc25SPierre Schweitzer && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
711eb7fbc25SPierre Schweitzer matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
712eb7fbc25SPierre Schweitzer if (depth==0) goto _storeSequence;
713eb7fbc25SPierre Schweitzer }
714eb7fbc25SPierre Schweitzer
715eb7fbc25SPierre Schweitzer /* first search (depth 0) */
716eb7fbc25SPierre Schweitzer { size_t offsetFound = 999999999;
717eb7fbc25SPierre Schweitzer size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
718eb7fbc25SPierre Schweitzer if (ml2 > matchLength)
719eb7fbc25SPierre Schweitzer matchLength = ml2, start = ip, offset=offsetFound;
720eb7fbc25SPierre Schweitzer }
721eb7fbc25SPierre Schweitzer
722eb7fbc25SPierre Schweitzer if (matchLength < 4) {
723eb7fbc25SPierre Schweitzer ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
724eb7fbc25SPierre Schweitzer continue;
725eb7fbc25SPierre Schweitzer }
726eb7fbc25SPierre Schweitzer
727eb7fbc25SPierre Schweitzer /* let's try to find a better solution */
728eb7fbc25SPierre Schweitzer if (depth>=1)
729eb7fbc25SPierre Schweitzer while (ip<ilimit) {
730eb7fbc25SPierre Schweitzer ip ++;
731eb7fbc25SPierre Schweitzer if ( (dictMode == ZSTD_noDict)
732eb7fbc25SPierre Schweitzer && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
733eb7fbc25SPierre Schweitzer size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
734eb7fbc25SPierre Schweitzer int const gain2 = (int)(mlRep * 3);
735eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
736eb7fbc25SPierre Schweitzer if ((mlRep >= 4) && (gain2 > gain1))
737eb7fbc25SPierre Schweitzer matchLength = mlRep, offset = 0, start = ip;
738eb7fbc25SPierre Schweitzer }
739eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState) {
740eb7fbc25SPierre Schweitzer const U32 repIndex = (U32)(ip - base) - offset_1;
741eb7fbc25SPierre Schweitzer const BYTE* repMatch = repIndex < prefixLowestIndex ?
742eb7fbc25SPierre Schweitzer dictBase + (repIndex - dictIndexDelta) :
743eb7fbc25SPierre Schweitzer base + repIndex;
744eb7fbc25SPierre Schweitzer if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
745eb7fbc25SPierre Schweitzer && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
746eb7fbc25SPierre Schweitzer const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
747eb7fbc25SPierre Schweitzer size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
748eb7fbc25SPierre Schweitzer int const gain2 = (int)(mlRep * 3);
749eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
750eb7fbc25SPierre Schweitzer if ((mlRep >= 4) && (gain2 > gain1))
751eb7fbc25SPierre Schweitzer matchLength = mlRep, offset = 0, start = ip;
752eb7fbc25SPierre Schweitzer }
753eb7fbc25SPierre Schweitzer }
754eb7fbc25SPierre Schweitzer { size_t offset2=999999999;
755eb7fbc25SPierre Schweitzer size_t const ml2 = searchMax(ms, ip, iend, &offset2);
756eb7fbc25SPierre Schweitzer int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
757eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
758eb7fbc25SPierre Schweitzer if ((ml2 >= 4) && (gain2 > gain1)) {
759eb7fbc25SPierre Schweitzer matchLength = ml2, offset = offset2, start = ip;
760eb7fbc25SPierre Schweitzer continue; /* search a better one */
761eb7fbc25SPierre Schweitzer } }
762eb7fbc25SPierre Schweitzer
763eb7fbc25SPierre Schweitzer /* let's find an even better one */
764eb7fbc25SPierre Schweitzer if ((depth==2) && (ip<ilimit)) {
765eb7fbc25SPierre Schweitzer ip ++;
766eb7fbc25SPierre Schweitzer if ( (dictMode == ZSTD_noDict)
767eb7fbc25SPierre Schweitzer && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
768eb7fbc25SPierre Schweitzer size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
769eb7fbc25SPierre Schweitzer int const gain2 = (int)(mlRep * 4);
770eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
771eb7fbc25SPierre Schweitzer if ((mlRep >= 4) && (gain2 > gain1))
772eb7fbc25SPierre Schweitzer matchLength = mlRep, offset = 0, start = ip;
773eb7fbc25SPierre Schweitzer }
774eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState) {
775eb7fbc25SPierre Schweitzer const U32 repIndex = (U32)(ip - base) - offset_1;
776eb7fbc25SPierre Schweitzer const BYTE* repMatch = repIndex < prefixLowestIndex ?
777eb7fbc25SPierre Schweitzer dictBase + (repIndex - dictIndexDelta) :
778eb7fbc25SPierre Schweitzer base + repIndex;
779eb7fbc25SPierre Schweitzer if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
780eb7fbc25SPierre Schweitzer && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
781eb7fbc25SPierre Schweitzer const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
782eb7fbc25SPierre Schweitzer size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
783eb7fbc25SPierre Schweitzer int const gain2 = (int)(mlRep * 4);
784eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
785eb7fbc25SPierre Schweitzer if ((mlRep >= 4) && (gain2 > gain1))
786eb7fbc25SPierre Schweitzer matchLength = mlRep, offset = 0, start = ip;
787eb7fbc25SPierre Schweitzer }
788eb7fbc25SPierre Schweitzer }
789eb7fbc25SPierre Schweitzer { size_t offset2=999999999;
790eb7fbc25SPierre Schweitzer size_t const ml2 = searchMax(ms, ip, iend, &offset2);
791eb7fbc25SPierre Schweitzer int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
792eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
793eb7fbc25SPierre Schweitzer if ((ml2 >= 4) && (gain2 > gain1)) {
794eb7fbc25SPierre Schweitzer matchLength = ml2, offset = offset2, start = ip;
795eb7fbc25SPierre Schweitzer continue;
796eb7fbc25SPierre Schweitzer } } }
797eb7fbc25SPierre Schweitzer break; /* nothing found : store previous solution */
798eb7fbc25SPierre Schweitzer }
799eb7fbc25SPierre Schweitzer
800eb7fbc25SPierre Schweitzer /* NOTE:
801eb7fbc25SPierre Schweitzer * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
802eb7fbc25SPierre Schweitzer * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
803eb7fbc25SPierre Schweitzer * overflows the pointer, which is undefined behavior.
804eb7fbc25SPierre Schweitzer */
805eb7fbc25SPierre Schweitzer /* catch up */
806eb7fbc25SPierre Schweitzer if (offset) {
807eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_noDict) {
808eb7fbc25SPierre Schweitzer while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
809eb7fbc25SPierre Schweitzer && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
810eb7fbc25SPierre Schweitzer { start--; matchLength++; }
811eb7fbc25SPierre Schweitzer }
812eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState) {
813eb7fbc25SPierre Schweitzer U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
814eb7fbc25SPierre Schweitzer const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
815eb7fbc25SPierre Schweitzer const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
816eb7fbc25SPierre Schweitzer while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
817eb7fbc25SPierre Schweitzer }
818eb7fbc25SPierre Schweitzer offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
819eb7fbc25SPierre Schweitzer }
820eb7fbc25SPierre Schweitzer /* store sequence */
821eb7fbc25SPierre Schweitzer _storeSequence:
822eb7fbc25SPierre Schweitzer { size_t const litLength = start - anchor;
82306042735SVincent Franchomme ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
824eb7fbc25SPierre Schweitzer anchor = ip = start + matchLength;
825eb7fbc25SPierre Schweitzer }
826eb7fbc25SPierre Schweitzer
827eb7fbc25SPierre Schweitzer /* check immediate repcode */
828eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_dictMatchState) {
829eb7fbc25SPierre Schweitzer while (ip <= ilimit) {
830eb7fbc25SPierre Schweitzer U32 const current2 = (U32)(ip-base);
831eb7fbc25SPierre Schweitzer U32 const repIndex = current2 - offset_2;
832eb7fbc25SPierre Schweitzer const BYTE* repMatch = dictMode == ZSTD_dictMatchState
833eb7fbc25SPierre Schweitzer && repIndex < prefixLowestIndex ?
834eb7fbc25SPierre Schweitzer dictBase - dictIndexDelta + repIndex :
835eb7fbc25SPierre Schweitzer base + repIndex;
836eb7fbc25SPierre Schweitzer if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
837eb7fbc25SPierre Schweitzer && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
838eb7fbc25SPierre Schweitzer const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
839eb7fbc25SPierre Schweitzer matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
840eb7fbc25SPierre Schweitzer offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
84106042735SVincent Franchomme ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
842eb7fbc25SPierre Schweitzer ip += matchLength;
843eb7fbc25SPierre Schweitzer anchor = ip;
844eb7fbc25SPierre Schweitzer continue;
845eb7fbc25SPierre Schweitzer }
846eb7fbc25SPierre Schweitzer break;
847eb7fbc25SPierre Schweitzer }
848eb7fbc25SPierre Schweitzer }
849eb7fbc25SPierre Schweitzer
850eb7fbc25SPierre Schweitzer if (dictMode == ZSTD_noDict) {
851eb7fbc25SPierre Schweitzer while ( ((ip <= ilimit) & (offset_2>0))
852eb7fbc25SPierre Schweitzer && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
853eb7fbc25SPierre Schweitzer /* store sequence */
854eb7fbc25SPierre Schweitzer matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
855eb7fbc25SPierre Schweitzer offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
85606042735SVincent Franchomme ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
857eb7fbc25SPierre Schweitzer ip += matchLength;
858eb7fbc25SPierre Schweitzer anchor = ip;
859eb7fbc25SPierre Schweitzer continue; /* faster when present ... (?) */
860eb7fbc25SPierre Schweitzer } } }
861eb7fbc25SPierre Schweitzer
862eb7fbc25SPierre Schweitzer /* Save reps for next block */
863eb7fbc25SPierre Schweitzer rep[0] = offset_1 ? offset_1 : savedOffset;
864eb7fbc25SPierre Schweitzer rep[1] = offset_2 ? offset_2 : savedOffset;
865eb7fbc25SPierre Schweitzer
866eb7fbc25SPierre Schweitzer /* Return the last literals size */
86706042735SVincent Franchomme return (size_t)(iend - anchor);
868eb7fbc25SPierre Schweitzer }
869eb7fbc25SPierre Schweitzer
870eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_btlazy2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)871eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_btlazy2(
872eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
873eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
874eb7fbc25SPierre Schweitzer {
87506042735SVincent Franchomme return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
876eb7fbc25SPierre Schweitzer }
877eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_lazy2(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)878eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_lazy2(
879eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
880eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
881eb7fbc25SPierre Schweitzer {
88206042735SVincent Franchomme return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
883eb7fbc25SPierre Schweitzer }
884eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_lazy(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)885eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_lazy(
886eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
887eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
888eb7fbc25SPierre Schweitzer {
88906042735SVincent Franchomme return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
890eb7fbc25SPierre Schweitzer }
891eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_greedy(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)892eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_greedy(
893eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
894eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
895eb7fbc25SPierre Schweitzer {
89606042735SVincent Franchomme return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
897eb7fbc25SPierre Schweitzer }
898eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_btlazy2_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)899eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_btlazy2_dictMatchState(
900eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
901eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
902eb7fbc25SPierre Schweitzer {
90306042735SVincent Franchomme return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
904eb7fbc25SPierre Schweitzer }
905eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_lazy2_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)906eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_lazy2_dictMatchState(
907eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
908eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
909eb7fbc25SPierre Schweitzer {
91006042735SVincent Franchomme return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
911eb7fbc25SPierre Schweitzer }
912eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_lazy_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)913eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_lazy_dictMatchState(
914eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
915eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
916eb7fbc25SPierre Schweitzer {
91706042735SVincent Franchomme return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
918eb7fbc25SPierre Schweitzer }
919eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_greedy_dictMatchState(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)920eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_greedy_dictMatchState(
921eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
922eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
923eb7fbc25SPierre Schweitzer {
92406042735SVincent Franchomme return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
925eb7fbc25SPierre Schweitzer }
926eb7fbc25SPierre Schweitzer
927eb7fbc25SPierre Schweitzer
928eb7fbc25SPierre Schweitzer FORCE_INLINE_TEMPLATE
ZSTD_compressBlock_lazy_extDict_generic(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],const void * src,size_t srcSize,const searchMethod_e searchMethod,const U32 depth)929eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_lazy_extDict_generic(
930eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore,
931eb7fbc25SPierre Schweitzer U32 rep[ZSTD_REP_NUM],
932eb7fbc25SPierre Schweitzer const void* src, size_t srcSize,
93306042735SVincent Franchomme const searchMethod_e searchMethod, const U32 depth)
934eb7fbc25SPierre Schweitzer {
935eb7fbc25SPierre Schweitzer const BYTE* const istart = (const BYTE*)src;
936eb7fbc25SPierre Schweitzer const BYTE* ip = istart;
937eb7fbc25SPierre Schweitzer const BYTE* anchor = istart;
938eb7fbc25SPierre Schweitzer const BYTE* const iend = istart + srcSize;
939eb7fbc25SPierre Schweitzer const BYTE* const ilimit = iend - 8;
940eb7fbc25SPierre Schweitzer const BYTE* const base = ms->window.base;
941eb7fbc25SPierre Schweitzer const U32 dictLimit = ms->window.dictLimit;
942eb7fbc25SPierre Schweitzer const BYTE* const prefixStart = base + dictLimit;
943eb7fbc25SPierre Schweitzer const BYTE* const dictBase = ms->window.dictBase;
944eb7fbc25SPierre Schweitzer const BYTE* const dictEnd = dictBase + dictLimit;
94506042735SVincent Franchomme const BYTE* const dictStart = dictBase + ms->window.lowLimit;
94606042735SVincent Franchomme const U32 windowLog = ms->cParams.windowLog;
947eb7fbc25SPierre Schweitzer
948eb7fbc25SPierre Schweitzer typedef size_t (*searchMax_f)(
949eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms,
950eb7fbc25SPierre Schweitzer const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
95106042735SVincent Franchomme searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
952eb7fbc25SPierre Schweitzer
953eb7fbc25SPierre Schweitzer U32 offset_1 = rep[0], offset_2 = rep[1];
954eb7fbc25SPierre Schweitzer
95506042735SVincent Franchomme DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
95606042735SVincent Franchomme
957eb7fbc25SPierre Schweitzer /* init */
958eb7fbc25SPierre Schweitzer ip += (ip == prefixStart);
959eb7fbc25SPierre Schweitzer
960eb7fbc25SPierre Schweitzer /* Match Loop */
96106042735SVincent Franchomme #if defined(__GNUC__) && defined(__x86_64__)
96206042735SVincent Franchomme /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
96306042735SVincent Franchomme * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
96406042735SVincent Franchomme */
96506042735SVincent Franchomme __asm__(".p2align 5");
96606042735SVincent Franchomme #endif
967eb7fbc25SPierre Schweitzer while (ip < ilimit) {
968eb7fbc25SPierre Schweitzer size_t matchLength=0;
969eb7fbc25SPierre Schweitzer size_t offset=0;
970eb7fbc25SPierre Schweitzer const BYTE* start=ip+1;
971eb7fbc25SPierre Schweitzer U32 current = (U32)(ip-base);
972eb7fbc25SPierre Schweitzer
973eb7fbc25SPierre Schweitzer /* check repCode */
97406042735SVincent Franchomme { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
97506042735SVincent Franchomme const U32 repIndex = (U32)(current+1 - offset_1);
976eb7fbc25SPierre Schweitzer const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
977eb7fbc25SPierre Schweitzer const BYTE* const repMatch = repBase + repIndex;
97806042735SVincent Franchomme if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
979eb7fbc25SPierre Schweitzer if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
980eb7fbc25SPierre Schweitzer /* repcode detected we should take it */
981eb7fbc25SPierre Schweitzer const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
982eb7fbc25SPierre Schweitzer matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
983eb7fbc25SPierre Schweitzer if (depth==0) goto _storeSequence;
984eb7fbc25SPierre Schweitzer } }
985eb7fbc25SPierre Schweitzer
986eb7fbc25SPierre Schweitzer /* first search (depth 0) */
987eb7fbc25SPierre Schweitzer { size_t offsetFound = 999999999;
988eb7fbc25SPierre Schweitzer size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
989eb7fbc25SPierre Schweitzer if (ml2 > matchLength)
990eb7fbc25SPierre Schweitzer matchLength = ml2, start = ip, offset=offsetFound;
991eb7fbc25SPierre Schweitzer }
992eb7fbc25SPierre Schweitzer
993eb7fbc25SPierre Schweitzer if (matchLength < 4) {
994eb7fbc25SPierre Schweitzer ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
995eb7fbc25SPierre Schweitzer continue;
996eb7fbc25SPierre Schweitzer }
997eb7fbc25SPierre Schweitzer
998eb7fbc25SPierre Schweitzer /* let's try to find a better solution */
999eb7fbc25SPierre Schweitzer if (depth>=1)
1000eb7fbc25SPierre Schweitzer while (ip<ilimit) {
1001eb7fbc25SPierre Schweitzer ip ++;
1002eb7fbc25SPierre Schweitzer current++;
1003eb7fbc25SPierre Schweitzer /* check repCode */
1004eb7fbc25SPierre Schweitzer if (offset) {
100506042735SVincent Franchomme const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1006eb7fbc25SPierre Schweitzer const U32 repIndex = (U32)(current - offset_1);
1007eb7fbc25SPierre Schweitzer const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1008eb7fbc25SPierre Schweitzer const BYTE* const repMatch = repBase + repIndex;
100906042735SVincent Franchomme if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1010eb7fbc25SPierre Schweitzer if (MEM_read32(ip) == MEM_read32(repMatch)) {
1011eb7fbc25SPierre Schweitzer /* repcode detected */
1012eb7fbc25SPierre Schweitzer const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1013eb7fbc25SPierre Schweitzer size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1014eb7fbc25SPierre Schweitzer int const gain2 = (int)(repLength * 3);
1015eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1016eb7fbc25SPierre Schweitzer if ((repLength >= 4) && (gain2 > gain1))
1017eb7fbc25SPierre Schweitzer matchLength = repLength, offset = 0, start = ip;
1018eb7fbc25SPierre Schweitzer } }
1019eb7fbc25SPierre Schweitzer
1020eb7fbc25SPierre Schweitzer /* search match, depth 1 */
1021eb7fbc25SPierre Schweitzer { size_t offset2=999999999;
1022eb7fbc25SPierre Schweitzer size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1023eb7fbc25SPierre Schweitzer int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1024eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1025eb7fbc25SPierre Schweitzer if ((ml2 >= 4) && (gain2 > gain1)) {
1026eb7fbc25SPierre Schweitzer matchLength = ml2, offset = offset2, start = ip;
1027eb7fbc25SPierre Schweitzer continue; /* search a better one */
1028eb7fbc25SPierre Schweitzer } }
1029eb7fbc25SPierre Schweitzer
1030eb7fbc25SPierre Schweitzer /* let's find an even better one */
1031eb7fbc25SPierre Schweitzer if ((depth==2) && (ip<ilimit)) {
1032eb7fbc25SPierre Schweitzer ip ++;
1033eb7fbc25SPierre Schweitzer current++;
1034eb7fbc25SPierre Schweitzer /* check repCode */
1035eb7fbc25SPierre Schweitzer if (offset) {
103606042735SVincent Franchomme const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1037eb7fbc25SPierre Schweitzer const U32 repIndex = (U32)(current - offset_1);
1038eb7fbc25SPierre Schweitzer const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1039eb7fbc25SPierre Schweitzer const BYTE* const repMatch = repBase + repIndex;
104006042735SVincent Franchomme if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1041eb7fbc25SPierre Schweitzer if (MEM_read32(ip) == MEM_read32(repMatch)) {
1042eb7fbc25SPierre Schweitzer /* repcode detected */
1043eb7fbc25SPierre Schweitzer const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1044eb7fbc25SPierre Schweitzer size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1045eb7fbc25SPierre Schweitzer int const gain2 = (int)(repLength * 4);
1046eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1047eb7fbc25SPierre Schweitzer if ((repLength >= 4) && (gain2 > gain1))
1048eb7fbc25SPierre Schweitzer matchLength = repLength, offset = 0, start = ip;
1049eb7fbc25SPierre Schweitzer } }
1050eb7fbc25SPierre Schweitzer
1051eb7fbc25SPierre Schweitzer /* search match, depth 2 */
1052eb7fbc25SPierre Schweitzer { size_t offset2=999999999;
1053eb7fbc25SPierre Schweitzer size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1054eb7fbc25SPierre Schweitzer int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1055eb7fbc25SPierre Schweitzer int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1056eb7fbc25SPierre Schweitzer if ((ml2 >= 4) && (gain2 > gain1)) {
1057eb7fbc25SPierre Schweitzer matchLength = ml2, offset = offset2, start = ip;
1058eb7fbc25SPierre Schweitzer continue;
1059eb7fbc25SPierre Schweitzer } } }
1060eb7fbc25SPierre Schweitzer break; /* nothing found : store previous solution */
1061eb7fbc25SPierre Schweitzer }
1062eb7fbc25SPierre Schweitzer
1063eb7fbc25SPierre Schweitzer /* catch up */
1064eb7fbc25SPierre Schweitzer if (offset) {
1065eb7fbc25SPierre Schweitzer U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1066eb7fbc25SPierre Schweitzer const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
1067eb7fbc25SPierre Schweitzer const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
1068eb7fbc25SPierre Schweitzer while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
1069eb7fbc25SPierre Schweitzer offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1070eb7fbc25SPierre Schweitzer }
1071eb7fbc25SPierre Schweitzer
1072eb7fbc25SPierre Schweitzer /* store sequence */
1073eb7fbc25SPierre Schweitzer _storeSequence:
1074eb7fbc25SPierre Schweitzer { size_t const litLength = start - anchor;
107506042735SVincent Franchomme ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1076eb7fbc25SPierre Schweitzer anchor = ip = start + matchLength;
1077eb7fbc25SPierre Schweitzer }
1078eb7fbc25SPierre Schweitzer
1079eb7fbc25SPierre Schweitzer /* check immediate repcode */
1080eb7fbc25SPierre Schweitzer while (ip <= ilimit) {
108106042735SVincent Franchomme const U32 repCurrent = (U32)(ip-base);
108206042735SVincent Franchomme const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
108306042735SVincent Franchomme const U32 repIndex = repCurrent - offset_2;
1084eb7fbc25SPierre Schweitzer const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1085eb7fbc25SPierre Schweitzer const BYTE* const repMatch = repBase + repIndex;
108606042735SVincent Franchomme if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1087eb7fbc25SPierre Schweitzer if (MEM_read32(ip) == MEM_read32(repMatch)) {
1088eb7fbc25SPierre Schweitzer /* repcode detected we should take it */
1089eb7fbc25SPierre Schweitzer const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1090eb7fbc25SPierre Schweitzer matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1091eb7fbc25SPierre Schweitzer offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
109206042735SVincent Franchomme ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1093eb7fbc25SPierre Schweitzer ip += matchLength;
1094eb7fbc25SPierre Schweitzer anchor = ip;
1095eb7fbc25SPierre Schweitzer continue; /* faster when present ... (?) */
1096eb7fbc25SPierre Schweitzer }
1097eb7fbc25SPierre Schweitzer break;
1098eb7fbc25SPierre Schweitzer } }
1099eb7fbc25SPierre Schweitzer
1100eb7fbc25SPierre Schweitzer /* Save reps for next block */
1101eb7fbc25SPierre Schweitzer rep[0] = offset_1;
1102eb7fbc25SPierre Schweitzer rep[1] = offset_2;
1103eb7fbc25SPierre Schweitzer
1104eb7fbc25SPierre Schweitzer /* Return the last literals size */
110506042735SVincent Franchomme return (size_t)(iend - anchor);
1106eb7fbc25SPierre Schweitzer }
1107eb7fbc25SPierre Schweitzer
1108eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_greedy_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1109eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_greedy_extDict(
1110eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1111eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
1112eb7fbc25SPierre Schweitzer {
111306042735SVincent Franchomme return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1114eb7fbc25SPierre Schweitzer }
1115eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_lazy_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1116eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_lazy_extDict(
1117eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1118eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
1119eb7fbc25SPierre Schweitzer
1120eb7fbc25SPierre Schweitzer {
112106042735SVincent Franchomme return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1122eb7fbc25SPierre Schweitzer }
1123eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_lazy2_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1124eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_lazy2_extDict(
1125eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1126eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
1127eb7fbc25SPierre Schweitzer
1128eb7fbc25SPierre Schweitzer {
112906042735SVincent Franchomme return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1130eb7fbc25SPierre Schweitzer }
1131eb7fbc25SPierre Schweitzer
ZSTD_compressBlock_btlazy2_extDict(ZSTD_matchState_t * ms,seqStore_t * seqStore,U32 rep[ZSTD_REP_NUM],void const * src,size_t srcSize)1132eb7fbc25SPierre Schweitzer size_t ZSTD_compressBlock_btlazy2_extDict(
1133eb7fbc25SPierre Schweitzer ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1134eb7fbc25SPierre Schweitzer void const* src, size_t srcSize)
1135eb7fbc25SPierre Schweitzer
1136eb7fbc25SPierre Schweitzer {
113706042735SVincent Franchomme return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1138eb7fbc25SPierre Schweitzer }
1139