1 /*
2  * matchfinder_common.h - common code for Lempel-Ziv matchfinding
3  */
4 
5 #ifndef LIB_MATCHFINDER_COMMON_H
6 #define LIB_MATCHFINDER_COMMON_H
7 
8 #include "lib_common.h"
9 #include "unaligned.h"
10 
11 #ifndef MATCHFINDER_WINDOW_ORDER
12 #  error "MATCHFINDER_WINDOW_ORDER must be defined!"
13 #endif
14 
15 #define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
16 
17 typedef s16 mf_pos_t;
18 
19 #define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
20 
21 #define MATCHFINDER_ALIGNMENT 8
22 
23 #ifdef __AVX2__
24 #  include "matchfinder_avx2.h"
25 #  if MATCHFINDER_ALIGNMENT < 32
26 #    undef MATCHFINDER_ALIGNMENT
27 #    define MATCHFINDER_ALIGNMENT 32
28 #  endif
29 #endif
30 
31 #ifdef __SSE2__
32 #  include "matchfinder_sse2.h"
33 #  if MATCHFINDER_ALIGNMENT < 16
34 #    undef MATCHFINDER_ALIGNMENT
35 #    define MATCHFINDER_ALIGNMENT 16
36 #  endif
37 #endif
38 
39 #ifdef __ARM_NEON
40 #  include "matchfinder_neon.h"
41 #  if MATCHFINDER_ALIGNMENT < 16
42 #    undef MATCHFINDER_ALIGNMENT
43 #    define MATCHFINDER_ALIGNMENT 16
44 #  endif
45 #endif
46 
47 /*
48  * Initialize the hash table portion of the matchfinder.
49  *
50  * Essentially, this is an optimized memset().
51  *
52  * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary.
53  */
54 static forceinline void
matchfinder_init(mf_pos_t * data,size_t num_entries)55 matchfinder_init(mf_pos_t *data, size_t num_entries)
56 {
57 	size_t i;
58 
59 #if defined(__AVX2__) && defined(_aligned_attribute)
60 	if (matchfinder_init_avx2(data, num_entries * sizeof(data[0])))
61 		return;
62 #endif
63 
64 #if defined(__SSE2__) && defined(_aligned_attribute)
65 	if (matchfinder_init_sse2(data, num_entries * sizeof(data[0])))
66 		return;
67 #endif
68 
69 #if defined(__ARM_NEON) && defined(_aligned_attribute)
70 	if (matchfinder_init_neon(data, num_entries * sizeof(data[0])))
71 		return;
72 #endif
73 
74 	for (i = 0; i < num_entries; i++)
75 		data[i] = MATCHFINDER_INITVAL;
76 }
77 
78 /*
79  * Slide the matchfinder by WINDOW_SIZE bytes.
80  *
81  * This must be called just after each WINDOW_SIZE bytes have been run through
82  * the matchfinder.
83  *
84  * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
85  * The effect is that all entries are updated to be relative to the current
86  * position, rather than the position WINDOW_SIZE bytes prior.
87  *
88  * Underflow is detected and replaced with signed saturation.  This ensures that
89  * once the sliding window has passed over a position, that position forever
90  * remains out of bounds.
91  *
92  * The array passed in must contain all matchfinder data that is
93  * position-relative.  Concretely, this will include the hash table as well as
94  * the table of positions that is used to link together the sequences in each
95  * hash bucket.  Note that in the latter table, the links are 1-ary in the case
96  * of "hash chains", and 2-ary in the case of "binary trees".  In either case,
97  * the links need to be rebased in the same way.
98  */
99 static forceinline void
matchfinder_rebase(mf_pos_t * data,size_t num_entries)100 matchfinder_rebase(mf_pos_t *data, size_t num_entries)
101 {
102 	size_t i;
103 
104 #if defined(__AVX2__) && defined(_aligned_attribute)
105 	if (matchfinder_rebase_avx2(data, num_entries * sizeof(data[0])))
106 		return;
107 #endif
108 
109 #if defined(__SSE2__) && defined(_aligned_attribute)
110 	if (matchfinder_rebase_sse2(data, num_entries * sizeof(data[0])))
111 		return;
112 #endif
113 
114 #if defined(__ARM_NEON) && defined(_aligned_attribute)
115 	if (matchfinder_rebase_neon(data, num_entries * sizeof(data[0])))
116 		return;
117 #endif
118 
119 	if (MATCHFINDER_WINDOW_SIZE == 32768) {
120 		/* Branchless version for 32768 byte windows.  If the value was
121 		 * already negative, clear all bits except the sign bit; this
122 		 * changes the value to -32768.  Otherwise, set the sign bit;
123 		 * this is equivalent to subtracting 32768.  */
124 		for (i = 0; i < num_entries; i++) {
125 			u16 v = data[i];
126 			u16 sign_bit = v & 0x8000;
127 			v &= sign_bit - ((sign_bit >> 15) ^ 1);
128 			v |= 0x8000;
129 			data[i] = v;
130 		}
131 		return;
132 	}
133 
134 	for (i = 0; i < num_entries; i++) {
135 		if (data[i] >= 0)
136 			data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
137 		else
138 			data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
139 	}
140 }
141 
142 /*
143  * The hash function: given a sequence prefix held in the low-order bits of a
144  * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
145  * bits of the product that don't fit in a 32-bit value, but take the
146  * next-highest @num_bits bits of the product as the hash value, as those have
147  * the most randomness.
148  */
149 static forceinline u32
lz_hash(u32 seq,unsigned num_bits)150 lz_hash(u32 seq, unsigned num_bits)
151 {
152 	return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
153 }
154 
155 /*
156  * Return the number of bytes at @matchptr that match the bytes at @strptr, up
157  * to a maximum of @max_len.  Initially, @start_len bytes are matched.
158  */
159 static forceinline unsigned
lz_extend(const u8 * const strptr,const u8 * const matchptr,const unsigned start_len,const unsigned max_len)160 lz_extend(const u8 * const strptr, const u8 * const matchptr,
161 	  const unsigned start_len, const unsigned max_len)
162 {
163 	unsigned len = start_len;
164 	machine_word_t v_word;
165 
166 	if (UNALIGNED_ACCESS_IS_FAST) {
167 
168 		if (likely(max_len - len >= 4 * WORDBYTES)) {
169 
170 		#define COMPARE_WORD_STEP				\
171 			v_word = load_word_unaligned(&matchptr[len]) ^	\
172 				 load_word_unaligned(&strptr[len]);	\
173 			if (v_word != 0)				\
174 				goto word_differs;			\
175 			len += WORDBYTES;				\
176 
177 			COMPARE_WORD_STEP
178 			COMPARE_WORD_STEP
179 			COMPARE_WORD_STEP
180 			COMPARE_WORD_STEP
181 		#undef COMPARE_WORD_STEP
182 		}
183 
184 		while (len + WORDBYTES <= max_len) {
185 			v_word = load_word_unaligned(&matchptr[len]) ^
186 				 load_word_unaligned(&strptr[len]);
187 			if (v_word != 0)
188 				goto word_differs;
189 			len += WORDBYTES;
190 		}
191 	}
192 
193 	while (len < max_len && matchptr[len] == strptr[len])
194 		len++;
195 	return len;
196 
197 word_differs:
198 	if (CPU_IS_LITTLE_ENDIAN())
199 		len += (bsfw(v_word) >> 3);
200 	else
201 		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
202 	return len;
203 }
204 
205 #endif /* LIB_MATCHFINDER_COMMON_H */
206