1 /*
2  * matchfinder_common.h - common code for Lempel-Ziv matchfinding
3  */
4 
5 #ifndef LIB_MATCHFINDER_COMMON_H
6 #define LIB_MATCHFINDER_COMMON_H
7 
8 #include "lib_common.h"
9 #include "unaligned.h"
10 
11 #ifndef MATCHFINDER_WINDOW_ORDER
12 #  error "MATCHFINDER_WINDOW_ORDER must be defined!"
13 #endif
14 
15 #define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
16 
17 typedef s16 mf_pos_t;
18 
19 #define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
20 
21 /*
22  * Required alignment of the matchfinder buffer pointer and size.  The values
23  * here come from the AVX-2 implementation, which is the worst case.
24  */
25 #define MATCHFINDER_MEM_ALIGNMENT	32
26 #define MATCHFINDER_SIZE_ALIGNMENT	128
27 
28 #undef matchfinder_init
29 #undef matchfinder_rebase
30 #ifdef _aligned_attribute
31 #  if defined(__arm__) || defined(__aarch64__)
32 #    include "arm/matchfinder_impl.h"
33 #  elif defined(__i386__) || defined(__x86_64__)
34 #    include "x86/matchfinder_impl.h"
35 #  endif
36 #endif
37 
38 /*
39  * Initialize the hash table portion of the matchfinder.
40  *
41  * Essentially, this is an optimized memset().
42  *
43  * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
44  * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
45  */
46 #ifndef matchfinder_init
47 static forceinline void
matchfinder_init(mf_pos_t * data,size_t size)48 matchfinder_init(mf_pos_t *data, size_t size)
49 {
50 	size_t num_entries = size / sizeof(*data);
51 	size_t i;
52 
53 	for (i = 0; i < num_entries; i++)
54 		data[i] = MATCHFINDER_INITVAL;
55 }
56 #endif
57 
58 /*
59  * Slide the matchfinder by WINDOW_SIZE bytes.
60  *
61  * This must be called just after each WINDOW_SIZE bytes have been run through
62  * the matchfinder.
63  *
64  * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
65  * The effect is that all entries are updated to be relative to the current
66  * position, rather than the position WINDOW_SIZE bytes prior.
67  *
68  * Underflow is detected and replaced with signed saturation.  This ensures that
69  * once the sliding window has passed over a position, that position forever
70  * remains out of bounds.
71  *
72  * The array passed in must contain all matchfinder data that is
73  * position-relative.  Concretely, this will include the hash table as well as
74  * the table of positions that is used to link together the sequences in each
75  * hash bucket.  Note that in the latter table, the links are 1-ary in the case
76  * of "hash chains", and 2-ary in the case of "binary trees".  In either case,
77  * the links need to be rebased in the same way.
78  *
79  * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
80  * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
81  */
82 #ifndef matchfinder_rebase
83 static forceinline void
matchfinder_rebase(mf_pos_t * data,size_t size)84 matchfinder_rebase(mf_pos_t *data, size_t size)
85 {
86 	size_t num_entries = size / sizeof(*data);
87 	size_t i;
88 
89 	if (MATCHFINDER_WINDOW_SIZE == 32768) {
90 		/* Branchless version for 32768 byte windows.  If the value was
91 		 * already negative, clear all bits except the sign bit; this
92 		 * changes the value to -32768.  Otherwise, set the sign bit;
93 		 * this is equivalent to subtracting 32768.  */
94 		for (i = 0; i < num_entries; i++) {
95 			u16 v = data[i];
96 			u16 sign_bit = v & 0x8000;
97 			v &= sign_bit - ((sign_bit >> 15) ^ 1);
98 			v |= 0x8000;
99 			data[i] = v;
100 		}
101 		return;
102 	}
103 
104 	for (i = 0; i < num_entries; i++) {
105 		if (data[i] >= 0)
106 			data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
107 		else
108 			data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
109 	}
110 }
111 #endif
112 
113 /*
114  * The hash function: given a sequence prefix held in the low-order bits of a
115  * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
116  * bits of the product that don't fit in a 32-bit value, but take the
117  * next-highest @num_bits bits of the product as the hash value, as those have
118  * the most randomness.
119  */
120 static forceinline u32
lz_hash(u32 seq,unsigned num_bits)121 lz_hash(u32 seq, unsigned num_bits)
122 {
123 	return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
124 }
125 
126 /*
127  * Return the number of bytes at @matchptr that match the bytes at @strptr, up
128  * to a maximum of @max_len.  Initially, @start_len bytes are matched.
129  */
static forceinline unsigned
lz_extend(const u8 * const strptr, const u8 * const matchptr,
	  const unsigned start_len, const unsigned max_len)
{
	unsigned len = start_len;
	machine_word_t v_word;	/* XOR of the two words being compared; 0 => equal */

	if (UNALIGNED_ACCESS_IS_FAST) {

		/* Fast path: compare a machine word at a time.  The first
		 * 4 * WORDBYTES bytes are handled with an unrolled sequence
		 * (no loop-bound check per step), provided that many bytes
		 * remain below max_len.  */
		if (likely(max_len - len >= 4 * WORDBYTES)) {

		#define COMPARE_WORD_STEP				\
			v_word = load_word_unaligned(&matchptr[len]) ^	\
				 load_word_unaligned(&strptr[len]);	\
			if (v_word != 0)				\
				goto word_differs;			\
			len += WORDBYTES;				\

			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
		#undef COMPARE_WORD_STEP
		}

		/* Continue word-at-a-time while a full word still fits
		 * before max_len.  */
		while (len + WORDBYTES <= max_len) {
			v_word = load_word_unaligned(&matchptr[len]) ^
				 load_word_unaligned(&strptr[len]);
			if (v_word != 0)
				goto word_differs;
			len += WORDBYTES;
		}
	}

	/* Tail (or the whole comparison when unaligned access is slow):
	 * compare the remaining bytes one at a time.  */
	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;

word_differs:
	/* v_word is nonzero; the lowest-addressed differing byte determines
	 * how many additional bytes matched.  On little-endian that byte
	 * corresponds to the lowest-order set bit (bsfw); on big-endian, to
	 * the highest-order set bit (bsrw).  Divide the bit index by 8
	 * (>> 3) to convert bits to bytes.  */
	if (CPU_IS_LITTLE_ENDIAN())
		len += (bsfw(v_word) >> 3);
	else
		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
	return len;
}
175 
176 #endif /* LIB_MATCHFINDER_COMMON_H */
177