1 /*
2 * matchfinder_common.h - common code for Lempel-Ziv matchfinding
3 */
4
5 #ifndef LIB_MATCHFINDER_COMMON_H
6 #define LIB_MATCHFINDER_COMMON_H
7
8 #include "lib_common.h"
9 #include "unaligned.h"
10
11 #ifndef MATCHFINDER_WINDOW_ORDER
12 # error "MATCHFINDER_WINDOW_ORDER must be defined!"
13 #endif
14
15 #define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
16
17 typedef s16 mf_pos_t;
18
19 #define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
20
21 /*
22 * Required alignment of the matchfinder buffer pointer and size. The values
23 * here come from the AVX-2 implementation, which is the worst case.
24 */
25 #define MATCHFINDER_MEM_ALIGNMENT 32
26 #define MATCHFINDER_SIZE_ALIGNMENT 128
27
28 #undef matchfinder_init
29 #undef matchfinder_rebase
30 #ifdef _aligned_attribute
31 # if defined(__arm__) || defined(__aarch64__)
32 # include "arm/matchfinder_impl.h"
33 # elif defined(__i386__) || defined(__x86_64__)
34 # include "x86/matchfinder_impl.h"
35 # endif
36 #endif
37
38 /*
39 * Initialize the hash table portion of the matchfinder.
40 *
41 * Essentially, this is an optimized memset().
42 *
43 * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
44 * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
45 */
46 #ifndef matchfinder_init
47 static forceinline void
matchfinder_init(mf_pos_t * data,size_t size)48 matchfinder_init(mf_pos_t *data, size_t size)
49 {
50 size_t num_entries = size / sizeof(*data);
51 size_t i;
52
53 for (i = 0; i < num_entries; i++)
54 data[i] = MATCHFINDER_INITVAL;
55 }
56 #endif
57
58 /*
59 * Slide the matchfinder by WINDOW_SIZE bytes.
60 *
61 * This must be called just after each WINDOW_SIZE bytes have been run through
62 * the matchfinder.
63 *
64 * This will subtract WINDOW_SIZE bytes from each entry in the array specified.
65 * The effect is that all entries are updated to be relative to the current
66 * position, rather than the position WINDOW_SIZE bytes prior.
67 *
68 * Underflow is detected and replaced with signed saturation. This ensures that
69 * once the sliding window has passed over a position, that position forever
70 * remains out of bounds.
71 *
72 * The array passed in must contain all matchfinder data that is
73 * position-relative. Concretely, this will include the hash table as well as
74 * the table of positions that is used to link together the sequences in each
75 * hash bucket. Note that in the latter table, the links are 1-ary in the case
76 * of "hash chains", and 2-ary in the case of "binary trees". In either case,
77 * the links need to be rebased in the same way.
78 *
79 * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
80 * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
81 */
82 #ifndef matchfinder_rebase
83 static forceinline void
matchfinder_rebase(mf_pos_t * data,size_t size)84 matchfinder_rebase(mf_pos_t *data, size_t size)
85 {
86 size_t num_entries = size / sizeof(*data);
87 size_t i;
88
89 if (MATCHFINDER_WINDOW_SIZE == 32768) {
90 /* Branchless version for 32768 byte windows. If the value was
91 * already negative, clear all bits except the sign bit; this
92 * changes the value to -32768. Otherwise, set the sign bit;
93 * this is equivalent to subtracting 32768. */
94 for (i = 0; i < num_entries; i++) {
95 u16 v = data[i];
96 u16 sign_bit = v & 0x8000;
97 v &= sign_bit - ((sign_bit >> 15) ^ 1);
98 v |= 0x8000;
99 data[i] = v;
100 }
101 return;
102 }
103
104 for (i = 0; i < num_entries; i++) {
105 if (data[i] >= 0)
106 data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
107 else
108 data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
109 }
110 }
111 #endif
112
113 /*
114 * The hash function: given a sequence prefix held in the low-order bits of a
115 * 32-bit value, multiply by a carefully-chosen large constant. Discard any
116 * bits of the product that don't fit in a 32-bit value, but take the
117 * next-highest @num_bits bits of the product as the hash value, as those have
118 * the most randomness.
119 */
120 static forceinline u32
lz_hash(u32 seq,unsigned num_bits)121 lz_hash(u32 seq, unsigned num_bits)
122 {
123 return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
124 }
125
126 /*
127 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
128 * to a maximum of @max_len. Initially, @start_len bytes are matched.
129 */
130 static forceinline unsigned
lz_extend(const u8 * const strptr,const u8 * const matchptr,const unsigned start_len,const unsigned max_len)131 lz_extend(const u8 * const strptr, const u8 * const matchptr,
132 const unsigned start_len, const unsigned max_len)
133 {
134 unsigned len = start_len;
135 machine_word_t v_word;
136
137 if (UNALIGNED_ACCESS_IS_FAST) {
138
139 if (likely(max_len - len >= 4 * WORDBYTES)) {
140
141 #define COMPARE_WORD_STEP \
142 v_word = load_word_unaligned(&matchptr[len]) ^ \
143 load_word_unaligned(&strptr[len]); \
144 if (v_word != 0) \
145 goto word_differs; \
146 len += WORDBYTES; \
147
148 COMPARE_WORD_STEP
149 COMPARE_WORD_STEP
150 COMPARE_WORD_STEP
151 COMPARE_WORD_STEP
152 #undef COMPARE_WORD_STEP
153 }
154
155 while (len + WORDBYTES <= max_len) {
156 v_word = load_word_unaligned(&matchptr[len]) ^
157 load_word_unaligned(&strptr[len]);
158 if (v_word != 0)
159 goto word_differs;
160 len += WORDBYTES;
161 }
162 }
163
164 while (len < max_len && matchptr[len] == strptr[len])
165 len++;
166 return len;
167
168 word_differs:
169 if (CPU_IS_LITTLE_ENDIAN())
170 len += (bsfw(v_word) >> 3);
171 else
172 len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
173 return len;
174 }
175
176 #endif /* LIB_MATCHFINDER_COMMON_H */
177