1 /*
2  * matchfinder_avx2.h - matchfinding routines optimized for Intel AVX2 (Advanced
3  * Vector Extensions)
4  */
5 
6 #include <immintrin.h>
7 
8 static forceinline bool
matchfinder_init_avx2(mf_pos_t * data,size_t size)9 matchfinder_init_avx2(mf_pos_t *data, size_t size)
10 {
11 	__m256i v, *p;
12 	size_t n;
13 
14 	if (size % (sizeof(__m256i) * 4) != 0)
15 		return false;
16 
17 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
18 	v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
19 	p = (__m256i *)data;
20 	n = size / (sizeof(__m256i) * 4);
21 	do {
22 		p[0] = v;
23 		p[1] = v;
24 		p[2] = v;
25 		p[3] = v;
26 		p += 4;
27 	} while (--n);
28 	return true;
29 }
30 
31 static forceinline bool
matchfinder_rebase_avx2(mf_pos_t * data,size_t size)32 matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
33 {
34 	__m256i v, *p;
35 	size_t n;
36 
37 	if (size % (sizeof(__m256i) * 4) != 0)
38 		return false;
39 
40 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
41 	v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
42 	p = (__m256i *)data;
43 	n = size / (sizeof(__m256i) * 4);
44 	do {
45 		/* PADDSW: Add Packed Signed Integers With Signed Saturation  */
46 		p[0] = _mm256_adds_epi16(p[0], v);
47 		p[1] = _mm256_adds_epi16(p[1], v);
48 		p[2] = _mm256_adds_epi16(p[2], v);
49 		p[3] = _mm256_adds_epi16(p[3], v);
50 		p += 4;
51 	} while (--n);
52 	return true;
53 }
54