1 /*
2 * matchfinder_avx2.h - matchfinding routines optimized for Intel AVX2 (Advanced
3 * Vector Extensions)
4 */
5
6 #include <immintrin.h>
7
8 static forceinline bool
matchfinder_init_avx2(mf_pos_t * data,size_t size)9 matchfinder_init_avx2(mf_pos_t *data, size_t size)
10 {
11 __m256i v, *p;
12 size_t n;
13
14 if (size % (sizeof(__m256i) * 4) != 0)
15 return false;
16
17 STATIC_ASSERT(sizeof(mf_pos_t) == 2);
18 v = _mm256_set1_epi16(MATCHFINDER_INITVAL);
19 p = (__m256i *)data;
20 n = size / (sizeof(__m256i) * 4);
21 do {
22 p[0] = v;
23 p[1] = v;
24 p[2] = v;
25 p[3] = v;
26 p += 4;
27 } while (--n);
28 return true;
29 }
30
31 static forceinline bool
matchfinder_rebase_avx2(mf_pos_t * data,size_t size)32 matchfinder_rebase_avx2(mf_pos_t *data, size_t size)
33 {
34 __m256i v, *p;
35 size_t n;
36
37 if (size % (sizeof(__m256i) * 4) != 0)
38 return false;
39
40 STATIC_ASSERT(sizeof(mf_pos_t) == 2);
41 v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE);
42 p = (__m256i *)data;
43 n = size / (sizeof(__m256i) * 4);
44 do {
45 /* PADDSW: Add Packed Signed Integers With Signed Saturation */
46 p[0] = _mm256_adds_epi16(p[0], v);
47 p[1] = _mm256_adds_epi16(p[1], v);
48 p[2] = _mm256_adds_epi16(p[2], v);
49 p[3] = _mm256_adds_epi16(p[3], v);
50 p += 4;
51 } while (--n);
52 return true;
53 }
54