1 /*
2 * matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
3 * SIMD) instructions
4 */
5
6 #include <arm_neon.h>
7
/*
 * Fill the table at 'data' ('size' bytes long) with MATCHFINDER_INITVAL using
 * 128-bit NEON stores, four vectors (4 * sizeof(int16x8_t) bytes) per
 * iteration.
 *
 * Returns false, without writing anything, if 'size' is not a multiple of
 * 4 * sizeof(int16x8_t).  Otherwise fills the whole table and returns true.
 * A 'size' of 0 is accepted: nothing is written and true is returned.
 *
 * NOTE(review): 'data' is accessed through an int16x8_t pointer, so it is
 * presumably expected to be suitably aligned for vector access -- confirm
 * against the callers.
 */
static forceinline bool
matchfinder_init_neon(mf_pos_t *data, size_t size)
{
	int16x8_t v, *p;
	size_t n;

	if (size % (sizeof(int16x8_t) * 4) != 0)
		return false;

	/* Each table entry must occupy exactly one 16-bit vector lane. */
	STATIC_ASSERT(sizeof(mf_pos_t) == 2);

	/* Broadcast the initial value to all 8 lanes. */
	v = vdupq_n_s16(MATCHFINDER_INITVAL);
	p = (int16x8_t *)data;

	/*
	 * Pre-tested loop: a post-tested 'do ... while (--n)' would store past
	 * the end of an empty table and then wrap the counter when size == 0.
	 */
	for (n = size / (sizeof(int16x8_t) * 4); n != 0; n--) {
		p[0] = v;
		p[1] = v;
		p[2] = v;
		p[3] = v;
		p += 4;
	}
	return true;
}
34
/*
 * Rebase every 16-bit entry of the table at 'data' ('size' bytes long) by
 * adding the 16-bit value of -MATCHFINDER_WINDOW_SIZE to it with a signed
 * saturating add (vqaddq_s16), so results clamp at the int16 limits instead
 * of wrapping.  Four vectors (4 * sizeof(int16x8_t) bytes) are processed per
 * iteration.
 *
 * Returns false, without modifying anything, if 'size' is not a multiple of
 * 4 * sizeof(int16x8_t).  Otherwise updates the whole table and returns true.
 * A 'size' of 0 is accepted: nothing is touched and true is returned.
 *
 * NOTE(review): 'data' is accessed through an int16x8_t pointer, so it is
 * presumably expected to be suitably aligned for vector access -- confirm
 * against the callers.
 */
static forceinline bool
matchfinder_rebase_neon(mf_pos_t *data, size_t size)
{
	int16x8_t v, *p;
	size_t n;

	if (size % (sizeof(int16x8_t) * 4) != 0)
		return false;

	/* Each table entry must occupy exactly one 16-bit vector lane. */
	STATIC_ASSERT(sizeof(mf_pos_t) == 2);

	/* Broadcast the rebase offset to all 8 lanes. */
	v = vdupq_n_s16((u16)-MATCHFINDER_WINDOW_SIZE);
	p = (int16x8_t *)data;

	/*
	 * Pre-tested loop: a post-tested 'do ... while (--n)' would access
	 * memory past the end of an empty table and then wrap the counter when
	 * size == 0.
	 */
	for (n = size / (sizeof(int16x8_t) * 4); n != 0; n--) {
		p[0] = vqaddq_s16(p[0], v);
		p[1] = vqaddq_s16(p[1], v);
		p[2] = vqaddq_s16(p[2], v);
		p[3] = vqaddq_s16(p[3], v);
		p += 4;
	}
	return true;
}
62