1 /*
2  * matchfinder_neon.h - matchfinding routines optimized for ARM NEON (Advanced
3  * SIMD) instructions
4  */
5 
6 #include <arm_neon.h>
7 
8 static forceinline bool
matchfinder_init_neon(mf_pos_t * data,size_t size)9 matchfinder_init_neon(mf_pos_t *data, size_t size)
10 {
11 	int16x8_t v, *p;
12 	size_t n;
13 
14 	if (size % (sizeof(int16x8_t) * 4) != 0)
15 		return false;
16 
17 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
18 	v = (int16x8_t) {
19 		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
20 		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
21 		MATCHFINDER_INITVAL, MATCHFINDER_INITVAL,
22 	};
23 	p = (int16x8_t *)data;
24 	n = size / (sizeof(int16x8_t) * 4);
25 	do {
26 		p[0] = v;
27 		p[1] = v;
28 		p[2] = v;
29 		p[3] = v;
30 		p += 4;
31 	} while (--n);
32 	return true;
33 }
34 
35 static forceinline bool
matchfinder_rebase_neon(mf_pos_t * data,size_t size)36 matchfinder_rebase_neon(mf_pos_t *data, size_t size)
37 {
38 	int16x8_t v, *p;
39 	size_t n;
40 
41 	if (size % (sizeof(int16x8_t) * 4) != 0)
42 		return false;
43 
44 	STATIC_ASSERT(sizeof(mf_pos_t) == 2);
45 	v = (int16x8_t) {
46 		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
47 		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
48 		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
49 		(u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE,
50 	};
51 	p = (int16x8_t *)data;
52 	n = size / (sizeof(int16x8_t) * 4);
53 	do {
54 		p[0] = vqaddq_s16(p[0], v);
55 		p[1] = vqaddq_s16(p[1], v);
56 		p[2] = vqaddq_s16(p[2], v);
57 		p[3] = vqaddq_s16(p[3], v);
58 		p += 4;
59 	} while (--n);
60 	return true;
61 }
62