1 /* chunkset_neon.c -- NEON inline functions to copy small data chunks.
2 * For conditions of distribution and use, see copyright notice in zlib.h
3 */
4
5 #ifdef ARM_NEON_CHUNKSET
6 #ifdef _M_ARM64
7 # include <arm64_neon.h>
8 #else
9 # include <arm_neon.h>
10 #endif
11 #include "../../zbuild.h"
12 #include "../../zutil.h"
13
14 typedef uint8x16_t chunk_t;
15
16 #define CHUNK_SIZE 16
17
18 #define HAVE_CHUNKMEMSET_1
19 #define HAVE_CHUNKMEMSET_2
20 #define HAVE_CHUNKMEMSET_4
21 #define HAVE_CHUNKMEMSET_8
22
/* Fill *chunk by replicating the single byte at `from` into every lane. */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
    const uint8x16_t splat = vld1q_dup_u8(from);

    *chunk = splat;
}
26
/* Fill *chunk by repeating the 2 bytes found at `from`.
 * The bytes are fetched with memcpy into a local integer, so the load
 * itself makes no alignment assumption about `from`. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    uint16_t pattern;
    uint16x8_t lanes;

    memcpy(&pattern, from, sizeof(pattern));
    lanes = vdupq_n_u16(pattern);
    *chunk = vreinterpretq_u8_u16(lanes);
}
32
/* Fill *chunk by repeating the 4 bytes found at `from`.
 * The bytes are fetched with memcpy into a local integer, so the load
 * itself makes no alignment assumption about `from`. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    uint32_t pattern;
    uint32x4_t lanes;

    memcpy(&pattern, from, sizeof(pattern));
    lanes = vdupq_n_u32(pattern);
    *chunk = vreinterpretq_u8_u32(lanes);
}
38
/* Fill *chunk by repeating the 8 bytes found at `from`.
 * The bytes are fetched with memcpy into a local integer, so the load
 * itself makes no alignment assumption about `from`. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    uint64_t pattern;
    uint64x2_t lanes;

    memcpy(&pattern, from, sizeof(pattern));
    lanes = vdupq_n_u64(pattern);
    *chunk = vreinterpretq_u8_u64(lanes);
}
44
/* NEON-suffixed names substituted for the generic routine names.
 * NOTE(review): presumably expanded inside chunkset_tpl.h, which is
 * included further down in this file -- confirm there. */
#define CHUNKSIZE        chunksize_neon
#define CHUNKCOPY        chunkcopy_neon
#define CHUNKCOPY_SAFE   chunkcopy_safe_neon
#define CHUNKUNROLL      chunkunroll_neon
#define CHUNKMEMSET      chunkmemset_neon
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
51
/* Load one full vector (16 bytes) starting at `s` into *chunk. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    const chunk_t loaded = vld1q_u8(s);

    *chunk = loaded;
}
55
/* Write the full vector (16 bytes) held in *chunk to memory at `out`. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    const chunk_t value = *chunk;

    vst1q_u8(out, value);
}
59
60 #include "chunkset_tpl.h"
61
62 #endif
63