/* slide_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
 * Copyright (C) 2017-2020 Mika T. Lindqvist
 *
 * Authors:
 * Mika T. Lindqvist <postmaster@raasu.org>
 * Jun He <jun.he@arm.com>
 *
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#if defined(ARM_NEON_SLIDEHASH)
#ifdef _M_ARM64
#  include <arm64_neon.h>
#else
#  include <arm_neon.h>
#endif
#include "../../zbuild.h"
#include "../../deflate.h"

/* SIMD version of hash_chain rebase */
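/* Each table entry is a window position; sliding the window requires
 * subtracting window_size from every entry. vqsubq_u16 subtracts with
 * unsigned saturation, so entries smaller than window_size clamp to 0 (NIL)
 * instead of wrapping, matching the scalar `m >= wsize ? m - wsize : 0`. */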
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
    ZLIB_REGISTER uint16x8_t v, *p;
    ZLIB_REGISTER size_t n;

    size_t size = entries*sizeof(table[0]);
    Assert((size % (sizeof(uint16x8_t) * 8) == 0), "hash table size err");

    Assert(sizeof(Pos) == 2, "Wrong Pos size");
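    /* Broadcast window_size into all eight 16-bit lanes of v. */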
    v = vdupq_n_u16(window_size);

    p = (uint16x8_t *)table;
    n = size / (sizeof(uint16x8_t) * 8);
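    /* The loop is unrolled 8x: each iteration rebases eight uint16x8_t vectors,
     * i.e. 64 entries (128 bytes), to reduce loop overhead. */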
    do {
        p[0] = vqsubq_u16(p[0], v);
        p[1] = vqsubq_u16(p[1], v);
        p[2] = vqsubq_u16(p[2], v);
        p[3] = vqsubq_u16(p[3], v);
        p[4] = vqsubq_u16(p[4], v);
        p[5] = vqsubq_u16(p[5], v);
        p[6] = vqsubq_u16(p[6], v);
        p[7] = vqsubq_u16(p[7], v);
        p += 8;
    } while (--n);
}

ZLIB_INTERNAL void slide_hash_neon(deflate_state *s) {
    unsigned int wsize = s->w_size;

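    /* head holds hash_size chain heads, prev holds one position per window
     * byte (w_size entries); both are rebased by w_size. */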
    slide_hash_chain(s->head, s->hash_size, wsize);
    slide_hash_chain(s->prev, wsize, wsize);
}
#endif