1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_
13 #define AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 
18 #include "aom_dsp/simd/v64_intrinsics_c.h"
19 
20 /* Fallback to plain, unoptimised C. */
21 
typedef c_v64 v64;

/* Scalar <-> vector conversion: extract 32-bit halves of a v64, build a
   v64 from one 64-bit value, two 32-bit values or four 16-bit values, and
   read the whole vector back as a uint64_t.  Each function delegates to
   the plain-C reference implementation in v64_intrinsics_c.h. */
SIMD_INLINE uint32_t v64_low_u32(v64 a) { return c_v64_low_u32(a); }
SIMD_INLINE uint32_t v64_high_u32(v64 a) { return c_v64_high_u32(a); }
SIMD_INLINE int32_t v64_low_s32(v64 a) { return c_v64_low_s32(a); }
SIMD_INLINE int32_t v64_high_s32(v64 a) { return c_v64_high_s32(a); }
SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
  return c_v64_from_32(x, y);
}
SIMD_INLINE v64 v64_from_64(uint64_t x) { return c_v64_from_64(x); }
SIMD_INLINE uint64_t v64_u64(v64 x) { return c_v64_u64(x); }
SIMD_INLINE v64 v64_from_16(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
  return c_v64_from_16(a, b, c, d);
}
36 
/* 32-bit scalar loads and stores.  The _aligned variants assume p is
   suitably aligned for a 32-bit access (NOTE(review): alignment contract
   lives in v64_intrinsics_c.h -- confirm there).  All delegate to the
   plain-C reference implementation. */
SIMD_INLINE uint32_t u32_load_unaligned(const void *p) {
  return c_u32_load_unaligned(p);
}
SIMD_INLINE uint32_t u32_load_aligned(const void *p) {
  return c_u32_load_aligned(p);
}
SIMD_INLINE void u32_store_unaligned(void *p, uint32_t a) {
  c_u32_store_unaligned(p, a);
}
SIMD_INLINE void u32_store_aligned(void *p, uint32_t a) {
  c_u32_store_aligned(p, a);
}
49 
/* 64-bit vector loads and stores.  The _aligned variants assume p is
   suitably aligned for a 64-bit access (NOTE(review): alignment contract
   lives in v64_intrinsics_c.h -- confirm there).  All delegate to the
   plain-C reference implementation. */
SIMD_INLINE v64 v64_load_unaligned(const void *p) {
  return c_v64_load_unaligned(p);
}
SIMD_INLINE v64 v64_load_aligned(const void *p) {
  return c_v64_load_aligned(p);
}

SIMD_INLINE void v64_store_unaligned(void *p, v64 a) {
  c_v64_store_unaligned(p, a);
}
SIMD_INLINE void v64_store_aligned(void *p, v64 a) {
  c_v64_store_aligned(p, a);
}
63 
/* Extract a 64-bit vector from the concatenation of a and b, offset by
   c bytes (see c_v64_align for the exact byte ordering). */
SIMD_INLINE v64 v64_align(v64 a, v64 b, unsigned int c) {
  return c_v64_align(a, b, c);
}
67 
v64_zero()68 SIMD_INLINE v64 v64_zero() { return c_v64_zero(); }
v64_dup_8(uint8_t x)69 SIMD_INLINE v64 v64_dup_8(uint8_t x) { return c_v64_dup_8(x); }
v64_dup_16(uint16_t x)70 SIMD_INLINE v64 v64_dup_16(uint16_t x) { return c_v64_dup_16(x); }
v64_dup_32(uint32_t x)71 SIMD_INLINE v64 v64_dup_32(uint32_t x) { return c_v64_dup_32(x); }
72 
/* Lane-wise addition, subtraction and absolute value.  The sadd_/ssub_
   forms are the saturating variants, with the _u/_s suffix giving the
   lane signedness; exact saturation behaviour is defined by the plain-C
   reference implementation these delegate to. */
SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return c_v64_add_8(a, b); }
SIMD_INLINE v64 v64_add_16(v64 a, v64 b) { return c_v64_add_16(a, b); }
SIMD_INLINE v64 v64_sadd_u8(v64 a, v64 b) { return c_v64_sadd_u8(a, b); }
SIMD_INLINE v64 v64_sadd_s8(v64 a, v64 b) { return c_v64_sadd_s8(a, b); }
SIMD_INLINE v64 v64_sadd_s16(v64 a, v64 b) { return c_v64_sadd_s16(a, b); }
SIMD_INLINE v64 v64_add_32(v64 a, v64 b) { return c_v64_add_32(a, b); }
SIMD_INLINE v64 v64_sub_8(v64 a, v64 b) { return c_v64_sub_8(a, b); }
SIMD_INLINE v64 v64_ssub_u8(v64 a, v64 b) { return c_v64_ssub_u8(a, b); }
SIMD_INLINE v64 v64_ssub_s8(v64 a, v64 b) { return c_v64_ssub_s8(a, b); }
SIMD_INLINE v64 v64_sub_16(v64 a, v64 b) { return c_v64_sub_16(a, b); }
SIMD_INLINE v64 v64_ssub_s16(v64 a, v64 b) { return c_v64_ssub_s16(a, b); }
SIMD_INLINE v64 v64_ssub_u16(v64 a, v64 b) { return c_v64_ssub_u16(a, b); }
SIMD_INLINE v64 v64_sub_32(v64 a, v64 b) { return c_v64_sub_32(a, b); }
SIMD_INLINE v64 v64_abs_s16(v64 a) { return c_v64_abs_s16(a); }
SIMD_INLINE v64 v64_abs_s8(v64 a) { return c_v64_abs_s8(a); }
88 
/* Lane rearrangement: interleave (zip), de-interleave (unzip), widen
   (unpack, with the source/destination lane types in the name), narrow
   (pack) and arbitrary byte shuffle.  The exact lane ordering of each
   operation is defined by the plain-C reference implementation in
   v64_intrinsics_c.h that these delegate to. */
SIMD_INLINE v64 v64_ziplo_8(v64 a, v64 b) { return c_v64_ziplo_8(a, b); }
SIMD_INLINE v64 v64_ziphi_8(v64 a, v64 b) { return c_v64_ziphi_8(a, b); }
SIMD_INLINE v64 v64_ziplo_16(v64 a, v64 b) { return c_v64_ziplo_16(a, b); }
SIMD_INLINE v64 v64_ziphi_16(v64 a, v64 b) { return c_v64_ziphi_16(a, b); }
SIMD_INLINE v64 v64_ziplo_32(v64 a, v64 b) { return c_v64_ziplo_32(a, b); }
SIMD_INLINE v64 v64_ziphi_32(v64 a, v64 b) { return c_v64_ziphi_32(a, b); }
SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) { return c_v64_unziplo_8(a, b); }
SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) { return c_v64_unziphi_8(a, b); }
SIMD_INLINE v64 v64_unziplo_16(v64 a, v64 b) { return c_v64_unziplo_16(a, b); }
SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) { return c_v64_unziphi_16(a, b); }
SIMD_INLINE v64 v64_unpacklo_u8_s16(v64 a) { return c_v64_unpacklo_u8_s16(a); }
SIMD_INLINE v64 v64_unpackhi_u8_s16(v64 a) { return c_v64_unpackhi_u8_s16(a); }
SIMD_INLINE v64 v64_unpacklo_s8_s16(v64 a) { return c_v64_unpacklo_s8_s16(a); }
SIMD_INLINE v64 v64_unpackhi_s8_s16(v64 a) { return c_v64_unpackhi_s8_s16(a); }
SIMD_INLINE v64 v64_pack_s32_s16(v64 a, v64 b) {
  return c_v64_pack_s32_s16(a, b);
}
SIMD_INLINE v64 v64_pack_s32_u16(v64 a, v64 b) {
  return c_v64_pack_s32_u16(a, b);
}
SIMD_INLINE v64 v64_pack_s16_u8(v64 a, v64 b) {
  return c_v64_pack_s16_u8(a, b);
}
SIMD_INLINE v64 v64_pack_s16_s8(v64 a, v64 b) {
  return c_v64_pack_s16_s8(a, b);
}
SIMD_INLINE v64 v64_unpacklo_u16_s32(v64 a) {
  return c_v64_unpacklo_u16_s32(a);
}
SIMD_INLINE v64 v64_unpacklo_s16_s32(v64 a) {
  return c_v64_unpacklo_s16_s32(a);
}
SIMD_INLINE v64 v64_unpackhi_u16_s32(v64 a) {
  return c_v64_unpackhi_u16_s32(a);
}
SIMD_INLINE v64 v64_unpackhi_s16_s32(v64 a) {
  return c_v64_unpackhi_s16_s32(a);
}
SIMD_INLINE v64 v64_shuffle_8(v64 a, v64 pattern) {
  return c_v64_shuffle_8(a, pattern);
}
130 
131 typedef uint32_t sad64_internal;
v64_sad_u8_init()132 SIMD_INLINE sad64_internal v64_sad_u8_init() { return c_v64_sad_u8_init(); }
v64_sad_u8(sad64_internal s,v64 a,v64 b)133 SIMD_INLINE sad64_internal v64_sad_u8(sad64_internal s, v64 a, v64 b) {
134   return c_v64_sad_u8(s, a, b);
135 }
v64_sad_u8_sum(sad64_internal s)136 SIMD_INLINE uint32_t v64_sad_u8_sum(sad64_internal s) {
137   return c_v64_sad_u8_sum(s);
138 }
139 typedef uint32_t ssd64_internal;
v64_ssd_u8_init()140 SIMD_INLINE ssd64_internal v64_ssd_u8_init() { return c_v64_ssd_u8_init(); }
v64_ssd_u8(ssd64_internal s,v64 a,v64 b)141 SIMD_INLINE ssd64_internal v64_ssd_u8(ssd64_internal s, v64 a, v64 b) {
142   return c_v64_ssd_u8(s, a, b);
143 }
v64_ssd_u8_sum(ssd64_internal s)144 SIMD_INLINE uint32_t v64_ssd_u8_sum(ssd64_internal s) {
145   return c_v64_ssd_u8_sum(s);
146 }
/* Horizontal reductions: dot products (lane signedness per the suffix)
   and sums across all lanes, returned as 64-bit scalars.  Delegates to
   the plain-C reference implementation. */
SIMD_INLINE int64_t v64_dotp_su8(v64 a, v64 b) { return c_v64_dotp_su8(a, b); }
SIMD_INLINE int64_t v64_dotp_s16(v64 a, v64 b) { return c_v64_dotp_s16(a, b); }
SIMD_INLINE uint64_t v64_hadd_u8(v64 a) { return c_v64_hadd_u8(a); }
SIMD_INLINE int64_t v64_hadd_s16(v64 a) { return c_v64_hadd_s16(a); }
151 
/* Bitwise logic.  v64_andn is and-not (NOTE(review): operand order --
   which input is complemented -- is defined by c_v64_andn; confirm in
   v64_intrinsics_c.h before relying on it). */
SIMD_INLINE v64 v64_or(v64 a, v64 b) { return c_v64_or(a, b); }
SIMD_INLINE v64 v64_xor(v64 a, v64 b) { return c_v64_xor(a, b); }
SIMD_INLINE v64 v64_and(v64 a, v64 b) { return c_v64_and(a, b); }
SIMD_INLINE v64 v64_andn(v64 a, v64 b) { return c_v64_andn(a, b); }
156 
/* Lane-wise multiplies (mullo = low half of the product, mulhi = high
   half) and multiply-add (madd) operations; exact widening/accumulation
   semantics are defined by the plain-C reference implementation. */
SIMD_INLINE v64 v64_mullo_s16(v64 a, v64 b) { return c_v64_mullo_s16(a, b); }
SIMD_INLINE v64 v64_mulhi_s16(v64 a, v64 b) { return c_v64_mulhi_s16(a, b); }
SIMD_INLINE v64 v64_mullo_s32(v64 a, v64 b) { return c_v64_mullo_s32(a, b); }
SIMD_INLINE v64 v64_madd_s16(v64 a, v64 b) { return c_v64_madd_s16(a, b); }
SIMD_INLINE v64 v64_madd_us8(v64 a, v64 b) { return c_v64_madd_us8(a, b); }
162 
/* Lane-wise averages (avg vs rdavg differ in rounding -- see the plain-C
   reference implementation for which rounds up) and minimum/maximum. */
SIMD_INLINE v64 v64_avg_u8(v64 a, v64 b) { return c_v64_avg_u8(a, b); }
SIMD_INLINE v64 v64_rdavg_u8(v64 a, v64 b) { return c_v64_rdavg_u8(a, b); }
SIMD_INLINE v64 v64_rdavg_u16(v64 a, v64 b) { return c_v64_rdavg_u16(a, b); }
SIMD_INLINE v64 v64_avg_u16(v64 a, v64 b) { return c_v64_avg_u16(a, b); }
SIMD_INLINE v64 v64_min_u8(v64 a, v64 b) { return c_v64_min_u8(a, b); }
SIMD_INLINE v64 v64_max_u8(v64 a, v64 b) { return c_v64_max_u8(a, b); }
SIMD_INLINE v64 v64_min_s8(v64 a, v64 b) { return c_v64_min_s8(a, b); }
SIMD_INLINE v64 v64_max_s8(v64 a, v64 b) { return c_v64_max_s8(a, b); }
SIMD_INLINE v64 v64_min_s16(v64 a, v64 b) { return c_v64_min_s16(a, b); }
SIMD_INLINE v64 v64_max_s16(v64 a, v64 b) { return c_v64_max_s16(a, b); }
173 
/* Lane-wise comparisons; the result encoding (per-lane mask values) is
   defined by the plain-C reference implementation these delegate to. */
SIMD_INLINE v64 v64_cmpgt_s8(v64 a, v64 b) { return c_v64_cmpgt_s8(a, b); }
SIMD_INLINE v64 v64_cmplt_s8(v64 a, v64 b) { return c_v64_cmplt_s8(a, b); }
SIMD_INLINE v64 v64_cmpeq_8(v64 a, v64 b) { return c_v64_cmpeq_8(a, b); }
SIMD_INLINE v64 v64_cmpgt_s16(v64 a, v64 b) { return c_v64_cmpgt_s16(a, b); }
SIMD_INLINE v64 v64_cmplt_s16(v64 a, v64 b) { return c_v64_cmplt_s16(a, b); }
SIMD_INLINE v64 v64_cmpeq_16(v64 a, v64 b) { return c_v64_cmpeq_16(a, b); }
180 
/* Lane-wise shifts: shl = shift left, shr_u = logical (unsigned) shift
   right, shr_s = arithmetic (signed) shift right; the _byte forms shift
   the whole vector by bytes.  The _n_ variants mirror the variable-shift
   forms (NOTE(review): presumably intended for compile-time-constant
   shift counts, as on real SIMD targets -- confirm against the
   optimised implementations). */
SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int n) { return c_v64_shl_8(a, n); }
SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int n) { return c_v64_shr_u8(a, n); }
SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int n) { return c_v64_shr_s8(a, n); }
SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int n) { return c_v64_shl_16(a, n); }
SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int n) {
  return c_v64_shr_u16(a, n);
}
SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int n) {
  return c_v64_shr_s16(a, n);
}
SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int n) { return c_v64_shl_32(a, n); }
SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int n) {
  return c_v64_shr_u32(a, n);
}
SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int n) {
  return c_v64_shr_s32(a, n);
}
SIMD_INLINE v64 v64_shr_n_byte(v64 a, unsigned int n) {
  return c_v64_shr_n_byte(a, n);
}
SIMD_INLINE v64 v64_shl_n_byte(v64 a, unsigned int n) {
  return c_v64_shl_n_byte(a, n);
}
SIMD_INLINE v64 v64_shl_n_8(v64 a, unsigned int c) {
  return c_v64_shl_n_8(a, c);
}
SIMD_INLINE v64 v64_shr_n_u8(v64 a, unsigned int c) {
  return c_v64_shr_n_u8(a, c);
}
SIMD_INLINE v64 v64_shr_n_s8(v64 a, unsigned int c) {
  return c_v64_shr_n_s8(a, c);
}
SIMD_INLINE v64 v64_shl_n_16(v64 a, unsigned int c) {
  return c_v64_shl_n_16(a, c);
}
SIMD_INLINE v64 v64_shr_n_u16(v64 a, unsigned int c) {
  return c_v64_shr_n_u16(a, c);
}
SIMD_INLINE v64 v64_shr_n_s16(v64 a, unsigned int c) {
  return c_v64_shr_n_s16(a, c);
}
SIMD_INLINE v64 v64_shl_n_32(v64 a, unsigned int c) {
  return c_v64_shl_n_32(a, c);
}
SIMD_INLINE v64 v64_shr_n_u32(v64 a, unsigned int c) {
  return c_v64_shr_n_u32(a, c);
}
SIMD_INLINE v64 v64_shr_n_s32(v64 a, unsigned int c) {
  return c_v64_shr_n_s32(a, c);
}
231 
232 #endif  // AOM_AOM_DSP_SIMD_V64_INTRINSICS_H_
233