1 /*
2  * Copyright (c) 2016-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 #include "mth_intrinsics.h"
18 
19 vrs16_t
__ZGVzN16v__mth_i_vr4(vrs16_t x,float func (float))20 __ZGVzN16v__mth_i_vr4(vrs16_t x, float func(float))
21 {
22   int i;
23   vrs16_t r;
24   for (i = 0; i < 16; i++) {
25     r[i] = func(x[i]);
26   }
27   return r;
28 }
29 
30 vrs16_t
__ZGVzM16v__mth_i_vr4(vrs16_t x,vis16_t mask,float func (float))31 __ZGVzM16v__mth_i_vr4(vrs16_t x, vis16_t mask, float func(float))
32 {
33   int i;
34   vrs16_t r;
35   for (i = 0; i < 16; i++) {
36     if (mask[i])
37       r[i] = func(x[i]);
38   }
39   return r;
40 }
41 
42 vrs16_t
__ZGVzN16vv__mth_i_vr4vr4(vrs16_t x,vrs16_t y,float func (float,float))43 __ZGVzN16vv__mth_i_vr4vr4(vrs16_t x, vrs16_t y, float func(float, float))
44 {
45   int i;
46   vrs16_t r;
47   for (i = 0; i < 16; i++) {
48     r[i] = func(x[i], y[i]);
49   }
50   return r;
51 }
52 
53 vrs16_t
__ZGVzM16vv__mth_i_vr4vr4(vrs16_t x,vrs16_t y,vis16_t mask,float func (float,float))54 __ZGVzM16vv__mth_i_vr4vr4(vrs16_t x, vrs16_t y, vis16_t mask, float func(float, float))
55 {
56   int i;
57   vrs16_t r;
58   for (i = 0; i < 16; i++) {
59     if (mask[i])
60       r[i] = func(x[i], y[i]);
61   }
62   return r;
63 }
64 
65 vrd8_t
__ZGVzN8v__mth_i_vr8(vrd8_t x,double func (double))66 __ZGVzN8v__mth_i_vr8(vrd8_t x, double func(double))
67 {
68   int i;
69   vrd8_t r;
70   for (i = 0; i < 8; i++) {
71     r[i] = func(x[i]);
72   }
73   return r;
74 }
75 
76 vrd8_t
__ZGVzM8v__mth_i_vr8(vrd8_t x,vid8_t mask,double func (double))77 __ZGVzM8v__mth_i_vr8(vrd8_t x, vid8_t mask, double func(double))
78 {
79   int i;
80   vrd8_t r;
81   for (i = 0; i < 8; i++) {
82     if (mask[i])
83       r[i] = func(x[i]);
84   }
85   return r;
86 }
87 
88 vrd8_t
__ZGVzN8vv__mth_i_vr8vr8(vrd8_t x,vrd8_t y,double func (double,double))89 __ZGVzN8vv__mth_i_vr8vr8(vrd8_t x, vrd8_t y, double func(double, double))
90 {
91   int i;
92   vrd8_t r;
93   for (i = 0; i < 8; i++) {
94     r[i] = func(x[i], y[i]);
95   }
96   return r;
97 }
98 
99 vrd8_t
__ZGVzM8vv__mth_i_vr8vr8(vrd8_t x,vrd8_t y,vid8_t mask,double func (double,double))100 __ZGVzM8vv__mth_i_vr8vr8(vrd8_t x, vrd8_t y, vid8_t mask, double func(double, double))
101 {
102   int i;
103   vrd8_t r;
104   for (i = 0; i < 8; i++) {
105     if (mask[i])
106       r[i] = func(x[i], y[i]);
107   }
108   return r;
109 }
110 
111 vrs16_t
__ZGVzN16v__mth_i_vr4si4(vrs16_t x,int32_t iy,float func (float,int32_t))112 __ZGVzN16v__mth_i_vr4si4(vrs16_t x, int32_t iy, float func(float, int32_t))
113 {
114   int i;
115   vrs16_t r;
116   for (i = 0 ; i < 16 ; i++) {
117     r[i] = func(x[i], iy);
118   }
119   return r;
120 }
121 
122 vrs16_t
__ZGVzM16v__mth_i_vr4si4(vrs16_t x,int32_t iy,vis16_t mask,float func (float,int32_t))123 __ZGVzM16v__mth_i_vr4si4(vrs16_t x, int32_t iy, vis16_t mask, float func(float, int32_t))
124 {
125   int i;
126   vrs16_t r;
127   for (i = 0 ; i < 16 ; i++) {
128     if (mask[i])
129       r[i] = func(x[i], iy);
130   }
131   return r;
132 }
133 
134 vrs16_t
__ZGVzN16vv__mth_i_vr4vi4(vrs16_t x,vis16_t iy,float func (float,int32_t))135 __ZGVzN16vv__mth_i_vr4vi4(vrs16_t x, vis16_t iy, float func(float, int32_t))
136 {
137   int i;
138   vrs16_t r;
139   for (i = 0 ; i < 16 ; i++) {
140     r[i] = func(x[i], iy[i]);
141   }
142   return r;
143 }
144 
145 vrs16_t
__ZGVzM16vv__mth_i_vr4vi4(vrs16_t x,vis16_t iy,vis16_t mask,float func (float,int32_t))146 __ZGVzM16vv__mth_i_vr4vi4(vrs16_t x, vis16_t iy, vis16_t mask, float func(float, int32_t))
147 {
148   int i;
149   vrs16_t r;
150   for (i = 0 ; i < 16 ; i++) {
151     if (mask[i])
152       r[i] = func(x[i], iy[i]);
153   }
154   return r;
155 }
156 
157 vrs16_t
__ZGVzN16v__mth_i_vr4si8(vrs16_t x,long long iy,float func (float,long long))158 __ZGVzN16v__mth_i_vr4si8(vrs16_t x, long long iy, float func(float, long long))
159 {
160   int i;
161   vrs16_t r;
162   for (i = 0 ; i < 16 ; i++) {
163     r[i] = func(x[i], iy);
164   }
165   return r;
166 }
167 
168 vrs16_t
__ZGVzM16v__mth_i_vr4si8(vrs16_t x,long long iy,vis16_t mask,float func (float,long long))169 __ZGVzM16v__mth_i_vr4si8(vrs16_t x, long long iy, vis16_t mask, float func(float, long long))
170 {
171   int i;
172   vrs16_t r;
173   for (i = 0 ; i < 16 ; i++) {
174     if (mask[i])
175       r[i] = func(x[i], iy);
176   }
177   return r;
178 }
179 
180 vrs16_t
__ZGVzN16vv__mth_i_vr4vi8(vrs16_t x,vid8_t iyu,vid8_t iyl,float func (float,long long))181 __ZGVzN16vv__mth_i_vr4vi8(vrs16_t x, vid8_t iyu, vid8_t iyl, float func(float, long long))
182 {
183   int i;
184   vrs16_t r;
185   for (i = 0 ; i < 8 ; i++) {
186     r[i] = func(x[i], iyu[i]);
187   }
188   for (i = 8 ; i < 16 ; i++) {
189     r[i] = func(x[i], iyl[i-8]);
190   }
191   return r;
192 }
193 
194 vrs16_t
__ZGVzM16vv__mth_i_vr4vi8(vrs16_t x,vid8_t iyu,vid8_t iyl,vis16_t mask,float func (float,long long))195 __ZGVzM16vv__mth_i_vr4vi8(vrs16_t x, vid8_t iyu, vid8_t iyl, vis16_t mask, float func(float, long long))
196 {
197   int i;
198   vrs16_t r;
199   for (i = 0 ; i < 8 ; i++) {
200     if (mask[i])
201       r[i] = func(x[i], iyu[i]);
202   }
203   for (i = 8 ; i < 16 ; i++) {
204     if (mask[i])
205       r[i] = func(x[i], iyl[i-8]);
206   }
207   return r;
208 }
209 
210 vrd8_t
__ZGVzN8v__mth_i_vr8si4(vrd8_t x,int32_t iy,double func (double,int32_t))211 __ZGVzN8v__mth_i_vr8si4(vrd8_t x, int32_t iy, double func(double, int32_t))
212 {
213   int i;
214   vrd8_t r;
215   for (i = 0 ; i < 8 ; i++) {
216     r[i] = func(x[i], iy);
217   }
218   return r;
219 }
220 
221 vrd8_t
__ZGVzM8v__mth_i_vr8si4(vrd8_t x,int32_t iy,vid8_t mask,double func (double,int32_t))222 __ZGVzM8v__mth_i_vr8si4(vrd8_t x, int32_t iy, vid8_t mask, double func(double, int32_t))
223 {
224   int i;
225   vrd8_t r;
226   for (i = 0 ; i < 8 ; i++) {
227     if (mask[i])
228       r[i] = func(x[i], iy);
229   }
230   return r;
231 }
232 
233 vrd8_t
__ZGVzN8vv__mth_i_vr8vi4(vrd8_t x,vis8_t iy,double func (double,int32_t))234 __ZGVzN8vv__mth_i_vr8vi4(vrd8_t x, vis8_t iy, double func(double, int32_t))
235 {
236   int i;
237   vrd8_t r;
238   for (i = 0 ; i < 8 ; i++) {
239     r[i] = func(x[i], iy[i]);
240   }
241   return r;
242 }
243 
244 vrd8_t
__ZGVzM8vv__mth_i_vr8vi4(vrd8_t x,vis8_t iy,vid8_t mask,double func (double,int32_t))245 __ZGVzM8vv__mth_i_vr8vi4(vrd8_t x, vis8_t iy, vid8_t mask, double func(double, int32_t))
246 {
247   int i;
248   vrd8_t r;
249   for (i = 0 ; i < 8 ; i++) {
250     if (mask[i])
251       r[i] = func(x[i], iy[i]);
252   }
253   return r;
254 }
255 
256 vrd8_t
__ZGVzN8v__mth_i_vr8si8(vrd8_t x,long long iy,double func (double,long long))257 __ZGVzN8v__mth_i_vr8si8(vrd8_t x, long long iy, double func(double, long long))
258 {
259   int i;
260   vrd8_t r;
261   for (i = 0 ; i < 8 ; i++) {
262     r[i] = func(x[i], iy);
263   }
264   return r;
265 }
266 
267 vrd8_t
__ZGVzM8v__mth_i_vr8si8(vrd8_t x,long long iy,vid8_t mask,double func (double,long long))268 __ZGVzM8v__mth_i_vr8si8(vrd8_t x, long long iy, vid8_t mask, double func(double, long long))
269 {
270   int i;
271   vrd8_t r;
272   for (i = 0 ; i < 8 ; i++) {
273     if (mask[i])
274       r[i] = func(x[i], iy);
275   }
276   return r;
277 }
278 
279 vrd8_t
__ZGVzN8vv__mth_i_vr8vi8(vrd8_t x,vid8_t iy,double func (double,long long))280 __ZGVzN8vv__mth_i_vr8vi8(vrd8_t x, vid8_t iy, double func(double, long long))
281 {
282   int i;
283   vrd8_t r;
284   for (i = 0 ; i < 8 ; i++) {
285     r[i] = func(x[i], iy[i]);
286   }
287   return r;
288 }
289 
290 vrd8_t
__ZGVzM8vv__mth_i_vr8vi8(vrd8_t x,vid8_t iy,vid8_t mask,double func (double,long long))291 __ZGVzM8vv__mth_i_vr8vi8(vrd8_t x, vid8_t iy, vid8_t mask, double func(double, long long))
292 {
293   int i;
294   vrd8_t r;
295   for (i = 0 ; i < 8 ; i++) {
296     if (mask[i])
297       r[i] = func(x[i], iy[i]);
298   }
299   return r;
300 }
301 
302 vcs8_t
__ZGVzN8v__mth_i_vc4(vcs8_t x,float _Complex func (float _Complex))303 __ZGVzN8v__mth_i_vc4(vcs8_t x, float _Complex func(float _Complex))
304 {
305   int i;
306   float _Complex tx[8];
307   *(vcs8_t *)&tx = x;
308   for (i = 0 ; i < 8 ; i++) {
309     tx[i] = func(tx[i]);
310   }
311   return *(vcs8_t *)&tx;
312 }
313 
314 vcs8_t
__ZGVzN8vv__mth_i_vc4vc4(vcs8_t x,vcs8_t y,float _Complex func (float _Complex,float _Complex))315 __ZGVzN8vv__mth_i_vc4vc4(vcs8_t x, vcs8_t y, float _Complex func(float _Complex, float _Complex))
316 {
317   int i;
318   float _Complex tx[8];
319   float _Complex ty[8];
320   *(vcs8_t *)&tx = x;
321   *(vcs8_t *)&ty = y;
322   for (i = 0 ; i < 8 ; i++) {
323     tx[i] = func(tx[i], ty[i]);
324   }
325   return *(vcs8_t *)&tx;
326 }
327 
328 vcd4_t
__ZGVzN4v__mth_i_vc8(vcd4_t x,double _Complex func (double _Complex))329 __ZGVzN4v__mth_i_vc8(vcd4_t x, double _Complex func(double _Complex))
330 {
331   int i;
332   double _Complex tx[4];
333   *(vcd4_t *)&tx = x;
334   for (i = 0 ; i < 4 ; i++) {
335     tx[i] = func(tx[i]);
336   }
337   return *(vcd4_t *)&tx;
338 }
339 
340 vcd4_t
__ZGVzN4vv__mth_i_vc8vc8(vcd4_t x,vcd4_t y,double _Complex func (double _Complex,double _Complex))341 __ZGVzN4vv__mth_i_vc8vc8(vcd4_t x, vcd4_t y, double _Complex func(double _Complex, double _Complex))
342 {
343   int i;
344   double _Complex tx[4];
345   double _Complex ty[4];
346   *(vcd4_t *)&tx = x;
347   *(vcd4_t *)&ty = y;
348   for (i = 0 ; i < 4 ; i++) {
349     tx[i] = func(tx[i], ty[i]);
350   }
351   return *(vcd4_t *)&tx;
352 }
353