1 /*
2  * Copyright (c) 2016-2019, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 #include "mth_intrinsics.h"
18 
19 vrs4_t
__ZGVxN4v__mth_i_vr4(vrs4_t x,float func (float))20 __ZGVxN4v__mth_i_vr4(vrs4_t x, float func(float))
21 {
22   int i;
23   vrs4_t r;
24   for (i = 0; i < 4; i++) {
25     r[i] = func(x[i]);
26   }
27   return r;
28 }
29 
30 vrs4_t
__ZGVxM4v__mth_i_vr4(vrs4_t x,vis4_t mask,float func (float))31 __ZGVxM4v__mth_i_vr4(vrs4_t x, vis4_t mask, float func(float))
32 {
33   int i;
34   vrs4_t r;
35   for (i = 0; i < 4; i++) {
36     if (mask[i])
37       r[i] = func(x[i]);
38   }
39   return r;
40 }
41 
42 vrs4_t
__ZGVxN4vv__mth_i_vr4vr4(vrs4_t x,vrs4_t y,float func (float,float))43 __ZGVxN4vv__mth_i_vr4vr4(vrs4_t x, vrs4_t y, float func(float, float))
44 {
45   int i;
46   vrs4_t r;
47   for (i = 0; i < 4; i++) {
48     r[i] = func(x[i], y[i]);
49   }
50   return r;
51 }
52 
53 vrs4_t
__ZGVxM4vv__mth_i_vr4vr4(vrs4_t x,vrs4_t y,vis4_t mask,float func (float,float))54 __ZGVxM4vv__mth_i_vr4vr4(vrs4_t x, vrs4_t y, vis4_t mask, float func(float, float))
55 {
56   int i;
57   vrs4_t r;
58   for (i = 0; i < 4; i++) {
59     if (mask[i])
60       r[i] = func(x[i], y[i]);
61   }
62   return r;
63 }
64 
65 vrd2_t
__ZGVxN2v__mth_i_vr8(vrd2_t x,double func (double))66 __ZGVxN2v__mth_i_vr8(vrd2_t x, double func(double))
67 {
68   int i;
69   vrd2_t r;
70   for (i = 0; i < 2; i++) {
71     r[i] = func(x[i]);
72   }
73   return r;
74 }
75 
76 vrd2_t
__ZGVxM2v__mth_i_vr8(vrd2_t x,vid2_t mask,double func (double))77 __ZGVxM2v__mth_i_vr8(vrd2_t x, vid2_t mask, double func(double))
78 {
79   int i;
80   vrd2_t r;
81   for (i = 0; i < 2; i++) {
82     if (mask[i])
83       r[i] = func(x[i]);
84   }
85   return r;
86 }
87 
88 vrd2_t
__ZGVxN2vv__mth_i_vr8vr8(vrd2_t x,vrd2_t y,double func (double,double))89 __ZGVxN2vv__mth_i_vr8vr8(vrd2_t x, vrd2_t y, double func(double, double))
90 {
91   int i;
92   vrd2_t r;
93   for (i = 0; i < 2; i++) {
94     r[i] = func(x[i], y[i]);
95   }
96   return r;
97 }
98 
99 vrd2_t
__ZGVxM2vv__mth_i_vr8vr8(vrd2_t x,vrd2_t y,vid2_t mask,double func (double,double))100 __ZGVxM2vv__mth_i_vr8vr8(vrd2_t x, vrd2_t y, vid2_t mask, double func(double, double))
101 {
102   int i;
103   vrd2_t r;
104   for (i = 0; i < 2; i++) {
105     if (mask[i])
106       r[i] = func(x[i], y[i]);
107   }
108   return r;
109 }
110 
111 vrs4_t
__ZGVxN4v__mth_i_vr4si4(vrs4_t x,int32_t iy,float func (float,int32_t))112 __ZGVxN4v__mth_i_vr4si4(vrs4_t x, int32_t iy, float func(float, int32_t))
113 {
114   int i;
115   vrs4_t r;
116   for (i = 0 ; i < 4 ; i++) {
117     r[i] = func(x[i], iy);
118   }
119   return r;
120 }
121 
122 vrs4_t
__ZGVxM4v__mth_i_vr4si4(vrs4_t x,int32_t iy,vis4_t mask,float func (float,int32_t))123 __ZGVxM4v__mth_i_vr4si4(vrs4_t x, int32_t iy, vis4_t mask, float func(float, int32_t))
124 {
125   int i;
126   vrs4_t r;
127   for (i = 0 ; i < 4 ; i++) {
128     if (mask[i])
129       r[i] = func(x[i], iy);
130   }
131   return r;
132 }
133 
134 vrs4_t
__ZGVxN4vv__mth_i_vr4vi4(vrs4_t x,vis4_t iy,float func (float,int32_t))135 __ZGVxN4vv__mth_i_vr4vi4(vrs4_t x, vis4_t iy, float func(float, int32_t))
136 {
137   int i;
138   vrs4_t r;
139   for (i = 0 ; i < 4 ; i++) {
140     r[i] = func(x[i], iy[i]);
141   }
142   return r;
143 }
144 
145 vrs4_t
__ZGVxM4vv__mth_i_vr4vi4(vrs4_t x,vis4_t iy,vis4_t mask,float func (float,int32_t))146 __ZGVxM4vv__mth_i_vr4vi4(vrs4_t x, vis4_t iy, vis4_t mask, float func(float, int32_t))
147 {
148   int i;
149   vrs4_t r;
150   for (i = 0 ; i < 4 ; i++) {
151     if (mask[i])
152       r[i] = func(x[i], iy[i]);
153   }
154   return r;
155 }
156 
157 vrs4_t
__ZGVxN4v__mth_i_vr4si8(vrs4_t x,long long iy,float func (float,long long))158 __ZGVxN4v__mth_i_vr4si8(vrs4_t x, long long iy, float func(float, long long))
159 {
160   int i;
161   vrs4_t r;
162   for (i = 0 ; i < 4 ; i++) {
163     r[i] = func(x[i], iy);
164   }
165   return r;
166 }
167 
168 vrs4_t
__ZGVxM4v__mth_i_vr4si8(vrs4_t x,long long iy,vis4_t mask,float func (float,long long))169 __ZGVxM4v__mth_i_vr4si8(vrs4_t x, long long iy, vis4_t mask, float func(float, long long))
170 {
171   int i;
172   vrs4_t r;
173   for (i = 0 ; i < 4 ; i++) {
174     if (mask[i])
175       r[i] = func(x[i], iy);
176   }
177   return r;
178 }
179 
180 vrs4_t
__ZGVxN4vv__mth_i_vr4vi8(vrs4_t x,vid2_t iyu,vid2_t iyl,float func (float,long long))181 __ZGVxN4vv__mth_i_vr4vi8(vrs4_t x, vid2_t iyu, vid2_t iyl, float func(float, long long))
182 {
183   int i;
184   vrs4_t r;
185   for (i = 0 ; i < 2 ; i++) {
186     r[i] = func(x[i], iyu[i]);
187   }
188   for (i = 2 ; i < 4 ; i++) {
189     r[i] = func(x[i], iyl[i-2]);
190   }
191   return r;
192 }
193 
194 vrs4_t
__ZGVxM4vv__mth_i_vr4vi8(vrs4_t x,vid2_t iyu,vid2_t iyl,vis4_t mask,float func (float,long long))195 __ZGVxM4vv__mth_i_vr4vi8(vrs4_t x, vid2_t iyu, vid2_t iyl, vis4_t mask, float func(float, long long))
196 {
197   int i;
198   vrs4_t r;
199   for (i = 0 ; i < 2 ; i++) {
200     if (mask[i])
201       r[i] = func(x[i], iyu[i]);
202   }
203   for (i = 2 ; i < 4 ; i++) {
204     if (mask[i])
205       r[i] = func(x[i], iyl[i-2]);
206   }
207   return r;
208 }
209 
210 
211 //---------------
212 
213 
214 vrd2_t
__ZGVxN2v__mth_i_vr8si4(vrd2_t x,int32_t iy,double func (double,int32_t))215 __ZGVxN2v__mth_i_vr8si4(vrd2_t x, int32_t iy, double func(double, int32_t))
216 {
217   int i;
218   vrd2_t r;
219   for (i = 0 ; i < 2 ; i++) {
220     r[i] = func(x[i], iy);
221   }
222   return r;
223 }
224 
225 vrd2_t
__ZGVxM2v__mth_i_vr8si4(vrd2_t x,int32_t iy,vid2_t mask,double func (double,int32_t))226 __ZGVxM2v__mth_i_vr8si4(vrd2_t x, int32_t iy, vid2_t mask, double func(double, int32_t))
227 {
228   int i;
229   vrd2_t r;
230   for (i = 0 ; i < 2 ; i++) {
231     if (mask[i])
232       r[i] = func(x[i], iy);
233   }
234   return r;
235 }
236 
/*
 * __ZGVxN2vv__mth_i_vr8vi4 and __ZGVxM2vv__mth_i_vr8vi4 should
 * be defined as:
 * __ZGVxN2vv__mth_i_vr8vi4(vrd2_t x, vis2_t iy, double func(double, int32_t))
 * __ZGVxM2vv__mth_i_vr8vi4(vrd2_t x, vis2_t iy, vid2_t mask, double func(double, int32_t))
 *
 * But the POWER architecture needs 32-bit integer vectors to
 * occupy the full 128 bits of a vector register.
 */
246 
247 vrd2_t
__ZGVxN2vv__mth_i_vr8vi4(vrd2_t x,vis4_t iy,double func (double,int32_t))248 __ZGVxN2vv__mth_i_vr8vi4(vrd2_t x, vis4_t iy, double func(double, int32_t))
249 {
250   int i;
251   vrd2_t r;
252   for (i = 0 ; i < 2 ; i++) {
253     r[i] = func(x[i], iy[i]);
254   }
255   return r;
256 }
257 
258 vrd2_t
__ZGVxM2vv__mth_i_vr8vi4(vrd2_t x,vis4_t iy,vid2_t mask,double func (double,int32_t))259 __ZGVxM2vv__mth_i_vr8vi4(vrd2_t x, vis4_t iy, vid2_t mask, double func(double, int32_t))
260 {
261   int i;
262   vrd2_t r;
263   for (i = 0 ; i < 2 ; i++) {
264     if (mask[i])
265       r[i] = func(x[i], iy[i]);
266   }
267   return r;
268 }
269 
270 vrd2_t
__ZGVxN2v__mth_i_vr8si8(vrd2_t x,long long iy,double func (double,long long))271 __ZGVxN2v__mth_i_vr8si8(vrd2_t x, long long iy, double func(double, long long))
272 {
273   int i;
274   vrd2_t r;
275   for (i = 0 ; i < 2 ; i++) {
276     r[i] = func(x[i], iy);
277   }
278   return r;
279 }
280 
281 vrd2_t
__ZGVxM2v__mth_i_vr8si8(vrd2_t x,long long iy,vid2_t mask,double func (double,long long))282 __ZGVxM2v__mth_i_vr8si8(vrd2_t x, long long iy, vid2_t mask, double func(double, long long))
283 {
284   int i;
285   vrd2_t r;
286   for (i = 0 ; i < 2 ; i++) {
287     if (mask[i])
288       r[i] = func(x[i], iy);
289   }
290   return r;
291 }
292 
293 vrd2_t
__ZGVxN2vv__mth_i_vr8vi8(vrd2_t x,vid2_t iy,double func (double,long long))294 __ZGVxN2vv__mth_i_vr8vi8(vrd2_t x, vid2_t iy, double func(double, long long))
295 {
296   int i;
297   vrd2_t r;
298   for (i = 0 ; i < 2 ; i++) {
299     r[i] = func(x[i], iy[i]);
300   }
301   return r;
302 }
303 
304 vrd2_t
__ZGVxM2vv__mth_i_vr8vi8(vrd2_t x,vid2_t iy,vid2_t mask,double func (double,long long))305 __ZGVxM2vv__mth_i_vr8vi8(vrd2_t x, vid2_t iy, vid2_t mask, double func(double, long long))
306 {
307   int i;
308   vrd2_t r;
309   for (i = 0 ; i < 2 ; i++) {
310     if (mask[i])
311       r[i] = func(x[i], iy[i]);
312   }
313   return r;
314 }
315 
316 
317 vcs1_t
__ZGVxN1v__mth_i_vc4(vcs1_t x,float _Complex func (float _Complex))318 __ZGVxN1v__mth_i_vc4(vcs1_t x, float _Complex func(float _Complex))
319 {
320   int i;
321   float _Complex tx;
322   *(vcs1_t *)&tx = x;
323   tx = func(tx);
324   return *(vcs1_t *)&tx;
325 }
326 
327 vcs1_t
__ZGVxN1vv__mth_i_vc4vc4(vcs1_t x,vcs1_t y,float _Complex func (float _Complex,float _Complex))328 __ZGVxN1vv__mth_i_vc4vc4(vcs1_t x, vcs1_t y, float _Complex func(float _Complex, float _Complex))
329 {
330   int i;
331   float _Complex tx;
332   float _Complex ty;
333   *(vcs1_t *)&tx = x;
334   *(vcs1_t *)&ty = y;
335   tx = func(tx, ty);
336   return *(vcs1_t *)&tx;
337 }
338 
339 vcs2_t
__ZGVxN2v__mth_i_vc4(vcs2_t x,float _Complex func (float _Complex))340 __ZGVxN2v__mth_i_vc4(vcs2_t x, float _Complex func(float _Complex))
341 {
342   int i;
343   float _Complex tx[2];
344   *(vcs2_t *)&tx = x;
345   for (i = 0 ; i < 2 ; i++) {
346     tx[i] = func(tx[i]);
347   }
348   return *(vcs2_t *)&tx;
349 }
350 
351 vcs2_t
__ZGVxN2vv__mth_i_vc4vc4(vcs2_t x,vcs2_t y,float _Complex func (float _Complex,float _Complex))352 __ZGVxN2vv__mth_i_vc4vc4(vcs2_t x, vcs2_t y, float _Complex func(float _Complex, float _Complex))
353 {
354   int i;
355   float _Complex tx[2];
356   float _Complex ty[2];
357   *(vcs2_t *)&tx = x;
358   *(vcs2_t *)&ty = y;
359   for (i = 0 ; i < 2 ; i++) {
360     tx[i] = func(tx[i], ty[i]);
361   }
362   return *(vcs2_t *)&tx;
363 }
364 
365 vcd1_t
__ZGVxN1v__mth_i_vc8(vcd1_t x,double _Complex func (double _Complex))366 __ZGVxN1v__mth_i_vc8(vcd1_t x, double _Complex func(double _Complex))
367 {
368   int i;
369   double _Complex tx;
370   *(vcd1_t *)&tx = x;
371   tx = func(tx);
372   return *(vcd1_t *)&tx;
373 }
374 
375 vcd1_t
__ZGVxN1vv__mth_i_vc8vc8(vcd1_t x,vcd1_t y,double _Complex func (double _Complex,double _Complex))376 __ZGVxN1vv__mth_i_vc8vc8(vcd1_t x, vcd1_t y, double _Complex func(double _Complex, double _Complex))
377 {
378   int i;
379   double _Complex tx;
380   double _Complex ty;
381   *(vcd1_t *)&tx = x;
382   *(vcd1_t *)&ty = y;
383   tx = func(tx, ty);
384   return *(vcd1_t *)&tx;
385 }
386 
387 vcs1_t
__ZGVxN1v__mth_i_vc4si4(vcs1_t x,int32_t iy,float _Complex func (float _Complex,int32_t))388 __ZGVxN1v__mth_i_vc4si4(vcs1_t x, int32_t iy, float _Complex func(float _Complex, int32_t))
389 {
390   int i;
391   float _Complex tx;
392   *(vcs1_t *)&tx = x;
393   tx = func(tx, iy);
394   return *(vcs1_t *)&tx;
395 }
396 
397 vcs1_t
__ZGVxN1v__mth_i_vc4si8(vcs1_t x,long long iy,float _Complex func (float _Complex,long long))398 __ZGVxN1v__mth_i_vc4si8(vcs1_t x, long long iy, float _Complex func(float _Complex, long long))
399 {
400   int i;
401   float _Complex tx;
402   *(vcs1_t *)&tx = x;
403   tx = func(tx, iy);
404   return *(vcs1_t *)&tx;
405 }
406 
407 vcd1_t
__ZGVxN1v__mth_i_vc8si4(vcd1_t x,int32_t iy,double _Complex func (double _Complex,int32_t))408 __ZGVxN1v__mth_i_vc8si4(vcd1_t x, int32_t iy, double _Complex func(double _Complex, int32_t))
409 {
410   int i;
411   double _Complex tx;
412   *(vcd1_t *)&tx = x;
413   tx = func(tx, iy);
414   return *(vcd1_t *)&tx;
415 }
416 
417 vcd1_t
__ZGVxN1v__mth_i_vc8si8(vcd1_t x,long long iy,double _Complex func (double _Complex,long long))418 __ZGVxN1v__mth_i_vc8si8(vcd1_t x, long long iy, double _Complex func(double _Complex, long long))
419 {
420   int i;
421   double _Complex tx;
422   *(vcd1_t *)&tx = x;
423   tx = func(tx, iy);
424   return *(vcd1_t *)&tx;
425 }
426