/*
 * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
17 #include "mth_intrinsics.h"
18
19 vrs4_t
__ZGVxN4v__mth_i_vr4(vrs4_t x,float func (float))20 __ZGVxN4v__mth_i_vr4(vrs4_t x, float func(float))
21 {
22 int i;
23 vrs4_t r;
24 for (i = 0; i < 4; i++) {
25 r[i] = func(x[i]);
26 }
27 return r;
28 }
29
30 vrs4_t
__ZGVxM4v__mth_i_vr4(vrs4_t x,vis4_t mask,float func (float))31 __ZGVxM4v__mth_i_vr4(vrs4_t x, vis4_t mask, float func(float))
32 {
33 int i;
34 vrs4_t r;
35 for (i = 0; i < 4; i++) {
36 if (mask[i])
37 r[i] = func(x[i]);
38 }
39 return r;
40 }
41
42 vrs4_t
__ZGVxN4vv__mth_i_vr4vr4(vrs4_t x,vrs4_t y,float func (float,float))43 __ZGVxN4vv__mth_i_vr4vr4(vrs4_t x, vrs4_t y, float func(float, float))
44 {
45 int i;
46 vrs4_t r;
47 for (i = 0; i < 4; i++) {
48 r[i] = func(x[i], y[i]);
49 }
50 return r;
51 }
52
53 vrs4_t
__ZGVxM4vv__mth_i_vr4vr4(vrs4_t x,vrs4_t y,vis4_t mask,float func (float,float))54 __ZGVxM4vv__mth_i_vr4vr4(vrs4_t x, vrs4_t y, vis4_t mask, float func(float, float))
55 {
56 int i;
57 vrs4_t r;
58 for (i = 0; i < 4; i++) {
59 if (mask[i])
60 r[i] = func(x[i], y[i]);
61 }
62 return r;
63 }
64
65 vrd2_t
__ZGVxN2v__mth_i_vr8(vrd2_t x,double func (double))66 __ZGVxN2v__mth_i_vr8(vrd2_t x, double func(double))
67 {
68 int i;
69 vrd2_t r;
70 for (i = 0; i < 2; i++) {
71 r[i] = func(x[i]);
72 }
73 return r;
74 }
75
76 vrd2_t
__ZGVxM2v__mth_i_vr8(vrd2_t x,vid2_t mask,double func (double))77 __ZGVxM2v__mth_i_vr8(vrd2_t x, vid2_t mask, double func(double))
78 {
79 int i;
80 vrd2_t r;
81 for (i = 0; i < 2; i++) {
82 if (mask[i])
83 r[i] = func(x[i]);
84 }
85 return r;
86 }
87
88 vrd2_t
__ZGVxN2vv__mth_i_vr8vr8(vrd2_t x,vrd2_t y,double func (double,double))89 __ZGVxN2vv__mth_i_vr8vr8(vrd2_t x, vrd2_t y, double func(double, double))
90 {
91 int i;
92 vrd2_t r;
93 for (i = 0; i < 2; i++) {
94 r[i] = func(x[i], y[i]);
95 }
96 return r;
97 }
98
99 vrd2_t
__ZGVxM2vv__mth_i_vr8vr8(vrd2_t x,vrd2_t y,vid2_t mask,double func (double,double))100 __ZGVxM2vv__mth_i_vr8vr8(vrd2_t x, vrd2_t y, vid2_t mask, double func(double, double))
101 {
102 int i;
103 vrd2_t r;
104 for (i = 0; i < 2; i++) {
105 if (mask[i])
106 r[i] = func(x[i], y[i]);
107 }
108 return r;
109 }
110
111 vrs4_t
__ZGVxN4v__mth_i_vr4si4(vrs4_t x,int32_t iy,float func (float,int32_t))112 __ZGVxN4v__mth_i_vr4si4(vrs4_t x, int32_t iy, float func(float, int32_t))
113 {
114 int i;
115 vrs4_t r;
116 for (i = 0 ; i < 4 ; i++) {
117 r[i] = func(x[i], iy);
118 }
119 return r;
120 }
121
122 vrs4_t
__ZGVxM4v__mth_i_vr4si4(vrs4_t x,int32_t iy,vis4_t mask,float func (float,int32_t))123 __ZGVxM4v__mth_i_vr4si4(vrs4_t x, int32_t iy, vis4_t mask, float func(float, int32_t))
124 {
125 int i;
126 vrs4_t r;
127 for (i = 0 ; i < 4 ; i++) {
128 if (mask[i])
129 r[i] = func(x[i], iy);
130 }
131 return r;
132 }
133
134 vrs4_t
__ZGVxN4vv__mth_i_vr4vi4(vrs4_t x,vis4_t iy,float func (float,int32_t))135 __ZGVxN4vv__mth_i_vr4vi4(vrs4_t x, vis4_t iy, float func(float, int32_t))
136 {
137 int i;
138 vrs4_t r;
139 for (i = 0 ; i < 4 ; i++) {
140 r[i] = func(x[i], iy[i]);
141 }
142 return r;
143 }
144
145 vrs4_t
__ZGVxM4vv__mth_i_vr4vi4(vrs4_t x,vis4_t iy,vis4_t mask,float func (float,int32_t))146 __ZGVxM4vv__mth_i_vr4vi4(vrs4_t x, vis4_t iy, vis4_t mask, float func(float, int32_t))
147 {
148 int i;
149 vrs4_t r;
150 for (i = 0 ; i < 4 ; i++) {
151 if (mask[i])
152 r[i] = func(x[i], iy[i]);
153 }
154 return r;
155 }
156
157 vrs4_t
__ZGVxN4v__mth_i_vr4si8(vrs4_t x,long long iy,float func (float,long long))158 __ZGVxN4v__mth_i_vr4si8(vrs4_t x, long long iy, float func(float, long long))
159 {
160 int i;
161 vrs4_t r;
162 for (i = 0 ; i < 4 ; i++) {
163 r[i] = func(x[i], iy);
164 }
165 return r;
166 }
167
168 vrs4_t
__ZGVxM4v__mth_i_vr4si8(vrs4_t x,long long iy,vis4_t mask,float func (float,long long))169 __ZGVxM4v__mth_i_vr4si8(vrs4_t x, long long iy, vis4_t mask, float func(float, long long))
170 {
171 int i;
172 vrs4_t r;
173 for (i = 0 ; i < 4 ; i++) {
174 if (mask[i])
175 r[i] = func(x[i], iy);
176 }
177 return r;
178 }
179
180 vrs4_t
__ZGVxN4vv__mth_i_vr4vi8(vrs4_t x,vid2_t iyu,vid2_t iyl,float func (float,long long))181 __ZGVxN4vv__mth_i_vr4vi8(vrs4_t x, vid2_t iyu, vid2_t iyl, float func(float, long long))
182 {
183 int i;
184 vrs4_t r;
185 for (i = 0 ; i < 2 ; i++) {
186 r[i] = func(x[i], iyu[i]);
187 }
188 for (i = 2 ; i < 4 ; i++) {
189 r[i] = func(x[i], iyl[i-2]);
190 }
191 return r;
192 }
193
194 vrs4_t
__ZGVxM4vv__mth_i_vr4vi8(vrs4_t x,vid2_t iyu,vid2_t iyl,vis4_t mask,float func (float,long long))195 __ZGVxM4vv__mth_i_vr4vi8(vrs4_t x, vid2_t iyu, vid2_t iyl, vis4_t mask, float func(float, long long))
196 {
197 int i;
198 vrs4_t r;
199 for (i = 0 ; i < 2 ; i++) {
200 if (mask[i])
201 r[i] = func(x[i], iyu[i]);
202 }
203 for (i = 2 ; i < 4 ; i++) {
204 if (mask[i])
205 r[i] = func(x[i], iyl[i-2]);
206 }
207 return r;
208 }
209
210
211 //---------------
212
213
214 vrd2_t
__ZGVxN2v__mth_i_vr8si4(vrd2_t x,int32_t iy,double func (double,int32_t))215 __ZGVxN2v__mth_i_vr8si4(vrd2_t x, int32_t iy, double func(double, int32_t))
216 {
217 int i;
218 vrd2_t r;
219 for (i = 0 ; i < 2 ; i++) {
220 r[i] = func(x[i], iy);
221 }
222 return r;
223 }
224
225 vrd2_t
__ZGVxM2v__mth_i_vr8si4(vrd2_t x,int32_t iy,vid2_t mask,double func (double,int32_t))226 __ZGVxM2v__mth_i_vr8si4(vrd2_t x, int32_t iy, vid2_t mask, double func(double, int32_t))
227 {
228 int i;
229 vrd2_t r;
230 for (i = 0 ; i < 2 ; i++) {
231 if (mask[i])
232 r[i] = func(x[i], iy);
233 }
234 return r;
235 }
236
/*
 * __ZGVxN2vv__mth_i_vr8vi4 and __ZGVxM2vv__mth_i_vr8vi4 should
 * be defined as:
 * __ZGVxN2vv__mth_i_vr8vi4(vrd2_t x, vis2_t iy, double func(double, int32_t))
 * __ZGVxM2vv__mth_i_vr8vi4(vrd2_t x, vis2_t iy, vid2_t mask, double func(double, int32_t))
 *
 * But the POWER architecture needs the 32-bit integer vectors to
 * occupy the full 128 bits of a vector register.
 */
246
247 vrd2_t
__ZGVxN2vv__mth_i_vr8vi4(vrd2_t x,vis4_t iy,double func (double,int32_t))248 __ZGVxN2vv__mth_i_vr8vi4(vrd2_t x, vis4_t iy, double func(double, int32_t))
249 {
250 int i;
251 vrd2_t r;
252 for (i = 0 ; i < 2 ; i++) {
253 r[i] = func(x[i], iy[i]);
254 }
255 return r;
256 }
257
258 vrd2_t
__ZGVxM2vv__mth_i_vr8vi4(vrd2_t x,vis4_t iy,vid2_t mask,double func (double,int32_t))259 __ZGVxM2vv__mth_i_vr8vi4(vrd2_t x, vis4_t iy, vid2_t mask, double func(double, int32_t))
260 {
261 int i;
262 vrd2_t r;
263 for (i = 0 ; i < 2 ; i++) {
264 if (mask[i])
265 r[i] = func(x[i], iy[i]);
266 }
267 return r;
268 }
269
270 vrd2_t
__ZGVxN2v__mth_i_vr8si8(vrd2_t x,long long iy,double func (double,long long))271 __ZGVxN2v__mth_i_vr8si8(vrd2_t x, long long iy, double func(double, long long))
272 {
273 int i;
274 vrd2_t r;
275 for (i = 0 ; i < 2 ; i++) {
276 r[i] = func(x[i], iy);
277 }
278 return r;
279 }
280
281 vrd2_t
__ZGVxM2v__mth_i_vr8si8(vrd2_t x,long long iy,vid2_t mask,double func (double,long long))282 __ZGVxM2v__mth_i_vr8si8(vrd2_t x, long long iy, vid2_t mask, double func(double, long long))
283 {
284 int i;
285 vrd2_t r;
286 for (i = 0 ; i < 2 ; i++) {
287 if (mask[i])
288 r[i] = func(x[i], iy);
289 }
290 return r;
291 }
292
293 vrd2_t
__ZGVxN2vv__mth_i_vr8vi8(vrd2_t x,vid2_t iy,double func (double,long long))294 __ZGVxN2vv__mth_i_vr8vi8(vrd2_t x, vid2_t iy, double func(double, long long))
295 {
296 int i;
297 vrd2_t r;
298 for (i = 0 ; i < 2 ; i++) {
299 r[i] = func(x[i], iy[i]);
300 }
301 return r;
302 }
303
304 vrd2_t
__ZGVxM2vv__mth_i_vr8vi8(vrd2_t x,vid2_t iy,vid2_t mask,double func (double,long long))305 __ZGVxM2vv__mth_i_vr8vi8(vrd2_t x, vid2_t iy, vid2_t mask, double func(double, long long))
306 {
307 int i;
308 vrd2_t r;
309 for (i = 0 ; i < 2 ; i++) {
310 if (mask[i])
311 r[i] = func(x[i], iy[i]);
312 }
313 return r;
314 }
315
316
317 vcs1_t
__ZGVxN1v__mth_i_vc4(vcs1_t x,float _Complex func (float _Complex))318 __ZGVxN1v__mth_i_vc4(vcs1_t x, float _Complex func(float _Complex))
319 {
320 int i;
321 float _Complex tx;
322 *(vcs1_t *)&tx = x;
323 tx = func(tx);
324 return *(vcs1_t *)&tx;
325 }
326
327 vcs1_t
__ZGVxN1vv__mth_i_vc4vc4(vcs1_t x,vcs1_t y,float _Complex func (float _Complex,float _Complex))328 __ZGVxN1vv__mth_i_vc4vc4(vcs1_t x, vcs1_t y, float _Complex func(float _Complex, float _Complex))
329 {
330 int i;
331 float _Complex tx;
332 float _Complex ty;
333 *(vcs1_t *)&tx = x;
334 *(vcs1_t *)&ty = y;
335 tx = func(tx, ty);
336 return *(vcs1_t *)&tx;
337 }
338
339 vcs2_t
__ZGVxN2v__mth_i_vc4(vcs2_t x,float _Complex func (float _Complex))340 __ZGVxN2v__mth_i_vc4(vcs2_t x, float _Complex func(float _Complex))
341 {
342 int i;
343 float _Complex tx[2];
344 *(vcs2_t *)&tx = x;
345 for (i = 0 ; i < 2 ; i++) {
346 tx[i] = func(tx[i]);
347 }
348 return *(vcs2_t *)&tx;
349 }
350
351 vcs2_t
__ZGVxN2vv__mth_i_vc4vc4(vcs2_t x,vcs2_t y,float _Complex func (float _Complex,float _Complex))352 __ZGVxN2vv__mth_i_vc4vc4(vcs2_t x, vcs2_t y, float _Complex func(float _Complex, float _Complex))
353 {
354 int i;
355 float _Complex tx[2];
356 float _Complex ty[2];
357 *(vcs2_t *)&tx = x;
358 *(vcs2_t *)&ty = y;
359 for (i = 0 ; i < 2 ; i++) {
360 tx[i] = func(tx[i], ty[i]);
361 }
362 return *(vcs2_t *)&tx;
363 }
364
365 vcd1_t
__ZGVxN1v__mth_i_vc8(vcd1_t x,double _Complex func (double _Complex))366 __ZGVxN1v__mth_i_vc8(vcd1_t x, double _Complex func(double _Complex))
367 {
368 int i;
369 double _Complex tx;
370 *(vcd1_t *)&tx = x;
371 tx = func(tx);
372 return *(vcd1_t *)&tx;
373 }
374
375 vcd1_t
__ZGVxN1vv__mth_i_vc8vc8(vcd1_t x,vcd1_t y,double _Complex func (double _Complex,double _Complex))376 __ZGVxN1vv__mth_i_vc8vc8(vcd1_t x, vcd1_t y, double _Complex func(double _Complex, double _Complex))
377 {
378 int i;
379 double _Complex tx;
380 double _Complex ty;
381 *(vcd1_t *)&tx = x;
382 *(vcd1_t *)&ty = y;
383 tx = func(tx, ty);
384 return *(vcd1_t *)&tx;
385 }
386
387 vcs1_t
__ZGVxN1v__mth_i_vc4si4(vcs1_t x,int32_t iy,float _Complex func (float _Complex,int32_t))388 __ZGVxN1v__mth_i_vc4si4(vcs1_t x, int32_t iy, float _Complex func(float _Complex, int32_t))
389 {
390 int i;
391 float _Complex tx;
392 *(vcs1_t *)&tx = x;
393 tx = func(tx, iy);
394 return *(vcs1_t *)&tx;
395 }
396
397 vcs1_t
__ZGVxN1v__mth_i_vc4si8(vcs1_t x,long long iy,float _Complex func (float _Complex,long long))398 __ZGVxN1v__mth_i_vc4si8(vcs1_t x, long long iy, float _Complex func(float _Complex, long long))
399 {
400 int i;
401 float _Complex tx;
402 *(vcs1_t *)&tx = x;
403 tx = func(tx, iy);
404 return *(vcs1_t *)&tx;
405 }
406
407 vcd1_t
__ZGVxN1v__mth_i_vc8si4(vcd1_t x,int32_t iy,double _Complex func (double _Complex,int32_t))408 __ZGVxN1v__mth_i_vc8si4(vcd1_t x, int32_t iy, double _Complex func(double _Complex, int32_t))
409 {
410 int i;
411 double _Complex tx;
412 *(vcd1_t *)&tx = x;
413 tx = func(tx, iy);
414 return *(vcd1_t *)&tx;
415 }
416
417 vcd1_t
__ZGVxN1v__mth_i_vc8si8(vcd1_t x,long long iy,double _Complex func (double _Complex,long long))418 __ZGVxN1v__mth_i_vc8si8(vcd1_t x, long long iy, double _Complex func(double _Complex, long long))
419 {
420 int i;
421 double _Complex tx;
422 *(vcd1_t *)&tx = x;
423 tx = func(tx, iy);
424 return *(vcd1_t *)&tx;
425 }
426