1 /*
2 * Copyright © 2018 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark (robdclark@gmail.com)
25 */
26
27 #include "math.h"
28 #include "nir/nir_builtin_builder.h"
29
30 #include "util/u_printf.h"
31 #include "vtn_private.h"
32 #include "OpenCL.std.h"
33
34 typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,
35 uint32_t opcode,
36 unsigned num_srcs, nir_ssa_def **srcs,
37 struct vtn_type **src_types,
38 const struct vtn_type *dest_type);
39
/* Map a SPIR-V storage class to the numeric LLVM/SPIR address space used
 * in mangled names (the "U3AS<n>" qualifier).  Returns -1 for storage
 * classes that have no corresponding CL address space.
 */
static int to_llvm_address_space(SpvStorageClass mode)
{
   switch (mode) {
   case SpvStorageClassPrivate:
   case SpvStorageClassFunction:
      return 0;
   case SpvStorageClassCrossWorkgroup:
      return 1;
   case SpvStorageClassUniform:
   case SpvStorageClassUniformConstant:
      return 2;
   case SpvStorageClassWorkgroup:
      return 3;
   default:
      return -1;
   }
}
52
53
/* Build the Itanium/SPIR-style mangled name for a libclc function.
 *
 * Emits "_Z<len><name>" followed by one encoded type per argument:
 * pointers get 'P' plus an optional "U3AS<n>" address-space qualifier,
 * const-qualified arguments (bit i set in const_mask) get 'K', vectors
 * get "Dv<n>_" — or the substitution token "S_" when an identical vector
 * type was already emitted — and the element type is encoded with the
 * usual one-letter primitive codes (or the ocl_sampler/ocl_event names).
 *
 * *outstring is allocated with strdup(); the caller must free() it.
 *
 * NOTE(review): the name is assembled in a fixed 256-byte buffer with
 * unchecked sprintf calls — assumes mangled names stay short; confirm.
 */
static void
vtn_opencl_mangle(const char *in_name,
                  uint32_t const_mask,
                  int ntypes, struct vtn_type **src_types,
                  char **outstring)
{
   char local_name[256] = "";
   /* args_str walks the tail of local_name as each argument is encoded. */
   char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);

   for (unsigned i = 0; i < ntypes; ++i) {
      const struct glsl_type *type = src_types[i]->type;
      enum vtn_base_type base_type = src_types[i]->base_type;
      if (src_types[i]->base_type == vtn_base_type_pointer) {
         *(args_str++) = 'P';
         /* Address space 0 is the default and is not spelled out. */
         int address_space = to_llvm_address_space(src_types[i]->storage_class);
         if (address_space > 0)
            args_str += sprintf(args_str, "U3AS%d", address_space);

         /* From here on, mangle the pointee type. */
         type = src_types[i]->deref->type;
         base_type = src_types[i]->deref->base_type;
      }

      if (const_mask & (1 << i))
         *(args_str++) = 'K';

      unsigned num_elements = glsl_get_components(type);
      if (num_elements > 1) {
         /* Vectors are not treated as built-ins for mangling, so check for substitution.
          * In theory, we'd need to know which substitution value this is. In practice,
          * the functions we need from libclc only support 1
          */
         bool substitution = false;
         for (unsigned j = 0; j < i; ++j) {
            const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?
               src_types[j]->deref->type : src_types[j]->type;
            if (type == other_type) {
               substitution = true;
               break;
            }
         }

         if (substitution) {
            args_str += sprintf(args_str, "S_");
            continue;
         } else
            args_str += sprintf(args_str, "Dv%d_", num_elements);
      }

      const char *suffix = NULL;
      switch (base_type) {
      case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;
      case vtn_base_type_event: suffix = "9ocl_event"; break;
      default: {
         /* One-letter Itanium codes for the scalar element type. */
         const char *primitives[] = {
            [GLSL_TYPE_UINT] = "j",
            [GLSL_TYPE_INT] = "i",
            [GLSL_TYPE_FLOAT] = "f",
            [GLSL_TYPE_FLOAT16] = "Dh",
            [GLSL_TYPE_DOUBLE] = "d",
            [GLSL_TYPE_UINT8] = "h",
            [GLSL_TYPE_INT8] = "c",
            [GLSL_TYPE_UINT16] = "t",
            [GLSL_TYPE_INT16] = "s",
            [GLSL_TYPE_UINT64] = "m",
            [GLSL_TYPE_INT64] = "l",
            [GLSL_TYPE_BOOL] = "b",
            [GLSL_TYPE_ERROR] = NULL,
         };
         enum glsl_base_type glsl_base_type = glsl_get_base_type(type);
         assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);
         suffix = primitives[glsl_base_type];
         break;
      }
      }
      args_str += sprintf(args_str, "%s", suffix);
   }

   *outstring = strdup(local_name);
}
133
/* Mangle `name` for the given argument types and look the function up:
 * first in the shader being built, then in the companion libclc shader
 * (b->options->clc_shader).  When the function exists only in the clc
 * shader, a declaration mirroring its parameters is created in the
 * current shader so it can be called from here.
 *
 * Reports failure through vtn_fail() when no match exists in either
 * shader.
 */
static nir_function *mangle_and_find(struct vtn_builder *b,
                                     const char *name,
                                     uint32_t const_mask,
                                     uint32_t num_srcs,
                                     struct vtn_type **src_types)
{
   char *mname;
   nir_function *found = NULL;

   vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);
   /* try and find in current shader first. */
   nir_foreach_function(funcs, b->shader) {
      if (!strcmp(funcs->name, mname)) {
         found = funcs;
         break;
      }
   }
   /* if not found here find in clc shader and create a decl mirroring it */
   if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {
      nir_foreach_function(funcs, b->options->clc_shader) {
         if (!strcmp(funcs->name, mname)) {
            found = funcs;
            break;
         }
      }
      if (found) {
         /* Create a parameter-compatible declaration in this shader. */
         nir_function *decl = nir_function_create(b->shader, mname);
         decl->num_params = found->num_params;
         decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);
         for (unsigned i = 0; i < decl->num_params; i++) {
            decl->params[i] = found->params[i];
         }
         found = decl;
      }
   }
   if (!found)
      vtn_fail("Can't find clc function %s\n", mname);
   free(mname);
   return found;
}
174
/* Emit a call to the mangled libclc function `name`.
 *
 * When dest_type is non-NULL, a local temporary of that type is created
 * and its deref is passed as the first call parameter (return-by-
 * pointer convention); *ret_deref_ptr is set to that deref so the
 * caller can nir_load_deref() the result, or to NULL for void calls.
 * Returns false only if the function could not be found (which
 * mangle_and_find reports via vtn_fail).
 */
static bool call_mangled_function(struct vtn_builder *b,
                                  const char *name,
                                  uint32_t const_mask,
                                  uint32_t num_srcs,
                                  struct vtn_type **src_types,
                                  const struct vtn_type *dest_type,
                                  nir_ssa_def **srcs,
                                  nir_deref_instr **ret_deref_ptr)
{
   nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);
   if (!found)
      return false;

   nir_call_instr *call = nir_call_instr_create(b->shader, found);

   nir_deref_instr *ret_deref = NULL;
   uint32_t param_idx = 0;
   if (dest_type) {
      /* Return slot goes first, before the actual arguments. */
      nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,
                                                        glsl_get_bare_type(dest_type->type),
                                                        "return_tmp");
      ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
      call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
   }

   for (unsigned i = 0; i < num_srcs; i++)
      call->params[param_idx++] = nir_src_for_ssa(srcs[i]);
   nir_builder_instr_insert(&b->nb, &call->instr);

   *ret_deref_ptr = ret_deref;
   return true;
}
207
208 static void
handle_instr(struct vtn_builder * b,uint32_t opcode,const uint32_t * w_src,unsigned num_srcs,const uint32_t * w_dest,nir_handler handler)209 handle_instr(struct vtn_builder *b, uint32_t opcode,
210 const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)
211 {
212 struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;
213
214 nir_ssa_def *srcs[5] = { NULL };
215 struct vtn_type *src_types[5] = { NULL };
216 vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
217 for (unsigned i = 0; i < num_srcs; i++) {
218 struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
219 struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);
220 srcs[i] = ssa->def;
221 src_types[i] = val->type;
222 }
223
224 nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);
225 if (result) {
226 vtn_push_nir_ssa(b, w_dest[1], result);
227 } else {
228 vtn_assert(dest_type == NULL);
229 }
230 }
231
/* One-to-one mapping from OpenCL.std extended opcodes to NIR ALU ops.
 * Opcodes without a direct single-op NIR equivalent vtn_fail() here.
 */
static nir_op
nir_alu_op_for_opencl_opcode(struct vtn_builder *b,
                             enum OpenCLstd_Entrypoints opcode)
{
   switch (opcode) {
   case OpenCLstd_Fabs: return nir_op_fabs;
   case OpenCLstd_SAbs: return nir_op_iabs;
   case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;
   case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;
   case OpenCLstd_Ceil: return nir_op_fceil;
   case OpenCLstd_Floor: return nir_op_ffloor;
   case OpenCLstd_SHadd: return nir_op_ihadd;
   case OpenCLstd_UHadd: return nir_op_uhadd;
   case OpenCLstd_Fmax: return nir_op_fmax;
   case OpenCLstd_SMax: return nir_op_imax;
   case OpenCLstd_UMax: return nir_op_umax;
   case OpenCLstd_Fmin: return nir_op_fmin;
   case OpenCLstd_SMin: return nir_op_imin;
   case OpenCLstd_UMin: return nir_op_umin;
   case OpenCLstd_Mix: return nir_op_flrp;
   case OpenCLstd_Native_cos: return nir_op_fcos;
   case OpenCLstd_Native_divide: return nir_op_fdiv;
   case OpenCLstd_Native_exp2: return nir_op_fexp2;
   case OpenCLstd_Native_log2: return nir_op_flog2;
   case OpenCLstd_Native_powr: return nir_op_fpow;
   case OpenCLstd_Native_recip: return nir_op_frcp;
   case OpenCLstd_Native_rsqrt: return nir_op_frsq;
   case OpenCLstd_Native_sin: return nir_op_fsin;
   case OpenCLstd_Native_sqrt: return nir_op_fsqrt;
   case OpenCLstd_SMul_hi: return nir_op_imul_high;
   case OpenCLstd_UMul_hi: return nir_op_umul_high;
   case OpenCLstd_Popcount: return nir_op_bit_count;
   case OpenCLstd_SRhadd: return nir_op_irhadd;
   case OpenCLstd_URhadd: return nir_op_urhadd;
   case OpenCLstd_Rsqrt: return nir_op_frsq;
   case OpenCLstd_Sign: return nir_op_fsign;
   case OpenCLstd_Sqrt: return nir_op_fsqrt;
   case OpenCLstd_SSub_sat: return nir_op_isub_sat;
   case OpenCLstd_USub_sat: return nir_op_usub_sat;
   case OpenCLstd_Trunc: return nir_op_ftrunc;
   case OpenCLstd_Rint: return nir_op_fround_even;
   case OpenCLstd_Half_divide: return nir_op_fdiv;
   case OpenCLstd_Half_recip: return nir_op_frcp;
   /* uhm... abs() of an unsigned value is the identity */
   case OpenCLstd_UAbs: return nir_op_mov;
   default:
      vtn_fail("No NIR equivalent");
   }
}
281
282 static nir_ssa_def *
handle_alu(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)283 handle_alu(struct vtn_builder *b, uint32_t opcode,
284 unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
285 const struct vtn_type *dest_type)
286 {
287 nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),
288 srcs[0], srcs[1], srcs[2], NULL);
289 if (opcode == OpenCLstd_Popcount)
290 ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));
291 return ret;
292 }
293
/* OpenCL.std opcodes implemented by calling the like-named libclc
 * function rather than by direct NIR lowering.  Indexed by opcode;
 * entries hold the unmangled libclc name, unlisted opcodes are NULL.
 */
#define REMAP(op, str) [OpenCLstd_##op] = { str }
static const struct {
   const char *fn;
} remap_table[] = {
   REMAP(Distance, "distance"),
   REMAP(Fast_distance, "fast_distance"),
   REMAP(Fast_length, "fast_length"),
   REMAP(Fast_normalize, "fast_normalize"),
   REMAP(Half_rsqrt, "half_rsqrt"),
   REMAP(Half_sqrt, "half_sqrt"),
   REMAP(Length, "length"),
   REMAP(Normalize, "normalize"),
   REMAP(Degrees, "degrees"),
   REMAP(Radians, "radians"),
   REMAP(Rotate, "rotate"),
   REMAP(Smoothstep, "smoothstep"),
   REMAP(Step, "step"),

   REMAP(Pow, "pow"),
   REMAP(Pown, "pown"),
   REMAP(Powr, "powr"),
   REMAP(Rootn, "rootn"),
   REMAP(Modf, "modf"),

   REMAP(Acos, "acos"),
   REMAP(Acosh, "acosh"),
   REMAP(Acospi, "acospi"),
   REMAP(Asin, "asin"),
   REMAP(Asinh, "asinh"),
   REMAP(Asinpi, "asinpi"),
   REMAP(Atan, "atan"),
   REMAP(Atan2, "atan2"),
   REMAP(Atanh, "atanh"),
   REMAP(Atanpi, "atanpi"),
   REMAP(Atan2pi, "atan2pi"),
   REMAP(Cos, "cos"),
   REMAP(Cosh, "cosh"),
   REMAP(Cospi, "cospi"),
   REMAP(Sin, "sin"),
   REMAP(Sinh, "sinh"),
   REMAP(Sinpi, "sinpi"),
   REMAP(Tan, "tan"),
   REMAP(Tanh, "tanh"),
   REMAP(Tanpi, "tanpi"),
   REMAP(Sincos, "sincos"),
   REMAP(Fract, "fract"),
   REMAP(Frexp, "frexp"),
   REMAP(Fma, "fma"),
   REMAP(Fmod, "fmod"),

   /* The "half_" variants map to the full-precision libclc functions. */
   REMAP(Half_cos, "cos"),
   REMAP(Half_exp, "exp"),
   REMAP(Half_exp2, "exp2"),
   REMAP(Half_exp10, "exp10"),
   REMAP(Half_log, "log"),
   REMAP(Half_log2, "log2"),
   REMAP(Half_log10, "log10"),
   REMAP(Half_powr, "powr"),
   REMAP(Half_sin, "sin"),
   REMAP(Half_tan, "tan"),

   REMAP(Remainder, "remainder"),
   REMAP(Remquo, "remquo"),
   REMAP(Hypot, "hypot"),
   REMAP(Exp, "exp"),
   REMAP(Exp2, "exp2"),
   REMAP(Exp10, "exp10"),
   REMAP(Expm1, "expm1"),
   REMAP(Ldexp, "ldexp"),

   REMAP(Ilogb, "ilogb"),
   REMAP(Log, "log"),
   REMAP(Log2, "log2"),
   REMAP(Log10, "log10"),
   REMAP(Log1p, "log1p"),
   REMAP(Logb, "logb"),

   REMAP(Cbrt, "cbrt"),
   REMAP(Erfc, "erfc"),
   REMAP(Erf, "erf"),

   REMAP(Lgamma, "lgamma"),
   REMAP(Lgamma_r, "lgamma_r"),
   REMAP(Tgamma, "tgamma"),

   REMAP(UMad_sat, "mad_sat"),
   REMAP(SMad_sat, "mad_sat"),

   REMAP(Shuffle, "shuffle"),
   REMAP(Shuffle2, "shuffle2"),
};
#undef REMAP
386
remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)387 static const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)
388 {
389 if (opcode >= (sizeof(remap_table) / sizeof(const char *)))
390 return NULL;
391 return remap_table[opcode].fn;
392 }
393
394 static struct vtn_type *
get_vtn_type_for_glsl_type(struct vtn_builder * b,const struct glsl_type * type)395 get_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)
396 {
397 struct vtn_type *ret = rzalloc(b, struct vtn_type);
398 assert(glsl_type_is_vector_or_scalar(type));
399 ret->type = type;
400 ret->length = glsl_get_vector_elements(type);
401 ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;
402 return ret;
403 }
404
405 static struct vtn_type *
get_pointer_type(struct vtn_builder * b,struct vtn_type * t,SpvStorageClass storage_class)406 get_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)
407 {
408 struct vtn_type *ret = rzalloc(b, struct vtn_type);
409 ret->type = nir_address_format_to_glsl_type(
410 vtn_mode_to_address_format(
411 b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));
412 ret->base_type = vtn_base_type_pointer;
413 ret->storage_class = storage_class;
414 ret->deref = t;
415 return ret;
416 }
417
418 static struct vtn_type *
get_signed_type(struct vtn_builder * b,struct vtn_type * t)419 get_signed_type(struct vtn_builder *b, struct vtn_type *t)
420 {
421 if (t->base_type == vtn_base_type_pointer) {
422 return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);
423 }
424 return get_vtn_type_for_glsl_type(
425 b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),
426 glsl_get_vector_elements(t->type)));
427 }
428
429 static nir_ssa_def *
handle_clc_fn(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,int num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)430 handle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
431 int num_srcs,
432 nir_ssa_def **srcs,
433 struct vtn_type **src_types,
434 const struct vtn_type *dest_type)
435 {
436 const char *name = remap_clc_opcode(opcode);
437 if (!name)
438 return NULL;
439
440 /* Some functions which take params end up with uint (or pointer-to-uint) being passed,
441 * which doesn't mangle correctly when the function expects int or pointer-to-int.
442 * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers
443 */
444 int signed_param = -1;
445 switch (opcode) {
446 case OpenCLstd_Frexp:
447 case OpenCLstd_Lgamma_r:
448 case OpenCLstd_Pown:
449 case OpenCLstd_Rootn:
450 case OpenCLstd_Ldexp:
451 signed_param = 1;
452 break;
453 case OpenCLstd_Remquo:
454 signed_param = 2;
455 break;
456 case OpenCLstd_SMad_sat: {
457 /* All parameters need to be converted to signed */
458 src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);
459 break;
460 }
461 default: break;
462 }
463
464 if (signed_param >= 0) {
465 src_types[signed_param] = get_signed_type(b, src_types[signed_param]);
466 }
467
468 nir_deref_instr *ret_deref = NULL;
469
470 if (!call_mangled_function(b, name, 0, num_srcs, src_types,
471 dest_type, srcs, &ret_deref))
472 return NULL;
473
474 return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
475 }
476
/* Lower OpenCL.std opcodes that need more than a single ALU op.  Fast
 * paths are built directly with NIR builders below; cases that `break`
 * out of the switch (including opcodes the backend wants lowered, e.g.
 * fmod/ldexp/fma with the corresponding lower_* shader option set) fall
 * through to a libclc call via handle_clc_fn().
 */
static nir_ssa_def *
handle_special(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   nir_builder *nb = &b->nb;
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;

   switch (cl_opcode) {
   case OpenCLstd_SAbs_diff:
      /* these work more easily in direct NIR */
      return nir_iabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_UAbs_diff:
      return nir_uabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_Bitselect:
      return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMad_hi:
      return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UMad_hi:
      return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMul24:
      return nir_imul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_UMul24:
      return nir_umul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_SMad24:
      return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);
   case OpenCLstd_UMad24:
      return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_FClamp:
      return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SClamp:
      return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UClamp:
      return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Copysign:
      return nir_copysign(nb, srcs[0], srcs[1]);
   case OpenCLstd_Cross:
      /* 4-component cross: see the CL spec — w component of the result is 0. */
      if (dest_type->length == 4)
         return nir_cross4(nb, srcs[0], srcs[1]);
      return nir_cross3(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fdim:
      return nir_fdim(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fmod:
      /* Backend wants fmod lowered: use the libclc fallback instead. */
      if (nb->shader->options->lower_fmod)
         break;
      return nir_fmod(nb, srcs[0], srcs[1]);
   case OpenCLstd_Mad:
      return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Maxmag:
      return nir_maxmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Minmag:
      return nir_minmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Nan:
      return nir_nan(nb, srcs[0]);
   case OpenCLstd_Nextafter:
      return nir_nextafter(nb, srcs[0], srcs[1]);
   case OpenCLstd_Normalize:
      return nir_normalize(nb, srcs[0]);
   case OpenCLstd_Clz:
      return nir_clz_u(nb, srcs[0]);
   case OpenCLstd_Ctz:
      return nir_ctz_u(nb, srcs[0]);
   case OpenCLstd_Select:
      return nir_select(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
      /* SPIR-V and CL have different defs for upsample, just implement in nir */
      return nir_upsample(nb, srcs[0], srcs[1]);
   case OpenCLstd_Native_exp:
      return nir_fexp(nb, srcs[0]);
   case OpenCLstd_Native_exp10:
      /* exp10(x) = exp2(x * log2(10)) */
      return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));
   case OpenCLstd_Native_log:
      return nir_flog(nb, srcs[0]);
   case OpenCLstd_Native_log10:
      /* log10(x) = log2(x) * log10(2) */
      return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));
   case OpenCLstd_Native_tan:
      return nir_ftan(nb, srcs[0]);
   case OpenCLstd_Ldexp:
      if (nb->shader->options->lower_ldexp)
         break;
      return nir_ldexp(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fma:
      /* FIXME: the software implementation only supports fp32 for now. */
      if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)
         break;
      return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
   default:
      break;
   }

   /* Fallback: call the libclc implementation. */
   nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (!ret)
      vtn_fail("No NIR equivalent");

   return ret;
}
574
/* Handle core (non-extended-instruction-set) SPIR-V opcodes that are
 * implemented by calling into libclc: OpGroupAsyncCopy and
 * OpGroupWaitEvents.  Returns the loaded result, or NULL for void
 * calls and unhandled opcodes.
 */
static nir_ssa_def *
handle_core(struct vtn_builder *b, uint32_t opcode,
            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
            const struct vtn_type *dest_type)
{
   nir_deref_instr *ret_deref = NULL;

   switch ((SpvOp)opcode) {
   case SpvOpGroupAsyncCopy: {
      /* Libclc doesn't include 3-component overloads of the async copy functions.
       * However, the CLC spec says:
       * async_work_group_copy and async_work_group_strided_copy for 3-component vector types
       * behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component
       * vector types
       */
      for (unsigned i = 0; i < num_srcs; ++i) {
         /* Rewrite pointer-to-vec3 argument types to pointer-to-vec4. */
         if (src_types[i]->base_type == vtn_base_type_pointer &&
             src_types[i]->deref->base_type == vtn_base_type_vector &&
             src_types[i]->deref->length == 3) {
            src_types[i] =
               get_pointer_type(b,
                                get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),
                                src_types[i]->storage_class);
         }
      }
      /* const_mask (1 << 1): the source pointer argument mangles as const. */
      if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   case SpvOpGroupWaitEvents: {
      /* First argument (num_events) mangles as plain int. */
      src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
      if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   default:
      return NULL;
   }

   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}
616
617
/* Shared lowering for the OpenCL vloadn/vstoren family (including the
 * _half and a-aligned variants).  Scalarizes into per-component loads
 * or stores through a ptr-as-array deref at `p + offset * n`, where n
 * is the component count (4 for 3-component "a"-aligned variants).
 * Half variants convert each component between half and the float /
 * double value type, using `rounding` on the store path.
 *
 * Word layout differs between load and store (`a` shifts the operand
 * indices by one): loads take (offset, pointer) at w[5], w[6]; stores
 * take (data, offset, pointer) at w[5], w[6], w[7].
 */
static void
_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                     const uint32_t *w, unsigned count, bool load,
                     bool vec_aligned, nir_rounding_mode rounding)
{
   struct vtn_type *type;
   if (load)
      type = vtn_get_type(b, w[1]);
   else
      type = vtn_get_value_type(b, w[5]);
   unsigned a = load ? 0 : 1;

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   unsigned components = glsl_get_vector_elements(type->type);

   nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);
   struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);

   struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];
   nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];

   /* Element offset: the CL offset argument counts whole vectors. */
   nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,
      (vec_aligned && components == 3) ? 4 : components);
   nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);

   unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :
                                      glsl_get_bit_size(type->type) / 8;
   enum glsl_base_type ptr_base_type =
      glsl_get_base_type(p->pointer->type->type);
   if (base_type != ptr_base_type) {
      /* Only the half <-> float/double conversion forms are legal. */
      vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||
                  (base_type != GLSL_TYPE_FLOAT &&
                   base_type != GLSL_TYPE_DOUBLE),
                  "vload/vstore cannot do type conversion. "
                  "vload/vstore_half can only convert from half to other "
                  "floating-point types.");

      /* Above-computed alignment was for floats/doubles, not halves */
      alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);
   }

   deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);

   for (int i = 0; i < components; i++) {
      nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);
      nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);

      if (load) {
         comps[i] = vtn_local_load(b, arr_deref, p->type->access);
         ncomps[i] = comps[i]->def;
         if (base_type != ptr_base_type) {
            /* vload_half*: widen each half component to the value type. */
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            ncomps[i] = nir_f2fN(&b->nb, ncomps[i],
                                 glsl_base_type_get_bit_size(base_type));
         }
      } else {
         struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
         struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
         ssa->def = nir_channel(&b->nb, val->def, i);
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            if (rounding == nir_rounding_mode_undef) {
               ssa->def = nir_f2f16(&b->nb, ssa->def);
            } else {
               /* vstore_half*_r: honor the requested rounding mode. */
               ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,
                                                nir_type_float | ssa->def->bit_size,
                                                nir_type_float16,
                                                rounding, false);
            }
         }
         vtn_local_store(b, ssa, arr_deref, p->type->access);
      }
   }
   if (load) {
      vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));
   }
}
699
700 static void
vtn_handle_opencl_vload(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)701 vtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
702 const uint32_t *w, unsigned count)
703 {
704 _handle_v_load_store(b, opcode, w, count, true,
705 opcode == OpenCLstd_Vloada_halfn,
706 nir_rounding_mode_undef);
707 }
708
709 static void
vtn_handle_opencl_vstore(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)710 vtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
711 const uint32_t *w, unsigned count)
712 {
713 _handle_v_load_store(b, opcode, w, count, false,
714 opcode == OpenCLstd_Vstorea_halfn,
715 nir_rounding_mode_undef);
716 }
717
718 static void
vtn_handle_opencl_vstore_half_r(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)719 vtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
720 const uint32_t *w, unsigned count)
721 {
722 _handle_v_load_store(b, opcode, w, count, false,
723 opcode == OpenCLstd_Vstorea_halfn_r,
724 vtn_rounding_mode_to_nir(b, w[8]));
725 }
726
/* Append the constant, NUL-terminated string value `id` (a pointer into
 * a nir_var_mem_constant variable with an initializer) to info->strings
 * and return its byte offset within that buffer.  Fails the parse if
 * the value is not a constant char array or is not NUL-terminated.
 */
static unsigned
vtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)
{
   nir_deref_instr *deref = vtn_nir_deref(b, id);

   /* Walk up to the underlying variable so we can read its initializer. */
   while (deref && deref->deref_type != nir_deref_type_var)
      deref = nir_deref_instr_parent(deref);

   vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),
               "Printf string argument must be a pointer to a constant variable");
   vtn_fail_if(deref->var->constant_initializer == NULL,
               "Printf string argument must have an initializer");
   vtn_fail_if(!glsl_type_is_array(deref->var->type),
               "Printf string must be an char array");
   const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);
   vtn_fail_if(char_type != glsl_uint8_t_type() &&
               char_type != glsl_int8_t_type(),
               "Printf string must be an char array");

   nir_constant *c = deref->var->constant_initializer;
   assert(c->num_elements == glsl_get_length(deref->var->type));

   /* Grow the shared string buffer and copy the bytes in. */
   unsigned idx = info->string_size;
   info->strings = reralloc_size(b->shader, info->strings,
                                 idx + c->num_elements);
   info->string_size += c->num_elements;

   char *str = &info->strings[idx];
   bool found_null = false;
   for (unsigned i = 0; i < c->num_elements; i++) {
      memcpy((char *)str + i, c->elements[i]->values, 1);
      if (str[i] == '\0')
         found_null = true;
   }
   vtn_fail_if(!found_null, "Printf string must be null terminated");
   return idx;
}
764
765 /* printf is special because there are no limits on args */
/* Lower an OpenCL printf call.  When the printf cap is off the call is
 * replaced by a constant -1 result.  Otherwise: record the format
 * string (and any %s string arguments) in the shader's printf_info
 * table, pack the variadic arguments into an ad-hoc local struct, and
 * emit a nir printf intrinsic referencing the table entry.
 */
static void
handle_printf(struct vtn_builder *b, uint32_t opcode,
              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)
{
   if (!b->options->caps.printf) {
      vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));
      return;
   }

   /* Step 1. extract the format string */

   /*
    * info_idx is 1-based to match clover/llvm
    * the backend indexes the info table at info_idx - 1.
    */
   b->shader->printf_info_count++;
   unsigned info_idx = b->shader->printf_info_count;

   b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,
                                     nir_printf_info, info_idx);
   nir_printf_info *info = &b->shader->printf_info[info_idx - 1];

   info->strings = NULL;
   info->string_size = 0;

   /* w_src[0] is the format-string pointer; copies it into info->strings. */
   vtn_add_printf_string(b, w_src[0], info);

   info->num_args = num_srcs - 1;
   info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);

   /* Step 2, build an ad-hoc struct type out of the args */
   unsigned field_offset = 0;
   struct glsl_struct_field *fields =
      rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);
   for (unsigned i = 1; i < num_srcs; ++i) {
      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
      struct vtn_type *src_type = val->type;
      fields[i - 1].type = src_type->type;
      fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);
      /* Each field is placed at the next 4-byte-aligned offset. */
      field_offset = align(field_offset, 4);
      fields[i - 1].offset = field_offset;
      info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);
      field_offset += glsl_get_cl_size(src_type->type);
   }
   const struct glsl_type *struct_type =
      glsl_struct_type(fields, num_srcs - 1, "printf", true);

   /* Step 3, create a variable of that type and populate its fields */
   nir_variable *var = nir_local_variable_create(b->func->nir_func->impl,
                                                 struct_type, NULL);
   nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);
   size_t fmt_pos = 0;
   for (unsigned i = 1; i < num_srcs; ++i) {
      nir_deref_instr *field_deref =
         nir_build_deref_struct(&b->nb, deref_var, i - 1);
      nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;
      /* extract strings: a %s argument is stored as its index into the
       * string buffer rather than the pointer value itself.
       * NOTE(review): fmt_pos is size_t compared against -1 — relies on
       * the usual unsigned conversion of the sentinel; confirm against
       * util_printf_next_spec_pos's return convention. */
      fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);
      if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {
         unsigned idx = vtn_add_printf_string(b, w_src[i], info);
         nir_store_deref(&b->nb, field_deref,
                         nir_imm_intN_t(&b->nb, idx, field_src->bit_size),
                         ~0 /* write_mask */);
      } else
         nir_store_deref(&b->nb, field_deref, field_src, ~0);
   }

   /* Lastly, the actual intrinsic */
   nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);
   nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);
   vtn_push_nir_ssa(b, w_dest[1], ret);
}
838
839 static nir_ssa_def *
handle_round(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)840 handle_round(struct vtn_builder *b, uint32_t opcode,
841 unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
842 const struct vtn_type *dest_type)
843 {
844 nir_ssa_def *src = srcs[0];
845 nir_builder *nb = &b->nb;
846 nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);
847 nir_ssa_def *truncated = nir_ftrunc(nb, src);
848 nir_ssa_def *remainder = nir_fsub(nb, src, truncated);
849
850 return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),
851 nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);
852 }
853
854 static nir_ssa_def *
handle_shuffle(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)855 handle_shuffle(struct vtn_builder *b, uint32_t opcode,
856 unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
857 const struct vtn_type *dest_type)
858 {
859 struct nir_ssa_def *input = srcs[0];
860 struct nir_ssa_def *mask = srcs[1];
861
862 unsigned out_elems = dest_type->length;
863 nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
864 unsigned in_elems = input->num_components;
865 if (mask->bit_size != 32)
866 mask = nir_u2u32(&b->nb, mask);
867 mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));
868 for (unsigned i = 0; i < out_elems; i++)
869 outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));
870
871 return nir_vec(&b->nb, outres, out_elems);
872 }
873
874 static nir_ssa_def *
handle_shuffle2(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)875 handle_shuffle2(struct vtn_builder *b, uint32_t opcode,
876 unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
877 const struct vtn_type *dest_type)
878 {
879 struct nir_ssa_def *input0 = srcs[0];
880 struct nir_ssa_def *input1 = srcs[1];
881 struct nir_ssa_def *mask = srcs[2];
882
883 unsigned out_elems = dest_type->length;
884 nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
885 unsigned in_elems = input0->num_components;
886 unsigned total_mask = 2 * in_elems - 1;
887 unsigned half_mask = in_elems - 1;
888 if (mask->bit_size != 32)
889 mask = nir_u2u32(&b->nb, mask);
890 mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));
891 for (unsigned i = 0; i < out_elems; i++) {
892 nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);
893 nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));
894 nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);
895 nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);
896 nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));
897 outres[i] = nir_bcsel(&b->nb, sel, val0, val1);
898 }
899 return nir_vec(&b->nb, outres, out_elems);
900 }
901
/* Dispatch one OpenCL.std extended-instruction-set opcode to the matching
 * NIR lowering helper.
 *
 * SPIR-V OpExtInst word layout: w[1] = result type id, w[2] = result id,
 * w[3] = instruction-set id, w[4] = extended opcode, w[5..] = operand ids.
 * Hence "w + 5, count - 5" passes just the operand ids and "w + 1" the
 * <result-type, result-id> pair down to the per-group handlers.
 *
 * Returns true when the opcode was handled; unknown opcodes vtn_fail().
 */
bool
vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                              const uint32_t *w, unsigned count)
{
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;

   switch (cl_opcode) {
   /* Ops with a direct (or near-direct) NIR ALU equivalent. */
   case OpenCLstd_Fabs:
   case OpenCLstd_SAbs:
   case OpenCLstd_UAbs:
   case OpenCLstd_SAdd_sat:
   case OpenCLstd_UAdd_sat:
   case OpenCLstd_Ceil:
   case OpenCLstd_Floor:
   case OpenCLstd_Fmax:
   case OpenCLstd_SHadd:
   case OpenCLstd_UHadd:
   case OpenCLstd_SMax:
   case OpenCLstd_UMax:
   case OpenCLstd_Fmin:
   case OpenCLstd_SMin:
   case OpenCLstd_UMin:
   case OpenCLstd_Mix:
   case OpenCLstd_Native_cos:
   case OpenCLstd_Native_divide:
   case OpenCLstd_Native_exp2:
   case OpenCLstd_Native_log2:
   case OpenCLstd_Native_powr:
   case OpenCLstd_Native_recip:
   case OpenCLstd_Native_rsqrt:
   case OpenCLstd_Native_sin:
   case OpenCLstd_Native_sqrt:
   case OpenCLstd_SMul_hi:
   case OpenCLstd_UMul_hi:
   case OpenCLstd_Popcount:
   case OpenCLstd_SRhadd:
   case OpenCLstd_URhadd:
   case OpenCLstd_Rsqrt:
   case OpenCLstd_Sign:
   case OpenCLstd_Sqrt:
   case OpenCLstd_SSub_sat:
   case OpenCLstd_USub_sat:
   case OpenCLstd_Trunc:
   case OpenCLstd_Rint:
   case OpenCLstd_Half_divide:
   case OpenCLstd_Half_recip:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);
      return true;
   /* Ops needing a composed NIR sequence or a libclc software fallback. */
   case OpenCLstd_SAbs_diff:
   case OpenCLstd_UAbs_diff:
   case OpenCLstd_SMad_hi:
   case OpenCLstd_UMad_hi:
   case OpenCLstd_SMad24:
   case OpenCLstd_UMad24:
   case OpenCLstd_SMul24:
   case OpenCLstd_UMul24:
   case OpenCLstd_Bitselect:
   case OpenCLstd_FClamp:
   case OpenCLstd_SClamp:
   case OpenCLstd_UClamp:
   case OpenCLstd_Copysign:
   case OpenCLstd_Cross:
   case OpenCLstd_Degrees:
   case OpenCLstd_Fdim:
   case OpenCLstd_Fma:
   case OpenCLstd_Distance:
   case OpenCLstd_Fast_distance:
   case OpenCLstd_Fast_length:
   case OpenCLstd_Fast_normalize:
   case OpenCLstd_Half_rsqrt:
   case OpenCLstd_Half_sqrt:
   case OpenCLstd_Length:
   case OpenCLstd_Mad:
   case OpenCLstd_Maxmag:
   case OpenCLstd_Minmag:
   case OpenCLstd_Nan:
   case OpenCLstd_Nextafter:
   case OpenCLstd_Normalize:
   case OpenCLstd_Radians:
   case OpenCLstd_Rotate:
   case OpenCLstd_Select:
   case OpenCLstd_Step:
   case OpenCLstd_Smoothstep:
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
   case OpenCLstd_Clz:
   case OpenCLstd_Ctz:
   case OpenCLstd_Native_exp:
   case OpenCLstd_Native_exp10:
   case OpenCLstd_Native_log:
   case OpenCLstd_Native_log10:
   case OpenCLstd_Acos:
   case OpenCLstd_Acosh:
   case OpenCLstd_Acospi:
   case OpenCLstd_Asin:
   case OpenCLstd_Asinh:
   case OpenCLstd_Asinpi:
   case OpenCLstd_Atan:
   case OpenCLstd_Atan2:
   case OpenCLstd_Atanh:
   case OpenCLstd_Atanpi:
   case OpenCLstd_Atan2pi:
   case OpenCLstd_Fract:
   case OpenCLstd_Frexp:
   case OpenCLstd_Exp:
   case OpenCLstd_Exp2:
   case OpenCLstd_Expm1:
   case OpenCLstd_Exp10:
   case OpenCLstd_Fmod:
   case OpenCLstd_Ilogb:
   case OpenCLstd_Log:
   case OpenCLstd_Log2:
   case OpenCLstd_Log10:
   case OpenCLstd_Log1p:
   case OpenCLstd_Logb:
   case OpenCLstd_Ldexp:
   case OpenCLstd_Cos:
   case OpenCLstd_Cosh:
   case OpenCLstd_Cospi:
   case OpenCLstd_Sin:
   case OpenCLstd_Sinh:
   case OpenCLstd_Sinpi:
   case OpenCLstd_Tan:
   case OpenCLstd_Tanh:
   case OpenCLstd_Tanpi:
   case OpenCLstd_Cbrt:
   case OpenCLstd_Erfc:
   case OpenCLstd_Erf:
   case OpenCLstd_Lgamma:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Tgamma:
   case OpenCLstd_Pow:
   case OpenCLstd_Powr:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Remainder:
   case OpenCLstd_Remquo:
   case OpenCLstd_Hypot:
   case OpenCLstd_Sincos:
   case OpenCLstd_Modf:
   case OpenCLstd_UMad_sat:
   case OpenCLstd_SMad_sat:
   case OpenCLstd_Native_tan:
   case OpenCLstd_Half_cos:
   case OpenCLstd_Half_exp:
   case OpenCLstd_Half_exp2:
   case OpenCLstd_Half_exp10:
   case OpenCLstd_Half_log:
   case OpenCLstd_Half_log2:
   case OpenCLstd_Half_log10:
   case OpenCLstd_Half_powr:
   case OpenCLstd_Half_sin:
   case OpenCLstd_Half_tan:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);
      return true;
   /* vload/vstore families parse their own operands, so they take the raw
    * instruction words rather than going through handle_instr(). */
   case OpenCLstd_Vloadn:
   case OpenCLstd_Vload_half:
   case OpenCLstd_Vload_halfn:
   case OpenCLstd_Vloada_halfn:
      vtn_handle_opencl_vload(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstoren:
   case OpenCLstd_Vstore_half:
   case OpenCLstd_Vstore_halfn:
   case OpenCLstd_Vstorea_halfn:
      vtn_handle_opencl_vstore(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstore_half_r:
   case OpenCLstd_Vstore_halfn_r:
   case OpenCLstd_Vstorea_halfn_r:
      vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Shuffle:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);
      return true;
   case OpenCLstd_Shuffle2:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);
      return true;
   case OpenCLstd_Round:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);
      return true;
   case OpenCLstd_Printf:
      handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);
      return true;
   case OpenCLstd_Prefetch:
      /* Deliberately a no-op: prefetch is only a performance hint.
       * TODO maybe add a nir instruction for this? */
      return true;
   default:
      vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
      return false;
   }
}
1094
1095 bool
vtn_handle_opencl_core_instruction(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)1096 vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
1097 const uint32_t *w, unsigned count)
1098 {
1099 switch (opcode) {
1100 case SpvOpGroupAsyncCopy:
1101 handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
1102 return true;
1103 case SpvOpGroupWaitEvents:
1104 handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
1105 return true;
1106 default:
1107 return false;
1108 }
1109 return true;
1110 }
1111