1 /*
2  * Copyright © 2018 Red Hat
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark (robdclark@gmail.com)
25  */
26 
27 #include "math.h"
28 #include "nir/nir_builtin_builder.h"
29 
30 #include "util/u_printf.h"
31 #include "vtn_private.h"
32 #include "OpenCL.std.h"
33 
/* Handler signature shared by all OpenCL.std instruction translators:
 * receives the opcode, the already-resolved NIR source defs plus their
 * SPIR-V types, and returns the result def (NULL when the instruction
 * produces no SSA value).
 */
typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,
                                    uint32_t opcode,
                                    unsigned num_srcs, nir_ssa_def **srcs,
                                    struct vtn_type **src_types,
                                    const struct vtn_type *dest_type);
39 
/* Map a SPIR-V storage class to the LLVM/SPIR address-space number used in
 * Itanium-style name mangling.  Returns -1 for storage classes that get no
 * address-space qualifier.
 */
static int to_llvm_address_space(SpvStorageClass mode)
{
   if (mode == SpvStorageClassPrivate || mode == SpvStorageClassFunction)
      return 0;
   if (mode == SpvStorageClassCrossWorkgroup)
      return 1;
   if (mode == SpvStorageClassUniform || mode == SpvStorageClassUniformConstant)
      return 2;
   if (mode == SpvStorageClassWorkgroup)
      return 3;
   return -1;
}
52 
53 
/* Build the Itanium-ABI-style mangled name ("_Z<len><name><params...>") that
 * libclc uses for an OpenCL builtin, derived from the SPIR-V operand types.
 *
 * const_mask: bit i set means parameter i is const-qualified (mangled 'K').
 * outstring:  receives a heap-allocated string (strdup); caller must free().
 */
static void
vtn_opencl_mangle(const char *in_name,
                  uint32_t const_mask,
                  int ntypes, struct vtn_type **src_types,
                  char **outstring)
{
   /* args_str walks forward through local_name as each parameter encoding is
    * appended.  NOTE(review): the 256-byte buffer is assumed large enough for
    * every builtin we mangle — confirm no caller can overflow it. */
   char local_name[256] = "";
   char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);

   for (unsigned i = 0; i < ntypes; ++i) {
      const struct glsl_type *type = src_types[i]->type;
      enum vtn_base_type base_type = src_types[i]->base_type;
      /* Pointers mangle as 'P' plus an optional address-space qualifier,
       * then continue with the pointee type. */
      if (src_types[i]->base_type == vtn_base_type_pointer) {
         *(args_str++) = 'P';
         int address_space = to_llvm_address_space(src_types[i]->storage_class);
         if (address_space > 0)
            args_str += sprintf(args_str, "U3AS%d", address_space);

         type = src_types[i]->deref->type;
         base_type = src_types[i]->deref->base_type;
      }

      if (const_mask & (1 << i))
         *(args_str++) = 'K';

      unsigned num_elements = glsl_get_components(type);
      if (num_elements > 1) {
         /* Vectors are not treated as built-ins for mangling, so check for substitution.
          * In theory, we'd need to know which substitution value this is. In practice,
          * the functions we need from libclc only support 1
          */
         bool substitution = false;
         for (unsigned j = 0; j < i; ++j) {
            const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?
               src_types[j]->deref->type : src_types[j]->type;
            if (type == other_type) {
               substitution = true;
               break;
            }
         }

         if (substitution) {
            /* "S_" = first substitution entry; skips re-encoding the type. */
            args_str += sprintf(args_str, "S_");
            continue;
         } else
            args_str += sprintf(args_str, "Dv%d_", num_elements);
      }

      const char *suffix = NULL;
      switch (base_type) {
      case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;
      case vtn_base_type_event: suffix = "9ocl_event"; break;
      default: {
         /* Itanium ABI builtin-type codes, indexed by GLSL base type. */
         const char *primitives[] = {
            [GLSL_TYPE_UINT] = "j",
            [GLSL_TYPE_INT] = "i",
            [GLSL_TYPE_FLOAT] = "f",
            [GLSL_TYPE_FLOAT16] = "Dh",
            [GLSL_TYPE_DOUBLE] = "d",
            [GLSL_TYPE_UINT8] = "h",
            [GLSL_TYPE_INT8] = "c",
            [GLSL_TYPE_UINT16] = "t",
            [GLSL_TYPE_INT16] = "s",
            [GLSL_TYPE_UINT64] = "m",
            [GLSL_TYPE_INT64] = "l",
            [GLSL_TYPE_BOOL] = "b",
            [GLSL_TYPE_ERROR] = NULL,
         };
         enum glsl_base_type glsl_base_type = glsl_get_base_type(type);
         assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);
         suffix = primitives[glsl_base_type];
         break;
      }
      }
      args_str += sprintf(args_str, "%s", suffix);
   }

   *outstring = strdup(local_name);
}
133 
/* Mangle `name` against the operand types and look the function up: first in
 * the shader being built, then in the companion libclc shader
 * (b->options->clc_shader).  A match found only in libclc gets a
 * parameter-compatible declaration created in the current shader so the call
 * can be emitted now and resolved at link time.  Fails SPIR-V translation if
 * no match exists anywhere.
 */
static nir_function *mangle_and_find(struct vtn_builder *b,
                                     const char *name,
                                     uint32_t const_mask,
                                     uint32_t num_srcs,
                                     struct vtn_type **src_types)
{
   char *mname;
   nir_function *found = NULL;

   vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);
   /* try and find in current shader first. */
   nir_foreach_function(funcs, b->shader) {
      if (!strcmp(funcs->name, mname)) {
         found = funcs;
         break;
      }
   }
   /* if not found here find in clc shader and create a decl mirroring it */
   if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {
      nir_foreach_function(funcs, b->options->clc_shader) {
         if (!strcmp(funcs->name, mname)) {
            found = funcs;
            break;
         }
      }
      if (found) {
         /* Mirror only the declaration (name + parameter list); the body
          * stays in the clc shader until linking. */
         nir_function *decl = nir_function_create(b->shader, mname);
         decl->num_params = found->num_params;
         decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);
         for (unsigned i = 0; i < decl->num_params; i++) {
            decl->params[i] = found->params[i];
         }
         found = decl;
      }
   }
   if (!found)
      /* NOTE(review): vtn_fail appears to abort translation non-locally, so
       * mname is not freed on this path — confirm that is acceptable. */
      vtn_fail("Can't find clc function %s\n", mname);
   free(mname);
   return found;
}
174 
/* Emit a nir_call to the mangled libclc function named `name`.
 *
 * When dest_type is non-NULL a local "return_tmp" variable is created and
 * its deref is passed as the *first* call parameter — libclc functions
 * return through an out-pointer — and the deref is handed back via
 * ret_deref_ptr so the caller can nir_load_deref the result.  The explicit
 * srcs follow, in order, after that hidden parameter.
 *
 * Returns false only if lookup yields NULL (NOTE(review): mangle_and_find
 * looks like it aborts via vtn_fail instead of returning NULL — confirm this
 * false path is reachable).
 */
static bool call_mangled_function(struct vtn_builder *b,
                                  const char *name,
                                  uint32_t const_mask,
                                  uint32_t num_srcs,
                                  struct vtn_type **src_types,
                                  const struct vtn_type *dest_type,
                                  nir_ssa_def **srcs,
                                  nir_deref_instr **ret_deref_ptr)
{
   nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);
   if (!found)
      return false;

   nir_call_instr *call = nir_call_instr_create(b->shader, found);

   nir_deref_instr *ret_deref = NULL;
   uint32_t param_idx = 0;
   if (dest_type) {
      /* Hidden return slot: param 0 is a pointer to a fresh local. */
      nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,
                                                        glsl_get_bare_type(dest_type->type),
                                                        "return_tmp");
      ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
      call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
   }

   for (unsigned i = 0; i < num_srcs; i++)
      call->params[param_idx++] = nir_src_for_ssa(srcs[i]);
   nir_builder_instr_insert(&b->nb, &call->instr);

   *ret_deref_ptr = ret_deref;
   return true;
}
207 
208 static void
handle_instr(struct vtn_builder * b,uint32_t opcode,const uint32_t * w_src,unsigned num_srcs,const uint32_t * w_dest,nir_handler handler)209 handle_instr(struct vtn_builder *b, uint32_t opcode,
210              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)
211 {
212    struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;
213 
214    nir_ssa_def *srcs[5] = { NULL };
215    struct vtn_type *src_types[5] = { NULL };
216    vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
217    for (unsigned i = 0; i < num_srcs; i++) {
218       struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
219       struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);
220       srcs[i] = ssa->def;
221       src_types[i] = val->type;
222    }
223 
224    nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);
225    if (result) {
226       vtn_push_nir_ssa(b, w_dest[1], result);
227    } else {
228       vtn_assert(dest_type == NULL);
229    }
230 }
231 
/* 1:1 mapping from OpenCL.std opcodes that correspond directly to a single
 * NIR ALU op.  Opcodes with no direct equivalent abort translation; they are
 * expected to be routed through handle_special/handle_clc_fn instead.
 */
static nir_op
nir_alu_op_for_opencl_opcode(struct vtn_builder *b,
                             enum OpenCLstd_Entrypoints opcode)
{
   switch (opcode) {
   case OpenCLstd_Fabs: return nir_op_fabs;
   case OpenCLstd_SAbs: return nir_op_iabs;
   case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;
   case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;
   case OpenCLstd_Ceil: return nir_op_fceil;
   case OpenCLstd_Floor: return nir_op_ffloor;
   case OpenCLstd_SHadd: return nir_op_ihadd;
   case OpenCLstd_UHadd: return nir_op_uhadd;
   case OpenCLstd_Fmax: return nir_op_fmax;
   case OpenCLstd_SMax: return nir_op_imax;
   case OpenCLstd_UMax: return nir_op_umax;
   case OpenCLstd_Fmin: return nir_op_fmin;
   case OpenCLstd_SMin: return nir_op_imin;
   case OpenCLstd_UMin: return nir_op_umin;
   case OpenCLstd_Mix: return nir_op_flrp;
   case OpenCLstd_Native_cos: return nir_op_fcos;
   case OpenCLstd_Native_divide: return nir_op_fdiv;
   case OpenCLstd_Native_exp2: return nir_op_fexp2;
   case OpenCLstd_Native_log2: return nir_op_flog2;
   case OpenCLstd_Native_powr: return nir_op_fpow;
   case OpenCLstd_Native_recip: return nir_op_frcp;
   case OpenCLstd_Native_rsqrt: return nir_op_frsq;
   case OpenCLstd_Native_sin: return nir_op_fsin;
   case OpenCLstd_Native_sqrt: return nir_op_fsqrt;
   case OpenCLstd_SMul_hi: return nir_op_imul_high;
   case OpenCLstd_UMul_hi: return nir_op_umul_high;
   case OpenCLstd_Popcount: return nir_op_bit_count;
   case OpenCLstd_SRhadd: return nir_op_irhadd;
   case OpenCLstd_URhadd: return nir_op_urhadd;
   case OpenCLstd_Rsqrt: return nir_op_frsq;
   case OpenCLstd_Sign: return nir_op_fsign;
   case OpenCLstd_Sqrt: return nir_op_fsqrt;
   case OpenCLstd_SSub_sat: return nir_op_isub_sat;
   case OpenCLstd_USub_sat: return nir_op_usub_sat;
   case OpenCLstd_Trunc: return nir_op_ftrunc;
   case OpenCLstd_Rint: return nir_op_fround_even;
   case OpenCLstd_Half_divide: return nir_op_fdiv;
   case OpenCLstd_Half_recip: return nir_op_frcp;
   /* abs() of an unsigned value is the identity, so a plain move suffices. */
   case OpenCLstd_UAbs: return nir_op_mov;
   default:
      vtn_fail("No NIR equivalent");
   }
}
281 
282 static nir_ssa_def *
handle_alu(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)283 handle_alu(struct vtn_builder *b, uint32_t opcode,
284            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
285            const struct vtn_type *dest_type)
286 {
287    nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),
288                                     srcs[0], srcs[1], srcs[2], NULL);
289    if (opcode == OpenCLstd_Popcount)
290       ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));
291    return ret;
292 }
293 
/* Sparse table mapping OpenCL.std opcodes to the (unmangled) libclc function
 * name that implements them.  Indexed directly by opcode; entries not listed
 * here are NULL, meaning the opcode is not implemented via a libclc call.
 */
#define REMAP(op, str) [OpenCLstd_##op] = { str }
static const struct {
   const char *fn;
} remap_table[] = {
   REMAP(Distance, "distance"),
   REMAP(Fast_distance, "fast_distance"),
   REMAP(Fast_length, "fast_length"),
   REMAP(Fast_normalize, "fast_normalize"),
   REMAP(Half_rsqrt, "half_rsqrt"),
   REMAP(Half_sqrt, "half_sqrt"),
   REMAP(Length, "length"),
   REMAP(Normalize, "normalize"),
   REMAP(Degrees, "degrees"),
   REMAP(Radians, "radians"),
   REMAP(Rotate, "rotate"),
   REMAP(Smoothstep, "smoothstep"),
   REMAP(Step, "step"),

   REMAP(Pow, "pow"),
   REMAP(Pown, "pown"),
   REMAP(Powr, "powr"),
   REMAP(Rootn, "rootn"),
   REMAP(Modf, "modf"),

   REMAP(Acos, "acos"),
   REMAP(Acosh, "acosh"),
   REMAP(Acospi, "acospi"),
   REMAP(Asin, "asin"),
   REMAP(Asinh, "asinh"),
   REMAP(Asinpi, "asinpi"),
   REMAP(Atan, "atan"),
   REMAP(Atan2, "atan2"),
   REMAP(Atanh, "atanh"),
   REMAP(Atanpi, "atanpi"),
   REMAP(Atan2pi, "atan2pi"),
   REMAP(Cos, "cos"),
   REMAP(Cosh, "cosh"),
   REMAP(Cospi, "cospi"),
   REMAP(Sin, "sin"),
   REMAP(Sinh, "sinh"),
   REMAP(Sinpi, "sinpi"),
   REMAP(Tan, "tan"),
   REMAP(Tanh, "tanh"),
   REMAP(Tanpi, "tanpi"),
   REMAP(Sincos, "sincos"),
   REMAP(Fract, "fract"),
   REMAP(Frexp, "frexp"),
   REMAP(Fma, "fma"),
   REMAP(Fmod, "fmod"),

   /* Half_* opcodes share implementations with their full-precision
    * counterparts. */
   REMAP(Half_cos, "cos"),
   REMAP(Half_exp, "exp"),
   REMAP(Half_exp2, "exp2"),
   REMAP(Half_exp10, "exp10"),
   REMAP(Half_log, "log"),
   REMAP(Half_log2, "log2"),
   REMAP(Half_log10, "log10"),
   REMAP(Half_powr, "powr"),
   REMAP(Half_sin, "sin"),
   REMAP(Half_tan, "tan"),

   REMAP(Remainder, "remainder"),
   REMAP(Remquo, "remquo"),
   REMAP(Hypot, "hypot"),
   REMAP(Exp, "exp"),
   REMAP(Exp2, "exp2"),
   REMAP(Exp10, "exp10"),
   REMAP(Expm1, "expm1"),
   REMAP(Ldexp, "ldexp"),

   REMAP(Ilogb, "ilogb"),
   REMAP(Log, "log"),
   REMAP(Log2, "log2"),
   REMAP(Log10, "log10"),
   REMAP(Log1p, "log1p"),
   REMAP(Logb, "logb"),

   REMAP(Cbrt, "cbrt"),
   REMAP(Erfc, "erfc"),
   REMAP(Erf, "erf"),

   REMAP(Lgamma, "lgamma"),
   REMAP(Lgamma_r, "lgamma_r"),
   REMAP(Tgamma, "tgamma"),

   REMAP(UMad_sat, "mad_sat"),
   REMAP(SMad_sat, "mad_sat"),

   REMAP(Shuffle, "shuffle"),
   REMAP(Shuffle2, "shuffle2"),
};
#undef REMAP
386 
remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)387 static const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)
388 {
389    if (opcode >= (sizeof(remap_table) / sizeof(const char *)))
390       return NULL;
391    return remap_table[opcode].fn;
392 }
393 
394 static struct vtn_type *
get_vtn_type_for_glsl_type(struct vtn_builder * b,const struct glsl_type * type)395 get_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)
396 {
397    struct vtn_type *ret = rzalloc(b, struct vtn_type);
398    assert(glsl_type_is_vector_or_scalar(type));
399    ret->type = type;
400    ret->length = glsl_get_vector_elements(type);
401    ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;
402    return ret;
403 }
404 
405 static struct vtn_type *
get_pointer_type(struct vtn_builder * b,struct vtn_type * t,SpvStorageClass storage_class)406 get_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)
407 {
408    struct vtn_type *ret = rzalloc(b, struct vtn_type);
409    ret->type = nir_address_format_to_glsl_type(
410             vtn_mode_to_address_format(
411                b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));
412    ret->base_type = vtn_base_type_pointer;
413    ret->storage_class = storage_class;
414    ret->deref = t;
415    return ret;
416 }
417 
418 static struct vtn_type *
get_signed_type(struct vtn_builder * b,struct vtn_type * t)419 get_signed_type(struct vtn_builder *b, struct vtn_type *t)
420 {
421    if (t->base_type == vtn_base_type_pointer) {
422       return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);
423    }
424    return get_vtn_type_for_glsl_type(
425       b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),
426                           glsl_get_vector_elements(t->type)));
427 }
428 
429 static nir_ssa_def *
handle_clc_fn(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,int num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)430 handle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
431               int num_srcs,
432               nir_ssa_def **srcs,
433               struct vtn_type **src_types,
434               const struct vtn_type *dest_type)
435 {
436    const char *name = remap_clc_opcode(opcode);
437    if (!name)
438        return NULL;
439 
440    /* Some functions which take params end up with uint (or pointer-to-uint) being passed,
441     * which doesn't mangle correctly when the function expects int or pointer-to-int.
442     * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers
443     */
444    int signed_param = -1;
445    switch (opcode) {
446    case OpenCLstd_Frexp:
447    case OpenCLstd_Lgamma_r:
448    case OpenCLstd_Pown:
449    case OpenCLstd_Rootn:
450    case OpenCLstd_Ldexp:
451       signed_param = 1;
452       break;
453    case OpenCLstd_Remquo:
454       signed_param = 2;
455       break;
456    case OpenCLstd_SMad_sat: {
457       /* All parameters need to be converted to signed */
458       src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);
459       break;
460    }
461    default: break;
462    }
463 
464    if (signed_param >= 0) {
465       src_types[signed_param] = get_signed_type(b, src_types[signed_param]);
466    }
467 
468    nir_deref_instr *ret_deref = NULL;
469 
470    if (!call_mangled_function(b, name, 0, num_srcs, src_types,
471                               dest_type, srcs, &ret_deref))
472       return NULL;
473 
474    return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
475 }
476 
/* Handle opcodes that need more than a 1:1 ALU mapping.  Opcodes with a
 * direct NIR builder helper are expanded inline; a `break` out of the switch
 * (used when a shader option asks for lowering, or for unhandled opcodes)
 * falls through to the libclc path, and translation fails if libclc has no
 * implementation either.
 */
static nir_ssa_def *
handle_special(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   nir_builder *nb = &b->nb;
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;

   switch (cl_opcode) {
   case OpenCLstd_SAbs_diff:
     /* these are easier done directly in NIR */
      return nir_iabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_UAbs_diff:
      return nir_uabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_Bitselect:
      return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMad_hi:
      return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UMad_hi:
      return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMul24:
      return nir_imul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_UMul24:
      return nir_umul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_SMad24:
      return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);
   case OpenCLstd_UMad24:
      return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_FClamp:
      return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SClamp:
      return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UClamp:
      return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Copysign:
      return nir_copysign(nb, srcs[0], srcs[1]);
   case OpenCLstd_Cross:
      /* CL cross() exists for both 3- and 4-component vectors. */
      if (dest_type->length == 4)
         return nir_cross4(nb, srcs[0], srcs[1]);
      return nir_cross3(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fdim:
      return nir_fdim(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fmod:
      /* If the backend wants fmod lowered, fall through to the libclc
       * implementation instead of emitting the op directly. */
      if (nb->shader->options->lower_fmod)
         break;
      return nir_fmod(nb, srcs[0], srcs[1]);
   case OpenCLstd_Mad:
      return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Maxmag:
      return nir_maxmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Minmag:
      return nir_minmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Nan:
      return nir_nan(nb, srcs[0]);
   case OpenCLstd_Nextafter:
      return nir_nextafter(nb, srcs[0], srcs[1]);
   case OpenCLstd_Normalize:
      return nir_normalize(nb, srcs[0]);
   case OpenCLstd_Clz:
      return nir_clz_u(nb, srcs[0]);
   case OpenCLstd_Ctz:
      return nir_ctz_u(nb, srcs[0]);
   case OpenCLstd_Select:
      return nir_select(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
      /* SPIR-V and CL have different defs for upsample, just implement in nir */
      return nir_upsample(nb, srcs[0], srcs[1]);
   case OpenCLstd_Native_exp:
      return nir_fexp(nb, srcs[0]);
   case OpenCLstd_Native_exp10:
      /* exp10(x) = exp2(x * log2(10)) */
      return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));
   case OpenCLstd_Native_log:
      return nir_flog(nb, srcs[0]);
   case OpenCLstd_Native_log10:
      /* log10(x) = log2(x) / log2(10) */
      return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));
   case OpenCLstd_Native_tan:
      return nir_ftan(nb, srcs[0]);
   case OpenCLstd_Ldexp:
      if (nb->shader->options->lower_ldexp)
         break;
      return nir_ldexp(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fma:
      /* FIXME: the software implementation only supports fp32 for now. */
      if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)
         break;
      return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
   default:
      break;
   }

   /* Fallback: look for a libclc implementation of the opcode. */
   nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (!ret)
      vtn_fail("No NIR equivalent");

   return ret;
}
574 
/* Handle core SPIR-V opcodes (not OpenCL.std extended instructions) that are
 * nonetheless implemented by calling into libclc.  Returns the loaded result
 * or NULL.
 */
static nir_ssa_def *
handle_core(struct vtn_builder *b, uint32_t opcode,
            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
            const struct vtn_type *dest_type)
{
   nir_deref_instr *ret_deref = NULL;

   switch ((SpvOp)opcode) {
   case SpvOpGroupAsyncCopy: {
      /* Libclc doesn't include 3-component overloads of the async copy functions.
       * However, the CLC spec says:
       * async_work_group_copy and async_work_group_strided_copy for 3-component vector types
       * behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component
       * vector types
       */
      for (unsigned i = 0; i < num_srcs; ++i) {
         /* Rewrite pointer-to-vec3 parameters as pointer-to-vec4 so the
          * mangled name matches the libclc overload that exists. */
         if (src_types[i]->base_type == vtn_base_type_pointer &&
             src_types[i]->deref->base_type == vtn_base_type_vector &&
             src_types[i]->deref->length == 3) {
            src_types[i] =
               get_pointer_type(b,
                                get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),
                                src_types[i]->storage_class);
         }
      }
      /* const_mask (1 << 1) marks the source pointer parameter as const. */
      if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   case SpvOpGroupWaitEvents: {
      /* wait_group_events takes a plain int count as its first parameter. */
      src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
      if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   default:
      return NULL;
   }

   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}
616 
617 
/* Common implementation for the vloadn/vstoren family (including the _half
 * and aligned _halfa variants).
 *
 * Word layout (from the code below): for loads, w[1] is the result type,
 * w[5] the element offset and w[6] the pointer; for stores, w[5] is the data
 * value, w[6] the offset and w[7] the pointer.
 *
 * vec_aligned selects the "vloada/vstorea" addressing, where vec3 data is
 * laid out with vec4 alignment.  `rounding` applies only to half-precision
 * stores; nir_rounding_mode_undef means use the default conversion.
 */
static void
_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                     const uint32_t *w, unsigned count, bool load,
                     bool vec_aligned, nir_rounding_mode rounding)
{
   struct vtn_type *type;
   if (load)
      type = vtn_get_type(b, w[1]);
   else
      type = vtn_get_value_type(b, w[5]);
   /* Store operands are shifted by one word because w[5] holds the data. */
   unsigned a = load ? 0 : 1;

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   unsigned components = glsl_get_vector_elements(type->type);

   nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);
   struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);

   struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];
   nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];

   /* Scale the element offset into scalar units; aligned vec3 access strides
    * by 4 scalars per element. */
   nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,
      (vec_aligned && components == 3) ? 4 : components);
   nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);

   unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :
                                      glsl_get_bit_size(type->type) / 8;
   enum glsl_base_type ptr_base_type =
      glsl_get_base_type(p->pointer->type->type);
   if (base_type != ptr_base_type) {
      /* Only the half <-> float/double conversion of vload/vstore_half is a
       * legal type mismatch. */
      vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||
                  (base_type != GLSL_TYPE_FLOAT &&
                   base_type != GLSL_TYPE_DOUBLE),
                  "vload/vstore cannot do type conversion. "
                  "vload/vstore_half can only convert from half to other "
                  "floating-point types.");

      /* Above-computed alignment was for floats/doubles, not halves */
      alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);
   }

   deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);

   /* Access the vector one scalar component at a time. */
   for (int i = 0; i < components; i++) {
      nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);
      nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);

      if (load) {
         comps[i] = vtn_local_load(b, arr_deref, p->type->access);
         ncomps[i] = comps[i]->def;
         if (base_type != ptr_base_type) {
            /* Widen the loaded half to the destination float type. */
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            ncomps[i] = nir_f2fN(&b->nb, ncomps[i],
                                 glsl_base_type_get_bit_size(base_type));
         }
      } else {
         struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
         struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
         ssa->def = nir_channel(&b->nb, val->def, i);
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            /* Narrow to half, honoring an explicit rounding mode if given
             * (vstore_half_r variants). */
            if (rounding == nir_rounding_mode_undef) {
               ssa->def = nir_f2f16(&b->nb, ssa->def);
            } else {
               ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,
                                                nir_type_float | ssa->def->bit_size,
                                                nir_type_float16,
                                                rounding, false);
            }
         }
         vtn_local_store(b, ssa, arr_deref, p->type->access);
      }
   }
   if (load) {
      vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));
   }
}
699 
700 static void
vtn_handle_opencl_vload(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)701 vtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
702                         const uint32_t *w, unsigned count)
703 {
704    _handle_v_load_store(b, opcode, w, count, true,
705                         opcode == OpenCLstd_Vloada_halfn,
706                         nir_rounding_mode_undef);
707 }
708 
709 static void
vtn_handle_opencl_vstore(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)710 vtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
711                          const uint32_t *w, unsigned count)
712 {
713    _handle_v_load_store(b, opcode, w, count, false,
714                         opcode == OpenCLstd_Vstorea_halfn,
715                         nir_rounding_mode_undef);
716 }
717 
718 static void
vtn_handle_opencl_vstore_half_r(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)719 vtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
720                                 const uint32_t *w, unsigned count)
721 {
722    _handle_v_load_store(b, opcode, w, count, false,
723                         opcode == OpenCLstd_Vstorea_halfn_r,
724                         vtn_rounding_mode_to_nir(b, w[8]));
725 }
726 
/* Append the constant string pointed to by SPIR-V value `id` to the printf
 * info's string pool and return its starting offset within info->strings.
 * The string must be a NUL-terminated constant-initialized char array; each
 * element's first byte is copied (elements are stored one scalar per
 * nir_constant).
 */
static unsigned
vtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)
{
   nir_deref_instr *deref = vtn_nir_deref(b, id);

   /* Walk up to the underlying variable so we can inspect its initializer. */
   while (deref && deref->deref_type != nir_deref_type_var)
      deref = nir_deref_instr_parent(deref);

   vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),
               "Printf string argument must be a pointer to a constant variable");
   vtn_fail_if(deref->var->constant_initializer == NULL,
               "Printf string argument must have an initializer");
   vtn_fail_if(!glsl_type_is_array(deref->var->type),
               "Printf string must be an char array");
   const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);
   vtn_fail_if(char_type != glsl_uint8_t_type() &&
               char_type != glsl_int8_t_type(),
               "Printf string must be an char array");

   nir_constant *c = deref->var->constant_initializer;
   assert(c->num_elements == glsl_get_length(deref->var->type));

   /* Grow the pool and append this string at offset idx. */
   unsigned idx = info->string_size;
   info->strings = reralloc_size(b->shader, info->strings,
                                 idx + c->num_elements);
   info->string_size += c->num_elements;

   char *str = &info->strings[idx];
   bool found_null = false;
   for (unsigned i = 0; i < c->num_elements; i++) {
      /* Each constant element holds one char in its first value byte. */
      memcpy((char *)str + i, c->elements[i]->values, 1);
      if (str[i] == '\0')
         found_null = true;
   }
   vtn_fail_if(!found_null, "Printf string must be null terminated");
   return idx;
}
764 
/* printf is special because there are no limits on args */
static void
handle_printf(struct vtn_builder *b, uint32_t opcode,
              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)
{
   /* Without printf support, still produce the SPIR-V result value:
    * -1 is printf's error return, so dependent code keeps working. */
   if (!b->options->caps.printf) {
      vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));
      return;
   }

   /* Step 1. extract the format string */

   /*
    * info_idx is 1-based to match clover/llvm
    * the backend indexes the info table at info_idx - 1.
    */
   b->shader->printf_info_count++;
   unsigned info_idx = b->shader->printf_info_count;

   b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,
                                     nir_printf_info, info_idx);
   nir_printf_info *info = &b->shader->printf_info[info_idx - 1];

   info->strings = NULL;
   info->string_size = 0;

   /* The format string becomes offset 0 of this entry's string table. */
   vtn_add_printf_string(b, w_src[0], info);

   info->num_args = num_srcs - 1;
   info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);

   /* Step 2, build an ad-hoc struct type out of the args */
   unsigned field_offset = 0;
   struct glsl_struct_field *fields =
      rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);
   for (unsigned i = 1; i < num_srcs; ++i) {
      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
      struct vtn_type *src_type = val->type;
      fields[i - 1].type = src_type->type;
      fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);
      /* Each arg is placed at a 4-byte-aligned offset; arg_sizes records
       * the CL size of the arg so the backend can unpack the struct. */
      field_offset = align(field_offset, 4);
      fields[i - 1].offset = field_offset;
      info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);
      field_offset += glsl_get_cl_size(src_type->type);
   }
   const struct glsl_type *struct_type =
      glsl_struct_type(fields, num_srcs - 1, "printf", true);

   /* Step 3, create a variable of that type and populate its fields */
   nir_variable *var = nir_local_variable_create(b->func->nir_func->impl,
                                                 struct_type, NULL);
   nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);
   size_t fmt_pos = 0;
   for (unsigned i = 1; i < num_srcs; ++i) {
      nir_deref_instr *field_deref =
         nir_build_deref_struct(&b->nb, deref_var, i - 1);
      nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;
      /* extract strings */
      /* NOTE(review): fmt_pos is size_t but is compared against int -1;
       * this relies on -1 converting to (size_t)-1, the failure value of
       * util_printf_next_spec_pos — consider (size_t)-1 explicitly. */
      fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);
      if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {
         /* A %s arg is replaced by its index into the string table rather
          * than a pointer, so the backend can look the string up later. */
         unsigned idx = vtn_add_printf_string(b, w_src[i], info);
         nir_store_deref(&b->nb, field_deref,
                         nir_imm_intN_t(&b->nb, idx, field_src->bit_size),
                         ~0 /* write_mask */);
      } else
         nir_store_deref(&b->nb, field_deref, field_src, ~0);
   }

   /* Lastly, the actual intrinsic */
   nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);
   nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);
   vtn_push_nir_ssa(b, w_dest[1], ret);
}
838 
839 static nir_ssa_def *
handle_round(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)840 handle_round(struct vtn_builder *b, uint32_t opcode,
841              unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
842              const struct vtn_type *dest_type)
843 {
844    nir_ssa_def *src = srcs[0];
845    nir_builder *nb = &b->nb;
846    nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);
847    nir_ssa_def *truncated = nir_ftrunc(nb, src);
848    nir_ssa_def *remainder = nir_fsub(nb, src, truncated);
849 
850    return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),
851                     nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);
852 }
853 
854 static nir_ssa_def *
handle_shuffle(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)855 handle_shuffle(struct vtn_builder *b, uint32_t opcode,
856                unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
857                const struct vtn_type *dest_type)
858 {
859    struct nir_ssa_def *input = srcs[0];
860    struct nir_ssa_def *mask = srcs[1];
861 
862    unsigned out_elems = dest_type->length;
863    nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
864    unsigned in_elems = input->num_components;
865    if (mask->bit_size != 32)
866       mask = nir_u2u32(&b->nb, mask);
867    mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));
868    for (unsigned i = 0; i < out_elems; i++)
869       outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));
870 
871    return nir_vec(&b->nb, outres, out_elems);
872 }
873 
874 static nir_ssa_def *
handle_shuffle2(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)875 handle_shuffle2(struct vtn_builder *b, uint32_t opcode,
876                 unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
877                 const struct vtn_type *dest_type)
878 {
879    struct nir_ssa_def *input0 = srcs[0];
880    struct nir_ssa_def *input1 = srcs[1];
881    struct nir_ssa_def *mask = srcs[2];
882 
883    unsigned out_elems = dest_type->length;
884    nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
885    unsigned in_elems = input0->num_components;
886    unsigned total_mask = 2 * in_elems - 1;
887    unsigned half_mask = in_elems - 1;
888    if (mask->bit_size != 32)
889       mask = nir_u2u32(&b->nb, mask);
890    mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));
891    for (unsigned i = 0; i < out_elems; i++) {
892       nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);
893       nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));
894       nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);
895       nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);
896       nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));
897       outres[i] = nir_bcsel(&b->nb, sel, val0, val1);
898    }
899    return nir_vec(&b->nb, outres, out_elems);
900 }
901 
/* Dispatch one OpenCL.std extended instruction to its handler.
 * w points at the OpExtInst words: w + 1 is the result <type-id, id> pair
 * and the instruction's own operands start at w + 5 (per the SPIR-V
 * OpExtInst layout).  Returns true if the opcode was handled.
 */
bool
vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                              const uint32_t *w, unsigned count)
{
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;

   switch (cl_opcode) {
   /* Ops translated by handle_alu. */
   case OpenCLstd_Fabs:
   case OpenCLstd_SAbs:
   case OpenCLstd_UAbs:
   case OpenCLstd_SAdd_sat:
   case OpenCLstd_UAdd_sat:
   case OpenCLstd_Ceil:
   case OpenCLstd_Floor:
   case OpenCLstd_Fmax:
   case OpenCLstd_SHadd:
   case OpenCLstd_UHadd:
   case OpenCLstd_SMax:
   case OpenCLstd_UMax:
   case OpenCLstd_Fmin:
   case OpenCLstd_SMin:
   case OpenCLstd_UMin:
   case OpenCLstd_Mix:
   case OpenCLstd_Native_cos:
   case OpenCLstd_Native_divide:
   case OpenCLstd_Native_exp2:
   case OpenCLstd_Native_log2:
   case OpenCLstd_Native_powr:
   case OpenCLstd_Native_recip:
   case OpenCLstd_Native_rsqrt:
   case OpenCLstd_Native_sin:
   case OpenCLstd_Native_sqrt:
   case OpenCLstd_SMul_hi:
   case OpenCLstd_UMul_hi:
   case OpenCLstd_Popcount:
   case OpenCLstd_SRhadd:
   case OpenCLstd_URhadd:
   case OpenCLstd_Rsqrt:
   case OpenCLstd_Sign:
   case OpenCLstd_Sqrt:
   case OpenCLstd_SSub_sat:
   case OpenCLstd_USub_sat:
   case OpenCLstd_Trunc:
   case OpenCLstd_Rint:
   case OpenCLstd_Half_divide:
   case OpenCLstd_Half_recip:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);
      return true;
   /* Ops expanded by handle_special (builtin-builder lowerings etc.). */
   case OpenCLstd_SAbs_diff:
   case OpenCLstd_UAbs_diff:
   case OpenCLstd_SMad_hi:
   case OpenCLstd_UMad_hi:
   case OpenCLstd_SMad24:
   case OpenCLstd_UMad24:
   case OpenCLstd_SMul24:
   case OpenCLstd_UMul24:
   case OpenCLstd_Bitselect:
   case OpenCLstd_FClamp:
   case OpenCLstd_SClamp:
   case OpenCLstd_UClamp:
   case OpenCLstd_Copysign:
   case OpenCLstd_Cross:
   case OpenCLstd_Degrees:
   case OpenCLstd_Fdim:
   case OpenCLstd_Fma:
   case OpenCLstd_Distance:
   case OpenCLstd_Fast_distance:
   case OpenCLstd_Fast_length:
   case OpenCLstd_Fast_normalize:
   case OpenCLstd_Half_rsqrt:
   case OpenCLstd_Half_sqrt:
   case OpenCLstd_Length:
   case OpenCLstd_Mad:
   case OpenCLstd_Maxmag:
   case OpenCLstd_Minmag:
   case OpenCLstd_Nan:
   case OpenCLstd_Nextafter:
   case OpenCLstd_Normalize:
   case OpenCLstd_Radians:
   case OpenCLstd_Rotate:
   case OpenCLstd_Select:
   case OpenCLstd_Step:
   case OpenCLstd_Smoothstep:
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
   case OpenCLstd_Clz:
   case OpenCLstd_Ctz:
   case OpenCLstd_Native_exp:
   case OpenCLstd_Native_exp10:
   case OpenCLstd_Native_log:
   case OpenCLstd_Native_log10:
   case OpenCLstd_Acos:
   case OpenCLstd_Acosh:
   case OpenCLstd_Acospi:
   case OpenCLstd_Asin:
   case OpenCLstd_Asinh:
   case OpenCLstd_Asinpi:
   case OpenCLstd_Atan:
   case OpenCLstd_Atan2:
   case OpenCLstd_Atanh:
   case OpenCLstd_Atanpi:
   case OpenCLstd_Atan2pi:
   case OpenCLstd_Fract:
   case OpenCLstd_Frexp:
   case OpenCLstd_Exp:
   case OpenCLstd_Exp2:
   case OpenCLstd_Expm1:
   case OpenCLstd_Exp10:
   case OpenCLstd_Fmod:
   case OpenCLstd_Ilogb:
   case OpenCLstd_Log:
   case OpenCLstd_Log2:
   case OpenCLstd_Log10:
   case OpenCLstd_Log1p:
   case OpenCLstd_Logb:
   case OpenCLstd_Ldexp:
   case OpenCLstd_Cos:
   case OpenCLstd_Cosh:
   case OpenCLstd_Cospi:
   case OpenCLstd_Sin:
   case OpenCLstd_Sinh:
   case OpenCLstd_Sinpi:
   case OpenCLstd_Tan:
   case OpenCLstd_Tanh:
   case OpenCLstd_Tanpi:
   case OpenCLstd_Cbrt:
   case OpenCLstd_Erfc:
   case OpenCLstd_Erf:
   case OpenCLstd_Lgamma:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Tgamma:
   case OpenCLstd_Pow:
   case OpenCLstd_Powr:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Remainder:
   case OpenCLstd_Remquo:
   case OpenCLstd_Hypot:
   case OpenCLstd_Sincos:
   case OpenCLstd_Modf:
   case OpenCLstd_UMad_sat:
   case OpenCLstd_SMad_sat:
   case OpenCLstd_Native_tan:
   case OpenCLstd_Half_cos:
   case OpenCLstd_Half_exp:
   case OpenCLstd_Half_exp2:
   case OpenCLstd_Half_exp10:
   case OpenCLstd_Half_log:
   case OpenCLstd_Half_log2:
   case OpenCLstd_Half_log10:
   case OpenCLstd_Half_powr:
   case OpenCLstd_Half_sin:
   case OpenCLstd_Half_tan:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);
      return true;
   /* Vector load/store families get dedicated handlers since they decode
    * their own operands (pointer, offset, vector size, rounding mode). */
   case OpenCLstd_Vloadn:
   case OpenCLstd_Vload_half:
   case OpenCLstd_Vload_halfn:
   case OpenCLstd_Vloada_halfn:
      vtn_handle_opencl_vload(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstoren:
   case OpenCLstd_Vstore_half:
   case OpenCLstd_Vstore_halfn:
   case OpenCLstd_Vstorea_halfn:
      vtn_handle_opencl_vstore(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstore_half_r:
   case OpenCLstd_Vstore_halfn_r:
   case OpenCLstd_Vstorea_halfn_r:
      vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Shuffle:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);
      return true;
   case OpenCLstd_Shuffle2:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);
      return true;
   case OpenCLstd_Round:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);
      return true;
   case OpenCLstd_Printf:
      handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);
      return true;
   case OpenCLstd_Prefetch:
      /* TODO maybe add a nir instruction for this? */
      return true;
   default:
      vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
      return false;
   }
}
1094 
1095 bool
vtn_handle_opencl_core_instruction(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)1096 vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
1097                                    const uint32_t *w, unsigned count)
1098 {
1099    switch (opcode) {
1100    case SpvOpGroupAsyncCopy:
1101       handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
1102       return true;
1103    case SpvOpGroupWaitEvents:
1104       handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
1105       return true;
1106    default:
1107       return false;
1108    }
1109    return true;
1110 }
1111