1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4 
5 #include <cmath>
6 
7 #include <OpenImageIO/fmath.h>
8 
9 #include "oslexec_pvt.h"
10 #include <OSL/genclosure.h>
11 #include "backendllvm.h"
12 
13 using namespace OSL;
14 using namespace OSL::pvt;
15 
16 OSL_NAMESPACE_ENTER
17 
18 namespace pvt {
19 
20 static ustring op_and("and");
21 static ustring op_bitand("bitand");
22 static ustring op_bitor("bitor");
23 static ustring op_break("break");
24 static ustring op_ceil("ceil");
25 static ustring op_cellnoise("cellnoise");
26 static ustring op_color("color");
27 static ustring op_compl("compl");
28 static ustring op_continue("continue");
29 static ustring op_dowhile("dowhile");
30 static ustring op_eq("eq");
31 static ustring op_error("error");
32 static ustring op_fabs("fabs");
33 static ustring op_floor("floor");
34 static ustring op_for("for");
35 static ustring op_format("format");
36 static ustring op_fprintf("fprintf");
37 static ustring op_ge("ge");
38 static ustring op_gt("gt");
39 static ustring op_hashnoise("hashnoise");
40 static ustring op_if("if");
41 static ustring op_le("le");
42 static ustring op_logb("logb");
43 static ustring op_lt("lt");
44 static ustring op_min("min");
45 static ustring op_neq("neq");
46 static ustring op_normal("normal");
47 static ustring op_or("or");
48 static ustring op_point("point");
49 static ustring op_printf("printf");
50 static ustring op_round("round");
51 static ustring op_shl("shl");
52 static ustring op_shr("shr");
53 static ustring op_sign("sign");
54 static ustring op_step("step");
55 static ustring op_trunc("trunc");
56 static ustring op_vector("vector");
57 static ustring op_warning("warning");
58 static ustring op_xor("xor");
59 
60 static ustring u_distance ("distance");
61 static ustring u_index ("index");
62 static ustring u__empty;  // empty/default ustring
63 
64 
65 
66 /// Macro that defines the arguments to LLVM IR generating routines
67 ///
68 #define LLVMGEN_ARGS     BackendLLVM &rop, int opnum
69 
70 /// Macro that defines the full declaration of an LLVM generator.
71 ///
72 #define LLVMGEN(name)  bool name (LLVMGEN_ARGS)
73 
74 // Forward decl
75 LLVMGEN (llvm_gen_generic);
76 
77 
78 
79 void
llvm_gen_debug_printf(string_view message)80 BackendLLVM::llvm_gen_debug_printf (string_view message)
81 {
82     ustring s = ustring::sprintf ("(%s %s) %s", inst()->shadername(),
83                                  inst()->layername(), message);
84     ll.call_function ("osl_printf", sg_void_ptr(), ll.constant("%s\n"),
85                       ll.constant(s));
86 }
87 
88 
89 
90 void
llvm_gen_warning(string_view message)91 BackendLLVM::llvm_gen_warning (string_view message)
92 {
93     ll.call_function ("osl_warning", sg_void_ptr(), ll.constant("%s\n"),
94                       ll.constant(message));
95 }
96 
97 
98 
99 void
llvm_gen_error(string_view message)100 BackendLLVM::llvm_gen_error (string_view message)
101 {
102     ll.call_function ("osl_error", sg_void_ptr(), ll.constant("%s\n"),
103                       ll.constant(message));
104 }
105 
106 
107 
108 void
llvm_call_layer(int layer,bool unconditional)109 BackendLLVM::llvm_call_layer (int layer, bool unconditional)
110 {
111     // Make code that looks like:
112     //     if (! groupdata->run[parentlayer])
113     //         parent_layer (sg, groupdata);
114     // if it's a conditional call, or
115     //     parent_layer (sg, groupdata);
116     // if it's run unconditionally.
117     // The code in the parent layer itself will set its 'executed' flag.
118 
119     llvm::Value *args[] = { sg_ptr (), groupdata_ptr () };
120 
121     ShaderInstance *parent = group()[layer];
122     llvm::Value *trueval = ll.constant_bool(true);
123     llvm::Value *layerfield = layer_run_ref(layer_remap(layer));
124     llvm::BasicBlock *then_block = NULL, *after_block = NULL;
125     if (! unconditional) {
126         llvm::Value *executed = ll.op_load (layerfield);
127         executed = ll.op_ne (executed, trueval);
128         then_block = ll.new_basic_block ("");
129         after_block = ll.new_basic_block ("");
130         ll.op_branch (executed, then_block, after_block);
131         // insert point is now then_block
132     }
133 
134     // Mark the call as a fast call
135     llvm::Value *funccall = ll.call_function (layer_function_name(group(), *parent).c_str(), args);
136     if (!parent->entry_layer())
137         ll.mark_fast_func_call (funccall);
138 
139     if (! unconditional)
140         ll.op_branch (after_block);  // also moves insert point
141 }
142 
143 
144 
145 void
llvm_run_connected_layers(Symbol & sym,int symindex,int opnum,std::set<int> * already_run)146 BackendLLVM::llvm_run_connected_layers (Symbol &sym, int symindex,
147                                              int opnum,
148                                              std::set<int> *already_run)
149 {
150     if (sym.valuesource() != Symbol::ConnectedVal)
151         return;  // Nothing to do
152 
153     bool inmain = (opnum >= inst()->maincodebegin() &&
154                    opnum < inst()->maincodeend());
155 
156     for (int c = 0;  c < inst()->nconnections();  ++c) {
157         const Connection &con (inst()->connection (c));
158         // If the connection gives a value to this param
159         if (con.dst.param == symindex) {
160             // already_run is a set of layers run for this particular op.
161             // Just so we don't stupidly do several consecutive checks on
162             // whether we ran this same layer. It's JUST for this op.
163             if (already_run) {
164                 if (already_run->count (con.srclayer))
165                     continue;  // already ran that one on this op
166                 else
167                     already_run->insert (con.srclayer);  // mark it
168             }
169 
170             if (inmain) {
171                 // There is an instance-wide m_layers_already_run that tries
172                 // to remember which earlier layers have unconditionally
173                 // been run at any point in the execution of this layer. But
174                 // only honor (and modify) that when in the main code
175                 // section, not when in init ops, which are inherently
176                 // conditional.
177                 if (m_layers_already_run.count (con.srclayer)) {
178                     continue;  // already unconditionally ran the layer
179                 }
180                 if (! m_in_conditional[opnum]) {
181                     // Unconditionally running -- mark so we don't do it
182                     // again. If we're inside a conditional, don't mark
183                     // because it may not execute the conditional body.
184                     m_layers_already_run.insert (con.srclayer);
185                 }
186             }
187 
188             // If the earlier layer it comes from has not yet been
189             // executed, do so now.
190             llvm_call_layer (con.srclayer);
191         }
192     }
193 }
194 
195 
196 
197 OSL_PRAGMA_WARNING_PUSH
198 OSL_GCC_PRAGMA(GCC diagnostic ignored "-Wunused-parameter")
199 
LLVMGEN(llvm_gen_nop)200 LLVMGEN (llvm_gen_nop)
201 {
202     return true;
203 }
204 
205 OSL_PRAGMA_WARNING_POP
206 
207 
208 
/// Generator for "useparam": makes sure each parameter named by the op is
/// ready to be read, by running the upstream layers connected to it and,
/// with lazy userdata enabled, assigning the initial value of interpolated
/// parameters.
LLVMGEN (llvm_gen_useparam)
{
    OSL_DASSERT (! rop.inst()->unused() &&
                 "oops, thought this layer was unused, why do we call it?");

    // Shared across all args of this op, so an upstream layer needed by
    // several of them is only checked once.
    std::set<int> layers_checked;

    Opcode &op (rop.inst()->ops()[opnum]);
    for (int a = 0;  a < op.nargs();  ++a) {
        Symbol& sym = *rop.opargsym (op, a);
        int symindex = rop.inst()->arg (op.firstarg()+a);
        rop.llvm_run_connected_layers (sym, symindex, opnum, &layers_checked);

        // What follows only concerns interpolated (userdata) parameters
        // that are being initialized lazily.
        const bool is_param = (sym.symtype() == SymTypeParam ||
                               sym.symtype() == SymTypeOutputParam);
        if (! is_param)
            continue;
        if (sym.lockgeom() || sym.typespec().is_closure())
            continue;
        if (sym.connected() || sym.connected_down())
            continue;
        if (rop.shadingsys().lazy_userdata())
            rop.llvm_assign_initial_value (sym);
    }
    return true;
}
234 
235 
236 
237 // Used for printf, error, warning, format, fprintf
LLVMGEN(llvm_gen_printf)238 LLVMGEN (llvm_gen_printf)
239 {
240     Opcode &op (rop.inst()->ops()[opnum]);
241 
242     // Prepare the args for the call
243 
244     // Which argument is the format string?  Usually 0, but for op
245     // format() and fprintf(), the formatting string is argument #1.
246     int format_arg = (op.opname() == "format" || op.opname() == "fprintf") ? 1 : 0;
247     Symbol& format_sym = *rop.opargsym (op, format_arg);
248 
249     std::vector<llvm::Value*> call_args;
250     if (!format_sym.is_constant()) {
251         rop.shadingcontext()->warningf("%s must currently have constant format\n",
252                                        op.opname());
253         return false;
254     }
255 
256     // For some ops, we push the shader globals pointer
257     if (op.opname() == op_printf || op.opname() == op_error ||
258             op.opname() == op_warning || op.opname() == op_fprintf)
259         call_args.push_back (rop.sg_void_ptr());
260 
261     // fprintf also needs the filename
262     if (op.opname() == op_fprintf) {
263         Symbol& Filename = *rop.opargsym (op, 0);
264         llvm::Value* fn = rop.llvm_load_value (Filename);
265         call_args.push_back (fn);
266     }
267 
268     // We're going to need to adjust the format string as we go, but I'd
269     // like to reserve a spot for the char*.
270     size_t new_format_slot = call_args.size();
271     call_args.push_back(NULL);
272 
273     ustring format_ustring = *((ustring*)format_sym.data());
274     const char* format = format_ustring.c_str();
275     std::string s;
276     int arg = format_arg + 1;
277     size_t optix_size = 0;
278     while (*format != '\0') {
279         if (*format == '%') {
280             if (format[1] == '%') {
281                 // '%%' is a literal '%'
282                 s += "%%";
283                 format += 2;  // skip both percentages
284                 continue;
285             }
286             const char *oldfmt = format;  // mark beginning of format
287             while (*format &&
288                    *format != 'c' && *format != 'd' && *format != 'e' &&
289                    *format != 'f' && *format != 'g' && *format != 'i' &&
290                    *format != 'm' && *format != 'n' && *format != 'o' &&
291                    *format != 'p' && *format != 's' && *format != 'u' &&
292                    *format != 'v' && *format != 'x' && *format != 'X')
293                 ++format;
294             char formatchar = *format++;  // Also eat the format char
295             if (arg >= op.nargs()) {
296                 rop.shadingcontext()->errorf("Mismatch between format string and arguments (%s:%d)",
297                                              op.sourcefile(), op.sourceline());
298                 return false;
299             }
300 
301             std::string ourformat (oldfmt, format);  // straddle the format
302             // Doctor it to fix mismatches between format and data
303             Symbol& sym (*rop.opargsym (op, arg));
304             OSL_ASSERT (! sym.typespec().is_structure_based());
305 
306             TypeDesc simpletype (sym.typespec().simpletype());
307             int num_elements = simpletype.numelements();
308             int num_components = simpletype.aggregate;
309             if ((sym.typespec().is_closure_based() ||
310                  simpletype.basetype == TypeDesc::STRING)
311                 && formatchar != 's') {
312                 ourformat[ourformat.length()-1] = 's';
313             }
314             if (simpletype.basetype == TypeDesc::INT && formatchar != 'd' &&
315                 formatchar != 'i' && formatchar != 'o' && formatchar != 'u' &&
316                 formatchar != 'x' && formatchar != 'X') {
317                 ourformat[ourformat.length()-1] = 'd';
318             }
319             if (simpletype.basetype == TypeDesc::FLOAT && formatchar != 'f' &&
320                 formatchar != 'g' && formatchar != 'c' && formatchar != 'e' &&
321                 formatchar != 'm' && formatchar != 'n' && formatchar != 'p' &&
322                 formatchar != 'v') {
323                 ourformat[ourformat.length()-1] = 'f';
324             }
325             // NOTE(boulos): Only for debug mode do the derivatives get printed...
326             for (int a = 0;  a < num_elements;  ++a) {
327                 llvm::Value *arrind = simpletype.arraylen ? rop.ll.constant(a) : NULL;
328                 if (sym.typespec().is_closure_based()) {
329                     s += ourformat;
330                     llvm::Value *v = rop.llvm_load_value (sym, 0, arrind, 0);
331                     v = rop.ll.call_function ("osl_closure_to_string", rop.sg_void_ptr(), v);
332                     call_args.push_back (v);
333                     continue;
334                 }
335 
336                 for (int c = 0; c < num_components; c++) {
337                     if (c != 0 || a != 0)
338                         s += " ";
339                     s += ourformat;
340 
341                     llvm::Value* loaded = nullptr;
342                     if (rop.use_optix() && simpletype.basetype == TypeDesc::STRING) {
343                         // In the OptiX case, we register each string separately.
344                         if (simpletype.arraylen >= 1) {
345                             // Mangle the element's name in case llvm_load_device_string calls getOrAllocateLLVMSymbol
346                             ustring name = ustring::sprintf("__symname__%s[%d]", sym.mangled(), a);
347                             Symbol lsym(name, TypeDesc::TypeString, sym.symtype());
348                             lsym.data(&((ustring*)sym.data())[a]);
349                             loaded = rop.llvm_load_device_string (lsym, /*follow*/ true);
350                         } else {
351                             loaded = rop.llvm_load_device_string (sym, /*follow*/ true);
352                         }
353                         optix_size += sizeof(uint64_t);
354                     }
355                     else {
356                         loaded = rop.llvm_load_value (sym, 0, arrind, c);
357 
358                         if (simpletype.basetype == TypeDesc::FLOAT) {
359                             // C varargs convention upconverts float->double.
360                             loaded = rop.ll.op_float_to_double(loaded);
361                             // Ensure that 64-bit values are aligned to 8-byte boundaries
362                             optix_size = (optix_size + sizeof(double) - 1) & ~(sizeof(double) - 1);
363                             optix_size += sizeof(double);
364                         }
365                         else if (simpletype.basetype == TypeDesc::INT)
366                             optix_size += sizeof(int);
367                     }
368 
369                     call_args.push_back (loaded);
370                 }
371             }
372             ++arg;
373         } else {
374             // Everything else -- just copy the character and advance
375             s += *format++;
376         }
377     }
378 
379 
380     // In OptiX, printf currently supports 0 or 1 arguments, and the signature
381     // requires 1 argument, so push a null pointer onto the call args if there
382     // is no argument.
383     if (rop.use_optix() && arg == format_arg + 1) {
384         call_args.push_back(rop.ll.void_ptr_null());
385     }
386 
387     // Some ops prepend things
388     if (op.opname() == op_error || op.opname() == op_warning) {
389         std::string prefix = Strutil::sprintf ("Shader %s [%s]: ",
390                                                op.opname(),
391                                                rop.inst()->shadername());
392         s = prefix + s;
393     }
394 
395     // Now go back and put the new format string in its place
396     if (! rop.use_optix()) {
397         call_args[new_format_slot] = rop.ll.constant (s.c_str());
398     }
399     else {
400         // In the OptiX case, we do this:
401         // void* args = { arg0, arg1, arg2 };
402         // osl_printf(sg, fmt, args);
403         //   vprintf(fmt, args);
404         //
405         Symbol sym(format_sym.name(), format_sym.typespec(), format_sym.symtype());
406         format_ustring = s;
407         sym.data(&format_ustring);
408         call_args[new_format_slot] = rop.llvm_load_device_string (sym, /*follow*/ true);
409 
410         size_t nargs = call_args.size() - (new_format_slot+1);
411         llvm::Value *voids = rop.ll.op_alloca (rop.ll.type_char(), optix_size, std::string(), 8);
412         optix_size = 0;
413         for (size_t i = 0; i < nargs; ++i) {
414             llvm::Value* arg = call_args[new_format_slot+1+i];
415             if (arg->getType()->isFloatingPointTy()) {
416                 // Ensure that 64-bit values are aligned to 8-byte boundaries
417                 optix_size = (optix_size + sizeof(double) - 1) & ~(sizeof(double)-1);
418             }
419             llvm::Value* memptr = rop.ll.offset_ptr (voids, optix_size);
420             if (arg->getType()->isIntegerTy()) {
421                 llvm::Value* iptr = rop.ll.ptr_cast(memptr, rop.ll.type_int_ptr());
422                 rop.ll.op_store (arg, iptr);
423                 optix_size += sizeof(int);
424             } else if (arg->getType()->isFloatingPointTy()) {
425                 llvm::Value* fptr = rop.ll.ptr_cast(memptr, rop.ll.type_double_ptr());
426                 rop.ll.op_store (arg, fptr);
427                 optix_size += sizeof(double);
428             }
429             else {
430                 llvm::Value* vptr = rop.ll.ptr_to_cast(memptr, rop.ll.type_void_ptr());
431                 rop.ll.op_store (arg, vptr);
432                 optix_size += sizeof(uint64_t);
433             }
434         }
435         call_args.resize(new_format_slot+2);
436         call_args.back() = rop.ll.void_ptr(voids);
437     }
438 
439     // Construct the function name and call it.
440     std::string opname = std::string("osl_") + op.opname().string();
441     llvm::Value *ret = rop.ll.call_function (opname.c_str(), call_args);
442 
443     // The format op returns a string value, put in in the right spot
444     if (op.opname() == op_format)
445         rop.llvm_store_value (ret, *rop.opargsym (op, 0));
446     return true;
447 }
448 
449 
450 
/// Generator for "add": Result = A + B, either for two closures (through
/// osl_add_closure_closure) or component by component, derivatives
/// included.  Returns false if an operand value could not be loaded.
LLVMGEN (llvm_gen_add)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& A = *rop.opargsym (op, 1);
    Symbol& B = *rop.opargsym (op, 2);

    OSL_DASSERT (! A.typespec().is_array() && ! B.typespec().is_array());

    // Closure sums are handled by a runtime call.
    if (Result.typespec().is_closure()) {
        OSL_DASSERT (A.typespec().is_closure() && B.typespec().is_closure());
        llvm::Value *closure_args[] = {
            rop.sg_void_ptr(),
            rop.llvm_load_value (A),
            rop.llvm_load_value (B)
        };
        llvm::Value *sum = rop.ll.call_function ("osl_add_closure_closure", closure_args);
        rop.llvm_store_value (sum, Result, 0, NULL, 0);
        return true;
    }

    TypeDesc type = Result.typespec().simpletype();
    int ncomps = type.aggregate;

    // Values.  This covers f+f, v+v, v+f, f+v and i+i, which is all that
    // oslc should allow.
    for (int c = 0; c < ncomps; ++c) {
        llvm::Value *lhs = rop.loadLLVMValue (A, c, 0, type);
        llvm::Value *rhs = rop.loadLLVMValue (B, c, 0, type);
        if (!lhs || !rhs)
            return false;
        rop.storeLLVMValue (rop.ll.op_add (lhs, rhs), Result, c, 0);
    }

    if (! Result.has_derivs())
        return true;

    if (! A.has_derivs() && ! B.has_derivs()) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs (Result);
        return true;
    }

    // The derivative of a sum is the sum of the derivatives.
    for (int d = 1;  d <= 2;  ++d) {  // dx, dy
        for (int c = 0; c < ncomps; ++c) {
            llvm::Value *lhs = rop.loadLLVMValue (A, c, d, type);
            llvm::Value *rhs = rop.loadLLVMValue (B, c, d, type);
            rop.storeLLVMValue (rop.ll.op_add (lhs, rhs), Result, c, d);
        }
    }
    return true;
}
502 
503 
504 
/// Generator for "sub": Result = A - B, component by component,
/// derivatives included.  Returns false if an operand value could not be
/// loaded.
LLVMGEN (llvm_gen_sub)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& A = *rop.opargsym (op, 1);
    Symbol& B = *rop.opargsym (op, 2);

    TypeDesc type = Result.typespec().simpletype();
    int ncomps = type.aggregate;

    OSL_DASSERT (! Result.typespec().is_closure_based() &&
            "subtraction of closures not supported");

    // Values.  This covers f-f, v-v, v-f, f-v and i-i, which is all that
    // oslc should allow.
    for (int c = 0; c < ncomps; ++c) {
        llvm::Value *lhs = rop.loadLLVMValue (A, c, 0, type);
        llvm::Value *rhs = rop.loadLLVMValue (B, c, 0, type);
        if (!lhs || !rhs)
            return false;
        rop.storeLLVMValue (rop.ll.op_sub (lhs, rhs), Result, c, 0);
    }

    if (! Result.has_derivs())
        return true;

    if (! A.has_derivs() && ! B.has_derivs()) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs (Result);
        return true;
    }

    // The derivative of a difference is the difference of the derivatives.
    for (int d = 1;  d <= 2;  ++d) {  // dx, dy
        for (int c = 0; c < ncomps; ++c) {
            llvm::Value *lhs = rop.loadLLVMValue (A, c, d, type);
            llvm::Value *rhs = rop.loadLLVMValue (B, c, d, type);
            rop.storeLLVMValue (rop.ll.op_sub (lhs, rhs), Result, c, d);
        }
    }
    return true;
}
546 
547 
548 
LLVMGEN(llvm_gen_mul)549 LLVMGEN (llvm_gen_mul)
550 {
551     Opcode &op (rop.inst()->ops()[opnum]);
552     Symbol& Result = *rop.opargsym (op, 0);
553     Symbol& A = *rop.opargsym (op, 1);
554     Symbol& B = *rop.opargsym (op, 2);
555 
556     TypeDesc type = Result.typespec().simpletype();
557     OSL_MAYBE_UNUSED bool is_float = !Result.typespec().is_closure_based() && Result.typespec().is_float_based();
558     int num_components = type.aggregate;
559 
560     // multiplication involving closures
561     if (Result.typespec().is_closure()) {
562         llvm::Value *valargs[3];
563         valargs[0] = rop.sg_void_ptr();
564         bool tfloat;
565         if (A.typespec().is_closure()) {
566             tfloat = B.typespec().is_float();
567             valargs[1] = rop.llvm_load_value (A);
568             valargs[2] = tfloat ? rop.llvm_load_value (B) : rop.llvm_void_ptr(B);
569         } else {
570             tfloat = A.typespec().is_float();
571             valargs[1] = rop.llvm_load_value (B);
572             valargs[2] = tfloat ? rop.llvm_load_value (A) : rop.llvm_void_ptr(A);
573         }
574         llvm::Value *res = tfloat ? rop.ll.call_function ("osl_mul_closure_float", valargs)
575                                   : rop.ll.call_function ("osl_mul_closure_color", valargs);
576         rop.llvm_store_value (res, Result, 0, NULL, 0);
577         return true;
578     }
579 
580     // multiplication involving matrices
581     if (Result.typespec().is_matrix()) {
582         if (A.typespec().is_float()) {
583             if (B.typespec().is_matrix())
584                 rop.llvm_call_function ("osl_mul_mmf", Result, B, A);
585             else OSL_ASSERT(0 && "frontend should not allow");
586         } else if (A.typespec().is_matrix()) {
587             if (B.typespec().is_float())
588                 rop.llvm_call_function ("osl_mul_mmf", Result, A, B);
589             else if (B.typespec().is_matrix())
590                 rop.llvm_call_function ("osl_mul_mmm", Result, A, B);
591             else OSL_ASSERT(0 && "frontend should not allow");
592         } else OSL_ASSERT (0 && "frontend should not allow");
593         if (Result.has_derivs())
594             rop.llvm_zero_derivs (Result);
595         return true;
596     }
597 
598     // The following should handle f*f, v*v, v*f, f*v, i*i
599     // That's all that should be allowed by oslc.
600     for (int i = 0; i < num_components; i++) {
601         llvm::Value *a = rop.llvm_load_value (A, 0, i, type);
602         llvm::Value *b = rop.llvm_load_value (B, 0, i, type);
603         if (!a || !b)
604             return false;
605         llvm::Value *r = rop.ll.op_mul (a, b);
606         rop.llvm_store_value (r, Result, 0, i);
607 
608         if (Result.has_derivs() && (A.has_derivs() || B.has_derivs())) {
609             // Multiplication of duals: (a*b, a*b.dx + a.dx*b, a*b.dy + a.dy*b)
610             OSL_DASSERT (is_float);
611             llvm::Value *ax = rop.llvm_load_value (A, 1, i, type);
612             llvm::Value *bx = rop.llvm_load_value (B, 1, i, type);
613             llvm::Value *abx = rop.ll.op_mul (a, bx);
614             llvm::Value *axb = rop.ll.op_mul (ax, b);
615             llvm::Value *rx = rop.ll.op_add (abx, axb);
616             llvm::Value *ay = rop.llvm_load_value (A, 2, i, type);
617             llvm::Value *by = rop.llvm_load_value (B, 2, i, type);
618             llvm::Value *aby = rop.ll.op_mul (a, by);
619             llvm::Value *ayb = rop.ll.op_mul (ay, b);
620             llvm::Value *ry = rop.ll.op_add (aby, ayb);
621             rop.llvm_store_value (rx, Result, 1, i);
622             rop.llvm_store_value (ry, Result, 2, i);
623         }
624     }
625 
626     if (Result.has_derivs() &&  ! (A.has_derivs() || B.has_derivs())) {
627         // Result has derivs, operands do not
628         rop.llvm_zero_derivs (Result);
629     }
630 
631     return true;
632 }
633 
634 
635 
LLVMGEN(llvm_gen_div)636 LLVMGEN (llvm_gen_div)
637 {
638     Opcode &op (rop.inst()->ops()[opnum]);
639     Symbol& Result = *rop.opargsym (op, 0);
640     Symbol& A = *rop.opargsym (op, 1);
641     Symbol& B = *rop.opargsym (op, 2);
642 
643     TypeDesc type = Result.typespec().simpletype();
644     bool is_float = Result.typespec().is_float_based();
645     int num_components = type.aggregate;
646 
647     OSL_DASSERT (! Result.typespec().is_closure_based());
648 
649     // division involving matrices
650     if (Result.typespec().is_matrix()) {
651         if (A.typespec().is_float()) {
652             OSL_ASSERT (!B.typespec().is_float() && "frontend should not allow");
653             if (B.typespec().is_matrix())
654                 rop.llvm_call_function ("osl_div_mfm", Result, A, B);
655             else OSL_ASSERT (0);
656         } else if (A.typespec().is_matrix()) {
657             if (B.typespec().is_float())
658                 rop.llvm_call_function ("osl_div_mmf", Result, A, B);
659             else if (B.typespec().is_matrix())
660                 rop.llvm_call_function ("osl_div_mmm", Result, A, B);
661             else OSL_ASSERT (0);
662         } else OSL_ASSERT (0);
663         if (Result.has_derivs())
664             rop.llvm_zero_derivs (Result);
665         return true;
666     }
667 
668     // The following should handle f/f, v/v, v/f, f/v, i/i
669     // That's all that should be allowed by oslc.
670     const char *safe_div = is_float ? "osl_safe_div_fff" : "osl_safe_div_iii";
671     bool deriv = (Result.has_derivs() && (A.has_derivs() || B.has_derivs()));
672     for (int i = 0; i < num_components; i++) {
673         llvm::Value *a = rop.llvm_load_value (A, 0, i, type);
674         llvm::Value *b = rop.llvm_load_value (B, 0, i, type);
675         if (!a || !b)
676             return false;
677         llvm::Value *a_div_b;
678         if (B.is_constant() && ! rop.is_zero(B))
679             a_div_b = rop.ll.op_div (a, b);
680         else
681             a_div_b = rop.ll.call_function (safe_div, a, b);
682         llvm::Value *rx = NULL, *ry = NULL;
683 
684         if (deriv) {
685             // Division of duals: (a/b, 1/b*(ax-a/b*bx), 1/b*(ay-a/b*by))
686             OSL_DASSERT (is_float);
687             llvm::Value *binv;
688             if (B.is_constant() && ! rop.is_zero(B))
689                 binv = rop.ll.op_div (rop.ll.constant(1.0f), b);
690             else
691                 binv = rop.ll.call_function (safe_div, rop.ll.constant(1.0f), b);
692             llvm::Value *ax = rop.llvm_load_value (A, 1, i, type);
693             llvm::Value *bx = rop.llvm_load_value (B, 1, i, type);
694             llvm::Value *a_div_b_mul_bx = rop.ll.op_mul (a_div_b, bx);
695             llvm::Value *ax_minus_a_div_b_mul_bx = rop.ll.op_sub (ax, a_div_b_mul_bx);
696             rx = rop.ll.op_mul (binv, ax_minus_a_div_b_mul_bx);
697             llvm::Value *ay = rop.llvm_load_value (A, 2, i, type);
698             llvm::Value *by = rop.llvm_load_value (B, 2, i, type);
699             llvm::Value *a_div_b_mul_by = rop.ll.op_mul (a_div_b, by);
700             llvm::Value *ay_minus_a_div_b_mul_by = rop.ll.op_sub (ay, a_div_b_mul_by);
701             ry = rop.ll.op_mul (binv, ay_minus_a_div_b_mul_by);
702         }
703 
704         rop.llvm_store_value (a_div_b, Result, 0, i);
705         if (deriv) {
706             rop.llvm_store_value (rx, Result, 1, i);
707             rop.llvm_store_value (ry, Result, 2, i);
708         }
709     }
710 
711     if (Result.has_derivs() &&  ! (A.has_derivs() || B.has_derivs())) {
712         // Result has derivs, operands do not
713         rop.llvm_zero_derivs (Result);
714     }
715 
716     return true;
717 }
718 
719 
720 
/// Generator for "mod": Result = A % B, component by component, using
/// osl_fmod_fff / osl_safe_mod_iii unless B is a constant that is not
/// zero.  The derivatives of the result are those of A.  Returns false if
/// an operand value could not be loaded.
LLVMGEN (llvm_gen_modulus)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& A = *rop.opargsym (op, 1);
    Symbol& B = *rop.opargsym (op, 2);

    TypeDesc type = Result.typespec().simpletype();
    bool is_float = Result.typespec().is_float_based();
    int ncomps = type.aggregate;

#ifdef OSL_LLVM_NO_BITCODE
    // On Windows 32 bit this calls an unknown instruction, probably need to
    // link with LLVM compiler-rt to fix, for now just fall back to op
    if (is_float)
        return llvm_gen_generic (rop, opnum);
#endif

    // This covers f%f, v%v, v%f and i%i, which is all that oslc should
    // allow.
    const char *safe_mod = is_float ? "osl_fmod_fff" : "osl_safe_mod_iii";

    // A plain modulus when B is a constant that is not zero, the runtime
    // function otherwise.
    auto modulo = [&](llvm::Value *num, llvm::Value *den) -> llvm::Value* {
        if (B.is_constant() && ! rop.is_zero(B))
            return rop.ll.op_mod (num, den);
        return rop.ll.call_function (safe_mod, num, den);
    };

    for (int c = 0; c < ncomps; ++c) {
        llvm::Value *a = rop.loadLLVMValue (A, c, 0, type);
        llvm::Value *b = rop.loadLLVMValue (B, c, 0, type);
        if (!a || !b)
            return false;
        rop.storeLLVMValue (modulo (a, b), Result, c, 0);
    }

    if (! Result.has_derivs())
        return true;

    OSL_DASSERT (is_float);
    if (! A.has_derivs()) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs (Result);
        return true;
    }

    // Modulus of duals: (a mod b, ax, ay)
    for (int d = 1;  d <= 2;  ++d) {
        for (int c = 0; c < ncomps; ++c) {
            llvm::Value *da = rop.loadLLVMValue (A, c, d, type);
            rop.storeLLVMValue (da, Result, c, d);
        }
    }
    return true;
}
772 
773 
774 
// Negation: Result = -A, applied to every component.  When Result carries
// derivatives the same negation is applied to the dx and dy slots as well.
LLVMGEN (llvm_gen_neg)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& A = *rop.opargsym (op, 1);

    TypeDesc type = Result.typespec().simpletype();
    const int ncomps = type.aggregate;
    // Slot 0 is the value; slots 1 and 2 (dx, dy) are only visited when
    // Result has derivatives.
    const int nslots = Result.has_derivs() ? 3 : 1;
    for (int slot = 0;  slot < nslots;  ++slot) {
        for (int c = 0; c < ncomps; ++c) {
            llvm::Value *operand = rop.llvm_load_value (A, slot, c, type);
            llvm::Value *negated = rop.ll.op_neg (operand);
            rop.llvm_store_value (negated, Result, slot, c);
        }
    }
    return true;
}
794 
795 
796 
797 // Implementation for clamp
LLVMGEN(llvm_gen_clamp)798 LLVMGEN (llvm_gen_clamp)
799 {
800     Opcode &op (rop.inst()->ops()[opnum]);
801     Symbol& Result = *rop.opargsym (op, 0);
802     Symbol& X = *rop.opargsym (op, 1);
803     Symbol& Min = *rop.opargsym (op, 2);
804     Symbol& Max = *rop.opargsym (op, 3);
805 
806     TypeDesc type = Result.typespec().simpletype();
807     int num_components = type.aggregate;
808     for (int i = 0; i < num_components; i++) {
809         // First do the lower bound
810         llvm::Value *val = rop.llvm_load_value (X, 0, i, type);
811         llvm::Value *min = rop.llvm_load_value (Min, 0, i, type);
812         llvm::Value *cond = rop.ll.op_lt (val, min);
813         val = rop.ll.op_select (cond, min, val);
814         llvm::Value *valdx=NULL, *valdy=NULL;
815         if (Result.has_derivs()) {
816             valdx = rop.llvm_load_value (X, 1, i, type);
817             valdy = rop.llvm_load_value (X, 2, i, type);
818             llvm::Value *mindx = rop.llvm_load_value (Min, 1, i, type);
819             llvm::Value *mindy = rop.llvm_load_value (Min, 2, i, type);
820             valdx = rop.ll.op_select (cond, mindx, valdx);
821             valdy = rop.ll.op_select (cond, mindy, valdy);
822         }
823         // Now do the upper bound
824         llvm::Value *max = rop.llvm_load_value (Max, 0, i, type);
825         cond = rop.ll.op_gt (val, max);
826         val = rop.ll.op_select (cond, max, val);
827         if (Result.has_derivs()) {
828             llvm::Value *maxdx = rop.llvm_load_value (Max, 1, i, type);
829             llvm::Value *maxdy = rop.llvm_load_value (Max, 2, i, type);
830             valdx = rop.ll.op_select (cond, maxdx, valdx);
831             valdy = rop.ll.op_select (cond, maxdy, valdy);
832         }
833         rop.llvm_store_value (val, Result, 0, i);
834         rop.llvm_store_value (valdx, Result, 1, i);
835         rop.llvm_store_value (valdy, Result, 2, i);
836     }
837     return true;
838 }
839 
840 
841 
LLVMGEN(llvm_gen_mix)842 LLVMGEN (llvm_gen_mix)
843 {
844     Opcode &op (rop.inst()->ops()[opnum]);
845     Symbol& Result = *rop.opargsym (op, 0);
846     Symbol& A = *rop.opargsym (op, 1);
847     Symbol& B = *rop.opargsym (op, 2);
848     Symbol& X = *rop.opargsym (op, 3);
849     TypeDesc type = Result.typespec().simpletype();
850     OSL_DASSERT (!Result.typespec().is_closure_based() &&
851                  Result.typespec().is_float_based());
852     int num_components = type.aggregate;
853     int x_components = X.typespec().aggregate();
854     bool derivs = (Result.has_derivs() &&
855                    (A.has_derivs() || B.has_derivs() || X.has_derivs()));
856 
857     llvm::Value *one = rop.ll.constant (1.0f);
858     llvm::Value *x = rop.llvm_load_value (X, 0, 0, type);
859     llvm::Value *one_minus_x = rop.ll.op_sub (one, x);
860     llvm::Value *xx = derivs ? rop.llvm_load_value (X, 1, 0, type) : NULL;
861     llvm::Value *xy = derivs ? rop.llvm_load_value (X, 2, 0, type) : NULL;
862     for (int i = 0; i < num_components; i++) {
863         llvm::Value *a = rop.llvm_load_value (A, 0, i, type);
864         llvm::Value *b = rop.llvm_load_value (B, 0, i, type);
865         if (!a || !b)
866             return false;
867         if (i > 0 && x_components > 1) {
868             // Only need to recompute x and 1-x if they change
869             x = rop.llvm_load_value (X, 0, i, type);
870             one_minus_x = rop.ll.op_sub (one, x);
871         }
872         // r = a*one_minus_x + b*x
873         llvm::Value *r1 = rop.ll.op_mul (a, one_minus_x);
874         llvm::Value *r2 = rop.ll.op_mul (b, x);
875         llvm::Value *r = rop.ll.op_add (r1, r2);
876         rop.llvm_store_value (r, Result, 0, i);
877 
878         if (derivs) {
879             // mix of duals:
880             //   (a*one_minus_x + b*x,
881             //    a*one_minus_x.dx + a.dx*one_minus_x + b*x.dx + b.dx*x,
882             //    a*one_minus_x.dy + a.dy*one_minus_x + b*x.dy + b.dy*x)
883             // and since one_minus_x.dx = -x.dx, one_minus_x.dy = -x.dy,
884             //   (a*one_minus_x + b*x,
885             //    -a*x.dx + a.dx*one_minus_x + b*x.dx + b.dx*x,
886             //    -a*x.dy + a.dy*one_minus_x + b*x.dy + b.dy*x)
887             llvm::Value *ax = rop.llvm_load_value (A, 1, i, type);
888             llvm::Value *bx = rop.llvm_load_value (B, 1, i, type);
889             if (i > 0 && x_components > 1)
890                 xx = rop.llvm_load_value (X, 1, i, type);
891             llvm::Value *rx1 = rop.ll.op_mul (a, xx);
892             llvm::Value *rx2 = rop.ll.op_mul (ax, one_minus_x);
893             llvm::Value *rx = rop.ll.op_sub (rx2, rx1);
894             llvm::Value *rx3 = rop.ll.op_mul (b, xx);
895             rx = rop.ll.op_add (rx, rx3);
896             llvm::Value *rx4 = rop.ll.op_mul (bx, x);
897             rx = rop.ll.op_add (rx, rx4);
898 
899             llvm::Value *ay = rop.llvm_load_value (A, 2, i, type);
900             llvm::Value *by = rop.llvm_load_value (B, 2, i, type);
901             if (i > 0 && x_components > 1)
902                 xy = rop.llvm_load_value (X, 2, i, type);
903             llvm::Value *ry1 = rop.ll.op_mul (a, xy);
904             llvm::Value *ry2 = rop.ll.op_mul (ay, one_minus_x);
905             llvm::Value *ry = rop.ll.op_sub (ry2, ry1);
906             llvm::Value *ry3 = rop.ll.op_mul (b, xy);
907             ry = rop.ll.op_add (ry, ry3);
908             llvm::Value *ry4 = rop.ll.op_mul (by, x);
909             ry = rop.ll.op_add (ry, ry4);
910 
911             rop.llvm_store_value (rx, Result, 1, i);
912             rop.llvm_store_value (ry, Result, 2, i);
913         }
914     }
915 
916     if (Result.has_derivs() && !derivs) {
917         // Result has derivs, operands do not
918         rop.llvm_zero_derivs (Result);
919     }
920 
921     return true;
922 }
923 
924 
925 
// Select: per component, Result = (X != 0) ? B : A.
// X is compared against an int or float zero depending on its type.  If X
// has fewer components than Result, its component 0 decides the components
// it does not cover.  Derivatives are selected the same way when Result has
// them and A or B does; otherwise Result's derivatives (if any) are zeroed.
LLVMGEN (llvm_gen_select)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& A = *rop.opargsym (op, 1);
    Symbol& B = *rop.opargsym (op, 2);
    Symbol& X = *rop.opargsym (op, 3);
    TypeDesc type = Result.typespec().simpletype();
    OSL_DASSERT (!Result.typespec().is_closure_based() &&
            Result.typespec().is_float_based());
    const int ncomps = type.aggregate;
    const int x_comps = X.typespec().aggregate();
    const bool derivs = (Result.has_derivs() &&
                         (A.has_derivs() || B.has_derivs()));

    llvm::Value *zero = NULL;
    if (X.typespec().is_int())
        zero = rop.ll.constant (0);
    else
        zero = rop.ll.constant (0.0f);

    // One "X is nonzero" predicate per component of X.
    llvm::Value *nonzero[3];
    for (int k = 0; k < x_comps; ++k) {
        llvm::Value *xk = rop.llvm_load_value (X, 0, k);
        nonzero[k] = rop.ll.op_ne (xk, zero);
    }

    // Slot 0 is the value; slots 1 and 2 are dx and dy.
    const int nslots = derivs ? 3 : 1;
    for (int c = 0; c < ncomps; ++c) {
        llvm::Value *pick_b = nonzero[(c < x_comps) ? c : 0];
        for (int slot = 0; slot < nslots; ++slot) {
            llvm::Value *from_a = rop.llvm_load_value (A, slot, c, type);
            llvm::Value *from_b = rop.llvm_load_value (B, slot, c, type);
            llvm::Value *chosen = rop.ll.op_select (pick_b, from_b, from_a);
            rop.llvm_store_value (chosen, Result, slot, c);
        }
    }

    if (Result.has_derivs() && !derivs) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs (Result);
    }
    return true;
}
969 
970 
971 
972 // Implementation for min/max
LLVMGEN(llvm_gen_minmax)973 LLVMGEN (llvm_gen_minmax)
974 {
975     Opcode &op (rop.inst()->ops()[opnum]);
976     Symbol& Result = *rop.opargsym (op, 0);
977     Symbol& x = *rop.opargsym (op, 1);
978     Symbol& y = *rop.opargsym (op, 2);
979 
980     TypeDesc type = Result.typespec().simpletype();
981     int num_components = type.aggregate;
982     for (int i = 0; i < num_components; i++) {
983         // First do the lower bound
984         llvm::Value *x_val = rop.llvm_load_value (x, 0, i, type);
985         llvm::Value *y_val = rop.llvm_load_value (y, 0, i, type);
986 
987         llvm::Value* cond = NULL;
988         // NOTE(boulos): Using <= instead of < to match old behavior
989         // (only matters for derivs)
990         if (op.opname() == op_min) {
991             cond = rop.ll.op_le (x_val, y_val);
992         } else {
993             cond = rop.ll.op_gt (x_val, y_val);
994         }
995 
996         llvm::Value* res_val = rop.ll.op_select (cond, x_val, y_val);
997         rop.llvm_store_value (res_val, Result, 0, i);
998         if (Result.has_derivs()) {
999           llvm::Value* x_dx = rop.llvm_load_value (x, 1, i, type);
1000           llvm::Value* x_dy = rop.llvm_load_value (x, 2, i, type);
1001           llvm::Value* y_dx = rop.llvm_load_value (y, 1, i, type);
1002           llvm::Value* y_dy = rop.llvm_load_value (y, 2, i, type);
1003           rop.llvm_store_value (rop.ll.op_select(cond, x_dx, y_dx), Result, 1, i);
1004           rop.llvm_store_value (rop.ll.op_select(cond, x_dy, y_dy), Result, 2, i);
1005         }
1006     }
1007     return true;
1008 }
1009 
1010 
1011 
// Integer binary bit operations: bitand, bitor, xor, shl and shr, chosen
// by the opcode name.  All three operands are expected to be int.
// Returns false if an operand fails to load or the opcode is none of the
// five handled here.
LLVMGEN (llvm_gen_bitwise_binary_op)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& A = *rop.opargsym (op, 1);
    Symbol& B = *rop.opargsym (op, 2);
    OSL_DASSERT (Result.typespec().is_int() && A.typespec().is_int() &&
            B.typespec().is_int());

    llvm::Value *lhs = rop.loadLLVMValue (A);
    llvm::Value *rhs = rop.loadLLVMValue (B);
    if (lhs == NULL || rhs == NULL)
        return false;

    llvm::Value *bits = NULL;
    if (op.opname() == op_bitand) {
        bits = rop.ll.op_and (lhs, rhs);
    } else if (op.opname() == op_bitor) {
        bits = rop.ll.op_or (lhs, rhs);
    } else if (op.opname() == op_xor) {
        bits = rop.ll.op_xor (lhs, rhs);
    } else if (op.opname() == op_shl) {
        bits = rop.ll.op_shl (lhs, rhs);
    } else if (op.opname() == op_shr) {
        bits = rop.ll.op_shr (lhs, rhs);
    } else {
        // Not an opcode this generator knows about.
        return false;
    }

    rop.storeLLVMValue (bits, Result);
    return true;
}
1041 
1042 
1043 
1044 // Simple (pointwise) unary ops (Abs, ...,
LLVMGEN(llvm_gen_unary_op)1045 LLVMGEN (llvm_gen_unary_op)
1046 {
1047     Opcode &op (rop.inst()->ops()[opnum]);
1048     Symbol& dst  = *rop.opargsym (op, 0);
1049     Symbol& src = *rop.opargsym (op, 1);
1050     bool dst_derivs = dst.has_derivs();
1051     int num_components = dst.typespec().simpletype().aggregate;
1052 
1053     bool dst_float = dst.typespec().is_float_based();
1054     bool src_float = src.typespec().is_float_based();
1055 
1056     for (int i = 0; i < num_components; i++) {
1057         // Get src1/2 component i
1058         llvm::Value* src_load = rop.loadLLVMValue (src, i, 0);
1059         if (!src_load) return false;
1060 
1061         llvm::Value* src_val = src_load;
1062 
1063         // Perform the op
1064         llvm::Value* result = 0;
1065         ustring opname = op.opname();
1066 
1067         if (opname == op_compl) {
1068             OSL_DASSERT (dst.typespec().is_int());
1069             result = rop.ll.op_not (src_val);
1070         } else {
1071             // Don't know how to handle this.
1072             rop.shadingcontext()->errorf("Don't know how to handle op '%s', eliding the store\n", opname);
1073         }
1074 
1075         // Store the result
1076         if (result) {
1077             // if our op type doesn't match result, convert
1078             if (dst_float && !src_float) {
1079                 // Op was int, but we need to store float
1080                 result = rop.ll.op_int_to_float (result);
1081             } else if (!dst_float && src_float) {
1082                 // Op was float, but we need to store int
1083                 result = rop.ll.op_float_to_int (result);
1084             } // otherwise just fine
1085             rop.storeLLVMValue (result, dst, i, 0);
1086         }
1087 
1088         if (dst_derivs) {
1089             // mul results in <a * b, a * b_dx + b * a_dx, a * b_dy + b * a_dy>
1090             rop.shadingcontext()->infof("punting on derivatives for now\n");
1091             // FIXME!!
1092         }
1093     }
1094     return true;
1095 }
1096 
1097 
1098 
1099 // Simple assignment
LLVMGEN(llvm_gen_assign)1100 LLVMGEN (llvm_gen_assign)
1101 {
1102     Opcode &op (rop.inst()->ops()[opnum]);
1103     Symbol& Result (*rop.opargsym (op, 0));
1104     Symbol& Src (*rop.opargsym (op, 1));
1105 
1106     return rop.llvm_assign_impl (Result, Src);
1107 }
1108 
1109 
1110 
1111 // Entire array copying
LLVMGEN(llvm_gen_arraycopy)1112 LLVMGEN (llvm_gen_arraycopy)
1113 {
1114     Opcode &op (rop.inst()->ops()[opnum]);
1115     Symbol& Result (*rop.opargsym (op, 0));
1116     Symbol& Src (*rop.opargsym (op, 1));
1117 
1118     return rop.llvm_assign_impl (Result, Src);
1119 }
1120 
1121 
1122 
1123 // Vector component reference
LLVMGEN(llvm_gen_compref)1124 LLVMGEN (llvm_gen_compref)
1125 {
1126     Opcode &op (rop.inst()->ops()[opnum]);
1127     Symbol& Result = *rop.opargsym (op, 0);
1128     Symbol& Val = *rop.opargsym (op, 1);
1129     Symbol& Index = *rop.opargsym (op, 2);
1130 
1131     llvm::Value *c = rop.llvm_load_value(Index);
1132     if (rop.inst()->master()->range_checking()) {
1133         if (! (Index.is_constant() &&  *(int *)Index.data() >= 0 &&
1134                *(int *)Index.data() < 3)) {
1135             llvm::Value *args[] = { c, rop.ll.constant(3),
1136                                     rop.ll.constant(Val.unmangled()),
1137                                     rop.sg_void_ptr(),
1138                                     rop.ll.constant(op.sourcefile()),
1139                                     rop.ll.constant(op.sourceline()),
1140                                     rop.ll.constant(rop.group().name()),
1141                                     rop.ll.constant(rop.layer()),
1142                                     rop.ll.constant(rop.inst()->layername()),
1143                                     rop.ll.constant(rop.inst()->shadername()) };
1144             c = rop.ll.call_function ("osl_range_check", args);
1145         }
1146     }
1147 
1148     for (int d = 0;  d < 3;  ++d) {  // deriv
1149         llvm::Value *val = NULL;
1150         if (Index.is_constant()) {
1151             int i = *(int*)Index.data();
1152             i = Imath::clamp (i, 0, 2);
1153             val = rop.llvm_load_value (Val, d, i);
1154         } else {
1155             val = rop.llvm_load_component_value (Val, d, c);
1156         }
1157         rop.llvm_store_value (val, Result, d);
1158         if (! Result.has_derivs())  // skip the derivs if we don't need them
1159             break;
1160     }
1161     return true;
1162 }
1163 
1164 
1165 
1166 // Vector component assignment
LLVMGEN(llvm_gen_compassign)1167 LLVMGEN (llvm_gen_compassign)
1168 {
1169     Opcode &op (rop.inst()->ops()[opnum]);
1170     Symbol& Result = *rop.opargsym (op, 0);
1171     Symbol& Index = *rop.opargsym (op, 1);
1172     Symbol& Val = *rop.opargsym (op, 2);
1173 
1174     llvm::Value *c = rop.llvm_load_value(Index);
1175     if (rop.inst()->master()->range_checking()) {
1176         if (! (Index.is_constant() &&  *(int *)Index.data() >= 0 &&
1177                *(int *)Index.data() < 3)) {
1178             llvm::Value *args[] = { c, rop.ll.constant(3),
1179                                     rop.ll.constant(Result.unmangled()),
1180                                     rop.sg_void_ptr(),
1181                                     rop.ll.constant(op.sourcefile()),
1182                                     rop.ll.constant(op.sourceline()),
1183                                     rop.ll.constant(rop.group().name()),
1184                                     rop.ll.constant(rop.layer()),
1185                                     rop.ll.constant(rop.inst()->layername()),
1186                                     rop.ll.constant(rop.inst()->shadername()) };
1187             c = rop.ll.call_function ("osl_range_check", args);
1188         }
1189     }
1190 
1191     for (int d = 0;  d < 3;  ++d) {  // deriv
1192         llvm::Value *val = rop.llvm_load_value (Val, d, 0, TypeDesc::TypeFloat);
1193         if (Index.is_constant()) {
1194             int i = *(int*)Index.data();
1195             i = Imath::clamp (i, 0, 2);
1196             rop.llvm_store_value (val, Result, d, i);
1197         } else {
1198             rop.llvm_store_component_value (val, Result, d, c);
1199         }
1200         if (! Result.has_derivs())  // skip the derivs if we don't need them
1201             break;
1202     }
1203     return true;
1204 }
1205 
1206 
1207 
1208 // Matrix component reference
LLVMGEN(llvm_gen_mxcompref)1209 LLVMGEN (llvm_gen_mxcompref)
1210 {
1211     Opcode &op (rop.inst()->ops()[opnum]);
1212     Symbol& Result = *rop.opargsym (op, 0);
1213     Symbol& M = *rop.opargsym (op, 1);
1214     Symbol& Row = *rop.opargsym (op, 2);
1215     Symbol& Col = *rop.opargsym (op, 3);
1216 
1217     llvm::Value *row = rop.llvm_load_value (Row);
1218     llvm::Value *col = rop.llvm_load_value (Col);
1219     if (rop.inst()->master()->range_checking()) {
1220         if (! (Row.is_constant() && Col.is_constant() &&
1221                *(int *)Row.data() >= 0 && *(int *)Row.data() < 4 &&
1222                *(int *)Col.data() >= 0 && *(int *)Col.data() < 4)) {
1223             llvm::Value *args[] = { row, rop.ll.constant(4),
1224                                     rop.ll.constant(M.name()),
1225                                     rop.sg_void_ptr(),
1226                                     rop.ll.constant(op.sourcefile()),
1227                                     rop.ll.constant(op.sourceline()),
1228                                     rop.ll.constant(rop.group().name()),
1229                                     rop.ll.constant(rop.layer()),
1230                                     rop.ll.constant(rop.inst()->layername()),
1231                                     rop.ll.constant(rop.inst()->shadername()) };
1232             if (! (Row.is_constant() &&
1233                    *(int *)Row.data() >= 0 && *(int *)Row.data() < 4)) {
1234                 row = rop.ll.call_function ("osl_range_check", args);
1235             }
1236             if (! (Col.is_constant() &&
1237                    *(int *)Col.data() >= 0 && *(int *)Col.data() < 4)) {
1238                 args[0] = col;
1239                 col = rop.ll.call_function ("osl_range_check", args);
1240             }
1241         }
1242     }
1243 
1244     llvm::Value *val = NULL;
1245     if (Row.is_constant() && Col.is_constant()) {
1246         int r = Imath::clamp (((int*)Row.data())[0], 0, 3);
1247         int c = Imath::clamp (((int*)Col.data())[0], 0, 3);
1248         int comp = 4 * r + c;
1249         val = rop.llvm_load_value (M, 0, comp);
1250     } else {
1251         llvm::Value *comp = rop.ll.op_mul (row, rop.ll.constant(4));
1252         comp = rop.ll.op_add (comp, col);
1253         val = rop.llvm_load_component_value (M, 0, comp);
1254     }
1255     rop.llvm_store_value (val, Result);
1256     rop.llvm_zero_derivs (Result);
1257 
1258     return true;
1259 }
1260 
1261 
1262 
1263 // Matrix component assignment
LLVMGEN(llvm_gen_mxcompassign)1264 LLVMGEN (llvm_gen_mxcompassign)
1265 {
1266     Opcode &op (rop.inst()->ops()[opnum]);
1267     Symbol& Result = *rop.opargsym (op, 0);
1268     Symbol& Row = *rop.opargsym (op, 1);
1269     Symbol& Col = *rop.opargsym (op, 2);
1270     Symbol& Val = *rop.opargsym (op, 3);
1271 
1272     llvm::Value *row = rop.llvm_load_value (Row);
1273     llvm::Value *col = rop.llvm_load_value (Col);
1274     if (rop.inst()->master()->range_checking()) {
1275         if (! (Row.is_constant() && Col.is_constant() &&
1276                *(int *)Row.data() >= 0 && *(int *)Row.data() < 4 &&
1277                *(int *)Col.data() >= 0 && *(int *)Col.data() < 4)) {
1278             llvm::Value *args[] = { row, rop.ll.constant(4),
1279                                     rop.ll.constant(Result.name()),
1280                                     rop.sg_void_ptr(),
1281                                     rop.ll.constant(op.sourcefile()),
1282                                     rop.ll.constant(op.sourceline()),
1283                                     rop.ll.constant(rop.group().name()),
1284                                     rop.ll.constant(rop.layer()),
1285                                     rop.ll.constant(rop.inst()->layername()),
1286                                     rop.ll.constant(rop.inst()->shadername()) };
1287             if (! (Row.is_constant() &&
1288                    *(int *)Row.data() >= 0 && *(int *)Row.data() < 4)) {
1289                 row = rop.ll.call_function ("osl_range_check", args);
1290             }
1291             if (! (Col.is_constant() &&
1292                    *(int *)Col.data() >= 0 && *(int *)Col.data() < 4)) {
1293                 args[0] = col;
1294                 col = rop.ll.call_function ("osl_range_check", args);
1295             }
1296         }
1297     }
1298 
1299     llvm::Value *val = rop.llvm_load_value (Val, 0, 0, TypeDesc::TypeFloat);
1300 
1301     if (Row.is_constant() && Col.is_constant()) {
1302         int r = Imath::clamp (((int*)Row.data())[0], 0, 3);
1303         int c = Imath::clamp (((int*)Col.data())[0], 0, 3);
1304         int comp = 4 * r + c;
1305         rop.llvm_store_value (val, Result, 0, comp);
1306     } else {
1307         llvm::Value *comp = rop.ll.op_mul (row, rop.ll.constant(4));
1308         comp = rop.ll.op_add (comp, col);
1309         rop.llvm_store_component_value (val, Result, 0, comp);
1310     }
1311     return true;
1312 }
1313 
1314 
1315 
1316 // Array length
LLVMGEN(llvm_gen_arraylength)1317 LLVMGEN (llvm_gen_arraylength)
1318 {
1319     Opcode &op (rop.inst()->ops()[opnum]);
1320     Symbol& Result = *rop.opargsym (op, 0);
1321     Symbol& A = *rop.opargsym (op, 1);
1322     OSL_DASSERT(Result.typespec().is_int() && A.typespec().is_array());
1323 
1324     int len = A.typespec().is_unsized_array() ? A.initializers()
1325                                               : A.typespec().arraylength();
1326     rop.llvm_store_value (rop.ll.constant(len), Result);
1327     return true;
1328 }
1329 
1330 
1331 
1332 // Array reference
LLVMGEN(llvm_gen_aref)1333 LLVMGEN (llvm_gen_aref)
1334 {
1335     Opcode &op (rop.inst()->ops()[opnum]);
1336     Symbol& Result = *rop.opargsym (op, 0);
1337     Symbol& Src = *rop.opargsym (op, 1);
1338     Symbol& Index = *rop.opargsym (op, 2);
1339 
1340     // Get array index we're interested in
1341     llvm::Value *index = rop.loadLLVMValue (Index);
1342     if (! index)
1343         return false;
1344     if (rop.inst()->master()->range_checking()) {
1345         if (! (Index.is_constant() &&  *(int *)Index.data() >= 0 &&
1346                *(int *)Index.data() < Src.typespec().arraylength())) {
1347             llvm::Value *args[] = { index,
1348                                     rop.ll.constant(Src.typespec().arraylength()),
1349                                     rop.ll.constant(Src.unmangled()),
1350                                     rop.sg_void_ptr(),
1351                                     rop.ll.constant(op.sourcefile()),
1352                                     rop.ll.constant(op.sourceline()),
1353                                     rop.ll.constant(rop.group().name()),
1354                                     rop.ll.constant(rop.layer()),
1355                                     rop.ll.constant(rop.inst()->layername()),
1356                                     rop.ll.constant(rop.inst()->shadername()) };
1357             index = rop.ll.call_function ("osl_range_check", args);
1358         }
1359     }
1360 
1361     int num_components = Src.typespec().simpletype().aggregate;
1362     for (int d = 0;  d <= 2;  ++d) {
1363         for (int c = 0;  c < num_components;  ++c) {
1364             llvm::Value *val = rop.llvm_load_value (Src, d, index, c);
1365             rop.storeLLVMValue (val, Result, c, d);
1366         }
1367         if (! Result.has_derivs())
1368             break;
1369     }
1370 
1371     return true;
1372 }
1373 
1374 
1375 
1376 // Array assignment
LLVMGEN(llvm_gen_aassign)1377 LLVMGEN (llvm_gen_aassign)
1378 {
1379     Opcode &op (rop.inst()->ops()[opnum]);
1380     Symbol& Result = *rop.opargsym (op, 0);
1381     Symbol& Index = *rop.opargsym (op, 1);
1382     Symbol& Src = *rop.opargsym (op, 2);
1383 
1384     // Get array index we're interested in
1385     llvm::Value *index = rop.loadLLVMValue (Index);
1386     if (! index)
1387         return false;
1388     if (rop.inst()->master()->range_checking()) {
1389         if (! (Index.is_constant() &&  *(int *)Index.data() >= 0 &&
1390                *(int *)Index.data() < Result.typespec().arraylength())) {
1391             llvm::Value *args[] = { index,
1392                                     rop.ll.constant(Result.typespec().arraylength()),
1393                                     rop.ll.constant(Result.unmangled()),
1394                                     rop.sg_void_ptr(),
1395                                     rop.ll.constant(op.sourcefile()),
1396                                     rop.ll.constant(op.sourceline()),
1397                                     rop.ll.constant(rop.group().name()),
1398                                     rop.ll.constant(rop.layer()),
1399                                     rop.ll.constant(rop.inst()->layername()),
1400                                     rop.ll.constant(rop.inst()->shadername()) };
1401             index = rop.ll.call_function ("osl_range_check", args);
1402         }
1403     }
1404 
1405     int num_components = Result.typespec().simpletype().aggregate;
1406 
1407     // Allow float <=> int casting
1408     TypeDesc cast;
1409     if (num_components == 1 && !Result.typespec().is_closure() && !Src.typespec().is_closure() &&
1410         (Result.typespec().is_int_based() ||  Result.typespec().is_float_based()) &&
1411         (Src.typespec().is_int_based() ||  Src.typespec().is_float_based())) {
1412         cast = Result.typespec().simpletype();
1413         cast.arraylen = 0;
1414     } else {
1415         // Try to warn before llvm_fatal_error is called which provides little
1416         // context as to what went wrong.
1417         OSL_ASSERT (Result.typespec().simpletype().basetype ==
1418                     Src.typespec().simpletype().basetype);
1419     }
1420 
1421     for (int d = 0;  d <= 2;  ++d) {
1422         for (int c = 0;  c < num_components;  ++c) {
1423             llvm::Value *val = rop.loadLLVMValue (Src, c, d, cast);
1424             rop.llvm_store_value (val, Result, d, index, c);
1425         }
1426         if (! Result.has_derivs())
1427             break;
1428     }
1429 
1430     return true;
1431 }
1432 
1433 
1434 
1435 // Construct color, optionally with a color transformation from a named
1436 // color space.
LLVMGEN(llvm_gen_construct_color)1437 LLVMGEN (llvm_gen_construct_color)
1438 {
1439     Opcode &op (rop.inst()->ops()[opnum]);
1440     Symbol& Result = *rop.opargsym (op, 0);
1441     bool using_space = (op.nargs() == 5);
1442     Symbol& Space = *rop.opargsym (op, 1);
1443     OSL_MAYBE_UNUSED Symbol& X = *rop.opargsym (op, 1+using_space);
1444     OSL_MAYBE_UNUSED Symbol& Y = *rop.opargsym (op, 2+using_space);
1445     OSL_MAYBE_UNUSED Symbol& Z = *rop.opargsym (op, 3+using_space);
1446     OSL_DASSERT (Result.typespec().is_triple() && X.typespec().is_float() &&
1447                  Y.typespec().is_float() && Z.typespec().is_float() &&
1448                  (using_space == false || Space.typespec().is_string()));
1449 
1450     // First, copy the floats into the vector
1451     int dmax = Result.has_derivs() ? 3 : 1;
1452     for (int d = 0;  d < dmax;  ++d) {  // loop over derivs
1453         for (int c = 0;  c < 3;  ++c) {  // loop over components
1454             const Symbol& comp = *rop.opargsym (op, c+1+using_space);
1455             llvm::Value* val = rop.llvm_load_value (comp, d, NULL, 0, TypeDesc::TypeFloat);
1456             rop.llvm_store_value (val, Result, d, NULL, c);
1457         }
1458     }
1459 
1460     // Do the color space conversion in-place, if called for
1461     if (using_space) {
1462         llvm::Value *args[] = {
1463                 rop.sg_void_ptr(),  // shader globals
1464                 rop.llvm_void_ptr(Result, 0),  // color
1465                 rop.llvm_load_string(Space), // from
1466         };
1467         rop.ll.call_function ("osl_prepend_color_from", args);
1468         // FIXME(deriv): Punt on derivs for color ctrs with space names.
1469         // We should try to do this right, but we never had it right for
1470         // the interpreter, to it's probably not an emergency.
1471         if (Result.has_derivs())
1472             rop.llvm_zero_derivs (Result);
1473     }
1474 
1475     return true;
1476 }
1477 
1478 
1479 
1480 // Construct spatial triple (point, vector, normal), optionally with a
1481 // transformation from a named coordinate system.
LLVMGEN(llvm_gen_construct_triple)1482 LLVMGEN (llvm_gen_construct_triple)
1483 {
1484     Opcode &op (rop.inst()->ops()[opnum]);
1485     Symbol& Result = *rop.opargsym (op, 0);
1486     bool using_space = (op.nargs() == 5);
1487     Symbol& Space = *rop.opargsym (op, 1);
1488     OSL_MAYBE_UNUSED Symbol& X = *rop.opargsym (op, 1+using_space);
1489     OSL_MAYBE_UNUSED Symbol& Y = *rop.opargsym (op, 2+using_space);
1490     OSL_MAYBE_UNUSED Symbol& Z = *rop.opargsym (op, 3+using_space);
1491     OSL_DASSERT (Result.typespec().is_triple() && X.typespec().is_float() &&
1492                  Y.typespec().is_float() && Z.typespec().is_float() &&
1493                  (using_space == false || Space.typespec().is_string()));
1494 
1495     // First, copy the floats into the vector
1496     int dmax = Result.has_derivs() ? 3 : 1;
1497     for (int d = 0;  d < dmax;  ++d) {  // loop over derivs
1498         for (int c = 0;  c < 3;  ++c) {  // loop over components
1499             const Symbol& comp = *rop.opargsym (op, c+1+using_space);
1500             llvm::Value* val = rop.llvm_load_value (comp, d, NULL, 0, TypeDesc::TypeFloat);
1501             rop.llvm_store_value (val, Result, d, NULL, c);
1502         }
1503     }
1504 
1505     // Do the transformation in-place, if called for
1506     if (using_space) {
1507         ustring from, to;  // N.B. initialize to empty strings
1508         if (Space.is_constant()) {
1509             from = *(ustring *)Space.data();
1510             if (from == Strings::common ||
1511                 from == rop.shadingsys().commonspace_synonym())
1512                 return true;  // no transformation necessary
1513         }
1514         TypeDesc::VECSEMANTICS vectype = TypeDesc::POINT;
1515         if (op.opname() == "vector")
1516             vectype = TypeDesc::VECTOR;
1517         else if (op.opname() == "normal")
1518             vectype = TypeDesc::NORMAL;
1519         llvm::Value *args[] = { rop.sg_void_ptr(),
1520             rop.llvm_void_ptr(Result), rop.ll.constant(Result.has_derivs()),
1521             rop.llvm_void_ptr(Result), rop.ll.constant(Result.has_derivs()),
1522             rop.llvm_load_value(Space), rop.ll.constant(Strings::common),
1523             rop.ll.constant((int)vectype) };
1524         RendererServices *rend (rop.shadingsys().renderer());
1525         if (rend->transform_points (NULL, from, to, 0.0f, NULL, NULL, 0, vectype)) {
1526             // renderer potentially knows about a nonlinear transformation.
1527             // Note that for the case of non-constant strings, passing empty
1528             // from & to will make transform_points just tell us if ANY
1529             // nonlinear transformations potentially are supported.
1530             rop.ll.call_function ("osl_transform_triple_nonlinear", args);
1531         } else {
1532             // definitely not a nonlinear transformation
1533             rop.ll.call_function ("osl_transform_triple", args);
1534         }
1535     }
1536 
1537     return true;
1538 }
1539 
1540 
1541 
1542 /// matrix constructor.  Comes in several varieties:
1543 ///    matrix (float)
1544 ///    matrix (space, float)
1545 ///    matrix (...16 floats...)
1546 ///    matrix (space, ...16 floats...)
1547 ///    matrix (fromspace, tospace)
LLVMGEN(llvm_gen_matrix)1548 LLVMGEN (llvm_gen_matrix)
1549 {
1550     Opcode &op (rop.inst()->ops()[opnum]);
1551     Symbol& Result = *rop.opargsym (op, 0);
1552     int nargs = op.nargs();
1553     bool using_space = (nargs == 3 || nargs == 18);
1554     bool using_two_spaces = (nargs == 3 && rop.opargsym(op,2)->typespec().is_string());
1555     int nfloats = nargs - 1 - (int)using_space;
1556     OSL_DASSERT (nargs == 2 || nargs == 3 || nargs == 17 || nargs == 18);
1557 
1558     if (using_two_spaces) {
1559         llvm::Value *args[] = {
1560                 rop.sg_void_ptr(),  // shader globals
1561                 rop.llvm_void_ptr(Result),  // result
1562                 rop.llvm_load_value(*rop.opargsym (op, 1)),  // from
1563                 rop.llvm_load_value(*rop.opargsym (op, 2)),  // to
1564         };
1565         rop.ll.call_function ("osl_get_from_to_matrix", args);
1566     } else {
1567         if (nfloats == 1) {
1568             for (int i = 0; i < 16; i++) {
1569                 llvm::Value* src_val = ((i%4) == (i/4))
1570                     ? rop.llvm_load_value (*rop.opargsym(op,1+using_space))
1571                     : rop.ll.constant(0.0f);
1572                 rop.llvm_store_value (src_val, Result, 0, i);
1573             }
1574         } else if (nfloats == 16) {
1575             for (int i = 0; i < 16; i++) {
1576                 llvm::Value* src_val = rop.llvm_load_value (*rop.opargsym(op,i+1+using_space));
1577                 rop.llvm_store_value (src_val, Result, 0, i);
1578             }
1579         } else {
1580             OSL_ASSERT (0);
1581         }
1582         if (using_space) {
1583             llvm::Value *args[] = {
1584                 rop.sg_void_ptr(),  // shader globals
1585                 rop.llvm_void_ptr(Result),  // result
1586                 rop.llvm_load_value(*rop.opargsym (op, 1)),  // from
1587             };
1588             rop.ll.call_function ("osl_prepend_matrix_from", args);
1589         }
1590     }
1591     if (Result.has_derivs())
1592         rop.llvm_zero_derivs (Result);
1593     return true;
1594 }
1595 
1596 
1597 
1598 /// int getmatrix (fromspace, tospace, M)
LLVMGEN(llvm_gen_getmatrix)1599 LLVMGEN (llvm_gen_getmatrix)
1600 {
1601     Opcode &op (rop.inst()->ops()[opnum]);
1602     OSL_DASSERT (op.nargs() == 4);
1603     Symbol& Result = *rop.opargsym (op, 0);
1604     Symbol& From = *rop.opargsym (op, 1);
1605     Symbol& To = *rop.opargsym (op, 2);
1606     Symbol& M = *rop.opargsym (op, 3);
1607 
1608     llvm::Value *args[] = {
1609         rop.sg_void_ptr(),  // shader globals
1610         rop.llvm_void_ptr(M),  // matrix result
1611         rop.llvm_load_value(From),
1612         rop.llvm_load_value(To),
1613     };
1614     llvm::Value *result = rop.ll.call_function ("osl_get_from_to_matrix", args);
1615     rop.llvm_store_value (result, Result);
1616     rop.llvm_zero_derivs (M);
1617     return true;
1618 }
1619 
1620 
1621 
1622 // transform{,v,n} (string tospace, triple p)
1623 // transform{,v,n} (string fromspace, string tospace, triple p)
1624 // transform{,v,n} (matrix, triple p)
LLVMGEN(llvm_gen_transform)1625 LLVMGEN (llvm_gen_transform)
1626 {
1627     Opcode &op (rop.inst()->ops()[opnum]);
1628     int nargs = op.nargs();
1629     Symbol *Result = rop.opargsym (op, 0);
1630     Symbol *From = (nargs == 3) ? NULL : rop.opargsym (op, 1);
1631     Symbol *To = rop.opargsym (op, (nargs == 3) ? 1 : 2);
1632     Symbol *P = rop.opargsym (op, (nargs == 3) ? 2 : 3);
1633 
1634     if (To->typespec().is_matrix()) {
1635         // llvm_ops has the matrix version already implemented
1636         llvm_gen_generic (rop, opnum);
1637         return true;
1638     }
1639 
1640     // Named space versions from here on out.
1641     ustring from, to;  // N.B.: initialize to empty strings
1642     if ((From == NULL || From->is_constant()) && To->is_constant()) {
1643         // We can know all the space names at this time
1644         from = From ? *((ustring *)From->data()) : Strings::common;
1645         to = *((ustring *)To->data());
1646         ustring syn = rop.shadingsys().commonspace_synonym();
1647         if (from == syn)
1648             from = Strings::common;
1649         if (to == syn)
1650             to = Strings::common;
1651         if (from == to) {
1652             // An identity transformation, just copy
1653             if (Result != P) // don't bother in-place copy
1654                 rop.llvm_assign_impl (*Result, *P);
1655             return true;
1656         }
1657     }
1658     TypeDesc::VECSEMANTICS vectype = TypeDesc::POINT;
1659     if (op.opname() == "transformv")
1660         vectype = TypeDesc::VECTOR;
1661     else if (op.opname() == "transformn")
1662         vectype = TypeDesc::NORMAL;
1663     llvm::Value *args[] = { rop.sg_void_ptr(),
1664         rop.llvm_void_ptr(*P), rop.ll.constant(P->has_derivs()),
1665         rop.llvm_void_ptr(*Result), rop.ll.constant(Result->has_derivs()),
1666         rop.llvm_load_value(*From), rop.llvm_load_value(*To),
1667         rop.ll.constant((int)vectype) };
1668     RendererServices *rend (rop.shadingsys().renderer());
1669     if (rend->transform_points (NULL, from, to, 0.0f, NULL, NULL, 0, vectype)) {
1670         // renderer potentially knows about a nonlinear transformation.
1671         // Note that for the case of non-constant strings, passing empty
1672         // from & to will make transform_points just tell us if ANY
1673         // nonlinear transformations potentially are supported.
1674         rop.ll.call_function ("osl_transform_triple_nonlinear", args);
1675     } else {
1676         // definitely not a nonlinear transformation
1677         rop.ll.call_function ("osl_transform_triple", args);
1678     }
1679     return true;
1680 }
1681 
1682 
1683 
1684 // transformc (string fromspace, string tospace, color p)
LLVMGEN(llvm_gen_transformc)1685 LLVMGEN (llvm_gen_transformc)
1686 {
1687     Opcode &op (rop.inst()->ops()[opnum]);
1688     OSL_DASSERT (op.nargs() == 4);
1689     Symbol *Result = rop.opargsym (op, 0);
1690     Symbol *From = rop.opargsym (op, 1);
1691     Symbol *To = rop.opargsym (op, 2);
1692     Symbol *C = rop.opargsym (op, 3);
1693 
1694     llvm::Value *args[] = { rop.sg_void_ptr(),
1695         rop.llvm_void_ptr(*C), rop.ll.constant(C->has_derivs()),
1696         rop.llvm_void_ptr(*Result), rop.ll.constant(Result->has_derivs()),
1697         rop.llvm_load_string (*From), rop.llvm_load_string (*To)
1698     };
1699 
1700     rop.ll.call_function ("osl_transformc", args);
1701     return true;
1702 }
1703 
1704 
1705 
1706 // Derivs
LLVMGEN(llvm_gen_DxDy)1707 LLVMGEN (llvm_gen_DxDy)
1708 {
1709     Opcode &op (rop.inst()->ops()[opnum]);
1710     Symbol& Result (*rop.opargsym (op, 0));
1711     Symbol& Src (*rop.opargsym (op, 1));
1712     int deriv = (op.opname() == "Dx") ? 1 : 2;
1713 
1714     for (int i = 0; i < Result.typespec().aggregate(); ++i) {
1715         llvm::Value* src_val = rop.llvm_load_value (Src, deriv, i);
1716         rop.storeLLVMValue (src_val, Result, i, 0);
1717     }
1718 
1719     // Don't have 2nd order derivs
1720     rop.llvm_zero_derivs (Result);
1721     return true;
1722 }
1723 
1724 
1725 
1726 // Dz
LLVMGEN(llvm_gen_Dz)1727 LLVMGEN (llvm_gen_Dz)
1728 {
1729     Opcode &op (rop.inst()->ops()[opnum]);
1730     Symbol& Result (*rop.opargsym (op, 0));
1731     Symbol& Src (*rop.opargsym (op, 1));
1732 
1733     if (&Src == rop.inst()->symbol(rop.inst()->Psym())) {
1734         // dPdz -- the only Dz we know how to take
1735         int deriv = 3;
1736         for (int i = 0; i < Result.typespec().aggregate(); ++i) {
1737             llvm::Value* src_val = rop.llvm_load_value (Src, deriv, i);
1738             rop.storeLLVMValue (src_val, Result, i, 0);
1739         }
1740         // Don't have 2nd order derivs
1741         rop.llvm_zero_derivs (Result);
1742     } else {
1743         // Punt, everything else for now returns 0 for Dz
1744         // FIXME?
1745         rop.llvm_assign_zero (Result);
1746     }
1747     return true;
1748 }
1749 
1750 
1751 
// filterwidth: for a float or triple source that carries derivatives, call
// the matching osl_filterwidth_* function; a source without derivatives
// yields a result of zero.
LLVMGEN (llvm_gen_filterwidth)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result (*rop.opargsym (op, 0));
    Symbol& Src (*rop.opargsym (op, 1));

    OSL_DASSERT (Src.typespec().is_float() || Src.typespec().is_triple());

    if (! Src.has_derivs()) {
        // No derivs to be had
        rop.llvm_assign_zero (Result);
        return true;
    }

    if (Src.typespec().is_float()) {
        // Float version returns its answer, which we must store.
        llvm::Value *width = rop.ll.call_function ("osl_filterwidth_fdf",
                                                     rop.llvm_void_ptr (Src));
        rop.llvm_store_value (width, Result);
    } else {
        // Triple version writes its answer through the result pointer.
        rop.ll.call_function ("osl_filterwidth_vdv",
                                rop.llvm_void_ptr (Result),
                                rop.llvm_void_ptr (Src));
    }
    // No second-order derivatives are available.
    rop.llvm_zero_derivs (Result);

    return true;
}
1778 
1779 
1780 
1781 // Comparison ops
LLVMGEN(llvm_gen_compare_op)1782 LLVMGEN (llvm_gen_compare_op)
1783 {
1784     Opcode &op (rop.inst()->ops()[opnum]);
1785     Symbol &Result (*rop.opargsym (op, 0));
1786     Symbol &A (*rop.opargsym (op, 1));
1787     Symbol &B (*rop.opargsym (op, 2));
1788     OSL_DASSERT (Result.typespec().is_int() && ! Result.has_derivs());
1789 
1790     if (A.typespec().is_closure()) {
1791         OSL_ASSERT (B.typespec().is_int() &&
1792                     "Only closure==0 and closure!=0 allowed");
1793         llvm::Value *a = rop.llvm_load_value (A);
1794         llvm::Value *b = rop.ll.void_ptr_null ();
1795         llvm::Value *r = (op.opname()==op_eq) ? rop.ll.op_eq(a,b)
1796                                               : rop.ll.op_ne(a,b);
1797         // Convert the single bit bool into an int
1798         r = rop.ll.op_bool_to_int (r);
1799         rop.llvm_store_value (r, Result);
1800         return true;
1801     }
1802 
1803     int num_components = std::max (A.typespec().aggregate(), B.typespec().aggregate());
1804     bool float_based = A.typespec().is_float_based() || B.typespec().is_float_based();
1805     TypeDesc cast (float_based ? TypeDesc::FLOAT : TypeDesc::UNKNOWN);
1806 
1807     llvm::Value* final_result = 0;
1808     ustring opname = op.opname();
1809 
1810     if (rop.use_optix() && A.typespec().is_string()) {
1811         OSL_DASSERT (B.typespec().is_string()
1812                      && "Only string-to-string comparison is supported");
1813 
1814         llvm::Value* a = rop.llvm_load_device_string (A, /*follow*/ true);
1815         llvm::Value* b = rop.llvm_load_device_string (B, /*follow*/ true);
1816 
1817         if (opname == op_eq) {
1818             final_result = rop.ll.op_eq (a, b);
1819         } else if (opname == op_neq) {
1820             final_result = rop.ll.op_ne (a, b);
1821         } else {
1822             // Don't know how to handle this.
1823             OSL_ASSERT (0 && "OptiX only supports equality testing for strings");
1824         }
1825         OSL_ASSERT (final_result);
1826 
1827         final_result = rop.ll.op_bool_to_int (final_result);
1828         rop.storeLLVMValue (final_result, Result, 0, 0);
1829         return true;
1830     }
1831 
1832     for (int i = 0; i < num_components; i++) {
1833         // Get A&B component i -- note that these correctly handle mixed
1834         // scalar/triple comparisons as well as int->float casts as needed.
1835         llvm::Value* a = rop.loadLLVMValue (A, i, 0, cast);
1836         llvm::Value* b = rop.loadLLVMValue (B, i, 0, cast);
1837 
1838         // Trickery for mixed matrix/scalar comparisons -- compare
1839         // on-diagonal to the scalar, off-diagonal to zero
1840         if (A.typespec().is_matrix() && !B.typespec().is_matrix()) {
1841             if ((i/4) != (i%4))
1842                 b = rop.ll.constant (0.0f);
1843         }
1844         if (! A.typespec().is_matrix() && B.typespec().is_matrix()) {
1845             if ((i/4) != (i%4))
1846                 a = rop.ll.constant (0.0f);
1847         }
1848 
1849         // Perform the op
1850         llvm::Value* result = 0;
1851         if (opname == op_lt) {
1852             result = rop.ll.op_lt (a, b);
1853         } else if (opname == op_le) {
1854             result = rop.ll.op_le (a, b);
1855         } else if (opname == op_eq) {
1856             result = rop.ll.op_eq (a, b);
1857         } else if (opname == op_ge) {
1858             result = rop.ll.op_ge (a, b);
1859         } else if (opname == op_gt) {
1860             result = rop.ll.op_gt (a, b);
1861         } else if (opname == op_neq) {
1862             result = rop.ll.op_ne (a, b);
1863         } else {
1864             // Don't know how to handle this.
1865             OSL_ASSERT (0 && "Comparison error");
1866         }
1867         OSL_DASSERT (result);
1868 
1869         if (final_result) {
1870             // Combine the component bool based on the op
1871             if (opname != op_neq)        // final_result &= result
1872                 final_result = rop.ll.op_and (final_result, result);
1873             else                         // final_result |= result
1874                 final_result = rop.ll.op_or (final_result, result);
1875         } else {
1876             final_result = result;
1877         }
1878     }
1879     OSL_ASSERT (final_result);
1880 
1881     // Convert the single bit bool into an int for now.
1882     final_result = rop.ll.op_bool_to_int (final_result);
1883     rop.storeLLVMValue (final_result, Result, 0, 0);
1884     return true;
1885 }
1886 
1887 
1888 
1889 // int regex_search (string subject, string pattern)
1890 // int regex_search (string subject, int results[], string pattern)
1891 // int regex_match (string subject, string pattern)
1892 // int regex_match (string subject, int results[], string pattern)
LLVMGEN(llvm_gen_regex)1893 LLVMGEN (llvm_gen_regex)
1894 {
1895     Opcode &op (rop.inst()->ops()[opnum]);
1896     int nargs = op.nargs();
1897     OSL_DASSERT (nargs == 3 || nargs == 4);
1898     Symbol &Result (*rop.opargsym (op, 0));
1899     Symbol &Subject (*rop.opargsym (op, 1));
1900     bool do_match_results = (nargs == 4);
1901     bool fullmatch = (op.opname() == "regex_match");
1902     Symbol &Match (*rop.opargsym (op, 2));
1903     Symbol &Pattern (*rop.opargsym (op, 2+do_match_results));
1904     OSL_DASSERT (Result.typespec().is_int() && Subject.typespec().is_string() &&
1905                  Pattern.typespec().is_string());
1906     OSL_DASSERT (!do_match_results ||
1907                  (Match.typespec().is_array() &&
1908                   Match.typespec().elementtype().is_int()));
1909 
1910     llvm::Value* call_args[] = {
1911         rop.sg_void_ptr(),              // First arg is ShaderGlobals ptr
1912         rop.llvm_load_value (Subject),  // Next arg is subject string
1913         rop.llvm_void_ptr(Match),       // Pass the results array and length (just pass 0 if no results wanted).
1914         do_match_results ?
1915             rop.ll.constant(Match.typespec().arraylength()) :
1916             rop.ll.constant(0),
1917         rop.llvm_load_value (Pattern),  // Pass the regex match pattern
1918         rop.ll.constant(fullmatch),     // Pass whether or not to do the full match
1919     };
1920     llvm::Value *ret = rop.ll.call_function ("osl_regex_impl", call_args);
1921     rop.llvm_store_value (ret, Result);
1922     return true;
1923 }
1924 
1925 
1926 
1927 // Generic llvm code generation.  See the comments in llvm_ops.cpp for
1928 // the full list of assumptions and conventions.  But in short:
1929 //   1. All polymorphic and derivative cases implemented as functions in
1930 //      llvm_ops.cpp -- no custom IR is needed.
1931 //   2. Naming conention is: osl_NAME_{args}, where args is the
1932 //      concatenation of type codes for all args including return value --
1933 //      f/i/v/m/s for float/int/triple/matrix/string, and df/dv/dm for
1934 //      duals.
1935 //   3. The function returns scalars as an actual return value (that
1936 //      must be stored), but "returns" aggregates or duals in the first
1937 //      argument.
1938 //   4. Duals and aggregates are passed as void*'s, float/int/string
1939 //      passed by value.
1940 //   5. Note that this only works if triples are all treated identically,
1941 //      this routine can't be used if it must be polymorphic based on
1942 //      color, point, vector, normal differences.
1943 //
LLVMGEN(llvm_gen_generic)1944 LLVMGEN (llvm_gen_generic)
1945 {
1946     // most invocations of this function will only need a handful of args
1947     // so avoid dynamic allocation where possible
1948     constexpr int SHORT_NUM_ARGS = 16;
1949     const Symbol* short_args[SHORT_NUM_ARGS];
1950     std::vector<const Symbol*> long_args;
1951     Opcode &op (rop.inst()->ops()[opnum]);
1952     const Symbol** args = short_args;
1953     if (op.nargs() > SHORT_NUM_ARGS) {
1954         long_args.resize(op.nargs());
1955         args = long_args.data();
1956     }
1957     Symbol& Result  = *rop.opargsym (op, 0);
1958     bool any_deriv_args = false;
1959     for (int i = 0;  i < op.nargs();  ++i) {
1960         Symbol *s (rop.opargsym (op, i));
1961         args[i] = s;
1962         any_deriv_args |= (i > 0 && s->has_derivs() && !s->typespec().is_matrix());
1963     }
1964 
1965     // Special cases: functions that have no derivs -- suppress them
1966     if (any_deriv_args)
1967         if (op.opname() == op_logb  ||
1968             op.opname() == op_floor || op.opname() == op_ceil ||
1969             op.opname() == op_round || op.opname() == op_step ||
1970             op.opname() == op_trunc ||
1971             op.opname() == op_sign)
1972             any_deriv_args = false;
1973 
1974     std::string name = std::string("osl_") + op.opname().string() + "_";
1975     for (int i = 0;  i < op.nargs();  ++i) {
1976         Symbol *s (rop.opargsym (op, i));
1977         if (any_deriv_args && Result.has_derivs() && s->has_derivs() && !s->typespec().is_matrix())
1978             name += "d";
1979         if (s->typespec().is_float())
1980             name += "f";
1981         else if (s->typespec().is_triple())
1982             name += "v";
1983         else if (s->typespec().is_matrix())
1984             name += "m";
1985         else if (s->typespec().is_string())
1986             name += "s";
1987         else if (s->typespec().is_int())
1988             name += "i";
1989         else OSL_ASSERT (0);
1990     }
1991 
1992     if (! Result.has_derivs() || ! any_deriv_args) {
1993         // Don't compute derivs -- either not needed or not provided in args
1994         if (Result.typespec().aggregate() == TypeDesc::SCALAR) {
1995             llvm::Value *r = rop.llvm_call_function (name.c_str(), cspan<const Symbol*>(args + 1, op.nargs() - 1));
1996             rop.llvm_store_value (r, Result);
1997         } else {
1998             rop.llvm_call_function (name.c_str(), cspan<const Symbol*>(args, op.nargs()));
1999         }
2000         rop.llvm_zero_derivs (Result);
2001     } else {
2002         // Cases with derivs
2003         OSL_ASSERT (Result.has_derivs() && any_deriv_args);
2004         rop.llvm_call_function (name.c_str(),
2005                                 cspan<const Symbol*>(args, op.nargs()),
2006                                 true);
2007     }
2008     return true;
2009 }
2010 
2011 
2012 
// sincos (theta, sin_out, cos_out): call the osl_sincos_* variant whose
// name suffix encodes the type of each argument ("f" for float, "v" for
// triple), with a "d" prefix on each argument that has derivatives when
// the dual version is in use.
LLVMGEN (llvm_gen_sincos)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Theta   = *rop.opargsym (op, 0);
    Symbol& Sin_out = *rop.opargsym (op, 1);
    Symbol& Cos_out = *rop.opargsym (op, 2);
    const bool theta_deriv   = Theta.has_derivs();
    const bool result_derivs = (Sin_out.has_derivs() || Cos_out.has_derivs());
    // The dual version is used only if the angle has derivs AND at least
    // one of the outputs wants them.
    const bool use_duals = (theta_deriv && result_derivs);

    std::string name ("osl_sincos_");
    const int nargs = op.nargs();
    for (int i = 0;  i < nargs;  ++i) {
        Symbol *sym = rop.opargsym (op, i);
        if (use_duals && sym->has_derivs())
            name += "d";
        if (sym->typespec().is_float())
            name += "f";
        else if (sym->typespec().is_triple())
            name += "v";
        else OSL_ASSERT (0);
    }

    // Theta is passed by pointer if it is a dual or a triple, otherwise by
    // value; both outputs are always passed by pointer.
    const bool theta_by_ptr = (use_duals || Theta.typespec().is_triple());
    llvm::Value* valargs[] = {
        theta_by_ptr ? rop.llvm_void_ptr (Theta)
                     : rop.llvm_load_value (Theta),
        rop.llvm_void_ptr (Sin_out),
        rop.llvm_void_ptr (Cos_out)
    };
    rop.ll.call_function (name.c_str(), valargs);

    // If the input angle didn't have derivatives, we would not have
    // called the version of sincos with derivs; however in that case we
    // need to clear the derivs of either of the outputs that has them.
    if (! theta_deriv) {
        if (Sin_out.has_derivs())
            rop.llvm_zero_derivs (Sin_out);
        if (Cos_out.has_derivs())
            rop.llvm_zero_derivs (Cos_out);
    }

    return true;
}
2053 
2054 
2055 
// Logical "and" / "or" of two operands loaded as ints.  The result stored
// is an int: the one-bit outcome of the test widened via op_bool_to_int.
LLVMGEN (llvm_gen_andor)
{
    Opcode& op (rop.inst()->ops()[opnum]);
    Symbol& result = *rop.opargsym (op, 0);
    Symbol& a = *rop.opargsym (op, 1);
    Symbol& b = *rop.opargsym (op, 2);

    llvm::Value* lhs = rop.llvm_load_value (a, 0, 0, TypeDesc::TypeInt);
    llvm::Value* rhs = rop.llvm_load_value (b, 0, 0, TypeDesc::TypeInt);

    llvm::Value* truth_bit = NULL;
    if (op.opname() == op_and) {
        // Mirrors the bitcode previously generated for osl_and_iii:
        //     %1 = icmp ne i32 %b, 0
        //  %not. = icmp ne i32 %a, 0
        //     %2 = and i1 %1, %not.
        //     %3 = zext i1 %2 to i32
        llvm::Value* rhs_nonzero = rop.ll.op_ne (rhs, rop.ll.constant(0));
        llvm::Value* lhs_nonzero = rop.ll.op_ne (lhs, rop.ll.constant(0));
        truth_bit = rop.ll.op_and (rhs_nonzero, lhs_nonzero);
    } else {
        // Likewise for "or":
        //     %1 = or i32 %b, %a
        //     %2 = icmp ne i32 %1, 0
        //     %3 = zext i1 %2 to i32
        llvm::Value* either = rop.ll.op_or(lhs, rhs);
        truth_bit = rop.ll.op_ne (either, rop.ll.constant(0));
    }

    llvm::Value* as_int = rop.ll.op_bool_to_int(truth_bit);
    rop.llvm_store_value(as_int, result, 0, 0);
    return true;
}
2091 
2092 
// "if": emit a conditional branch to a "then" and an "else" block, each of
// which ends by branching to a common block that follows the conditional.
// Ops [opnum+1, jump(0)) form the "then" clause and ops [jump(0), jump(1))
// form the "else" clause.
LLVMGEN (llvm_gen_if)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& cond = *rop.opargsym (op, 0);

    // Op ranges of the two clauses
    const int then_begin = opnum+1;
    const int else_begin = op.jump(0);
    const int else_end   = op.jump(1);

    // Test the condition for nonzero
    llvm::Value* cond_nonzero = rop.llvm_test_nonzero (cond);

    // Make the blocks, then branch on the condition
    llvm::BasicBlock* then_block = rop.ll.new_basic_block ("then");
    llvm::BasicBlock* else_block = rop.ll.new_basic_block ("else");
    llvm::BasicBlock* after_block = rop.ll.new_basic_block ("");
    rop.ll.op_branch (cond_nonzero, then_block, else_block);

    // "then" clause, falling through to whatever follows the conditional
    rop.build_llvm_code (then_begin, else_begin, then_block);
    rop.ll.op_branch (after_block);

    // "else" clause, likewise
    rop.build_llvm_code (else_begin, else_end, else_block);
    rop.ll.op_branch (after_block);  // insert point is now after_block

    // Subsequent code continues in after_block
    return true;
}
2118 
2119 
2120 
// Loops ("for", "while", "dowhile").  The op's jump addresses split the
// following ops into four consecutive sections:
//     [opnum+1, jump(0))   initialization
//     [jump(0), jump(1))   condition
//     [jump(1), jump(2))   body
//     [jump(2), jump(3))   step
LLVMGEN (llvm_gen_loop_op)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& cond = *rop.opargsym (op, 0);

    const int init_begin = opnum+1;
    const int cond_begin = op.jump(0);
    const int body_begin = op.jump(1);
    const int step_begin = op.jump(2);
    const int loop_end   = op.jump(3);

    // Create the blocks that make up the loop
    llvm::BasicBlock* cond_block = rop.ll.new_basic_block ("cond");
    llvm::BasicBlock* body_block = rop.ll.new_basic_block ("body");
    llvm::BasicBlock* step_block = rop.ll.new_basic_block ("step");
    llvm::BasicBlock* after_block = rop.ll.new_basic_block ("");
    // Record the step and after blocks so that a break/continue inside
    // the loop can find them
    rop.ll.push_loop (step_block, after_block);

    // Initialization (will be empty except for "for" loops)
    rop.build_llvm_code (init_begin, cond_begin);

    // A "do-while" enters at the body; "for" and "while" test the
    // condition first.
    llvm::BasicBlock* entry_block =
        (op.opname() == op_dowhile) ? body_block : cond_block;
    rop.ll.op_branch (entry_block);

    // Condition: generate its code, then test the variable for nonzero
    rop.build_llvm_code (cond_begin, body_begin, cond_block);
    llvm::Value* keep_going = rop.llvm_test_nonzero (cond);

    // Either run the body again or leave the loop
    rop.ll.op_branch (keep_going, body_block, after_block);

    // Body of loop, followed by the step
    rop.build_llvm_code (body_begin, step_begin, body_block);
    rop.ll.op_branch (step_block);

    // Step, followed by a re-test of the condition
    rop.build_llvm_code (step_begin, loop_end, step_block);
    rop.ll.op_branch (cond_block);

    // Subsequent code continues after the loop
    rop.ll.set_insert_point (after_block);
    rop.ll.pop_loop ();

    return true;
}
2162 
2163 
2164 
// "break" and "continue": branch to the enclosing loop's after block
// (break) or step block (continue), then start a fresh basic block for
// whatever code is generated next.
LLVMGEN (llvm_gen_loopmod_op)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 0);

    const bool is_break = (op.opname() == op_break);
    llvm::BasicBlock* target = is_break ? rop.ll.loop_after_block()
                                        : rop.ll.loop_step_block();
    rop.ll.op_branch (target);

    // NOTE(review): presumably the new block exists so that any ops
    // following the break/continue are not emitted after the branch -- confirm.
    llvm::BasicBlock* next_block = rop.ll.new_basic_block ("");
    rop.ll.set_insert_point (next_block);
    return true;
}
2178 
2179 
2180 
2181 static llvm::Value *
llvm_gen_texture_options(BackendLLVM & rop,int opnum,int first_optional_arg,bool tex3d,int nchans,llvm::Value * & alpha,llvm::Value * & dalphadx,llvm::Value * & dalphady,llvm::Value * & errormessage)2182 llvm_gen_texture_options (BackendLLVM &rop, int opnum,
2183                           int first_optional_arg, bool tex3d, int nchans,
2184                           llvm::Value* &alpha, llvm::Value* &dalphadx,
2185                           llvm::Value* &dalphady, llvm::Value* &errormessage)
2186 {
2187     llvm::Value* opt = rop.ll.call_function ("osl_get_texture_options",
2188                                              rop.sg_void_ptr());
2189     llvm::Value* missingcolor = NULL;
2190     TextureOpt optdefaults;  // So we can check the defaults
2191     bool swidth_set = false, twidth_set = false, rwidth_set = false;
2192     bool sblur_set = false, tblur_set = false, rblur_set = false;
2193     bool swrap_set = false, twrap_set = false, rwrap_set = false;
2194     bool firstchannel_set = false, fill_set = false, interp_set = false;
2195     bool time_set = false, subimage_set = false;
2196 
2197     Opcode &op (rop.inst()->ops()[opnum]);
2198     for (int a = first_optional_arg;  a < op.nargs();  ++a) {
2199         Symbol &Name (*rop.opargsym(op,a));
2200         OSL_DASSERT (Name.typespec().is_string() &&
2201                      "optional texture token must be a string");
2202         OSL_DASSERT (a+1 < op.nargs() && "malformed argument list for texture");
2203         ustring name = *(ustring *)Name.data();
2204         ++a;  // advance to next argument
2205 
2206         if (name.empty())    // skip empty string param name
2207             continue;
2208 
2209         Symbol &Val (*rop.opargsym(op,a));
2210         TypeDesc valtype = Val.typespec().simpletype ();
2211         const int *ival = Val.typespec().is_int() && Val.is_constant() ? (const int *)Val.data() : NULL;
2212         const float *fval = Val.typespec().is_float() && Val.is_constant() ? (const float *)Val.data() : NULL;
2213 
2214 #define PARAM_INT(paramname)                                            \
2215         if (name == Strings::paramname && valtype == TypeDesc::INT)   { \
2216             if (! paramname##_set &&                                    \
2217                 ival && *ival == optdefaults.paramname)                 \
2218                 continue;     /* default constant */                    \
2219             llvm::Value *val = rop.llvm_load_value (Val);               \
2220             rop.ll.call_function ("osl_texture_set_" #paramname, opt, val); \
2221             paramname##_set = true;                                     \
2222             continue;                                                   \
2223         }
2224 
2225 #define PARAM_FLOAT(paramname)                                          \
2226         if (name == Strings::paramname &&                               \
2227             (valtype == TypeDesc::FLOAT || valtype == TypeDesc::INT)) { \
2228             if (! paramname##_set &&                                    \
2229                 ((ival && *ival == optdefaults.paramname) ||            \
2230                  (fval && *fval == optdefaults.paramname)))             \
2231                 continue;     /* default constant */                    \
2232             llvm::Value *val = rop.llvm_load_value (Val);               \
2233             if (valtype == TypeDesc::INT)                               \
2234                 val = rop.ll.op_int_to_float (val);                     \
2235             rop.ll.call_function ("osl_texture_set_" #paramname, opt, val); \
2236             paramname##_set = true;                                     \
2237             continue;                                                   \
2238         }
2239 
2240 #define PARAM_FLOAT_STR(paramname)                                      \
2241         if (name == Strings::paramname &&                               \
2242             (valtype == TypeDesc::FLOAT || valtype == TypeDesc::INT)) { \
2243             if (! s##paramname##_set && ! t##paramname##_set &&         \
2244                 ! r##paramname##_set &&                                 \
2245                 ((ival && *ival == optdefaults.s##paramname) ||         \
2246                  (fval && *fval == optdefaults.s##paramname)))          \
2247                 continue;     /* default constant */                    \
2248             llvm::Value *val = rop.llvm_load_value (Val);               \
2249             if (valtype == TypeDesc::INT)                               \
2250                 val = rop.ll.op_int_to_float (val);                     \
2251             rop.ll.call_function ("osl_texture_set_st" #paramname, opt, val); \
2252             if (tex3d)                                                  \
2253                 rop.ll.call_function ("osl_texture_set_r" #paramname, opt, val); \
2254             s##paramname##_set = true;                                  \
2255             t##paramname##_set = true;                                  \
2256             r##paramname##_set = true;                                  \
2257             continue;                                                   \
2258         }
2259 
2260 #define PARAM_STRING_CODE(paramname,decoder,fieldname)                  \
2261         if (name == Strings::paramname && valtype == TypeDesc::STRING) { \
2262             if (Val.is_constant()) {                                    \
2263                 int code = decoder (*(ustring *)Val.data());            \
2264                 if (! paramname##_set && code == optdefaults.fieldname) \
2265                     continue;                                           \
2266                 if (code >= 0) {                                        \
2267                     llvm::Value *val = rop.ll.constant (code);          \
2268                     rop.ll.call_function ("osl_texture_set_" #paramname "_code", opt, val); \
2269                 }                                                       \
2270             } else {                                                    \
2271                 llvm::Value *val = rop.llvm_load_value (Val);           \
2272                 rop.ll.call_function ("osl_texture_set_" #paramname, opt, val); \
2273             }                                                           \
2274             paramname##_set = true;                                     \
2275             continue;                                                   \
2276         }
2277 
2278         PARAM_FLOAT_STR (width)
2279         PARAM_FLOAT (swidth)
2280         PARAM_FLOAT (twidth)
2281         PARAM_FLOAT (rwidth)
2282         PARAM_FLOAT_STR (blur)
2283         PARAM_FLOAT (sblur)
2284         PARAM_FLOAT (tblur)
2285         PARAM_FLOAT (rblur)
2286 
2287         if (name == Strings::wrap && valtype == TypeDesc::STRING) {
2288             if (Val.is_constant()) {
2289                 int mode = TextureOpt::decode_wrapmode (*(ustring *)Val.data());
2290                 llvm::Value *val = rop.ll.constant (mode);
2291                 rop.ll.call_function ("osl_texture_set_stwrap_code", opt, val);
2292                 if (tex3d)
2293                     rop.ll.call_function ("osl_texture_set_rwrap_code", opt, val);
2294             } else {
2295                 llvm::Value *val = rop.llvm_load_value (Val);
2296                 rop.ll.call_function ("osl_texture_set_stwrap", opt, val);
2297                 if (tex3d)
2298                     rop.ll.call_function ("osl_texture_set_rwrap", opt, val);
2299             }
2300             swrap_set = twrap_set = rwrap_set = true;
2301             continue;
2302         }
2303         PARAM_STRING_CODE(swrap, TextureOpt::decode_wrapmode, swrap)
2304         PARAM_STRING_CODE(twrap, TextureOpt::decode_wrapmode, twrap)
2305         PARAM_STRING_CODE(rwrap, TextureOpt::decode_wrapmode, rwrap)
2306 
2307         PARAM_FLOAT (fill)
2308         PARAM_FLOAT (time)
2309         PARAM_INT (firstchannel)
2310         PARAM_INT (subimage)
2311 
2312         if (name == Strings::subimage && valtype == TypeDesc::STRING) {
2313             if (Val.is_constant()) {
2314                 ustring v = *(ustring *)Val.data();
2315                 if (v.empty() && ! subimage_set) {
2316                     continue;     // Ignore nulls unless they are overrides
2317                 }
2318             }
2319             llvm::Value *val = rop.llvm_load_value (Val);
2320             rop.ll.call_function ("osl_texture_set_subimagename", opt, val);
2321             subimage_set = true;
2322             continue;
2323         }
2324 
2325         PARAM_STRING_CODE (interp, tex_interp_to_code, interpmode)
2326 
2327         if (name == Strings::alpha && valtype == TypeDesc::FLOAT) {
2328             alpha = rop.llvm_get_pointer (Val);
2329             if (Val.has_derivs()) {
2330                 dalphadx = rop.llvm_get_pointer (Val, 1);
2331                 dalphady = rop.llvm_get_pointer (Val, 2);
2332                 // NO z derivs!  dalphadz = rop.llvm_get_pointer (Val, 3);
2333             }
2334             continue;
2335         }
2336         if (name == Strings::errormessage && valtype == TypeDesc::STRING) {
2337             errormessage = rop.llvm_get_pointer (Val);
2338             continue;
2339         }
2340         if (name == Strings::missingcolor &&
2341                    equivalent(valtype,TypeDesc::TypeColor)) {
2342             if (! missingcolor) {
2343                 // If not already done, allocate enough storage for the
2344                 // missingcolor value (4 floats), and call the special
2345                 // function that points the TextureOpt.missingcolor to it.
2346                 missingcolor = rop.ll.op_alloca(rop.ll.type_float(), 4);
2347                 rop.ll.call_function ("osl_texture_set_missingcolor_arena",
2348                                       opt, rop.ll.void_ptr(missingcolor));
2349             }
2350             rop.ll.op_memcpy (rop.ll.void_ptr(missingcolor),
2351                               rop.llvm_void_ptr(Val), (int)sizeof(Color3));
2352             continue;
2353         }
2354         if (name == Strings::missingalpha && valtype == TypeDesc::FLOAT) {
2355             if (! missingcolor) {
2356                 // If not already done, allocate enough storage for the
2357                 // missingcolor value (4 floats), and call the special
2358                 // function that points the TextureOpt.missingcolor to it.
2359                 missingcolor = rop.ll.op_alloca(rop.ll.type_float(), 4);
2360                 rop.ll.call_function ("osl_texture_set_missingcolor_arena",
2361                                       opt, rop.ll.void_ptr(missingcolor));
2362             }
2363             llvm::Value *val = rop.llvm_load_value (Val);
2364             rop.ll.call_function ("osl_texture_set_missingcolor_alpha",
2365                                     opt, rop.ll.constant(nchans), val);
2366             continue;
2367 
2368         }
2369         rop.shadingcontext()->errorf("Unknown texture%s optional argument: \"%s\", <%s> (%s:%d)",
2370                                      tex3d ? "3d" : "", name, valtype,
2371                                      op.sourcefile(), op.sourceline());
2372 #undef PARAM_INT
2373 #undef PARAM_FLOAT
2374 #undef PARAM_FLOAT_STR
2375 #undef PARAM_STRING_CODE
2376 
2377 #if 0
2378         // Helps me find any constant optional params that aren't elided
2379         if (Name.is_constant() && Val.is_constant()) {
2380             std::cout << "! texture constant optional arg '" << name << "'\n";
2381             if (Val.typespec().is_float()) std::cout << "\tf " << *(float *)Val.data() << "\n";
2382             if (Val.typespec().is_int()) std::cout << "\ti " << *(int *)Val.data() << "\n";
2383             if (Val.typespec().is_string()) std::cout << "\t" << *(ustring *)Val.data() << "\n";
2384         }
2385 #endif
2386     }
2387 
2388     return opt;
2389 }
2390 
2391 
2392 
LLVMGEN(llvm_gen_texture)2393 LLVMGEN (llvm_gen_texture)
2394 {
2395     Opcode &op (rop.inst()->ops()[opnum]);
2396     Symbol &Result = *rop.opargsym (op, 0);
2397     Symbol &Filename = *rop.opargsym (op, 1);
2398     Symbol &S = *rop.opargsym (op, 2);
2399     Symbol &T = *rop.opargsym (op, 3);
2400     int nchans = Result.typespec().aggregate();
2401 
2402     bool user_derivs = false;
2403     int first_optional_arg = 4;
2404     if (op.nargs() > 4 && rop.opargsym(op,4)->typespec().is_float()) {
2405         user_derivs = true;
2406         first_optional_arg = 8;
2407         OSL_DASSERT(rop.opargsym(op,5)->typespec().is_float());
2408         OSL_DASSERT(rop.opargsym(op,6)->typespec().is_float());
2409         OSL_DASSERT(rop.opargsym(op,7)->typespec().is_float());
2410     }
2411 
2412     llvm::Value* opt;   // TextureOpt
2413     llvm::Value *alpha = NULL, *dalphadx = NULL, *dalphady = NULL;
2414     llvm::Value *errormessage = NULL;
2415     opt = llvm_gen_texture_options (rop, opnum, first_optional_arg,
2416                                     false /*3d*/, nchans,
2417                                     alpha, dalphadx, dalphady, errormessage);
2418 
2419     RendererServices::TextureHandle *texture_handle = NULL;
2420     if (Filename.is_constant() && rop.shadingsys().opt_texture_handle()) {
2421         texture_handle = rop.renderer()->get_texture_handle (*(ustring *)Filename.data(), rop.shadingcontext());
2422     }
2423 
2424     // Now call the osl_texture function, passing the options and all the
2425     // explicit args like texture coordinates.
2426     llvm::Value * args[] = {
2427         rop.sg_void_ptr(),
2428         rop.llvm_load_value (Filename),
2429         rop.ll.constant_ptr (texture_handle),
2430         opt,
2431         rop.llvm_load_value (S),
2432         rop.llvm_load_value (T),
2433         user_derivs ? rop.llvm_load_value (*rop.opargsym (op, 4)) : rop.llvm_load_value (S, 1),
2434         user_derivs ? rop.llvm_load_value (*rop.opargsym (op, 5)) : rop.llvm_load_value (T, 1),
2435         user_derivs ? rop.llvm_load_value (*rop.opargsym (op, 6)) : rop.llvm_load_value (S, 2),
2436         user_derivs ? rop.llvm_load_value (*rop.opargsym (op, 7)) : rop.llvm_load_value (T, 2),
2437         rop.ll.constant (nchans),
2438         rop.ll.void_ptr (rop.llvm_get_pointer (Result, 0)),
2439         rop.ll.void_ptr (rop.llvm_get_pointer (Result, 1)),
2440         rop.ll.void_ptr (rop.llvm_get_pointer (Result, 2)),
2441         rop.ll.void_ptr (alpha    ? alpha    : rop.ll.void_ptr_null()),
2442         rop.ll.void_ptr (dalphadx ? dalphadx : rop.ll.void_ptr_null()),
2443         rop.ll.void_ptr (dalphady ? dalphady : rop.ll.void_ptr_null()),
2444         rop.ll.void_ptr (errormessage ? errormessage : rop.ll.void_ptr_null()),
2445     };
2446     rop.ll.call_function ("osl_texture", args);
2447     rop.generated_texture_call (texture_handle != NULL);
2448     return true;
2449 }
2450 
2451 
2452 
/// Emit the LLVM IR for a texture3d() op.
///
/// Op arguments are: result, filename, P, optionally two triple-valued
/// derivatives in slots 3 and 4, and then optional token/value pairs that
/// are processed by llvm_gen_texture_options.  Without explicit derivatives
/// the automatic derivatives of P are passed.  Always returns true.
LLVMGEN (llvm_gen_texture3d)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result   = *rop.opargsym (op, 0);
    Symbol &Filename = *rop.opargsym (op, 1);
    Symbol &P        = *rop.opargsym (op, 2);
    const int nchans = Result.typespec().aggregate();

    // A triple in slot 3 means the shader supplied explicit derivatives,
    // which pushes the optional token/value pairs out to slot 5.
    const bool user_derivs = op.nargs() > 3 &&
                             rop.opargsym(op,3)->typespec().is_triple();
    const int first_optional_arg = user_derivs ? 5 : 3;
    if (user_derivs) {
        OSL_DASSERT(rop.opargsym(op,3)->typespec().is_triple());
        OSL_DASSERT(rop.opargsym(op,4)->typespec().is_triple());
    }

    // Build the TextureOpt; the optional outputs are filled in by reference.
    llvm::Value *alpha = NULL, *dalphadx = NULL, *dalphady = NULL;
    llvm::Value *errormessage = NULL;
    llvm::Value *opt = llvm_gen_texture_options (rop, opnum, first_optional_arg,
                                                 true /*3d*/, nchans,
                                                 alpha, dalphadx, dalphady,
                                                 errormessage);

    // A constant filename lets us resolve the texture handle right now.
    RendererServices::TextureHandle *texture_handle = NULL;
    if (Filename.is_constant() && rop.shadingsys().opt_texture_handle())
        texture_handle = rop.renderer()->get_texture_handle (*(ustring *)Filename.data(),
                                                             rop.shadingcontext());

    // Optional outputs are passed as a void pointer, null when absent.
    auto ptr_or_null = [&](llvm::Value *p) {
        return rop.ll.void_ptr (p ? p : rop.ll.void_ptr_null());
    };

    // Evaluate every argument of osl_texture3d in call order.
    llvm::Value *sg       = rop.sg_void_ptr();
    llvm::Value *filename = rop.llvm_load_value (Filename);
    llvm::Value *handle   = rop.ll.constant_ptr (texture_handle);
    llvm::Value *p        = rop.llvm_void_ptr (P);
    llvm::Value *dPdx, *dPdy;
    if (user_derivs) {
        dPdx = rop.llvm_void_ptr (*rop.opargsym (op, 3));
        dPdy = rop.llvm_void_ptr (*rop.opargsym (op, 4));
    } else {
        // Fall back to the automatic derivatives of P
        dPdx = rop.llvm_void_ptr (P, 1);
        dPdy = rop.llvm_void_ptr (P, 2);
    }
    llvm::Value *chans     = rop.ll.constant (nchans);
    llvm::Value *result    = rop.ll.void_ptr (rop.llvm_void_ptr (Result, 0));
    llvm::Value *resultdx  = rop.ll.void_ptr (rop.llvm_void_ptr (Result, 1));
    llvm::Value *resultdy  = rop.ll.void_ptr (rop.llvm_void_ptr (Result, 2));
    llvm::Value *alpha_ptr = ptr_or_null (alpha);
    llvm::Value *adx_ptr   = ptr_or_null (dalphadx);
    llvm::Value *ady_ptr   = ptr_or_null (dalphady);
    llvm::Value *err_ptr   = ptr_or_null (errormessage);

    llvm::Value *args[] = {
        sg, filename, handle, opt,
        p, dPdx, dPdy,
        chans,
        result, resultdx, resultdy,
        alpha_ptr, adx_ptr, ady_ptr,
        err_ptr,
    };
    rop.ll.call_function ("osl_texture3d", args);
    rop.generated_texture_call (texture_handle != NULL);
    return true;
}
2506 
2507 
2508 
/// Emit the LLVM IR for an environment() op.
///
/// Op arguments are: result, filename, R, optionally two triple-valued
/// derivatives in slots 3 and 4, and then optional token/value pairs that
/// are processed by llvm_gen_texture_options (in its non-3D mode).  Without
/// explicit derivatives the automatic derivatives of R are passed.
/// Always returns true.
LLVMGEN (llvm_gen_environment)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result   = *rop.opargsym (op, 0);
    Symbol &Filename = *rop.opargsym (op, 1);
    Symbol &R        = *rop.opargsym (op, 2);
    const int nchans = Result.typespec().aggregate();

    // A triple in slot 3 means the shader supplied explicit derivatives,
    // which pushes the optional token/value pairs out to slot 5.
    const bool user_derivs = op.nargs() > 3 &&
                             rop.opargsym(op,3)->typespec().is_triple();
    const int first_optional_arg = user_derivs ? 5 : 3;
    if (user_derivs) {
        OSL_DASSERT(rop.opargsym(op,4)->typespec().is_triple());
    }

    // Build the TextureOpt; the optional outputs are filled in by reference.
    llvm::Value *alpha = NULL, *dalphadx = NULL, *dalphady = NULL;
    llvm::Value *errormessage = NULL;
    llvm::Value *opt = llvm_gen_texture_options (rop, opnum, first_optional_arg,
                                                 false /*3d*/, nchans,
                                                 alpha, dalphadx, dalphady,
                                                 errormessage);

    // A constant filename lets us resolve the texture handle right now.
    RendererServices::TextureHandle *texture_handle = NULL;
    if (Filename.is_constant() && rop.shadingsys().opt_texture_handle())
        texture_handle = rop.renderer()->get_texture_handle (*(ustring *)Filename.data(),
                                                             rop.shadingcontext());

    // Alpha outputs: cast when present, otherwise hand over a null pointer.
    auto cast_or_null = [&](llvm::Value *p) -> llvm::Value * {
        if (p)
            return rop.ll.void_ptr (p);
        return rop.ll.void_ptr_null();
    };

    // Evaluate every argument of osl_environment in call order.
    llvm::Value *sg       = rop.sg_void_ptr();
    llvm::Value *filename = rop.llvm_load_value (Filename);
    llvm::Value *handle   = rop.ll.constant_ptr (texture_handle);
    llvm::Value *r        = rop.llvm_void_ptr (R);
    llvm::Value *dRdx, *dRdy;
    if (user_derivs) {
        dRdx = rop.llvm_void_ptr (*rop.opargsym (op, 3));
        dRdy = rop.llvm_void_ptr (*rop.opargsym (op, 4));
    } else {
        // Fall back to the automatic derivatives of R
        dRdx = rop.llvm_void_ptr (R, 1);
        dRdy = rop.llvm_void_ptr (R, 2);
    }
    llvm::Value *chans     = rop.ll.constant (nchans);
    llvm::Value *result    = rop.llvm_void_ptr (Result, 0);
    llvm::Value *resultdx  = rop.llvm_void_ptr (Result, 1);
    llvm::Value *resultdy  = rop.llvm_void_ptr (Result, 2);
    llvm::Value *alpha_ptr = cast_or_null (alpha);
    llvm::Value *adx_ptr   = cast_or_null (dalphadx);
    llvm::Value *ady_ptr   = cast_or_null (dalphady);
    llvm::Value *err_ptr   = rop.ll.void_ptr (errormessage ? errormessage
                                                           : rop.ll.void_ptr_null());

    llvm::Value *args[] = {
        sg, filename, handle, opt,
        r, dRdx, dRdy,
        chans,
        result, resultdx, resultdy,
        alpha_ptr, adx_ptr, ady_ptr,
        err_ptr,
    };
    rop.ll.call_function ("osl_environment", args);
    rop.generated_texture_call (texture_handle != NULL);
    return true;
}
2560 
2561 
2562 
2563 static llvm::Value *
llvm_gen_trace_options(BackendLLVM & rop,int opnum,int first_optional_arg)2564 llvm_gen_trace_options (BackendLLVM &rop, int opnum,
2565                         int first_optional_arg)
2566 {
2567     llvm::Value* opt = rop.ll.call_function ("osl_get_trace_options",
2568                                              rop.sg_void_ptr());
2569     Opcode &op (rop.inst()->ops()[opnum]);
2570     for (int a = first_optional_arg;  a < op.nargs();  ++a) {
2571         Symbol &Name (*rop.opargsym(op,a));
2572         OSL_DASSERT (Name.typespec().is_string() &&
2573                      "optional trace token must be a string");
2574         OSL_DASSERT (a+1 < op.nargs() && "malformed argument list for trace");
2575         ustring name = *(ustring *)Name.data();
2576 
2577         ++a;  // advance to next argument
2578         Symbol &Val (*rop.opargsym(op,a));
2579         TypeDesc valtype = Val.typespec().simpletype ();
2580 
2581         llvm::Value *val = rop.llvm_load_value (Val);
2582         if (name == Strings::mindist && valtype == TypeDesc::FLOAT) {
2583             rop.ll.call_function ("osl_trace_set_mindist", opt, val);
2584         } else if (name == Strings::maxdist && valtype == TypeDesc::FLOAT) {
2585             rop.ll.call_function ("osl_trace_set_maxdist", opt, val);
2586         } else if (name == Strings::shade && valtype == TypeDesc::INT) {
2587             rop.ll.call_function ("osl_trace_set_shade", opt, val);
2588         } else if (name == Strings::traceset && valtype == TypeDesc::STRING) {
2589             rop.ll.call_function ("osl_trace_set_traceset", opt, val);
2590         } else {
2591             rop.shadingcontext()->errorf("Unknown trace() optional argument: \"%s\", <%s> (%s:%d)",
2592                                          name, valtype,
2593                                          op.sourcefile(), op.sourceline());
2594         }
2595     }
2596 
2597     return opt;
2598 }
2599 
2600 
2601 
/// Emit the LLVM IR for a trace() op.
///
/// Op arguments are: result, ray position, ray direction, followed by
/// optional token/value pairs starting at slot 3.  The position and
/// direction are passed to osl_trace along with both of their derivatives,
/// and the call's return value is stored into the result.
/// Always returns true.
LLVMGEN (llvm_gen_trace)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &Pos    = *rop.opargsym (op, 1);
    Symbol &Dir    = *rop.opargsym (op, 2);
    const int first_optional_arg = 3;

    // TraceOpt, populated from the optional arguments
    llvm::Value *opt = llvm_gen_trace_options (rop, opnum, first_optional_arg);

    // Evaluate every argument of osl_trace in call order.
    llvm::Value *sg     = rop.sg_void_ptr();
    llvm::Value *pos    = rop.llvm_void_ptr (Pos, 0);
    llvm::Value *dposdx = rop.llvm_void_ptr (Pos, 1);
    llvm::Value *dposdy = rop.llvm_void_ptr (Pos, 2);
    llvm::Value *dir    = rop.llvm_void_ptr (Dir, 0);
    llvm::Value *ddirdx = rop.llvm_void_ptr (Dir, 1);
    llvm::Value *ddirdy = rop.llvm_void_ptr (Dir, 2);

    llvm::Value *args[] = {
        sg, opt,
        pos, dposdx, dposdy,
        dir, ddirdx, ddirdy,
    };
    llvm::Value *hit = rop.ll.call_function ("osl_trace", args);
    rop.llvm_store_value (hit, Result);
    return true;
}
2629 
2630 
2631 
2632 static std::string
arg_typecode(Symbol * sym,bool derivs)2633 arg_typecode (Symbol *sym, bool derivs)
2634 {
2635     const TypeSpec &t (sym->typespec());
2636     if (t.is_int())
2637         return "i";
2638     else if (t.is_matrix())
2639         return "m";
2640     else if (t.is_string())
2641         return "s";
2642 
2643     std::string name;
2644     if (derivs)
2645         name = "d";
2646     if (t.is_float())
2647         name += "f";
2648     else if (t.is_triple())
2649         name += "v";
2650     else OSL_ASSERT (0);
2651     return name;
2652 }
2653 
2654 
2655 
2656 static llvm::Value *
llvm_gen_noise_options(BackendLLVM & rop,int opnum,int first_optional_arg)2657 llvm_gen_noise_options (BackendLLVM &rop, int opnum,
2658                         int first_optional_arg)
2659 {
2660     llvm::Value* opt = rop.ll.call_function ("osl_get_noise_options",
2661                                              rop.sg_void_ptr());
2662 
2663     Opcode &op (rop.inst()->ops()[opnum]);
2664     for (int a = first_optional_arg;  a < op.nargs();  ++a) {
2665         Symbol &Name (*rop.opargsym(op,a));
2666         OSL_DASSERT (Name.typespec().is_string() &&
2667                      "optional noise token must be a string");
2668         OSL_DASSERT (a+1 < op.nargs() && "malformed argument list for noise");
2669         ustring name = *(ustring *)Name.data();
2670 
2671         ++a;  // advance to next argument
2672         Symbol &Val (*rop.opargsym(op,a));
2673         TypeDesc valtype = Val.typespec().simpletype ();
2674 
2675         if (name.empty())    // skip empty string param name
2676             continue;
2677 
2678         if (name == Strings::anisotropic && Val.typespec().is_int()) {
2679             rop.ll.call_function ("osl_noiseparams_set_anisotropic", opt,
2680                                     rop.llvm_load_value (Val));
2681         } else if (name == Strings::do_filter && Val.typespec().is_int()) {
2682             rop.ll.call_function ("osl_noiseparams_set_do_filter", opt,
2683                                     rop.llvm_load_value (Val));
2684         } else if (name == Strings::direction && Val.typespec().is_triple()) {
2685             rop.ll.call_function ("osl_noiseparams_set_direction", opt,
2686                                     rop.llvm_void_ptr (Val));
2687         } else if (name == Strings::bandwidth &&
2688                    (Val.typespec().is_float() || Val.typespec().is_int())) {
2689             rop.ll.call_function ("osl_noiseparams_set_bandwidth", opt,
2690                                     rop.llvm_load_value (Val, 0, NULL, 0,
2691                                                          TypeDesc::TypeFloat));
2692         } else if (name == Strings::impulses &&
2693                    (Val.typespec().is_float() || Val.typespec().is_int())) {
2694             rop.ll.call_function ("osl_noiseparams_set_impulses", opt,
2695                                     rop.llvm_load_value (Val, 0, NULL, 0,
2696                                                          TypeDesc::TypeFloat));
2697         } else {
2698             rop.shadingcontext()->errorf("Unknown %s optional argument: \"%s\", <%s> (%s:%d)",
2699                                          op.opname(), name, valtype,
2700                                          op.sourcefile(), op.sourceline());
2701         }
2702     }
2703     return opt;
2704 }
2705 
2706 
2707 
2708 // T noise ([string name,] float s, ...);
2709 // T noise ([string name,] float s, float t, ...);
2710 // T noise ([string name,] point P, ...);
2711 // T noise ([string name,] point P, float t, ...);
2712 // T pnoise ([string name,] float s, float sper, ...);
2713 // T pnoise ([string name,] float s, float t, float sper, float tper, ...);
2714 // T pnoise ([string name,] point P, point Pper, ...);
2715 // T pnoise ([string name,] point P, float t, point Pper, float tper, ...);
LLVMGEN(llvm_gen_noise)2716 LLVMGEN (llvm_gen_noise)
2717 {
2718     Opcode &op (rop.inst()->ops()[opnum]);
2719     bool periodic = (op.opname() == Strings::pnoise ||
2720                      op.opname() == Strings::psnoise);
2721 
2722     int arg = 0;   // Next arg to read
2723     Symbol &Result = *rop.opargsym (op, arg++);
2724     int outdim = Result.typespec().is_triple() ? 3 : 1;
2725     Symbol *Name = rop.opargsym (op, arg++);
2726     ustring name;
2727     if (Name->typespec().is_string()) {
2728         name = Name->is_constant() ? *(ustring *)Name->data() : ustring();
2729     } else {
2730         // Not a string, must be the old-style noise/pnoise
2731         --arg;  // forget that arg
2732         Name = NULL;
2733         name = op.opname();
2734     }
2735 
2736     Symbol *S = rop.opargsym (op, arg++), *T = NULL;
2737     Symbol *Sper = NULL, *Tper = NULL;
2738     int indim = S->typespec().is_triple() ? 3 : 1;
2739     bool derivs = S->has_derivs();
2740 
2741     if (periodic) {
2742         if (op.nargs() > (arg+1) &&
2743                 (rop.opargsym(op,arg+1)->typespec().is_float() ||
2744                  rop.opargsym(op,arg+1)->typespec().is_triple())) {
2745             // 2D or 4D
2746             ++indim;
2747             T = rop.opargsym (op, arg++);
2748             derivs |= T->has_derivs();
2749         }
2750         Sper = rop.opargsym (op, arg++);
2751         if (indim == 2 || indim == 4)
2752             Tper = rop.opargsym (op, arg++);
2753     } else {
2754         // non-periodic case
2755         if (op.nargs() > arg && rop.opargsym(op,arg)->typespec().is_float()) {
2756             // either 2D or 4D, so needs a second index
2757             ++indim;
2758             T = rop.opargsym (op, arg++);
2759             derivs |= T->has_derivs();
2760         }
2761     }
2762     derivs &= Result.has_derivs();  // ignore derivs if result doesn't need
2763 
2764     bool pass_name = false, pass_sg = false, pass_options = false;
2765     if (name.empty()) {
2766         // name is not a constant
2767         name = periodic ? Strings::genericpnoise : Strings::genericnoise;
2768         pass_name = true;
2769         pass_sg = true;
2770         pass_options = true;
2771         derivs = true;   // always take derivs if we don't know noise type
2772     } else if (name == Strings::perlin || name == Strings::snoise ||
2773                name == Strings::psnoise) {
2774         name = periodic ? Strings::psnoise : Strings::snoise;
2775         // derivs = false;
2776     } else if (name == Strings::uperlin || name == Strings::noise ||
2777                name == Strings::pnoise) {
2778         name = periodic ? Strings::pnoise : Strings::noise;
2779         // derivs = false;
2780     } else if (name == Strings::cell || name == Strings::cellnoise) {
2781         name = periodic ? Strings::pcellnoise : Strings::cellnoise;
2782         derivs = false;  // cell noise derivs are always zero
2783     } else if (name == Strings::hash || name == Strings::hashnoise) {
2784         name = periodic ? Strings::phashnoise : Strings::hashnoise;
2785         derivs = false;  // hash noise derivs are always zero
2786     } else if (name == Strings::simplex && !periodic) {
2787         name = Strings::simplexnoise;
2788     } else if (name == Strings::usimplex && !periodic) {
2789         name = Strings::usimplexnoise;
2790     } else if (name == Strings::gabor) {
2791         // already named
2792         pass_name = true;
2793         pass_sg = true;
2794         pass_options = true;
2795         derivs = true;
2796         name = periodic ? Strings::gaborpnoise : Strings::gabornoise;
2797     } else {
2798         rop.shadingcontext()->errorf("%snoise type \"%s\" is unknown, called from (%s:%d)",
2799                                 (periodic ? "periodic " : ""), name,
2800                                 op.sourcefile(), op.sourceline());
2801         return false;
2802     }
2803 
2804     if (rop.shadingsys().no_noise()) {
2805         // renderer option to replace noise with constant value. This can be
2806         // useful as a profiling aid, to see how much it speeds up to have
2807         // trivial expense for noise calls.
2808         if (name == Strings::uperlin || name == Strings::noise ||
2809             name == Strings::usimplexnoise || name == Strings::usimplex ||
2810             name == Strings::cell || name == Strings::cellnoise ||
2811             name == Strings::hash || name == Strings::hashnoise ||
2812             name == Strings::pcellnoise || name == Strings::pnoise)
2813             name = ustring("unullnoise");
2814         else
2815             name = ustring("nullnoise");
2816         pass_name = false;
2817         periodic = false;
2818         pass_sg = false;
2819         pass_options = false;
2820     }
2821 
2822     llvm::Value *opt = NULL;
2823     if (pass_options) {
2824         opt = llvm_gen_noise_options (rop, opnum, arg);
2825     }
2826 
2827     std::string funcname = "osl_" + name.string() + "_" + arg_typecode(&Result,derivs);
2828     llvm::Value * args[10]; int nargs = 0;
2829     if (pass_name) {
2830         args[nargs++] = rop.llvm_load_string (*Name);
2831     }
2832     llvm::Value *tmpresult = NULL;
2833     // triple return, or float return with derivs, passes result pointer
2834     if (outdim == 3 || derivs) {
2835         if (derivs && !Result.has_derivs()) {
2836             tmpresult = rop.llvm_load_arg (Result, true);
2837             args[nargs++] = tmpresult;
2838         }
2839         else
2840             args[nargs++] = rop.llvm_void_ptr (Result);
2841     }
2842     funcname += arg_typecode(S, derivs);
2843     args[nargs++] = rop.llvm_load_arg (*S, derivs);
2844     if (T) {
2845         funcname += arg_typecode(T, derivs);
2846         args[nargs++] = rop.llvm_load_arg (*T, derivs);
2847     }
2848 
2849     if (periodic) {
2850         funcname += arg_typecode (Sper, false /* no derivs */);
2851         args[nargs++] = rop.llvm_load_arg (*Sper, false);
2852         if (Tper) {
2853             funcname += arg_typecode (Tper, false /* no derivs */);
2854             args[nargs++] = rop.llvm_load_arg (*Tper, false);
2855         }
2856     }
2857 
2858     if (pass_sg)
2859         args[nargs++] = rop.sg_void_ptr();
2860     if (pass_options)
2861         args[nargs++] = opt;
2862 
2863     OSL_DASSERT(nargs < int(sizeof(args) / sizeof(args[0])));
2864 
2865 #if 0
2866     llvm::outs() << "About to push " << funcname << "\n";
2867     for (int i = 0;  i < nargs;  ++i)
2868         llvm::outs() << "    " << *args[i] << "\n";
2869 #endif
2870 
2871     llvm::Value *r = rop.ll.call_function (funcname.c_str(), cspan<llvm::Value*>(args, args + nargs));
2872     if (outdim == 1 && !derivs) {
2873         // Just plain float (no derivs) returns its value
2874         rop.llvm_store_value (r, Result);
2875     } else if (derivs && !Result.has_derivs()) {
2876         // Function needed to take derivs, but our result doesn't have them.
2877         // We created a temp, now we need to copy to the real result.
2878         tmpresult = rop.llvm_ptr_cast (tmpresult, Result.typespec());
2879         for (int c = 0;  c < Result.typespec().aggregate();  ++c) {
2880             llvm::Value *v = rop.llvm_load_value (tmpresult, Result.typespec(),
2881                                                   0, NULL, c);
2882             rop.llvm_store_value (v, Result, 0, c);
2883         }
2884     } // N.B. other cases already stored their result in the right place
2885 
2886     // Clear derivs if result has them but we couldn't compute them
2887     if (Result.has_derivs() && !derivs)
2888         rop.llvm_zero_derivs (Result);
2889 
2890     if (rop.shadingsys().profile() >= 1)
2891         rop.ll.call_function ("osl_count_noise", rop.sg_void_ptr());
2892 
2893     return true;
2894 }
2895 
2896 
2897 
LLVMGEN (llvm_gen_getattribute)
{
    // Emit IR for the getattribute op: one call to osl_get_attribute whose
    // return value is stored into the op's first argument.
    //
    // The op appears in eight shapes, depending on whether an object name
    // and/or an index is supplied, and whether the destination is an array:
    //   * getattribute (attribute_name, value)
    //   * getattribute (attribute_name, value[])
    //   * getattribute (attribute_name, index, value)
    //   * getattribute (attribute_name, index, value[])
    //   * getattribute (object, attribute_name, value)
    //   * getattribute (object, attribute_name, value[])
    //   * getattribute (object, attribute_name, index, value)
    //   * getattribute (object, attribute_name, index, value[])
    Opcode &op (rop.inst()->ops()[opnum]);
    const int nargs = op.nargs();
    OSL_DASSERT(nargs >= 3 && nargs <= 5);

    // An int in the next-to-last slot means an index was supplied.
    const bool array_lookup = rop.opargsym(op, nargs-2)->typespec().is_int();
    // A string in slot 2 of a call with at least 4 args means slot 1 holds
    // an object name and slot 2 holds the attribute name.
    const bool object_lookup = rop.opargsym(op, 2)->typespec().is_string() && nargs >= 4;

    const int object_slot = object_lookup ? 1 : 0;
    const int attrib_slot = object_slot + 1;
    const int index_slot  = array_lookup ? nargs - 2 : 0;

    // ObjectName and Index fall back to slot 0 (aliasing Result) when the
    // corresponding optional argument is absent; they are only meaningful
    // when object_lookup / array_lookup is true, respectively.
    Symbol& Result      = *rop.opargsym (op, 0);
    Symbol& ObjectName  = *rop.opargsym (op, object_slot);
    Symbol& Attribute   = *rop.opargsym (op, attrib_slot);
    Symbol& Index       = *rop.opargsym (op, index_slot);
    Symbol& Destination = *rop.opargsym (op, nargs-1);
    OSL_DASSERT(!Result.typespec().is_closure_based() &&
                !ObjectName.typespec().is_closure_based() &&
                !Attribute.typespec().is_closure_based() &&
                !Index.typespec().is_closure_based() &&
                !Destination.typespec().is_closure_based());

    // The destination's TypeDesc is handed to the callback by address, so
    // the renderer can convert from its internal representation to the
    // type OSL expects.
    const TypeDesc* dest_type = &Destination.typespec().simpletype();

    llvm::Value * args[] = {
        rop.sg_void_ptr(),                                  // shader globals
        rop.ll.constant ((int)Destination.has_derivs()),    // want derivs?
        object_lookup ? rop.llvm_load_value (ObjectName)    // object name,
                      : rop.ll.constant (ustring()),        //   or empty
        rop.llvm_load_value (Attribute),                    // attribute name
        rop.ll.constant ((int)array_lookup),                // index given?
        rop.llvm_load_value (Index),                        // index value
        rop.ll.constant_ptr ((void *) dest_type),           // dest TypeDesc
        rop.llvm_void_ptr (Destination),                    // dest storage
    };
    llvm::Value *status = rop.ll.call_function ("osl_get_attribute", args);
    rop.llvm_store_value (status, Result);

    return true;
}
2950 
2951 
2952 
LLVMGEN (llvm_gen_gettextureinfo)
{
    // Emit IR for the gettextureinfo op: a call to osl_get_textureinfo
    // whose int return value is stored into the op's first argument.
    // Op arguments: result, filename, dataname, data.
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() == 4);

    Symbol& Result   = *rop.opargsym (op, 0);
    Symbol& Filename = *rop.opargsym (op, 1);
    Symbol& Dataname = *rop.opargsym (op, 2);
    Symbol& Data     = *rop.opargsym (op, 3);

    OSL_DASSERT(!Result.typespec().is_closure_based() &&
                Filename.typespec().is_string() &&
                Dataname.typespec().is_string() &&
                !Data.typespec().is_closure_based() &&
                Result.typespec().is_int());

    // When the filename is a compile-time constant (and the shading system
    // option allows it), ask the renderer for a texture handle now and bake
    // the pointer into the generated code.
    RendererServices::TextureHandle *texture_handle = nullptr;
    if (Filename.is_constant() && rop.shadingsys().opt_texture_handle()) {
        const ustring &texname = *(ustring *)Filename.data();
        texture_handle = rop.renderer()->get_texture_handle (texname, rop.shadingcontext());
    }

    const TypeDesc &datatype = Data.typespec().simpletype();
    llvm::Value * args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Filename),
        rop.ll.constant_ptr (texture_handle),
        rop.llvm_load_value (Dataname),
        // The TypeDesc of the data is passed as three separate ints
        rop.ll.constant((int) datatype.basetype),
        rop.ll.constant((int) datatype.arraylen),
        rop.ll.constant((int) datatype.aggregate),
        // Where the retrieved data is written
        rop.llvm_void_ptr (Data),
        // No error message pointer is supplied
        rop.ll.void_ptr_null(),
    };
    llvm::Value *status = rop.ll.call_function ("osl_get_textureinfo", args);
    rop.llvm_store_value (status, Result);

    // The call does not fill in derivatives; make sure they are not left
    // uninitialized.
    if (Data.has_derivs())
        rop.llvm_zero_derivs (Data);
    rop.generated_texture_call (texture_handle != nullptr);

    return true;
}
2998 
2999 
3000 
LLVMGEN (llvm_gen_getmessage)
{
    // Emit IR for the getmessage op: a call to osl_getmessage whose int
    // return value is stored into the op's first argument.
    //
    // The op appears in four shapes:
    //   * getmessage (attribute_name, value)
    //   * getmessage (attribute_name, value[])
    //   * getmessage (source, attribute_name, value)
    //   * getmessage (source, attribute_name, value[])
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() == 3 || op.nargs() == 4);
    const bool has_source = (op.nargs() == 4);
    const int name_slot = has_source ? 2 : 1;

    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& Source = *rop.opargsym (op, 1);   // aliases Name if no source
    Symbol& Name   = *rop.opargsym (op, name_slot);
    Symbol& Data   = *rop.opargsym (op, name_slot + 1);
    OSL_DASSERT(Result.typespec().is_int() && Name.typespec().is_string());
    OSL_DASSERT(!has_source || Source.typespec().is_string());

    // FIXME: secret handshake for closures -- closure-based data is
    // described as an UNKNOWN-based TypeDesc carrying only the array
    // length.  Everything else passes its real simple type.
    const TypeDesc datatype = Data.typespec().is_closure_based()
        ? TypeDesc(TypeDesc::UNKNOWN, Data.typespec().arraylength())
        : Data.typespec().simpletype();

    llvm::Value *args[] = {
        rop.sg_void_ptr(),
        has_source ? rop.llvm_load_value(Source)
                   : rop.ll.constant(ustring()),
        rop.llvm_load_value (Name),
        rop.ll.constant (datatype),
        // Pointer to the data's storage (for closures, this lets the
        // function modify the closure pointer itself)
        rop.llvm_void_ptr (Data),
        rop.ll.constant ((int)Data.has_derivs()),
        // Identify where the call came from
        rop.ll.constant(rop.inst()->id()),
        rop.ll.constant(op.sourcefile()),
        rop.ll.constant(op.sourceline()),
    };

    llvm::Value *status = rop.ll.call_function ("osl_getmessage", args);
    rop.llvm_store_value (status, Result);
    return true;
}
3045 
3046 
3047 
LLVMGEN (llvm_gen_setmessage)
{
    // Emit IR for the setmessage op: a call to osl_setmessage with the
    // message name, a description of the data's type, and a pointer to the
    // data.  Op arguments: name, data.  The call's result is not used.
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() == 2);
    Symbol& Name = *rop.opargsym (op, 0);
    Symbol& Data = *rop.opargsym (op, 1);
    OSL_DASSERT(Name.typespec().is_string());

    // FIXME: secret handshake for closures -- closure-based data is
    // described as an UNKNOWN-based TypeDesc carrying only the array
    // length.  Everything else passes its real simple type.
    const TypeDesc datatype = Data.typespec().is_closure_based()
        ? TypeDesc(TypeDesc::UNKNOWN, Data.typespec().arraylength())
        : Data.typespec().simpletype();

    llvm::Value *args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Name),
        rop.ll.constant (datatype),
        // Pointer to the data's storage (for closures, this lets the
        // function modify the closure pointer itself)
        rop.llvm_void_ptr (Data),
        // Identify where the call came from
        rop.ll.constant(rop.inst()->id()),
        rop.ll.constant(op.sourcefile()),
        rop.ll.constant(op.sourceline()),
    };

    rop.ll.call_function ("osl_setmessage", args);
    return true;
}
3078 
3079 
3080 
LLVMGEN (llvm_gen_get_simple_SG_field)
{
    // Emit IR that copies one field of the shader globals struct into the
    // op's single argument.  The field is selected by looking up the op's
    // name with ShaderGlobalNameToIndex.
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() == 1);

    Symbol& Result = *rop.opargsym (op, 0);
    const int field_index = rop.ShaderGlobalNameToIndex (op.opname());
    OSL_DASSERT (field_index >= 0);

    // Address of the field within the shader globals, then load and store.
    llvm::Value *field_ptr = rop.ll.GEP (rop.sg_ptr(), 0, field_index);
    rop.llvm_store_value (rop.ll.op_load (field_ptr), Result);

    return true;
}
3096 
3097 
3098 
LLVMGEN (llvm_gen_calculatenormal)
{
    // Emit IR for the calculatenormal op.  Op arguments: result, P.
    // If P carries no derivatives, the result is simply set to zero;
    // otherwise osl_calculatenormal is called to fill it in.
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() == 2);

    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& P      = *rop.opargsym (op, 1);

    OSL_DASSERT(Result.typespec().is_triple() && P.typespec().is_triple());

    if (P.has_derivs()) {
        llvm::Value * args[] = {
            rop.llvm_void_ptr (Result),
            rop.sg_void_ptr(),
            rop.llvm_void_ptr (P),
        };
        rop.ll.call_function ("osl_calculatenormal", args);
        // No derivatives are produced for the result, so clear them.
        if (Result.has_derivs())
            rop.llvm_zero_derivs (Result);
    } else {
        rop.llvm_assign_zero (Result);
    }
    return true;
}
3124 
3125 
3126 
LLVMGEN (llvm_gen_area)
{
    // Emit IR for the area op.  Op arguments: result, P.
    // If P carries no derivatives, the result is simply set to zero;
    // otherwise the value returned by osl_area is stored.
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() == 2);

    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& P      = *rop.opargsym (op, 1);

    OSL_DASSERT(Result.typespec().is_float() && P.typespec().is_triple());

    if (P.has_derivs()) {
        llvm::Value *p_ptr = rop.llvm_void_ptr (P);
        llvm::Value *area = rop.ll.call_function ("osl_area", p_ptr);
        rop.llvm_store_value (area, Result);
        // No derivatives are produced for the result, so clear them.
        if (Result.has_derivs())
            rop.llvm_zero_derivs (Result);
    } else {
        rop.llvm_assign_zero (Result);
    }
    return true;
}
3148 
3149 
3150 
LLVMGEN(llvm_gen_spline)3151 LLVMGEN (llvm_gen_spline)
3152 {
3153     Opcode &op (rop.inst()->ops()[opnum]);
3154 
3155     OSL_DASSERT(op.nargs() >= 4 && op.nargs() <= 5);
3156 
3157     bool has_knot_count = (op.nargs() == 5);
3158     Symbol& Result   = *rop.opargsym (op, 0);
3159     Symbol& Spline   = *rop.opargsym (op, 1);
3160     Symbol& Value    = *rop.opargsym (op, 2);
3161     Symbol& Knot_count = *rop.opargsym (op, 3); // might alias Knots
3162     Symbol& Knots    = has_knot_count ? *rop.opargsym (op, 4) :
3163                                         *rop.opargsym (op, 3);
3164 
3165     OSL_DASSERT(!Result.typespec().is_closure_based() &&
3166              Spline.typespec().is_string()  &&
3167              Value.typespec().is_float() &&
3168              !Knots.typespec().is_closure_based() &&
3169              Knots.typespec().is_array() &&
3170              (!has_knot_count || (has_knot_count && Knot_count.typespec().is_int())));
3171 
3172     std::string name = Strutil::sprintf("osl_%s_", op.opname());
3173     // only use derivatives for result if:
3174     //   result has derivs and (value || knots) have derivs
3175     bool result_derivs = Result.has_derivs() && (Value.has_derivs() || Knots.has_derivs());
3176 
3177     if (result_derivs)
3178         name += "d";
3179     if (Result.typespec().is_float())
3180         name += "f";
3181     else if (Result.typespec().is_triple())
3182         name += "v";
3183 
3184     if (result_derivs && Value.has_derivs())
3185         name += "d";
3186     if (Value.typespec().is_float())
3187         name += "f";
3188     else if (Value.typespec().is_triple())
3189         name += "v";
3190 
3191     if (result_derivs && Knots.has_derivs())
3192         name += "d";
3193     if (Knots.typespec().simpletype().elementtype() == TypeDesc::FLOAT)
3194         name += "f";
3195     else if (Knots.typespec().simpletype().elementtype().aggregate == TypeDesc::VEC3)
3196         name += "v";
3197 
3198     llvm::Value * args[] = {
3199         rop.llvm_void_ptr (Result),
3200         rop.llvm_load_string (Spline),
3201         rop.llvm_void_ptr (Value), // make things easy
3202         rop.llvm_void_ptr (Knots),
3203         has_knot_count ?
3204             rop.llvm_load_value (Knot_count) :
3205             rop.ll.constant ((int)Knots.typespec().arraylength()),
3206         rop.ll.constant ((int)Knots.typespec().arraylength()),
3207     };
3208     rop.ll.call_function (name.c_str(), args);
3209 
3210     if (Result.has_derivs() && !result_derivs)
3211         rop.llvm_zero_derivs (Result);
3212 
3213     return true;
3214 }
3215 
3216 
3217 
3218 static void
llvm_gen_keyword_fill(BackendLLVM & rop,Opcode & op,const ClosureRegistry::ClosureEntry * clentry,ustring clname,llvm::Value * mem_void_ptr,int argsoffset)3219 llvm_gen_keyword_fill(BackendLLVM &rop, Opcode &op, const ClosureRegistry::ClosureEntry *clentry, ustring clname, llvm::Value *mem_void_ptr, int argsoffset)
3220 {
3221     OSL_DASSERT(((op.nargs() - argsoffset) % 2) == 0);
3222 
3223     int Nattrs = (op.nargs() - argsoffset) / 2;
3224 
3225     for (int attr_i = 0; attr_i < Nattrs; ++attr_i) {
3226         int argno = attr_i * 2 + argsoffset;
3227         Symbol &Key     = *rop.opargsym (op, argno);
3228         Symbol &Value   = *rop.opargsym (op, argno + 1);
3229         OSL_DASSERT(Key.typespec().is_string());
3230         OSL_ASSERT(Key.is_constant());
3231         ustring *key = (ustring *)Key.data();
3232         TypeDesc ValueType = Value.typespec().simpletype();
3233 
3234         bool legal = false;
3235         // Make sure there is some keyword arg that has the name and the type
3236         for (int t = 0; t < clentry->nkeyword; ++t) {
3237             const ClosureParam &p = clentry->params[clentry->nformal + t];
3238             // strcmp might be too much, we could precompute the ustring for the param,
3239             // but in this part of the code is not a big deal
3240             if (equivalent(p.type,ValueType) && !strcmp(key->c_str(), p.key)) {
3241             	// store data
3242             	OSL_DASSERT(p.offset + p.field_size <= clentry->struct_size);
3243                 llvm::Value* dst = rop.ll.offset_ptr (mem_void_ptr, p.offset);
3244                 llvm::Value* src = rop.llvm_void_ptr (Value);
3245                 rop.ll.op_memcpy (dst, src, (int)p.type.size(),
3246                 					4 /* use 4 byte alignment for now */);
3247                 legal = true;
3248                 break;
3249             }
3250         }
3251         if (!legal) {
3252             rop.shadingcontext()->warningf("Unsupported closure keyword arg \"%s\" for %s (%s:%d)", key->c_str(), clname, op.sourcefile(), op.sourceline());
3253         }
3254     }
3255 }
3256 
3257 
3258 
LLVMGEN(llvm_gen_closure)3259 LLVMGEN (llvm_gen_closure)
3260 {
3261     Opcode &op (rop.inst()->ops()[opnum]);
3262     OSL_DASSERT (op.nargs() >= 2); // at least the result and the ID
3263 
3264     Symbol &Result = *rop.opargsym (op, 0);
3265     int weighted   = rop.opargsym(op,1)->typespec().is_string() ? 0 : 1;
3266     Symbol *weight = weighted ? rop.opargsym (op, 1) : NULL;
3267     Symbol &Id     = *rop.opargsym (op, 1+weighted);
3268     OSL_DASSERT(Result.typespec().is_closure());
3269     OSL_DASSERT(Id.typespec().is_string());
3270     ustring closure_name = *((ustring *)Id.data());
3271 
3272     const ClosureRegistry::ClosureEntry * clentry = rop.shadingsys().find_closure(closure_name);
3273     if (!clentry) {
3274         rop.llvm_gen_error (Strutil::sprintf("Closure '%s' is not supported by the current renderer, called from %s:%d in shader \"%s\", layer %d \"%s\", group \"%s\"",
3275                                      closure_name, op.sourcefile(), op.sourceline(),
3276                                      rop.inst()->shadername(), rop.layer(),
3277                                      rop.inst()->layername(), rop.group().name()));
3278         return false;
3279     }
3280 
3281     OSL_DASSERT (op.nargs() >= (2 + weighted + clentry->nformal));
3282 
3283     // Call osl_allocate_closure_component(closure, id, size).  It returns
3284     // the memory for the closure parameter data.
3285     llvm::Value *render_ptr = rop.ll.constant_ptr(rop.shadingsys().renderer(), rop.ll.type_void_ptr());
3286     llvm::Value *sg_ptr = rop.sg_void_ptr();
3287     llvm::Value *id_int = rop.ll.constant(clentry->id);
3288     llvm::Value *size_int = rop.ll.constant(clentry->struct_size);
3289     llvm::Value *return_ptr = weighted ?
3290           rop.ll.call_function ("osl_allocate_weighted_closure_component", sg_ptr, id_int, size_int, rop.llvm_void_ptr(*weight))
3291         : rop.ll.call_function ("osl_allocate_closure_component"         , sg_ptr, id_int, size_int);
3292     llvm::Value *comp_void_ptr = return_ptr;
3293 
3294     // For the weighted closures, we need a surrounding "if" so that it's safe
3295     // for osl_allocate_weighted_closure_component to return NULL (unless we
3296     // know for sure that it's constant weighted and that the weight is
3297     // not zero).
3298     llvm::BasicBlock *next_block = NULL;
3299     if (weighted && ! (weight->is_constant() && !rop.is_zero(*weight))) {
3300         llvm::BasicBlock *notnull_block = rop.ll.new_basic_block ("non_null_closure");
3301         next_block = rop.ll.new_basic_block ("");
3302         llvm::Value *cond = rop.ll.op_ne (return_ptr, rop.ll.void_ptr_null());
3303         rop.ll.op_branch (cond, notnull_block, next_block);
3304         // new insert point is nonnull_block
3305     }
3306 
3307     llvm::Value *comp_ptr = rop.ll.ptr_cast(comp_void_ptr, rop.llvm_type_closure_component_ptr());
3308     // Get the address of the primitive buffer, which is the 2nd field
3309     llvm::Value *mem_void_ptr = rop.ll.GEP (comp_ptr, 0, 2);
3310     mem_void_ptr = rop.ll.ptr_cast(mem_void_ptr, rop.ll.type_void_ptr());
3311 
3312     // If the closure has a "prepare" method, call
3313     // prepare(renderer, id, memptr).  If there is no prepare method, just
3314     // zero out the closure parameter memory.
3315     if (clentry->prepare) {
3316         // Call clentry->prepare(renderservices *, int id, void *mem)
3317         llvm::Value *funct_ptr = rop.ll.constant_ptr((void *)clentry->prepare, rop.llvm_type_prepare_closure_func());
3318         llvm::Value *args[] = {render_ptr, id_int, mem_void_ptr};
3319         rop.ll.call_function (funct_ptr, args);
3320     } else {
3321         rop.ll.op_memset (mem_void_ptr, 0, clentry->struct_size, 4 /*align*/);
3322     }
3323 
3324     // Here is where we fill the struct using the params
3325     for (int carg = 0; carg < clentry->nformal; ++carg) {
3326         const ClosureParam &p = clentry->params[carg];
3327         if (p.key != NULL) break;
3328         OSL_DASSERT(p.offset + p.field_size <= clentry->struct_size);
3329         Symbol &sym = *rop.opargsym (op, carg + 2 + weighted);
3330         TypeDesc t = sym.typespec().simpletype();
3331 
3332         if (rop.use_optix() && sym.typespec().is_string()) {
3333             llvm::Value* dst = rop.ll.offset_ptr (mem_void_ptr, p.offset);
3334             llvm::Value* src = rop.llvm_load_device_string (sym, /*follow*/ false);
3335             rop.ll.op_memcpy (dst, src, 8, 8);
3336         }
3337         else if (!sym.typespec().is_closure_array() && !sym.typespec().is_structure()
3338                  && equivalent(t,p.type)) {
3339             llvm::Value* dst = rop.ll.offset_ptr (mem_void_ptr, p.offset);
3340             llvm::Value* src = rop.llvm_void_ptr (sym);
3341             rop.ll.op_memcpy (dst, src, (int)p.type.size(),
3342                              4 /* use 4 byte alignment for now */);
3343         } else {
3344             rop.shadingcontext()->errorf("Incompatible formal argument %d to '%s' closure (%s %s, expected %s). Prototypes don't match renderer registry (%s:%d).",
3345                                          carg + 1, closure_name,
3346                                          sym.typespec(), sym.unmangled(), p.type,
3347                                          op.sourcefile(), op.sourceline());
3348         }
3349     }
3350 
3351     // If the closure has a "setup" method, call
3352     // setup(render_services, id, mem_ptr).
3353     if (clentry->setup) {
3354         // Call clentry->setup(renderservices *, int id, void *mem)
3355         llvm::Value *funct_ptr = rop.ll.constant_ptr((void *)clentry->setup, rop.llvm_type_setup_closure_func());
3356         llvm::Value *args[] = {render_ptr, id_int, mem_void_ptr};
3357         rop.ll.call_function (funct_ptr, args);
3358     }
3359 
3360     llvm_gen_keyword_fill(rop, op, clentry, closure_name, mem_void_ptr,
3361                           2 + weighted + clentry->nformal);
3362 
3363     if (next_block)
3364         rop.ll.op_branch (next_block);
3365 
3366     // Store result at the end, otherwise Ci = modifier(Ci) won't work
3367     rop.llvm_store_value (return_ptr, Result, 0, NULL, 0);
3368 
3369     return true;
3370 }
3371 
3372 
3373 
LLVMGEN(llvm_gen_pointcloud_search)3374 LLVMGEN (llvm_gen_pointcloud_search)
3375 {
3376     Opcode &op (rop.inst()->ops()[opnum]);
3377 
3378     OSL_DASSERT(op.nargs() >= 5);
3379     Symbol& Result     = *rop.opargsym (op, 0);
3380     Symbol& Filename   = *rop.opargsym (op, 1);
3381     Symbol& Center     = *rop.opargsym (op, 2);
3382     Symbol& Radius     = *rop.opargsym (op, 3);
3383     Symbol& Max_points = *rop.opargsym (op, 4);
3384 
3385     OSL_DASSERT(Result.typespec().is_int() && Filename.typespec().is_string() &&
3386              Center.typespec().is_triple() && Radius.typespec().is_float() &&
3387              Max_points.typespec().is_int());
3388 
3389     std::vector<Symbol *> clear_derivs_of; // arguments whose derivs we need to zero at the end
3390     int attr_arg_offset = 5; // where the opt attrs begin
3391     Symbol *Sort = NULL;
3392     if (op.nargs() > 5 && rop.opargsym(op,5)->typespec().is_int()) {
3393         Sort = rop.opargsym(op,5);
3394         ++attr_arg_offset;
3395     }
3396     int nattrs = (op.nargs() - attr_arg_offset) / 2;
3397 
3398     std::vector<llvm::Value *> args;
3399     args.push_back (rop.sg_void_ptr());                // 0 sg
3400     args.push_back (rop.llvm_load_value (Filename));   // 1 filename
3401     args.push_back (rop.llvm_void_ptr   (Center));     // 2 center
3402     args.push_back (rop.llvm_load_value (Radius));     // 3 radius
3403     args.push_back (rop.llvm_load_value (Max_points)); // 4 max_points
3404     args.push_back (Sort ? rop.llvm_load_value(*Sort)  // 5 sort
3405                          : rop.ll.constant(0));
3406     args.push_back (rop.ll.constant_ptr (NULL));      // 6 indices
3407     args.push_back (rop.ll.constant_ptr (NULL));      // 7 distances
3408     args.push_back (rop.ll.constant (0));             // 8 derivs_offset
3409     args.push_back (NULL);                              // 9 nattrs
3410     size_t capacity = 0x7FFFFFFF; // Lets put a 32 bit limit
3411     int extra_attrs = 0; // Extra query attrs to search
3412     // This loop does three things. 1) Look for the special attributes
3413     // "distance", "index" and grab the pointer. 2) Compute the minimmum
3414     // size of the provided output arrays to check against max_points
3415     // 3) push optional args to the arg list
3416     for (int i = 0; i < nattrs; ++i) {
3417         Symbol& Name  = *rop.opargsym (op, attr_arg_offset + i*2);
3418         Symbol& Value = *rop.opargsym (op, attr_arg_offset + i*2 + 1);
3419 
3420         OSL_DASSERT (Name.typespec().is_string());
3421         TypeDesc simpletype = Value.typespec().simpletype();
3422         if (Name.is_constant() && *((ustring *)Name.data()) == u_index &&
3423             simpletype.elementtype() == TypeDesc::INT) {
3424             args[6] = rop.llvm_void_ptr (Value);
3425         } else if (Name.is_constant() && *((ustring *)Name.data()) == u_distance &&
3426                    simpletype.elementtype() == TypeDesc::FLOAT) {
3427             args[7] = rop.llvm_void_ptr (Value);
3428             if (Value.has_derivs()) {
3429                 if (Center.has_derivs())
3430                     // deriv offset is the size of the array
3431                     args[8] = rop.ll.constant ((int)simpletype.numelements());
3432                 else
3433                     clear_derivs_of.push_back(&Value);
3434             }
3435         } else {
3436             // It is a regular attribute, push it to the arg list
3437             args.push_back (rop.llvm_load_value (Name));
3438             args.push_back (rop.ll.constant (simpletype));
3439             args.push_back (rop.llvm_void_ptr (Value));
3440             if (Value.has_derivs())
3441                 clear_derivs_of.push_back(&Value);
3442             extra_attrs++;
3443         }
3444         // minimum capacity of the output arrays
3445         capacity = std::min (simpletype.numelements(), capacity);
3446     }
3447 
3448     args[9] = rop.ll.constant (extra_attrs);
3449 
3450     // Compare capacity to the requested number of points. The available
3451     // space on the arrays is a constant, the requested number of
3452     // points is not, so runtime check.
3453     llvm::Value *sizeok = rop.ll.op_ge (rop.ll.constant((int)capacity), args[4]); // max_points
3454 
3455     llvm::BasicBlock* sizeok_block = rop.ll.new_basic_block ("then");
3456     llvm::BasicBlock* badsize_block = rop.ll.new_basic_block ("else");
3457     llvm::BasicBlock* after_block = rop.ll.new_basic_block ("");
3458     rop.ll.op_branch (sizeok, sizeok_block, badsize_block);
3459     // N.B. the op_branch sets sizeok_block as the new insert point
3460 
3461     // non-error code case
3462     llvm::Value *count = rop.ll.call_function ("osl_pointcloud_search", args);
3463     // Clear derivs if necessary
3464     for (size_t i = 0; i < clear_derivs_of.size(); ++i)
3465         rop.llvm_zero_derivs (*clear_derivs_of[i], count);
3466     // Store result
3467     rop.llvm_store_value (count, Result);
3468     rop.ll.op_branch (after_block);
3469 
3470     // error code case
3471     rop.ll.set_insert_point (badsize_block);
3472     args.clear();
3473     static ustring errorfmt("Arrays too small for pointcloud lookup at (%s:%d)");
3474     llvm::Value *err_args[] = {
3475         rop.sg_void_ptr(),
3476         rop.ll.constant_ptr ((void *)errorfmt.c_str()),
3477         rop.ll.constant_ptr ((void *)op.sourcefile().c_str()),
3478         rop.ll.constant (op.sourceline()),
3479     };
3480     rop.ll.call_function ("osl_error", err_args);
3481 
3482     rop.ll.op_branch (after_block);
3483     return true;
3484 }
3485 
3486 
3487 
LLVMGEN (llvm_gen_pointcloud_get)
{
    // Emit IR for the pointcloud_get op.  Op arguments:
    //     result, filename, indices[], count, attribute name, data[]
    // A runtime check compares count against the smaller of the indices
    // and data arrays; if it fits, osl_pointcloud_get is called and its
    // return value stored in the result, otherwise osl_error is called.
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() >= 6);

    Symbol& Result     = *rop.opargsym (op, 0);
    Symbol& Filename   = *rop.opargsym (op, 1);
    Symbol& Indices    = *rop.opargsym (op, 2);
    Symbol& Count      = *rop.opargsym (op, 3);
    Symbol& Attr_name  = *rop.opargsym (op, 4);
    Symbol& Data       = *rop.opargsym (op, 5);

    llvm::Value *count = rop.llvm_load_value (Count);

    // The number of points we can handle is limited by whichever of the
    // two arrays is shorter.
    const int data_len  = (int)Data.typespec().simpletype().numelements();
    const int index_len = (int)Indices.typespec().simpletype().numelements();
    const int capacity  = std::min (data_len, index_len);
    llvm::Value *sizeok = rop.ll.op_ge (rop.ll.constant(capacity), count);

    llvm::BasicBlock* sizeok_block = rop.ll.new_basic_block ("then");
    llvm::BasicBlock* badsize_block = rop.ll.new_basic_block ("else");
    llvm::BasicBlock* after_block = rop.ll.new_basic_block ("");
    rop.ll.op_branch (sizeok, sizeok_block, badsize_block);
    // N.B. sets insert point to true case

    // Arrays are big enough: fetch the attribute, store the return value,
    // and clear the data's derivs (which the call does not provide).
    llvm::Value * args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Filename),
        rop.llvm_void_ptr (Indices),
        count,
        rop.llvm_load_value (Attr_name),
        rop.ll.constant (Data.typespec().simpletype()),
        rop.llvm_void_ptr (Data),
    };
    llvm::Value *found = rop.ll.call_function ("osl_pointcloud_get", args);
    rop.llvm_store_value (found, Result);
    if (Data.has_derivs())
        rop.llvm_zero_derivs (Data, count);
    rop.ll.op_branch (after_block);

    // Arrays are too small: report an error instead.
    rop.ll.set_insert_point (badsize_block);
    static ustring errorfmt("Arrays too small for pointcloud attribute get at (%s:%d)");
    llvm::Value *err_args[] = {
        rop.sg_void_ptr(),
        rop.ll.constant_ptr ((void *)errorfmt.c_str()),
        rop.ll.constant_ptr ((void *)op.sourcefile().c_str()),
        rop.ll.constant (op.sourceline()),
    };
    rop.ll.call_function ("osl_error", err_args);

    rop.ll.op_branch (after_block);
    return true;
}
3545 
3546 
3547 
LLVMGEN (llvm_gen_pointcloud_write)
{
    // Emit IR for the pointcloud_write op.  Op arguments:
    //     result, filename, position, name/value attribute pairs...
    // The attribute names, types, and value pointers are gathered into
    // three local arrays, which are then handed to osl_pointcloud_write;
    // its return value is stored in the result.
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() >= 3);
    Symbol& Result   = *rop.opargsym (op, 0);
    Symbol& Filename = *rop.opargsym (op, 1);
    Symbol& Pos      = *rop.opargsym (op, 2);
    OSL_DASSERT(Result.typespec().is_int() && Filename.typespec().is_string() &&
                Pos.typespec().is_triple());
    OSL_DASSERT((op.nargs() & 1) && "must have an even number of attribs");

    const int first_attr = 3;
    const int nattrs = (op.nargs() - first_attr) / 2;

    // Stack space for the parallel names/types/values arrays
    llvm::Value *names = rop.ll.op_alloca (rop.ll.type_string(), nattrs);
    llvm::Value *types = rop.ll.op_alloca (rop.ll.type_typedesc(), nattrs);
    llvm::Value *values = rop.ll.op_alloca (rop.ll.type_void_ptr(), nattrs);

    // Populate entry i of each array.  A runtime helper does the stores so
    // we don't have to compute offsets into the arrays ourselves.
    for (int i = 0;  i < nattrs;  ++i) {
        Symbol &attr_name  = *rop.opargsym (op, first_attr + 2*i);
        Symbol &attr_value = *rop.opargsym (op, first_attr + 2*i + 1);
        llvm::Value * helper_args[] = {
            rop.ll.void_ptr (names),
            rop.ll.void_ptr (types),
            rop.ll.void_ptr (values),
            rop.ll.constant (i),
            rop.llvm_load_value (attr_name),                        // name[i]
            rop.ll.constant (attr_value.typespec().simpletype()),   // type[i]
            rop.llvm_void_ptr (attr_value)                          // value[i]
        };
        rop.ll.call_function ("osl_pointcloud_write_helper", helper_args);
    }

    llvm::Value * write_args[] = {
        rop.sg_void_ptr(),               // shaderglobals pointer
        rop.llvm_load_value (Filename),  // name
        rop.llvm_void_ptr (Pos),         // position
        rop.ll.constant (nattrs),        // number of attributes
        rop.ll.void_ptr (names),         // attribute names array
        rop.ll.void_ptr (types),         // attribute types array
        rop.ll.void_ptr (values)         // attribute values array
    };
    llvm::Value *status = rop.ll.call_function ("osl_pointcloud_write", write_args);
    rop.llvm_store_value (status, Result);

    return true;
}
3598 
3599 
3600 
3601 
LLVMGEN (llvm_gen_dict_find)
{
    // Emit IR for the dict_find op, which OSL offers in two variants:
    //     dict_find (string dict, string query)
    //     dict_find (int nodeID, string query)
    // The runtime function called depends on whether the source argument
    // is an int or a string; its return value is stored in the result.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 3);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& Source = *rop.opargsym (op, 1);
    Symbol& Query  = *rop.opargsym (op, 2);
    OSL_DASSERT(Result.typespec().is_int() && Query.typespec().is_string() &&
                (Source.typespec().is_int() || Source.typespec().is_string()));

    const char *funcname = Source.typespec().is_int() ? "osl_dict_find_iis"
                                                      : "osl_dict_find_iss";
    llvm::Value *args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Source),
        rop.llvm_load_value (Query)
    };
    llvm::Value *node = rop.ll.call_function (funcname, args);
    rop.llvm_store_value (node, Result);
    return true;
}
3625 
3626 
3627 
LLVMGEN (llvm_gen_dict_next)
{
    // int dict_next (int nodeID)
    // dict_next is very straightforward -- just insert the shaderglobals
    // pointer as the first argument, followed by the node ID operand.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 2);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& NodeID = *rop.opargsym (op, 1);
    OSL_DASSERT(Result.typespec().is_int() && NodeID.typespec().is_int());

    // Build the operands in a braced initializer list, whose elements are
    // evaluated strictly left to right.  Passing the two expressions
    // directly as function-call arguments would leave their relative
    // evaluation order unspecified.
    llvm::Value *args[] = {
        rop.sg_void_ptr(),            // arg 0: shaderglobals ptr
        rop.llvm_load_value(NodeID)   // arg 1: nodeID
    };
    llvm::Value *ret = rop.ll.call_function ("osl_dict_next", args);

    // The runtime call's return value is the op's int result.
    rop.llvm_store_value (ret, Result);
    return true;
}
3642 
3643 
3644 
LLVMGEN (llvm_gen_dict_value)
{
    // OSL signature:
    //     int dict_value (int nodeID, string attribname, output TYPE value)
    Opcode& op = rop.inst()->ops()[opnum];
    OSL_DASSERT(op.nargs() == 4);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& NodeID = *rop.opargsym (op, 1);
    Symbol& Name   = *rop.opargsym (op, 2);
    Symbol& Value  = *rop.opargsym (op, 3);
    OSL_DASSERT(Result.typespec().is_int() && NodeID.typespec().is_int() &&
                Name.typespec().is_string());

    // Operands of the runtime call, produced in call order.
    llvm::Value *sg_ptr    = rop.sg_void_ptr();           // shaderglobals ptr
    llvm::Value *node_val  = rop.llvm_load_value(NodeID); // nodeID
    llvm::Value *name_val  = rop.llvm_load_value(Name);   // attribute name
    // The type of the output argument is passed along as a constant so the
    // callee receives it together with the destination pointer.
    llvm::Value *type_val  = rop.ll.constant(Value.typespec().simpletype());
    llvm::Value *value_ptr = rop.llvm_void_ptr(Value);    // pointer to Value
    llvm::Value *call_args[] = {
        sg_ptr, node_val, name_val, type_val, value_ptr
    };

    // Store the int returned by the runtime call into Result.
    llvm::Value *status = rop.ll.call_function ("osl_dict_value", call_args);
    rop.llvm_store_value (status, Result);
    return true;
}
3667 
3668 
3669 
LLVMGEN (llvm_gen_split)
{
    // OSL signature (the last two arguments are optional):
    //     int split (string str, output string result[], string sep, int maxsplit)
    Opcode& op = rop.inst()->ops()[opnum];
    OSL_DASSERT(op.nargs() >= 3 && op.nargs() <= 5);
    Symbol& R       = *rop.opargsym (op, 0);
    Symbol& Str     = *rop.opargsym (op, 1);
    Symbol& Results = *rop.opargsym (op, 2);
    OSL_DASSERT(R.typespec().is_int() && Str.typespec().is_string() &&
                Results.typespec().is_array() &&
                Results.typespec().is_string_based());

    // The string to split and the destination array come first.
    llvm::Value *str_val     = rop.llvm_load_value (Str);
    llvm::Value *results_ptr = rop.llvm_void_ptr (Results);

    // Separator: an empty string constant when the shader did not pass one.
    llvm::Value *sep_val = nullptr;
    if (op.nargs() < 4) {
        sep_val = rop.ll.constant ("");
    } else {
        Symbol& Sep = *rop.opargsym (op, 3);
        OSL_DASSERT(Sep.typespec().is_string());
        sep_val = rop.llvm_load_value (Sep);
    }

    // Maxsplit: falls back to the length of the results array when omitted.
    llvm::Value *maxsplit_val = nullptr;
    if (op.nargs() < 5) {
        maxsplit_val = rop.ll.constant (Results.typespec().arraylength());
    } else {
        Symbol& Maxsplit = *rop.opargsym (op, 4);
        OSL_DASSERT(Maxsplit.typespec().is_int());
        maxsplit_val = rop.llvm_load_value (Maxsplit);
    }

    // The final operand is always the length of the results array.
    llvm::Value *reslen_val = rop.ll.constant (Results.typespec().arraylength());

    llvm::Value *call_args[5] = {
        str_val, results_ptr, sep_val, maxsplit_val, reslen_val
    };
    llvm::Value *count = rop.ll.call_function ("osl_split", call_args);
    rop.llvm_store_value (count, R);
    return true;
}
3704 
3705 
3706 
LLVMGEN (llvm_gen_raytype)
{
    // OSL signature:
    //     int raytype (string name)
    Opcode& op = rop.inst()->ops()[opnum];
    OSL_DASSERT(op.nargs() == 2);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& Name = *rop.opargsym (op, 1);

    // First operand is always the shaderglobals pointer; the second operand
    // and the runtime function depend on whether the name is a constant.
    llvm::Value *sg_ptr = rop.sg_void_ptr();
    llvm::Value *query = nullptr;
    const char *funcname = nullptr;
    if (! Name.is_constant()) {
        // The name is not a constant, so hand the runtime a pointer to the
        // name and let it do the lookup.
        query = rop.llvm_get_pointer (Name);
        funcname = "osl_raytype_name";
    } else {
        // The name is a constant: ask the shading system for its bit
        // pattern now and pass that bit pattern as an immediate.
        ustring name = *(ustring *)Name.data();
        query = rop.ll.constant (rop.shadingsys().raytype_bit (name));
        funcname = "osl_raytype_bit";
    }

    llvm::Value *call_args[2] = { sg_ptr, query };
    llvm::Value *answer = rop.ll.call_function (funcname, call_args);
    rop.llvm_store_value (answer, Result);
    return true;
}
3730 
3731 
3732 
3733 // color blackbody (float temperatureK)
3734 // color wavelength_color (float wavelength_nm)  // same function signature
LLVMGEN(llvm_gen_blackbody)3735 LLVMGEN (llvm_gen_blackbody)
3736 {
3737     Opcode &op (rop.inst()->ops()[opnum]);
3738     OSL_DASSERT (op.nargs() == 2);
3739     Symbol &Result (*rop.opargsym (op, 0));
3740     Symbol &Temperature (*rop.opargsym (op, 1));
3741     OSL_DASSERT (Result.typespec().is_triple() && Temperature.typespec().is_float());
3742 
3743     llvm::Value* args[] = { rop.sg_void_ptr(), rop.llvm_void_ptr(Result),
3744                             rop.llvm_load_value(Temperature) };
3745     rop.ll.call_function (Strutil::sprintf("osl_%s_vf",op.opname()).c_str(), args);
3746 
3747     // Punt, zero out derivs.
3748     // FIXME -- only of some day, someone truly needs blackbody() to
3749     // correctly return derivs with spatially-varying temperature.
3750     if (Result.has_derivs())
3751         rop.llvm_zero_derivs (Result);
3752 
3753     return true;
3754 }
3755 
3756 
3757 
3758 // float luminance (color c)
LLVMGEN(llvm_gen_luminance)3759 LLVMGEN (llvm_gen_luminance)
3760 {
3761     Opcode &op (rop.inst()->ops()[opnum]);
3762     OSL_DASSERT (op.nargs() == 2);
3763     Symbol &Result (*rop.opargsym (op, 0));
3764     Symbol &C (*rop.opargsym (op, 1));
3765     OSL_DASSERT (Result.typespec().is_float() && C.typespec().is_triple());
3766 
3767     bool deriv = C.has_derivs() && Result.has_derivs();
3768     llvm::Value* args[] = { rop.sg_void_ptr(), rop.llvm_void_ptr(Result),
3769                             rop.llvm_void_ptr(C) };
3770     rop.ll.call_function (deriv ? "osl_luminance_dfdv" : "osl_luminance_fv", args);
3771 
3772     if (Result.has_derivs() && !C.has_derivs())
3773         rop.llvm_zero_derivs (Result);
3774 
3775     return true;
3776 }
3777 
3778 
3779 
LLVMGEN (llvm_gen_isconstant)
{
    // Stores the int constant 1 into the result when the argument symbol is
    // a constant at code-generation time, and 0 otherwise.  No runtime call
    // is emitted.
    Opcode& op = rop.inst()->ops()[opnum];
    OSL_DASSERT (op.nargs() == 2);
    Symbol& Result = *rop.opargsym (op, 0);
    OSL_DASSERT (Result.typespec().is_int());
    Symbol& A = *rop.opargsym (op, 1);

    int answer = 0;
    if (A.is_constant())
        answer = 1;
    rop.llvm_store_value (rop.ll.constant(answer), Result);
    return true;
}
3790 
3791 
3792 
LLVMGEN (llvm_gen_functioncall)
{
    // Generates the body of an inlined function call: the ops from the one
    // after this op up to (not including) the op's jump target, bracketed
    // by a push/pop of the function context.
    Opcode& op = rop.inst()->ops()[opnum];
    OSL_DASSERT (op.nargs() == 1);

    // push_function hands back the block where flow resumes after the body.
    llvm::BasicBlock* after_block = rop.ll.push_function ();

    unsigned int op_num_function_starts_at = opnum+1;
    unsigned int op_num_function_ends_at = op.jump(0);

    if (rop.ll.debug_is_enabled()) {
        // With debug info on, open an inlined-function scope named after
        // the (constant string) function-name argument and located at the
        // source file/line of the first op of the body.
        Symbol& functionNameSymbol = *rop.opargsym (op, 0);
        OSL_DASSERT(functionNameSymbol.is_constant());
        OSL_DASSERT(functionNameSymbol.typespec().is_string());
        ustring functionName = *(ustring *)functionNameSymbol.data();
        const Opcode& startop = rop.inst()->op(op_num_function_starts_at);
        ustring file_name = startop.sourcefile();
        unsigned int method_line = startop.sourceline();
        rop.ll.debug_push_inlined_function(functionName, file_name, method_line);
    }

    // Emit the function body, then branch to the continuation block.
    rop.build_llvm_code (op_num_function_starts_at, op_num_function_ends_at);
    rop.ll.op_branch (after_block);

    // Close the debug scope (if one was opened) before popping the function
    // context, and carry on with the previous flow.
    if (rop.ll.debug_is_enabled()) {
        rop.ll.debug_pop_inlined_function();
    }
    rop.ll.pop_function ();

    return true;
}
3824 
3825 
3826 
LLVMGEN (llvm_gen_functioncall_nr)
{
    // "No return" flavor of the inlined function call: the body is emitted
    // straight into the current flow (no function push/pop and no branch)
    // and is only wrapped in a debug inlined-function scope.
    OSL_ASSERT(rop.ll.debug_is_enabled() && "no return version should only exist when debug is enabled");
    Opcode& op = rop.inst()->ops()[opnum];
    OSL_ASSERT (op.nargs() == 1);

    // The single argument is the constant string naming the function.
    Symbol& functionNameSymbol = *rop.opargsym (op, 0);
    OSL_ASSERT(functionNameSymbol.is_constant());
    OSL_ASSERT(functionNameSymbol.typespec().is_string());
    ustring functionName = *(ustring *)functionNameSymbol.data();

    // Body spans the ops after this one, up to the op's jump target.
    int op_num_function_starts_at = opnum+1;
    int op_num_function_ends_at = op.jump(0);
    OSL_ASSERT(op.farthest_jump() == op_num_function_ends_at
               && "As we are not doing any branching, we should ensure that the inlined function truly ends at the farthest jump");

    // Open the debug scope at the source location of the body's first op.
    const Opcode& startop = rop.inst()->op(op_num_function_starts_at);
    rop.ll.debug_push_inlined_function(functionName,
                                       startop.sourcefile(),
                                       startop.sourceline());

    // Emit the function body.
    rop.build_llvm_code (op_num_function_starts_at, op_num_function_ends_at);

    // Close the debug scope and continue with the previous flow.
    rop.ll.debug_pop_inlined_function();

    return true;
}
3854 
3855 
3856 
LLVMGEN (llvm_gen_return)
{
    // Handles both "return" and "exit": each becomes an unconditional
    // branch, and only the destination block differs.
    Opcode& op = rop.inst()->ops()[opnum];
    OSL_DASSERT (op.nargs() == 0);

    const bool is_exit = (op.opname() == Strings::op_exit);
    if (is_exit) {
        // A real "exit" leaves the whole shader instance.  The exit
        // instance block will be created if it doesn't yet exist.
        rop.ll.op_branch (rop.llvm_exit_instance_block());
    } else {
        // A plain "return" goes to the exit point of the current function.
        rop.ll.op_branch (rop.ll.return_block());
    }

    // Whatever gets generated after the branch goes into a fresh, unnamed
    // basic block.
    rop.ll.set_insert_point (rop.ll.new_basic_block (""));
    return true;
}
3873 
3874 
3875 
3876 OSL_PRAGMA_WARNING_PUSH
3877 OSL_GCC_PRAGMA(GCC diagnostic ignored "-Wunused-parameter")
3878 
LLVMGEN(llvm_gen_end)3879 LLVMGEN (llvm_gen_end)
3880 {
3881     // Dummy routine needed only for the op_descriptor table
3882     return false;
3883 }
3884 
3885 OSL_PRAGMA_WARNING_POP
3886 
3887 
3888 }; // namespace pvt
3889 OSL_NAMESPACE_EXIT
3890