1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4
5 #include <cmath>
6
7 #include <OpenImageIO/fmath.h>
8
9 #include "oslexec_pvt.h"
10 #include <OSL/genclosure.h>
11 #include "backendllvm.h"
12
13 using namespace OSL;
14 using namespace OSL::pvt;
15
16 OSL_NAMESPACE_ENTER
17
18 namespace pvt {
19
20 static ustring op_and("and");
21 static ustring op_bitand("bitand");
22 static ustring op_bitor("bitor");
23 static ustring op_break("break");
24 static ustring op_ceil("ceil");
25 static ustring op_cellnoise("cellnoise");
26 static ustring op_color("color");
27 static ustring op_compl("compl");
28 static ustring op_continue("continue");
29 static ustring op_dowhile("dowhile");
30 static ustring op_eq("eq");
31 static ustring op_error("error");
32 static ustring op_fabs("fabs");
33 static ustring op_floor("floor");
34 static ustring op_for("for");
35 static ustring op_format("format");
36 static ustring op_fprintf("fprintf");
37 static ustring op_ge("ge");
38 static ustring op_gt("gt");
39 static ustring op_hashnoise("hashnoise");
40 static ustring op_if("if");
41 static ustring op_le("le");
42 static ustring op_logb("logb");
43 static ustring op_lt("lt");
44 static ustring op_min("min");
45 static ustring op_neq("neq");
46 static ustring op_normal("normal");
47 static ustring op_or("or");
48 static ustring op_point("point");
49 static ustring op_printf("printf");
50 static ustring op_round("round");
51 static ustring op_shl("shl");
52 static ustring op_shr("shr");
53 static ustring op_sign("sign");
54 static ustring op_step("step");
55 static ustring op_trunc("trunc");
56 static ustring op_vector("vector");
57 static ustring op_warning("warning");
58 static ustring op_xor("xor");
59
60 static ustring u_distance ("distance");
61 static ustring u_index ("index");
62 static ustring u__empty; // empty/default ustring
63
64
65
66 /// Macro that defines the arguments to LLVM IR generating routines
67 ///
68 #define LLVMGEN_ARGS BackendLLVM &rop, int opnum
69
70 /// Macro that defines the full declaration of an LLVM generator.
71 ///
72 #define LLVMGEN(name) bool name (LLVMGEN_ARGS)
73
74 // Forward decl
75 LLVMGEN (llvm_gen_generic);
76
77
78
79 void
llvm_gen_debug_printf(string_view message)80 BackendLLVM::llvm_gen_debug_printf (string_view message)
81 {
82 ustring s = ustring::sprintf ("(%s %s) %s", inst()->shadername(),
83 inst()->layername(), message);
84 ll.call_function ("osl_printf", sg_void_ptr(), ll.constant("%s\n"),
85 ll.constant(s));
86 }
87
88
89
90 void
llvm_gen_warning(string_view message)91 BackendLLVM::llvm_gen_warning (string_view message)
92 {
93 ll.call_function ("osl_warning", sg_void_ptr(), ll.constant("%s\n"),
94 ll.constant(message));
95 }
96
97
98
99 void
llvm_gen_error(string_view message)100 BackendLLVM::llvm_gen_error (string_view message)
101 {
102 ll.call_function ("osl_error", sg_void_ptr(), ll.constant("%s\n"),
103 ll.constant(message));
104 }
105
106
107
108 void
llvm_call_layer(int layer,bool unconditional)109 BackendLLVM::llvm_call_layer (int layer, bool unconditional)
110 {
111 // Make code that looks like:
112 // if (! groupdata->run[parentlayer])
113 // parent_layer (sg, groupdata);
114 // if it's a conditional call, or
115 // parent_layer (sg, groupdata);
116 // if it's run unconditionally.
117 // The code in the parent layer itself will set its 'executed' flag.
118
119 llvm::Value *args[] = { sg_ptr (), groupdata_ptr () };
120
121 ShaderInstance *parent = group()[layer];
122 llvm::Value *trueval = ll.constant_bool(true);
123 llvm::Value *layerfield = layer_run_ref(layer_remap(layer));
124 llvm::BasicBlock *then_block = NULL, *after_block = NULL;
125 if (! unconditional) {
126 llvm::Value *executed = ll.op_load (layerfield);
127 executed = ll.op_ne (executed, trueval);
128 then_block = ll.new_basic_block ("");
129 after_block = ll.new_basic_block ("");
130 ll.op_branch (executed, then_block, after_block);
131 // insert point is now then_block
132 }
133
134 // Mark the call as a fast call
135 llvm::Value *funccall = ll.call_function (layer_function_name(group(), *parent).c_str(), args);
136 if (!parent->entry_layer())
137 ll.mark_fast_func_call (funccall);
138
139 if (! unconditional)
140 ll.op_branch (after_block); // also moves insert point
141 }
142
143
144
145 void
llvm_run_connected_layers(Symbol & sym,int symindex,int opnum,std::set<int> * already_run)146 BackendLLVM::llvm_run_connected_layers (Symbol &sym, int symindex,
147 int opnum,
148 std::set<int> *already_run)
149 {
150 if (sym.valuesource() != Symbol::ConnectedVal)
151 return; // Nothing to do
152
153 bool inmain = (opnum >= inst()->maincodebegin() &&
154 opnum < inst()->maincodeend());
155
156 for (int c = 0; c < inst()->nconnections(); ++c) {
157 const Connection &con (inst()->connection (c));
158 // If the connection gives a value to this param
159 if (con.dst.param == symindex) {
160 // already_run is a set of layers run for this particular op.
161 // Just so we don't stupidly do several consecutive checks on
162 // whether we ran this same layer. It's JUST for this op.
163 if (already_run) {
164 if (already_run->count (con.srclayer))
165 continue; // already ran that one on this op
166 else
167 already_run->insert (con.srclayer); // mark it
168 }
169
170 if (inmain) {
171 // There is an instance-wide m_layers_already_run that tries
172 // to remember which earlier layers have unconditionally
173 // been run at any point in the execution of this layer. But
174 // only honor (and modify) that when in the main code
175 // section, not when in init ops, which are inherently
176 // conditional.
177 if (m_layers_already_run.count (con.srclayer)) {
178 continue; // already unconditionally ran the layer
179 }
180 if (! m_in_conditional[opnum]) {
181 // Unconditionally running -- mark so we don't do it
182 // again. If we're inside a conditional, don't mark
183 // because it may not execute the conditional body.
184 m_layers_already_run.insert (con.srclayer);
185 }
186 }
187
188 // If the earlier layer it comes from has not yet been
189 // executed, do so now.
190 llvm_call_layer (con.srclayer);
191 }
192 }
193 }
194
195
196
197 OSL_PRAGMA_WARNING_PUSH
198 OSL_GCC_PRAGMA(GCC diagnostic ignored "-Wunused-parameter")
199
LLVMGEN(llvm_gen_nop)200 LLVMGEN (llvm_gen_nop)
201 {
202 return true;
203 }
204
205 OSL_PRAGMA_WARNING_POP
206
207
208
/// Generator for "useparam": for each argument of the op, run any
/// upstream layers connected to it and, when userdata is initialized
/// lazily, assign the initial value of interpolated parameters.
LLVMGEN(llvm_gen_useparam)
{
    OSL_DASSERT (! rop.inst()->unused() &&
                 "oops, thought this layer was unused, why do we call it?");

    // Shared across all args of this statement so that the same upstream
    // layer is not checked more than once.
    std::set<int> layers_checked;

    Opcode &op(rop.inst()->ops()[opnum]);
    for (int i = 0; i < op.nargs(); ++i) {
        Symbol &sym        = *rop.opargsym(op, i);
        const int symindex = rop.inst()->arg(op.firstarg() + i);
        rop.llvm_run_connected_layers(sym, symindex, opnum, &layers_checked);

        // Lazy initialization applies only to an interpolated (userdata)
        // parameter: a param or output param that is not lockgeom, not a
        // closure, and not connected in either direction.
        const bool is_param = (sym.symtype() == SymTypeParam
                               || sym.symtype() == SymTypeOutputParam);
        if (!is_param)
            continue;
        if (sym.lockgeom() || sym.typespec().is_closure())
            continue;
        if (sym.connected() || sym.connected_down())
            continue;
        if (rop.shadingsys().lazy_userdata())
            rop.llvm_assign_initial_value(sym);
    }
    return true;
}
234
235
236
237 // Used for printf, error, warning, format, fprintf
LLVMGEN(llvm_gen_printf)238 LLVMGEN (llvm_gen_printf)
239 {
240 Opcode &op (rop.inst()->ops()[opnum]);
241
242 // Prepare the args for the call
243
244 // Which argument is the format string? Usually 0, but for op
245 // format() and fprintf(), the formatting string is argument #1.
246 int format_arg = (op.opname() == "format" || op.opname() == "fprintf") ? 1 : 0;
247 Symbol& format_sym = *rop.opargsym (op, format_arg);
248
249 std::vector<llvm::Value*> call_args;
250 if (!format_sym.is_constant()) {
251 rop.shadingcontext()->warningf("%s must currently have constant format\n",
252 op.opname());
253 return false;
254 }
255
256 // For some ops, we push the shader globals pointer
257 if (op.opname() == op_printf || op.opname() == op_error ||
258 op.opname() == op_warning || op.opname() == op_fprintf)
259 call_args.push_back (rop.sg_void_ptr());
260
261 // fprintf also needs the filename
262 if (op.opname() == op_fprintf) {
263 Symbol& Filename = *rop.opargsym (op, 0);
264 llvm::Value* fn = rop.llvm_load_value (Filename);
265 call_args.push_back (fn);
266 }
267
268 // We're going to need to adjust the format string as we go, but I'd
269 // like to reserve a spot for the char*.
270 size_t new_format_slot = call_args.size();
271 call_args.push_back(NULL);
272
273 ustring format_ustring = *((ustring*)format_sym.data());
274 const char* format = format_ustring.c_str();
275 std::string s;
276 int arg = format_arg + 1;
277 size_t optix_size = 0;
278 while (*format != '\0') {
279 if (*format == '%') {
280 if (format[1] == '%') {
281 // '%%' is a literal '%'
282 s += "%%";
283 format += 2; // skip both percentages
284 continue;
285 }
286 const char *oldfmt = format; // mark beginning of format
287 while (*format &&
288 *format != 'c' && *format != 'd' && *format != 'e' &&
289 *format != 'f' && *format != 'g' && *format != 'i' &&
290 *format != 'm' && *format != 'n' && *format != 'o' &&
291 *format != 'p' && *format != 's' && *format != 'u' &&
292 *format != 'v' && *format != 'x' && *format != 'X')
293 ++format;
294 char formatchar = *format++; // Also eat the format char
295 if (arg >= op.nargs()) {
296 rop.shadingcontext()->errorf("Mismatch between format string and arguments (%s:%d)",
297 op.sourcefile(), op.sourceline());
298 return false;
299 }
300
301 std::string ourformat (oldfmt, format); // straddle the format
302 // Doctor it to fix mismatches between format and data
303 Symbol& sym (*rop.opargsym (op, arg));
304 OSL_ASSERT (! sym.typespec().is_structure_based());
305
306 TypeDesc simpletype (sym.typespec().simpletype());
307 int num_elements = simpletype.numelements();
308 int num_components = simpletype.aggregate;
309 if ((sym.typespec().is_closure_based() ||
310 simpletype.basetype == TypeDesc::STRING)
311 && formatchar != 's') {
312 ourformat[ourformat.length()-1] = 's';
313 }
314 if (simpletype.basetype == TypeDesc::INT && formatchar != 'd' &&
315 formatchar != 'i' && formatchar != 'o' && formatchar != 'u' &&
316 formatchar != 'x' && formatchar != 'X') {
317 ourformat[ourformat.length()-1] = 'd';
318 }
319 if (simpletype.basetype == TypeDesc::FLOAT && formatchar != 'f' &&
320 formatchar != 'g' && formatchar != 'c' && formatchar != 'e' &&
321 formatchar != 'm' && formatchar != 'n' && formatchar != 'p' &&
322 formatchar != 'v') {
323 ourformat[ourformat.length()-1] = 'f';
324 }
325 // NOTE(boulos): Only for debug mode do the derivatives get printed...
326 for (int a = 0; a < num_elements; ++a) {
327 llvm::Value *arrind = simpletype.arraylen ? rop.ll.constant(a) : NULL;
328 if (sym.typespec().is_closure_based()) {
329 s += ourformat;
330 llvm::Value *v = rop.llvm_load_value (sym, 0, arrind, 0);
331 v = rop.ll.call_function ("osl_closure_to_string", rop.sg_void_ptr(), v);
332 call_args.push_back (v);
333 continue;
334 }
335
336 for (int c = 0; c < num_components; c++) {
337 if (c != 0 || a != 0)
338 s += " ";
339 s += ourformat;
340
341 llvm::Value* loaded = nullptr;
342 if (rop.use_optix() && simpletype.basetype == TypeDesc::STRING) {
343 // In the OptiX case, we register each string separately.
344 if (simpletype.arraylen >= 1) {
345 // Mangle the element's name in case llvm_load_device_string calls getOrAllocateLLVMSymbol
346 ustring name = ustring::sprintf("__symname__%s[%d]", sym.mangled(), a);
347 Symbol lsym(name, TypeDesc::TypeString, sym.symtype());
348 lsym.data(&((ustring*)sym.data())[a]);
349 loaded = rop.llvm_load_device_string (lsym, /*follow*/ true);
350 } else {
351 loaded = rop.llvm_load_device_string (sym, /*follow*/ true);
352 }
353 optix_size += sizeof(uint64_t);
354 }
355 else {
356 loaded = rop.llvm_load_value (sym, 0, arrind, c);
357
358 if (simpletype.basetype == TypeDesc::FLOAT) {
359 // C varargs convention upconverts float->double.
360 loaded = rop.ll.op_float_to_double(loaded);
361 // Ensure that 64-bit values are aligned to 8-byte boundaries
362 optix_size = (optix_size + sizeof(double) - 1) & ~(sizeof(double) - 1);
363 optix_size += sizeof(double);
364 }
365 else if (simpletype.basetype == TypeDesc::INT)
366 optix_size += sizeof(int);
367 }
368
369 call_args.push_back (loaded);
370 }
371 }
372 ++arg;
373 } else {
374 // Everything else -- just copy the character and advance
375 s += *format++;
376 }
377 }
378
379
380 // In OptiX, printf currently supports 0 or 1 arguments, and the signature
381 // requires 1 argument, so push a null pointer onto the call args if there
382 // is no argument.
383 if (rop.use_optix() && arg == format_arg + 1) {
384 call_args.push_back(rop.ll.void_ptr_null());
385 }
386
387 // Some ops prepend things
388 if (op.opname() == op_error || op.opname() == op_warning) {
389 std::string prefix = Strutil::sprintf ("Shader %s [%s]: ",
390 op.opname(),
391 rop.inst()->shadername());
392 s = prefix + s;
393 }
394
395 // Now go back and put the new format string in its place
396 if (! rop.use_optix()) {
397 call_args[new_format_slot] = rop.ll.constant (s.c_str());
398 }
399 else {
400 // In the OptiX case, we do this:
401 // void* args = { arg0, arg1, arg2 };
402 // osl_printf(sg, fmt, args);
403 // vprintf(fmt, args);
404 //
405 Symbol sym(format_sym.name(), format_sym.typespec(), format_sym.symtype());
406 format_ustring = s;
407 sym.data(&format_ustring);
408 call_args[new_format_slot] = rop.llvm_load_device_string (sym, /*follow*/ true);
409
410 size_t nargs = call_args.size() - (new_format_slot+1);
411 llvm::Value *voids = rop.ll.op_alloca (rop.ll.type_char(), optix_size, std::string(), 8);
412 optix_size = 0;
413 for (size_t i = 0; i < nargs; ++i) {
414 llvm::Value* arg = call_args[new_format_slot+1+i];
415 if (arg->getType()->isFloatingPointTy()) {
416 // Ensure that 64-bit values are aligned to 8-byte boundaries
417 optix_size = (optix_size + sizeof(double) - 1) & ~(sizeof(double)-1);
418 }
419 llvm::Value* memptr = rop.ll.offset_ptr (voids, optix_size);
420 if (arg->getType()->isIntegerTy()) {
421 llvm::Value* iptr = rop.ll.ptr_cast(memptr, rop.ll.type_int_ptr());
422 rop.ll.op_store (arg, iptr);
423 optix_size += sizeof(int);
424 } else if (arg->getType()->isFloatingPointTy()) {
425 llvm::Value* fptr = rop.ll.ptr_cast(memptr, rop.ll.type_double_ptr());
426 rop.ll.op_store (arg, fptr);
427 optix_size += sizeof(double);
428 }
429 else {
430 llvm::Value* vptr = rop.ll.ptr_to_cast(memptr, rop.ll.type_void_ptr());
431 rop.ll.op_store (arg, vptr);
432 optix_size += sizeof(uint64_t);
433 }
434 }
435 call_args.resize(new_format_slot+2);
436 call_args.back() = rop.ll.void_ptr(voids);
437 }
438
439 // Construct the function name and call it.
440 std::string opname = std::string("osl_") + op.opname().string();
441 llvm::Value *ret = rop.ll.call_function (opname.c_str(), call_args);
442
443 // The format op returns a string value, put in in the right spot
444 if (op.opname() == op_format)
445 rop.llvm_store_value (ret, *rop.opargsym (op, 0));
446 return true;
447 }
448
449
450
/// Generator for "add": Result = A + B, component by component, with
/// derivatives when Result carries them.  Adding two closures is
/// delegated to the runtime function osl_add_closure_closure.
LLVMGEN(llvm_gen_add)
{
    Opcode &op(rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym(op, 0);
    Symbol &A      = *rop.opargsym(op, 1);
    Symbol &B      = *rop.opargsym(op, 2);

    OSL_DASSERT (! A.typespec().is_array() && ! B.typespec().is_array());
    if (Result.typespec().is_closure()) {
        OSL_DASSERT (A.typespec().is_closure() && B.typespec().is_closure());
        llvm::Value *closure_args[] = {
            rop.sg_void_ptr(),
            rop.llvm_load_value(A),
            rop.llvm_load_value(B)
        };
        llvm::Value *sum = rop.ll.call_function("osl_add_closure_closure",
                                                closure_args);
        rop.llvm_store_value(sum, Result, 0, nullptr, 0);
        return true;
    }

    const TypeDesc type      = Result.typespec().simpletype();
    const int num_components = type.aggregate;

    // The following should handle f+f, v+v, v+f, f+v, i+i
    // That's all that should be allowed by oslc.
    for (int comp = 0; comp < num_components; comp++) {
        llvm::Value *lhs = rop.loadLLVMValue(A, comp, 0, type);
        llvm::Value *rhs = rop.loadLLVMValue(B, comp, 0, type);
        if (!lhs || !rhs)
            return false;
        rop.storeLLVMValue(rop.ll.op_add(lhs, rhs), Result, comp, 0);
    }

    if (!Result.has_derivs())
        return true;

    if (!(A.has_derivs() || B.has_derivs())) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs(Result);
        return true;
    }

    // The derivative of a sum is the sum of the derivatives.
    for (int deriv = 1; deriv <= 2; ++deriv) {  // dx, dy
        for (int comp = 0; comp < num_components; comp++) {
            llvm::Value *lhs = rop.loadLLVMValue(A, comp, deriv, type);
            llvm::Value *rhs = rop.loadLLVMValue(B, comp, deriv, type);
            rop.storeLLVMValue(rop.ll.op_add(lhs, rhs), Result, comp, deriv);
        }
    }
    return true;
}
502
503
504
/// Generator for "sub": Result = A - B, component by component, with
/// derivatives when Result carries them.
LLVMGEN(llvm_gen_sub)
{
    Opcode &op(rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym(op, 0);
    Symbol &A      = *rop.opargsym(op, 1);
    Symbol &B      = *rop.opargsym(op, 2);

    const TypeDesc type      = Result.typespec().simpletype();
    const int num_components = type.aggregate;

    OSL_DASSERT (! Result.typespec().is_closure_based() &&
                 "subtraction of closures not supported");

    // The following should handle f-f, v-v, v-f, f-v, i-i
    // That's all that should be allowed by oslc.
    for (int comp = 0; comp < num_components; comp++) {
        llvm::Value *lhs = rop.loadLLVMValue(A, comp, 0, type);
        llvm::Value *rhs = rop.loadLLVMValue(B, comp, 0, type);
        if (!lhs || !rhs)
            return false;
        rop.storeLLVMValue(rop.ll.op_sub(lhs, rhs), Result, comp, 0);
    }

    if (!Result.has_derivs())
        return true;

    if (!(A.has_derivs() || B.has_derivs())) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs(Result);
        return true;
    }

    // The derivative of a difference is the difference of the derivatives.
    for (int deriv = 1; deriv <= 2; ++deriv) {  // dx, dy
        for (int comp = 0; comp < num_components; comp++) {
            llvm::Value *lhs = rop.loadLLVMValue(A, comp, deriv, type);
            llvm::Value *rhs = rop.loadLLVMValue(B, comp, deriv, type);
            rop.storeLLVMValue(rop.ll.op_sub(lhs, rhs), Result, comp, deriv);
        }
    }
    return true;
}
546
547
548
/// Generator for "mul".  Three cases are handled:
///   * closure results: a closure scaled by a float or by a color, via
///     osl_mul_closure_float / osl_mul_closure_color;
///   * matrix results: via osl_mul_mmf / osl_mul_mmm (derivs zeroed);
///   * everything else: per-component multiply with dual-number derivs.
LLVMGEN(llvm_gen_mul)
{
    Opcode &op(rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym(op, 0);
    Symbol &A      = *rop.opargsym(op, 1);
    Symbol &B      = *rop.opargsym(op, 2);

    TypeDesc type = Result.typespec().simpletype();
    OSL_MAYBE_UNUSED bool is_float = !Result.typespec().is_closure_based() && Result.typespec().is_float_based();
    const int num_components = type.aggregate;

    // multiplication involving closures
    if (Result.typespec().is_closure()) {
        // Whichever operand is the closure goes first; the other operand
        // is the weight, passed by value if float, else by pointer.
        const bool a_is_closure = A.typespec().is_closure();
        Symbol &closure         = a_is_closure ? A : B;
        Symbol &weight          = a_is_closure ? B : A;
        const bool tfloat       = weight.typespec().is_float();

        llvm::Value *valargs[3];
        valargs[0] = rop.sg_void_ptr();
        valargs[1] = rop.llvm_load_value(closure);
        valargs[2] = tfloat ? rop.llvm_load_value(weight)
                            : rop.llvm_void_ptr(weight);

        llvm::Value *scaled
            = tfloat ? rop.ll.call_function("osl_mul_closure_float", valargs)
                     : rop.ll.call_function("osl_mul_closure_color", valargs);
        rop.llvm_store_value(scaled, Result, 0, nullptr, 0);
        return true;
    }

    // multiplication involving matrices
    if (Result.typespec().is_matrix()) {
        if (A.typespec().is_float()) {
            if (B.typespec().is_matrix()) {
                rop.llvm_call_function("osl_mul_mmf", Result, B, A);
            } else {
                OSL_ASSERT(0 && "frontend should not allow");
            }
        } else if (A.typespec().is_matrix()) {
            if (B.typespec().is_float()) {
                rop.llvm_call_function("osl_mul_mmf", Result, A, B);
            } else if (B.typespec().is_matrix()) {
                rop.llvm_call_function("osl_mul_mmm", Result, A, B);
            } else {
                OSL_ASSERT(0 && "frontend should not allow");
            }
        } else {
            OSL_ASSERT (0 && "frontend should not allow");
        }
        if (Result.has_derivs())
            rop.llvm_zero_derivs(Result);
        return true;
    }

    // The following should handle f*f, v*v, v*f, f*v, i*i
    // That's all that should be allowed by oslc.
    for (int comp = 0; comp < num_components; comp++) {
        llvm::Value *a = rop.llvm_load_value(A, 0, comp, type);
        llvm::Value *b = rop.llvm_load_value(B, 0, comp, type);
        if (!a || !b)
            return false;
        rop.llvm_store_value(rop.ll.op_mul(a, b), Result, 0, comp);

        if (Result.has_derivs() && (A.has_derivs() || B.has_derivs())) {
            // Multiplication of duals: (a*b, a*b.dx + a.dx*b, a*b.dy + a.dy*b)
            OSL_DASSERT (is_float);
            llvm::Value *rd[3] = { nullptr, nullptr, nullptr };
            for (int deriv = 1; deriv <= 2; ++deriv) {  // dx, dy
                llvm::Value *ad     = rop.llvm_load_value(A, deriv, comp, type);
                llvm::Value *bd     = rop.llvm_load_value(B, deriv, comp, type);
                llvm::Value *a_bd   = rop.ll.op_mul(a, bd);
                llvm::Value *ad_b   = rop.ll.op_mul(ad, b);
                rd[deriv]           = rop.ll.op_add(a_bd, ad_b);
            }
            // Both partials are stored only after both were computed.
            rop.llvm_store_value(rd[1], Result, 1, comp);
            rop.llvm_store_value(rd[2], Result, 2, comp);
        }
    }

    if (Result.has_derivs() && ! (A.has_derivs() || B.has_derivs())) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs(Result);
    }

    return true;
}
633
634
635
/// Generator for "div".  Matrix results go through osl_div_mfm /
/// osl_div_mmf / osl_div_mmm (derivs zeroed).  All other types divide
/// per component: a plain divide when B is a constant that is_zero()
/// rejects, otherwise a call to the "safe" divide runtime function.
LLVMGEN(llvm_gen_div)
{
    Opcode &op(rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym(op, 0);
    Symbol &A      = *rop.opargsym(op, 1);
    Symbol &B      = *rop.opargsym(op, 2);

    TypeDesc type = Result.typespec().simpletype();
    bool is_float = Result.typespec().is_float_based();
    const int num_components = type.aggregate;

    OSL_DASSERT (! Result.typespec().is_closure_based());

    // division involving matrices
    if (Result.typespec().is_matrix()) {
        if (A.typespec().is_float()) {
            OSL_ASSERT (!B.typespec().is_float() && "frontend should not allow");
            if (B.typespec().is_matrix()) {
                rop.llvm_call_function("osl_div_mfm", Result, A, B);
            } else {
                OSL_ASSERT (0);
            }
        } else if (A.typespec().is_matrix()) {
            if (B.typespec().is_float()) {
                rop.llvm_call_function("osl_div_mmf", Result, A, B);
            } else if (B.typespec().is_matrix()) {
                rop.llvm_call_function("osl_div_mmm", Result, A, B);
            } else {
                OSL_ASSERT (0);
            }
        } else {
            OSL_ASSERT (0);
        }
        if (Result.has_derivs())
            rop.llvm_zero_derivs(Result);
        return true;
    }

    // The following should handle f/f, v/v, v/f, f/v, i/i
    // That's all that should be allowed by oslc.
    const char *safe_div = is_float ? "osl_safe_div_fff" : "osl_safe_div_iii";
    const bool deriv = (Result.has_derivs() && (A.has_derivs() || B.has_derivs()));

    // Emit num/den, choosing the plain or the safe form of the divide.
    auto emit_quotient = [&](llvm::Value *num, llvm::Value *den) -> llvm::Value* {
        if (B.is_constant() && ! rop.is_zero(B))
            return rop.ll.op_div(num, den);
        return rop.ll.call_function(safe_div, num, den);
    };

    for (int comp = 0; comp < num_components; comp++) {
        llvm::Value *a = rop.llvm_load_value(A, 0, comp, type);
        llvm::Value *b = rop.llvm_load_value(B, 0, comp, type);
        if (!a || !b)
            return false;
        llvm::Value *a_div_b = emit_quotient(a, b);
        llvm::Value *rd[3]   = { nullptr, nullptr, nullptr };

        if (deriv) {
            // Division of duals: (a/b, 1/b*(ax-a/b*bx), 1/b*(ay-a/b*by))
            OSL_DASSERT (is_float);
            llvm::Value *binv = emit_quotient(rop.ll.constant(1.0f), b);
            for (int d = 1; d <= 2; ++d) {  // dx, dy
                llvm::Value *ad    = rop.llvm_load_value(A, d, comp, type);
                llvm::Value *bd    = rop.llvm_load_value(B, d, comp, type);
                llvm::Value *q_bd  = rop.ll.op_mul(a_div_b, bd);
                llvm::Value *numer = rop.ll.op_sub(ad, q_bd);
                rd[d]              = rop.ll.op_mul(binv, numer);
            }
        }

        // All stores come after every load for this component.
        rop.llvm_store_value(a_div_b, Result, 0, comp);
        if (deriv) {
            rop.llvm_store_value(rd[1], Result, 1, comp);
            rop.llvm_store_value(rd[2], Result, 2, comp);
        }
    }

    if (Result.has_derivs() && ! (A.has_derivs() || B.has_derivs())) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs(Result);
    }

    return true;
}
718
719
720
/// Generator for "mod": Result = A mod B per component.  A plain modulus
/// instruction is used when B is a constant that is_zero() rejects;
/// otherwise osl_fmod_fff / osl_safe_mod_iii is called.  Derivatives of
/// the result are copied from A.
LLVMGEN(llvm_gen_modulus)
{
    Opcode &op(rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym(op, 0);
    Symbol &A      = *rop.opargsym(op, 1);
    Symbol &B      = *rop.opargsym(op, 2);

    TypeDesc type = Result.typespec().simpletype();
    bool is_float = Result.typespec().is_float_based();
    const int num_components = type.aggregate;

#ifdef OSL_LLVM_NO_BITCODE
    // On Windows 32 bit this calls an unknown instruction, probably need to
    // link with LLVM compiler-rt to fix, for now just fall back to op
    if (is_float)
        return llvm_gen_generic(rop, opnum);
#endif

    // The following should handle f%f, v%v, v%f, i%i
    // That's all that should be allowed by oslc.
    const char *safe_mod = is_float ? "osl_fmod_fff" : "osl_safe_mod_iii";
    for (int comp = 0; comp < num_components; comp++) {
        llvm::Value *a = rop.loadLLVMValue(A, comp, 0, type);
        llvm::Value *b = rop.loadLLVMValue(B, comp, 0, type);
        if (!a || !b)
            return false;
        llvm::Value *remainder = (B.is_constant() && ! rop.is_zero(B))
                                     ? rop.ll.op_mod(a, b)
                                     : rop.ll.call_function(safe_mod, a, b);
        rop.storeLLVMValue(remainder, Result, comp, 0);
    }

    if (!Result.has_derivs())
        return true;

    OSL_DASSERT (is_float);
    if (!A.has_derivs()) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs(Result);
        return true;
    }

    // Modulus of duals: (a mod b, ax, ay)
    for (int d = 1; d <= 2; ++d) {
        for (int comp = 0; comp < num_components; comp++) {
            llvm::Value *a_deriv = rop.loadLLVMValue(A, comp, d, type);
            rop.storeLLVMValue(a_deriv, Result, comp, d);
        }
    }
    return true;
}
772
773
774
/// Generator for "neg": Result = -A for every component, and likewise
/// for the dx and dy partials when Result carries derivatives.
LLVMGEN(llvm_gen_neg)
{
    Opcode &op(rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym(op, 0);
    Symbol &A      = *rop.opargsym(op, 1);

    const TypeDesc type      = Result.typespec().simpletype();
    const int num_components = type.aggregate;

    // Slot 0 is the value; slots 1 and 2 (dx, dy) are only processed when
    // the result has derivatives.
    const int num_slots = Result.has_derivs() ? 3 : 1;
    for (int slot = 0; slot < num_slots; ++slot) {
        for (int comp = 0; comp < num_components; comp++) {
            llvm::Value *operand = rop.llvm_load_value(A, slot, comp, type);
            llvm::Value *negated = rop.ll.op_neg(operand);
            rop.llvm_store_value(negated, Result, slot, comp);
        }
    }
    return true;
}
794
795
796
797 // Implementation for clamp
LLVMGEN(llvm_gen_clamp)798 LLVMGEN (llvm_gen_clamp)
799 {
800 Opcode &op (rop.inst()->ops()[opnum]);
801 Symbol& Result = *rop.opargsym (op, 0);
802 Symbol& X = *rop.opargsym (op, 1);
803 Symbol& Min = *rop.opargsym (op, 2);
804 Symbol& Max = *rop.opargsym (op, 3);
805
806 TypeDesc type = Result.typespec().simpletype();
807 int num_components = type.aggregate;
808 for (int i = 0; i < num_components; i++) {
809 // First do the lower bound
810 llvm::Value *val = rop.llvm_load_value (X, 0, i, type);
811 llvm::Value *min = rop.llvm_load_value (Min, 0, i, type);
812 llvm::Value *cond = rop.ll.op_lt (val, min);
813 val = rop.ll.op_select (cond, min, val);
814 llvm::Value *valdx=NULL, *valdy=NULL;
815 if (Result.has_derivs()) {
816 valdx = rop.llvm_load_value (X, 1, i, type);
817 valdy = rop.llvm_load_value (X, 2, i, type);
818 llvm::Value *mindx = rop.llvm_load_value (Min, 1, i, type);
819 llvm::Value *mindy = rop.llvm_load_value (Min, 2, i, type);
820 valdx = rop.ll.op_select (cond, mindx, valdx);
821 valdy = rop.ll.op_select (cond, mindy, valdy);
822 }
823 // Now do the upper bound
824 llvm::Value *max = rop.llvm_load_value (Max, 0, i, type);
825 cond = rop.ll.op_gt (val, max);
826 val = rop.ll.op_select (cond, max, val);
827 if (Result.has_derivs()) {
828 llvm::Value *maxdx = rop.llvm_load_value (Max, 1, i, type);
829 llvm::Value *maxdy = rop.llvm_load_value (Max, 2, i, type);
830 valdx = rop.ll.op_select (cond, maxdx, valdx);
831 valdy = rop.ll.op_select (cond, maxdy, valdy);
832 }
833 rop.llvm_store_value (val, Result, 0, i);
834 rop.llvm_store_value (valdx, Result, 1, i);
835 rop.llvm_store_value (valdy, Result, 2, i);
836 }
837 return true;
838 }
839
840
841
/// Generator for "mix": Result = A*(1-X) + B*X per component.  X may
/// have a single component (shared by all result components) or one per
/// component.  Derivatives follow the product rule on dual numbers.
LLVMGEN(llvm_gen_mix)
{
    Opcode &op(rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym(op, 0);
    Symbol &A      = *rop.opargsym(op, 1);
    Symbol &B      = *rop.opargsym(op, 2);
    Symbol &X      = *rop.opargsym(op, 3);
    const TypeDesc type = Result.typespec().simpletype();
    OSL_DASSERT (!Result.typespec().is_closure_based() &&
                 Result.typespec().is_float_based());
    const int num_components = type.aggregate;
    const int x_components   = X.typespec().aggregate();
    const bool derivs = (Result.has_derivs()
                         && (A.has_derivs() || B.has_derivs() || X.has_derivs()));

    llvm::Value *one         = rop.ll.constant(1.0f);
    llvm::Value *x           = rop.llvm_load_value(X, 0, 0, type);
    llvm::Value *one_minus_x = rop.ll.op_sub(one, x);
    // xd[1], xd[2] hold X.dx and X.dy of the current component.
    llvm::Value *xd[3] = {
        nullptr,
        derivs ? rop.llvm_load_value(X, 1, 0, type) : nullptr,
        derivs ? rop.llvm_load_value(X, 2, 0, type) : nullptr
    };

    for (int comp = 0; comp < num_components; comp++) {
        llvm::Value *a = rop.llvm_load_value(A, 0, comp, type);
        llvm::Value *b = rop.llvm_load_value(B, 0, comp, type);
        if (!a || !b)
            return false;
        // Only need to recompute x and 1-x if they change
        const bool reload_x = (comp > 0 && x_components > 1);
        if (reload_x) {
            x           = rop.llvm_load_value(X, 0, comp, type);
            one_minus_x = rop.ll.op_sub(one, x);
        }
        // r = a*one_minus_x + b*x
        llvm::Value *a_part = rop.ll.op_mul(a, one_minus_x);
        llvm::Value *b_part = rop.ll.op_mul(b, x);
        rop.llvm_store_value(rop.ll.op_add(a_part, b_part), Result, 0, comp);

        if (!derivs)
            continue;

        // mix of duals:
        //   (a*one_minus_x + b*x,
        //    a*one_minus_x.dx + a.dx*one_minus_x + b*x.dx + b.dx*x,
        //    a*one_minus_x.dy + a.dy*one_minus_x + b*x.dy + b.dy*x)
        // and since one_minus_x.dx = -x.dx, one_minus_x.dy = -x.dy,
        //   (a*one_minus_x + b*x,
        //    -a*x.dx + a.dx*one_minus_x + b*x.dx + b.dx*x,
        //    -a*x.dy + a.dy*one_minus_x + b*x.dy + b.dy*x)
        llvm::Value *rd[3] = { nullptr, nullptr, nullptr };
        for (int d = 1; d <= 2; ++d) {  // dx, dy
            llvm::Value *ad = rop.llvm_load_value(A, d, comp, type);
            llvm::Value *bd = rop.llvm_load_value(B, d, comp, type);
            if (reload_x)
                xd[d] = rop.llvm_load_value(X, d, comp, type);
            llvm::Value *a_xd   = rop.ll.op_mul(a, xd[d]);
            llvm::Value *ad_omx = rop.ll.op_mul(ad, one_minus_x);
            llvm::Value *acc    = rop.ll.op_sub(ad_omx, a_xd);
            llvm::Value *b_xd   = rop.ll.op_mul(b, xd[d]);
            acc                 = rop.ll.op_add(acc, b_xd);
            llvm::Value *bd_x   = rop.ll.op_mul(bd, x);
            rd[d]               = rop.ll.op_add(acc, bd_x);
        }

        // Both partials are stored only after both were computed.
        rop.llvm_store_value(rd[1], Result, 1, comp);
        rop.llvm_store_value(rd[2], Result, 2, comp);
    }

    if (Result.has_derivs() && !derivs) {
        // Result has derivs, operands do not
        rop.llvm_zero_derivs(Result);
    }

    return true;
}
923
924
925
// select (A, B, X): each result component takes B's value where the
// matching component of X is nonzero and A's value where it is zero.
// Result components beyond X's own component count reuse X's first
// component as their selector.
LLVMGEN (llvm_gen_select)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &A      = *rop.opargsym (op, 1);
    Symbol &B      = *rop.opargsym (op, 2);
    Symbol &X      = *rop.opargsym (op, 3);
    const TypeDesc result_type = Result.typespec().simpletype();
    OSL_DASSERT (!Result.typespec().is_closure_based() &&
                 Result.typespec().is_float_based());
    const int result_comps   = result_type.aggregate;
    const int selector_comps = X.typespec().aggregate();
    // Derivatives are selected only if the result wants them and at least
    // one of the two candidates carries them.
    const bool select_derivs = Result.has_derivs()
                               && (A.has_derivs() || B.has_derivs());

    // Turn every component of X into an "X != 0" boolean, comparing
    // against a zero of the matching (int or float) type.
    llvm::Value *zero = nullptr;
    if (X.typespec().is_int())
        zero = rop.ll.constant (0);
    else
        zero = rop.ll.constant (0.0f);
    llvm::Value *is_nonzero[3];
    for (int c = 0; c < selector_comps; ++c) {
        llvm::Value *x = rop.llvm_load_value (X, 0, c);
        is_nonzero[c] = rop.ll.op_ne (x, zero);
    }

    // d == 0 is the value itself; d == 1 and d == 2 are its derivatives,
    // which are chosen by the very same per-component condition.
    const int nderivs = select_derivs ? 3 : 1;
    for (int c = 0; c < result_comps; ++c) {
        llvm::Value *pick_b = (c < selector_comps) ? is_nonzero[c]
                                                   : is_nonzero[0];
        for (int d = 0; d < nderivs; ++d) {
            llvm::Value *a_val  = rop.llvm_load_value (A, d, c, result_type);
            llvm::Value *b_val  = rop.llvm_load_value (B, d, c, result_type);
            llvm::Value *chosen = rop.ll.op_select (pick_b, b_val, a_val);
            rop.llvm_store_value (chosen, Result, d, c);
        }
    }

    // The result wants derivatives but neither operand supplied any.
    if (Result.has_derivs() && !select_derivs)
        rop.llvm_zero_derivs (Result);
    return true;
}
969
970
971
972 // Implementation for min/max
// Shared generator for "min" and "max": a per-component compare followed
// by a select.  The same condition that picks the value also picks which
// operand's derivatives are forwarded to the result.
LLVMGEN (llvm_gen_minmax)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &x      = *rop.opargsym (op, 1);
    Symbol &y      = *rop.opargsym (op, 2);

    const TypeDesc type     = Result.typespec().simpletype();
    const int      ncomps   = type.aggregate;
    const bool     want_min = (op.opname() == op_min);

    for (int c = 0; c < ncomps; ++c) {
        llvm::Value *xv = rop.llvm_load_value (x, 0, c, type);
        llvm::Value *yv = rop.llvm_load_value (y, 0, c, type);

        // pick_x is true when x is the winner.
        // NOTE(boulos): min uses <= instead of < to match old behavior
        // (only matters for derivs).
        llvm::Value *pick_x = want_min ? rop.ll.op_le (xv, yv)
                                       : rop.ll.op_gt (xv, yv);

        llvm::Value *winner = rop.ll.op_select (pick_x, xv, yv);
        rop.llvm_store_value (winner, Result, 0, c);

        if (Result.has_derivs()) {
            llvm::Value *x_dx = rop.llvm_load_value (x, 1, c, type);
            llvm::Value *x_dy = rop.llvm_load_value (x, 2, c, type);
            llvm::Value *y_dx = rop.llvm_load_value (y, 1, c, type);
            llvm::Value *y_dy = rop.llvm_load_value (y, 2, c, type);
            rop.llvm_store_value (rop.ll.op_select (pick_x, x_dx, y_dx),
                                  Result, 1, c);
            rop.llvm_store_value (rop.ll.op_select (pick_x, x_dy, y_dy),
                                  Result, 2, c);
        }
    }
    return true;
}
1009
1010
1011
// Integer-only binary ops: bitand, bitor, xor, shl, shr.
// Returns false if an operand cannot be loaded or the opcode is not one
// of those five.
LLVMGEN (llvm_gen_bitwise_binary_op)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &A      = *rop.opargsym (op, 1);
    Symbol &B      = *rop.opargsym (op, 2);
    OSL_DASSERT (Result.typespec().is_int() && A.typespec().is_int() &&
                 B.typespec().is_int());

    llvm::Value *lhs = rop.loadLLVMValue (A);
    llvm::Value *rhs = rop.loadLLVMValue (B);
    if (lhs == nullptr || rhs == nullptr)
        return false;

    const ustring opname = op.opname();
    llvm::Value *bits = nullptr;
    if (opname == op_bitand) {
        bits = rop.ll.op_and (lhs, rhs);
    } else if (opname == op_bitor) {
        bits = rop.ll.op_or (lhs, rhs);
    } else if (opname == op_xor) {
        bits = rop.ll.op_xor (lhs, rhs);
    } else if (opname == op_shl) {
        bits = rop.ll.op_shl (lhs, rhs);
    } else if (opname == op_shr) {
        bits = rop.ll.op_shr (lhs, rhs);
    } else {
        return false;   // not an op this generator knows about
    }

    rop.storeLLVMValue (bits, Result);
    return true;
}
1041
1042
1043
1044 // Simple (pointwise) unary ops (Abs, ...,
// Per-component unary ops.  The only op implemented here is "compl"
// (bitwise not); any other opcode is reported as an error and its store
// is skipped.  Derivatives of the destination are not computed.
LLVMGEN (llvm_gen_unary_op)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &dst = *rop.opargsym (op, 0);
    Symbol &src = *rop.opargsym (op, 1);

    const ustring opname      = op.opname();
    const bool dst_has_derivs = dst.has_derivs();
    const bool dst_is_float   = dst.typespec().is_float_based();
    const bool src_is_float   = src.typespec().is_float_based();
    const int  ncomps         = dst.typespec().simpletype().aggregate;

    for (int c = 0; c < ncomps; ++c) {
        // Fetch component c of the operand
        llvm::Value *operand = rop.loadLLVMValue (src, c, 0);
        if (operand == nullptr)
            return false;

        // Perform the op
        llvm::Value *result = nullptr;
        if (opname == op_compl) {
            OSL_DASSERT (dst.typespec().is_int());
            result = rop.ll.op_not (operand);
        } else {
            // Don't know how to handle this.
            rop.shadingcontext()->errorf("Don't know how to handle op '%s', eliding the store\n", opname);
        }

        // Store the result, converting when the operand's base type
        // (int vs. float) differs from the destination's.
        if (result != nullptr) {
            if (dst_is_float && !src_is_float)
                result = rop.ll.op_int_to_float (result);
            else if (!dst_is_float && src_is_float)
                result = rop.ll.op_float_to_int (result);
            rop.storeLLVMValue (result, dst, c, 0);
        }

        if (dst_has_derivs) {
            // FIXME!!  Derivatives are not produced by this generator.
            rop.shadingcontext()->infof("punting on derivatives for now\n");
        }
    }
    return true;
}
1096
1097
1098
1099 // Simple assignment
// Result = Src, with all of the work delegated to llvm_assign_impl.
LLVMGEN (llvm_gen_assign)
{
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol *dst = rop.opargsym (op, 0);
    Symbol *src = rop.opargsym (op, 1);
    return rop.llvm_assign_impl (*dst, *src);
}
1108
1109
1110
1111 // Entire array copying
// Whole-array copy.  Uses the same llvm_assign_impl path as a plain
// assignment.
LLVMGEN (llvm_gen_arraycopy)
{
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol *dst = rop.opargsym (op, 0);
    Symbol *src = rop.opargsym (op, 1);
    return rop.llvm_assign_impl (*dst, *src);
}
1120
1121
1122
1123 // Vector component reference
// Result = Val[Index] for a 3-component value.  A constant index is
// clamped to [0,2] at code generation time; a non-constant index is
// resolved at run time, optionally after passing through osl_range_check.
LLVMGEN (llvm_gen_compref)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &Val    = *rop.opargsym (op, 1);
    Symbol &Index  = *rop.opargsym (op, 2);

    const bool const_index = Index.is_constant();
    llvm::Value *comp = rop.llvm_load_value(Index);

    if (rop.inst()->master()->range_checking()) {
        // The runtime check is only skipped for a constant index that is
        // already known to be 0, 1 or 2.
        const bool known_in_range = const_index
                                    && *(int *)Index.data() >= 0
                                    && *(int *)Index.data() < 3;
        if (! known_in_range) {
            llvm::Value *args[] = { c_or_index_placeholder_unused(nullptr) };
            (void) args;
        }
    }
    return true;
}
1163
1164
1165
1166 // Vector component assignment
// Result[Index] = Val for a 3-component Result.  Val is loaded as a float.
// A constant index is clamped to [0,2] at code generation time; otherwise
// the component is addressed at run time, optionally after passing
// through osl_range_check.
LLVMGEN (llvm_gen_compassign)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &Index  = *rop.opargsym (op, 1);
    Symbol &Val    = *rop.opargsym (op, 2);

    const bool const_index = Index.is_constant();
    llvm::Value *comp = rop.llvm_load_value(Index);

    if (rop.inst()->master()->range_checking()) {
        // The runtime check is only skipped for a constant index that is
        // already known to be 0, 1 or 2.
        const bool known_in_range = const_index
                                    && *(int *)Index.data() >= 0
                                    && *(int *)Index.data() < 3;
        if (! known_in_range) {
            llvm::Value *args[] = { comp, rop.ll.constant(3),
                                    rop.ll.constant(Result.unmangled()),
                                    rop.sg_void_ptr(),
                                    rop.ll.constant(op.sourcefile()),
                                    rop.ll.constant(op.sourceline()),
                                    rop.ll.constant(rop.group().name()),
                                    rop.ll.constant(rop.layer()),
                                    rop.ll.constant(rop.inst()->layername()),
                                    rop.ll.constant(rop.inst()->shadername()) };
            comp = rop.ll.call_function ("osl_range_check", args);
        }
    }

    // Only used on the constant-index path.
    const int fixed_comp = const_index
                         ? Imath::clamp (*(int *)Index.data(), 0, 2) : 0;

    // Value only, or value plus both derivatives if Result carries them.
    const int nderivs = Result.has_derivs() ? 3 : 1;
    for (int d = 0; d < nderivs; ++d) {
        llvm::Value *v = rop.llvm_load_value (Val, d, 0, TypeDesc::TypeFloat);
        if (const_index)
            rop.llvm_store_value (v, Result, d, fixed_comp);
        else
            rop.llvm_store_component_value (v, Result, d, comp);
    }
    return true;
}
1205
1206
1207
1208 // Matrix component reference
// Result = M[Row][Col] for a 4x4 matrix stored as 16 consecutive floats
// (component index 4*row + col).  Constant indices are clamped to [0,3]
// at code generation time; otherwise the flat index is computed at run
// time.  When range checking is enabled, every index that is not a
// constant already known to be in [0,3] is routed through osl_range_check.
// The derivatives of Result are always zeroed.
LLVMGEN (llvm_gen_mxcompref)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &M      = *rop.opargsym (op, 1);
    Symbol &Row    = *rop.opargsym (op, 2);
    Symbol &Col    = *rop.opargsym (op, 3);

    llvm::Value *row = rop.llvm_load_value (Row);
    llvm::Value *col = rop.llvm_load_value (Col);

    if (rop.inst()->master()->range_checking()) {
        const bool row_known_ok = Row.is_constant()
                                  && *(int *)Row.data() >= 0
                                  && *(int *)Row.data() < 4;
        const bool col_known_ok = Col.is_constant()
                                  && *(int *)Col.data() >= 0
                                  && *(int *)Col.data() < 4;
        if (! (row_known_ok && col_known_ok)) {
            // Report the unmangled symbol name, as the vector-component
            // and array-element range checks in this file do, so that the
            // diagnostic shows the name as written in the shader.
            llvm::Value *args[] = { row, rop.ll.constant(4),
                                    rop.ll.constant(M.unmangled()),
                                    rop.sg_void_ptr(),
                                    rop.ll.constant(op.sourcefile()),
                                    rop.ll.constant(op.sourceline()),
                                    rop.ll.constant(rop.group().name()),
                                    rop.ll.constant(rop.layer()),
                                    rop.ll.constant(rop.inst()->layername()),
                                    rop.ll.constant(rop.inst()->shadername()) };
            if (! row_known_ok)
                row = rop.ll.call_function ("osl_range_check", args);
            if (! col_known_ok) {
                args[0] = col;   // same arguments, but checking the column
                col = rop.ll.call_function ("osl_range_check", args);
            }
        }
    }

    llvm::Value *val = nullptr;
    if (Row.is_constant() && Col.is_constant()) {
        const int r = Imath::clamp (((int*)Row.data())[0], 0, 3);
        const int c = Imath::clamp (((int*)Col.data())[0], 0, 3);
        val = rop.llvm_load_value (M, 0, 4 * r + c);
    } else {
        llvm::Value *comp = rop.ll.op_mul (row, rop.ll.constant(4));
        comp = rop.ll.op_add (comp, col);
        val = rop.llvm_load_component_value (M, 0, comp);
    }
    rop.llvm_store_value (val, Result);
    rop.llvm_zero_derivs (Result);

    return true;
}
1260
1261
1262
1263 // Matrix component assignment
// Result[Row][Col] = Val for a 4x4 matrix stored as 16 consecutive floats
// (component index 4*row + col).  Val is loaded as a float.  Constant
// indices are clamped to [0,3] at code generation time; otherwise the
// flat index is computed at run time.  When range checking is enabled,
// every index that is not a constant already known to be in [0,3] is
// routed through osl_range_check.
LLVMGEN (llvm_gen_mxcompassign)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &Row    = *rop.opargsym (op, 1);
    Symbol &Col    = *rop.opargsym (op, 2);
    Symbol &Val    = *rop.opargsym (op, 3);

    llvm::Value *row = rop.llvm_load_value (Row);
    llvm::Value *col = rop.llvm_load_value (Col);

    if (rop.inst()->master()->range_checking()) {
        const bool row_known_ok = Row.is_constant()
                                  && *(int *)Row.data() >= 0
                                  && *(int *)Row.data() < 4;
        const bool col_known_ok = Col.is_constant()
                                  && *(int *)Col.data() >= 0
                                  && *(int *)Col.data() < 4;
        if (! (row_known_ok && col_known_ok)) {
            // Report the unmangled symbol name, as the vector-component
            // and array-element range checks in this file do, so that the
            // diagnostic shows the name as written in the shader.
            llvm::Value *args[] = { row, rop.ll.constant(4),
                                    rop.ll.constant(Result.unmangled()),
                                    rop.sg_void_ptr(),
                                    rop.ll.constant(op.sourcefile()),
                                    rop.ll.constant(op.sourceline()),
                                    rop.ll.constant(rop.group().name()),
                                    rop.ll.constant(rop.layer()),
                                    rop.ll.constant(rop.inst()->layername()),
                                    rop.ll.constant(rop.inst()->shadername()) };
            if (! row_known_ok)
                row = rop.ll.call_function ("osl_range_check", args);
            if (! col_known_ok) {
                args[0] = col;   // same arguments, but checking the column
                col = rop.ll.call_function ("osl_range_check", args);
            }
        }
    }

    llvm::Value *val = rop.llvm_load_value (Val, 0, 0, TypeDesc::TypeFloat);

    if (Row.is_constant() && Col.is_constant()) {
        const int r = Imath::clamp (((int*)Row.data())[0], 0, 3);
        const int c = Imath::clamp (((int*)Col.data())[0], 0, 3);
        rop.llvm_store_value (val, Result, 0, 4 * r + c);
    } else {
        llvm::Value *comp = rop.ll.op_mul (row, rop.ll.constant(4));
        comp = rop.ll.op_add (comp, col);
        rop.llvm_store_component_value (val, Result, 0, comp);
    }
    return true;
}
1313
1314
1315
1316 // Array length
// Result = number of elements in array A, emitted as a compile-time
// constant.  For an unsized array the count comes from A.initializers();
// otherwise it is the declared array length.
LLVMGEN (llvm_gen_arraylength)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &A      = *rop.opargsym (op, 1);
    OSL_DASSERT(Result.typespec().is_int() && A.typespec().is_array());

    int len;
    if (A.typespec().is_unsized_array())
        len = A.initializers();
    else
        len = A.typespec().arraylength();

    rop.llvm_store_value (rop.ll.constant(len), Result);
    return true;
}
1329
1330
1331
1332 // Array reference
// Result = Src[Index].  Copies every component of the addressed element,
// plus its derivatives when Result carries them.  Returns false if the
// index value cannot be loaded.
LLVMGEN (llvm_gen_aref)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &Src    = *rop.opargsym (op, 1);
    Symbol &Index  = *rop.opargsym (op, 2);

    // Get array index we're interested in
    llvm::Value *elem = rop.loadLLVMValue (Index);
    if (elem == nullptr)
        return false;

    if (rop.inst()->master()->range_checking()) {
        // The runtime check is only skipped for a constant index that is
        // already known to lie inside the array.
        const bool known_in_range =
            Index.is_constant()
            && *(int *)Index.data() >= 0
            && *(int *)Index.data() < Src.typespec().arraylength();
        if (! known_in_range) {
            llvm::Value *args[] = { elem,
                                    rop.ll.constant(Src.typespec().arraylength()),
                                    rop.ll.constant(Src.unmangled()),
                                    rop.sg_void_ptr(),
                                    rop.ll.constant(op.sourcefile()),
                                    rop.ll.constant(op.sourceline()),
                                    rop.ll.constant(rop.group().name()),
                                    rop.ll.constant(rop.layer()),
                                    rop.ll.constant(rop.inst()->layername()),
                                    rop.ll.constant(rop.inst()->shadername()) };
            elem = rop.ll.call_function ("osl_range_check", args);
        }
    }

    const int ncomps  = Src.typespec().simpletype().aggregate;
    const int nderivs = Result.has_derivs() ? 3 : 1;
    for (int d = 0; d < nderivs; ++d) {
        for (int c = 0; c < ncomps; ++c) {
            llvm::Value *v = rop.llvm_load_value (Src, d, elem, c);
            rop.storeLLVMValue (v, Result, c, d);
        }
    }

    return true;
}
1373
1374
1375
1376 // Array assignment
// Result[Index] = Src.  Writes every component of the addressed element,
// plus its derivatives when Result carries them.  Single-component
// int/float values are cast to Result's element type on load.  Returns
// false if the index value cannot be loaded.
LLVMGEN (llvm_gen_aassign)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &Index  = *rop.opargsym (op, 1);
    Symbol &Src    = *rop.opargsym (op, 2);

    // Get array index we're interested in
    llvm::Value *elem = rop.loadLLVMValue (Index);
    if (elem == nullptr)
        return false;

    if (rop.inst()->master()->range_checking()) {
        // The runtime check is only skipped for a constant index that is
        // already known to lie inside the array.
        const bool known_in_range =
            Index.is_constant()
            && *(int *)Index.data() >= 0
            && *(int *)Index.data() < Result.typespec().arraylength();
        if (! known_in_range) {
            llvm::Value *args[] = { elem,
                                    rop.ll.constant(Result.typespec().arraylength()),
                                    rop.ll.constant(Result.unmangled()),
                                    rop.sg_void_ptr(),
                                    rop.ll.constant(op.sourcefile()),
                                    rop.ll.constant(op.sourceline()),
                                    rop.ll.constant(rop.group().name()),
                                    rop.ll.constant(rop.layer()),
                                    rop.ll.constant(rop.inst()->layername()),
                                    rop.ll.constant(rop.inst()->shadername()) };
            elem = rop.ll.call_function ("osl_range_check", args);
        }
    }

    const int ncomps = Result.typespec().simpletype().aggregate;

    // Allow float <=> int casting, but only between non-closure scalars
    // whose types are int- or float-based.
    const bool neither_is_closure = !Result.typespec().is_closure()
                                    && !Src.typespec().is_closure();
    const bool result_is_numeric  = Result.typespec().is_int_based()
                                    || Result.typespec().is_float_based();
    const bool src_is_numeric     = Src.typespec().is_int_based()
                                    || Src.typespec().is_float_based();
    TypeDesc cast;
    if (ncomps == 1 && neither_is_closure
        && result_is_numeric && src_is_numeric) {
        cast = Result.typespec().simpletype();
        cast.arraylen = 0;   // the element type, not the array type
    } else {
        // Try to warn before llvm_fatal_error is called which provides little
        // context as to what went wrong.
        OSL_ASSERT (Result.typespec().simpletype().basetype ==
                    Src.typespec().simpletype().basetype);
    }

    const int nderivs = Result.has_derivs() ? 3 : 1;
    for (int d = 0; d < nderivs; ++d) {
        for (int c = 0; c < ncomps; ++c) {
            llvm::Value *v = rop.loadLLVMValue (Src, c, d, cast);
            rop.llvm_store_value (v, Result, d, elem, c);
        }
    }

    return true;
}
1432
1433
1434
1435 // Construct color, optionally with a color transformation from a named
1436 // color space.
// color (x, y, z) or color (space, x, y, z).  The three floats are copied
// into Result first; when a space name is given (5 args), the color is
// then converted in place by osl_prepend_color_from and its derivatives
// are zeroed.
LLVMGEN (llvm_gen_construct_color)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    bool using_space = (op.nargs() == 5);
    Symbol& Space = *rop.opargsym (op, 1);
    OSL_MAYBE_UNUSED Symbol& X = *rop.opargsym (op, 1+using_space);
    OSL_MAYBE_UNUSED Symbol& Y = *rop.opargsym (op, 2+using_space);
    OSL_MAYBE_UNUSED Symbol& Z = *rop.opargsym (op, 3+using_space);
    OSL_DASSERT (Result.typespec().is_triple() && X.typespec().is_float() &&
                 Y.typespec().is_float() && Z.typespec().is_float() &&
                 (using_space == false || Space.typespec().is_string()));

    // Gather the float arguments into the triple, one derivative level
    // at a time.
    const int first_float_arg = 1 + using_space;
    const int nderivs = Result.has_derivs() ? 3 : 1;
    for (int d = 0; d < nderivs; ++d) {
        for (int c = 0; c < 3; ++c) {
            const Symbol& channel = *rop.opargsym (op, first_float_arg + c);
            llvm::Value* v = rop.llvm_load_value (channel, d, NULL, 0, TypeDesc::TypeFloat);
            rop.llvm_store_value (v, Result, d, NULL, c);
        }
    }

    if (! using_space)
        return true;

    // Do the color space conversion in-place
    llvm::Value *args[] = {
        rop.sg_void_ptr(),              // shader globals
        rop.llvm_void_ptr(Result, 0),   // color
        rop.llvm_load_string(Space),    // from
    };
    rop.ll.call_function ("osl_prepend_color_from", args);
    // FIXME(deriv): Punt on derivs for color ctrs with space names.
    // We should try to do this right, but we never had it right for
    // the interpreter, so it's probably not an emergency.
    if (Result.has_derivs())
        rop.llvm_zero_derivs (Result);

    return true;
}
1477
1478
1479
1480 // Construct spatial triple (point, vector, normal), optionally with a
1481 // transformation from a named coordinate system.
// point/vector/normal (x, y, z) or (space, x, y, z).  The three floats are
// copied into Result first; when a space name is given (5 args), Result is
// then transformed in place from that space to "common".
LLVMGEN (llvm_gen_construct_triple)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    bool using_space = (op.nargs() == 5);
    Symbol& Space = *rop.opargsym (op, 1);
    OSL_MAYBE_UNUSED Symbol& X = *rop.opargsym (op, 1+using_space);
    OSL_MAYBE_UNUSED Symbol& Y = *rop.opargsym (op, 2+using_space);
    OSL_MAYBE_UNUSED Symbol& Z = *rop.opargsym (op, 3+using_space);
    OSL_DASSERT (Result.typespec().is_triple() && X.typespec().is_float() &&
                 Y.typespec().is_float() && Z.typespec().is_float() &&
                 (using_space == false || Space.typespec().is_string()));

    // Gather the float arguments into the triple, one derivative level
    // at a time.
    const int first_float_arg = 1 + using_space;
    const int nderivs = Result.has_derivs() ? 3 : 1;
    for (int d = 0; d < nderivs; ++d) {
        for (int c = 0; c < 3; ++c) {
            const Symbol& coord = *rop.opargsym (op, first_float_arg + c);
            llvm::Value* v = rop.llvm_load_value (coord, d, NULL, 0, TypeDesc::TypeFloat);
            rop.llvm_store_value (v, Result, d, NULL, c);
        }
    }

    if (! using_space)
        return true;

    // Do the transformation in-place.
    ustring from, to;  // N.B. initialize to empty strings
    if (Space.is_constant()) {
        from = *(ustring *)Space.data();
        const bool already_common =
            (from == Strings::common
             || from == rop.shadingsys().commonspace_synonym());
        if (already_common)
            return true;  // no transformation necessary
    }

    // The opcode name says which kind of triple is being built.
    TypeDesc::VECSEMANTICS vectype = TypeDesc::POINT;
    if (op.opname() == op_vector)
        vectype = TypeDesc::VECTOR;
    else if (op.opname() == op_normal)
        vectype = TypeDesc::NORMAL;

    // Result is both the input and the output of the transform call.
    llvm::Value *args[] = { rop.sg_void_ptr(),
        rop.llvm_void_ptr(Result), rop.ll.constant(Result.has_derivs()),
        rop.llvm_void_ptr(Result), rop.ll.constant(Result.has_derivs()),
        rop.llvm_load_value(Space), rop.ll.constant(Strings::common),
        rop.ll.constant((int)vectype) };

    RendererServices *rend (rop.shadingsys().renderer());
    const bool maybe_nonlinear =
        rend->transform_points (NULL, from, to, 0.0f, NULL, NULL, 0, vectype);
    if (maybe_nonlinear) {
        // renderer potentially knows about a nonlinear transformation.
        // Note that for the case of non-constant strings, passing empty
        // from & to will make transform_points just tell us if ANY
        // nonlinear transformations potentially are supported.
        rop.ll.call_function ("osl_transform_triple_nonlinear", args);
    } else {
        // definitely not a nonlinear transformation
        rop.ll.call_function ("osl_transform_triple", args);
    }

    return true;
}
1539
1540
1541
1542 /// matrix constructor. Comes in several varieties:
1543 /// matrix (float)
1544 /// matrix (space, float)
1545 /// matrix (...16 floats...)
1546 /// matrix (space, ...16 floats...)
1547 /// matrix (fromspace, tospace)
// The form is recognised purely by argument count (2, 3, 17 or 18); a
// 3-argument call whose last argument is a string is the
// (fromspace, tospace) form.  Derivatives of Result are zeroed.
LLVMGEN (llvm_gen_matrix)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Result = *rop.opargsym (op, 0);
    const int nargs = op.nargs();
    const bool using_space = (nargs == 3 || nargs == 18);
    const bool using_two_spaces = (nargs == 3 && rop.opargsym(op,2)->typespec().is_string());
    const int nfloats = nargs - 1 - (int)using_space;
    OSL_DASSERT (nargs == 2 || nargs == 3 || nargs == 17 || nargs == 18);

    if (using_two_spaces) {
        // The runtime builds the whole matrix from the two space names.
        llvm::Value *args[] = {
            rop.sg_void_ptr(),  // shader globals
            rop.llvm_void_ptr(Result),  // result
            rop.llvm_load_value(*rop.opargsym (op, 1)),  // from
            rop.llvm_load_value(*rop.opargsym (op, 2)),  // to
        };
        rop.ll.call_function ("osl_get_from_to_matrix", args);
    } else {
        const int first_float_arg = 1 + using_space;
        if (nfloats == 1) {
            // One float: that value on the diagonal, zero elsewhere.
            for (int r = 0; r < 4; ++r) {
                for (int c = 0; c < 4; ++c) {
                    llvm::Value* v = (r == c)
                        ? rop.llvm_load_value (*rop.opargsym(op,first_float_arg))
                        : rop.ll.constant(0.0f);
                    rop.llvm_store_value (v, Result, 0, 4*r + c);
                }
            }
        } else if (nfloats == 16) {
            // Sixteen floats: copied straight across in argument order.
            for (int i = 0; i < 16; ++i) {
                llvm::Value* v = rop.llvm_load_value (*rop.opargsym(op,first_float_arg+i));
                rop.llvm_store_value (v, Result, 0, i);
            }
        } else {
            OSL_ASSERT (0);
        }
        if (using_space) {
            llvm::Value *args[] = {
                rop.sg_void_ptr(),  // shader globals
                rop.llvm_void_ptr(Result),  // result
                rop.llvm_load_value(*rop.opargsym (op, 1)),  // from
            };
            rop.ll.call_function ("osl_prepend_matrix_from", args);
        }
    }

    if (Result.has_derivs())
        rop.llvm_zero_derivs (Result);
    return true;
}
1595
1596
1597
1598 /// int getmatrix (fromspace, tospace, M)
// Calls osl_get_from_to_matrix to fill M, stores that call's return value
// in Result, and zeroes the derivatives of M.
LLVMGEN (llvm_gen_getmatrix)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT (op.nargs() == 4);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &From   = *rop.opargsym (op, 1);
    Symbol &To     = *rop.opargsym (op, 2);
    Symbol &M      = *rop.opargsym (op, 3);

    llvm::Value *args[] = {
        rop.sg_void_ptr(),          // shader globals
        rop.llvm_void_ptr(M),       // matrix result
        rop.llvm_load_value(From),  // source space name
        rop.llvm_load_value(To),    // destination space name
    };
    llvm::Value *status = rop.ll.call_function ("osl_get_from_to_matrix", args);

    rop.llvm_store_value (status, Result);
    rop.llvm_zero_derivs (M);
    return true;
}
1619
1620
1621
1622 // transform{,v,n} (string tospace, triple p)
1623 // transform{,v,n} (string fromspace, string tospace, triple p)
1624 // transform{,v,n} (matrix, triple p)
// Handles transform, transformv and transformn.  With 3 op arguments
// there is no "from" symbol: the call is either the matrix form (handed
// to llvm_gen_generic) or a (tospace, p) call whose source space is
// implicitly "common".  With 4 op arguments both space names are given.
// When both names are known at code generation time and turn out to be
// equal (after mapping the "common" synonym), the transform reduces to a
// plain copy of P into Result.
LLVMGEN (llvm_gen_transform)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    int nargs = op.nargs();
    Symbol *Result = rop.opargsym (op, 0);
    Symbol *From = (nargs == 3) ? NULL : rop.opargsym (op, 1);
    Symbol *To = rop.opargsym (op, (nargs == 3) ? 1 : 2);
    Symbol *P = rop.opargsym (op, (nargs == 3) ? 2 : 3);

    if (To->typespec().is_matrix()) {
        // llvm_ops has the matrix version already implemented
        llvm_gen_generic (rop, opnum);
        return true;
    }

    // Named space versions from here on out.
    ustring from, to;  // N.B.: initialize to empty strings
    if ((From == NULL || From->is_constant()) && To->is_constant()) {
        // We can know all the space names at this time
        from = From ? *((ustring *)From->data()) : Strings::common;
        to = *((ustring *)To->data());
        ustring syn = rop.shadingsys().commonspace_synonym();
        if (from == syn)
            from = Strings::common;
        if (to == syn)
            to = Strings::common;
        if (from == to) {
            // An identity transformation, just copy
            if (Result != P)  // don't bother in-place copy
                rop.llvm_assign_impl (*Result, *P);
            return true;
        }
    }

    // The opcode name says how the triple must be transformed.
    TypeDesc::VECSEMANTICS vectype = TypeDesc::POINT;
    if (op.opname() == "transformv")
        vectype = TypeDesc::VECTOR;
    else if (op.opname() == "transformn")
        vectype = TypeDesc::NORMAL;

    // When there is no "from" symbol (From == NULL), the source space is
    // "common" -- the same assumption made above when folding constant
    // space names -- so pass that name as a constant instead of
    // dereferencing a null Symbol pointer.
    llvm::Value *args[] = { rop.sg_void_ptr(),
        rop.llvm_void_ptr(*P), rop.ll.constant(P->has_derivs()),
        rop.llvm_void_ptr(*Result), rop.ll.constant(Result->has_derivs()),
        From ? rop.llvm_load_value(*From) : rop.ll.constant(Strings::common),
        rop.llvm_load_value(*To),
        rop.ll.constant((int)vectype) };

    RendererServices *rend (rop.shadingsys().renderer());
    if (rend->transform_points (NULL, from, to, 0.0f, NULL, NULL, 0, vectype)) {
        // renderer potentially knows about a nonlinear transformation.
        // Note that for the case of non-constant strings, passing empty
        // from & to will make transform_points just tell us if ANY
        // nonlinear transformations potentially are supported.
        rop.ll.call_function ("osl_transform_triple_nonlinear", args);
    } else {
        // definitely not a nonlinear transformation
        rop.ll.call_function ("osl_transform_triple", args);
    }
    return true;
}
1681
1682
1683
1684 // transformc (string fromspace, string tospace, color p)
// Emits a single call to osl_transformc, passing the input color and the
// result by pointer (each with a has-derivs flag) followed by the two
// space-name strings.
LLVMGEN (llvm_gen_transformc)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT (op.nargs() == 4);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &From   = *rop.opargsym (op, 1);
    Symbol &To     = *rop.opargsym (op, 2);
    Symbol &C      = *rop.opargsym (op, 3);

    llvm::Value *args[] = {
        rop.sg_void_ptr(),
        rop.llvm_void_ptr(C),      rop.ll.constant(C.has_derivs()),
        rop.llvm_void_ptr(Result), rop.ll.constant(Result.has_derivs()),
        rop.llvm_load_string (From),
        rop.llvm_load_string (To)
    };
    rop.ll.call_function ("osl_transformc", args);
    return true;
}
1703
1704
1705
1706 // Derivs
// Dx(Src) / Dy(Src): copies derivative slot 1 (for "Dx") or slot 2 (for
// any other opcode routed here) of Src into the value of Result, then
// zeroes Result's own derivatives.
LLVMGEN (llvm_gen_DxDy)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result (*rop.opargsym (op, 0));
    Symbol &Src (*rop.opargsym (op, 1));

    const int which_deriv = (op.opname() == "Dx") ? 1 : 2;
    const int ncomps = Result.typespec().aggregate();

    for (int c = 0; c < ncomps; ++c) {
        llvm::Value *partial = rop.llvm_load_value (Src, which_deriv, c);
        rop.storeLLVMValue (partial, Result, c, 0);
    }

    // Don't have 2nd order derivs
    rop.llvm_zero_derivs (Result);
    return true;
}
1723
1724
1725
1726 // Dz
// Dz(Src): only supported when Src is the shader instance's P symbol, in
// which case derivative slot 3 of Src is copied into Result.  For every
// other source the result is simply zero.
LLVMGEN (llvm_gen_Dz)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result (*rop.opargsym (op, 0));
    Symbol &Src (*rop.opargsym (op, 1));

    const bool src_is_P = (&Src == rop.inst()->symbol(rop.inst()->Psym()));
    if (! src_is_P) {
        // Punt, everything else for now returns 0 for Dz
        // FIXME?
        rop.llvm_assign_zero (Result);
        return true;
    }

    // dPdz -- the only Dz we know how to take
    const int dz_slot = 3;
    const int ncomps  = Result.typespec().aggregate();
    for (int c = 0; c < ncomps; ++c) {
        llvm::Value *partial = rop.llvm_load_value (Src, dz_slot, c);
        rop.storeLLVMValue (partial, Result, c, 0);
    }
    // Don't have 2nd order derivs
    rop.llvm_zero_derivs (Result);
    return true;
}
1749
1750
1751
// filterwidth(Src) for a float or triple Src.  Without derivatives on Src
// the result is zero.  Otherwise the work is done by a runtime helper:
// osl_filterwidth_fdf returns the float result, osl_filterwidth_vdv
// writes the triple result through a pointer.
LLVMGEN (llvm_gen_filterwidth)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result (*rop.opargsym (op, 0));
    Symbol &Src (*rop.opargsym (op, 1));

    OSL_DASSERT (Src.typespec().is_float() || Src.typespec().is_triple());

    if (! Src.has_derivs()) {
        // No derivs to be had
        rop.llvm_assign_zero (Result);
        return true;
    }

    if (Src.typespec().is_float()) {
        llvm::Value *width = rop.ll.call_function ("osl_filterwidth_fdf",
                                                   rop.llvm_void_ptr (Src));
        rop.llvm_store_value (width, Result);
    } else {
        rop.ll.call_function ("osl_filterwidth_vdv",
                              rop.llvm_void_ptr (Result),
                              rop.llvm_void_ptr (Src));
    }
    // Don't have 2nd order derivs
    rop.llvm_zero_derivs (Result);

    return true;
}
1778
1779
1780
1781 // Comparison ops
// Handles lt, le, eq, ge, gt and neq; the result is always one int.
// Multi-component operands are compared component by component, and the
// per-component answers are ORed together for neq and ANDed together for
// every other op.  Closures and (under OptiX) strings get dedicated paths
// that only support equality / inequality.
LLVMGEN (llvm_gen_compare_op)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result (*rop.opargsym (op, 0));
    Symbol &A (*rop.opargsym (op, 1));
    Symbol &B (*rop.opargsym (op, 2));
    OSL_DASSERT (Result.typespec().is_int() && ! Result.has_derivs());

    const ustring opname = op.opname();

    // Closures: the closure pointer is compared against null.
    if (A.typespec().is_closure()) {
        OSL_ASSERT (B.typespec().is_int() &&
                    "Only closure==0 and closure!=0 allowed");
        llvm::Value *closure  = rop.llvm_load_value (A);
        llvm::Value *null_ptr = rop.ll.void_ptr_null ();
        llvm::Value *test = (opname == op_eq) ? rop.ll.op_eq (closure, null_ptr)
                                              : rop.ll.op_ne (closure, null_ptr);
        // Convert the single bit bool into an int
        test = rop.ll.op_bool_to_int (test);
        rop.llvm_store_value (test, Result);
        return true;
    }

    // OptiX strings: compare the loaded device-string values directly.
    if (rop.use_optix() && A.typespec().is_string()) {
        OSL_DASSERT (B.typespec().is_string()
                     && "Only string-to-string comparison is supported");

        llvm::Value *lhs = rop.llvm_load_device_string (A, /*follow*/ true);
        llvm::Value *rhs = rop.llvm_load_device_string (B, /*follow*/ true);

        llvm::Value *same = nullptr;
        if (opname == op_eq) {
            same = rop.ll.op_eq (lhs, rhs);
        } else if (opname == op_neq) {
            same = rop.ll.op_ne (lhs, rhs);
        } else {
            // Don't know how to handle this.
            OSL_ASSERT (0 && "OptiX only supports equality testing for strings");
        }
        OSL_ASSERT (same);

        same = rop.ll.op_bool_to_int (same);
        rop.storeLLVMValue (same, Result, 0, 0);
        return true;
    }

    // General case.  If either side is float-based, both are loaded as
    // floats; otherwise no cast is requested.
    const int  ncomps      = std::max (A.typespec().aggregate(), B.typespec().aggregate());
    const bool float_based = A.typespec().is_float_based() || B.typespec().is_float_based();
    const TypeDesc cast (float_based ? TypeDesc::FLOAT : TypeDesc::UNKNOWN);
    const bool a_is_matrix = A.typespec().is_matrix();
    const bool b_is_matrix = B.typespec().is_matrix();

    llvm::Value *combined = nullptr;
    for (int i = 0; i < ncomps; ++i) {
        // Get A&B component i -- note that these correctly handle mixed
        // scalar/triple comparisons as well as int->float casts as needed.
        llvm::Value *lhs = rop.loadLLVMValue (A, i, 0, cast);
        llvm::Value *rhs = rop.loadLLVMValue (B, i, 0, cast);

        // Trickery for mixed matrix/scalar comparisons -- compare
        // on-diagonal to the scalar, off-diagonal to zero
        const bool off_diagonal = ((i/4) != (i%4));
        if (a_is_matrix && !b_is_matrix && off_diagonal)
            rhs = rop.ll.constant (0.0f);
        if (!a_is_matrix && b_is_matrix && off_diagonal)
            lhs = rop.ll.constant (0.0f);

        // Perform the op
        llvm::Value *test = nullptr;
        if (opname == op_lt) {
            test = rop.ll.op_lt (lhs, rhs);
        } else if (opname == op_le) {
            test = rop.ll.op_le (lhs, rhs);
        } else if (opname == op_eq) {
            test = rop.ll.op_eq (lhs, rhs);
        } else if (opname == op_ge) {
            test = rop.ll.op_ge (lhs, rhs);
        } else if (opname == op_gt) {
            test = rop.ll.op_gt (lhs, rhs);
        } else if (opname == op_neq) {
            test = rop.ll.op_ne (lhs, rhs);
        } else {
            // Don't know how to handle this.
            OSL_ASSERT (0 && "Comparison error");
        }
        OSL_DASSERT (test);

        // Fold this component's answer into the running result.
        if (combined == nullptr)
            combined = test;
        else if (opname == op_neq)   // any component differing is enough
            combined = rop.ll.op_or (combined, test);
        else                         // every component must agree
            combined = rop.ll.op_and (combined, test);
    }
    OSL_ASSERT (combined);

    // Convert the single bit bool into an int for now.
    combined = rop.ll.op_bool_to_int (combined);
    rop.storeLLVMValue (combined, Result, 0, 0);
    return true;
}
1886
1887
1888
1889 // int regex_search (string subject, string pattern)
1890 // int regex_search (string subject, int results[], string pattern)
1891 // int regex_match (string subject, string pattern)
1892 // int regex_match (string subject, int results[], string pattern)
// Code generation shared by regex_search and regex_match: emits a single
// call to osl_regex_impl and stores its return value into the int result.
LLVMGEN (llvm_gen_regex)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    const int argcount = op.nargs();
    OSL_DASSERT (argcount == 3 || argcount == 4);

    // The 4-argument form carries an int results[] array in slot 2, which
    // moves the pattern from slot 2 to slot 3.
    const bool want_results = (argcount == 4);
    const bool fullmatch = (op.opname() == "regex_match");

    Symbol &Result  (*rop.opargsym (op, 0));
    Symbol &Subject (*rop.opargsym (op, 1));
    Symbol &Match   (*rop.opargsym (op, 2));
    Symbol &Pattern (*rop.opargsym (op, want_results ? 3 : 2));
    OSL_DASSERT (Result.typespec().is_int() && Subject.typespec().is_string() &&
                 Pattern.typespec().is_string());
    OSL_DASSERT (!want_results ||
                 (Match.typespec().is_array() &&
                  Match.typespec().elementtype().is_int()));

    // Build the call arguments one at a time, in the order the callee
    // expects them.
    llvm::Value* sg_ptr      = rop.sg_void_ptr();               // ShaderGlobals ptr
    llvm::Value* subject_val = rop.llvm_load_value (Subject);   // subject string
    // Slot 2 is always passed as the results pointer; when no results were
    // requested the accompanying length is 0.
    llvm::Value* results_ptr = rop.llvm_void_ptr (Match);
    llvm::Value* results_len = rop.ll.constant (want_results
                                                ? Match.typespec().arraylength()
                                                : 0);
    llvm::Value* pattern_val = rop.llvm_load_value (Pattern);   // regex pattern
    llvm::Value* full_flag   = rop.ll.constant (fullmatch);     // full match or search

    llvm::Value* call_args[] = {
        sg_ptr, subject_val, results_ptr, results_len, pattern_val, full_flag
    };
    llvm::Value *ret = rop.ll.call_function ("osl_regex_impl", call_args);
    rop.llvm_store_value (ret, Result);
    return true;
}
1924
1925
1926
1927 // Generic llvm code generation. See the comments in llvm_ops.cpp for
1928 // the full list of assumptions and conventions. But in short:
1929 // 1. All polymorphic and derivative cases implemented as functions in
1930 // llvm_ops.cpp -- no custom IR is needed.
// 2. Naming convention is: osl_NAME_{args}, where args is the
1932 // concatenation of type codes for all args including return value --
1933 // f/i/v/m/s for float/int/triple/matrix/string, and df/dv/dm for
1934 // duals.
1935 // 3. The function returns scalars as an actual return value (that
1936 // must be stored), but "returns" aggregates or duals in the first
1937 // argument.
1938 // 4. Duals and aggregates are passed as void*'s, float/int/string
1939 // passed by value.
1940 // 5. Note that this only works if triples are all treated identically,
1941 // this routine can't be used if it must be polymorphic based on
1942 // color, point, vector, normal differences.
1943 //
LLVMGEN (llvm_gen_generic)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    const int nargs = op.nargs();
    const ustring opname = op.opname();

    // Argument symbols live in a fixed-size stack array for the common
    // case; only unusually long argument lists fall back to the heap.
    constexpr int SHORT_NUM_ARGS = 16;
    const Symbol* stack_args[SHORT_NUM_ARGS];
    std::vector<const Symbol*> heap_args;
    const Symbol** args = stack_args;
    if (nargs > SHORT_NUM_ARGS) {
        heap_args.resize (nargs);
        args = heap_args.data();
    }

    // Gather the symbols, noting whether any non-matrix input argument
    // (everything after the result in slot 0) carries derivatives.
    Symbol& Result = *rop.opargsym (op, 0);
    bool any_deriv_args = false;
    for (int i = 0; i < nargs; ++i) {
        Symbol *sym = rop.opargsym (op, i);
        args[i] = sym;
        if (i > 0 && sym->has_derivs() && !sym->typespec().is_matrix())
            any_deriv_args = true;
    }

    // Special cases: functions that have no derivs -- suppress them
    const bool derivless_op = (opname == op_logb  || opname == op_floor ||
                               opname == op_ceil  || opname == op_round ||
                               opname == op_step  || opname == op_trunc ||
                               opname == op_sign);
    if (derivless_op)
        any_deriv_args = false;

    // Compose the callee name: "osl_<op>_" followed by one type code per
    // argument, each prefixed with "d" when it is passed as a dual.
    const bool with_derivs = Result.has_derivs() && any_deriv_args;
    std::string name = "osl_" + opname.string() + "_";
    for (int i = 0; i < nargs; ++i) {
        Symbol *sym = rop.opargsym (op, i);
        const TypeSpec &type = sym->typespec();
        if (with_derivs && sym->has_derivs() && !type.is_matrix())
            name += "d";
        if (type.is_float())
            name += "f";
        else if (type.is_triple())
            name += "v";
        else if (type.is_matrix())
            name += "m";
        else if (type.is_string())
            name += "s";
        else if (type.is_int())
            name += "i";
        else
            OSL_ASSERT (0);
    }

    if (with_derivs) {
        // Derivative version: every argument, result included, is handed
        // to the callee.
        rop.llvm_call_function (name.c_str(),
                                cspan<const Symbol*>(args, nargs),
                                true);
        return true;
    }

    // No derivatives computed -- either not needed or not provided in args.
    if (Result.typespec().aggregate() == TypeDesc::SCALAR) {
        // Scalar results come back as the return value and must be stored.
        llvm::Value *r = rop.llvm_call_function (name.c_str(),
                             cspan<const Symbol*>(args + 1, nargs - 1));
        rop.llvm_store_value (r, Result);
    } else {
        // Aggregate results are written through the first argument.
        rop.llvm_call_function (name.c_str(),
                                cspan<const Symbol*>(args, nargs));
    }
    rop.llvm_zero_derivs (Result);
    return true;
}
2010
2011
2012
// Code generation for sincos(theta, sin_out, cos_out): calls the
// osl_sincos_* variant whose suffix encodes the type (and dual-ness) of
// each of the three arguments.
LLVMGEN (llvm_gen_sincos)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& Theta   = *rop.opargsym (op, 0);
    Symbol& Sin_out = *rop.opargsym (op, 1);
    Symbol& Cos_out = *rop.opargsym (op, 2);

    const bool theta_deriv   = Theta.has_derivs();
    const bool result_derivs = Sin_out.has_derivs() || Cos_out.has_derivs();
    // The dual flavor is only chosen when derivatives can flow from the
    // input to at least one of the outputs.
    const bool use_duals = theta_deriv && result_derivs;

    std::string name ("osl_sincos_");
    for (int i = 0; i < op.nargs(); ++i) {
        Symbol *sym = rop.opargsym (op, i);
        if (use_duals && sym->has_derivs())
            name += "d";
        if (sym->typespec().is_float())
            name += "f";
        else if (sym->typespec().is_triple())
            name += "v";
        else
            OSL_ASSERT (0);
    }

    // Theta is passed by pointer when it is a dual or a triple, otherwise
    // by value. Both outputs are always passed by pointer.
    llvm::Value* theta_arg;
    if (use_duals || Theta.typespec().is_triple())
        theta_arg = rop.llvm_void_ptr (Theta);
    else
        theta_arg = rop.llvm_load_value (Theta);
    llvm::Value* sin_arg = rop.llvm_void_ptr (Sin_out);
    llvm::Value* cos_arg = rop.llvm_void_ptr (Cos_out);

    llvm::Value* valargs[] = { theta_arg, sin_arg, cos_arg };
    rop.ll.call_function (name.c_str(), valargs);

    // Without derivatives on the input angle the non-dual function was
    // called, so any output that carries derivatives must have them zeroed.
    if (!theta_deriv) {
        if (Sin_out.has_derivs())
            rop.llvm_zero_derivs (Sin_out);
        if (Cos_out.has_derivs())
            rop.llvm_zero_derivs (Cos_out);
    }

    return true;
}
2053
2054
2055
// Code generation for the logical "and" / "or" ops on two operands loaded
// as ints. The result stored is the i1 truth value widened to an int.
LLVMGEN (llvm_gen_andor)
{
    Opcode& op (rop.inst()->ops()[opnum]);
    Symbol& result = *rop.opargsym (op, 0);
    Symbol& a = *rop.opargsym (op, 1);
    Symbol& b = *rop.opargsym (op, 2);

    llvm::Value* lhs = rop.llvm_load_value (a, 0, 0, TypeDesc::TypeInt);
    llvm::Value* rhs = rop.llvm_load_value (b, 0, 0, TypeDesc::TypeInt);

    // i1 test "v != 0"
    auto nonzero = [&](llvm::Value* v) -> llvm::Value* {
        return rop.ll.op_ne (v, rop.ll.constant(0));
    };

    llvm::Value* truth;
    if (op.opname() == op_and) {
        // Mirrors the old bitcode for osl_and_iii:
        //     %1    = icmp ne i32 %b, 0
        //     %not. = icmp ne i32 %a, 0
        //     %2    = and i1 %1, %not.
        llvm::Value* rhs_true = nonzero (rhs);
        llvm::Value* lhs_true = nonzero (lhs);
        truth = rop.ll.op_and (rhs_true, lhs_true);
    } else {
        // Mirrors the old bitcode for "or":
        //     %1 = or i32 %b, %a
        //     %2 = icmp ne i32 %1, 0
        truth = nonzero (rop.ll.op_or (lhs, rhs));
    }

    // zext i1 -> i32, then store
    rop.llvm_store_value (rop.ll.op_bool_to_int (truth), result, 0, 0);
    return true;
}
2091
2092
// Code generation for "if": tests the condition symbol, emits the ops
// from opnum+1 to jump(0) into a "then" block and the ops from jump(0) to
// jump(1) into an "else" block, and joins both in a fresh unnamed block.
LLVMGEN (llvm_gen_if)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& cond = *rop.opargsym (op, 0);

    // Nonzero test of the condition variable
    llvm::Value* is_true = rop.llvm_test_nonzero (cond);

    // Create the three blocks, then branch on the condition
    llvm::BasicBlock* bb_then = rop.ll.new_basic_block ("then");
    llvm::BasicBlock* bb_else = rop.ll.new_basic_block ("else");
    llvm::BasicBlock* bb_join = rop.ll.new_basic_block ("");
    rop.ll.op_branch (is_true, bb_then, bb_else);

    // Each arm: generate its op range into its block, then jump to the
    // join block (which leaves the insert point at the join block).
    auto emit_arm = [&](int beginop, int endop, llvm::BasicBlock* bb) {
        rop.build_llvm_code (beginop, endop, bb);
        rop.ll.op_branch (bb_join);
    };
    emit_arm (opnum+1, op.jump(0), bb_then);
    emit_arm (op.jump(0), op.jump(1), bb_else);

    // Subsequent code continues in the join block
    return true;
}
2118
2119
2120
// Code generation for loop ops. The op's four jump targets delimit, in
// order: initialization, condition, body, and step.
LLVMGEN (llvm_gen_loop_op)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol& cond = *rop.opargsym (op, 0);

    // One block per loop stage, plus the block that follows the loop
    llvm::BasicBlock* bb_cond  = rop.ll.new_basic_block ("cond");
    llvm::BasicBlock* bb_body  = rop.ll.new_basic_block ("body");
    llvm::BasicBlock* bb_step  = rop.ll.new_basic_block ("step");
    llvm::BasicBlock* bb_after = rop.ll.new_basic_block ("");
    // Remember step/after so a nested break or continue can find them
    rop.ll.push_loop (bb_step, bb_after);

    // Initialization (will be empty except for "for" loops), emitted into
    // the current block
    rop.build_llvm_code (opnum+1, op.jump(0));

    // "do-while" enters the body directly; "for" and "while" test first
    if (op.opname() == op_dowhile)
        rop.ll.op_branch (bb_body);
    else
        rop.ll.op_branch (bb_cond);

    // Condition: run its ops, test for nonzero, then either enter the body
    // or leave the loop
    rop.build_llvm_code (op.jump(0), op.jump(1), bb_cond);
    llvm::Value* keep_going = rop.llvm_test_nonzero (cond);
    rop.ll.op_branch (keep_going, bb_body, bb_after);

    // Body, falling through to the step
    rop.build_llvm_code (op.jump(1), op.jump(2), bb_body);
    rop.ll.op_branch (bb_step);

    // Step, looping back to the condition
    rop.build_llvm_code (op.jump(2), op.jump(3), bb_step);
    rop.ll.op_branch (bb_cond);

    // Resume the surrounding flow after the loop
    rop.ll.set_insert_point (bb_after);
    rop.ll.pop_loop ();

    return true;
}
2162
2163
2164
// Code generation for "break" and "continue": branch to the innermost
// loop's after-block (break) or step-block (continue), then open a new
// unnamed block to receive whatever ops follow.
LLVMGEN (llvm_gen_loopmod_op)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 0);

    const bool is_break = (op.opname() == op_break);
    llvm::BasicBlock* target = is_break ? rop.ll.loop_after_block()
                                        : rop.ll.loop_step_block();
    rop.ll.op_branch (target);

    rop.ll.set_insert_point (rop.ll.new_basic_block (""));
    return true;
}
2178
2179
2180
2181 static llvm::Value *
llvm_gen_texture_options(BackendLLVM & rop,int opnum,int first_optional_arg,bool tex3d,int nchans,llvm::Value * & alpha,llvm::Value * & dalphadx,llvm::Value * & dalphady,llvm::Value * & errormessage)2182 llvm_gen_texture_options (BackendLLVM &rop, int opnum,
2183 int first_optional_arg, bool tex3d, int nchans,
2184 llvm::Value* &alpha, llvm::Value* &dalphadx,
2185 llvm::Value* &dalphady, llvm::Value* &errormessage)
2186 {
2187 llvm::Value* opt = rop.ll.call_function ("osl_get_texture_options",
2188 rop.sg_void_ptr());
2189 llvm::Value* missingcolor = NULL;
2190 TextureOpt optdefaults; // So we can check the defaults
2191 bool swidth_set = false, twidth_set = false, rwidth_set = false;
2192 bool sblur_set = false, tblur_set = false, rblur_set = false;
2193 bool swrap_set = false, twrap_set = false, rwrap_set = false;
2194 bool firstchannel_set = false, fill_set = false, interp_set = false;
2195 bool time_set = false, subimage_set = false;
2196
2197 Opcode &op (rop.inst()->ops()[opnum]);
2198 for (int a = first_optional_arg; a < op.nargs(); ++a) {
2199 Symbol &Name (*rop.opargsym(op,a));
2200 OSL_DASSERT (Name.typespec().is_string() &&
2201 "optional texture token must be a string");
2202 OSL_DASSERT (a+1 < op.nargs() && "malformed argument list for texture");
2203 ustring name = *(ustring *)Name.data();
2204 ++a; // advance to next argument
2205
2206 if (name.empty()) // skip empty string param name
2207 continue;
2208
2209 Symbol &Val (*rop.opargsym(op,a));
2210 TypeDesc valtype = Val.typespec().simpletype ();
2211 const int *ival = Val.typespec().is_int() && Val.is_constant() ? (const int *)Val.data() : NULL;
2212 const float *fval = Val.typespec().is_float() && Val.is_constant() ? (const float *)Val.data() : NULL;
2213
2214 #define PARAM_INT(paramname) \
2215 if (name == Strings::paramname && valtype == TypeDesc::INT) { \
2216 if (! paramname##_set && \
2217 ival && *ival == optdefaults.paramname) \
2218 continue; /* default constant */ \
2219 llvm::Value *val = rop.llvm_load_value (Val); \
2220 rop.ll.call_function ("osl_texture_set_" #paramname, opt, val); \
2221 paramname##_set = true; \
2222 continue; \
2223 }
2224
2225 #define PARAM_FLOAT(paramname) \
2226 if (name == Strings::paramname && \
2227 (valtype == TypeDesc::FLOAT || valtype == TypeDesc::INT)) { \
2228 if (! paramname##_set && \
2229 ((ival && *ival == optdefaults.paramname) || \
2230 (fval && *fval == optdefaults.paramname))) \
2231 continue; /* default constant */ \
2232 llvm::Value *val = rop.llvm_load_value (Val); \
2233 if (valtype == TypeDesc::INT) \
2234 val = rop.ll.op_int_to_float (val); \
2235 rop.ll.call_function ("osl_texture_set_" #paramname, opt, val); \
2236 paramname##_set = true; \
2237 continue; \
2238 }
2239
2240 #define PARAM_FLOAT_STR(paramname) \
2241 if (name == Strings::paramname && \
2242 (valtype == TypeDesc::FLOAT || valtype == TypeDesc::INT)) { \
2243 if (! s##paramname##_set && ! t##paramname##_set && \
2244 ! r##paramname##_set && \
2245 ((ival && *ival == optdefaults.s##paramname) || \
2246 (fval && *fval == optdefaults.s##paramname))) \
2247 continue; /* default constant */ \
2248 llvm::Value *val = rop.llvm_load_value (Val); \
2249 if (valtype == TypeDesc::INT) \
2250 val = rop.ll.op_int_to_float (val); \
2251 rop.ll.call_function ("osl_texture_set_st" #paramname, opt, val); \
2252 if (tex3d) \
2253 rop.ll.call_function ("osl_texture_set_r" #paramname, opt, val); \
2254 s##paramname##_set = true; \
2255 t##paramname##_set = true; \
2256 r##paramname##_set = true; \
2257 continue; \
2258 }
2259
2260 #define PARAM_STRING_CODE(paramname,decoder,fieldname) \
2261 if (name == Strings::paramname && valtype == TypeDesc::STRING) { \
2262 if (Val.is_constant()) { \
2263 int code = decoder (*(ustring *)Val.data()); \
2264 if (! paramname##_set && code == optdefaults.fieldname) \
2265 continue; \
2266 if (code >= 0) { \
2267 llvm::Value *val = rop.ll.constant (code); \
2268 rop.ll.call_function ("osl_texture_set_" #paramname "_code", opt, val); \
2269 } \
2270 } else { \
2271 llvm::Value *val = rop.llvm_load_value (Val); \
2272 rop.ll.call_function ("osl_texture_set_" #paramname, opt, val); \
2273 } \
2274 paramname##_set = true; \
2275 continue; \
2276 }
2277
2278 PARAM_FLOAT_STR (width)
2279 PARAM_FLOAT (swidth)
2280 PARAM_FLOAT (twidth)
2281 PARAM_FLOAT (rwidth)
2282 PARAM_FLOAT_STR (blur)
2283 PARAM_FLOAT (sblur)
2284 PARAM_FLOAT (tblur)
2285 PARAM_FLOAT (rblur)
2286
2287 if (name == Strings::wrap && valtype == TypeDesc::STRING) {
2288 if (Val.is_constant()) {
2289 int mode = TextureOpt::decode_wrapmode (*(ustring *)Val.data());
2290 llvm::Value *val = rop.ll.constant (mode);
2291 rop.ll.call_function ("osl_texture_set_stwrap_code", opt, val);
2292 if (tex3d)
2293 rop.ll.call_function ("osl_texture_set_rwrap_code", opt, val);
2294 } else {
2295 llvm::Value *val = rop.llvm_load_value (Val);
2296 rop.ll.call_function ("osl_texture_set_stwrap", opt, val);
2297 if (tex3d)
2298 rop.ll.call_function ("osl_texture_set_rwrap", opt, val);
2299 }
2300 swrap_set = twrap_set = rwrap_set = true;
2301 continue;
2302 }
2303 PARAM_STRING_CODE(swrap, TextureOpt::decode_wrapmode, swrap)
2304 PARAM_STRING_CODE(twrap, TextureOpt::decode_wrapmode, twrap)
2305 PARAM_STRING_CODE(rwrap, TextureOpt::decode_wrapmode, rwrap)
2306
2307 PARAM_FLOAT (fill)
2308 PARAM_FLOAT (time)
2309 PARAM_INT (firstchannel)
2310 PARAM_INT (subimage)
2311
2312 if (name == Strings::subimage && valtype == TypeDesc::STRING) {
2313 if (Val.is_constant()) {
2314 ustring v = *(ustring *)Val.data();
2315 if (v.empty() && ! subimage_set) {
2316 continue; // Ignore nulls unless they are overrides
2317 }
2318 }
2319 llvm::Value *val = rop.llvm_load_value (Val);
2320 rop.ll.call_function ("osl_texture_set_subimagename", opt, val);
2321 subimage_set = true;
2322 continue;
2323 }
2324
2325 PARAM_STRING_CODE (interp, tex_interp_to_code, interpmode)
2326
2327 if (name == Strings::alpha && valtype == TypeDesc::FLOAT) {
2328 alpha = rop.llvm_get_pointer (Val);
2329 if (Val.has_derivs()) {
2330 dalphadx = rop.llvm_get_pointer (Val, 1);
2331 dalphady = rop.llvm_get_pointer (Val, 2);
2332 // NO z derivs! dalphadz = rop.llvm_get_pointer (Val, 3);
2333 }
2334 continue;
2335 }
2336 if (name == Strings::errormessage && valtype == TypeDesc::STRING) {
2337 errormessage = rop.llvm_get_pointer (Val);
2338 continue;
2339 }
2340 if (name == Strings::missingcolor &&
2341 equivalent(valtype,TypeDesc::TypeColor)) {
2342 if (! missingcolor) {
2343 // If not already done, allocate enough storage for the
2344 // missingcolor value (4 floats), and call the special
2345 // function that points the TextureOpt.missingcolor to it.
2346 missingcolor = rop.ll.op_alloca(rop.ll.type_float(), 4);
2347 rop.ll.call_function ("osl_texture_set_missingcolor_arena",
2348 opt, rop.ll.void_ptr(missingcolor));
2349 }
2350 rop.ll.op_memcpy (rop.ll.void_ptr(missingcolor),
2351 rop.llvm_void_ptr(Val), (int)sizeof(Color3));
2352 continue;
2353 }
2354 if (name == Strings::missingalpha && valtype == TypeDesc::FLOAT) {
2355 if (! missingcolor) {
2356 // If not already done, allocate enough storage for the
2357 // missingcolor value (4 floats), and call the special
2358 // function that points the TextureOpt.missingcolor to it.
2359 missingcolor = rop.ll.op_alloca(rop.ll.type_float(), 4);
2360 rop.ll.call_function ("osl_texture_set_missingcolor_arena",
2361 opt, rop.ll.void_ptr(missingcolor));
2362 }
2363 llvm::Value *val = rop.llvm_load_value (Val);
2364 rop.ll.call_function ("osl_texture_set_missingcolor_alpha",
2365 opt, rop.ll.constant(nchans), val);
2366 continue;
2367
2368 }
2369 rop.shadingcontext()->errorf("Unknown texture%s optional argument: \"%s\", <%s> (%s:%d)",
2370 tex3d ? "3d" : "", name, valtype,
2371 op.sourcefile(), op.sourceline());
2372 #undef PARAM_INT
2373 #undef PARAM_FLOAT
2374 #undef PARAM_FLOAT_STR
2375 #undef PARAM_STRING_CODE
2376
2377 #if 0
2378 // Helps me find any constant optional params that aren't elided
2379 if (Name.is_constant() && Val.is_constant()) {
2380 std::cout << "! texture constant optional arg '" << name << "'\n";
2381 if (Val.typespec().is_float()) std::cout << "\tf " << *(float *)Val.data() << "\n";
2382 if (Val.typespec().is_int()) std::cout << "\ti " << *(int *)Val.data() << "\n";
2383 if (Val.typespec().is_string()) std::cout << "\t" << *(ustring *)Val.data() << "\n";
2384 }
2385 #endif
2386 }
2387
2388 return opt;
2389 }
2390
2391
2392
// Code generation for the 2D texture lookup: sets up the texture options,
// optionally resolves a texture handle at code generation time, and emits
// the call to osl_texture.
LLVMGEN (llvm_gen_texture)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result   = *rop.opargsym (op, 0);
    Symbol &Filename = *rop.opargsym (op, 1);
    Symbol &S        = *rop.opargsym (op, 2);
    Symbol &T        = *rop.opargsym (op, 3);
    int nchans = Result.typespec().aggregate();

    // A float in slot 4 means explicit derivatives occupy slots 4-7 and
    // the optional token/value pairs start at slot 8 instead of 4.
    const bool user_derivs = (op.nargs() > 4 &&
                              rop.opargsym(op,4)->typespec().is_float());
    const int first_optional_arg = user_derivs ? 8 : 4;
    if (user_derivs) {
        OSL_DASSERT(rop.opargsym(op,5)->typespec().is_float());
        OSL_DASSERT(rop.opargsym(op,6)->typespec().is_float());
        OSL_DASSERT(rop.opargsym(op,7)->typespec().is_float());
    }

    llvm::Value *alpha = NULL, *dalphadx = NULL, *dalphady = NULL;
    llvm::Value *errormessage = NULL;
    llvm::Value* opt =   // TextureOpt
        llvm_gen_texture_options (rop, opnum, first_optional_arg,
                                  false /*3d*/, nchans,
                                  alpha, dalphadx, dalphady, errormessage);

    // With a constant filename (and the option enabled), look the handle
    // up now; otherwise a null handle is passed.
    RendererServices::TextureHandle *texture_handle = NULL;
    if (Filename.is_constant() && rop.shadingsys().opt_texture_handle()) {
        texture_handle = rop.renderer()->get_texture_handle (*(ustring *)Filename.data(), rop.shadingcontext());
    }

    // Either the user-supplied derivative in slot `user_slot`, or
    // derivative number `which` of the coordinate itself.
    auto deriv_value = [&](int user_slot, Symbol &coord, int which) -> llvm::Value* {
        return user_derivs ? rop.llvm_load_value (*rop.opargsym (op, user_slot))
                           : rop.llvm_load_value (coord, which);
    };
    // void* cast of an optional output pointer, null when absent
    auto optional_ptr = [&](llvm::Value* p) -> llvm::Value* {
        return rop.ll.void_ptr (p ? p : rop.ll.void_ptr_null());
    };

    // Now call the osl_texture function, passing the options and all the
    // explicit args like texture coordinates.
    llvm::Value * args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Filename),
        rop.ll.constant_ptr (texture_handle),
        opt,
        rop.llvm_load_value (S),
        rop.llvm_load_value (T),
        deriv_value (4, S, 1),
        deriv_value (5, T, 1),
        deriv_value (6, S, 2),
        deriv_value (7, T, 2),
        rop.ll.constant (nchans),
        rop.ll.void_ptr (rop.llvm_get_pointer (Result, 0)),
        rop.ll.void_ptr (rop.llvm_get_pointer (Result, 1)),
        rop.ll.void_ptr (rop.llvm_get_pointer (Result, 2)),
        optional_ptr (alpha),
        optional_ptr (dalphadx),
        optional_ptr (dalphady),
        optional_ptr (errormessage),
    };
    rop.ll.call_function ("osl_texture", args);
    rop.generated_texture_call (texture_handle != NULL);
    return true;
}
2450
2451
2452
// Code generation for the 3D texture lookup: sets up the texture options,
// optionally resolves a texture handle at code generation time, and emits
// the call to osl_texture3d.
LLVMGEN (llvm_gen_texture3d)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result   = *rop.opargsym (op, 0);
    Symbol &Filename = *rop.opargsym (op, 1);
    Symbol &P        = *rop.opargsym (op, 2);
    int nchans = Result.typespec().aggregate();

    // A triple in slot 3 means explicit derivatives occupy slots 3-4 and
    // the optional token/value pairs start at slot 5 instead of 3.
    const bool user_derivs = (op.nargs() > 3 &&
                              rop.opargsym(op,3)->typespec().is_triple());
    const int first_optional_arg = user_derivs ? 5 : 3;
    if (user_derivs) {
        OSL_DASSERT(rop.opargsym(op,4)->typespec().is_triple());
    }

    llvm::Value *alpha = NULL, *dalphadx = NULL, *dalphady = NULL;
    llvm::Value *errormessage = NULL;
    llvm::Value* opt =   // TextureOpt
        llvm_gen_texture_options (rop, opnum, first_optional_arg,
                                  true /*3d*/, nchans,
                                  alpha, dalphadx, dalphady, errormessage);

    // With a constant filename (and the option enabled), look the handle
    // up now; otherwise a null handle is passed.
    RendererServices::TextureHandle *texture_handle = NULL;
    if (Filename.is_constant() && rop.shadingsys().opt_texture_handle()) {
        texture_handle = rop.renderer()->get_texture_handle (*(ustring *)Filename.data(), rop.shadingcontext());
    }

    // Either the user-supplied derivative in slot `user_slot`, or
    // derivative number `which` of P itself (auto derivs).
    auto deriv_ptr = [&](int user_slot, int which) -> llvm::Value* {
        return user_derivs ? rop.llvm_void_ptr (*rop.opargsym (op, user_slot))
                           : rop.llvm_void_ptr (P, which);
    };
    // void* cast of an optional output pointer, null when absent
    auto optional_ptr = [&](llvm::Value* p) -> llvm::Value* {
        return rop.ll.void_ptr (p ? p : rop.ll.void_ptr_null());
    };

    // Now call the osl_texture3d function, passing the options and all the
    // explicit args like texture coordinates.
    llvm::Value *args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Filename),
        rop.ll.constant_ptr (texture_handle),
        opt,
        rop.llvm_void_ptr (P),
        deriv_ptr (3, 1),
        deriv_ptr (4, 2),
        rop.ll.constant (nchans),
        rop.ll.void_ptr (rop.llvm_void_ptr (Result, 0)),
        rop.ll.void_ptr (rop.llvm_void_ptr (Result, 1)),
        rop.ll.void_ptr (rop.llvm_void_ptr (Result, 2)),
        optional_ptr (alpha),
        optional_ptr (dalphadx),
        optional_ptr (dalphady),
        optional_ptr (errormessage),
    };
    rop.ll.call_function ("osl_texture3d", args);
    rop.generated_texture_call (texture_handle != NULL);
    return true;
}
2506
2507
2508
// Code generation for the environment lookup: sets up the texture
// options, optionally resolves a texture handle at code generation time,
// and emits the call to osl_environment.
LLVMGEN (llvm_gen_environment)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result   = *rop.opargsym (op, 0);
    Symbol &Filename = *rop.opargsym (op, 1);
    Symbol &R        = *rop.opargsym (op, 2);
    int nchans = Result.typespec().aggregate();

    // A triple in slot 3 means explicit derivatives occupy slots 3-4 and
    // the optional token/value pairs start at slot 5 instead of 3.
    const bool user_derivs = (op.nargs() > 3 &&
                              rop.opargsym(op,3)->typespec().is_triple());
    const int first_optional_arg = user_derivs ? 5 : 3;
    if (user_derivs) {
        OSL_DASSERT(rop.opargsym(op,4)->typespec().is_triple());
    }

    llvm::Value *alpha = NULL, *dalphadx = NULL, *dalphady = NULL;
    llvm::Value *errormessage = NULL;
    llvm::Value* opt =   // TextureOpt
        llvm_gen_texture_options (rop, opnum, first_optional_arg,
                                  false /*3d*/, nchans,
                                  alpha, dalphadx, dalphady, errormessage);

    // With a constant filename (and the option enabled), look the handle
    // up now; otherwise a null handle is passed.
    RendererServices::TextureHandle *texture_handle = NULL;
    if (Filename.is_constant() && rop.shadingsys().opt_texture_handle()) {
        texture_handle = rop.renderer()->get_texture_handle (*(ustring *)Filename.data(), rop.shadingcontext());
    }

    // Either the user-supplied derivative in slot `user_slot`, or
    // derivative number `which` of R itself.
    auto deriv_ptr = [&](int user_slot, int which) -> llvm::Value* {
        return user_derivs ? rop.llvm_void_ptr (*rop.opargsym (op, user_slot))
                           : rop.llvm_void_ptr (R, which);
    };
    // Optional alpha output pointer: cast when present, plain null
    // otherwise.
    auto alpha_ptr = [&](llvm::Value* p) -> llvm::Value* {
        return p ? rop.ll.void_ptr (p) : rop.ll.void_ptr_null();
    };

    // Now call the osl_environment function, passing the options and all
    // the explicit args like texture coordinates.
    llvm::Value *args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Filename),
        rop.ll.constant_ptr (texture_handle),
        opt,
        rop.llvm_void_ptr (R),
        deriv_ptr (3, 1),
        deriv_ptr (4, 2),
        rop.ll.constant (nchans),
        rop.llvm_void_ptr (Result, 0),
        rop.llvm_void_ptr (Result, 1),
        rop.llvm_void_ptr (Result, 2),
        alpha_ptr (alpha),
        alpha_ptr (dalphadx),
        alpha_ptr (dalphady),
        rop.ll.void_ptr (errormessage ? errormessage : rop.ll.void_ptr_null()),
    };
    rop.ll.call_function ("osl_environment", args);
    rop.generated_texture_call (texture_handle != NULL);
    return true;
}
2560
2561
2562
2563 static llvm::Value *
llvm_gen_trace_options(BackendLLVM & rop,int opnum,int first_optional_arg)2564 llvm_gen_trace_options (BackendLLVM &rop, int opnum,
2565 int first_optional_arg)
2566 {
2567 llvm::Value* opt = rop.ll.call_function ("osl_get_trace_options",
2568 rop.sg_void_ptr());
2569 Opcode &op (rop.inst()->ops()[opnum]);
2570 for (int a = first_optional_arg; a < op.nargs(); ++a) {
2571 Symbol &Name (*rop.opargsym(op,a));
2572 OSL_DASSERT (Name.typespec().is_string() &&
2573 "optional trace token must be a string");
2574 OSL_DASSERT (a+1 < op.nargs() && "malformed argument list for trace");
2575 ustring name = *(ustring *)Name.data();
2576
2577 ++a; // advance to next argument
2578 Symbol &Val (*rop.opargsym(op,a));
2579 TypeDesc valtype = Val.typespec().simpletype ();
2580
2581 llvm::Value *val = rop.llvm_load_value (Val);
2582 if (name == Strings::mindist && valtype == TypeDesc::FLOAT) {
2583 rop.ll.call_function ("osl_trace_set_mindist", opt, val);
2584 } else if (name == Strings::maxdist && valtype == TypeDesc::FLOAT) {
2585 rop.ll.call_function ("osl_trace_set_maxdist", opt, val);
2586 } else if (name == Strings::shade && valtype == TypeDesc::INT) {
2587 rop.ll.call_function ("osl_trace_set_shade", opt, val);
2588 } else if (name == Strings::traceset && valtype == TypeDesc::STRING) {
2589 rop.ll.call_function ("osl_trace_set_traceset", opt, val);
2590 } else {
2591 rop.shadingcontext()->errorf("Unknown trace() optional argument: \"%s\", <%s> (%s:%d)",
2592 name, valtype,
2593 op.sourcefile(), op.sourceline());
2594 }
2595 }
2596
2597 return opt;
2598 }
2599
2600
2601
// Code generation for trace: sets up the trace options from the optional
// arguments (slot 3 onward), calls osl_trace with the position and
// direction (value plus both derivative slots of each), and stores the
// return value into the result.
LLVMGEN (llvm_gen_trace)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &Pos    = *rop.opargsym (op, 1);
    Symbol &Dir    = *rop.opargsym (op, 2);
    const int first_optional_arg = 3;

    llvm::Value* opt = llvm_gen_trace_options (rop, opnum, first_optional_arg);  // TraceOpt

    // Argument layout: shader globals, options, then Pos and Dir each as
    // three pointers (value, derivative 1, derivative 2).
    llvm::Value *args[8];
    int n = 0;
    args[n++] = rop.sg_void_ptr();
    args[n++] = opt;
    for (int d = 0; d < 3; ++d)
        args[n++] = rop.llvm_void_ptr (Pos, d);
    for (int d = 0; d < 3; ++d)
        args[n++] = rop.llvm_void_ptr (Dir, d);

    llvm::Value *hit = rop.ll.call_function ("osl_trace", args);
    rop.llvm_store_value (hit, Result);
    return true;
}
2629
2630
2631
2632 static std::string
arg_typecode(Symbol * sym,bool derivs)2633 arg_typecode (Symbol *sym, bool derivs)
2634 {
2635 const TypeSpec &t (sym->typespec());
2636 if (t.is_int())
2637 return "i";
2638 else if (t.is_matrix())
2639 return "m";
2640 else if (t.is_string())
2641 return "s";
2642
2643 std::string name;
2644 if (derivs)
2645 name = "d";
2646 if (t.is_float())
2647 name += "f";
2648 else if (t.is_triple())
2649 name += "v";
2650 else OSL_ASSERT (0);
2651 return name;
2652 }
2653
2654
2655
2656 static llvm::Value *
llvm_gen_noise_options(BackendLLVM & rop,int opnum,int first_optional_arg)2657 llvm_gen_noise_options (BackendLLVM &rop, int opnum,
2658 int first_optional_arg)
2659 {
2660 llvm::Value* opt = rop.ll.call_function ("osl_get_noise_options",
2661 rop.sg_void_ptr());
2662
2663 Opcode &op (rop.inst()->ops()[opnum]);
2664 for (int a = first_optional_arg; a < op.nargs(); ++a) {
2665 Symbol &Name (*rop.opargsym(op,a));
2666 OSL_DASSERT (Name.typespec().is_string() &&
2667 "optional noise token must be a string");
2668 OSL_DASSERT (a+1 < op.nargs() && "malformed argument list for noise");
2669 ustring name = *(ustring *)Name.data();
2670
2671 ++a; // advance to next argument
2672 Symbol &Val (*rop.opargsym(op,a));
2673 TypeDesc valtype = Val.typespec().simpletype ();
2674
2675 if (name.empty()) // skip empty string param name
2676 continue;
2677
2678 if (name == Strings::anisotropic && Val.typespec().is_int()) {
2679 rop.ll.call_function ("osl_noiseparams_set_anisotropic", opt,
2680 rop.llvm_load_value (Val));
2681 } else if (name == Strings::do_filter && Val.typespec().is_int()) {
2682 rop.ll.call_function ("osl_noiseparams_set_do_filter", opt,
2683 rop.llvm_load_value (Val));
2684 } else if (name == Strings::direction && Val.typespec().is_triple()) {
2685 rop.ll.call_function ("osl_noiseparams_set_direction", opt,
2686 rop.llvm_void_ptr (Val));
2687 } else if (name == Strings::bandwidth &&
2688 (Val.typespec().is_float() || Val.typespec().is_int())) {
2689 rop.ll.call_function ("osl_noiseparams_set_bandwidth", opt,
2690 rop.llvm_load_value (Val, 0, NULL, 0,
2691 TypeDesc::TypeFloat));
2692 } else if (name == Strings::impulses &&
2693 (Val.typespec().is_float() || Val.typespec().is_int())) {
2694 rop.ll.call_function ("osl_noiseparams_set_impulses", opt,
2695 rop.llvm_load_value (Val, 0, NULL, 0,
2696 TypeDesc::TypeFloat));
2697 } else {
2698 rop.shadingcontext()->errorf("Unknown %s optional argument: \"%s\", <%s> (%s:%d)",
2699 op.opname(), name, valtype,
2700 op.sourcefile(), op.sourceline());
2701 }
2702 }
2703 return opt;
2704 }
2705
2706
2707
2708 // T noise ([string name,] float s, ...);
2709 // T noise ([string name,] float s, float t, ...);
2710 // T noise ([string name,] point P, ...);
2711 // T noise ([string name,] point P, float t, ...);
2712 // T pnoise ([string name,] float s, float sper, ...);
2713 // T pnoise ([string name,] float s, float t, float sper, float tper, ...);
2714 // T pnoise ([string name,] point P, point Pper, ...);
2715 // T pnoise ([string name,] point P, float t, point Pper, float tper, ...);
LLVMGEN(llvm_gen_noise)2716 LLVMGEN (llvm_gen_noise)
2717 {
2718 Opcode &op (rop.inst()->ops()[opnum]);
2719 bool periodic = (op.opname() == Strings::pnoise ||
2720 op.opname() == Strings::psnoise);
2721
2722 int arg = 0; // Next arg to read
2723 Symbol &Result = *rop.opargsym (op, arg++);
2724 int outdim = Result.typespec().is_triple() ? 3 : 1;
2725 Symbol *Name = rop.opargsym (op, arg++);
2726 ustring name;
2727 if (Name->typespec().is_string()) {
2728 name = Name->is_constant() ? *(ustring *)Name->data() : ustring();
2729 } else {
2730 // Not a string, must be the old-style noise/pnoise
2731 --arg; // forget that arg
2732 Name = NULL;
2733 name = op.opname();
2734 }
2735
2736 Symbol *S = rop.opargsym (op, arg++), *T = NULL;
2737 Symbol *Sper = NULL, *Tper = NULL;
2738 int indim = S->typespec().is_triple() ? 3 : 1;
2739 bool derivs = S->has_derivs();
2740
2741 if (periodic) {
2742 if (op.nargs() > (arg+1) &&
2743 (rop.opargsym(op,arg+1)->typespec().is_float() ||
2744 rop.opargsym(op,arg+1)->typespec().is_triple())) {
2745 // 2D or 4D
2746 ++indim;
2747 T = rop.opargsym (op, arg++);
2748 derivs |= T->has_derivs();
2749 }
2750 Sper = rop.opargsym (op, arg++);
2751 if (indim == 2 || indim == 4)
2752 Tper = rop.opargsym (op, arg++);
2753 } else {
2754 // non-periodic case
2755 if (op.nargs() > arg && rop.opargsym(op,arg)->typespec().is_float()) {
2756 // either 2D or 4D, so needs a second index
2757 ++indim;
2758 T = rop.opargsym (op, arg++);
2759 derivs |= T->has_derivs();
2760 }
2761 }
2762 derivs &= Result.has_derivs(); // ignore derivs if result doesn't need
2763
2764 bool pass_name = false, pass_sg = false, pass_options = false;
2765 if (name.empty()) {
2766 // name is not a constant
2767 name = periodic ? Strings::genericpnoise : Strings::genericnoise;
2768 pass_name = true;
2769 pass_sg = true;
2770 pass_options = true;
2771 derivs = true; // always take derivs if we don't know noise type
2772 } else if (name == Strings::perlin || name == Strings::snoise ||
2773 name == Strings::psnoise) {
2774 name = periodic ? Strings::psnoise : Strings::snoise;
2775 // derivs = false;
2776 } else if (name == Strings::uperlin || name == Strings::noise ||
2777 name == Strings::pnoise) {
2778 name = periodic ? Strings::pnoise : Strings::noise;
2779 // derivs = false;
2780 } else if (name == Strings::cell || name == Strings::cellnoise) {
2781 name = periodic ? Strings::pcellnoise : Strings::cellnoise;
2782 derivs = false; // cell noise derivs are always zero
2783 } else if (name == Strings::hash || name == Strings::hashnoise) {
2784 name = periodic ? Strings::phashnoise : Strings::hashnoise;
2785 derivs = false; // hash noise derivs are always zero
2786 } else if (name == Strings::simplex && !periodic) {
2787 name = Strings::simplexnoise;
2788 } else if (name == Strings::usimplex && !periodic) {
2789 name = Strings::usimplexnoise;
2790 } else if (name == Strings::gabor) {
2791 // already named
2792 pass_name = true;
2793 pass_sg = true;
2794 pass_options = true;
2795 derivs = true;
2796 name = periodic ? Strings::gaborpnoise : Strings::gabornoise;
2797 } else {
2798 rop.shadingcontext()->errorf("%snoise type \"%s\" is unknown, called from (%s:%d)",
2799 (periodic ? "periodic " : ""), name,
2800 op.sourcefile(), op.sourceline());
2801 return false;
2802 }
2803
2804 if (rop.shadingsys().no_noise()) {
2805 // renderer option to replace noise with constant value. This can be
2806 // useful as a profiling aid, to see how much it speeds up to have
2807 // trivial expense for noise calls.
2808 if (name == Strings::uperlin || name == Strings::noise ||
2809 name == Strings::usimplexnoise || name == Strings::usimplex ||
2810 name == Strings::cell || name == Strings::cellnoise ||
2811 name == Strings::hash || name == Strings::hashnoise ||
2812 name == Strings::pcellnoise || name == Strings::pnoise)
2813 name = ustring("unullnoise");
2814 else
2815 name = ustring("nullnoise");
2816 pass_name = false;
2817 periodic = false;
2818 pass_sg = false;
2819 pass_options = false;
2820 }
2821
2822 llvm::Value *opt = NULL;
2823 if (pass_options) {
2824 opt = llvm_gen_noise_options (rop, opnum, arg);
2825 }
2826
2827 std::string funcname = "osl_" + name.string() + "_" + arg_typecode(&Result,derivs);
2828 llvm::Value * args[10]; int nargs = 0;
2829 if (pass_name) {
2830 args[nargs++] = rop.llvm_load_string (*Name);
2831 }
2832 llvm::Value *tmpresult = NULL;
2833 // triple return, or float return with derivs, passes result pointer
2834 if (outdim == 3 || derivs) {
2835 if (derivs && !Result.has_derivs()) {
2836 tmpresult = rop.llvm_load_arg (Result, true);
2837 args[nargs++] = tmpresult;
2838 }
2839 else
2840 args[nargs++] = rop.llvm_void_ptr (Result);
2841 }
2842 funcname += arg_typecode(S, derivs);
2843 args[nargs++] = rop.llvm_load_arg (*S, derivs);
2844 if (T) {
2845 funcname += arg_typecode(T, derivs);
2846 args[nargs++] = rop.llvm_load_arg (*T, derivs);
2847 }
2848
2849 if (periodic) {
2850 funcname += arg_typecode (Sper, false /* no derivs */);
2851 args[nargs++] = rop.llvm_load_arg (*Sper, false);
2852 if (Tper) {
2853 funcname += arg_typecode (Tper, false /* no derivs */);
2854 args[nargs++] = rop.llvm_load_arg (*Tper, false);
2855 }
2856 }
2857
2858 if (pass_sg)
2859 args[nargs++] = rop.sg_void_ptr();
2860 if (pass_options)
2861 args[nargs++] = opt;
2862
2863 OSL_DASSERT(nargs < int(sizeof(args) / sizeof(args[0])));
2864
2865 #if 0
2866 llvm::outs() << "About to push " << funcname << "\n";
2867 for (int i = 0; i < nargs; ++i)
2868 llvm::outs() << " " << *args[i] << "\n";
2869 #endif
2870
2871 llvm::Value *r = rop.ll.call_function (funcname.c_str(), cspan<llvm::Value*>(args, args + nargs));
2872 if (outdim == 1 && !derivs) {
2873 // Just plain float (no derivs) returns its value
2874 rop.llvm_store_value (r, Result);
2875 } else if (derivs && !Result.has_derivs()) {
2876 // Function needed to take derivs, but our result doesn't have them.
2877 // We created a temp, now we need to copy to the real result.
2878 tmpresult = rop.llvm_ptr_cast (tmpresult, Result.typespec());
2879 for (int c = 0; c < Result.typespec().aggregate(); ++c) {
2880 llvm::Value *v = rop.llvm_load_value (tmpresult, Result.typespec(),
2881 0, NULL, c);
2882 rop.llvm_store_value (v, Result, 0, c);
2883 }
2884 } // N.B. other cases already stored their result in the right place
2885
2886 // Clear derivs if result has them but we couldn't compute them
2887 if (Result.has_derivs() && !derivs)
2888 rop.llvm_zero_derivs (Result);
2889
2890 if (rop.shadingsys().profile() >= 1)
2891 rop.ll.call_function ("osl_count_noise", rop.sg_void_ptr());
2892
2893 return true;
2894 }
2895
2896
2897
LLVMGEN (llvm_gen_getattribute)
{
    // Accepted argument layouts (op argument 0 is always the result):
    //   * getattribute (attribute_name, value)
    //   * getattribute (attribute_name, value[])
    //   * getattribute (attribute_name, index, value)
    //   * getattribute (attribute_name, index, value[])
    //   * getattribute (object, attribute_name, value)
    //   * getattribute (object, attribute_name, value[])
    //   * getattribute (object, attribute_name, index, value)
    //   * getattribute (object, attribute_name, index, value[])
    Opcode &op (rop.inst()->ops()[opnum]);
    const int nargs = op.nargs();
    OSL_DASSERT(nargs >= 3 && nargs <= 5);

    // An int right before the destination means an index was supplied.
    // A string in argument 2 of a call with at least four arguments means
    // argument 1 is an object name.
    const bool array_lookup  = rop.opargsym(op,nargs-2)->typespec().is_int();
    const bool object_lookup = rop.opargsym(op,2)->typespec().is_string() && nargs >= 4;
    const int object_slot = object_lookup ? 1 : 0;
    const int attrib_slot = object_slot + 1;
    const int index_slot  = array_lookup ? nargs - 2 : 0;

    Symbol &Result      = *rop.opargsym (op, 0);
    // ObjectName is meaningful only when object_lookup is set, and Index
    // only when array_lookup is set; otherwise both simply refer to
    // argument 0.
    Symbol &ObjectName  = *rop.opargsym (op, object_slot);
    Symbol &Attribute   = *rop.opargsym (op, attrib_slot);
    Symbol &Index       = *rop.opargsym (op, index_slot);
    Symbol &Destination = *rop.opargsym (op, nargs-1);
    OSL_DASSERT(!Result.typespec().is_closure_based() &&
                !ObjectName.typespec().is_closure_based() &&
                !Attribute.typespec().is_closure_based() &&
                !Index.typespec().is_closure_based() &&
                !Destination.typespec().is_closure_based());

    // The destination's type descriptor is handed to the callee by address,
    // so that the renderer can perform any necessary conversions from its
    // internal format to OSL's.
    const TypeDesc *dest_type = &Destination.typespec().simpletype();

    llvm::Value *call_args[] = {
        rop.sg_void_ptr(),
        rop.ll.constant ((int)Destination.has_derivs()),
        object_lookup ? rop.llvm_load_value (ObjectName) : rop.ll.constant (ustring()),
        rop.llvm_load_value (Attribute),
        rop.ll.constant ((int)array_lookup),
        rop.llvm_load_value (Index),
        rop.ll.constant_ptr ((void *) dest_type),
        rop.llvm_void_ptr (Destination),
    };
    llvm::Value *status = rop.ll.call_function ("osl_get_attribute", call_args);
    rop.llvm_store_value (status, Result);

    return true;
}
2950
2951
2952
// Generate the call to "osl_get_textureinfo" for an op with exactly four
// arguments: (int result, string filename, string dataname, data).
LLVMGEN (llvm_gen_gettextureinfo)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 4);

    Symbol &Result   = *rop.opargsym (op, 0);
    Symbol &Filename = *rop.opargsym (op, 1);
    Symbol &Dataname = *rop.opargsym (op, 2);
    Symbol &Data     = *rop.opargsym (op, 3);

    OSL_DASSERT(!Result.typespec().is_closure_based() &&
                Filename.typespec().is_string() &&
                Dataname.typespec().is_string() &&
                !Data.typespec().is_closure_based() &&
                Result.typespec().is_int());

    // With a constant filename (and the texture-handle optimization turned
    // on) ask the renderer for a handle now; otherwise a null handle is
    // passed to the callee.
    RendererServices::TextureHandle *texture_handle = NULL;
    if (Filename.is_constant() && rop.shadingsys().opt_texture_handle()) {
        texture_handle = rop.renderer()->get_texture_handle (*(ustring *)Filename.data(), rop.shadingcontext());
    }

    const TypeDesc &datatype = Data.typespec().simpletype();
    llvm::Value *call_args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Filename),
        rop.ll.constant_ptr (texture_handle),
        rop.llvm_load_value (Dataname),
        // the data's TypeDesc is passed as three separate ints
        rop.ll.constant((int) datatype.basetype),
        rop.ll.constant((int) datatype.arraylen),
        rop.ll.constant((int) datatype.aggregate),
        // destination
        rop.llvm_void_ptr (Data),
        // errormessage
        rop.ll.void_ptr_null(),
    };
    llvm::Value *status = rop.ll.call_function ("osl_get_textureinfo", call_args);
    rop.llvm_store_value (status, Result);

    // Do not leave derivs uninitialized
    if (Data.has_derivs())
        rop.llvm_zero_derivs (Data);

    const bool have_handle = (texture_handle != NULL);
    rop.generated_texture_call (have_handle);

    return true;
}
2998
2999
3000
LLVMGEN (llvm_gen_getmessage)
{
    // Accepted argument layouts (op argument 0 is the int result):
    //   * getmessage (attribute_name, value)
    //   * getmessage (attribute_name, value[])
    //   * getmessage (source, attribute_name, value)
    //   * getmessage (source, attribute_name, value[])
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 3 || op.nargs() == 4);

    // A fourth argument means a source string precedes the name.
    const int has_source = (op.nargs() == 4);
    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &Source = *rop.opargsym (op, 1);   // same symbol as Name when !has_source
    Symbol &Name   = *rop.opargsym (op, 1+has_source);
    Symbol &Data   = *rop.opargsym (op, 2+has_source);
    OSL_DASSERT(Result.typespec().is_int() && Name.typespec().is_string());
    OSL_DASSERT(has_source == 0 || Source.typespec().is_string());

    // FIXME: secret handshake for closures -- closure data is described
    // with an UNKNOWN base type carrying only the array length.
    const TypeDesc datatype = Data.typespec().is_closure_based()
        ? TypeDesc (TypeDesc::UNKNOWN, Data.typespec().arraylength())
        : Data.typespec().simpletype();

    llvm::Value *call_args[9] = {
        rop.sg_void_ptr(),
        has_source ? rop.llvm_load_value(Source)
                   : rop.ll.constant(ustring()),
        rop.llvm_load_value (Name),
        rop.ll.constant (datatype),
        // Data goes by address (for closures, so the callee can modify it)
        rop.llvm_void_ptr (Data),
        rop.ll.constant ((int)Data.has_derivs()),
        // layer id and source location of this op
        rop.ll.constant(rop.inst()->id()),
        rop.ll.constant(op.sourcefile()),
        rop.ll.constant(op.sourceline()),
    };

    llvm::Value *status = rop.ll.call_function ("osl_getmessage", call_args);
    rop.llvm_store_value (status, Result);
    return true;
}
3045
3046
3047
// Generate the call to "osl_setmessage" for an op with exactly two
// arguments: (string name, data).
LLVMGEN (llvm_gen_setmessage)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 2);

    Symbol &Name = *rop.opargsym (op, 0);
    Symbol &Data = *rop.opargsym (op, 1);
    OSL_DASSERT(Name.typespec().is_string());

    // FIXME: secret handshake for closures -- closure data is described
    // with an UNKNOWN base type carrying only the array length.
    const TypeDesc datatype = Data.typespec().is_closure_based()
        ? TypeDesc (TypeDesc::UNKNOWN, Data.typespec().arraylength())
        : Data.typespec().simpletype();

    llvm::Value *call_args[7] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Name),
        rop.ll.constant (datatype),
        // Data goes by address (for closures, so the callee can modify it)
        rop.llvm_void_ptr (Data),
        // layer id and source location of this op
        rop.ll.constant(rop.inst()->id()),
        rop.ll.constant(op.sourcefile()),
        rop.ll.constant(op.sourceline()),
    };

    rop.ll.call_function ("osl_setmessage", call_args);
    return true;
}
3078
3079
3080
// Load one field of the shader globals and store it in the op's single
// argument.  The field is picked by looking up the op's own name with
// ShaderGlobalNameToIndex().
LLVMGEN (llvm_gen_get_simple_SG_field)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 1);

    Symbol &Result = *rop.opargsym (op, 0);

    const int field_index = rop.ShaderGlobalNameToIndex (op.opname());
    OSL_DASSERT (field_index >= 0);

    llvm::Value *field_ptr   = rop.ll.GEP (rop.sg_ptr(), 0, field_index);
    llvm::Value *field_value = rop.ll.op_load (field_ptr);
    rop.llvm_store_value (field_value, Result);

    return true;
}
3096
3097
3098
// Generate code for a two-argument op (triple result, triple P).  When P
// carries no derivatives the result is simply assigned zero; otherwise
// "osl_calculatenormal" is called and any derivatives of the result are
// zeroed afterwards.
LLVMGEN (llvm_gen_calculatenormal)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 2);

    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &P      = *rop.opargsym (op, 1);
    OSL_DASSERT(Result.typespec().is_triple() && P.typespec().is_triple());

    if (P.has_derivs()) {
        llvm::Value *call_args[] = {
            rop.llvm_void_ptr (Result),
            rop.sg_void_ptr(),
            rop.llvm_void_ptr (P),
        };
        rop.ll.call_function ("osl_calculatenormal", call_args);
        if (Result.has_derivs())
            rop.llvm_zero_derivs (Result);
    } else {
        // Nothing to differentiate
        rop.llvm_assign_zero (Result);
    }
    return true;
}
3124
3125
3126
// Generate code for a two-argument op (float result, triple P).  When P
// carries no derivatives the result is simply assigned zero; otherwise the
// value returned by "osl_area" is stored and any derivatives of the result
// are zeroed.
LLVMGEN (llvm_gen_area)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 2);

    Symbol &Result = *rop.opargsym (op, 0);
    Symbol &P      = *rop.opargsym (op, 1);
    OSL_DASSERT(Result.typespec().is_float() && P.typespec().is_triple());

    if (P.has_derivs()) {
        llvm::Value *area = rop.ll.call_function ("osl_area", rop.llvm_void_ptr (P));
        rop.llvm_store_value (area, Result);
        if (Result.has_derivs())
            rop.llvm_zero_derivs (Result);
    } else {
        // Nothing to differentiate
        rop.llvm_assign_zero (Result);
    }
    return true;
}
3148
3149
3150
// Generate the call for a spline-style op with arguments
//     (result, string spline, float value, [int knot_count,] knots[])
// The callee's name is "osl_<opname>_" followed by one code each for the
// result, the value and the knots: an optional "d" (derivatives) and then
// "f" (float) or "v" (triple).
LLVMGEN (llvm_gen_spline)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() >= 4 && op.nargs() <= 5);

    const bool has_knot_count = (op.nargs() == 5);
    Symbol &Result     = *rop.opargsym (op, 0);
    Symbol &Spline     = *rop.opargsym (op, 1);
    Symbol &Value      = *rop.opargsym (op, 2);
    Symbol &Knot_count = *rop.opargsym (op, 3);  // is the knots array itself when no count is given
    Symbol &Knots      = *rop.opargsym (op, has_knot_count ? 4 : 3);

    OSL_DASSERT(!Result.typespec().is_closure_based() &&
                Spline.typespec().is_string() &&
                Value.typespec().is_float() &&
                !Knots.typespec().is_closure_based() &&
                Knots.typespec().is_array() &&
                (!has_knot_count || Knot_count.typespec().is_int()));

    std::string funcname = Strutil::sprintf("osl_%s_", op.opname());

    // Derivatives are computed only if the result wants them AND at least
    // one of the inputs can supply them.
    const bool result_derivs = Result.has_derivs()
                            && (Value.has_derivs() || Knots.has_derivs());

    // Result code
    if (result_derivs)
        funcname += "d";
    funcname += Result.typespec().is_float()  ? "f"
              : Result.typespec().is_triple() ? "v" : "";

    // Value code
    if (result_derivs && Value.has_derivs())
        funcname += "d";
    funcname += Value.typespec().is_float()  ? "f"
              : Value.typespec().is_triple() ? "v" : "";

    // Knots code, based on the array's element type
    if (result_derivs && Knots.has_derivs())
        funcname += "d";
    const TypeDesc knot_elem = Knots.typespec().simpletype().elementtype();
    funcname += (knot_elem == TypeDesc::FLOAT)          ? "f"
              : (knot_elem.aggregate == TypeDesc::VEC3) ? "v" : "";

    llvm::Value *call_args[] = {
        rop.llvm_void_ptr (Result),
        rop.llvm_load_string (Spline),
        rop.llvm_void_ptr (Value),   // passed by address, make things easy
        rop.llvm_void_ptr (Knots),
        // number of knots to use: explicit count, else the full array
        has_knot_count ?
            rop.llvm_load_value (Knot_count) :
            rop.ll.constant ((int)Knots.typespec().arraylength()),
        // declared length of the knots array
        rop.ll.constant ((int)Knots.typespec().arraylength()),
    };
    rop.ll.call_function (funcname.c_str(), call_args);

    // The result has derivative slots that the callee did not fill in
    if (Result.has_derivs() && !result_derivs)
        rop.llvm_zero_derivs (Result);

    return true;
}
3215
3216
3217
3218 static void
llvm_gen_keyword_fill(BackendLLVM & rop,Opcode & op,const ClosureRegistry::ClosureEntry * clentry,ustring clname,llvm::Value * mem_void_ptr,int argsoffset)3219 llvm_gen_keyword_fill(BackendLLVM &rop, Opcode &op, const ClosureRegistry::ClosureEntry *clentry, ustring clname, llvm::Value *mem_void_ptr, int argsoffset)
3220 {
3221 OSL_DASSERT(((op.nargs() - argsoffset) % 2) == 0);
3222
3223 int Nattrs = (op.nargs() - argsoffset) / 2;
3224
3225 for (int attr_i = 0; attr_i < Nattrs; ++attr_i) {
3226 int argno = attr_i * 2 + argsoffset;
3227 Symbol &Key = *rop.opargsym (op, argno);
3228 Symbol &Value = *rop.opargsym (op, argno + 1);
3229 OSL_DASSERT(Key.typespec().is_string());
3230 OSL_ASSERT(Key.is_constant());
3231 ustring *key = (ustring *)Key.data();
3232 TypeDesc ValueType = Value.typespec().simpletype();
3233
3234 bool legal = false;
3235 // Make sure there is some keyword arg that has the name and the type
3236 for (int t = 0; t < clentry->nkeyword; ++t) {
3237 const ClosureParam &p = clentry->params[clentry->nformal + t];
3238 // strcmp might be too much, we could precompute the ustring for the param,
3239 // but in this part of the code is not a big deal
3240 if (equivalent(p.type,ValueType) && !strcmp(key->c_str(), p.key)) {
3241 // store data
3242 OSL_DASSERT(p.offset + p.field_size <= clentry->struct_size);
3243 llvm::Value* dst = rop.ll.offset_ptr (mem_void_ptr, p.offset);
3244 llvm::Value* src = rop.llvm_void_ptr (Value);
3245 rop.ll.op_memcpy (dst, src, (int)p.type.size(),
3246 4 /* use 4 byte alignment for now */);
3247 legal = true;
3248 break;
3249 }
3250 }
3251 if (!legal) {
3252 rop.shadingcontext()->warningf("Unsupported closure keyword arg \"%s\" for %s (%s:%d)", key->c_str(), clname, op.sourcefile(), op.sourceline());
3253 }
3254 }
3255 }
3256
3257
3258
// Generate code for a closure-construction op.  Arguments are
//     (closure result, [weight,] string id, formal params..., keyword pairs...)
// Returns false (after emitting an error) if the renderer has not
// registered a closure with that id.
LLVMGEN (llvm_gen_closure)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT (op.nargs() >= 2); // at least the result and the ID

    Symbol &Result = *rop.opargsym (op, 0);
    // If argument 1 is not a string it is a weight, and the id follows it.
    const int weighted = rop.opargsym(op,1)->typespec().is_string() ? 0 : 1;
    Symbol *Weight = weighted ? rop.opargsym (op, 1) : NULL;
    Symbol &Id = *rop.opargsym (op, 1+weighted);
    OSL_DASSERT(Result.typespec().is_closure());
    OSL_DASSERT(Id.typespec().is_string());
    const ustring closure_name = *((ustring *)Id.data());

    const ClosureRegistry::ClosureEntry *clentry = rop.shadingsys().find_closure(closure_name);
    if (!clentry) {
        rop.llvm_gen_error (Strutil::sprintf("Closure '%s' is not supported by the current renderer, called from %s:%d in shader \"%s\", layer %d \"%s\", group \"%s\"",
                                     closure_name, op.sourcefile(), op.sourceline(),
                                     rop.inst()->shadername(), rop.layer(),
                                     rop.inst()->layername(), rop.group().name()));
        return false;
    }

    // Index of the first formal closure parameter among the op arguments
    const int first_formal = 2 + weighted;
    OSL_DASSERT (op.nargs() >= (first_formal + clentry->nformal));

    // Allocate the closure component; the call returns the component
    // pointer (which, for the weighted flavor, may be null -- see below).
    llvm::Value *render_ptr = rop.ll.constant_ptr(rop.shadingsys().renderer(), rop.ll.type_void_ptr());
    llvm::Value *sg_ptr = rop.sg_void_ptr();
    llvm::Value *id_int = rop.ll.constant(clentry->id);
    llvm::Value *size_int = rop.ll.constant(clentry->struct_size);
    llvm::Value *comp_void_ptr = NULL;
    if (weighted) {
        comp_void_ptr = rop.ll.call_function ("osl_allocate_weighted_closure_component", sg_ptr, id_int, size_int, rop.llvm_void_ptr(*Weight));
    } else {
        comp_void_ptr = rop.ll.call_function ("osl_allocate_closure_component", sg_ptr, id_int, size_int);
    }

    // osl_allocate_weighted_closure_component is allowed to return NULL,
    // so everything that touches the component is wrapped in an "if
    // non-null" -- except when the weight is a constant that is known not
    // to be zero.
    llvm::BasicBlock *after_block = NULL;
    const bool weight_known_nonzero =
        weighted && Weight->is_constant() && !rop.is_zero(*Weight);
    if (weighted && !weight_known_nonzero) {
        llvm::BasicBlock *notnull_block = rop.ll.new_basic_block ("non_null_closure");
        after_block = rop.ll.new_basic_block ("");
        llvm::Value *is_nonnull = rop.ll.op_ne (comp_void_ptr, rop.ll.void_ptr_null());
        rop.ll.op_branch (is_nonnull, notnull_block, after_block);
        // new insert point is notnull_block
    }

    llvm::Value *comp_ptr = rop.ll.ptr_cast(comp_void_ptr, rop.llvm_type_closure_component_ptr());
    // Address of the primitive (parameter) buffer: field index 2 of the
    // component, viewed as a void pointer.
    llvm::Value *mem_void_ptr = rop.ll.GEP (comp_ptr, 0, 2);
    mem_void_ptr = rop.ll.ptr_cast(mem_void_ptr, rop.ll.type_void_ptr());

    // Initialize the parameter memory: through the closure's "prepare"
    // callback if it has one, otherwise by zeroing it.
    if (clentry->prepare) {
        // Call clentry->prepare(renderservices *, int id, void *mem)
        llvm::Value *prepare_fn = rop.ll.constant_ptr((void *)clentry->prepare, rop.llvm_type_prepare_closure_func());
        llvm::Value *prepare_args[] = {render_ptr, id_int, mem_void_ptr};
        rop.ll.call_function (prepare_fn, prepare_args);
    } else {
        rop.ll.op_memset (mem_void_ptr, 0, clentry->struct_size, 4 /*align*/);
    }

    // Copy each formal parameter into its slot of the closure struct
    for (int carg = 0; carg < clentry->nformal; ++carg) {
        const ClosureParam &param = clentry->params[carg];
        if (param.key != NULL)
            break;   // a keyed entry ends the formals
        OSL_DASSERT(param.offset + param.field_size <= clentry->struct_size);
        Symbol &sym = *rop.opargsym (op, carg + first_formal);
        TypeDesc symtype = sym.typespec().simpletype();

        if (rop.use_optix() && sym.typespec().is_string()) {
            // Strings under OptiX: copy 8 bytes from the device string
            llvm::Value *dst = rop.ll.offset_ptr (mem_void_ptr, param.offset);
            llvm::Value *src = rop.llvm_load_device_string (sym, /*follow*/ false);
            rop.ll.op_memcpy (dst, src, 8, 8);
        }
        else if (!sym.typespec().is_closure_array() && !sym.typespec().is_structure()
                 && equivalent(symtype,param.type)) {
            llvm::Value *dst = rop.ll.offset_ptr (mem_void_ptr, param.offset);
            llvm::Value *src = rop.llvm_void_ptr (sym);
            rop.ll.op_memcpy (dst, src, (int)param.type.size(),
                              4 /* use 4 byte alignment for now */);
        } else {
            rop.shadingcontext()->errorf("Incompatible formal argument %d to '%s' closure (%s %s, expected %s). Prototypes don't match renderer registry (%s:%d).",
                                         carg + 1, closure_name,
                                         sym.typespec(), sym.unmangled(), param.type,
                                         op.sourcefile(), op.sourceline());
        }
    }

    // Give the closure's "setup" callback, if any, a chance to run now
    // that the formals are in place.
    if (clentry->setup) {
        // Call clentry->setup(renderservices *, int id, void *mem)
        llvm::Value *setup_fn = rop.ll.constant_ptr((void *)clentry->setup, rop.llvm_type_setup_closure_func());
        llvm::Value *setup_args[] = {render_ptr, id_int, mem_void_ptr};
        rop.ll.call_function (setup_fn, setup_args);
    }

    // Keyword arguments follow the formals
    llvm_gen_keyword_fill(rop, op, clentry, closure_name, mem_void_ptr,
                          first_formal + clentry->nformal);

    // Close the "if non-null" opened above, if there was one
    if (after_block)
        rop.ll.op_branch (after_block);

    // Store result at the end, otherwise Ci = modifier(Ci) won't work
    rop.llvm_store_value (comp_void_ptr, Result, 0, NULL, 0);

    return true;
}
3371
3372
3373
// Generate code for a pointcloud search op with arguments
//     (int result, string filename, triple center, float radius,
//      int max_points, [int sort,] (string name, value array)...)
// The generated code compares max_points at run time against the smallest
// output array: if it fits, "osl_pointcloud_search" is called and its
// return value stored in the result; if not, "osl_error" is called.
LLVMGEN (llvm_gen_pointcloud_search)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() >= 5);

    Symbol &Result     = *rop.opargsym (op, 0);
    Symbol &Filename   = *rop.opargsym (op, 1);
    Symbol &Center     = *rop.opargsym (op, 2);
    Symbol &Radius     = *rop.opargsym (op, 3);
    Symbol &Max_points = *rop.opargsym (op, 4);

    OSL_DASSERT(Result.typespec().is_int() && Filename.typespec().is_string() &&
                Center.typespec().is_triple() && Radius.typespec().is_float() &&
                Max_points.typespec().is_int());

    // Output arrays whose derivatives have to be zeroed after the call
    std::vector<Symbol *> zero_derivs_list;

    // An int in slot 5 is the optional sort flag; the name/value attribute
    // pairs begin right after it (or at slot 5 if it is absent).
    Symbol *Sort = NULL;
    int first_attr_arg = 5;
    if (op.nargs() > 5 && rop.opargsym(op,5)->typespec().is_int()) {
        Sort = rop.opargsym(op,5);
        first_attr_arg = 6;
    }
    const int nattrs = (op.nargs() - first_attr_arg) / 2;

    // Fixed part of the argument list.  Slots 6..9 start out as
    // placeholders and are patched while scanning the attributes.
    std::vector<llvm::Value *> args;
    args.push_back (rop.sg_void_ptr());                // 0 sg
    args.push_back (rop.llvm_load_value (Filename));   // 1 filename
    args.push_back (rop.llvm_void_ptr   (Center));     // 2 center
    args.push_back (rop.llvm_load_value (Radius));     // 3 radius
    llvm::Value *max_points = rop.llvm_load_value (Max_points);
    args.push_back (max_points);                       // 4 max_points
    args.push_back (Sort ? rop.llvm_load_value(*Sort)  // 5 sort
                         : rop.ll.constant(0));
    args.push_back (rop.ll.constant_ptr (NULL));       // 6 indices
    args.push_back (rop.ll.constant_ptr (NULL));       // 7 distances
    args.push_back (rop.ll.constant (0));              // 8 derivs_offset
    args.push_back (NULL);                             // 9 nattrs

    size_t capacity = 0x7FFFFFFF;   // Lets put a 32 bit limit
    int extra_attrs = 0;            // Extra query attrs to search

    // One pass over the attribute pairs does three things:
    //   1) recognise the special "index" and "distance" attributes and
    //      patch their array pointers into slots 6 and 7,
    //   2) append every other attribute as (name, type, pointer),
    //   3) track the smallest output array for the size check below.
    for (int i = 0; i < nattrs; ++i) {
        const int namearg = first_attr_arg + 2*i;
        Symbol &Name  = *rop.opargsym (op, namearg);
        Symbol &Value = *rop.opargsym (op, namearg + 1);
        OSL_DASSERT (Name.typespec().is_string());

        TypeDesc valuetype = Value.typespec().simpletype();
        const bool is_index    = Name.is_constant()
                              && *((ustring *)Name.data()) == u_index
                              && valuetype.elementtype() == TypeDesc::INT;
        const bool is_distance = !is_index
                              && Name.is_constant()
                              && *((ustring *)Name.data()) == u_distance
                              && valuetype.elementtype() == TypeDesc::FLOAT;

        if (is_index) {
            args[6] = rop.llvm_void_ptr (Value);
        } else if (is_distance) {
            args[7] = rop.llvm_void_ptr (Value);
            if (Value.has_derivs()) {
                if (Center.has_derivs()) {
                    // deriv offset is the size of the array
                    args[8] = rop.ll.constant ((int)valuetype.numelements());
                } else {
                    zero_derivs_list.push_back(&Value);
                }
            }
        } else {
            // It is a regular attribute, push it to the arg list
            args.push_back (rop.llvm_load_value (Name));
            args.push_back (rop.ll.constant (valuetype));
            args.push_back (rop.llvm_void_ptr (Value));
            if (Value.has_derivs())
                zero_derivs_list.push_back(&Value);
            ++extra_attrs;
        }

        // minimum capacity of the output arrays
        capacity = std::min (valuetype.numelements(), capacity);
    }

    args[9] = rop.ll.constant (extra_attrs);

    // The array capacity is known now, but the requested number of points
    // is not, so the comparison has to happen at run time.
    llvm::Value *sizeok = rop.ll.op_ge (rop.ll.constant((int)capacity), max_points);

    llvm::BasicBlock *sizeok_block  = rop.ll.new_basic_block ("then");
    llvm::BasicBlock *badsize_block = rop.ll.new_basic_block ("else");
    llvm::BasicBlock *after_block   = rop.ll.new_basic_block ("");
    rop.ll.op_branch (sizeok, sizeok_block, badsize_block);
    // N.B. the op_branch sets sizeok_block as the new insert point

    // --- arrays are big enough: do the search
    llvm::Value *count = rop.ll.call_function ("osl_pointcloud_search", args);
    // Clear derivs if necessary
    for (Symbol *sym : zero_derivs_list)
        rop.llvm_zero_derivs (*sym, count);
    // Store result
    rop.llvm_store_value (count, Result);
    rop.ll.op_branch (after_block);

    // --- arrays too small: report an error
    rop.ll.set_insert_point (badsize_block);
    args.clear();   // the search argument list is not used past this point
    static ustring errorfmt("Arrays too small for pointcloud lookup at (%s:%d)");
    llvm::Value *err_args[] = {
        rop.sg_void_ptr(),
        rop.ll.constant_ptr ((void *)errorfmt.c_str()),
        rop.ll.constant_ptr ((void *)op.sourcefile().c_str()),
        rop.ll.constant (op.sourceline()),
    };
    rop.ll.call_function ("osl_error", err_args);

    rop.ll.op_branch (after_block);
    return true;
}
3485
3486
3487
// Generate code for a pointcloud attribute fetch with arguments
//     (result, filename, indices[], count, attr_name, data[])
// The generated code checks at run time that `count` fits in both the
// indices and the data array: if it does, "osl_pointcloud_get" is called
// and its return value stored in the result; if not, "osl_error" is
// called.
LLVMGEN (llvm_gen_pointcloud_get)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() >= 6);

    Symbol &Result    = *rop.opargsym (op, 0);
    Symbol &Filename  = *rop.opargsym (op, 1);
    Symbol &Indices   = *rop.opargsym (op, 2);
    Symbol &Count     = *rop.opargsym (op, 3);
    Symbol &Attr_name = *rop.opargsym (op, 4);
    Symbol &Data      = *rop.opargsym (op, 5);

    llvm::Value *count = rop.llvm_load_value (Count);

    // Usable element count is limited by the smaller of the two arrays
    const int data_len  = (int)Data.typespec().simpletype().numelements();
    const int index_len = (int)Indices.typespec().simpletype().numelements();
    const int capacity  = std::min (data_len, index_len);

    // Check available space
    llvm::Value *sizeok = rop.ll.op_ge (rop.ll.constant(capacity), count);

    llvm::BasicBlock *sizeok_block  = rop.ll.new_basic_block ("then");
    llvm::BasicBlock *badsize_block = rop.ll.new_basic_block ("else");
    llvm::BasicBlock *after_block   = rop.ll.new_basic_block ("");
    rop.ll.op_branch (sizeok, sizeok_block, badsize_block);
    // N.B. sets insert point to true case

    // --- arrays are big enough: fetch the attribute
    llvm::Value *get_args[] = {
        rop.sg_void_ptr(),
        rop.llvm_load_value (Filename),
        rop.llvm_void_ptr (Indices),
        count,
        rop.llvm_load_value (Attr_name),
        rop.ll.constant (Data.typespec().simpletype()),
        rop.llvm_void_ptr (Data),
    };
    llvm::Value *found = rop.ll.call_function ("osl_pointcloud_get", get_args);
    rop.llvm_store_value (found, Result);
    if (Data.has_derivs())
        rop.llvm_zero_derivs (Data, count);
    rop.ll.op_branch (after_block);

    // --- arrays too small: report an error
    rop.ll.set_insert_point (badsize_block);
    static ustring errorfmt("Arrays too small for pointcloud attribute get at (%s:%d)");
    llvm::Value *err_args[] = {
        rop.sg_void_ptr(),
        rop.ll.constant_ptr ((void *)errorfmt.c_str()),
        rop.ll.constant_ptr ((void *)op.sourcefile().c_str()),
        rop.ll.constant (op.sourceline()),
    };
    rop.ll.call_function ("osl_error", err_args);

    rop.ll.op_branch (after_block);
    return true;
}
3545
3546
3547
LLVMGEN (llvm_gen_pointcloud_write)
{
    // Operands: Result, Filename, Pos, followed by (name, value) pairs.
    // The pairs are packed into three parallel stack arrays, which are
    // then handed to osl_pointcloud_write.
    Opcode &op (rop.inst()->ops()[opnum]);

    OSL_DASSERT(op.nargs() >= 3);
    Symbol& Result   = *rop.opargsym (op, 0);
    Symbol& Filename = *rop.opargsym (op, 1);
    Symbol& Pos      = *rop.opargsym (op, 2);
    OSL_DASSERT(Result.typespec().is_int() && Filename.typespec().is_string() &&
                Pos.typespec().is_triple());
    OSL_DASSERT((op.nargs() & 1) && "must have an even number of attribs");

    // Everything past the three fixed operands comes in pairs
    const int attr_count = (op.nargs() - 3) / 2;

    // Stack storage for the parallel names/types/values arrays
    llvm::Value *names  = rop.ll.op_alloca (rop.ll.type_string(), attr_count);
    llvm::Value *types  = rop.ll.op_alloca (rop.ll.type_typedesc(), attr_count);
    llvm::Value *values = rop.ll.op_alloca (rop.ll.type_void_ptr(), attr_count);

    // One helper call per attribute stores slot `a` of all three arrays,
    // so no array offsetting has to be generated here.
    for (int a = 0; a < attr_count; ++a) {
        const int name_arg = 3 + 2*a;
        Symbol *attr_name  = rop.opargsym (op, name_arg);
        Symbol *attr_value = rop.opargsym (op, name_arg + 1);

        llvm::Value *names_ptr  = rop.ll.void_ptr (names);
        llvm::Value *types_ptr  = rop.ll.void_ptr (types);
        llvm::Value *values_ptr = rop.ll.void_ptr (values);
        llvm::Value *slot       = rop.ll.constant (a);
        llvm::Value *name_val   = rop.llvm_load_value (*attr_name);
        llvm::Value *type_val   = rop.ll.constant (attr_value->typespec().simpletype());
        llvm::Value *value_ptr  = rop.llvm_void_ptr (*attr_value);

        llvm::Value * helper_args[] = {
            names_ptr, types_ptr, values_ptr, slot,
            name_val,   // name[a]
            type_val,   // type[a]
            value_ptr   // value[a]
        };
        rop.ll.call_function ("osl_pointcloud_write_helper", helper_args);
    }

    llvm::Value *sg         = rop.sg_void_ptr();               // shaderglobals pointer
    llvm::Value *filename   = rop.llvm_load_value (Filename);  // name
    llvm::Value *position   = rop.llvm_void_ptr (Pos);         // position
    llvm::Value *count      = rop.ll.constant (attr_count);    // number of attributes
    llvm::Value *names_arg  = rop.ll.void_ptr (names);         // attribute names array
    llvm::Value *types_arg  = rop.ll.void_ptr (types);         // attribute types array
    llvm::Value *values_arg = rop.ll.void_ptr (values);        // attribute values array
    llvm::Value * write_args[] = {
        sg, filename, position, count, names_arg, types_arg, values_arg
    };
    llvm::Value *status = rop.ll.call_function ("osl_pointcloud_write", write_args);
    rop.llvm_store_value (status, Result);

    return true;
}
3598
3599
3600
3601
LLVMGEN (llvm_gen_dict_find)
{
    // Two OSL-level variants map onto two runtime entry points:
    //     dict_find (string dict, string query)  -> osl_dict_find_iss
    //     dict_find (int nodeID, string query)   -> osl_dict_find_iis
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 3);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& Source = *rop.opargsym (op, 1);
    Symbol& Query  = *rop.opargsym (op, 2);
    OSL_DASSERT(Result.typespec().is_int() && Query.typespec().is_string() &&
                (Source.typespec().is_int() || Source.typespec().is_string()));

    // The type of the source operand selects the variant
    const bool source_is_int = Source.typespec().is_int();

    llvm::Value *sg         = rop.sg_void_ptr();
    llvm::Value *source_val = rop.llvm_load_value (Source);
    llvm::Value *query_val  = rop.llvm_load_value (Query);
    llvm::Value *call_args[] = { sg, source_val, query_val };

    const char *entry_point = "osl_dict_find_iss";
    if (source_is_int)
        entry_point = "osl_dict_find_iis";

    llvm::Value *node = rop.ll.call_function (entry_point, call_args);
    rop.llvm_store_value (node, Result);
    return true;
}
3625
3626
3627
LLVMGEN (llvm_gen_dict_next)
{
    // dict_next needs no special handling: forward the node ID to
    // osl_dict_next with the shader globals pointer as the first argument.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 2);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& NodeID = *rop.opargsym (op, 1);
    OSL_DASSERT(Result.typespec().is_int() && NodeID.typespec().is_int());

    llvm::Value *sg      = rop.sg_void_ptr();
    llvm::Value *node_id = rop.llvm_load_value (NodeID);
    llvm::Value *next    = rop.ll.call_function ("osl_dict_next", sg, node_id);
    rop.llvm_store_value (next, Result);
    return true;
}
3642
3643
3644
LLVMGEN (llvm_gen_dict_value)
{
    // int dict_value (int nodeID, string attribname, output TYPE value)
    //
    // The output value is passed to osl_dict_value as a pointer, along
    // with its type descriptor; the call's return value goes to Result.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 4);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& NodeID = *rop.opargsym (op, 1);
    Symbol& Name   = *rop.opargsym (op, 2);
    Symbol& Value  = *rop.opargsym (op, 3);
    OSL_DASSERT(Result.typespec().is_int() && NodeID.typespec().is_int() &&
                Name.typespec().is_string());

    llvm::Value *sg         = rop.sg_void_ptr();                            // shaderglobals ptr
    llvm::Value *node_id    = rop.llvm_load_value (NodeID);                 // nodeID
    llvm::Value *attr_name  = rop.llvm_load_value (Name);                   // attribute name
    llvm::Value *value_type = rop.ll.constant (Value.typespec().simpletype()); // encoded type of Value
    llvm::Value *value_ptr  = rop.llvm_void_ptr (Value);                    // pointer to Value
    llvm::Value *call_args[] = { sg, node_id, attr_name, value_type, value_ptr };

    llvm::Value *status = rop.ll.call_function ("osl_dict_value", call_args);
    rop.llvm_store_value (status, Result);
    return true;
}
3667
3668
3669
LLVMGEN (llvm_gen_split)
{
    // int split (string str, output string result[], string sep, int maxsplit)
    //
    // `sep` and `maxsplit` are optional operands.  When absent, the
    // separator defaults to the empty string and maxsplit to the length of
    // the results array.  The array length is always passed as the final
    // argument to osl_split.
    Opcode &op (rop.inst()->ops()[opnum]);
    const int nargs = op.nargs();
    OSL_DASSERT(nargs >= 3 && nargs <= 5);
    Symbol& R       = *rop.opargsym (op, 0);
    Symbol& Str     = *rop.opargsym (op, 1);
    Symbol& Results = *rop.opargsym (op, 2);
    OSL_DASSERT(R.typespec().is_int() && Str.typespec().is_string() &&
                Results.typespec().is_array() &&
                Results.typespec().is_string_based());

    const int results_len = Results.typespec().arraylength();

    llvm::Value *str_val     = rop.llvm_load_value (Str);
    llvm::Value *results_ptr = rop.llvm_void_ptr (Results);

    // Separator: explicit operand if supplied, else ""
    llvm::Value *sep_val = NULL;
    if (nargs < 4) {
        sep_val = rop.ll.constant ("");
    } else {
        Symbol& Sep = *rop.opargsym (op, 3);
        OSL_DASSERT(Sep.typespec().is_string());
        sep_val = rop.llvm_load_value (Sep);
    }

    // Max splits: explicit operand if supplied, else the array length
    llvm::Value *maxsplit_val = NULL;
    if (nargs < 5) {
        maxsplit_val = rop.ll.constant (results_len);
    } else {
        Symbol& Maxsplit = *rop.opargsym (op, 4);
        OSL_DASSERT(Maxsplit.typespec().is_int());
        maxsplit_val = rop.llvm_load_value (Maxsplit);
    }

    llvm::Value *call_args[5] = {
        str_val, results_ptr, sep_val, maxsplit_val,
        rop.ll.constant (results_len)
    };
    llvm::Value *pieces = rop.ll.call_function ("osl_split", call_args);
    rop.llvm_store_value (pieces, R);
    return true;
}
3704
3705
3706
LLVMGEN (llvm_gen_raytype)
{
    // int raytype (string name)
    //
    // A constant name is resolved to its raytype bit at code generation
    // time and tested with osl_raytype_bit; otherwise the name is handed
    // to osl_raytype_name to be resolved by that call.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 2);
    Symbol& Result = *rop.opargsym (op, 0);
    Symbol& Name   = *rop.opargsym (op, 1);

    llvm::Value *sg       = rop.sg_void_ptr();
    llvm::Value *selector = NULL;
    const char *entry_point = NULL;
    if (! Name.is_constant()) {
        // The name is not known here, so pass it along
        selector    = rop.llvm_get_pointer (Name);
        entry_point = "osl_raytype_name";
    } else {
        // The name is known now: bake its bit pattern into the call
        ustring const_name = *(ustring *)Name.data();
        selector    = rop.ll.constant (rop.shadingsys().raytype_bit (const_name));
        entry_point = "osl_raytype_bit";
    }

    llvm::Value *call_args[2] = { sg, selector };
    llvm::Value *matches = rop.ll.call_function (entry_point, call_args);
    rop.llvm_store_value (matches, Result);
    return true;
}
3730
3731
3732
3733 // color blackbody (float temperatureK)
3734 // color wavelength_color (float wavelength_nm) // same function signature
LLVMGEN (llvm_gen_blackbody)
{
    // The runtime function name is derived from the op name
    // ("osl_<opname>_vf"), so this one generator serves every op that
    // shares this (triple result, float input) signature.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT (op.nargs() == 2);
    Symbol &Result (*rop.opargsym (op, 0));
    Symbol &Temperature (*rop.opargsym (op, 1));
    OSL_DASSERT (Result.typespec().is_triple() && Temperature.typespec().is_float());

    llvm::Value *sg         = rop.sg_void_ptr();
    llvm::Value *result_ptr = rop.llvm_void_ptr (Result);
    llvm::Value *input_val  = rop.llvm_load_value (Temperature);
    llvm::Value* call_args[] = { sg, result_ptr, input_val };

    const std::string entry_point = Strutil::sprintf("osl_%s_vf", op.opname());
    rop.ll.call_function (entry_point.c_str(), call_args);

    // Derivatives are not computed; just zero them.
    // FIXME -- revisit only if some day someone truly needs blackbody() to
    // correctly return derivs with spatially-varying temperature.
    if (Result.has_derivs()) {
        rop.llvm_zero_derivs (Result);
    }

    return true;
}
3755
3756
3757
3758 // float luminance (color c)
LLVMGEN (llvm_gen_luminance)
{
    // Calls the derivative-carrying variant only when both the input
    // color and the result have derivatives.  If only the result has
    // them, its derivatives are zeroed after the call.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT (op.nargs() == 2);
    Symbol &Result (*rop.opargsym (op, 0));
    Symbol &C (*rop.opargsym (op, 1));
    OSL_DASSERT (Result.typespec().is_float() && C.typespec().is_triple());

    const bool color_has_derivs  = C.has_derivs();
    const bool result_has_derivs = Result.has_derivs();

    const char *entry_point = "osl_luminance_fv";
    if (color_has_derivs && result_has_derivs)
        entry_point = "osl_luminance_dfdv";

    llvm::Value *sg         = rop.sg_void_ptr();
    llvm::Value *result_ptr = rop.llvm_void_ptr (Result);
    llvm::Value *color_ptr  = rop.llvm_void_ptr (C);
    llvm::Value* call_args[] = { sg, result_ptr, color_ptr };
    rop.ll.call_function (entry_point, call_args);

    // Result wants derivatives that the input cannot supply
    if (result_has_derivs && !color_has_derivs) {
        rop.llvm_zero_derivs (Result);
    }

    return true;
}
3777
3778
3779
LLVMGEN (llvm_gen_isconstant)
{
    // Answered entirely at code generation time: stores the constant 1 in
    // Result if the operand symbol is a constant, else the constant 0.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT (op.nargs() == 2);
    Symbol &Result (*rop.opargsym (op, 0));
    OSL_DASSERT (Result.typespec().is_int());
    Symbol &A (*rop.opargsym (op, 1));

    int answer = 0;
    if (A.is_constant())
        answer = 1;
    rop.llvm_store_value (rop.ll.constant(answer), Result);
    return true;
}
3790
3791
3792
LLVMGEN (llvm_gen_functioncall)
{
    // Generates the ops of a function body in place, bracketed by
    // push_function/pop_function.  The body's op range is given to
    // build_llvm_code as (opnum+1, op.jump(0)).
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT (op.nargs() == 1);

    llvm::BasicBlock* resume_block = rop.ll.push_function ();

    unsigned int body_begin = opnum+1;
    unsigned int body_end   = op.jump(0);
    if (rop.ll.debug_is_enabled()) {
        // The single operand is a constant string holding the function
        // name; the location comes from the first op of the body.
        Symbol &name_sym (*rop.opargsym (op, 0));
        OSL_DASSERT(name_sym.is_constant());
        OSL_DASSERT(name_sym.typespec().is_string());
        ustring func_name = *(ustring *)name_sym.data();
        ustring src_file = rop.inst()->op(body_begin).sourcefile();
        unsigned int src_line = rop.inst()->op(body_begin).sourceline();
        rop.ll.debug_push_inlined_function(func_name, src_file, src_line);
    }

    // Emit the function body, then branch to the block that
    // push_function returned
    rop.build_llvm_code (body_begin, body_end);
    rop.ll.op_branch (resume_block);

    // Undo the debug scope (if any) and the function scope
    if (rop.ll.debug_is_enabled()) {
        rop.ll.debug_pop_inlined_function();
    }
    rop.ll.pop_function ();

    return true;
}
3824
3825
3826
LLVMGEN (llvm_gen_functioncall_nr)
{
    // "No return" flavor of functioncall: no function scope is pushed and
    // no branch is emitted.  The body is generated inline, wrapped only in
    // a debug inlined-function scope.
    OSL_ASSERT(rop.ll.debug_is_enabled() && "no return version should only exist when debug is enabled");
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_ASSERT (op.nargs() == 1);

    // The single operand is a constant string holding the function name
    Symbol &name_sym (*rop.opargsym (op, 0));
    OSL_ASSERT(name_sym.is_constant());
    OSL_ASSERT(name_sym.typespec().is_string());
    ustring func_name = *(ustring *)name_sym.data();

    int body_begin = opnum+1;
    int body_end   = op.jump(0);
    OSL_ASSERT(op.farthest_jump() == body_end
               && "As we are not doing any branching, we should ensure that the inlined function truly ends at the farthest jump");

    // The debug location comes from the first op of the body
    const Opcode& first_op (rop.inst()->op(body_begin));
    rop.ll.debug_push_inlined_function(func_name,
                                       first_op.sourcefile(),
                                       first_op.sourceline());

    // Emit the function body
    rop.build_llvm_code (body_begin, body_end);

    // Close the debug scope and carry on with the enclosing flow
    rop.ll.debug_pop_inlined_function();

    return true;
}
3854
3855
3856
LLVMGEN (llvm_gen_return)
{
    // Handles both "exit" and "return": branch to the matching target
    // block, then start a fresh block for any code generated afterwards.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT (op.nargs() == 0);

    llvm::BasicBlock* target = NULL;
    if (op.opname() != Strings::op_exit) {
        // Plain "return": go to the exit point of the current function.
        target = rop.ll.return_block();
    } else {
        // A real "exit": leave the shader instance entirely.  The exit
        // instance block will be created if it doesn't yet exist.
        target = rop.llvm_exit_instance_block();
    }
    rop.ll.op_branch (target);

    llvm::BasicBlock* continuation = rop.ll.new_basic_block ("");
    rop.ll.set_insert_point (continuation);
    return true;
}
3873
3874
3875
3876 OSL_PRAGMA_WARNING_PUSH
3877 OSL_GCC_PRAGMA(GCC diagnostic ignored "-Wunused-parameter")
3878
LLVMGEN (llvm_gen_end)
{
    // Placeholder generator: it exists only so that the op_descriptor
    // table has an entry to point at.  It emits nothing and always
    // returns false.
    return false;
}
3884
3885 OSL_PRAGMA_WARNING_POP
3886
3887
3888 }; // namespace pvt
3889 OSL_NAMESPACE_EXIT
3890