1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4 
5 #include <vector>
6 #include <string>
7 #include <cstdio>
8 #include <fstream>
9 #include <cstdlib>
10 #include <mutex>
11 
12 #include "oslexec_pvt.h"
13 #include <OSL/genclosure.h>
14 #include "backendllvm.h"
15 #include <OSL/oslquery.h>
16 
17 #include <OpenImageIO/filesystem.h>
18 #include <OpenImageIO/fmath.h>
19 #include <OpenImageIO/optparser.h>
20 #include <OpenImageIO/strutil.h>
21 #include <OpenImageIO/sysutil.h>
22 #include <OpenImageIO/thread.h>
23 #include <OpenImageIO/timer.h>
24 
25 #include "opcolor.h"
26 
27 using namespace OSL;
28 using namespace OSL::pvt;
29 
30 #include <OpenEXR/ImfChannelList.h>  // Just for OPENEXR_VERSION_STRING
31 
32 // avoid naming conflicts with MSVC macros
33 #ifdef _MSC_VER
34  #undef RGB
35  // We use some of the iso646.h macro names later on in this file. For
36  // some compilers (MSVS, I'm looking at you) this is trouble. I don't know
37  // how or why that header would have been included here, but it did for at
38  // least one person, so shut off those macros so they don't cause trouble.
39  #undef and
40  #undef or
41  #undef xor
42  #undef compl
43  #undef bitand
44  #undef bitor
45 #endif
46 
47 OSL_NAMESPACE_ENTER
48 
49 
50 
ShadingSystem(RendererServices * renderer,TextureSystem * texturesystem,ErrorHandler * err)51 ShadingSystem::ShadingSystem (RendererServices *renderer,
52                               TextureSystem *texturesystem,
53                               ErrorHandler *err)
54     : m_impl (NULL)
55 {
56     if (! err) {
57         err = & ErrorHandler::default_handler ();
58     }
59     m_impl = new ShadingSystemImpl (renderer, texturesystem, err);
60 #ifndef NDEBUG
61     err->infof("creating new ShadingSystem %p", (void *)this);
62 #endif
63 }
64 
65 
66 
~ShadingSystem()67 ShadingSystem::~ShadingSystem ()
68 {
69     delete m_impl;
70 }
71 
72 
73 
74 bool
attribute(string_view name,TypeDesc type,const void * val)75 ShadingSystem::attribute (string_view name, TypeDesc type, const void *val)
76 {
77     return m_impl->attribute (name, type, val);
78 }
79 
80 
81 
82 bool
attribute(ShaderGroup * group,string_view name,TypeDesc type,const void * val)83 ShadingSystem::attribute (ShaderGroup *group, string_view name,
84                           TypeDesc type, const void *val)
85 {
86     return m_impl->attribute (group, name, type, val);
87 }
88 
89 
90 
91 bool
getattribute(string_view name,TypeDesc type,void * val)92 ShadingSystem::getattribute (string_view name, TypeDesc type, void *val)
93 {
94     return m_impl->getattribute (name, type, val);
95 }
96 
97 
98 
99 bool
getattribute(ShaderGroup * group,string_view name,TypeDesc type,void * val)100 ShadingSystem::getattribute (ShaderGroup *group, string_view name,
101                              TypeDesc type, void *val)
102 {
103     return m_impl->getattribute (group, name, type, val);
104 }
105 
106 
107 
108 bool
LoadMemoryCompiledShader(string_view shadername,string_view buffer)109 ShadingSystem::LoadMemoryCompiledShader (string_view shadername,
110                                          string_view buffer)
111 {
112     return m_impl->LoadMemoryCompiledShader (shadername, buffer);
113 }
114 
115 
116 
117 ShaderGroupRef
ShaderGroupBegin(string_view groupname)118 ShadingSystem::ShaderGroupBegin (string_view groupname)
119 {
120     return m_impl->ShaderGroupBegin (groupname);
121 }
122 
123 
124 
125 ShaderGroupRef
ShaderGroupBegin(string_view groupname,string_view usage,string_view groupspec)126 ShadingSystem::ShaderGroupBegin (string_view groupname, string_view usage,
127                                  string_view groupspec)
128 {
129     return m_impl->ShaderGroupBegin (groupname, usage, groupspec);
130 }
131 
132 
133 
134 bool
ShaderGroupEnd(ShaderGroup & group)135 ShadingSystem::ShaderGroupEnd (ShaderGroup& group)
136 {
137     return m_impl->ShaderGroupEnd(group);
138 }
139 
140 
141 bool
ShaderGroupEnd(void)142 ShadingSystem::ShaderGroupEnd (void)
143 {
144     return m_impl->ShaderGroupEnd();
145 }
146 
147 
148 
149 bool
Parameter(ShaderGroup & group,string_view name,TypeDesc t,const void * val,bool lockgeom)150 ShadingSystem::Parameter (ShaderGroup& group, string_view name, TypeDesc t,
151                           const void *val, bool lockgeom)
152 {
153     return m_impl->Parameter (group, name, t, val, lockgeom);
154 }
155 
156 
157 
158 bool
Parameter(string_view name,TypeDesc t,const void * val,bool lockgeom)159 ShadingSystem::Parameter (string_view name, TypeDesc t, const void *val,
160                           bool lockgeom)
161 {
162     return m_impl->Parameter (name, t, val, lockgeom);
163 }
164 
165 
166 
167 bool
Shader(ShaderGroup & group,string_view shaderusage,string_view shadername,string_view layername)168 ShadingSystem::Shader (ShaderGroup& group, string_view shaderusage,
169                        string_view shadername, string_view layername)
170 {
171     return m_impl->Shader (group, shaderusage, shadername, layername);
172 }
173 
174 
175 
176 bool
Shader(string_view shaderusage,string_view shadername,string_view layername)177 ShadingSystem::Shader (string_view shaderusage, string_view shadername,
178                        string_view layername)
179 {
180     return m_impl->Shader (shaderusage, shadername, layername);
181 }
182 
183 
184 
185 bool
ConnectShaders(ShaderGroup & group,string_view srclayer,string_view srcparam,string_view dstlayer,string_view dstparam)186 ShadingSystem::ConnectShaders (ShaderGroup& group,
187                                string_view srclayer, string_view srcparam,
188                                string_view dstlayer, string_view dstparam)
189 {
190     return m_impl->ConnectShaders (group, srclayer, srcparam,
191                                    dstlayer, dstparam);
192 }
193 
194 
195 
196 bool
ConnectShaders(string_view srclayer,string_view srcparam,string_view dstlayer,string_view dstparam)197 ShadingSystem::ConnectShaders (string_view srclayer, string_view srcparam,
198                                string_view dstlayer, string_view dstparam)
199 {
200     return m_impl->ConnectShaders (srclayer, srcparam, dstlayer, dstparam);
201 }
202 
203 
204 
205 bool
ReParameter(ShaderGroup & group,string_view layername,string_view paramname,TypeDesc type,const void * val)206 ShadingSystem::ReParameter (ShaderGroup &group, string_view layername,
207                             string_view paramname, TypeDesc type,
208                             const void *val)
209 {
210     return m_impl->ReParameter (group, layername, paramname, type, val);
211 }
212 
213 
214 
215 PerThreadInfo *
create_thread_info()216 ShadingSystem::create_thread_info ()
217 {
218     return m_impl->create_thread_info();
219 }
220 
221 
222 
223 void
destroy_thread_info(PerThreadInfo * threadinfo)224 ShadingSystem::destroy_thread_info (PerThreadInfo *threadinfo)
225 {
226     return m_impl->destroy_thread_info (threadinfo);
227 }
228 
229 
230 
231 ShadingContext *
get_context(PerThreadInfo * threadinfo,TextureSystem::Perthread * texture_threadinfo)232 ShadingSystem::get_context (PerThreadInfo *threadinfo,
233                             TextureSystem::Perthread *texture_threadinfo)
234 {
235     return m_impl->get_context (threadinfo, texture_threadinfo);
236 }
237 
238 
239 
240 void
release_context(ShadingContext * ctx)241 ShadingSystem::release_context (ShadingContext *ctx)
242 {
243     return m_impl->release_context (ctx);
244 }
245 
246 
247 
248 bool
execute(ShadingContext & ctx,ShaderGroup & group,ShaderGlobals & globals,bool run)249 ShadingSystem::execute (ShadingContext &ctx, ShaderGroup &group,
250                         ShaderGlobals &globals, bool run)
251 {
252     return m_impl->execute (ctx, group, globals, run);
253 }
254 
255 
256 
257 // DEPRECATED(2.0)
258 bool
execute(ShadingContext * ctx,ShaderGroup & group,ShaderGlobals & globals,bool run)259 ShadingSystem::execute (ShadingContext *ctx, ShaderGroup &group,
260                         ShaderGlobals &globals, bool run)
261 {
262     return m_impl->execute (ctx, group, globals, run);
263 }
264 
265 
266 
267 bool
execute_init(ShadingContext & ctx,ShaderGroup & group,ShaderGlobals & globals,bool run)268 ShadingSystem::execute_init (ShadingContext &ctx, ShaderGroup &group,
269                              ShaderGlobals &globals, bool run)
270 {
271     return ctx.execute_init (group, globals, run);
272 }
273 
274 
275 
276 bool
execute_layer(ShadingContext & ctx,ShaderGlobals & globals,int layernumber)277 ShadingSystem::execute_layer (ShadingContext &ctx, ShaderGlobals &globals,
278                               int layernumber)
279 {
280     return ctx.execute_layer (globals, layernumber);
281 }
282 
283 
284 
285 bool
execute_layer(ShadingContext & ctx,ShaderGlobals & globals,ustring layername)286 ShadingSystem::execute_layer (ShadingContext &ctx, ShaderGlobals &globals,
287                               ustring layername)
288 {
289     int layernumber = find_layer (*ctx.group(), layername);
290     return layernumber >= 0 ? ctx.execute_layer (globals, layernumber) : false;
291 }
292 
293 
294 
295 bool
execute_layer(ShadingContext & ctx,ShaderGlobals & globals,const ShaderSymbol * symbol)296 ShadingSystem::execute_layer (ShadingContext &ctx, ShaderGlobals &globals,
297                               const ShaderSymbol *symbol)
298 {
299     if (! symbol)
300         return false;
301     const Symbol *sym = reinterpret_cast<const Symbol *>(symbol);
302     int layernumber = sym->layer();
303     return layernumber >= 0 ? ctx.execute_layer (globals, layernumber) : false;
304 }
305 
306 
307 
308 bool
execute_cleanup(ShadingContext & ctx)309 ShadingSystem::execute_cleanup (ShadingContext &ctx)
310 {
311     return ctx.execute_cleanup ();
312 }
313 
314 
315 
316 int
find_layer(const ShaderGroup & group,ustring layername) const317 ShadingSystem::find_layer (const ShaderGroup &group, ustring layername) const
318 {
319     return group.find_layer (layername);
320 }
321 
322 
323 
324 const void*
get_symbol(const ShadingContext & ctx,ustring layername,ustring symbolname,TypeDesc & type) const325 ShadingSystem::get_symbol (const ShadingContext &ctx, ustring layername,
326                            ustring symbolname, TypeDesc &type) const
327 {
328     const ShaderSymbol *sym = find_symbol (*ctx.group(), layername,
329                                            symbolname);
330     if (sym) {
331         type = symbol_typedesc (sym);
332         return symbol_address (ctx, sym);
333     }
334     return NULL;
335 }
336 
337 
338 
339 const void*
get_symbol(const ShadingContext & ctx,ustring symbolname,TypeDesc & type) const340 ShadingSystem::get_symbol (const ShadingContext &ctx,
341                            ustring symbolname, TypeDesc &type) const
342 {
343     ustring layername;
344     size_t dot = symbolname.find('.');
345     if (dot != ustring::npos) {
346         // If the name contains a dot, it's intended to be layer.symbol
347         layername = ustring (symbolname, 0, dot);
348         symbolname = ustring (symbolname, dot+1);
349     }
350     return get_symbol (ctx, layername, symbolname, type);
351 }
352 
353 
354 
355 const ShaderSymbol*
find_symbol(const ShaderGroup & group,ustring layername,ustring symbolname) const356 ShadingSystem::find_symbol (const ShaderGroup &group, ustring layername,
357                             ustring symbolname) const
358 {
359     if (! group.optimized())
360         return NULL;   // has to be post-optimized
361     return (const ShaderSymbol *) group.find_symbol (layername, symbolname);
362 }
363 
364 
365 
366 const ShaderSymbol*
find_symbol(const ShaderGroup & group,ustring symbolname) const367 ShadingSystem::find_symbol (const ShaderGroup &group, ustring symbolname) const
368 {
369     ustring layername;
370     size_t dot = symbolname.find('.');
371     if (dot != ustring::npos) {
372         // If the name contains a dot, it's intended to be layer.symbol
373         layername = ustring (symbolname, 0, dot);
374         symbolname = ustring (symbolname, dot+1);
375     }
376     return find_symbol (group, layername, symbolname);
377 }
378 
379 
380 
381 TypeDesc
symbol_typedesc(const ShaderSymbol * sym) const382 ShadingSystem::symbol_typedesc (const ShaderSymbol *sym) const
383 {
384     return sym ? ((const Symbol *)sym)->typespec().simpletype() : TypeDesc();
385 }
386 
387 
388 
389 const void*
symbol_address(const ShadingContext & ctx,const ShaderSymbol * sym) const390 ShadingSystem::symbol_address (const ShadingContext &ctx,
391                                const ShaderSymbol *sym) const
392 {
393     OSL_DASSERT(sym != nullptr);
394     return ctx.symbol_data (*(const Symbol *)sym);
395 }
396 
397 
398 
399 std::string
getstats(int level) const400 ShadingSystem::getstats (int level) const
401 {
402     return m_impl->getstats (level);
403 }
404 
405 
406 
407 void
register_closure(string_view name,int id,const ClosureParam * params,PrepareClosureFunc prepare,SetupClosureFunc setup)408 ShadingSystem::register_closure (string_view name, int id,
409                                  const ClosureParam *params,
410                                  PrepareClosureFunc prepare,
411                                  SetupClosureFunc setup)
412 {
413     return m_impl->register_closure (name, id, params, prepare, setup);
414 }
415 
416 
417 
418 bool
query_closure(const char ** name,int * id,const ClosureParam ** params)419 ShadingSystem::query_closure (const char **name, int *id,
420                               const ClosureParam **params)
421 {
422     return m_impl->query_closure (name, id, params);
423 }
424 
425 
426 
427 static cspan< std::pair<ustring,SGBits> >
sgbit_table()428 sgbit_table ()
429 {
430     static const std::pair<ustring,SGBits> table[] = {
431         { ustring("P"),       SGBits::P },
432         { ustring("I"),       SGBits::I },
433         { ustring("N"),       SGBits::N },
434         { ustring("Ng"),      SGBits::Ng },
435         { ustring("u"),       SGBits::u },
436         { ustring("v"),       SGBits::v },
437         { ustring("dPdu"),    SGBits::dPdu },
438         { ustring("dPdv"),    SGBits::dPdv },
439         { ustring("time"),    SGBits::time },
440         { ustring("dtime"),   SGBits::dtime },
441         { ustring("dPdtime"), SGBits::dPdtime },
442         { ustring("Ps"),      SGBits::Ps },
443         { ustring("Ci"),      SGBits::Ci }
444     };
445     return cspan<std::pair<ustring,SGBits>>(table);
446 }
447 
448 
449 
450 SGBits
globals_bit(ustring name)451 ShadingSystem::globals_bit (ustring name)
452 {
453     for (auto t : sgbit_table()) {
454         if (name == t.first)
455             return t.second;
456     }
457     return SGBits::None;
458 }
459 
460 
461 
462 ustring
globals_name(SGBits bit)463 ShadingSystem::globals_name (SGBits bit)
464 {
465     for (auto t : sgbit_table()) {
466         if (bit == t.second)
467             return t.first;
468     }
469     return ustring();
470 }
471 
472 
473 
474 int
raytype_bit(ustring name)475 ShadingSystem::raytype_bit (ustring name)
476 {
477     return m_impl->raytype_bit (name);
478 }
479 
480 
481 
482 void
optimize_all_groups(int nthreads,bool do_jit)483 ShadingSystem::optimize_all_groups (int nthreads, bool do_jit)
484 {
485     return m_impl->optimize_all_groups (nthreads, 0 /*mythread*/, 1 /*totalthreads*/, do_jit);
486 }
487 
488 
489 
490 TextureSystem *
texturesys() const491 ShadingSystem::texturesys () const
492 {
493     return m_impl->texturesys();
494 }
495 
496 
497 
498 RendererServices *
renderer() const499 ShadingSystem::renderer () const
500 {
501     return m_impl->renderer();
502 }
503 
504 
505 
506 bool
archive_shadergroup(ShaderGroup * group,string_view filename)507 ShadingSystem::archive_shadergroup (ShaderGroup *group, string_view filename)
508 {
509     if (!group) {
510         m_impl->error ("archive_shadergroup: passed nullptr as group");
511         return false;
512     }
513     return m_impl->archive_shadergroup (*group, filename);
514 }
515 
516 
517 bool
archive_shadergroup(ShaderGroup & group,string_view filename)518 ShadingSystem::archive_shadergroup (ShaderGroup& group, string_view filename)
519 {
520     return m_impl->archive_shadergroup (group, filename);
521 }
522 
523 
524 void
set_raytypes(ShaderGroup * group,int raytypes_on,int raytypes_off)525 ShadingSystem::set_raytypes (ShaderGroup *group, int raytypes_on, int raytypes_off)
526 {
527     if (group)
528         group->set_raytypes(raytypes_on, raytypes_off);
529 }
530 
531 
532 void
optimize_group(ShaderGroup * group,ShadingContext * ctx,bool do_jit)533 ShadingSystem::optimize_group (ShaderGroup *group, ShadingContext *ctx, bool do_jit)
534 {
535     if (group)
536         m_impl->optimize_group (*group, ctx, do_jit);
537 }
538 
539 
540 
541 void
optimize_group(ShaderGroup * group,int raytypes_on,int raytypes_off,ShadingContext * ctx,bool do_jit)542 ShadingSystem::optimize_group (ShaderGroup *group,
543                                int raytypes_on, int raytypes_off,
544                                ShadingContext *ctx,
545                                bool do_jit)
546 {
547     // convenience function for backwards compatibility
548     set_raytypes (group, raytypes_on, raytypes_off);
549     optimize_group (group, ctx, do_jit);
550 }
551 
552 
553 
554 static TypeDesc TypeFloatArray2 (TypeDesc::FLOAT, 2);
555 static TypeDesc TypeFloatArray3 (TypeDesc::FLOAT, 3);
556 static TypeDesc TypeFloatArray4 (TypeDesc::FLOAT, 4);
557 
558 
559 
560 bool
convert_value(void * dst,TypeDesc dsttype,const void * src,TypeDesc srctype)561 ShadingSystem::convert_value (void *dst, TypeDesc dsttype,
562                               const void *src, TypeDesc srctype)
563 {
564     int tmp_int;
565     if (srctype == TypeDesc::UINT8) {
566         // uint8 src: Up-convert the source to int
567         if (src) {
568             tmp_int = *(const unsigned char *)src;
569             src = &tmp_int;
570         }
571         srctype = TypeDesc::TypeInt;
572     }
573 
574     float tmp_float;
575     if (srctype == TypeDesc::TypeInt && dsttype.basetype == TypeDesc::FLOAT) {
576         // int -> float-based : up-convert the source to float
577         if (src) {
578             tmp_float = (float) (*(const int *)src);
579             src = &tmp_float;
580         }
581         srctype = TypeDesc::TypeFloat;
582     }
583 
584     // Just copy equivalent types
585     if (equivalent (dsttype, srctype)) {
586         if (dst && src)
587             memmove (dst, src, dsttype.size());
588         return true;
589     }
590 
591     if (srctype == TypeDesc::TypeFloat) {
592         // float->triple conversion
593         if (equivalent(dsttype, TypeDesc::TypePoint)) {
594             if (dst && src) {
595                 float f = *(const float *)src;
596                 ((OSL::Vec3 *)dst)->setValue (f, f, f);
597             }
598             return true;
599         }
600         // float->int
601         if (dsttype == TypeDesc::TypeInt) {
602             if (dst && src)
603                 *(int *)dst = (int) *(const float *)src;
604             return true;
605         }
606         // float->float[2]
607         if (dsttype == TypeFloatArray2) {
608             if (dst && src) {
609                 float f = *(const float *)src;
610                 ((float *)dst)[0] = f;
611                 ((float *)dst)[1] = f;
612             }
613             return true;
614         }
615         // float->float[4]
616         if (dsttype == TypeFloatArray4) {
617             if (dst && src) {
618                 float f = *(const float *)src;
619                 ((float *)dst)[0] = f;
620                 ((float *)dst)[1] = f;
621                 ((float *)dst)[2] = f;
622                 ((float *)dst)[3] = f;
623             }
624             return true;
625         }
626         return false; // Unsupported conversion
627     }
628 
629     // float[3] -> triple
630     if ((srctype == TypeFloatArray3 && equivalent(dsttype, TypeDesc::TypePoint)) ||
631         (dsttype == TypeFloatArray3 && equivalent(srctype, TypeDesc::TypePoint))) {
632         if (dst && src)
633             memmove (dst, src, dsttype.size());
634         return true;
635     }
636 
637     // float[4] -> vec4
638     if ((srctype == TypeFloatArray4 && equivalent(dsttype, TypeDesc::TypeFloat4)) ||
639         (dsttype == TypeFloatArray4 && equivalent(srctype, TypeDesc::TypeFloat4))) {
640         if (dst && src)
641             memmove (dst, src, dsttype.size());
642         return true;
643     }
644 
645     // float[2] -> triple
646     if (srctype == TypeFloatArray2 && equivalent(dsttype, TypeDesc::TypePoint)) {
647         if (dst && src) {
648             float f0 = ((const float *)src)[0];
649             float f1 = ((const float *)src)[1];
650             ((OSL::Vec3 *)dst)->setValue (f0, f1, 0.0f);
651         }
652         return true;
653     }
654 
655     return false;   // Unsupported conversion
656 }
657 
658 
659 
PerThreadInfo()660 PerThreadInfo::PerThreadInfo ()
661 {
662 }
663 
664 
665 
~PerThreadInfo()666 PerThreadInfo::~PerThreadInfo ()
667 {
668     while (! context_pool.empty())
669         delete pop_context ();
670 }
671 
672 
673 
674 ShadingContext *
pop_context()675 PerThreadInfo::pop_context ()
676 {
677     ShadingContext *sc = context_pool.top ();
678     context_pool.pop ();
679     return sc;
680 }
681 
682 
683 
684 
685 
686 namespace Strings {
687 #define STRDECL(str,var_name) const ustring var_name(str);
688 #include <OSL/strdecls.h>
689 #undef STRDECL
690 }
691 
692 
693 
694 namespace pvt {   // OSL::pvt
695 
696 
ShadingSystemImpl(RendererServices * renderer,TextureSystem * texturesystem,ErrorHandler * err)697 ShadingSystemImpl::ShadingSystemImpl (RendererServices *renderer,
698                                       TextureSystem *texturesystem,
699                                       ErrorHandler *err)
700     : m_renderer(renderer), m_texturesys(texturesystem), m_err(err),
701       m_statslevel (0), m_lazylayers (true),
702       m_lazyglobals (true), m_lazyunconnected(true), m_lazyerror(true),
703       m_lazy_userdata(false), m_userdata_isconnected(false),
704       m_clearmemory (false), m_debugnan (false), m_debug_uninit(false),
705       m_lockgeom_default (true), m_strict_messages(true),
706       m_error_repeats(false),
707       m_range_checking(true),
708       m_unknown_coordsys_error(true), m_connection_error(true),
709       m_greedyjit(false), m_countlayerexecs(false),
710       m_relaxed_param_typecheck(false),
711       m_max_warnings_per_thread(100),
712       m_profile(0),
713       m_optimize(2),
714       m_opt_simplify_param(true), m_opt_constant_fold(true),
715       m_opt_stale_assign(true), m_opt_elide_useless_ops(true),
716       m_opt_elide_unconnected_outputs(true),
717       m_opt_peephole(true), m_opt_coalesce_temps(true),
718       m_opt_assign(true), m_opt_mix(true),
719       m_opt_merge_instances(1), m_opt_merge_instances_with_userdata(true),
720       m_opt_fold_getattribute(true),
721       m_opt_middleman(true), m_opt_texture_handle(true),
722       m_opt_seed_bblock_aliases(true),
723       m_llvm_jit_fma(false),
724       m_llvm_jit_aggressive(false),
725       m_optimize_nondebug(false),
726       m_vector_width(4),
727       m_opt_passes(10),
728       m_llvm_optimize(1),
729       m_debug(0), m_llvm_debug(0),
730       m_llvm_debug_layers(0), m_llvm_debug_ops(0),
731       m_llvm_target_host(1),
732       m_llvm_debugging_symbols(0),
733       m_llvm_profiling_events(0),
734       m_llvm_output_bitcode(0),
735       m_llvm_dumpasm(0),
736       m_commonspace_synonym("world"),
737       m_max_local_mem_KB(2048),
738       m_compile_report(false),
739       m_buffer_printf(true),
740       m_no_noise(false),
741       m_no_pointcloud(false),
742       m_force_derivs(false),
743       m_allow_shader_replacement(false),
744       m_exec_repeat(1),
745       m_opt_warnings(0),
746       m_gpu_opt_error(0),
747       m_colorspace("Rec709"),
748       m_stat_opt_locking_time(0), m_stat_specialization_time(0),
749       m_stat_total_llvm_time(0),
750       m_stat_llvm_setup_time(0), m_stat_llvm_irgen_time(0),
751       m_stat_llvm_opt_time(0), m_stat_llvm_jit_time(0),
752       m_stat_inst_merge_time(0),
753       m_stat_max_llvm_local_mem(0)
754 {
755     m_stat_shaders_loaded = 0;
756     m_stat_shaders_requested = 0;
757     m_stat_groups = 0;
758     m_stat_groupinstances = 0;
759     m_stat_instances_compiled = 0;
760     m_stat_groups_compiled = 0;
761     m_stat_empty_instances = 0;
762     m_stat_merged_inst = 0;
763     m_stat_merged_inst_opt = 0;
764     m_stat_empty_groups = 0;
765     m_stat_regexes = 0;
766     m_stat_preopt_syms = 0;
767     m_stat_postopt_syms = 0;
768     m_stat_syms_with_derivs = 0;
769     m_stat_preopt_ops = 0;
770     m_stat_postopt_ops = 0;
771     m_stat_middlemen_eliminated = 0;
772     m_stat_const_connections = 0;
773     m_stat_global_connections = 0;
774     m_stat_tex_calls_codegened = 0;
775     m_stat_tex_calls_as_handles = 0;
776     m_stat_master_load_time = 0;
777     m_stat_optimization_time = 0;
778     m_stat_getattribute_time = 0;
779     m_stat_getattribute_fail_time = 0;
780     m_stat_getattribute_calls = 0;
781     m_stat_get_userdata_calls = 0;
782     m_stat_noise_calls = 0;
783     m_stat_pointcloud_searches = 0;
784     m_stat_pointcloud_searches_total_results = 0;
785     m_stat_pointcloud_max_results = 0;
786     m_stat_pointcloud_failures = 0;
787     m_stat_pointcloud_gets = 0;
788     m_stat_pointcloud_writes = 0;
789     m_stat_layers_executed = 0;
790     m_stat_total_shading_time_ticks = 0;
791 
792     m_groups_to_compile_count = 0;
793     m_threads_currently_compiling = 0;
794 
795     // If client didn't supply an error handler, just use the default
796     // one that echoes to the terminal.
797     if (! m_err) {
798         m_err = & ErrorHandler::default_handler ();
799     }
800 
801     // If client didn't supply a texture system, use the one already held
802     // by the renderer (if it returns one).
803     if (! m_texturesys)
804         m_texturesys = renderer->texturesys();
805 
806     // If we still don't have a texture system, create a new one
807     if (! m_texturesys) {
808 #if OSL_NO_DEFAULT_TEXTURESYSTEM
809         // This build option instructs OSL to never create a TextureSystem
810         // itself. (Most likely reason: this build of OSL is for a renderer
811         // that replaces OIIO's TextureSystem with its own, and therefore
812         // wouldn't want to accidentally make an OIIO one here.
813         OSL_ASSERT (0 && "ShadingSystem was not passed a working TextureSystem*");
814 #else
815         m_texturesys = TextureSystem::create (true /* shared */);
816         // Make some good guesses about default options
817         m_texturesys->attribute ("automip",  1);
818         m_texturesys->attribute ("autotile", 64);
819 #endif
820     }
821 
822     // Alternate way of turning on LLVM debug mode (temporary/experimental)
823     const char *llvm_debug_env = getenv ("OSL_LLVM_DEBUG");
824     if (llvm_debug_env && *llvm_debug_env)
825         m_llvm_debug = atoi(llvm_debug_env);
826 
827     // Initialize a default set of raytype names.  A particular renderer
828     // can override this, add custom names, or change the bits around,
829     // if this default ordering is not to its liking.
830     static const char *raytypes[] = {
831         /*1*/ "camera", /*2*/ "shadow", /*4*/ "reflection", /*8*/ "refraction",
832         /*16*/ "diffuse", /*32*/ "glossy", /*64*/ "subsurface",
833         /*128*/ "displacement"
834     };
835     const int nraytypes = sizeof(raytypes)/sizeof(raytypes[0]);
836     attribute ("raytypes", TypeDesc(TypeDesc::STRING,nraytypes), raytypes);
837 
838     // Allow environment variable to override default options
839     const char *options = getenv ("OSL_OPTIONS");
840     if (options)
841         attribute ("options", TypeDesc::STRING, &options);
842 
843     setup_op_descriptors ();
844 
845     colorsystem().set_colorspace(m_colorspace);
846 }
847 
848 
849 
850 static void
shading_system_setup_op_descriptors(ShadingSystemImpl::OpDescriptorMap & op_descriptor)851 shading_system_setup_op_descriptors (ShadingSystemImpl::OpDescriptorMap& op_descriptor)
852 {
853 #define OP2(alias,name,ll,fold,simp,flag)                                \
854     extern bool llvm_gen_##ll (BackendLLVM &rop, int opnum);             \
855     extern int  constfold_##fold (RuntimeOptimizer &rop, int opnum);     \
856     op_descriptor[ustring(#alias)] = OpDescriptor(#name, llvm_gen_##ll,  \
857                                                   constfold_##fold, simp, flag);
858 #define OP(name,ll,fold,simp,flag) OP2(name,name,ll,fold,simp,flag)
859 #define TEX OpDescriptor::Tex
860 #define SIDE OpDescriptor::SideEffects
861 
862     // name          llvmgen              folder         simple     flags
863     OP (aassign,     aassign,             aassign,       false,     0);
864     OP (abs,         generic,             abs,           true,      0);
865     OP (acos,        generic,             acos,          true,      0);
866     OP (add,         add,                 add,           true,      0);
867     OP (and,         andor,               and,           true,      0);
868     OP (area,        area,                deriv,         true,      0);
869     OP (aref,        aref,                aref,          true,      0);
870     OP (arraycopy,   arraycopy,           none,          false,     0);
871     OP (arraylength, arraylength,         arraylength,   true,      0);
872     OP (asin,        generic,             asin,          true,      0);
873     OP (assign,      assign,              none,          true,      0);
874     OP (atan,        generic,             none,          true,      0);
875     OP (atan2,       generic,             none,          true,      0);
876     OP (backfacing,  get_simple_SG_field, none,          true,      0);
877     OP (bitand,      bitwise_binary_op,   bitand,        true,      0);
878     OP (bitor,       bitwise_binary_op,   bitor,         true,      0);
879     OP (blackbody,   blackbody,           none,          true,      0);
880     OP (break,       loopmod_op,          none,          false,     0);
881     OP (calculatenormal, calculatenormal, none,          true,      0);
882     OP (cbrt,        generic,             cbrt,          true,      0);
883     OP (ceil,        generic,             ceil,          true,      0);
884     OP (cellnoise,   noise,               noise,         true,      0);
885     OP (clamp,       clamp,               clamp,         true,      0);
886     OP (closure,     closure,             none,          true,      0);
887     OP (color,       construct_color,     triple,        true,      0);
888     OP (compassign,  compassign,          compassign,    false,     0);
889     OP (compl,       unary_op,            compl,         true,      0);
890     OP (compref,     compref,             compref,       true,      0);
891     OP (concat,      generic,             concat,        true,      0);
892     OP (continue,    loopmod_op,          none,          false,     0);
893     OP (cos,         generic,             cos,           true,      0);
894     OP (cosh,        generic,             none,          true,      0);
895     OP (cross,       generic,             none,          true,      0);
896     OP (degrees,     generic,             degrees,       true,      0);
897     OP (determinant, generic,             none,          true,      0);
898     OP (dict_find,   dict_find,           none,          false,     0);
899     OP (dict_next,   dict_next,           none,          false,     0);
900     OP (dict_value,  dict_value,          none,          false,     0);
901     OP (distance,    generic,             none,          true,      0);
902     OP (div,         div,                 div,           true,      0);
903     OP (dot,         generic,             dot,           true,      0);
904     OP (Dx,          DxDy,                deriv,         true,      0);
905     OP (Dy,          DxDy,                deriv,         true,      0);
906     OP (Dz,          Dz,                  deriv,         true,      0);
907     OP (dowhile,     loop_op,             none,          false,     0);
908     OP (end,         end,                 none,          false,     0);
909     OP (endswith,    generic,             endswith,      true,      0);
910     OP (environment, environment,         none,          true,      TEX);
911     OP (eq,          compare_op,          eq,            true,      0);
912     OP (erf,         generic,             erf,           true,      0);
913     OP (erfc,        generic,             erfc,          true,      0);
914     OP (error,       printf,              none,          false,     SIDE);
915     OP (exit,        return,              none,          false,     0);
916     OP (exp,         generic,             exp,           true,      0);
917     OP (exp2,        generic,             exp2,          true,      0);
918     OP (expm1,       generic,             expm1,         true,      0);
919     OP (fabs,        generic,             abs,           true,      0);
920     OP (filterwidth, filterwidth,         deriv,         true,      0);
921     OP (floor,       generic,             floor,         true,      0);
922     OP (fmod,        modulus,             none,          true,      0);
923     OP (for,         loop_op,             none,          false,     0);
924     OP (format,      printf,              format,        true,      0);
925     OP (fprintf,     printf,              none,          false,     SIDE);
926     OP (functioncall, functioncall,       functioncall,  false,     0);
927     OP (functioncall_nr,functioncall_nr,  none,          false,     0);
928     OP (ge,          compare_op,          ge,            true,      0);
929     OP (getattribute, getattribute,       getattribute,  false,     0);
930     OP (getchar,      generic,            getchar,       true,      0);
931     OP (getmatrix,   getmatrix,           getmatrix,     false,     0);
932     OP (getmessage,  getmessage,          getmessage,    false,     0);
933     OP (gettextureinfo, gettextureinfo,   gettextureinfo,false,     TEX);
934     OP (gt,          compare_op,          gt,            true,      0);
935     OP (hash,        generic,             hash,          true,      0);
936     OP (hashnoise,   noise,               noise,         true,      0);
937     OP (if,          if,                  if,            false,     0);
938     OP (inversesqrt, generic,             inversesqrt,   true,      0);
939     OP (isconnected, generic,             none,          true,      0);
940     OP (isconstant,  isconstant,          isconstant,    true,      0);
941     OP (isfinite,    generic,             none,          true,      0);
942     OP (isinf,       generic,             none,          true,      0);
943     OP (isnan,       generic,             none,          true,      0);
944     OP (le,          compare_op,          le,            true,      0);
945     OP (length,      generic,             none,          true,      0);
946     OP (log,         generic,             log,           true,      0);
947     OP (log10,       generic,             log10,         true,      0);
948     OP (log2,        generic,             log2,          true,      0);
949     OP (logb,        generic,             logb,          true,      0);
950     OP (lt,          compare_op,          lt,            true,      0);
951     OP (luminance,   luminance,           none,          true,      0);
952     OP (matrix,      matrix,              matrix,        true,      0);
953     OP (max,         minmax,              max,           true,      0);
954     OP (mxcompassign, mxcompassign,       mxcompassign,  false,     0);
955     OP (mxcompref,   mxcompref,           none,          true,      0);
956     OP (min,         minmax,              min,           true,      0);
957     OP (mix,         mix,                 mix,           true,      0);
958     OP (mod,         modulus,             mod,           true,      0);
959     OP (mul,         mul,                 mul,           true,      0);
960     OP (neg,         neg,                 neg,           true,      0);
961     OP (neq,         compare_op,          neq,           true,      0);
962     OP (noise,       noise,               noise,         true,      0);
963     OP (nop,         nop,                 none,          true,      0);
964     OP (normal,      construct_triple,    triple,        true,      0);
965     OP (normalize,   generic,             normalize,     true,      0);
966     OP (or,          andor,               or,            true,      0);
967     OP (pnoise,      noise,               noise,         true,      0);
968     OP (point,       construct_triple,    triple,        true,      0);
969     OP (pointcloud_search, pointcloud_search, pointcloud_search,
970                                                          false,     TEX);
971     OP (pointcloud_get, pointcloud_get,   pointcloud_get,false,     TEX);
972     OP (pointcloud_write, pointcloud_write, none,        false,     SIDE);
973     OP (pow,         generic,             pow,           true,      0);
974     OP (printf,      printf,              none,          false,     SIDE);
975     OP (psnoise,     noise,               noise,         true,      0);
976     OP (radians,     generic,             radians,       true,      0);
977     OP (raytype,     raytype,             raytype,       true,      0);
978     OP (regex_match, regex,               none,          false,     0);
979     OP (regex_search, regex,              regex_search,  false,     0);
980     OP (return,      return,              none,          false,     0);
981     OP (round,       generic,             none,          true,      0);
982     OP (select,      select,              select,        true,      0);
983     OP (setmessage,  setmessage,          setmessage,    false,     SIDE);
984     OP (shl,         bitwise_binary_op,   none,          true,      0);
985     OP (shr,         bitwise_binary_op,   none,          true,      0);
986     OP (sign,        generic,             none,          true,      0);
987     OP (sin,         generic,             sin,           true,      0);
988     OP (sincos,      sincos,              sincos,        false,     0);
989     OP (sinh,        generic,             none,          true,      0);
990     OP (smoothstep,  generic,             none,          true,      0);
991     OP (snoise,      noise,               noise,         true,      0);
992     OP (spline,      spline,              none,          true,      0);
993     OP (splineinverse, spline,            none,          true,      0);
994     OP (split,       split,               split,         false,     0);
995     OP (sqrt,        generic,             sqrt,          true,      0);
996     OP (startswith,  generic,             none,          true,      0);
997     OP (step,        generic,             none,          true,      0);
998     OP (stof,        generic,             stof,          true,      0);
999     OP (stoi,        generic,             stoi,          true,      0);
1000     OP (strlen,      generic,             strlen,        true,      0);
1001     OP2(strtof,stof, generic,             stof,          true,      0);
1002     OP2(strtoi,stoi, generic,             stoi,          true,      0);
1003     OP (sub,         sub,                 sub,           true,      0);
1004     OP (substr,      generic,             substr,        true,      0);
1005     OP (surfacearea, get_simple_SG_field, none,          true,      0);
1006     OP (tan,         generic,             none,          true,      0);
1007     OP (tanh,        generic,             none,          true,      0);
1008     OP (texture,     texture,             texture,       true,      TEX);
1009     OP (texture3d,   texture3d,           none,          true,      TEX);
1010     OP (trace,       trace,               none,          false,     SIDE);
1011     OP (transform,   transform,           transform,     true,      0);
1012     OP (transformc,  transformc,          transformc,    true,      0);
1013     OP (transformn,  transform,           transform,     true,      0);
1014     OP (transformv,  transform,           transform,     true,      0);
1015     OP (transpose,   generic,             none,          true,      0);
1016     OP (trunc,       generic,             none,          true,      0);
1017     OP (useparam,    useparam,            useparam,      false,     0);
1018     OP (vector,      construct_triple,    triple,        true,      0);
1019     OP (warning,     printf,              warning,       false,     SIDE);
1020     OP (wavelength_color, blackbody,      none,          true,      0);
1021     OP (while,       loop_op,             none,          false,     0);
1022     OP (xor,         bitwise_binary_op,   xor,           true,      0);
1023 #undef OP
1024 #undef TEX
1025 #undef SIDE
1026 }
1027 
1028 
1029 
1030 void
setup_op_descriptors()1031 ShadingSystemImpl::setup_op_descriptors ()
1032 {
1033     // This is not a class member function to avoid namespace issues
1034     // with function declarations in the function body, when building
1035     // with visual studio.
1036     shading_system_setup_op_descriptors(m_op_descriptor);
1037 }
1038 
1039 
1040 
1041 void
register_closure(string_view name,int id,const ClosureParam * params,PrepareClosureFunc prepare,SetupClosureFunc setup)1042 ShadingSystemImpl::register_closure (string_view name, int id,
1043                                      const ClosureParam *params,
1044                                      PrepareClosureFunc prepare,
1045                                      SetupClosureFunc setup)
1046 {
1047     for (int i = 0; params && params[i].type != TypeDesc(); ++i) {
1048         if (params[i].key == NULL && params[i].type.size() != (size_t)params[i].field_size) {
1049             errorf("Parameter %d of '%s' closure is assigned to a field of incompatible size", i + 1, name);
1050             return;
1051         }
1052     }
1053     m_closure_registry.register_closure(name, id, params, prepare, setup);
1054 }
1055 
1056 
1057 
1058 bool
query_closure(const char ** name,int * id,const ClosureParam ** params)1059 ShadingSystemImpl::query_closure(const char **name, int *id,
1060                                  const ClosureParam **params)
1061 {
1062     if (!name && !id)
1063         return false;
1064     const ClosureRegistry::ClosureEntry *entry =
1065         (name && *name) ? m_closure_registry.get_entry(ustring(*name))
1066                         : m_closure_registry.get_entry(*id);
1067     if (!entry)
1068         return false;
1069 
1070     if (name)
1071         *name   = entry->name.c_str();
1072     if (id)
1073         *id     = entry->id;
1074     if (params)
1075         *params = &entry->params[0];
1076 
1077     return true;
1078 }
1079 
1080 
1081 
~ShadingSystemImpl()1082 ShadingSystemImpl::~ShadingSystemImpl ()
1083 {
1084     size_t ngroups = m_all_shader_groups.size();
1085     for (size_t i = 0;  i < ngroups;  ++i) {
1086         if (ShaderGroupRef g = m_all_shader_groups[i].lock()) {
1087             if (!g->jitted() ) {
1088                 // As we are now lazier in jitting and need to keep the OSL IR
1089                 // around in case we want to create a batched JIT or vice versa
1090                 // we may have OSL IR to cleanup
1091                 group_post_jit_cleanup(*g);
1092             }
1093         }
1094     }
1095 
1096     printstats ();
1097     // N.B. just let m_texsys go -- if we asked for one to be created,
1098     // we asked for a shared one.
1099 
1100     // FIXME(boulos): According to the docs, we should also call
1101     // llvm_shutdown once we're done. However, ~ShadingSystemImpl
1102     // seems like the wrong place for this since in a multi-threaded
1103     // implementation we might destroy this impl while having others
1104     // outstanding. I'll leave this as a fixme for now.
1105 
1106     //llvm::llvm_shutdown();
1107 }
1108 
1109 
1110 
1111 bool
attribute(string_view name,TypeDesc type,const void * val)1112 ShadingSystemImpl::attribute (string_view name, TypeDesc type,
1113                               const void *val)
1114 {
1115 #define ATTR_SET(_name,_ctype,_dst)                                     \
1116     if (name == _name && type == OIIO::BaseTypeFromC<_ctype>::value) {  \
1117         _dst = *(_ctype *)(val);                                        \
1118         return true;                                                    \
1119     }
1120 #define ATTR_SET_STRING(_name,_dst)                                     \
1121     if (name == _name && type == TypeDesc::STRING) {                    \
1122         _dst = ustring (*(const char **)val);                           \
1123         return true;                                                    \
1124     }
1125 
1126     if (name == "options" && type == TypeDesc::STRING) {
1127         return OIIO::optparser (*this, *(const char **)val);
1128     }
1129 
1130     lock_guard guard (m_mutex);  // Thread safety
1131     ATTR_SET ("statistics:level", int, m_statslevel);
1132     ATTR_SET ("debug", int, m_debug);
1133     ATTR_SET ("lazylayers", int, m_lazylayers);
1134     ATTR_SET ("lazyglobals", int, m_lazyglobals);
1135     ATTR_SET ("lazyunconnected", int, m_lazyunconnected);
1136     ATTR_SET ("lazyerror", int, m_lazyerror);
1137     ATTR_SET ("lazy_userdata", int, m_lazy_userdata);
1138     ATTR_SET ("userdata_isconnected", int, m_userdata_isconnected);
1139     ATTR_SET ("clearmemory", int, m_clearmemory);
1140     ATTR_SET ("debug_nan", int, m_debugnan);
1141     ATTR_SET ("debugnan", int, m_debugnan);  // back-compatible alias
1142     ATTR_SET ("debug_uninit", int, m_debug_uninit);
1143     ATTR_SET ("lockgeom", int, m_lockgeom_default);
1144     ATTR_SET ("profile", int, m_profile);
1145     ATTR_SET ("optimize", int, m_optimize);
1146     ATTR_SET ("opt_simplify_param", int, m_opt_simplify_param);
1147     ATTR_SET ("opt_constant_fold", int, m_opt_constant_fold);
1148     ATTR_SET ("opt_stale_assign", int, m_opt_stale_assign);
1149     ATTR_SET ("opt_elide_useless_ops", int, m_opt_elide_useless_ops);
1150     ATTR_SET ("opt_elide_unconnected_outputs", int, m_opt_elide_unconnected_outputs);
1151     ATTR_SET ("opt_peephole", int, m_opt_peephole);
1152     ATTR_SET ("opt_coalesce_temps", int, m_opt_coalesce_temps);
1153     ATTR_SET ("opt_assign", int, m_opt_assign);
1154     ATTR_SET ("opt_mix", int, m_opt_mix);
1155     ATTR_SET ("opt_merge_instances", int, m_opt_merge_instances);
1156     ATTR_SET ("opt_merge_instances_with_userdata", int, m_opt_merge_instances_with_userdata);
1157     ATTR_SET ("opt_fold_getattribute", int, m_opt_fold_getattribute);
1158     ATTR_SET ("opt_middleman", int, m_opt_middleman);
1159     ATTR_SET ("opt_texture_handle", int, m_opt_texture_handle);
1160     ATTR_SET ("opt_seed_bblock_aliases", int, m_opt_seed_bblock_aliases);
1161     ATTR_SET ("llvm_jit_fma", int, m_llvm_jit_fma);
1162     ATTR_SET ("llvm_jit_aggressive", int, m_llvm_jit_aggressive);
1163     ATTR_SET_STRING ("llvm_jit_target", m_llvm_jit_target);
1164     ATTR_SET ("vector_width", int, m_vector_width);
1165     ATTR_SET ("opt_passes", int, m_opt_passes);
1166     ATTR_SET ("optimize_nondebug", int, m_optimize_nondebug);
1167     ATTR_SET ("llvm_optimize", int, m_llvm_optimize);
1168     ATTR_SET ("llvm_debug", int, m_llvm_debug);
1169     ATTR_SET ("llvm_debug_layers", int, m_llvm_debug_layers);
1170     ATTR_SET ("llvm_debug_ops", int, m_llvm_debug_ops);
1171     ATTR_SET ("llvm_target_host", int, m_llvm_target_host);
1172 
1173     // Due to ABI breakage in LLVM 7.0.[0-1] for llvm::Optional with GCC,
1174     // calling any llvm APIs that accept an llvm::Optional parameter will break
1175     // ABI causing issues.
1176     // https://bugs.llvm.org/show_bug.cgi?id=39427
1177     // Fixed in llvm 7.1.0+
1178     // Workaround don't enable debug symbols which would use llvm::Optional APIs
1179 #if (!OSL_GNUC_VERSION) || (OSL_LLVM_VERSION >= 71)
1180     ATTR_SET ("llvm_debugging_symbols", int, m_llvm_debugging_symbols);
1181 #endif
1182 
1183     ATTR_SET ("llvm_profiling_events", int, m_llvm_profiling_events);
1184     ATTR_SET ("llvm_output_bitcode", int, m_llvm_output_bitcode);
1185     ATTR_SET ("llvm_dumpasm", int, m_llvm_dumpasm);
1186     ATTR_SET_STRING ("llvm_prune_ir_strategy", m_llvm_prune_ir_strategy);
1187     ATTR_SET ("strict_messages", int, m_strict_messages);
1188     ATTR_SET ("range_checking", int, m_range_checking);
1189     ATTR_SET ("unknown_coordsys_error", int, m_unknown_coordsys_error);
1190     ATTR_SET ("connection_error", int, m_connection_error);
1191     ATTR_SET ("greedyjit", int, m_greedyjit);
1192     ATTR_SET ("relaxed_param_typecheck", int, m_relaxed_param_typecheck);
1193     ATTR_SET ("countlayerexecs", int, m_countlayerexecs);
1194     ATTR_SET ("max_warnings_per_thread", int, m_max_warnings_per_thread);
1195     ATTR_SET ("max_local_mem_KB", int, m_max_local_mem_KB);
1196     ATTR_SET ("compile_report", int, m_compile_report);
1197     ATTR_SET ("buffer_printf", int, m_buffer_printf);
1198     ATTR_SET ("no_noise", int, m_no_noise);
1199     ATTR_SET ("no_pointcloud", int, m_no_pointcloud);
1200     ATTR_SET ("force_derivs", int, m_force_derivs);
1201     ATTR_SET ("allow_shader_replacement", int, m_allow_shader_replacement);
1202     ATTR_SET ("exec_repeat", int, m_exec_repeat);
1203     ATTR_SET ("opt_warnings", int, m_opt_warnings);
1204     ATTR_SET ("gpu_opt_error", int, m_gpu_opt_error);
1205     ATTR_SET_STRING ("commonspace", m_commonspace_synonym);
1206     ATTR_SET_STRING ("debug_groupname", m_debug_groupname);
1207     ATTR_SET_STRING ("debug_layername", m_debug_layername);
1208     ATTR_SET_STRING ("opt_layername", m_opt_layername);
1209     ATTR_SET_STRING ("only_groupname", m_only_groupname);
1210     ATTR_SET_STRING ("archive_groupname", m_archive_groupname);
1211     ATTR_SET_STRING ("archive_filename", m_archive_filename);
1212 
1213     // cases for special handling
1214     if (name == "searchpath:shader" && type == TypeDesc::STRING) {
1215         m_searchpath = std::string (*(const char **)val);
1216         OIIO::Filesystem::searchpath_split (m_searchpath, m_searchpath_dirs);
1217         return true;
1218     }
1219     if (name == "colorspace" && type == TypeDesc::STRING) {
1220         ustring c = ustring (*(const char **)val);
1221         if (colorsystem().set_colorspace(c))
1222             m_colorspace = c;
1223         else
1224             errorf("Unknown color space \"%s\"", c);
1225         return true;
1226     }
1227     if (name == "raytypes" && type.basetype == TypeDesc::STRING) {
1228         OSL_ASSERT (type.numelements() <= 32 &&
1229                     "ShaderGlobals.raytype is an int, max of 32 raytypes");
1230         m_raytypes.clear ();
1231         for (size_t i = 0;  i < type.numelements();  ++i)
1232             m_raytypes.emplace_back(((const char **)val)[i]);
1233         return true;
1234     }
1235     if (name == "renderer_outputs" && type.basetype == TypeDesc::STRING) {
1236         m_renderer_outputs.clear ();
1237         for (size_t i = 0;  i < type.numelements();  ++i)
1238             m_renderer_outputs.emplace_back(((const char **)val)[i]);
1239         return true;
1240     }
1241     if (name == "lib_bitcode" && type.basetype == TypeDesc::UINT8) {
1242         if (type.arraylen < 0) {
1243             errorf("Invalid bitcode size: %d", type.arraylen);
1244             return false;
1245         }
1246         m_lib_bitcode.clear();
1247         if (type.arraylen) {
1248             const char* bytes = static_cast<const char*>(val);
1249             std::copy(bytes, bytes + type.arraylen,
1250                       back_inserter(m_lib_bitcode));
1251         }
1252         return true;
1253     }
1254     if (name == "error_repeats") {
1255         // Special case: setting error_repeats also clears the "previously
1256         // seen" error and warning lists.
1257         m_errseen.clear();
1258         m_warnseen.clear();
1259         ATTR_SET ("error_repeats", int, m_error_repeats);
1260     }
1261 
1262     return false;
1263 #undef ATTR_SET
1264 #undef ATTR_SET_STRING
1265 }
1266 
1267 
1268 
1269 bool
getattribute(string_view name,TypeDesc type,void * val)1270 ShadingSystemImpl::getattribute (string_view name, TypeDesc type,
1271                                  void *val)
1272 {
1273 #define ATTR_DECODE(_name,_ctype,_src)                                  \
1274     if (name == _name && type == OIIO::BaseTypeFromC<_ctype>::value) {  \
1275         *(_ctype *)(val) = (_ctype)(_src);                              \
1276         return true;                                                    \
1277     }
1278 #define ATTR_DECODE_STRING(_name,_src)                                  \
1279     if (name == _name && type == TypeDesc::STRING) {                    \
1280         *(const char **)(val) = _src.c_str();                           \
1281         return true;                                                    \
1282     }
1283 
1284     lock_guard guard (m_mutex);  // Thread safety
1285 
1286     ATTR_DECODE_STRING ("searchpath:shader", m_searchpath);
1287     ATTR_DECODE ("statistics:level", int, m_statslevel);
1288     ATTR_DECODE ("lazylayers", int, m_lazylayers);
1289     ATTR_DECODE ("lazyglobals", int, m_lazyglobals);
1290     ATTR_DECODE ("lazyunconnected", int, m_lazyunconnected);
1291     ATTR_DECODE ("lazy_userdata", int, m_lazy_userdata);
1292     ATTR_DECODE ("userdata_isconnected", int, m_userdata_isconnected);
1293     ATTR_DECODE ("clearmemory", int, m_clearmemory);
1294     ATTR_DECODE ("debug_nan", int, m_debugnan);
1295     ATTR_DECODE ("debugnan", int, m_debugnan);  // back-compatible alias
1296     ATTR_DECODE ("debug_uninit", int, m_debug_uninit);
1297     ATTR_DECODE ("lockgeom", int, m_lockgeom_default);
1298     ATTR_DECODE ("profile", int, m_profile);
1299     ATTR_DECODE ("optimize", int, m_optimize);
1300     ATTR_DECODE ("opt_simplify_param", int, m_opt_simplify_param);
1301     ATTR_DECODE ("opt_constant_fold", int, m_opt_constant_fold);
1302     ATTR_DECODE ("opt_stale_assign", int, m_opt_stale_assign);
1303     ATTR_DECODE ("opt_elide_useless_ops", int, m_opt_elide_useless_ops);
1304     ATTR_DECODE ("opt_elide_unconnected_outputs", int, m_opt_elide_unconnected_outputs);
1305     ATTR_DECODE ("opt_peephole", int, m_opt_peephole);
1306     ATTR_DECODE ("opt_coalesce_temps", int, m_opt_coalesce_temps);
1307     ATTR_DECODE ("opt_assign", int, m_opt_assign);
1308     ATTR_DECODE ("opt_mix", int, m_opt_mix);
1309     ATTR_DECODE ("opt_merge_instances", int, m_opt_merge_instances);
1310     ATTR_DECODE ("opt_merge_instances_with_userdata", int, m_opt_merge_instances_with_userdata);
1311     ATTR_DECODE ("opt_fold_getattribute", int, m_opt_fold_getattribute);
1312     ATTR_DECODE ("opt_middleman", int, m_opt_middleman);
1313     ATTR_DECODE ("opt_texture_handle", int, m_opt_texture_handle);
1314     ATTR_DECODE ("opt_seed_bblock_aliases", int, m_opt_seed_bblock_aliases);
1315     ATTR_DECODE ("llvm_jit_fma", int, m_llvm_jit_fma);
1316     ATTR_DECODE ("llvm_jit_aggressive", int, m_llvm_jit_aggressive);
1317     ATTR_DECODE_STRING ("llvm_jit_target", m_llvm_jit_target);
1318     ATTR_DECODE ("vector_width", int, m_vector_width);
1319     ATTR_DECODE ("opt_passes", int, m_opt_passes);
1320     ATTR_DECODE ("optimize_nondebug", int, m_optimize_nondebug);
1321     ATTR_DECODE ("llvm_optimize", int, m_llvm_optimize);
1322     ATTR_DECODE ("debug", int, m_debug);
1323     ATTR_DECODE ("llvm_debug", int, m_llvm_debug);
1324     ATTR_DECODE ("llvm_debug_layers", int, m_llvm_debug_layers);
1325     ATTR_DECODE ("llvm_debug_ops", int, m_llvm_debug_ops);
1326     ATTR_DECODE ("llvm_target_host", int, m_llvm_target_host);
1327     ATTR_DECODE ("llvm_debugging_symbols", int, m_llvm_debugging_symbols);
1328     ATTR_DECODE ("llvm_profiling_events", int, m_llvm_profiling_events);
1329     ATTR_DECODE ("llvm_output_bitcode", int, m_llvm_output_bitcode);
1330     ATTR_DECODE ("llvm_dumpasm", int, m_llvm_dumpasm);
1331     ATTR_DECODE ("strict_messages", int, m_strict_messages);
1332     ATTR_DECODE ("error_repeats", int, m_error_repeats);
1333     ATTR_DECODE ("range_checking", int, m_range_checking);
1334     ATTR_DECODE ("unknown_coordsys_error", int, m_unknown_coordsys_error);
1335     ATTR_DECODE ("connection_error", int, m_connection_error);
1336     ATTR_DECODE ("greedyjit", int, m_greedyjit);
1337     ATTR_DECODE ("countlayerexecs", int, m_countlayerexecs);
1338     ATTR_DECODE ("relaxed_param_typecheck", int, m_relaxed_param_typecheck);
1339     ATTR_DECODE ("max_warnings_per_thread", int, m_max_warnings_per_thread);
1340     ATTR_DECODE_STRING ("commonspace", m_commonspace_synonym);
1341     ATTR_DECODE_STRING ("colorspace", m_colorspace);
1342     ATTR_DECODE_STRING ("debug_groupname", m_debug_groupname);
1343     ATTR_DECODE_STRING ("debug_layername", m_debug_layername);
1344     ATTR_DECODE_STRING ("opt_layername", m_opt_layername);
1345     ATTR_DECODE_STRING ("only_groupname", m_only_groupname);
1346     ATTR_DECODE_STRING ("archive_groupname", m_archive_groupname);
1347     ATTR_DECODE_STRING ("archive_filename", m_archive_filename);
1348     ATTR_DECODE ("max_local_mem_KB", int, m_max_local_mem_KB);
1349     ATTR_DECODE ("compile_report", int, m_compile_report);
1350     ATTR_DECODE ("buffer_printf", int, m_buffer_printf);
1351     ATTR_DECODE ("no_noise", int, m_no_noise);
1352     ATTR_DECODE ("no_pointcloud", int, m_no_pointcloud);
1353     ATTR_DECODE ("force_derivs", int, m_force_derivs);
1354     ATTR_DECODE ("allow_shader_replacement", int, m_allow_shader_replacement);
1355     ATTR_DECODE ("exec_repeat", int, m_exec_repeat);
1356     ATTR_DECODE ("opt_warnings", int, m_opt_warnings);
1357     ATTR_DECODE ("gpu_opt_error", int, m_gpu_opt_error);
1358 
1359     ATTR_DECODE ("stat:masters", int, m_stat_shaders_loaded);
1360     ATTR_DECODE ("stat:groups", int, m_stat_groups);
1361     ATTR_DECODE ("stat:instances_compiled", int, m_stat_instances_compiled);
1362     ATTR_DECODE ("stat:groups_compiled", int, m_stat_groups_compiled);
1363     ATTR_DECODE ("stat:empty_instances", int, m_stat_empty_instances);
1364     ATTR_DECODE ("stat:merged_inst", int, m_stat_merged_inst);
1365     ATTR_DECODE ("stat:merged_inst_opt", int, m_stat_merged_inst_opt);
1366     ATTR_DECODE ("stat:empty_groups", int, m_stat_empty_groups);
1367     ATTR_DECODE ("stat:instances", int, m_stat_groupinstances);
1368     ATTR_DECODE ("stat:regexes", int, m_stat_regexes);
1369     ATTR_DECODE ("stat:preopt_syms", int, m_stat_preopt_syms);
1370     ATTR_DECODE ("stat:postopt_syms", int, m_stat_postopt_syms);
1371     ATTR_DECODE ("stat:syms_with_derivs", int, m_stat_syms_with_derivs);
1372     ATTR_DECODE ("stat:preopt_ops", int, m_stat_preopt_ops);
1373     ATTR_DECODE ("stat:postopt_ops", int, m_stat_postopt_ops);
1374     ATTR_DECODE ("stat:middlemen_eliminated", int, m_stat_middlemen_eliminated);
1375     ATTR_DECODE ("stat:const_connections", int, m_stat_const_connections);
1376     ATTR_DECODE ("stat:global_connections", int, m_stat_global_connections);
1377     ATTR_DECODE ("stat:tex_calls_codegened", int, m_stat_tex_calls_codegened);
1378     ATTR_DECODE ("stat:tex_calls_as_handles", int, m_stat_tex_calls_as_handles);
1379     ATTR_DECODE ("stat:master_load_time", float, m_stat_master_load_time);
1380     ATTR_DECODE ("stat:optimization_time", float, m_stat_optimization_time);
1381     ATTR_DECODE ("stat:opt_locking_time", float, m_stat_opt_locking_time);
1382     ATTR_DECODE ("stat:specialization_time", float, m_stat_specialization_time);
1383     ATTR_DECODE ("stat:total_llvm_time", float, m_stat_total_llvm_time);
1384     ATTR_DECODE ("stat:llvm_setup_time", float, m_stat_llvm_setup_time);
1385     ATTR_DECODE ("stat:llvm_irgen_time", float, m_stat_llvm_irgen_time);
1386     ATTR_DECODE ("stat:llvm_opt_time", float, m_stat_llvm_opt_time);
1387     ATTR_DECODE ("stat:llvm_jit_time", float, m_stat_llvm_jit_time);
1388     ATTR_DECODE ("stat:inst_merge_time", float, m_stat_inst_merge_time);
1389     ATTR_DECODE ("stat:getattribute_calls", long long, m_stat_getattribute_calls);
1390     ATTR_DECODE ("stat:get_userdata_calls", long long, m_stat_get_userdata_calls);
1391     ATTR_DECODE ("stat:noise_calls", long long, m_stat_noise_calls);
1392     ATTR_DECODE ("stat:pointcloud_searches", long long, m_stat_pointcloud_searches);
1393     ATTR_DECODE ("stat:pointcloud_gets", long long, m_stat_pointcloud_gets);
1394     ATTR_DECODE ("stat:pointcloud_writes", long long, m_stat_pointcloud_writes);
1395     ATTR_DECODE ("stat:pointcloud_searches_total_results", long long, m_stat_pointcloud_searches_total_results);
1396     ATTR_DECODE ("stat:pointcloud_max_results", int, m_stat_pointcloud_max_results);
1397     ATTR_DECODE ("stat:pointcloud_failures", int, m_stat_pointcloud_failures);
1398     ATTR_DECODE ("stat:memory_current", long long, m_stat_memory.current());
1399     ATTR_DECODE ("stat:memory_peak", long long, m_stat_memory.peak());
1400     ATTR_DECODE ("stat:mem_master_current", long long, m_stat_mem_master.current());
1401     ATTR_DECODE ("stat:mem_master_peak", long long, m_stat_mem_master.peak());
1402     ATTR_DECODE ("stat:mem_master_ops_current", long long, m_stat_mem_master_ops.current());
1403     ATTR_DECODE ("stat:mem_master_ops_peak", long long, m_stat_mem_master_ops.peak());
1404     ATTR_DECODE ("stat:mem_master_args_current", long long, m_stat_mem_master_args.current());
1405     ATTR_DECODE ("stat:mem_master_args_peak", long long, m_stat_mem_master_args.peak());
1406     ATTR_DECODE ("stat:mem_master_syms_current", long long, m_stat_mem_master_syms.current());
1407     ATTR_DECODE ("stat:mem_master_syms_peak", long long, m_stat_mem_master_syms.peak());
1408     ATTR_DECODE ("stat:mem_master_defaults_current", long long, m_stat_mem_master_defaults.current());
1409     ATTR_DECODE ("stat:mem_master_defaults_peak", long long, m_stat_mem_master_defaults.peak());
1410     ATTR_DECODE ("stat:mem_master_consts_current", long long, m_stat_mem_master_consts.current());
1411     ATTR_DECODE ("stat:mem_master_consts_peak", long long, m_stat_mem_master_consts.peak());
1412     ATTR_DECODE ("stat:mem_inst_current", long long, m_stat_mem_inst.current());
1413     ATTR_DECODE ("stat:mem_inst_peak", long long, m_stat_mem_inst.peak());
1414     ATTR_DECODE ("stat:mem_inst_syms_current", long long, m_stat_mem_inst_syms.current());
1415     ATTR_DECODE ("stat:mem_inst_syms_peak", long long, m_stat_mem_inst_syms.peak());
1416     ATTR_DECODE ("stat:mem_inst_paramvals_current", long long, m_stat_mem_inst_paramvals.current());
1417     ATTR_DECODE ("stat:mem_inst_paramvals_peak", long long, m_stat_mem_inst_paramvals.peak());
1418     ATTR_DECODE ("stat:mem_inst_connections_current", long long, m_stat_mem_inst_connections.current());
1419     ATTR_DECODE ("stat:mem_inst_connections_peak", long long, m_stat_mem_inst_connections.peak());
1420 
1421     if (name == "colorsystem" && type.basetype == TypeDesc::PTR) {
1422         *(void**)val = &colorsystem();
1423         return true;
1424     }
1425     if (name == "colorsystem:sizes" && type.basetype == TypeDesc::LONGLONG) {
1426         if (type.arraylen != 2) {
1427             error ("Must request two colorsystem:sizes, [sizeof(pvt::ColorSystem), num-strings]");
1428             return false;
1429         }
1430         long long* lptr = (long long*) val;
1431         lptr[0] = sizeof(pvt::ColorSystem);
1432         lptr[1] = 1; // 1 string (pvt::ColorSystem::m_colorspace)
1433 
1434         // Make sure everything adds up!
1435         OSL_ASSERT((((char*)&colorsystem() + lptr[0]) - sizeof(ustring)*lptr[1]) ==
1436                    (char*)&colorsystem().colorspace());
1437         return true;
1438     }
1439 
1440     return false;
1441 #undef ATTR_DECODE
1442 #undef ATTR_DECODE_STRING
1443 }
1444 
1445 
1446 
1447 bool
attribute(ShaderGroup * group,string_view name,TypeDesc type,const void * val)1448 ShadingSystemImpl::attribute (ShaderGroup *group, string_view name,
1449                               TypeDesc type, const void *val)
1450 {
1451     // No current group attributes to set
1452     if (! group)
1453         return attribute (name, type, val);
1454     lock_guard lock (group->m_mutex);
1455     if (name == "renderer_outputs" && type.basetype == TypeDesc::STRING) {
1456         group->m_renderer_outputs.clear ();
1457         for (size_t i = 0;  i < type.numelements();  ++i)
1458             group->m_renderer_outputs.emplace_back(((const char **)val)[i]);
1459         return true;
1460     }
1461     if (name == "entry_layers" && type.basetype == TypeDesc::STRING) {
1462         group->clear_entry_layers ();
1463         for (int i = 0;  i < (int)type.numelements();  ++i)
1464             group->mark_entry_layer (ustring(((const char **)val)[i]));
1465         return true;
1466     }
1467     if (name == "exec_repeat" && type == TypeDesc::TypeInt) {
1468         group->m_exec_repeat = *(const int *)val;
1469         return true;
1470     }
1471     if (name == "groupname" && type == TypeDesc::TypeString) {
1472         group->name (ustring(((const char **)val)[0]));
1473         return true;
1474     }
1475     return false;
1476 }
1477 
1478 
1479 
1480 bool
getattribute(ShaderGroup * group,string_view name,TypeDesc type,void * val)1481 ShadingSystemImpl::getattribute (ShaderGroup *group, string_view name,
1482                                  TypeDesc type, void *val)
1483 {
1484     if (! group)
1485         return false;
1486 
1487     if (name == "groupname" && type == TypeDesc::TypeString) {
1488         *(ustring *)val = group->name();
1489         return true;
1490     }
1491     if (name == "num_layers" && type == TypeDesc::TypeInt) {
1492         *(int *)val = group->nlayers();
1493         return true;
1494     }
1495     if (name == "layer_names" && type.basetype == TypeDesc::STRING) {
1496         size_t n = std::min (type.numelements(), (size_t)group->nlayers());
1497         for (size_t i = 0;  i < n;  ++i)
1498             ((ustring *)val)[i] = (*group)[i]->layername();
1499         return true;
1500     }
1501     if (name == "num_renderer_outputs" && type.basetype == TypeDesc::INT) {
1502         *(int *)val = (int) group->m_renderer_outputs.size();
1503         return true;
1504     }
1505     if (name == "renderer_outputs" && type.basetype == TypeDesc::STRING) {
1506         size_t n = std::min (type.numelements(), group->m_renderer_outputs.size());
1507         for (size_t i = 0;  i < n;  ++i)
1508             ((ustring *)val)[i] = group->m_renderer_outputs[i];
1509         for (size_t i = n;  i < type.numelements();  ++i)
1510             ((ustring *)val)[i] = ustring();
1511         return true;
1512     }
1513     if (name == "raytype_queries" && type.basetype == TypeDesc::INT) {
1514         *(int *)val = group->raytype_queries();
1515         return true;
1516     }
1517     if (name == "num_entry_layers" && type.basetype == TypeDesc::INT) {
1518         int n = 0;
1519         for (int i = 0;  i < group->nlayers();  ++i)
1520             n += group->layer(i)->entry_layer();
1521         *(int *)val = n;
1522         return true;
1523     }
1524     if (name == "entry_layers" && type.basetype == TypeDesc::STRING) {
1525         size_t n = 0;
1526         for (size_t i = 0;  i < (size_t)group->nlayers() && i < type.numelements();  ++i)
1527             if (group->layer(i)->entry_layer())
1528                 ((ustring *)val)[n++] = (*group)[i]->layername();
1529         for (size_t i = n;  i < type.numelements();  ++i)
1530             ((ustring *)val)[i] = ustring();
1531         return true;
1532     }
1533     if (name == "group_init_name" && type.basetype == TypeDesc::STRING) {
1534 #ifdef OIIO_HAS_SPRINTF
1535         *(ustring *)val = ustring::sprintf ("__direct_callable__group_%s_%d_init",
1536                                             group->name(), group->id());
1537 #else
1538         *(ustring *)val = ustring::format ("__direct_callable__group_%s_%d_init",
1539                                            group->name(), group->id());
1540 #endif
1541         return true;
1542     }
1543     if (name == "group_entry_name" && type.basetype == TypeDesc::STRING) {
1544         int nlayers = group->nlayers ();
1545         ShaderInstance *inst = (*group)[nlayers-1];
1546         // This formulation mirrors OSOProcessorBase::layer_function_name()
1547 #ifdef OIIO_HAS_SPRINTF
1548         *(ustring *)val = ustring::sprintf ("__direct_callable__%s_%s_%d", group->name(),
1549                                            inst->layername(), inst->id());
1550 #else
1551         *(ustring *)val = ustring::format ("__direct_callable__%s_%s_%d", group->name(),
1552                                            inst->layername(), inst->id());
1553 #endif
1554         return true;
1555     }
1556     if (name == "layer_osofiles" && type.basetype == TypeDesc::STRING) {
1557         size_t n = std::min (type.numelements(), (size_t)group->nlayers());
1558         for (size_t i = 0;  i < n;  ++i)
1559             ((ustring *)val)[i] =(*group)[i]->master()->osofilename();
1560         return true;
1561     }
1562     if (name == "pickle" && type == TypeDesc::STRING) {
1563         *(ustring *)val = ustring(group->serialize());
1564         return true;
1565     }
1566     if (name == "exec_repeat" && type == TypeDesc::TypeInt) {
1567         *(int *)val = group->m_exec_repeat;
1568         return true;
1569     }
1570     if (name == "ptx_compiled_version" && type.basetype == TypeDesc::PTR) {
1571         bool exists = !group->m_llvm_ptx_compiled_version.empty();
1572         *(std::string *)val = exists ? group->m_llvm_ptx_compiled_version : "";
1573         return true;
1574     }
1575 
1576     // All the remaining attributes require the group to already be
1577     // optimized.
1578     if (! group->optimized()) {
1579         auto threadinfo = create_thread_info();
1580         auto ctx = get_context(threadinfo);
1581         optimize_group (*group, ctx, false /*jit*/);
1582         release_context(ctx);
1583         destroy_thread_info (threadinfo);
1584     }
1585 
1586     if (name == "num_textures_needed" && type == TypeDesc::TypeInt) {
1587         *(int *)val = (int)group->m_textures_needed.size();
1588         return true;
1589     }
1590     if (name == "textures_needed" && type.basetype == TypeDesc::PTR) {
1591         size_t n = group->m_textures_needed.size();
1592         *(ustring **)val = n ? &group->m_textures_needed[0] : NULL;
1593         return true;
1594     }
1595     if (name == "unknown_textures_needed" && type == TypeDesc::TypeInt) {
1596         *(int *)val = (int)group->m_unknown_textures_needed;
1597         return true;
1598     }
1599 
1600     if (name == "num_closures_needed" && type == TypeDesc::TypeInt) {
1601         *(int *)val = (int)group->m_closures_needed.size();
1602         return true;
1603     }
1604     if (name == "closures_needed" && type.basetype == TypeDesc::PTR) {
1605         size_t n = group->m_closures_needed.size();
1606         *(ustring **)val = n ? &group->m_closures_needed[0] : NULL;
1607         return true;
1608     }
1609     if (name == "unknown_closures_needed" && type == TypeDesc::TypeInt) {
1610         *(int *)val = (int)group->m_unknown_closures_needed;
1611         return true;
1612     }
1613 
1614     if (name == "num_globals_needed" && type == TypeDesc::TypeInt) {
1615         *(int *)val = (int)group->m_globals_needed.size();
1616         return true;
1617     }
1618     if (name == "globals_needed" && type.basetype == TypeDesc::PTR) {
1619         size_t n = group->m_globals_needed.size();
1620         *(ustring **)val = n ? &group->m_globals_needed[0] : NULL;
1621         return true;
1622     }
1623     if (name == "globals_read" && type.basetype == TypeDesc::INT) {
1624         *(int *)val = group->m_globals_read;
1625         return true;
1626     }
1627     if (name == "globals_write" && type.basetype == TypeDesc::INT) {
1628         *(int *)val = group->m_globals_write;
1629         return true;
1630     }
1631 
1632     if (name == "num_userdata" && type == TypeDesc::TypeInt) {
1633         *(int *)val = (int)group->m_userdata_names.size();
1634         return true;
1635     }
1636     if (name == "userdata_names" && type.basetype == TypeDesc::PTR) {
1637         size_t n = group->m_userdata_names.size();
1638         *(ustring **)val = n ? &group->m_userdata_names[0] : NULL;
1639         return true;
1640     }
1641     if (name == "userdata_types" && type.basetype == TypeDesc::PTR) {
1642         size_t n = group->m_userdata_types.size();
1643         *(TypeDesc **)val = n ? &group->m_userdata_types[0] : NULL;
1644         return true;
1645     }
1646     if (name == "userdata_offsets" && type.basetype == TypeDesc::PTR) {
1647         size_t n = group->m_userdata_offsets.size();
1648         *(int **)val = n ? &group->m_userdata_offsets[0] : NULL;
1649         return true;
1650     }
1651     if (name == "userdata_derivs" && type.basetype == TypeDesc::PTR) {
1652         size_t n = group->m_userdata_derivs.size();
1653         *(char **)val = n ? &group->m_userdata_derivs[0] : NULL;
1654         return true;
1655     }
1656     if (name == "num_attributes_needed" && type == TypeDesc::TypeInt) {
1657         *(int *)val = (int)group->m_attributes_needed.size();
1658         return true;
1659     }
1660     if (name == "attributes_needed" && type.basetype == TypeDesc::PTR) {
1661         size_t n = group->m_attributes_needed.size();
1662         *(ustring **)val = n ? &group->m_attributes_needed[0] : NULL;
1663         return true;
1664     }
1665     if (name == "attribute_scopes" && type.basetype == TypeDesc::PTR) {
1666         size_t n = group->m_attribute_scopes.size();
1667         *(ustring **)val = n ? &group->m_attribute_scopes[0] : NULL;
1668         return true;
1669     }
1670     if (name == "unknown_attributes_needed" && type == TypeDesc::TypeInt) {
1671         *(int *)val = (int)group->m_unknown_attributes_needed;
1672         return true;
1673     }
1674     if (name == "group_id" && type == TypeDesc::TypeInt) {
1675         *(int *)val = (int) group->id();
1676         return true;
1677     }
1678 
1679     // Additional atttributes useful to OptiX-based renderers
1680     if (name == "userdata_layers" && type.basetype == TypeDesc::PTR) {
1681         size_t n = group->m_userdata_layers.size();
1682         *(int **)val = n ? &group->m_userdata_layers[0] : NULL;
1683         return true;
1684     }
1685     if (name == "userdata_init_vals" && type.basetype == TypeDesc::PTR) {
1686         size_t n = group->m_userdata_init_vals.size();
1687         *(void **)val = n ? &group->m_userdata_init_vals[0] : NULL;
1688         return true;
1689     }
1690 
1691     return false;
1692 }
1693 
1694 
1695 
1696 void
error(const std::string & msg) const1697 ShadingSystemImpl::error (const std::string &msg) const
1698 {
1699     lock_guard guard (m_errmutex);
1700     int n = 0;
1701     for (auto&& s : m_errseen) {
1702         if (s == msg && !m_error_repeats)
1703             return;
1704         ++n;
1705     }
1706     if (n >= m_errseenmax)
1707         m_errseen.pop_front ();
1708     m_errseen.push_back (msg);
1709     m_err->error (msg);
1710 }
1711 
1712 
1713 
1714 void
warning(const std::string & msg) const1715 ShadingSystemImpl::warning (const std::string &msg) const
1716 {
1717     lock_guard guard (m_errmutex);
1718     int n = 0;
1719     for (auto&& s : m_warnseen) {
1720         if (s == msg && !m_error_repeats)
1721             return;
1722         ++n;
1723     }
1724     if (n >= m_errseenmax)
1725         m_warnseen.pop_front ();
1726     m_warnseen.push_back (msg);
1727     m_err->warning (msg);
1728 }
1729 
1730 
1731 
1732 void
info(const std::string & msg) const1733 ShadingSystemImpl::info (const std::string &msg) const
1734 {
1735     lock_guard guard (m_errmutex);
1736     m_err->info (msg);
1737 }
1738 
1739 
1740 
1741 void
message(const std::string & msg) const1742 ShadingSystemImpl::message (const std::string &msg) const
1743 {
1744     lock_guard guard (m_errmutex);
1745     m_err->message (msg);
1746 }
1747 
1748 
1749 
1750 void
pointcloud_stats(int search,int get,int results,int writes)1751 ShadingSystemImpl::pointcloud_stats (int search, int get, int results,
1752                                      int writes)
1753 {
1754     spin_lock lock (m_stat_mutex);
1755     m_stat_pointcloud_searches += search;
1756     m_stat_pointcloud_gets += get;
1757     m_stat_pointcloud_searches_total_results += results;
1758     if (search && ! results)
1759         ++m_stat_pointcloud_failures;
1760     m_stat_pointcloud_max_results = std::max (m_stat_pointcloud_max_results,
1761                                               results);
1762     m_stat_pointcloud_writes += writes;
1763 }
1764 
1765 
1766 
1767 namespace {
1768 typedef std::pair<ustring,long long> GroupTimeVal;
1769 struct group_time_compare { // So looking forward to C++11 lambdas!
operator ()pvt::__anond3f550b20111::group_time_compare1770     bool operator() (const GroupTimeVal &a, const GroupTimeVal &b) {
1771         return a.second > b.second;
1772     }
1773 };
1774 }
1775 
1776 
1777 
1778 // Return a comma-separated list of all the important SIMD/capabilities
1779 // that were enabled as a compile-time option when OSL was built.
1780 // (Keep this in sync with oiio_simd_caps in imageio.cpp).
1781 static std::string
osl_simd_caps()1782 osl_simd_caps()
1783 {
1784     // clang-format off
1785     std::vector<string_view> caps;
1786     if (OIIO_SIMD_SSE >= 2)      caps.emplace_back ("sse2");
1787     if (OIIO_SIMD_SSE >= 3)      caps.emplace_back ("sse3");
1788     if (OIIO_SIMD_SSE >= 3)      caps.emplace_back ("ssse3");
1789     if (OIIO_SIMD_SSE >= 4)      caps.emplace_back ("sse41");
1790     if (OIIO_SIMD_SSE >= 4)      caps.emplace_back ("sse42");
1791     if (OIIO_SIMD_AVX)           caps.emplace_back ("avx");
1792     if (OIIO_SIMD_AVX >= 2)      caps.emplace_back ("avx2");
1793     if (OIIO_SIMD_AVX >= 512)    caps.emplace_back ("avx512f");
1794     if (OIIO_AVX512DQ_ENABLED)   caps.emplace_back ("avx512dq");
1795     if (OIIO_AVX512IFMA_ENABLED) caps.emplace_back ("avx512ifma");
1796     if (OIIO_AVX512PF_ENABLED)   caps.emplace_back ("avx512pf");
1797     if (OIIO_AVX512ER_ENABLED)   caps.emplace_back ("avx512er");
1798     if (OIIO_AVX512CD_ENABLED)   caps.emplace_back ("avx512cd");
1799     if (OIIO_AVX512BW_ENABLED)   caps.emplace_back ("avx512bw");
1800     if (OIIO_AVX512VL_ENABLED)   caps.emplace_back ("avx512vl");
1801     if (OIIO_FMA_ENABLED)        caps.emplace_back ("fma");
1802     if (OIIO_F16C_ENABLED)       caps.emplace_back ("f16c");
1803     // if (OIIO_POPCOUNT_ENABLED)   caps.emplace_back ("popcnt");
1804     return OIIO::Strutil::join (caps, ",");
1805     // clang-format on
1806 }
1807 
1808 
1809 
// Build and return a human-readable, multi-line statistics report for this
// ShadingSystem.  Returns an empty string when level <= 0.  The report
// always starts with version/build/hardware information and the current
// option settings; if no shaders were requested or loaded it ends there,
// otherwise shader, optimization, timing and memory statistics follow,
// plus an execution profile when m_profile is set.
std::string
ShadingSystemImpl::getstats (int level) const
{
    // Width used to word-wrap the options string below.
    int columns = OIIO::Sysutil::terminal_columns() - 2;

    if (level <= 0)
        return "";
    std::ostringstream out;
    out.imbue (std::locale::classic());  // force C locale
    out << "Open Shading Language " << OSL_LIBRARY_VERSION_STRING << "\n";
    out << "  Build deps: LLVM-" << OSL_LLVM_FULL_VERSION
        << " OIIO-" << OIIO_VERSION_STRING << " Imath-" <<
#ifdef OPENEXR_VERSION_STRING
                          OPENEXR_VERSION_STRING
#else
                          "(unknown)"
#endif
         << "\n";

    // Accumulate "name=value " pairs for the options into `opt`.  Each
    // macro reads the member m_<name>; STROPT skips empty strings.
    std::string opt;
#define BOOLOPT(name) opt += Strutil::sprintf(#name "=%d ", m_##name)
#define INTOPT(name) opt += Strutil::sprintf(#name "=%d ", m_##name)
#define STROPT(name) if (m_##name.size()) opt += Strutil::sprintf(#name "=\"%s\" ", m_##name)
    INTOPT (optimize);
    INTOPT (llvm_optimize);
    INTOPT (debug);
    INTOPT (profile);
    INTOPT (llvm_debug);
    BOOLOPT (llvm_debug_layers);
    BOOLOPT (llvm_debug_ops);
    BOOLOPT (llvm_target_host);
    BOOLOPT (llvm_output_bitcode);
    BOOLOPT (llvm_dumpasm);
    BOOLOPT (llvm_prune_ir_strategy);
    BOOLOPT (lazylayers);
    BOOLOPT (lazyglobals);
    BOOLOPT (lazyunconnected);
    BOOLOPT (lazyerror);
    BOOLOPT (lazy_userdata);
    BOOLOPT (userdata_isconnected);
    BOOLOPT (clearmemory);
    BOOLOPT (debugnan);
    BOOLOPT (debug_uninit);
    BOOLOPT (lockgeom_default);
    BOOLOPT (strict_messages);
    BOOLOPT (error_repeats);
    BOOLOPT (range_checking);
    BOOLOPT (greedyjit);
    BOOLOPT (countlayerexecs);
    BOOLOPT (opt_simplify_param);
    BOOLOPT (opt_constant_fold);
    BOOLOPT (opt_stale_assign);
    BOOLOPT (opt_elide_useless_ops);
    BOOLOPT (opt_elide_unconnected_outputs);
    BOOLOPT (opt_peephole);
    BOOLOPT (opt_coalesce_temps);
    BOOLOPT (opt_assign);
    BOOLOPT (opt_mix);
    INTOPT  (opt_merge_instances);
    BOOLOPT (opt_merge_instances_with_userdata);
    BOOLOPT (opt_fold_getattribute);
    BOOLOPT (opt_middleman);
    BOOLOPT (opt_texture_handle);
    BOOLOPT (opt_seed_bblock_aliases);
    BOOLOPT (llvm_jit_fma);
    BOOLOPT (llvm_jit_aggressive);
    INTOPT (vector_width);
    STROPT (llvm_jit_target);
    INTOPT  (opt_passes);
    INTOPT (no_noise);
    INTOPT (no_pointcloud);
    INTOPT (force_derivs);
    INTOPT (allow_shader_replacement);
    INTOPT (exec_repeat);
    INTOPT (opt_warnings);
    INTOPT (gpu_opt_error);
    STROPT (debug_groupname);
    STROPT (debug_layername);
    STROPT (archive_groupname);
    STROPT (archive_filename);
#undef BOOLOPT
#undef INTOPT
#undef STROPT

    // Print the HW info
    out << "  Build HW support: ";
    std::string buildsimd = osl_simd_caps();
    if (!buildsimd.size())
        buildsimd = "no SIMD";
    out << buildsimd << "\n";
    OIIO::Strutil::fprintf(out, "  Runtime HW: %d cores %.1fGB %s\n",
                           OIIO::Sysutil::hardware_concurrency(),
                           OIIO::Sysutil::physical_memory() / float(1 << 30),
                           OIIO::get_string_attribute("hw:simd"));
    // TODO: detect GPU info and print it here
    out << "\n";

    out << "ShadingSystem Options:\n";
    out << "    " << Strutil::wordwrap(opt, columns, 4) << "\n";

    out << "\nOSL ShadingSystem statistics (" << (void*)this << ")\n";
    // Nothing more to report if no shader was ever requested or loaded.
    if (m_stat_shaders_requested == 0 && m_stat_shaders_loaded == 0) {
        out << "  No shaders requested or loaded\n";
        return out.str();
    }

    out << "  Shaders:\n";
    out << "    Requested: " << m_stat_shaders_requested << "\n";
    out << "    Loaded:    " << m_stat_shaders_loaded << "\n";
    // NOTE(review): "Masters" prints the same counter as "Loaded" --
    // confirm this is intended.
    out << "    Masters:   " << m_stat_shaders_loaded << "\n";
    out << "    Instances: " << m_stat_instances << "\n";
    out << "  Time loading masters: "
        << Strutil::timeintervalformat (m_stat_master_load_time, 2) << "\n";
    out << "  Shading groups:   " << m_stat_groups << "\n";
    out << "    Total instances in all groups: " << m_stat_groupinstances << "\n";
    // Clamp the divisor to 1 so zero groups does not divide by zero.
    float iperg = (float)m_stat_groupinstances/std::max((int)m_stat_groups,1);
    out << "    Avg instances per group: "
        << Strutil::sprintf ("%.1f", iperg) << "\n";
    out << "  Shading contexts: " << m_stat_contexts << "\n";
    if (m_countlayerexecs)
        out << "  Total layers executed: " << m_stat_layers_executed << "\n";

#if 0
    long long totalexec = m_layers_executed_uncond + m_layers_executed_lazy +
                          m_layers_executed_never;
    out << Strutil::sprintf ("  Total layers run: %10lld\n", totalexec);
    double inv_totalexec = 1.0 / std::max (totalexec, 1LL);  // prevent div by 0
    out << Strutil::sprintf ("    Unconditional:  %10lld  (%.1f%%)\n",
                            (long long)m_layers_executed_uncond,
                            (100.0*m_layers_executed_uncond) * inv_totalexec);
    out << Strutil::sprintf ("    On demand:      %10lld  (%.1f%%)\n",
                            (long long)m_layers_executed_lazy,
                            (100.0*m_layers_executed_lazy) * inv_totalexec);
    out << Strutil::sprintf ("    Skipped:        %10lld  (%.1f%%)\n",
                            (long long)m_layers_executed_never,
                            (100.0*m_layers_executed_never) * inv_totalexec);

#endif

    out << "  Compiled " << m_stat_groups_compiled << " groups, "
        << m_stat_instances_compiled << " instances\n";
    out << "  Merged " << (m_stat_merged_inst+m_stat_merged_inst_opt)
        << " instances (" << m_stat_merged_inst << " initial, "
        << m_stat_merged_inst_opt << " after opt) in "
        << Strutil::timeintervalformat (m_stat_inst_merge_time, 2) << "\n";
    // The percentage lines are guarded so their divisors are nonzero.
    if (m_stat_instances_compiled > 0)
        out << "  After optimization, " << m_stat_empty_instances
            << " empty instances ("
            << (int)(100.0f*m_stat_empty_instances/m_stat_instances_compiled) << "%)\n";
    if (m_stat_groups_compiled > 0)
        out << "  After optimization, " << m_stat_empty_groups << " empty groups ("
            << (int)(100.0f*m_stat_empty_groups/m_stat_groups_compiled)<< "%)\n";
    if (m_stat_instances_compiled > 0 || m_stat_groups_compiled > 0) {
        // Percentages are the relative change post-opt vs pre-opt; the
        // pre-opt count is clamped to at least 1 to avoid dividing by 0.
        out << Strutil::sprintf ("  Optimized %llu ops to %llu (%.1f%%)\n",
                                (long long)m_stat_preopt_ops,
                                (long long)m_stat_postopt_ops,
                                100.0*(double(m_stat_postopt_ops)/double(std::max(1,(int)m_stat_preopt_ops))-1.0));
        out << Strutil::sprintf ("  Optimized %llu symbols to %llu (%.1f%%)\n",
                                (long long)m_stat_preopt_syms,
                                (long long)m_stat_postopt_syms,
                                100.0*(double(m_stat_postopt_syms)/double(std::max(1,(int)m_stat_preopt_syms))-1.0));
    }
    out << Strutil::sprintf ("  Constant connections eliminated: %d\n",
                            (int)m_stat_const_connections);
    out << Strutil::sprintf ("  Global connections eliminated: %d\n",
                            (int)m_stat_global_connections);
    out << Strutil::sprintf ("  Middlemen eliminated: %d\n",
                            (int)m_stat_middlemen_eliminated);
    out << Strutil::sprintf ("  Derivatives needed on %d / %d symbols (%.1f%%)\n",
                            (int)m_stat_syms_with_derivs, (int)m_stat_postopt_syms,
                            (100.0*(int)m_stat_syms_with_derivs)/std::max((int)m_stat_postopt_syms,1));
    out << "  Runtime optimization cost: "
        << Strutil::timeintervalformat (m_stat_optimization_time, 2) << "\n";
    out << "    locking:                   "
        << Strutil::timeintervalformat (m_stat_opt_locking_time, 2) << "\n";
    out << "    runtime specialization:    "
        << Strutil::timeintervalformat (m_stat_specialization_time, 2) << "\n";
    // The LLVM breakdown is only shown when some LLVM time was recorded.
    if (m_stat_total_llvm_time > 0.0) {
        out << "    LLVM setup:                "
            << Strutil::timeintervalformat (m_stat_llvm_setup_time, 2) << "\n";
        out << "    LLVM IR gen:               "
            << Strutil::timeintervalformat (m_stat_llvm_irgen_time, 2) << "\n";
        out << "    LLVM optimize:             "
            << Strutil::timeintervalformat (m_stat_llvm_opt_time, 2) << "\n";
        out << "    LLVM JIT:                  "
            << Strutil::timeintervalformat (m_stat_llvm_jit_time, 2) << "\n";
    }

    out << "  Texture calls compiled: "
        << (int)m_stat_tex_calls_codegened
        << " (" << (int)m_stat_tex_calls_as_handles << " used handles)\n";
    out << "  Regex's compiled: " << m_stat_regexes << "\n";
    out << "  Largest generated function local memory size: "
        << m_stat_max_llvm_local_mem/1024 << " KB\n";
    if (m_stat_getattribute_calls) {
        out << "  getattribute calls: " << m_stat_getattribute_calls << " ("
            << Strutil::timeintervalformat (m_stat_getattribute_time, 2) << ")\n";
        out << "     (fail time "
            << Strutil::timeintervalformat (m_stat_getattribute_fail_time, 2) << ")\n";
    }
    out << "  Number of get_userdata calls: " << m_stat_get_userdata_calls << "\n";
    if (profile() > 1)
        out << "  Number of noise calls: " << m_stat_noise_calls << "\n";
    // Pointcloud section appears only if any search or write happened.
    if (m_stat_pointcloud_searches || m_stat_pointcloud_writes) {
        out << "  Pointcloud operations:\n";
        out << "    pointcloud_search calls: " << m_stat_pointcloud_searches << "\n";
        out << "      max query results: " << m_stat_pointcloud_max_results << "\n";
        double avg = m_stat_pointcloud_searches ?
            (double)m_stat_pointcloud_searches_total_results/(double)m_stat_pointcloud_searches : 0.0;
        out << "      average query results: " << Strutil::sprintf ("%.1f", avg) << "\n";
        out << "      failures: " << m_stat_pointcloud_failures << "\n";
        out << "    pointcloud_get calls: " << m_stat_pointcloud_gets << "\n";
        out << "    pointcloud_write calls: " << m_stat_pointcloud_writes << "\n";
    }
    out << "  Memory total: " << m_stat_memory.memstat() << '\n';
    out << "    Master memory: " << m_stat_mem_master.memstat() << '\n';
    out << "        Master ops:            " << m_stat_mem_master_ops.memstat() << '\n';
    out << "        Master args:           " << m_stat_mem_master_args.memstat() << '\n';
    out << "        Master syms:           " << m_stat_mem_master_syms.memstat() << '\n';
    out << "        Master defaults:       " << m_stat_mem_master_defaults.memstat() << '\n';
    out << "        Master consts:         " << m_stat_mem_master_consts.memstat() << '\n';
    out << "    Instance memory: " << m_stat_mem_inst.memstat() << '\n';
    out << "        Instance syms:         " << m_stat_mem_inst_syms.memstat() << '\n';
    out << "        Instance param values: " << m_stat_mem_inst_paramvals.memstat() << '\n';
    out << "        Instance connections:  " << m_stat_mem_inst_connections.memstat() << '\n';

    size_t jitmem = LLVM_Util::total_jit_memory_held();
    out << "    LLVM JIT memory: " << Strutil::memformat(jitmem) << '\n';

    if (m_profile) {
        out << "  Execution profile:\n";
        out << "    Total shader execution time: "
            << Strutil::timeintervalformat(OIIO::Timer::seconds(m_stat_total_shading_time_ticks), 2)
            << " (sum of all threads)\n";
        // Account for times of any groups that haven't yet been destroyed
        {
            spin_lock lock (m_all_shader_groups_mutex);
            for (auto&& grp : m_all_shader_groups) {
                // lock() only yields a group that is still alive.
                if (ShaderGroupRef g = grp.lock()) {
                    // Move the group's ticks into the per-name totals and
                    // subtract them from the group so that they are not
                    // added a second time later.
                    long long ticks = g->m_stat_total_shading_time_ticks;
                    m_group_profile_times[g->name()] += ticks;
                    g->m_stat_total_shading_time_ticks -= ticks;
                }
            }
        }
        {
            spin_lock lock (m_stat_mutex);
            // Copy the per-name totals, sort by descending time, and
            // report at most the five most expensive groups.
            std::vector<GroupTimeVal> grouptimes;
            for (std::map<ustring,long long>::const_iterator m = m_group_profile_times.begin();
                 m != m_group_profile_times.end(); ++m) {
                grouptimes.emplace_back(m->first, m->second);
            }
            std::sort (grouptimes.begin(), grouptimes.end(), group_time_compare());
            if (grouptimes.size() > 5)
                grouptimes.resize (5);
            if (grouptimes.size())
                out << "    Most expensive shader groups:\n";
            for (std::vector<GroupTimeVal>::const_iterator i = grouptimes.begin();
                     i != grouptimes.end(); ++i) {
                out << "      " << Strutil::timeintervalformat(OIIO::Timer::seconds(i->second),2)
                    << ' ' << (i->first.size() ? i->first.c_str() : "<unnamed group>") << "\n";
            }
        }

    }

    return out.str();
}
2078 
2079 
2080 
2081 void
printstats() const2082 ShadingSystemImpl::printstats () const
2083 {
2084     if (m_statslevel == 0)
2085         return;
2086     m_err->message (getstats (m_statslevel));
2087 }
2088 
2089 
2090 
2091 bool
Parameter(string_view name,TypeDesc t,const void * val,bool lockgeom)2092 ShadingSystemImpl::Parameter (string_view name, TypeDesc t, const void *val,
2093                               bool lockgeom)
2094 {
2095     return Parameter (*m_curgroup, name, t, val, lockgeom);
2096 }
2097 
2098 
2099 
2100 bool
Parameter(ShaderGroup & group,string_view name,TypeDesc t,const void * val,bool lockgeom)2101 ShadingSystemImpl::Parameter (ShaderGroup& group, string_view name,
2102                               TypeDesc t, const void *val, bool lockgeom)
2103 {
2104     // We work very hard not to do extra copies of the data.  First,
2105     // grow the pending list by one (empty) slot...
2106     group.m_pending_params.grow();
2107     // ...then initialize it in place
2108     group.m_pending_params.back().init (name, t, 1, val);
2109     // If we have a possible geometric override (lockgeom=false), set the
2110     // param's interpolation to VERTEX rather than the default CONSTANT.
2111     if (lockgeom == false)
2112         group.m_pending_params.back().interp (OIIO::ParamValue::INTERP_VERTEX);
2113     return true;
2114 }
2115 
2116 
2117 
2118 ShaderGroupRef
ShaderGroupBegin(string_view groupname)2119 ShadingSystemImpl::ShaderGroupBegin (string_view groupname)
2120 {
2121     ShaderGroupRef group (new ShaderGroup(groupname));
2122     group->m_exec_repeat = m_exec_repeat;
2123     {
2124         // Record the group in the SS's census of all extant groups
2125         spin_lock lock (m_all_shader_groups_mutex);
2126         m_all_shader_groups.push_back (group);
2127         ++m_groups_to_compile_count;
2128         m_curgroup = group;
2129     }
2130     return group;
2131 }
2132 
2133 
2134 
2135 bool
ShaderGroupEnd(void)2136 ShadingSystemImpl::ShaderGroupEnd (void)
2137 {
2138     if (! m_curgroup) {
2139         error ("ShaderGroupEnd() was called without ShaderGroupBegin()");
2140         return false;
2141     }
2142     bool ok = ShaderGroupEnd (*m_curgroup);
2143     m_curgroup.reset();  // no currently active group
2144     return ok;
2145 }
2146 
2147 
2148 
2149 bool
ShaderGroupEnd(ShaderGroup & group)2150 ShadingSystemImpl::ShaderGroupEnd (ShaderGroup& group)
2151 {
2152     // Lock just in case we do something not thread-safe within
2153     // ShaderGroupEnd. This may be overly cautious, but unless it shows
2154     // up as a major bottleneck, I'm inclined to play it safe.
2155     lock_guard lock (m_mutex);
2156 
2157     // Mark the layers that can be run lazily
2158     if (! group.m_group_use.empty()) {
2159         int nlayers = group.nlayers ();
2160         for (int layer = 0;  layer < nlayers;  ++layer) {
2161             ShaderInstance *inst = group[layer];
2162             if (! inst)
2163                 continue;
2164             inst->last_layer (layer == nlayers-1);
2165         }
2166 
2167         // Merge instances now if they really want it bad, otherwise wait
2168         // until we optimize the group.
2169         if (m_opt_merge_instances >= 2)
2170             merge_instances (group);
2171     }
2172 
2173     // Merge the raytype_queries of all the individual layers
2174     group.m_raytype_queries = 0;
2175     for (int layer = 0, n = group.nlayers(); layer < n; ++layer) {
2176         if (ShaderInstance *inst = group[layer])
2177             group.m_raytype_queries |= inst->master()->raytype_queries();
2178     }
2179     // std::cout << "Group " << group.name() << " ray query bits "
2180     //         << group.m_raytype_queries << "\n";
2181 
2182     ustring groupname = group.name();
2183     if (groupname.size() && groupname == m_archive_groupname) {
2184         std::string filename = m_archive_filename.string();
2185         if (! filename.size())
2186             filename = OIIO::Filesystem::filename (groupname.string()) + ".tar.gz";
2187         archive_shadergroup (group, filename);
2188     }
2189 
2190     group.m_complete = true;
2191     return true;
2192 }
2193 
2194 
2195 
2196 bool
Shader(string_view shaderusage,string_view shadername,string_view layername)2197 ShadingSystemImpl::Shader (string_view shaderusage,
2198                            string_view shadername,
2199                            string_view layername)
2200 {
2201     // Make sure we have a current attrib state
2202     bool singleton = (! m_curgroup);
2203     if (singleton)
2204         ShaderGroupBegin ("");
2205 
2206     return Shader (*m_curgroup, shaderusage, shadername, layername);
2207 }
2208 
2209 
2210 
2211 bool
Shader(ShaderGroup & group,string_view shaderusage,string_view shadername,string_view layername)2212 ShadingSystemImpl::Shader (ShaderGroup& group, string_view shaderusage,
2213                            string_view shadername, string_view layername)
2214 {
2215     ShaderMaster::ref master = loadshader (shadername);
2216     if (! master) {
2217         errorf("Could not find shader \"%s\"\n"
2218                "        group: %s",
2219                shadername, group.name());
2220         return false;
2221     }
2222 
2223     if (shaderusage.empty()) {
2224         errorf("Shader usage required\n"
2225                "        group: %s",
2226                shadername, group.name());
2227         return false;
2228     }
2229 
2230     // If a layer name was not supplied, make one up.
2231     std::string local_layername;
2232     if (layername.empty()) {
2233         local_layername = OIIO::Strutil::sprintf ("%s_%d", master->shadername(),
2234                                                  group.nlayers());
2235         layername = string_view (local_layername);
2236     }
2237 
2238     ShaderInstanceRef instance (new ShaderInstance (master, layername));
2239     instance->parameters (group.m_pending_params);
2240     group.m_pending_params.clear ();
2241     group.m_pending_params.shrink_to_fit ();
2242 
2243     if (group.m_group_use.empty()) {
2244         // First in a group
2245         group.clear ();
2246         m_stat_groups += 1;
2247         group.m_group_use = shaderusage;
2248     } else if (shaderusage != group.m_group_use) {
2249         errorf("Shader usage \"%s\" does not match current group (%s)\n"
2250                "        group: %s",
2251                shaderusage, group.m_group_use, group.name());
2252         return false;
2253     }
2254 
2255     group.append (instance);
2256     m_stat_groupinstances += 1;
2257 
2258     // FIXME -- check for duplicate layer name within the group?
2259 
2260     return true;
2261 }
2262 
2263 
2264 
2265 bool
ConnectShaders(string_view srclayer,string_view srcparam,string_view dstlayer,string_view dstparam)2266 ShadingSystemImpl::ConnectShaders (string_view srclayer, string_view srcparam,
2267                                    string_view dstlayer, string_view dstparam)
2268 {
2269     if (! m_curgroup) {
2270         error ("ConnectShaders can only be called within ShaderGroupBegin/End");
2271         return false;
2272     }
2273     return ConnectShaders (*m_curgroup, srclayer, srcparam, dstlayer, dstparam);
2274 }
2275 
2276 
2277 
2278 bool
ConnectShaders(ShaderGroup & group,string_view srclayer,string_view srcparam,string_view dstlayer,string_view dstparam)2279 ShadingSystemImpl::ConnectShaders (ShaderGroup& group,
2280                                    string_view srclayer, string_view srcparam,
2281                                    string_view dstlayer, string_view dstparam)
2282 {
2283     // Basic sanity checks
2284     // ConnectShaders, and that the layer and parameter names are not empty.
2285     if (! srclayer.size() || ! srcparam.size()) {
2286         errorf("ConnectShaders: badly formed source layer/parameter\n"
2287                "        group: %s", group.name());
2288         return false;
2289     }
2290     if (! dstlayer.size() || ! dstparam.size()) {
2291         errorf("ConnectShaders: badly formed destination layer/parameter\n"
2292                "        group: %s", group.name());
2293         return false;
2294     }
2295 
2296     // Decode the layers, finding the indices within our group and
2297     // pointers to the instances.  Error and return if they are not found,
2298     // or if it's not connecting an earlier src to a later dst.
2299     ShaderInstance *srcinst, *dstinst;
2300     int srcinstindex = find_named_layer_in_group (group, ustring(srclayer), srcinst);
2301     int dstinstindex = find_named_layer_in_group (group, ustring(dstlayer), dstinst);
2302     if (! srcinst) {
2303         errorf("ConnectShaders: source layer \"%s\" not found\n"
2304                "        group: %s", srclayer, group.name());
2305         return false;
2306     }
2307     if (! dstinst) {
2308         errorf("ConnectShaders: destination layer \"%s\" not found\n"
2309                "        group: %s", dstlayer, group.name());
2310         return false;
2311     }
2312     if (dstinstindex <= srcinstindex) {
2313         errorf("ConnectShaders: destination layer must follow source layer (tried to connect %s.%s -> %s.%s)\n"
2314                "        group: %s", srclayer, srcparam, dstlayer, dstparam,
2315                group.name());
2316         return false;
2317     }
2318 
2319     // Decode the parameter names, find their symbols in their
2320     // respective layers, and also decode request to attach specific
2321     // array elements or color/vector channels.
2322     ConnectedParam srccon = decode_connected_param(srcparam, srclayer, srcinst);
2323     ConnectedParam dstcon = decode_connected_param(dstparam, dstlayer, dstinst);
2324     if (! (srccon.valid() && dstcon.valid())) {
2325         if (connection_error())
2326             errorf("ConnectShaders: cannot connect a %s (%s) to a %s (%s), invalid connection\n"
2327                    "        group: %s",
2328                    srccon.type, srcparam, dstcon.type, dstparam, group.name());
2329         else
2330             warningf("ConnectShaders: cannot connect a %s (%s) to a %s (%s), invalid connection\n"
2331                      "        group: %s",
2332                      srccon.type, srcparam, dstcon.type, dstparam, group.name());
2333         return false;
2334     }
2335 
2336     if (srccon.type.is_structure() && dstcon.type.is_structure() &&
2337             equivalent (srccon.type, dstcon.type)) {
2338         // If the connection is whole struct-to-struct (and they are
2339         // structs with equivalent data layout), implement it underneath
2340         // as connections between their respective fields.
2341         StructSpec *srcstruct = srccon.type.structspec();
2342         StructSpec *dststruct = dstcon.type.structspec();
2343         for (size_t i = 0;  i < (size_t)srcstruct->numfields();  ++i) {
2344             std::string s = Strutil::sprintf("%s.%s", srcparam, srcstruct->field(i).name);
2345             std::string d = Strutil::sprintf("%s.%s", dstparam, dststruct->field(i).name);
2346             ConnectShaders (group, srclayer, s, dstlayer, d);
2347         }
2348         return true;
2349     }
2350 
2351     if (! assignable (dstcon.type, srccon.type)) {
2352         if (connection_error())
2353             errorf("ConnectShaders: cannot connect a %s (%s) to a %s (%s)\n"
2354                    "        group: %s",
2355                    srccon.type, srcparam, dstcon.type, dstparam, group.name());
2356         else
2357             warningf("ConnectShaders: cannot connect a %s (%s) to a %s (%s)\n"
2358                      "        group: %s",
2359                      srccon.type, srcparam, dstcon.type, dstparam, group.name());
2360         return false;
2361     }
2362 
2363     const Symbol *dstsym = dstinst->mastersymbol(dstcon.param);
2364     if (dstsym && !dstsym->allowconnect()) {
2365         std::string name = dstlayer.size() ? Strutil::sprintf("%s.%s", dstlayer, dstparam)
2366                                            : std::string(dstparam);
2367         errorf("ConnectShaders: cannot connect to %s because it has metadata allowconnect=0\n"
2368                "        group: %s", name, group.name());
2369         return false;
2370     }
2371 
2372     dstinst->add_connection (srcinstindex, srccon, dstcon);
2373     dstinst->instoverride(dstcon.param)->valuesource (Symbol::ConnectedVal);
2374     srcinst->instoverride(srccon.param)->connected_down (true);
2375     srcinst->outgoing_connections (true);
2376 
2377     // if (debug())
2378     //     message ("ConnectShaders %s %s -> %s %s\n",
2379     //              srclayer, srcparam, dstlayer, dstparam);
2380 
2381     return true;
2382 }
2383 
2384 
2385 
2386 ShaderGroupRef
ShaderGroupBegin(string_view groupname,string_view usage,string_view groupspec)2387 ShadingSystemImpl::ShaderGroupBegin (string_view groupname,
2388                                      string_view usage,
2389                                      string_view groupspec)
2390 {
2391     ShaderGroupRef g = ShaderGroupBegin (groupname);
2392     bool err = false;
2393     std::string errdesc;
2394     string_view errstatement;
2395     std::vector<int> intvals;
2396     std::vector<float> floatvals;
2397     std::vector<ustring> stringvals;
2398     string_view p = groupspec;   // parse view
2399     // std::cout << "!!!!!\n---\n" << groupspec << "\n---\n\n";
2400     while (p.size()) {
2401         string_view pstart = p;  // save where we were for error reporting
2402         Strutil::skip_whitespace (p);
2403         if (! p.size())
2404             break;
2405         while (Strutil::parse_char (p, ';'))  // skip blank statements
2406             ;
2407         string_view keyword = Strutil::parse_word (p);
2408 
2409         if (keyword == "shader") {
2410             string_view shadername = Strutil::parse_identifier (p);
2411             Strutil::skip_whitespace (p);
2412             string_view layername = Strutil::parse_until (p, " \t\r\n,;");
2413             bool ok = Shader (*g, usage, shadername, layername);
2414             if (!ok) {
2415                 errstatement = pstart;
2416                 err = true;
2417                 break;
2418             }
2419             Strutil::parse_char (p, ';') || Strutil::parse_char (p, ',');
2420             Strutil::skip_whitespace (p);
2421             continue;
2422         }
2423 
2424         if (keyword == "connect") {
2425             Strutil::skip_whitespace (p);
2426             string_view lay1 = Strutil::parse_until (p, " \t\r\n.");
2427             Strutil::parse_char (p, '.');
2428             string_view param1 = Strutil::parse_until (p, " \t\r\n,;");
2429             Strutil::skip_whitespace (p);
2430             string_view lay2 = Strutil::parse_until (p, " \t\r\n.");
2431             Strutil::parse_char (p, '.');
2432             string_view param2 = Strutil::parse_until (p, " \t\r\n,;");
2433             bool ok = ConnectShaders (*g, lay1, param1, lay2, param2);
2434             if (!ok) {
2435                 errstatement = pstart;
2436                 err = true;
2437                 break;
2438             }
2439             Strutil::parse_char (p, ';') || Strutil::parse_char (p, ',');
2440             Strutil::skip_whitespace (p);
2441             continue;
2442         }
2443 
2444         // Remaining case -- it should be declaring a parameter.
2445         string_view typestring;
2446         if (keyword == "param") {
2447             typestring = Strutil::parse_word (p);
2448         } else if (TypeDesc(keyword.str().c_str()) != TypeDesc::UNKNOWN) {
2449             // compatibility: let the 'param' keyword be optional, if it's
2450             // obvious that it's a type name.
2451             typestring = keyword;
2452         } else {
2453             err = true;
2454             errdesc = Strutil::sprintf ("Unknown statement (expected 'param', "
2455                                        "'shader', or 'connect'): \"%s\"",
2456                                        keyword);
2457             break;
2458         }
2459         TypeDesc type;
2460         if (typestring == "int")
2461             type = TypeDesc::TypeInt;
2462         else if (typestring == "float")
2463             type = TypeDesc::TypeFloat;
2464         else if (typestring == "color")
2465             type = TypeDesc::TypeColor;
2466         else if (typestring == "point")
2467             type = TypeDesc::TypePoint;
2468         else if (typestring == "vector")
2469             type = TypeDesc::TypeVector;
2470         else if (typestring == "normal")
2471             type = TypeDesc::TypeNormal;
2472         else if (typestring == "matrix")
2473             type = TypeDesc::TypeMatrix;
2474         else if (typestring == "string")
2475             type = TypeDesc::TypeString;
2476         else {
2477             err = true;
2478             errdesc = Strutil::sprintf ("Unknown type: %s", typestring);
2479             break;  // error
2480         }
2481         if (Strutil::parse_char (p, '[')) {
2482             int arraylen = -1;
2483             Strutil::parse_int (p, arraylen);
2484             Strutil::parse_char (p, ']');
2485             type.arraylen = arraylen;
2486         }
2487         std::string paramname_string;
2488         while (1) {
2489             paramname_string += Strutil::parse_identifier (p);
2490             Strutil::skip_whitespace (p);
2491             if (Strutil::parse_char (p, '.')) {
2492                 paramname_string += ".";
2493             } else {
2494                 break;
2495             }
2496         }
2497         string_view paramname (paramname_string);
2498         int lockgeom = m_lockgeom_default;
2499         // For speed, reserve space. Note that for "unsized" arrays, we only
2500         // preallocate 1 slot and let it grow as needed. That's ok. For
2501         // everything else, we will reserve the right amount up front.
2502         int vals_to_preallocate = type.is_unsized_array()
2503                                 ? 1 : type.numelements() * type.aggregate;
2504         // Stop parsing values when we hit the limit based on the
2505         // declaration.
2506         int max_vals = type.is_unsized_array() ? 1<<28 : vals_to_preallocate;
2507         if (type.basetype == TypeDesc::INT) {
2508             intvals.clear ();
2509             intvals.reserve (vals_to_preallocate);
2510             int i;
2511             for (i = 0; i < max_vals; ++i) {
2512                 int val = 0;
2513                 if (Strutil::parse_int (p, val))
2514                     intvals.push_back (val);
2515                 else
2516                     break;
2517             }
2518             if (type.is_unsized_array()) {
2519                 // For unsized arrays, now set the size based on how many
2520                 // values we actually read.
2521                 type.arraylen = std::max (1, i/type.aggregate);
2522             }
2523             // Zero-pad if we parsed fewer values than we needed
2524             intvals.resize (type.numelements()*type.aggregate, 0);
2525             OSL_DASSERT (int(type.numelements())*type.aggregate == int(intvals.size()));
2526         } else if (type.basetype == TypeDesc::FLOAT) {
2527             floatvals.clear ();
2528             floatvals.reserve (vals_to_preallocate);
2529             int i;
2530             for (i = 0; i < max_vals; ++i) {
2531                 float val = 0;
2532                 if (Strutil::parse_float (p, val))
2533                     floatvals.push_back (val);
2534                 else
2535                     break;
2536             }
2537             if (type.is_unsized_array()) {
2538                 // For unsized arrays, now set the size based on how many
2539                 // values we actually read.
2540                 type.arraylen = std::max (1, i/type.aggregate);
2541             }
2542             // Zero-pad if we parsed fewer values than we needed
2543             floatvals.resize (type.numelements()*type.aggregate, 0);
2544             OSL_DASSERT (int(type.numelements())*type.aggregate == int(floatvals.size()));
2545         } else if (type.basetype == TypeDesc::STRING) {
2546             stringvals.clear ();
2547             stringvals.reserve (vals_to_preallocate);
2548             int i;
2549             for (i = 0; i < max_vals; ++i) {
2550                 std::string unescaped;
2551                 string_view s;
2552                 Strutil::skip_whitespace (p);
2553                 if (p.size() && p[0] == '\"') {
2554                     if (! Strutil::parse_string (p, s))
2555                         break;
2556                     unescaped = Strutil::unescape_chars (s);
2557                     s = unescaped;
2558                 }
2559                 else {
2560                     s = Strutil::parse_until (p, " \t\r\n;");
2561                     if (s.size() == 0)
2562                         break;
2563                 }
2564                 stringvals.emplace_back(s);
2565             }
2566             if (type.is_unsized_array()) {
2567                 // For unsized arrays, now set the size based on how many
2568                 // values we actually read.
2569                 type.arraylen = std::max (1, i/type.aggregate);
2570             }
2571             // Zero-pad if we parsed fewer values than we needed
2572             stringvals.resize (type.numelements()*type.aggregate, ustring());
2573             OSL_DASSERT (int(type.numelements())*type.aggregate == int(stringvals.size()));
2574         }
2575 
2576         if (Strutil::parse_prefix (p, "[[")) {  // hints
2577             do {
2578                 Strutil::skip_whitespace (p);
2579                 string_view hint_typename = Strutil::parse_word (p);
2580                 string_view hint_name = Strutil::parse_identifier (p);
2581                 TypeDesc hint_type (hint_typename.str().c_str());
2582                 if (! hint_name.size() || hint_type == TypeDesc::UNKNOWN) {
2583                     err = true;
2584                     errdesc = "malformed hint";
2585                     break;
2586                 }
2587                 if (! Strutil::parse_char (p, '=')) {
2588                     err = true;
2589                     errdesc = "hint expected value";
2590                     break;
2591                 }
2592                 if (hint_name == "lockgeom" && hint_type == TypeDesc::INT) {
2593                     if (! Strutil::parse_int (p, lockgeom)) {
2594                         err = true;
2595                         errdesc = Strutil::sprintf ("hint %s expected int value", hint_name);
2596                         break;
2597                     }
2598                 } else {
2599                     err = true;
2600                     errdesc = Strutil::sprintf ("unknown hint '%s %s'",
2601                                                hint_type, hint_name);
2602                     break;
2603                 }
2604             } while (Strutil::parse_char (p, ','));
2605             if (err)
2606                 break;
2607             if (! Strutil::parse_prefix (p, "]]")) {
2608                 err = true;
2609                 errdesc = "malformed hint";
2610                 break;
2611             }
2612         }
2613 
2614         bool ok = true;
2615         if (type.basetype == TypeDesc::INT) {
2616             ok = Parameter (*g, paramname, type, &intvals[0], lockgeom);
2617         } else if (type.basetype == TypeDesc::FLOAT) {
2618             ok = Parameter (*g, paramname, type, &floatvals[0], lockgeom);
2619         } else if (type.basetype == TypeDesc::STRING) {
2620             ok = Parameter (*g, paramname, type, &stringvals[0], lockgeom);
2621         }
2622         if (!ok) {
2623             errstatement = pstart;
2624             err = true;
2625             break;
2626         }
2627 
2628         Strutil::skip_whitespace (p);
2629         if (! p.size())
2630             break;
2631 
2632         if (Strutil::parse_char (p, ';') || Strutil::parse_char (p, ','))
2633             continue;  // next command
2634 
2635         Strutil::parse_until_char (p, ';');
2636         if (! Strutil::parse_char (p, ';')) {
2637             err = true;
2638             errdesc = "semicolon expected";
2639         }
2640     }
2641 
2642     if (err) {
2643         std::string msg = Strutil::sprintf(
2644                 "ShaderGroupBegin: error parsing group description: %s\n"
2645                 "        group: %s",
2646                 errdesc, g->name());
2647         if (errstatement.empty()) {
2648             size_t offset = p.data() - groupspec.data();
2649             size_t begin_stmt = std::min (groupspec.find_last_of (';', offset),
2650                                           groupspec.find_last_of (',', offset));
2651             size_t end_stmt = groupspec.find_first_of (';', begin_stmt+1);
2652             errstatement = groupspec.substr (begin_stmt+1, end_stmt-begin_stmt);
2653         }
2654         if (errstatement.size())
2655             msg += Strutil::sprintf("\n        problem might be here: %s",
2656                                     errstatement);
2657         errorf("%s", msg);
2658         if (debug())
2659             infof("Broken group was:\n---%s\n---\n", groupspec);
2660         return ShaderGroupRef();
2661     }
2662 
2663     return g;
2664 }
2665 
2666 
2667 
2668 bool
ReParameter(ShaderGroup & group,string_view layername_,string_view paramname,TypeDesc type,const void * val)2669 ShadingSystemImpl::ReParameter (ShaderGroup &group, string_view layername_,
2670                                 string_view paramname,
2671                                 TypeDesc type, const void *val)
2672 {
2673     // Find the named layer
2674     ustring layername (layername_);
2675     ShaderInstance *layer = NULL;
2676     for (int i = 0, e = group.nlayers();  i < e;  ++i) {
2677         if (group[i]->layername() == layername) {
2678             layer = group[i];
2679             break;
2680         }
2681     }
2682     if (! layer)
2683         return false;   // could not find the named layer
2684 
2685     // Find the named parameter within the layer
2686     int paramindex = layer->findparam (ustring(paramname));
2687     if (paramindex < 0)
2688         return false;   // could not find the named parameter
2689 
2690     Symbol *sym = layer->symbol (paramindex);
2691     if (!sym) {
2692         // Can have a paramindex >= 0, but no symbol when it's a master-symbol
2693         OSL_DASSERT(layer->mastersymbol(paramindex) && "No symbol for paramindex");
2694         return false;
2695     }
2696 
2697     // Check for mismatch versus previously-declared type
2698     if (!equivalent(sym->typespec(), type))
2699         return false;
2700 
2701     // Can't change param value if the group has already been optimized,
2702     // unless that parameter is marked lockgeom=0.
2703     if (group.optimized() && sym->lockgeom())
2704         return false;
2705 
2706     // Do the deed
2707     memcpy (sym->data(), val, type.size());
2708     return true;
2709 }
2710 
2711 
2712 
2713 PerThreadInfo *
create_thread_info()2714 ShadingSystemImpl::create_thread_info()
2715 {
2716     return new PerThreadInfo;
2717 }
2718 
2719 
2720 
// Delete a PerThreadInfo.  Passing a null pointer is harmless, since
// deleting a null pointer does nothing.
void
ShadingSystemImpl::destroy_thread_info (PerThreadInfo *threadinfo)
{
    delete threadinfo;
}
2726 
2727 
2728 
2729 ShadingContext *
get_context(PerThreadInfo * threadinfo,TextureSystem::Perthread * texture_threadinfo)2730 ShadingSystemImpl::get_context (PerThreadInfo *threadinfo,
2731                                 TextureSystem::Perthread *texture_threadinfo)
2732 {
2733     if (! threadinfo) {
2734 #if OSL_VERSION < 20200
2735         threadinfo = get_perthread_info ();
2736         warning ("ShadingSystem::get_context called without a PerThreadInfo");
2737 #else
2738         error ("ShadingSystem::get_context called without a PerThreadInfo");
2739         return nullptr;
2740 #endif
2741     }
2742     ShadingContext *ctx = threadinfo->context_pool.empty()
2743                           ? new ShadingContext (*this, threadinfo)
2744                           : threadinfo->pop_context ();
2745     ctx->texture_thread_info (texture_threadinfo);
2746     return ctx;
2747 }
2748 
2749 
2750 
2751 void
release_context(ShadingContext * ctx)2752 ShadingSystemImpl::release_context (ShadingContext *ctx)
2753 {
2754     if (! ctx)
2755         return;
2756     ctx->process_errors ();
2757     ctx->thread_info()->context_pool.push (ctx);
2758 }
2759 
2760 
2761 
2762 bool
execute(ShadingContext & ctx,ShaderGroup & group,ShaderGlobals & ssg,bool run)2763 ShadingSystemImpl::execute (ShadingContext &ctx, ShaderGroup &group,
2764                             ShaderGlobals &ssg, bool run)
2765 {
2766     return ctx.execute (group, ssg, run);
2767 }
2768 
2769 
2770 
2771 // Deprecated
2772 bool
execute(ShadingContext * ctx,ShaderGroup & group,ShaderGlobals & ssg,bool run)2773 ShadingSystemImpl::execute (ShadingContext *ctx, ShaderGroup &group,
2774                             ShaderGlobals &ssg, bool run)
2775 {
2776     bool free_context = false;
2777     OSL::PerThreadInfo *thread_info = nullptr;
2778     if (! ctx) {
2779         thread_info = create_thread_info();
2780         ctx = get_context(thread_info);
2781         free_context = true;
2782     }
2783     bool result = ctx->execute (group, ssg, run);
2784     if (free_context) {
2785         release_context(ctx);
2786         destroy_thread_info(thread_info);
2787     }
2788     return result;
2789 }
2790 
2791 
2792 
2793 const void *
get_symbol(ShadingContext & ctx,ustring layername,ustring symbolname,TypeDesc & type)2794 ShadingSystemImpl::get_symbol (ShadingContext &ctx, ustring layername,
2795                                ustring symbolname, TypeDesc &type)
2796 {
2797     const Symbol *sym = ctx.symbol (layername, symbolname);
2798     if (sym) {
2799         type = sym->typespec().simpletype();
2800         return ctx.symbol_data (*sym);
2801     } else {
2802         return NULL;
2803     }
2804 }
2805 
2806 
2807 
2808 int
find_named_layer_in_group(ShaderGroup & group,ustring layername,ShaderInstance * & inst)2809 ShadingSystemImpl::find_named_layer_in_group (ShaderGroup& group,
2810                                               ustring layername,
2811                                               ShaderInstance * &inst)
2812 {
2813     inst = NULL;
2814     if (group.m_group_use.empty())
2815         return -1;
2816     for (int i = 0;  i < group.nlayers();  ++i) {
2817         if (group[i]->layername() == layername) {
2818             inst = group[i];
2819             return i;
2820         }
2821     }
2822     return -1;
2823 }
2824 
2825 
2826 
2827 ConnectedParam
decode_connected_param(string_view connectionname,string_view layername,ShaderInstance * inst)2828 ShadingSystemImpl::decode_connected_param (string_view connectionname,
2829                                 string_view layername, ShaderInstance *inst)
2830 {
2831     ConnectedParam c;  // initializes to "invalid"
2832 
2833     // Look for a bracket in the "parameter name"
2834     size_t bracketpos = connectionname.find ('[');
2835     // Grab just the part of the param name up to the bracket
2836     ustring param (connectionname, 0, bracketpos);
2837     string_view cname_remaining = connectionname.substr (bracketpos);
2838 
2839     // Search for the param with that name, fail if not found
2840     c.param = inst->findsymbol (param);
2841     if (c.param < 0) {
2842         if (connection_error())
2843             errorf("ConnectShaders: \"%s\" is not a parameter or global of layer \"%s\" (shader \"%s\")",
2844                    param, layername, inst->shadername());
2845         else
2846             warningf("ConnectShaders: \"%s\" is not a parameter or global of layer \"%s\" (shader \"%s\")",
2847                      param, layername, inst->shadername());
2848         return c;
2849     }
2850 
2851     const Symbol *sym = inst->mastersymbol (c.param);
2852     OSL_ASSERT (sym);
2853 
2854     // Only params, output params, and globals are legal for connections
2855     if (! (sym->symtype() == SymTypeParam ||
2856            sym->symtype() == SymTypeOutputParam ||
2857            sym->symtype() == SymTypeGlobal)) {
2858         errorf("ConnectShaders: \"%s\" is not a parameter or global of layer \"%s\" (shader \"%s\")",
2859                param, layername, inst->shadername());
2860         c.param = -1;  // mark as invalid
2861         return c;
2862     }
2863 
2864     c.type = sym->typespec();
2865 
2866     if (! cname_remaining.empty() && c.type.is_array()) {
2867         // There was at least one set of brackets that appears to be
2868         // selecting an array element.
2869         int index = 0;
2870         if (! (Strutil::parse_char (cname_remaining, '[') &&
2871                Strutil::parse_int  (cname_remaining, index) &&
2872                Strutil::parse_char (cname_remaining, ']'))) {
2873             errorf("ConnectShaders: malformed parameter \"%s\"", connectionname);
2874             c.param = -1;  // mark as invalid
2875             return c;
2876         }
2877         c.arrayindex = index;
2878         if (c.arrayindex >= c.type.arraylength()) {
2879             errorf("ConnectShaders: cannot request array element %s from a %s",
2880                    connectionname, c.type);
2881             c.arrayindex = c.type.arraylength() - 1;  // clamp it
2882         }
2883         c.type.make_array (0);              // chop to the element type
2884         Strutil::skip_whitespace (cname_remaining); // skip to next bracket
2885     }
2886 
2887     if (! cname_remaining.empty() && cname_remaining.front() == '[' &&
2888           ! c.type.is_closure() && c.type.aggregate() != TypeDesc::SCALAR) {
2889         // There was at least one set of brackets that appears to be
2890         // selecting a color/vector component.
2891         int index = 0;
2892         if (! (Strutil::parse_char (cname_remaining, '[') &&
2893                Strutil::parse_int  (cname_remaining, index) &&
2894                Strutil::parse_char (cname_remaining, ']'))) {
2895             errorf("ConnectShaders: malformed parameter \"%s\"", connectionname);
2896             c.param = -1;  // mark as invalid
2897             return c;
2898         }
2899         c.channel = index;
2900         if (c.channel >= (int)c.type.aggregate()) {
2901             errorf("ConnectShaders: cannot request component %s from a %s",
2902                    connectionname, c.type);
2903             c.channel = (int)c.type.aggregate() - 1;  // clamp it
2904         }
2905         // chop to just the scalar part
2906         c.type = TypeSpec ((TypeDesc::BASETYPE)c.type.simpletype().basetype);
2907         Strutil::skip_whitespace (cname_remaining);
2908     }
2909 
2910     // Deal with left over nonsense or unsupported param designations
2911     if (! cname_remaining.empty()) {
2912         // Still a leftover bracket, no idea what to do about that
2913         errorf("ConnectShaders: don't know how to connect '%s' when \"%s\" is a \"%s\"",
2914                connectionname, param, c.type);
2915         c.param = -1;  // mark as invalid
2916     }
2917     return c;
2918 }
2919 
2920 
2921 
2922 int
raytype_bit(ustring name)2923 ShadingSystemImpl::raytype_bit (ustring name)
2924 {
2925     for (size_t i = 0, e = m_raytypes.size();  i < e;  ++i)
2926         if (name == m_raytypes[i])
2927             return (1 << i);
2928     return 0;  // not found
2929 }
2930 
2931 
2932 
2933 bool
is_renderer_output(ustring layername,ustring paramname,ShaderGroup * group) const2934 ShadingSystemImpl::is_renderer_output (ustring layername, ustring paramname,
2935                                        ShaderGroup *group) const
2936 {
2937     if (group) {
2938         const std::vector<ustring> &aovs (group->m_renderer_outputs);
2939         if (aovs.size() > 0) {
2940             if (std::find(aovs.begin(), aovs.end(), paramname) != aovs.end())
2941                 return true;
2942             // Try "layer.name"
2943             ustring name2 = ustring::sprintf("%s.%s", layername, paramname);
2944             if (std::find(aovs.begin(), aovs.end(), name2) != aovs.end())
2945                 return true;
2946         }
2947     }
2948     const std::vector<ustring> &aovs (m_renderer_outputs);
2949     if (aovs.size() > 0) {
2950         if (std::find(aovs.begin(), aovs.end(), paramname) != aovs.end())
2951             return true;
2952         ustring name2 = ustring::sprintf("%s.%s", layername, paramname);
2953         if (std::find(aovs.begin(), aovs.end(), name2) != aovs.end())
2954             return true;
2955     }
2956     return false;
2957 }
2958 
2959 
2960 
2961 void
group_post_jit_cleanup(ShaderGroup & group)2962 ShadingSystemImpl::group_post_jit_cleanup (ShaderGroup &group)
2963 {
2964     // Once we're generated the IR, we really don't need the ops and args,
2965     // and we only need the syms that include the params.
2966     off_t symmem = 0;
2967     size_t connectionmem = 0;
2968     for (int layer = 0;  layer < group.nlayers();  ++layer) {
2969         ShaderInstance *inst = group[layer];
2970         // We no longer needs ops and args -- create empty vectors and
2971         // swap with the ones in the instance.
2972         OpcodeVec emptyops;
2973         inst->ops().swap (emptyops);
2974         std::vector<int> emptyargs;
2975         inst->args().swap (emptyargs);
2976         if (inst->unused()) {
2977             // If we'll never use the layer, we don't need the syms at all
2978             SymbolVec nosyms;
2979             std::swap (inst->symbols(), nosyms);
2980             symmem += vectorbytes(nosyms);
2981             // also don't need the connection info any more
2982             connectionmem += (off_t) inst->clear_connections ();
2983         }
2984     }
2985     {
2986         // adjust memory stats
2987         spin_lock lock (m_stat_mutex);
2988         m_stat_mem_inst_syms -= symmem;
2989         m_stat_mem_inst_connections -= connectionmem;
2990         m_stat_mem_inst -= symmem + connectionmem;
2991         m_stat_memory -= symmem + connectionmem;
2992     }
2993 }
2994 
2995 
2996 
2997 void
optimize_group(ShaderGroup & group,ShadingContext * ctx,bool do_jit)2998 ShadingSystemImpl::optimize_group (ShaderGroup &group, ShadingContext *ctx, bool do_jit)
2999 {
3000     if (group.optimized() && (!do_jit || group.jitted()))
3001         return;    // already optimized and optionally jitted
3002 
3003     OIIO::Timer timer;
3004     lock_guard lock (group.m_mutex);
3005     bool need_jit = do_jit && !group.jitted();
3006     if (group.optimized() && !need_jit) {
3007         // The group was somehow optimized by another thread between the
3008         // time we checked group.optimized() and now that we have the lock.
3009         // Nothing to do but record how long we waited for the lock.
3010         spin_lock stat_lock (m_stat_mutex);
3011         double t = timer();
3012         m_stat_optimization_time += t;
3013         m_stat_opt_locking_time += t;
3014         return;
3015     }
3016 
3017     if (!m_only_groupname.empty() && m_only_groupname != group.name()) {
3018         // For debugging purposes, we are requested to compile only one
3019         // shader group, and this is not it.  Mark it as does_nothing,
3020         // and also as optimized so nobody locks on it again, and record
3021         // how long we waited for the lock.
3022         group.does_nothing (true);
3023         group.m_optimized = true;
3024         group.m_jitted = true;
3025         spin_lock stat_lock (m_stat_mutex);
3026         double t = timer();
3027         m_stat_optimization_time += t;
3028         m_stat_opt_locking_time += t;
3029         return;
3030     }
3031 
3032     double locking_time = timer();
3033 
3034     bool ctx_allocated = false;
3035     PerThreadInfo *thread_info = nullptr;
3036     if (! ctx) {
3037         thread_info = create_thread_info();
3038         ctx = get_context(thread_info);
3039         ctx_allocated = true;
3040     }
3041     if (!group.optimized()) {
3042         RuntimeOptimizer rop (*this, group, ctx);
3043         rop.run ();
3044         rop.police_failed_optimizations();
3045 
3046         // Copy some info recorded by the RuntimeOptimizer into the group
3047         group.m_unknown_textures_needed = rop.m_unknown_textures_needed;
3048         for (auto&& f : rop.m_textures_needed)
3049             group.m_textures_needed.push_back (f);
3050         group.m_unknown_closures_needed = rop.m_unknown_closures_needed;
3051         for (auto&& f : rop.m_closures_needed)
3052             group.m_closures_needed.push_back (f);
3053         for (auto&& f : rop.m_globals_needed)
3054             group.m_globals_needed.push_back (f);
3055         group.m_globals_read = rop.m_globals_read;
3056         group.m_globals_write = rop.m_globals_write;
3057         size_t num_userdata = rop.m_userdata_needed.size();
3058         group.m_userdata_names.reserve (num_userdata);
3059         group.m_userdata_types.reserve (num_userdata);
3060         group.m_userdata_offsets.resize (num_userdata, 0);
3061         group.m_userdata_derivs.reserve (num_userdata);
3062         group.m_userdata_layers.reserve (num_userdata);
3063         group.m_userdata_init_vals.reserve (num_userdata);
3064         for (auto&& n : rop.m_userdata_needed) {
3065             group.m_userdata_names.push_back (n.name);
3066             group.m_userdata_types.push_back (n.type);
3067             group.m_userdata_derivs.push_back (n.derivs);
3068             group.m_userdata_layers.push_back (n.layer_num);
3069             group.m_userdata_init_vals.push_back (n.data);
3070         }
3071         group.m_unknown_attributes_needed = rop.m_unknown_attributes_needed;
3072         for (auto&& f : rop.m_attributes_needed) {
3073             group.m_attributes_needed.push_back (f.name);
3074             group.m_attribute_scopes.push_back (f.scope);
3075         }
3076         group.m_optimized = true;
3077 
3078         spin_lock stat_lock (m_stat_mutex);
3079         if (!need_jit) {
3080             m_stat_opt_locking_time += locking_time;
3081             m_stat_optimization_time += timer();
3082         }
3083         m_stat_opt_locking_time += rop.m_stat_opt_locking_time;
3084         m_stat_opt_locking_time += locking_time + rop.m_stat_opt_locking_time;
3085         m_stat_specialization_time += rop.m_stat_specialization_time;
3086     }
3087 
3088     if (need_jit) {
3089         BackendLLVM lljitter (*this, group, ctx);
3090         lljitter.run ();
3091 
3092         // NOTE: it is now possible to optimize and not JIT
3093         // which would leave the cleanup to happen
3094         // when the ShadingSystem is destroyed
3095         group_post_jit_cleanup (group);
3096 
3097         group.m_jitted = true;
3098         spin_lock stat_lock (m_stat_mutex);
3099         m_stat_opt_locking_time += locking_time;
3100         m_stat_optimization_time += timer();
3101         m_stat_total_llvm_time += lljitter.m_stat_total_llvm_time;
3102         m_stat_llvm_setup_time += lljitter.m_stat_llvm_setup_time;
3103         m_stat_llvm_irgen_time += lljitter.m_stat_llvm_irgen_time;
3104         m_stat_llvm_opt_time += lljitter.m_stat_llvm_opt_time;
3105         m_stat_llvm_jit_time += lljitter.m_stat_llvm_jit_time;
3106         m_stat_max_llvm_local_mem = std::max (m_stat_max_llvm_local_mem,
3107                                               lljitter.m_llvm_local_mem);
3108     }
3109 
3110     if (ctx_allocated) {
3111         release_context(ctx);
3112         destroy_thread_info(thread_info);
3113     }
3114 
3115     m_stat_groups_compiled += 1;
3116     m_stat_instances_compiled += group.nlayers();
3117     m_groups_to_compile_count -= 1;
3118 }
3119 
3120 
3121 
optimize_all_groups_wrapper(ShadingSystemImpl * ss,int mythread,int totalthreads,bool do_jit)3122 static void optimize_all_groups_wrapper (ShadingSystemImpl *ss, int mythread, int totalthreads, bool do_jit)
3123 {
3124     ss->optimize_all_groups (1, mythread, totalthreads, do_jit);
3125 }
3126 
3127 
3128 
3129 void
optimize_all_groups(int nthreads,int mythread,int totalthreads,bool do_jit)3130 ShadingSystemImpl::optimize_all_groups (int nthreads, int mythread, int totalthreads, bool do_jit)
3131 {
3132     // Spawn a bunch of threads to do this in parallel -- just call this
3133     // routine again (with threads=1) for each thread.
3134     if (nthreads < 1)  // threads <= 0 means use all hardware available
3135         nthreads = std::min ((int)std::thread::hardware_concurrency(),
3136                              (int)m_groups_to_compile_count);
3137     if (nthreads > 1) {
3138         if (m_threads_currently_compiling)
3139             return;   // never mind, somebody else spawned the JIT threads
3140         OIIO::thread_group threads;
3141         m_threads_currently_compiling += nthreads;
3142         for (int t = 0;  t < nthreads;  ++t)
3143             threads.add_thread (new std::thread (optimize_all_groups_wrapper, this, t, nthreads, do_jit));
3144         threads.join_all ();
3145         m_threads_currently_compiling -= nthreads;
3146         return;
3147     }
3148 
3149     // And here's the single thread case
3150     size_t ngroups = 0;
3151     {
3152         spin_lock lock (m_all_shader_groups_mutex);
3153         ngroups = m_all_shader_groups.size();
3154     }
3155     PerThreadInfo* threadinfo = create_thread_info();
3156     ShadingContext* ctx = get_context(threadinfo);
3157     for (size_t i = 0;  i < ngroups;  ++i) {
3158         // Assign to threads based on mod of totalthreads
3159         if ((i % totalthreads) == (unsigned)mythread) {
3160             ShaderGroupRef group;
3161             {
3162                 spin_lock lock (m_all_shader_groups_mutex);
3163                 group = m_all_shader_groups[i].lock();
3164             }
3165             if (group && group->m_complete)
3166                 optimize_group (*group, ctx, do_jit);
3167         }
3168     }
3169     release_context(ctx);
3170     destroy_thread_info(threadinfo);
3171 }
3172 
3173 
3174 
3175 int
merge_instances(ShaderGroup & group,bool post_opt)3176 ShadingSystemImpl::merge_instances (ShaderGroup &group, bool post_opt)
3177 {
3178     // Look through the shader group for pairs of nodes/layers that
3179     // actually do exactly the same thing, and eliminate one of the
3180     // redundant shaders, carefully rewiring all its outgoing
3181     // connections to later layers to refer to the one we keep.
3182     //
3183     // It turns out that in practice, it's not uncommon to have
3184     // duplicate nodes.  For example, some materials are "layered" --
3185     // like a character skin shader that has separate sub-networks for
3186     // skin, oil, wetness, and so on -- and those different sub-nets
3187     // often reference the same texture maps or noise functions by
3188     // repetition.  Yes, ideally, the redundancies would be eliminated
3189     // before they were fed to the renderer, but in practice that's hard
3190     // and for many scenes we get substantial savings of time (mostly
3191     // because of reduced texture calls) and instance memory by finding
3192     // these redundancies automatically.  The amount of savings is quite
3193     // scene dependent, as well as probably very dependent on the
3194     // general shading and lookdev approach of the studio.  But it was
3195     // very helpful for us in many cases.
3196     //
3197     // The basic loop below looks very inefficient, O(n^2) in number of
3198     // instances in the group. But it's really not -- a few seconds (sum
3199     // of all threads) for even our very complex scenes. This is because
3200     // most potential pairs have a very fast rejection case if they are
3201     // not using the same master.  Since there's no appreciable cost to
3202     // the brute force approach, it seems silly to have a complex scheme
3203     // to try to reduce the number of pairings.
3204 
3205     if (! m_opt_merge_instances || optimize() < 1)
3206         return 0;
3207 
3208     OIIO::Timer timer;          // Time we spend looking for and doing merges
3209     int merges = 0;             // number of merges we do
3210     size_t connectionmem = 0;   // Connection memory we free
3211     int nlayers = group.nlayers();
3212 
3213     // Need to quickly make sure userdata_params is up to date before any
3214     // mergeability tests.
3215     for (int layer = 0;  layer < nlayers;  ++layer)
3216         if (! group[layer]->unused())
3217             group[layer]->evaluate_writes_globals_and_userdata_params ();
3218 
3219     // Loop over all layers...
3220     for (int a = 0;  a < nlayers-1;  ++a) {
3221         if (group[a]->unused() || group[a]->entry_layer()) // Don't merge a layer that's not used
3222             continue;                                      // or if it's an entry layer
3223         // Check all later layers...
3224         for (int b = a+1;  b < nlayers;  ++b) {
3225             if (group[b]->unused())    // Don't merge a layer that's not used
3226                 continue;
3227             if (b == nlayers-1)   // Don't merge the last layer -- causes
3228                 continue;         // many tears because it's the group entry
3229 
3230             // Now we have two used layers, a and b, to examine.
3231             // See if they are mergeable (identical).  All the heavy
3232             // lifting is done by ShaderInstance::mergeable().
3233             if (! group[a]->mergeable (*group[b], group))
3234                 continue;
3235 
3236             // The two nodes a and b are mergeable, so merge them.
3237             ShaderInstance *A = group[a];
3238             ShaderInstance *B = group[b];
3239             ++merges;
3240 
3241             // We'll keep A, get rid of B.  For all layers later than B,
3242             // check its incoming connections and replace all references
3243             // to B with references to A.
3244             for (int j = b+1;  j < nlayers;  ++j) {
3245                 ShaderInstance *inst = group[j];
3246                 if (inst->unused())  // don't bother if it's unused
3247                     continue;
3248                 for (int c = 0, ce = inst->nconnections();  c < ce;  ++c) {
3249                     Connection &con = inst->connection(c);
3250                     if (con.srclayer == b) {
3251                         con.srclayer = a;
3252                         A->outgoing_connections (true);
3253                         if (A->symbols().size() && B->symbols().size()) {
3254                             OSL_DASSERT (A->symbol(con.src.param)->name() ==
3255                                          B->symbol(con.src.param)->name());
3256                         }
3257                     }
3258                 }
3259             }
3260 
3261             // Mark parameters of B as no longer connected
3262             for (int p = B->firstparam();  p < B->lastparam();  ++p) {
3263                 if (B->symbols().size())
3264                     B->symbol(p)->connected_down(false);
3265                 if (B->m_instoverrides.size())
3266                     B->instoverride(p)->connected_down(false);
3267             }
3268             // B won't be used, so mark it as having no outgoing
3269             // connections and clear its incoming connections (which are
3270             // no longer used).
3271             OSL_DASSERT (B->merged_unused() == false);
3272             B->outgoing_connections (false);
3273             connectionmem += B->clear_connections ();
3274             B->m_merged_unused = true;
3275             OSL_DASSERT (B->unused());
3276         }
3277     }
3278 
3279     {
3280         // Adjust stats
3281         spin_lock lock (m_stat_mutex);
3282         m_stat_mem_inst_connections -= connectionmem;
3283         m_stat_mem_inst -= connectionmem;
3284         m_stat_memory -= connectionmem;
3285         if (post_opt)
3286             m_stat_merged_inst_opt += merges;
3287         else
3288             m_stat_merged_inst += merges;
3289         m_stat_inst_merge_time += timer();
3290     }
3291 
3292     return merges;
3293 }
3294 
3295 
3296 
3297 #if OIIO_HAS_COLORPROCESSOR
3298 
3299 OIIO::ColorProcessorHandle
load_transform(StringParam fromspace,StringParam tospace)3300 OCIOColorSystem::load_transform (StringParam fromspace, StringParam tospace)
3301 {
3302     if (fromspace != m_last_colorproc_fromspace ||
3303         tospace != m_last_colorproc_tospace) {
3304         m_last_colorproc = m_colorconfig.createColorProcessor (fromspace, tospace);
3305         m_last_colorproc_fromspace = fromspace;
3306         m_last_colorproc_tospace = tospace;
3307     }
3308     return m_last_colorproc;
3309 }
3310 
3311 #endif
3312 
3313 
3314 
3315 template <> bool
ocio_transform(StringParam fromspace,StringParam tospace,const Color3 & C,Color3 & Cout)3316 ShadingSystemImpl::ocio_transform (StringParam fromspace, StringParam tospace,
3317                                    const Color3& C, Color3& Cout) {
3318 #if OIIO_HAS_COLORPROCESSOR
3319     OIIO::ColorProcessorHandle cp;
3320     {
3321         lock_guard lock (m_mutex);
3322         cp = m_ocio_system.load_transform(fromspace, tospace);
3323     }
3324     if (cp) {
3325         Cout = C;
3326         cp->apply ((float *)&Cout);
3327         return true;
3328     }
3329 #endif
3330     return false;
3331 }
3332 
3333 
3334 
3335 template <> bool
ocio_transform(StringParam fromspace,StringParam tospace,const Dual2<Color3> & C,Dual2<Color3> & Cout)3336 ShadingSystemImpl::ocio_transform (StringParam fromspace, StringParam tospace,
3337                                    const Dual2<Color3>& C, Dual2<Color3>& Cout) {
3338 #if OIIO_HAS_COLORPROCESSOR
3339     OIIO::ColorProcessorHandle cp;
3340     {
3341         lock_guard lock (m_mutex);
3342         cp = m_ocio_system.load_transform(fromspace, tospace);
3343     }
3344 
3345     if (cp) {
3346         // Use finite differencing to approximate the derivative. Make 3
3347         // color values to convert.
3348         const float eps = 0.001f;
3349         Color3 CC[3] = { C.val(), C.val() + eps*C.dx(), C.val() + eps*C.dy() };
3350         cp->apply ((float *)&CC, 3, 1, 3, sizeof(float), sizeof(Color3), 0);
3351         Cout.set (CC[0],
3352                   (CC[1] - CC[0]) * (1.0f / eps),
3353                   (CC[2] - CC[0]) * (1.0f / eps));
3354         return true;
3355     }
3356 #endif
3357     return false;
3358 }
3359 
3360 
3361 
3362 bool
archive_shadergroup(ShaderGroup & group,string_view filename)3363 ShadingSystemImpl::archive_shadergroup (ShaderGroup& group, string_view filename)
3364 {
3365     std::string filename_base = OIIO::Filesystem::filename(filename);
3366     std::string extension;
3367     for (std::string e = OIIO::Filesystem::extension(filename);
3368          e.size() && filename.size();
3369          e = OIIO::Filesystem::extension(filename)) {
3370         extension = e + extension;
3371         filename.remove_suffix (e.size());
3372     }
3373     if (extension.size() < 2 || extension[0] != '.') {
3374         errorf("archive_shadergroup: invalid filename \"%s\"", filename);
3375         return false;
3376     }
3377     filename_base.erase (filename_base.size() - extension.size());
3378 
3379     std::string pattern = OIIO::Filesystem::temp_directory_path() + "/OSL-%%%%-%%%%";
3380     if (! pattern.size()) {
3381         error ("archive_shadergroup: Could not find a temp directory");
3382         return false;
3383     }
3384     std::string tmpdir = OIIO::Filesystem::unique_path(pattern);
3385     if (! pattern.size()) {
3386         error ("archive_shadergroup: Could not find a temp filename");
3387         return false;
3388     }
3389     std::string errmessage;
3390     bool dir_ok = OIIO::Filesystem::create_directory (tmpdir, errmessage);
3391     if (! dir_ok) {
3392         errorf("archive_shadergroup: Could not create temp directory %s (%s)",
3393                tmpdir, errmessage);
3394         return false;
3395     }
3396 
3397     bool ok = true;
3398     std::string groupfilename = tmpdir + "/shadergroup";
3399     std::ofstream groupfile;
3400     OIIO::Filesystem::open(groupfile, groupfilename);
3401     if (groupfile.good()) {
3402         groupfile << group.serialize();
3403         groupfile.close ();
3404     } else {
3405         error ("archive_shadergroup: Could not open shadergroup file");
3406         ok = false;
3407     }
3408 
3409     std::string filename_list = "shadergroup";
3410     {
3411         std::lock_guard<ShaderGroup> lock (group);
3412         std::set<std::string> entries;   // to avoid duplicates
3413         for (int i = 0, nl = group.nlayers(); i < nl; ++i) {
3414             std::string osofile = group[i]->master()->osofilename();
3415             std::string osoname = OIIO::Filesystem::filename (osofile);
3416             if (entries.find(osoname) == entries.end()) {
3417                 entries.insert (osoname);
3418                 std::string localfile = tmpdir + "/" + osoname;
3419                 OIIO::Filesystem::copy (osofile, localfile);
3420                 filename_list += " " + osoname;
3421             }
3422         }
3423     }
3424 
3425     if (extension == ".tar" || extension == ".tar.gz" || extension == ".tgz") {
3426         std::string z = Strutil::ends_with (extension, "gz") ? "-z" : "";
3427         std::string cmd = Strutil::sprintf ("tar -c %s -C %s -f %s%s %s",
3428                                            z, tmpdir, filename, extension,
3429                                            filename_list);
3430         // std::cout << "Command =\n" << cmd << "\n";
3431         if (system (cmd.c_str()) != 0) {
3432             error ("archive_shadergroup: executing tar command failed");
3433             ok = false;
3434         }
3435 
3436     } else if (extension == ".zip") {
3437         std::string cmd = Strutil::sprintf ("zip -q %s%s %s",
3438                                            filename, extension,
3439                                            filename_list);
3440         // std::cout << "Command =\n" << cmd << "\n";
3441         if (system (cmd.c_str()) != 0) {
3442             error ("archive_shadergroup: executing zip command failed");
3443             ok = false;
3444         }
3445     } else {
3446         error ("archive_shadergroup: no archiving/compressing command");
3447         ok = false;
3448     }
3449 
3450     OIIO::Filesystem::remove_all (tmpdir);
3451 
3452     return ok;
3453 }
3454 
3455 
3456 
3457 void
register_closure(string_view name,int id,const ClosureParam * params,PrepareClosureFunc prepare,SetupClosureFunc setup)3458 ClosureRegistry::register_closure (string_view name, int id,
3459                                    const ClosureParam *params,
3460                                    PrepareClosureFunc prepare,
3461                                    SetupClosureFunc setup)
3462 {
3463     if (m_closure_table.size() <= (size_t)id)
3464         m_closure_table.resize(id + 1);
3465     ClosureEntry &entry = m_closure_table[id];
3466     entry.id = id;
3467     entry.name = name;
3468     entry.nformal = 0;
3469     entry.nkeyword = 0;
3470     entry.struct_size = 0; /* params could be NULL */
3471     for (int i = 0; params; ++i) {
3472         /* always push so the end marker is there */
3473         entry.params.push_back(params[i]);
3474         if (params[i].type == TypeDesc()) {
3475             entry.struct_size = params[i].offset;
3476             /* CLOSURE_FINISH_PARAM stashes the real struct alignment here
3477              * make sure that the closure struct doesn't want more alignment than ClosureComponent
3478              * because we will be allocating the real struct inside it. */
3479             OSL_ASSERT_MSG(params[i].field_size <= int(alignof(ClosureComponent)),
3480                 "Closure %s wants alignment of %d which is larger than that of ClosureComponent",
3481                 name.c_str(),
3482                 params[i].field_size);
3483             break;
3484         }
3485         if (params[i].key == nullptr)
3486             entry.nformal ++;
3487         else
3488             entry.nkeyword ++;
3489     }
3490     entry.prepare = prepare;
3491     entry.setup = setup;
3492     m_closure_name_to_id[ustring(name)] = id;
3493 }
3494 
3495 
3496 
3497 const ClosureRegistry::ClosureEntry *
get_entry(ustring name) const3498 ClosureRegistry::get_entry(ustring name) const
3499 {
3500     std::map<ustring, int>::const_iterator i = m_closure_name_to_id.find(name);
3501     if (i != m_closure_name_to_id.end())
3502     {
3503         OSL_DASSERT((size_t)i->second < m_closure_table.size());
3504         return &m_closure_table[i->second];
3505     }
3506     else
3507         return NULL;
3508 }
3509 
3510 
3511 
3512 }; // namespace pvt
3513 OSL_NAMESPACE_EXIT
3514 
3515 
3516 
3517 bool
init(const ShaderGroup * group,int layernum)3518 OSL::OSLQuery::init (const ShaderGroup *group, int layernum)
3519 {
3520     geterror();   // clear the error, we're newly initializing
3521     if (! group) {
3522         errorf("No group pointer supplied.");
3523         return false;
3524     }
3525     if (layernum < 0 || layernum >= group->nlayers()) {
3526         errorf("Invalid layer number %d (valid indices: 0-%d).",
3527                layernum, group->nlayers()-1);
3528         return false;
3529     }
3530 
3531     const ShaderMaster *master = (*group)[layernum]->master();
3532     m_shadername = master->shadername();
3533     m_shadertypename = master->shadertypename();
3534     m_params.clear();
3535     if (int nparams = master->num_params()) {
3536         m_params.resize (nparams);
3537         for (int i = 0;  i < nparams;  ++i) {
3538             const Symbol *sym = master->symbol (i);
3539             Parameter &p (m_params[i]);
3540             p.name = sym->name().string();
3541             const TypeSpec &ts (sym->typespec());
3542             p.type = ts.simpletype();
3543             p.isoutput = (sym->symtype() == SymTypeOutputParam);
3544             p.varlenarray = ts.is_unsized_array();
3545             p.isstruct = ts.is_structure() || ts.is_structure_array();
3546             p.isclosure = ts.is_closure_based();
3547             p.data = sym->data();
3548             // In this mode, we don't fill in idefault, fdefault, sdefault,
3549             // or spacename.
3550             p.idefault.clear();
3551             p.fdefault.clear();
3552             p.sdefault.clear();
3553             p.spacename.clear();
3554             int n = int (p.type.numelements() * p.type.aggregate);
3555             if (p.type.basetype == TypeDesc::INT) {
3556                 for (int i = 0; i < n; ++i)
3557                     p.idefault.push_back (sym->get_int(i));
3558             }
3559             if (p.type.basetype == TypeDesc::FLOAT) {
3560                 for (int i = 0; i < n; ++i)
3561                     p.fdefault.push_back (sym->get_float(i));
3562             }
3563             if (p.type.basetype == TypeDesc::STRING) {
3564                 for (int i = 0; i < n; ++i)
3565                     p.sdefault.push_back (sym->get_string(i));
3566             }
3567             p.fields.clear();  // don't bother filling this out
3568             if (StructSpec *ss = ts.structspec()) {
3569                 p.structname = ss->name().string();
3570                 for (size_t i = 0, e = ss->numfields();  i < e;  ++i)
3571                     p.fields.push_back (ss->field(i).name);
3572             } else {
3573                 p.structname.clear();
3574             }
3575             p.metadata.clear();   // FIXME?
3576             p.validdefault = (p.data != NULL);
3577         }
3578     }
3579 
3580     m_meta.clear();   // no metadata available at this point
3581 
3582     return true;
3583 }
3584 
3585 
3586 
3587 // vals points to a symbol with a total of ncomps floats (ncomps ==
3588 // aggregate*arraylen).  If has_derivs is true, it's actually 3 times
3589 // that length, the main values then the derivatives.  We want to check
3590 // for nans in vals[firstcheck..firstcheck+nchecks-1], and also in the
3591 // derivatives if present.  Note that if firstcheck==0 and nchecks==ncomps,
3592 // we are checking the entire contents of the symbol.  More restrictive
3593 // firstcheck,nchecks are used to check just one element of an array.
3594 OSL_SHADEOP void
osl_naninf_check(int ncomps,const void * vals_,int has_derivs,void * sg,const void * sourcefile,int sourceline,void * symbolname,int firstcheck,int nchecks,const void * opname)3595 osl_naninf_check (int ncomps, const void *vals_, int has_derivs,
3596                   void *sg, const void *sourcefile, int sourceline,
3597                   void *symbolname, int firstcheck, int nchecks,
3598                   const void *opname)
3599 {
3600     ShadingContext *ctx = (ShadingContext *)((ShaderGlobals *)sg)->context;
3601     const float *vals = (const float *)vals_;
3602     for (int d = 0;  d < (has_derivs ? 3 : 1);  ++d) {
3603         for (int c = firstcheck, e = c+nchecks; c < e;  ++c) {
3604             int i = d*ncomps + c;
3605             if (! OIIO::isfinite(vals[i])) {
3606                 ctx->errorf("Detected %g value in %s%s at %s:%d (op %s)",
3607                             vals[i], d > 0 ? "the derivatives of " : "",
3608                             USTR(symbolname), USTR(sourcefile), sourceline,
3609                             USTR(opname));
3610                 return;
3611             }
3612         }
3613     }
3614 }
3615 
3616 
3617 
3618 // vals points to the data of a float-, int-, or string-based symbol.
3619 // (described by typedesc).  We want to check
3620 // vals[firstcheck..firstcheck+nchecks-1] for floats that are NaN , or
3621 // ints that are -MAXINT, or strings that are "!!!uninitialized!!!"
3622 // which would indicate that the value is uninitialized if
3623 // 'debug_uninit' is turned on.  Note that if firstcheck==0 and
3624 // nchecks==ncomps, we are checking the entire contents of the symbol.
3625 // More restrictive firstcheck,nchecks are used to check just one
3626 // element of an array.
3627 OSL_SHADEOP void
osl_uninit_check(long long typedesc_,void * vals_,void * sg,const void * sourcefile,int sourceline,const char * groupname,int layer,const char * layername,const char * shadername,int opnum,const char * opname,int argnum,void * symbolname,int firstcheck,int nchecks)3628 osl_uninit_check (long long typedesc_, void *vals_,
3629                   void *sg, const void *sourcefile, int sourceline,
3630                   const char *groupname, int layer, const char *layername,
3631                   const char *shadername,
3632                   int opnum, const char *opname, int argnum,
3633                   void *symbolname, int firstcheck, int nchecks)
3634 {
3635     TypeDesc typedesc = TYPEDESC(typedesc_);
3636     ShadingContext *ctx = (ShadingContext *)((ShaderGlobals *)sg)->context;
3637     bool uninit = false;
3638     if (typedesc.basetype == TypeDesc::FLOAT) {
3639         float *vals = (float *)vals_;
3640         for (int c = firstcheck, e = firstcheck+nchecks; c < e;  ++c)
3641             if (!OIIO::isfinite(vals[c])) {
3642                 uninit = true;
3643                 vals[c] = 0;
3644             }
3645     }
3646     if (typedesc.basetype == TypeDesc::INT) {
3647         int *vals = (int *)vals_;
3648         for (int c = firstcheck, e = firstcheck+nchecks; c < e;  ++c)
3649             if (vals[c] == std::numeric_limits<int>::min()) {
3650                 uninit = true;
3651                 vals[c] = 0;
3652             }
3653     }
3654     if (typedesc.basetype == TypeDesc::STRING) {
3655         ustring *vals = (ustring *)vals_;
3656         for (int c = firstcheck, e = firstcheck+nchecks; c < e;  ++c)
3657             if (vals[c] == Strings::uninitialized_string) {
3658                 uninit = true;
3659                 vals[c] = ustring();
3660             }
3661     }
3662     if (uninit) {
3663         ctx->errorf("Detected possible use of uninitialized value in %s %s at %s:%d (group %s, layer %d %s, shader %s, op %d '%s', arg %d)",
3664                     typedesc.c_str(), USTR(symbolname), USTR(sourcefile), sourceline,
3665                     (groupname && groupname[0]) ? groupname: "<unnamed group>",
3666                     layer, (layername && layername[0]) ? layername : "<unnamed layer>",
3667                     shadername, opnum, USTR(opname), argnum);
3668     }
3669 }
3670 
3671 
3672 
3673 OSL_SHADEOP int
osl_range_check_err(int indexvalue,int length,const char * symname,void * sg,const void * sourcefile,int sourceline,const char * groupname,int layer,const char * layername,const char * shadername)3674 osl_range_check_err (int indexvalue, int length, const char *symname,
3675                  void *sg, const void *sourcefile, int sourceline,
3676                  const char *groupname, int layer, const char *layername,
3677                  const char *shadername)
3678 {
3679     if (indexvalue < 0 || indexvalue >= length) {
3680         ShadingContext *ctx = (ShadingContext *)((ShaderGlobals *)sg)->context;
3681         ctx->errorf("Index [%d] out of range %s[0..%d]: %s:%d"
3682                     " (group %s, layer %d %s, shader %s)",
3683                     indexvalue, USTR(symname), length-1,
3684                     USTR(sourcefile), sourceline,
3685                     (groupname && groupname[0]) ? groupname : "<unnamed group>", layer,
3686                     (layername && layername[0]) ? layername : "<unnamed layer>",
3687                     USTR(shadername));
3688         if (indexvalue >= length)
3689             indexvalue = length-1;
3690         else
3691             indexvalue = 0;
3692     }
3693     return indexvalue;
3694 }
3695 
3696 
3697 
3698 // Asked if the raytype is a name we can't know until mid-shader.
osl_raytype_name(void * sg_,void * name)3699 OSL_SHADEOP int osl_raytype_name (void *sg_, void *name)
3700 {
3701     ShaderGlobals *sg = (ShaderGlobals *)sg_;
3702     int bit = sg->context->shadingsys().raytype_bit (USTR(name));
3703     return (sg->raytype & bit) != 0;
3704 }
3705 
3706 
osl_get_attribute(void * sg_,int dest_derivs,void * obj_name_,void * attr_name_,int array_lookup,int index,const void * attr_type,void * attr_dest)3707 OSL_SHADEOP int osl_get_attribute(void *sg_,
3708                              int   dest_derivs,
3709                              void *obj_name_,
3710                              void *attr_name_,
3711                              int   array_lookup,
3712                              int   index,
3713                              const void *attr_type,
3714                              void *attr_dest)
3715 {
3716     ShaderGlobals *sg   = (ShaderGlobals *)sg_;
3717     const ustring &obj_name  = USTR(obj_name_);
3718     const ustring &attr_name = USTR(attr_name_);
3719 
3720     return sg->context->osl_get_attribute (sg, sg->objdata,
3721                                            dest_derivs, obj_name, attr_name,
3722                                            array_lookup, index,
3723                                            *(const TypeDesc *)attr_type,
3724                                            attr_dest);
3725 }
3726 
3727 
3728 
3729 OSL_SHADEOP int
osl_bind_interpolated_param(void * sg_,const void * name,long long type,int userdata_has_derivs,void * userdata_data,int,void * symbol_data,int symbol_data_size,char * userdata_initialized,int)3730 osl_bind_interpolated_param (void *sg_, const void *name, long long type,
3731                              int userdata_has_derivs, void *userdata_data,
3732                              int /*symbol_has_derivs*/, void *symbol_data,
3733                              int symbol_data_size,
3734                              char *userdata_initialized, int /*userdata_index*/)
3735 {
3736     char status = *userdata_initialized;
3737     if (status == 0) {
3738         // First time retrieving this userdata
3739         ShaderGlobals *sg = (ShaderGlobals *)sg_;
3740         bool ok = sg->renderer->get_userdata (userdata_has_derivs, USTR(name),
3741                                               TYPEDESC(type),
3742                                               sg, userdata_data);
3743         // printf ("Binding %s %s : index %d, ok = %d\n", name,
3744         //         TYPEDESC(type).c_str(),userdata_index, ok);
3745         *userdata_initialized = status = 1 + ok;  // 1 = not found, 2 = found
3746         sg->context->incr_get_userdata_calls ();
3747     }
3748     if (status == 2) {
3749         // If userdata was present, copy it to the shader variable
3750         memcpy (symbol_data, userdata_data, symbol_data_size);
3751         return 1;
3752     }
3753     return 0;  // no such user data
3754 }
3755