1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4
5 #include <vector>
6 #include <string>
7 #include <cstdio>
8 #include <fstream>
9 #include <cstdlib>
10 #include <mutex>
11
12 #include "oslexec_pvt.h"
13 #include <OSL/genclosure.h>
14 #include "backendllvm.h"
15 #include <OSL/oslquery.h>
16
17 #include <OpenImageIO/filesystem.h>
18 #include <OpenImageIO/fmath.h>
19 #include <OpenImageIO/optparser.h>
20 #include <OpenImageIO/strutil.h>
21 #include <OpenImageIO/sysutil.h>
22 #include <OpenImageIO/thread.h>
23 #include <OpenImageIO/timer.h>
24
25 #include "opcolor.h"
26
27 using namespace OSL;
28 using namespace OSL::pvt;
29
30 #include <OpenEXR/ImfChannelList.h> // Just for OPENEXR_VERSION_STRING
31
32 // avoid naming conflicts with MSVC macros
33 #ifdef _MSC_VER
34 #undef RGB
35 // We use some of the iso646.h macro names later on in this file. For
36 // some compilers (MSVS, I'm looking at you) this is trouble. I don't know
37 // how or why that header would have been included here, but it did for at
38 // least one person, so shut off those macros so they don't cause trouble.
39 #undef and
40 #undef or
41 #undef xor
42 #undef compl
43 #undef bitand
44 #undef bitor
45 #endif
46
47 OSL_NAMESPACE_ENTER
48
49
50
ShadingSystem(RendererServices * renderer,TextureSystem * texturesystem,ErrorHandler * err)51 ShadingSystem::ShadingSystem (RendererServices *renderer,
52 TextureSystem *texturesystem,
53 ErrorHandler *err)
54 : m_impl (NULL)
55 {
56 if (! err) {
57 err = & ErrorHandler::default_handler ();
58 }
59 m_impl = new ShadingSystemImpl (renderer, texturesystem, err);
60 #ifndef NDEBUG
61 err->infof("creating new ShadingSystem %p", (void *)this);
62 #endif
63 }
64
65
66
~ShadingSystem()67 ShadingSystem::~ShadingSystem ()
68 {
69 delete m_impl;
70 }
71
72
73
74 bool
attribute(string_view name,TypeDesc type,const void * val)75 ShadingSystem::attribute (string_view name, TypeDesc type, const void *val)
76 {
77 return m_impl->attribute (name, type, val);
78 }
79
80
81
82 bool
attribute(ShaderGroup * group,string_view name,TypeDesc type,const void * val)83 ShadingSystem::attribute (ShaderGroup *group, string_view name,
84 TypeDesc type, const void *val)
85 {
86 return m_impl->attribute (group, name, type, val);
87 }
88
89
90
91 bool
getattribute(string_view name,TypeDesc type,void * val)92 ShadingSystem::getattribute (string_view name, TypeDesc type, void *val)
93 {
94 return m_impl->getattribute (name, type, val);
95 }
96
97
98
99 bool
getattribute(ShaderGroup * group,string_view name,TypeDesc type,void * val)100 ShadingSystem::getattribute (ShaderGroup *group, string_view name,
101 TypeDesc type, void *val)
102 {
103 return m_impl->getattribute (group, name, type, val);
104 }
105
106
107
108 bool
LoadMemoryCompiledShader(string_view shadername,string_view buffer)109 ShadingSystem::LoadMemoryCompiledShader (string_view shadername,
110 string_view buffer)
111 {
112 return m_impl->LoadMemoryCompiledShader (shadername, buffer);
113 }
114
115
116
117 ShaderGroupRef
ShaderGroupBegin(string_view groupname)118 ShadingSystem::ShaderGroupBegin (string_view groupname)
119 {
120 return m_impl->ShaderGroupBegin (groupname);
121 }
122
123
124
125 ShaderGroupRef
ShaderGroupBegin(string_view groupname,string_view usage,string_view groupspec)126 ShadingSystem::ShaderGroupBegin (string_view groupname, string_view usage,
127 string_view groupspec)
128 {
129 return m_impl->ShaderGroupBegin (groupname, usage, groupspec);
130 }
131
132
133
134 bool
ShaderGroupEnd(ShaderGroup & group)135 ShadingSystem::ShaderGroupEnd (ShaderGroup& group)
136 {
137 return m_impl->ShaderGroupEnd(group);
138 }
139
140
141 bool
ShaderGroupEnd(void)142 ShadingSystem::ShaderGroupEnd (void)
143 {
144 return m_impl->ShaderGroupEnd();
145 }
146
147
148
149 bool
Parameter(ShaderGroup & group,string_view name,TypeDesc t,const void * val,bool lockgeom)150 ShadingSystem::Parameter (ShaderGroup& group, string_view name, TypeDesc t,
151 const void *val, bool lockgeom)
152 {
153 return m_impl->Parameter (group, name, t, val, lockgeom);
154 }
155
156
157
158 bool
Parameter(string_view name,TypeDesc t,const void * val,bool lockgeom)159 ShadingSystem::Parameter (string_view name, TypeDesc t, const void *val,
160 bool lockgeom)
161 {
162 return m_impl->Parameter (name, t, val, lockgeom);
163 }
164
165
166
167 bool
Shader(ShaderGroup & group,string_view shaderusage,string_view shadername,string_view layername)168 ShadingSystem::Shader (ShaderGroup& group, string_view shaderusage,
169 string_view shadername, string_view layername)
170 {
171 return m_impl->Shader (group, shaderusage, shadername, layername);
172 }
173
174
175
176 bool
Shader(string_view shaderusage,string_view shadername,string_view layername)177 ShadingSystem::Shader (string_view shaderusage, string_view shadername,
178 string_view layername)
179 {
180 return m_impl->Shader (shaderusage, shadername, layername);
181 }
182
183
184
185 bool
ConnectShaders(ShaderGroup & group,string_view srclayer,string_view srcparam,string_view dstlayer,string_view dstparam)186 ShadingSystem::ConnectShaders (ShaderGroup& group,
187 string_view srclayer, string_view srcparam,
188 string_view dstlayer, string_view dstparam)
189 {
190 return m_impl->ConnectShaders (group, srclayer, srcparam,
191 dstlayer, dstparam);
192 }
193
194
195
196 bool
ConnectShaders(string_view srclayer,string_view srcparam,string_view dstlayer,string_view dstparam)197 ShadingSystem::ConnectShaders (string_view srclayer, string_view srcparam,
198 string_view dstlayer, string_view dstparam)
199 {
200 return m_impl->ConnectShaders (srclayer, srcparam, dstlayer, dstparam);
201 }
202
203
204
205 bool
ReParameter(ShaderGroup & group,string_view layername,string_view paramname,TypeDesc type,const void * val)206 ShadingSystem::ReParameter (ShaderGroup &group, string_view layername,
207 string_view paramname, TypeDesc type,
208 const void *val)
209 {
210 return m_impl->ReParameter (group, layername, paramname, type, val);
211 }
212
213
214
215 PerThreadInfo *
create_thread_info()216 ShadingSystem::create_thread_info ()
217 {
218 return m_impl->create_thread_info();
219 }
220
221
222
223 void
destroy_thread_info(PerThreadInfo * threadinfo)224 ShadingSystem::destroy_thread_info (PerThreadInfo *threadinfo)
225 {
226 return m_impl->destroy_thread_info (threadinfo);
227 }
228
229
230
231 ShadingContext *
get_context(PerThreadInfo * threadinfo,TextureSystem::Perthread * texture_threadinfo)232 ShadingSystem::get_context (PerThreadInfo *threadinfo,
233 TextureSystem::Perthread *texture_threadinfo)
234 {
235 return m_impl->get_context (threadinfo, texture_threadinfo);
236 }
237
238
239
240 void
release_context(ShadingContext * ctx)241 ShadingSystem::release_context (ShadingContext *ctx)
242 {
243 return m_impl->release_context (ctx);
244 }
245
246
247
248 bool
execute(ShadingContext & ctx,ShaderGroup & group,ShaderGlobals & globals,bool run)249 ShadingSystem::execute (ShadingContext &ctx, ShaderGroup &group,
250 ShaderGlobals &globals, bool run)
251 {
252 return m_impl->execute (ctx, group, globals, run);
253 }
254
255
256
257 // DEPRECATED(2.0)
258 bool
execute(ShadingContext * ctx,ShaderGroup & group,ShaderGlobals & globals,bool run)259 ShadingSystem::execute (ShadingContext *ctx, ShaderGroup &group,
260 ShaderGlobals &globals, bool run)
261 {
262 return m_impl->execute (ctx, group, globals, run);
263 }
264
265
266
267 bool
execute_init(ShadingContext & ctx,ShaderGroup & group,ShaderGlobals & globals,bool run)268 ShadingSystem::execute_init (ShadingContext &ctx, ShaderGroup &group,
269 ShaderGlobals &globals, bool run)
270 {
271 return ctx.execute_init (group, globals, run);
272 }
273
274
275
276 bool
execute_layer(ShadingContext & ctx,ShaderGlobals & globals,int layernumber)277 ShadingSystem::execute_layer (ShadingContext &ctx, ShaderGlobals &globals,
278 int layernumber)
279 {
280 return ctx.execute_layer (globals, layernumber);
281 }
282
283
284
285 bool
execute_layer(ShadingContext & ctx,ShaderGlobals & globals,ustring layername)286 ShadingSystem::execute_layer (ShadingContext &ctx, ShaderGlobals &globals,
287 ustring layername)
288 {
289 int layernumber = find_layer (*ctx.group(), layername);
290 return layernumber >= 0 ? ctx.execute_layer (globals, layernumber) : false;
291 }
292
293
294
295 bool
execute_layer(ShadingContext & ctx,ShaderGlobals & globals,const ShaderSymbol * symbol)296 ShadingSystem::execute_layer (ShadingContext &ctx, ShaderGlobals &globals,
297 const ShaderSymbol *symbol)
298 {
299 if (! symbol)
300 return false;
301 const Symbol *sym = reinterpret_cast<const Symbol *>(symbol);
302 int layernumber = sym->layer();
303 return layernumber >= 0 ? ctx.execute_layer (globals, layernumber) : false;
304 }
305
306
307
308 bool
execute_cleanup(ShadingContext & ctx)309 ShadingSystem::execute_cleanup (ShadingContext &ctx)
310 {
311 return ctx.execute_cleanup ();
312 }
313
314
315
316 int
find_layer(const ShaderGroup & group,ustring layername) const317 ShadingSystem::find_layer (const ShaderGroup &group, ustring layername) const
318 {
319 return group.find_layer (layername);
320 }
321
322
323
324 const void*
get_symbol(const ShadingContext & ctx,ustring layername,ustring symbolname,TypeDesc & type) const325 ShadingSystem::get_symbol (const ShadingContext &ctx, ustring layername,
326 ustring symbolname, TypeDesc &type) const
327 {
328 const ShaderSymbol *sym = find_symbol (*ctx.group(), layername,
329 symbolname);
330 if (sym) {
331 type = symbol_typedesc (sym);
332 return symbol_address (ctx, sym);
333 }
334 return NULL;
335 }
336
337
338
339 const void*
get_symbol(const ShadingContext & ctx,ustring symbolname,TypeDesc & type) const340 ShadingSystem::get_symbol (const ShadingContext &ctx,
341 ustring symbolname, TypeDesc &type) const
342 {
343 ustring layername;
344 size_t dot = symbolname.find('.');
345 if (dot != ustring::npos) {
346 // If the name contains a dot, it's intended to be layer.symbol
347 layername = ustring (symbolname, 0, dot);
348 symbolname = ustring (symbolname, dot+1);
349 }
350 return get_symbol (ctx, layername, symbolname, type);
351 }
352
353
354
355 const ShaderSymbol*
find_symbol(const ShaderGroup & group,ustring layername,ustring symbolname) const356 ShadingSystem::find_symbol (const ShaderGroup &group, ustring layername,
357 ustring symbolname) const
358 {
359 if (! group.optimized())
360 return NULL; // has to be post-optimized
361 return (const ShaderSymbol *) group.find_symbol (layername, symbolname);
362 }
363
364
365
366 const ShaderSymbol*
find_symbol(const ShaderGroup & group,ustring symbolname) const367 ShadingSystem::find_symbol (const ShaderGroup &group, ustring symbolname) const
368 {
369 ustring layername;
370 size_t dot = symbolname.find('.');
371 if (dot != ustring::npos) {
372 // If the name contains a dot, it's intended to be layer.symbol
373 layername = ustring (symbolname, 0, dot);
374 symbolname = ustring (symbolname, dot+1);
375 }
376 return find_symbol (group, layername, symbolname);
377 }
378
379
380
381 TypeDesc
symbol_typedesc(const ShaderSymbol * sym) const382 ShadingSystem::symbol_typedesc (const ShaderSymbol *sym) const
383 {
384 return sym ? ((const Symbol *)sym)->typespec().simpletype() : TypeDesc();
385 }
386
387
388
389 const void*
symbol_address(const ShadingContext & ctx,const ShaderSymbol * sym) const390 ShadingSystem::symbol_address (const ShadingContext &ctx,
391 const ShaderSymbol *sym) const
392 {
393 OSL_DASSERT(sym != nullptr);
394 return ctx.symbol_data (*(const Symbol *)sym);
395 }
396
397
398
399 std::string
getstats(int level) const400 ShadingSystem::getstats (int level) const
401 {
402 return m_impl->getstats (level);
403 }
404
405
406
407 void
register_closure(string_view name,int id,const ClosureParam * params,PrepareClosureFunc prepare,SetupClosureFunc setup)408 ShadingSystem::register_closure (string_view name, int id,
409 const ClosureParam *params,
410 PrepareClosureFunc prepare,
411 SetupClosureFunc setup)
412 {
413 return m_impl->register_closure (name, id, params, prepare, setup);
414 }
415
416
417
418 bool
query_closure(const char ** name,int * id,const ClosureParam ** params)419 ShadingSystem::query_closure (const char **name, int *id,
420 const ClosureParam **params)
421 {
422 return m_impl->query_closure (name, id, params);
423 }
424
425
426
427 static cspan< std::pair<ustring,SGBits> >
sgbit_table()428 sgbit_table ()
429 {
430 static const std::pair<ustring,SGBits> table[] = {
431 { ustring("P"), SGBits::P },
432 { ustring("I"), SGBits::I },
433 { ustring("N"), SGBits::N },
434 { ustring("Ng"), SGBits::Ng },
435 { ustring("u"), SGBits::u },
436 { ustring("v"), SGBits::v },
437 { ustring("dPdu"), SGBits::dPdu },
438 { ustring("dPdv"), SGBits::dPdv },
439 { ustring("time"), SGBits::time },
440 { ustring("dtime"), SGBits::dtime },
441 { ustring("dPdtime"), SGBits::dPdtime },
442 { ustring("Ps"), SGBits::Ps },
443 { ustring("Ci"), SGBits::Ci }
444 };
445 return cspan<std::pair<ustring,SGBits>>(table);
446 }
447
448
449
450 SGBits
globals_bit(ustring name)451 ShadingSystem::globals_bit (ustring name)
452 {
453 for (auto t : sgbit_table()) {
454 if (name == t.first)
455 return t.second;
456 }
457 return SGBits::None;
458 }
459
460
461
462 ustring
globals_name(SGBits bit)463 ShadingSystem::globals_name (SGBits bit)
464 {
465 for (auto t : sgbit_table()) {
466 if (bit == t.second)
467 return t.first;
468 }
469 return ustring();
470 }
471
472
473
474 int
raytype_bit(ustring name)475 ShadingSystem::raytype_bit (ustring name)
476 {
477 return m_impl->raytype_bit (name);
478 }
479
480
481
482 void
optimize_all_groups(int nthreads,bool do_jit)483 ShadingSystem::optimize_all_groups (int nthreads, bool do_jit)
484 {
485 return m_impl->optimize_all_groups (nthreads, 0 /*mythread*/, 1 /*totalthreads*/, do_jit);
486 }
487
488
489
490 TextureSystem *
texturesys() const491 ShadingSystem::texturesys () const
492 {
493 return m_impl->texturesys();
494 }
495
496
497
498 RendererServices *
renderer() const499 ShadingSystem::renderer () const
500 {
501 return m_impl->renderer();
502 }
503
504
505
506 bool
archive_shadergroup(ShaderGroup * group,string_view filename)507 ShadingSystem::archive_shadergroup (ShaderGroup *group, string_view filename)
508 {
509 if (!group) {
510 m_impl->error ("archive_shadergroup: passed nullptr as group");
511 return false;
512 }
513 return m_impl->archive_shadergroup (*group, filename);
514 }
515
516
517 bool
archive_shadergroup(ShaderGroup & group,string_view filename)518 ShadingSystem::archive_shadergroup (ShaderGroup& group, string_view filename)
519 {
520 return m_impl->archive_shadergroup (group, filename);
521 }
522
523
524 void
set_raytypes(ShaderGroup * group,int raytypes_on,int raytypes_off)525 ShadingSystem::set_raytypes (ShaderGroup *group, int raytypes_on, int raytypes_off)
526 {
527 if (group)
528 group->set_raytypes(raytypes_on, raytypes_off);
529 }
530
531
532 void
optimize_group(ShaderGroup * group,ShadingContext * ctx,bool do_jit)533 ShadingSystem::optimize_group (ShaderGroup *group, ShadingContext *ctx, bool do_jit)
534 {
535 if (group)
536 m_impl->optimize_group (*group, ctx, do_jit);
537 }
538
539
540
541 void
optimize_group(ShaderGroup * group,int raytypes_on,int raytypes_off,ShadingContext * ctx,bool do_jit)542 ShadingSystem::optimize_group (ShaderGroup *group,
543 int raytypes_on, int raytypes_off,
544 ShadingContext *ctx,
545 bool do_jit)
546 {
547 // convenience function for backwards compatibility
548 set_raytypes (group, raytypes_on, raytypes_off);
549 optimize_group (group, ctx, do_jit);
550 }
551
552
553
554 static TypeDesc TypeFloatArray2 (TypeDesc::FLOAT, 2);
555 static TypeDesc TypeFloatArray3 (TypeDesc::FLOAT, 3);
556 static TypeDesc TypeFloatArray4 (TypeDesc::FLOAT, 4);
557
558
559
560 bool
convert_value(void * dst,TypeDesc dsttype,const void * src,TypeDesc srctype)561 ShadingSystem::convert_value (void *dst, TypeDesc dsttype,
562 const void *src, TypeDesc srctype)
563 {
564 int tmp_int;
565 if (srctype == TypeDesc::UINT8) {
566 // uint8 src: Up-convert the source to int
567 if (src) {
568 tmp_int = *(const unsigned char *)src;
569 src = &tmp_int;
570 }
571 srctype = TypeDesc::TypeInt;
572 }
573
574 float tmp_float;
575 if (srctype == TypeDesc::TypeInt && dsttype.basetype == TypeDesc::FLOAT) {
576 // int -> float-based : up-convert the source to float
577 if (src) {
578 tmp_float = (float) (*(const int *)src);
579 src = &tmp_float;
580 }
581 srctype = TypeDesc::TypeFloat;
582 }
583
584 // Just copy equivalent types
585 if (equivalent (dsttype, srctype)) {
586 if (dst && src)
587 memmove (dst, src, dsttype.size());
588 return true;
589 }
590
591 if (srctype == TypeDesc::TypeFloat) {
592 // float->triple conversion
593 if (equivalent(dsttype, TypeDesc::TypePoint)) {
594 if (dst && src) {
595 float f = *(const float *)src;
596 ((OSL::Vec3 *)dst)->setValue (f, f, f);
597 }
598 return true;
599 }
600 // float->int
601 if (dsttype == TypeDesc::TypeInt) {
602 if (dst && src)
603 *(int *)dst = (int) *(const float *)src;
604 return true;
605 }
606 // float->float[2]
607 if (dsttype == TypeFloatArray2) {
608 if (dst && src) {
609 float f = *(const float *)src;
610 ((float *)dst)[0] = f;
611 ((float *)dst)[1] = f;
612 }
613 return true;
614 }
615 // float->float[4]
616 if (dsttype == TypeFloatArray4) {
617 if (dst && src) {
618 float f = *(const float *)src;
619 ((float *)dst)[0] = f;
620 ((float *)dst)[1] = f;
621 ((float *)dst)[2] = f;
622 ((float *)dst)[3] = f;
623 }
624 return true;
625 }
626 return false; // Unsupported conversion
627 }
628
629 // float[3] -> triple
630 if ((srctype == TypeFloatArray3 && equivalent(dsttype, TypeDesc::TypePoint)) ||
631 (dsttype == TypeFloatArray3 && equivalent(srctype, TypeDesc::TypePoint))) {
632 if (dst && src)
633 memmove (dst, src, dsttype.size());
634 return true;
635 }
636
637 // float[4] -> vec4
638 if ((srctype == TypeFloatArray4 && equivalent(dsttype, TypeDesc::TypeFloat4)) ||
639 (dsttype == TypeFloatArray4 && equivalent(srctype, TypeDesc::TypeFloat4))) {
640 if (dst && src)
641 memmove (dst, src, dsttype.size());
642 return true;
643 }
644
645 // float[2] -> triple
646 if (srctype == TypeFloatArray2 && equivalent(dsttype, TypeDesc::TypePoint)) {
647 if (dst && src) {
648 float f0 = ((const float *)src)[0];
649 float f1 = ((const float *)src)[1];
650 ((OSL::Vec3 *)dst)->setValue (f0, f1, 0.0f);
651 }
652 return true;
653 }
654
655 return false; // Unsupported conversion
656 }
657
658
659
PerThreadInfo()660 PerThreadInfo::PerThreadInfo ()
661 {
662 }
663
664
665
~PerThreadInfo()666 PerThreadInfo::~PerThreadInfo ()
667 {
668 while (! context_pool.empty())
669 delete pop_context ();
670 }
671
672
673
674 ShadingContext *
pop_context()675 PerThreadInfo::pop_context ()
676 {
677 ShadingContext *sc = context_pool.top ();
678 context_pool.pop ();
679 return sc;
680 }
681
682
683
684
685
686 namespace Strings {
687 #define STRDECL(str,var_name) const ustring var_name(str);
688 #include <OSL/strdecls.h>
689 #undef STRDECL
690 }
691
692
693
694 namespace pvt { // OSL::pvt
695
696
ShadingSystemImpl(RendererServices * renderer,TextureSystem * texturesystem,ErrorHandler * err)697 ShadingSystemImpl::ShadingSystemImpl (RendererServices *renderer,
698 TextureSystem *texturesystem,
699 ErrorHandler *err)
700 : m_renderer(renderer), m_texturesys(texturesystem), m_err(err),
701 m_statslevel (0), m_lazylayers (true),
702 m_lazyglobals (true), m_lazyunconnected(true), m_lazyerror(true),
703 m_lazy_userdata(false), m_userdata_isconnected(false),
704 m_clearmemory (false), m_debugnan (false), m_debug_uninit(false),
705 m_lockgeom_default (true), m_strict_messages(true),
706 m_error_repeats(false),
707 m_range_checking(true),
708 m_unknown_coordsys_error(true), m_connection_error(true),
709 m_greedyjit(false), m_countlayerexecs(false),
710 m_relaxed_param_typecheck(false),
711 m_max_warnings_per_thread(100),
712 m_profile(0),
713 m_optimize(2),
714 m_opt_simplify_param(true), m_opt_constant_fold(true),
715 m_opt_stale_assign(true), m_opt_elide_useless_ops(true),
716 m_opt_elide_unconnected_outputs(true),
717 m_opt_peephole(true), m_opt_coalesce_temps(true),
718 m_opt_assign(true), m_opt_mix(true),
719 m_opt_merge_instances(1), m_opt_merge_instances_with_userdata(true),
720 m_opt_fold_getattribute(true),
721 m_opt_middleman(true), m_opt_texture_handle(true),
722 m_opt_seed_bblock_aliases(true),
723 m_llvm_jit_fma(false),
724 m_llvm_jit_aggressive(false),
725 m_optimize_nondebug(false),
726 m_vector_width(4),
727 m_opt_passes(10),
728 m_llvm_optimize(1),
729 m_debug(0), m_llvm_debug(0),
730 m_llvm_debug_layers(0), m_llvm_debug_ops(0),
731 m_llvm_target_host(1),
732 m_llvm_debugging_symbols(0),
733 m_llvm_profiling_events(0),
734 m_llvm_output_bitcode(0),
735 m_llvm_dumpasm(0),
736 m_commonspace_synonym("world"),
737 m_max_local_mem_KB(2048),
738 m_compile_report(false),
739 m_buffer_printf(true),
740 m_no_noise(false),
741 m_no_pointcloud(false),
742 m_force_derivs(false),
743 m_allow_shader_replacement(false),
744 m_exec_repeat(1),
745 m_opt_warnings(0),
746 m_gpu_opt_error(0),
747 m_colorspace("Rec709"),
748 m_stat_opt_locking_time(0), m_stat_specialization_time(0),
749 m_stat_total_llvm_time(0),
750 m_stat_llvm_setup_time(0), m_stat_llvm_irgen_time(0),
751 m_stat_llvm_opt_time(0), m_stat_llvm_jit_time(0),
752 m_stat_inst_merge_time(0),
753 m_stat_max_llvm_local_mem(0)
754 {
755 m_stat_shaders_loaded = 0;
756 m_stat_shaders_requested = 0;
757 m_stat_groups = 0;
758 m_stat_groupinstances = 0;
759 m_stat_instances_compiled = 0;
760 m_stat_groups_compiled = 0;
761 m_stat_empty_instances = 0;
762 m_stat_merged_inst = 0;
763 m_stat_merged_inst_opt = 0;
764 m_stat_empty_groups = 0;
765 m_stat_regexes = 0;
766 m_stat_preopt_syms = 0;
767 m_stat_postopt_syms = 0;
768 m_stat_syms_with_derivs = 0;
769 m_stat_preopt_ops = 0;
770 m_stat_postopt_ops = 0;
771 m_stat_middlemen_eliminated = 0;
772 m_stat_const_connections = 0;
773 m_stat_global_connections = 0;
774 m_stat_tex_calls_codegened = 0;
775 m_stat_tex_calls_as_handles = 0;
776 m_stat_master_load_time = 0;
777 m_stat_optimization_time = 0;
778 m_stat_getattribute_time = 0;
779 m_stat_getattribute_fail_time = 0;
780 m_stat_getattribute_calls = 0;
781 m_stat_get_userdata_calls = 0;
782 m_stat_noise_calls = 0;
783 m_stat_pointcloud_searches = 0;
784 m_stat_pointcloud_searches_total_results = 0;
785 m_stat_pointcloud_max_results = 0;
786 m_stat_pointcloud_failures = 0;
787 m_stat_pointcloud_gets = 0;
788 m_stat_pointcloud_writes = 0;
789 m_stat_layers_executed = 0;
790 m_stat_total_shading_time_ticks = 0;
791
792 m_groups_to_compile_count = 0;
793 m_threads_currently_compiling = 0;
794
795 // If client didn't supply an error handler, just use the default
796 // one that echoes to the terminal.
797 if (! m_err) {
798 m_err = & ErrorHandler::default_handler ();
799 }
800
801 // If client didn't supply a texture system, use the one already held
802 // by the renderer (if it returns one).
803 if (! m_texturesys)
804 m_texturesys = renderer->texturesys();
805
806 // If we still don't have a texture system, create a new one
807 if (! m_texturesys) {
808 #if OSL_NO_DEFAULT_TEXTURESYSTEM
809 // This build option instructs OSL to never create a TextureSystem
810 // itself. (Most likely reason: this build of OSL is for a renderer
811 // that replaces OIIO's TextureSystem with its own, and therefore
812 // wouldn't want to accidentally make an OIIO one here.
813 OSL_ASSERT (0 && "ShadingSystem was not passed a working TextureSystem*");
814 #else
815 m_texturesys = TextureSystem::create (true /* shared */);
816 // Make some good guesses about default options
817 m_texturesys->attribute ("automip", 1);
818 m_texturesys->attribute ("autotile", 64);
819 #endif
820 }
821
822 // Alternate way of turning on LLVM debug mode (temporary/experimental)
823 const char *llvm_debug_env = getenv ("OSL_LLVM_DEBUG");
824 if (llvm_debug_env && *llvm_debug_env)
825 m_llvm_debug = atoi(llvm_debug_env);
826
827 // Initialize a default set of raytype names. A particular renderer
828 // can override this, add custom names, or change the bits around,
829 // if this default ordering is not to its liking.
830 static const char *raytypes[] = {
831 /*1*/ "camera", /*2*/ "shadow", /*4*/ "reflection", /*8*/ "refraction",
832 /*16*/ "diffuse", /*32*/ "glossy", /*64*/ "subsurface",
833 /*128*/ "displacement"
834 };
835 const int nraytypes = sizeof(raytypes)/sizeof(raytypes[0]);
836 attribute ("raytypes", TypeDesc(TypeDesc::STRING,nraytypes), raytypes);
837
838 // Allow environment variable to override default options
839 const char *options = getenv ("OSL_OPTIONS");
840 if (options)
841 attribute ("options", TypeDesc::STRING, &options);
842
843 setup_op_descriptors ();
844
845 colorsystem().set_colorspace(m_colorspace);
846 }
847
848
849
850 static void
shading_system_setup_op_descriptors(ShadingSystemImpl::OpDescriptorMap & op_descriptor)851 shading_system_setup_op_descriptors (ShadingSystemImpl::OpDescriptorMap& op_descriptor)
852 {
853 #define OP2(alias,name,ll,fold,simp,flag) \
854 extern bool llvm_gen_##ll (BackendLLVM &rop, int opnum); \
855 extern int constfold_##fold (RuntimeOptimizer &rop, int opnum); \
856 op_descriptor[ustring(#alias)] = OpDescriptor(#name, llvm_gen_##ll, \
857 constfold_##fold, simp, flag);
858 #define OP(name,ll,fold,simp,flag) OP2(name,name,ll,fold,simp,flag)
859 #define TEX OpDescriptor::Tex
860 #define SIDE OpDescriptor::SideEffects
861
862 // name llvmgen folder simple flags
863 OP (aassign, aassign, aassign, false, 0);
864 OP (abs, generic, abs, true, 0);
865 OP (acos, generic, acos, true, 0);
866 OP (add, add, add, true, 0);
867 OP (and, andor, and, true, 0);
868 OP (area, area, deriv, true, 0);
869 OP (aref, aref, aref, true, 0);
870 OP (arraycopy, arraycopy, none, false, 0);
871 OP (arraylength, arraylength, arraylength, true, 0);
872 OP (asin, generic, asin, true, 0);
873 OP (assign, assign, none, true, 0);
874 OP (atan, generic, none, true, 0);
875 OP (atan2, generic, none, true, 0);
876 OP (backfacing, get_simple_SG_field, none, true, 0);
877 OP (bitand, bitwise_binary_op, bitand, true, 0);
878 OP (bitor, bitwise_binary_op, bitor, true, 0);
879 OP (blackbody, blackbody, none, true, 0);
880 OP (break, loopmod_op, none, false, 0);
881 OP (calculatenormal, calculatenormal, none, true, 0);
882 OP (cbrt, generic, cbrt, true, 0);
883 OP (ceil, generic, ceil, true, 0);
884 OP (cellnoise, noise, noise, true, 0);
885 OP (clamp, clamp, clamp, true, 0);
886 OP (closure, closure, none, true, 0);
887 OP (color, construct_color, triple, true, 0);
888 OP (compassign, compassign, compassign, false, 0);
889 OP (compl, unary_op, compl, true, 0);
890 OP (compref, compref, compref, true, 0);
891 OP (concat, generic, concat, true, 0);
892 OP (continue, loopmod_op, none, false, 0);
893 OP (cos, generic, cos, true, 0);
894 OP (cosh, generic, none, true, 0);
895 OP (cross, generic, none, true, 0);
896 OP (degrees, generic, degrees, true, 0);
897 OP (determinant, generic, none, true, 0);
898 OP (dict_find, dict_find, none, false, 0);
899 OP (dict_next, dict_next, none, false, 0);
900 OP (dict_value, dict_value, none, false, 0);
901 OP (distance, generic, none, true, 0);
902 OP (div, div, div, true, 0);
903 OP (dot, generic, dot, true, 0);
904 OP (Dx, DxDy, deriv, true, 0);
905 OP (Dy, DxDy, deriv, true, 0);
906 OP (Dz, Dz, deriv, true, 0);
907 OP (dowhile, loop_op, none, false, 0);
908 OP (end, end, none, false, 0);
909 OP (endswith, generic, endswith, true, 0);
910 OP (environment, environment, none, true, TEX);
911 OP (eq, compare_op, eq, true, 0);
912 OP (erf, generic, erf, true, 0);
913 OP (erfc, generic, erfc, true, 0);
914 OP (error, printf, none, false, SIDE);
915 OP (exit, return, none, false, 0);
916 OP (exp, generic, exp, true, 0);
917 OP (exp2, generic, exp2, true, 0);
918 OP (expm1, generic, expm1, true, 0);
919 OP (fabs, generic, abs, true, 0);
920 OP (filterwidth, filterwidth, deriv, true, 0);
921 OP (floor, generic, floor, true, 0);
922 OP (fmod, modulus, none, true, 0);
923 OP (for, loop_op, none, false, 0);
924 OP (format, printf, format, true, 0);
925 OP (fprintf, printf, none, false, SIDE);
926 OP (functioncall, functioncall, functioncall, false, 0);
927 OP (functioncall_nr,functioncall_nr, none, false, 0);
928 OP (ge, compare_op, ge, true, 0);
929 OP (getattribute, getattribute, getattribute, false, 0);
930 OP (getchar, generic, getchar, true, 0);
931 OP (getmatrix, getmatrix, getmatrix, false, 0);
932 OP (getmessage, getmessage, getmessage, false, 0);
933 OP (gettextureinfo, gettextureinfo, gettextureinfo,false, TEX);
934 OP (gt, compare_op, gt, true, 0);
935 OP (hash, generic, hash, true, 0);
936 OP (hashnoise, noise, noise, true, 0);
937 OP (if, if, if, false, 0);
938 OP (inversesqrt, generic, inversesqrt, true, 0);
939 OP (isconnected, generic, none, true, 0);
940 OP (isconstant, isconstant, isconstant, true, 0);
941 OP (isfinite, generic, none, true, 0);
942 OP (isinf, generic, none, true, 0);
943 OP (isnan, generic, none, true, 0);
944 OP (le, compare_op, le, true, 0);
945 OP (length, generic, none, true, 0);
946 OP (log, generic, log, true, 0);
947 OP (log10, generic, log10, true, 0);
948 OP (log2, generic, log2, true, 0);
949 OP (logb, generic, logb, true, 0);
950 OP (lt, compare_op, lt, true, 0);
951 OP (luminance, luminance, none, true, 0);
952 OP (matrix, matrix, matrix, true, 0);
953 OP (max, minmax, max, true, 0);
954 OP (mxcompassign, mxcompassign, mxcompassign, false, 0);
955 OP (mxcompref, mxcompref, none, true, 0);
956 OP (min, minmax, min, true, 0);
957 OP (mix, mix, mix, true, 0);
958 OP (mod, modulus, mod, true, 0);
959 OP (mul, mul, mul, true, 0);
960 OP (neg, neg, neg, true, 0);
961 OP (neq, compare_op, neq, true, 0);
962 OP (noise, noise, noise, true, 0);
963 OP (nop, nop, none, true, 0);
964 OP (normal, construct_triple, triple, true, 0);
965 OP (normalize, generic, normalize, true, 0);
966 OP (or, andor, or, true, 0);
967 OP (pnoise, noise, noise, true, 0);
968 OP (point, construct_triple, triple, true, 0);
969 OP (pointcloud_search, pointcloud_search, pointcloud_search,
970 false, TEX);
971 OP (pointcloud_get, pointcloud_get, pointcloud_get,false, TEX);
972 OP (pointcloud_write, pointcloud_write, none, false, SIDE);
973 OP (pow, generic, pow, true, 0);
974 OP (printf, printf, none, false, SIDE);
975 OP (psnoise, noise, noise, true, 0);
976 OP (radians, generic, radians, true, 0);
977 OP (raytype, raytype, raytype, true, 0);
978 OP (regex_match, regex, none, false, 0);
979 OP (regex_search, regex, regex_search, false, 0);
980 OP (return, return, none, false, 0);
981 OP (round, generic, none, true, 0);
982 OP (select, select, select, true, 0);
983 OP (setmessage, setmessage, setmessage, false, SIDE);
984 OP (shl, bitwise_binary_op, none, true, 0);
985 OP (shr, bitwise_binary_op, none, true, 0);
986 OP (sign, generic, none, true, 0);
987 OP (sin, generic, sin, true, 0);
988 OP (sincos, sincos, sincos, false, 0);
989 OP (sinh, generic, none, true, 0);
990 OP (smoothstep, generic, none, true, 0);
991 OP (snoise, noise, noise, true, 0);
992 OP (spline, spline, none, true, 0);
993 OP (splineinverse, spline, none, true, 0);
994 OP (split, split, split, false, 0);
995 OP (sqrt, generic, sqrt, true, 0);
996 OP (startswith, generic, none, true, 0);
997 OP (step, generic, none, true, 0);
998 OP (stof, generic, stof, true, 0);
999 OP (stoi, generic, stoi, true, 0);
1000 OP (strlen, generic, strlen, true, 0);
1001 OP2(strtof,stof, generic, stof, true, 0);
1002 OP2(strtoi,stoi, generic, stoi, true, 0);
1003 OP (sub, sub, sub, true, 0);
1004 OP (substr, generic, substr, true, 0);
1005 OP (surfacearea, get_simple_SG_field, none, true, 0);
1006 OP (tan, generic, none, true, 0);
1007 OP (tanh, generic, none, true, 0);
1008 OP (texture, texture, texture, true, TEX);
1009 OP (texture3d, texture3d, none, true, TEX);
1010 OP (trace, trace, none, false, SIDE);
1011 OP (transform, transform, transform, true, 0);
1012 OP (transformc, transformc, transformc, true, 0);
1013 OP (transformn, transform, transform, true, 0);
1014 OP (transformv, transform, transform, true, 0);
1015 OP (transpose, generic, none, true, 0);
1016 OP (trunc, generic, none, true, 0);
1017 OP (useparam, useparam, useparam, false, 0);
1018 OP (vector, construct_triple, triple, true, 0);
1019 OP (warning, printf, warning, false, SIDE);
1020 OP (wavelength_color, blackbody, none, true, 0);
1021 OP (while, loop_op, none, false, 0);
1022 OP (xor, bitwise_binary_op, xor, true, 0);
1023 #undef OP
1024 #undef TEX
1025 #undef SIDE
1026 }
1027
1028
1029
1030 void
setup_op_descriptors()1031 ShadingSystemImpl::setup_op_descriptors ()
1032 {
1033 // This is not a class member function to avoid namespace issues
1034 // with function declarations in the function body, when building
1035 // with visual studio.
1036 shading_system_setup_op_descriptors(m_op_descriptor);
1037 }
1038
1039
1040
1041 void
register_closure(string_view name,int id,const ClosureParam * params,PrepareClosureFunc prepare,SetupClosureFunc setup)1042 ShadingSystemImpl::register_closure (string_view name, int id,
1043 const ClosureParam *params,
1044 PrepareClosureFunc prepare,
1045 SetupClosureFunc setup)
1046 {
1047 for (int i = 0; params && params[i].type != TypeDesc(); ++i) {
1048 if (params[i].key == NULL && params[i].type.size() != (size_t)params[i].field_size) {
1049 errorf("Parameter %d of '%s' closure is assigned to a field of incompatible size", i + 1, name);
1050 return;
1051 }
1052 }
1053 m_closure_registry.register_closure(name, id, params, prepare, setup);
1054 }
1055
1056
1057
1058 bool
query_closure(const char ** name,int * id,const ClosureParam ** params)1059 ShadingSystemImpl::query_closure(const char **name, int *id,
1060 const ClosureParam **params)
1061 {
1062 if (!name && !id)
1063 return false;
1064 const ClosureRegistry::ClosureEntry *entry =
1065 (name && *name) ? m_closure_registry.get_entry(ustring(*name))
1066 : m_closure_registry.get_entry(*id);
1067 if (!entry)
1068 return false;
1069
1070 if (name)
1071 *name = entry->name.c_str();
1072 if (id)
1073 *id = entry->id;
1074 if (params)
1075 *params = &entry->params[0];
1076
1077 return true;
1078 }
1079
1080
1081
~ShadingSystemImpl()1082 ShadingSystemImpl::~ShadingSystemImpl ()
1083 {
1084 size_t ngroups = m_all_shader_groups.size();
1085 for (size_t i = 0; i < ngroups; ++i) {
1086 if (ShaderGroupRef g = m_all_shader_groups[i].lock()) {
1087 if (!g->jitted() ) {
1088 // As we are now lazier in jitting and need to keep the OSL IR
1089 // around in case we want to create a batched JIT or vice versa
1090 // we may have OSL IR to cleanup
1091 group_post_jit_cleanup(*g);
1092 }
1093 }
1094 }
1095
1096 printstats ();
1097 // N.B. just let m_texsys go -- if we asked for one to be created,
1098 // we asked for a shared one.
1099
1100 // FIXME(boulos): According to the docs, we should also call
1101 // llvm_shutdown once we're done. However, ~ShadingSystemImpl
1102 // seems like the wrong place for this since in a multi-threaded
1103 // implementation we might destroy this impl while having others
1104 // outstanding. I'll leave this as a fixme for now.
1105
1106 //llvm::llvm_shutdown();
1107 }
1108
1109
1110
1111 bool
attribute(string_view name,TypeDesc type,const void * val)1112 ShadingSystemImpl::attribute (string_view name, TypeDesc type,
1113 const void *val)
1114 {
1115 #define ATTR_SET(_name,_ctype,_dst) \
1116 if (name == _name && type == OIIO::BaseTypeFromC<_ctype>::value) { \
1117 _dst = *(_ctype *)(val); \
1118 return true; \
1119 }
1120 #define ATTR_SET_STRING(_name,_dst) \
1121 if (name == _name && type == TypeDesc::STRING) { \
1122 _dst = ustring (*(const char **)val); \
1123 return true; \
1124 }
1125
1126 if (name == "options" && type == TypeDesc::STRING) {
1127 return OIIO::optparser (*this, *(const char **)val);
1128 }
1129
1130 lock_guard guard (m_mutex); // Thread safety
1131 ATTR_SET ("statistics:level", int, m_statslevel);
1132 ATTR_SET ("debug", int, m_debug);
1133 ATTR_SET ("lazylayers", int, m_lazylayers);
1134 ATTR_SET ("lazyglobals", int, m_lazyglobals);
1135 ATTR_SET ("lazyunconnected", int, m_lazyunconnected);
1136 ATTR_SET ("lazyerror", int, m_lazyerror);
1137 ATTR_SET ("lazy_userdata", int, m_lazy_userdata);
1138 ATTR_SET ("userdata_isconnected", int, m_userdata_isconnected);
1139 ATTR_SET ("clearmemory", int, m_clearmemory);
1140 ATTR_SET ("debug_nan", int, m_debugnan);
1141 ATTR_SET ("debugnan", int, m_debugnan); // back-compatible alias
1142 ATTR_SET ("debug_uninit", int, m_debug_uninit);
1143 ATTR_SET ("lockgeom", int, m_lockgeom_default);
1144 ATTR_SET ("profile", int, m_profile);
1145 ATTR_SET ("optimize", int, m_optimize);
1146 ATTR_SET ("opt_simplify_param", int, m_opt_simplify_param);
1147 ATTR_SET ("opt_constant_fold", int, m_opt_constant_fold);
1148 ATTR_SET ("opt_stale_assign", int, m_opt_stale_assign);
1149 ATTR_SET ("opt_elide_useless_ops", int, m_opt_elide_useless_ops);
1150 ATTR_SET ("opt_elide_unconnected_outputs", int, m_opt_elide_unconnected_outputs);
1151 ATTR_SET ("opt_peephole", int, m_opt_peephole);
1152 ATTR_SET ("opt_coalesce_temps", int, m_opt_coalesce_temps);
1153 ATTR_SET ("opt_assign", int, m_opt_assign);
1154 ATTR_SET ("opt_mix", int, m_opt_mix);
1155 ATTR_SET ("opt_merge_instances", int, m_opt_merge_instances);
1156 ATTR_SET ("opt_merge_instances_with_userdata", int, m_opt_merge_instances_with_userdata);
1157 ATTR_SET ("opt_fold_getattribute", int, m_opt_fold_getattribute);
1158 ATTR_SET ("opt_middleman", int, m_opt_middleman);
1159 ATTR_SET ("opt_texture_handle", int, m_opt_texture_handle);
1160 ATTR_SET ("opt_seed_bblock_aliases", int, m_opt_seed_bblock_aliases);
1161 ATTR_SET ("llvm_jit_fma", int, m_llvm_jit_fma);
1162 ATTR_SET ("llvm_jit_aggressive", int, m_llvm_jit_aggressive);
1163 ATTR_SET_STRING ("llvm_jit_target", m_llvm_jit_target);
1164 ATTR_SET ("vector_width", int, m_vector_width);
1165 ATTR_SET ("opt_passes", int, m_opt_passes);
1166 ATTR_SET ("optimize_nondebug", int, m_optimize_nondebug);
1167 ATTR_SET ("llvm_optimize", int, m_llvm_optimize);
1168 ATTR_SET ("llvm_debug", int, m_llvm_debug);
1169 ATTR_SET ("llvm_debug_layers", int, m_llvm_debug_layers);
1170 ATTR_SET ("llvm_debug_ops", int, m_llvm_debug_ops);
1171 ATTR_SET ("llvm_target_host", int, m_llvm_target_host);
1172
1173 // Due to ABI breakage in LLVM 7.0.[0-1] for llvm::Optional with GCC,
1174 // calling any llvm APIs that accept an llvm::Optional parameter will break
1175 // ABI causing issues.
1176 // https://bugs.llvm.org/show_bug.cgi?id=39427
1177 // Fixed in llvm 7.1.0+
1178 // Workaround don't enable debug symbols which would use llvm::Optional APIs
1179 #if (!OSL_GNUC_VERSION) || (OSL_LLVM_VERSION >= 71)
1180 ATTR_SET ("llvm_debugging_symbols", int, m_llvm_debugging_symbols);
1181 #endif
1182
1183 ATTR_SET ("llvm_profiling_events", int, m_llvm_profiling_events);
1184 ATTR_SET ("llvm_output_bitcode", int, m_llvm_output_bitcode);
1185 ATTR_SET ("llvm_dumpasm", int, m_llvm_dumpasm);
1186 ATTR_SET_STRING ("llvm_prune_ir_strategy", m_llvm_prune_ir_strategy);
1187 ATTR_SET ("strict_messages", int, m_strict_messages);
1188 ATTR_SET ("range_checking", int, m_range_checking);
1189 ATTR_SET ("unknown_coordsys_error", int, m_unknown_coordsys_error);
1190 ATTR_SET ("connection_error", int, m_connection_error);
1191 ATTR_SET ("greedyjit", int, m_greedyjit);
1192 ATTR_SET ("relaxed_param_typecheck", int, m_relaxed_param_typecheck);
1193 ATTR_SET ("countlayerexecs", int, m_countlayerexecs);
1194 ATTR_SET ("max_warnings_per_thread", int, m_max_warnings_per_thread);
1195 ATTR_SET ("max_local_mem_KB", int, m_max_local_mem_KB);
1196 ATTR_SET ("compile_report", int, m_compile_report);
1197 ATTR_SET ("buffer_printf", int, m_buffer_printf);
1198 ATTR_SET ("no_noise", int, m_no_noise);
1199 ATTR_SET ("no_pointcloud", int, m_no_pointcloud);
1200 ATTR_SET ("force_derivs", int, m_force_derivs);
1201 ATTR_SET ("allow_shader_replacement", int, m_allow_shader_replacement);
1202 ATTR_SET ("exec_repeat", int, m_exec_repeat);
1203 ATTR_SET ("opt_warnings", int, m_opt_warnings);
1204 ATTR_SET ("gpu_opt_error", int, m_gpu_opt_error);
1205 ATTR_SET_STRING ("commonspace", m_commonspace_synonym);
1206 ATTR_SET_STRING ("debug_groupname", m_debug_groupname);
1207 ATTR_SET_STRING ("debug_layername", m_debug_layername);
1208 ATTR_SET_STRING ("opt_layername", m_opt_layername);
1209 ATTR_SET_STRING ("only_groupname", m_only_groupname);
1210 ATTR_SET_STRING ("archive_groupname", m_archive_groupname);
1211 ATTR_SET_STRING ("archive_filename", m_archive_filename);
1212
1213 // cases for special handling
1214 if (name == "searchpath:shader" && type == TypeDesc::STRING) {
1215 m_searchpath = std::string (*(const char **)val);
1216 OIIO::Filesystem::searchpath_split (m_searchpath, m_searchpath_dirs);
1217 return true;
1218 }
1219 if (name == "colorspace" && type == TypeDesc::STRING) {
1220 ustring c = ustring (*(const char **)val);
1221 if (colorsystem().set_colorspace(c))
1222 m_colorspace = c;
1223 else
1224 errorf("Unknown color space \"%s\"", c);
1225 return true;
1226 }
1227 if (name == "raytypes" && type.basetype == TypeDesc::STRING) {
1228 OSL_ASSERT (type.numelements() <= 32 &&
1229 "ShaderGlobals.raytype is an int, max of 32 raytypes");
1230 m_raytypes.clear ();
1231 for (size_t i = 0; i < type.numelements(); ++i)
1232 m_raytypes.emplace_back(((const char **)val)[i]);
1233 return true;
1234 }
1235 if (name == "renderer_outputs" && type.basetype == TypeDesc::STRING) {
1236 m_renderer_outputs.clear ();
1237 for (size_t i = 0; i < type.numelements(); ++i)
1238 m_renderer_outputs.emplace_back(((const char **)val)[i]);
1239 return true;
1240 }
1241 if (name == "lib_bitcode" && type.basetype == TypeDesc::UINT8) {
1242 if (type.arraylen < 0) {
1243 errorf("Invalid bitcode size: %d", type.arraylen);
1244 return false;
1245 }
1246 m_lib_bitcode.clear();
1247 if (type.arraylen) {
1248 const char* bytes = static_cast<const char*>(val);
1249 std::copy(bytes, bytes + type.arraylen,
1250 back_inserter(m_lib_bitcode));
1251 }
1252 return true;
1253 }
1254 if (name == "error_repeats") {
1255 // Special case: setting error_repeats also clears the "previously
1256 // seen" error and warning lists.
1257 m_errseen.clear();
1258 m_warnseen.clear();
1259 ATTR_SET ("error_repeats", int, m_error_repeats);
1260 }
1261
1262 return false;
1263 #undef ATTR_SET
1264 #undef ATTR_SET_STRING
1265 }
1266
1267
1268
1269 bool
getattribute(string_view name,TypeDesc type,void * val)1270 ShadingSystemImpl::getattribute (string_view name, TypeDesc type,
1271 void *val)
1272 {
1273 #define ATTR_DECODE(_name,_ctype,_src) \
1274 if (name == _name && type == OIIO::BaseTypeFromC<_ctype>::value) { \
1275 *(_ctype *)(val) = (_ctype)(_src); \
1276 return true; \
1277 }
1278 #define ATTR_DECODE_STRING(_name,_src) \
1279 if (name == _name && type == TypeDesc::STRING) { \
1280 *(const char **)(val) = _src.c_str(); \
1281 return true; \
1282 }
1283
1284 lock_guard guard (m_mutex); // Thread safety
1285
1286 ATTR_DECODE_STRING ("searchpath:shader", m_searchpath);
1287 ATTR_DECODE ("statistics:level", int, m_statslevel);
1288 ATTR_DECODE ("lazylayers", int, m_lazylayers);
1289 ATTR_DECODE ("lazyglobals", int, m_lazyglobals);
1290 ATTR_DECODE ("lazyunconnected", int, m_lazyunconnected);
1291 ATTR_DECODE ("lazy_userdata", int, m_lazy_userdata);
1292 ATTR_DECODE ("userdata_isconnected", int, m_userdata_isconnected);
1293 ATTR_DECODE ("clearmemory", int, m_clearmemory);
1294 ATTR_DECODE ("debug_nan", int, m_debugnan);
1295 ATTR_DECODE ("debugnan", int, m_debugnan); // back-compatible alias
1296 ATTR_DECODE ("debug_uninit", int, m_debug_uninit);
1297 ATTR_DECODE ("lockgeom", int, m_lockgeom_default);
1298 ATTR_DECODE ("profile", int, m_profile);
1299 ATTR_DECODE ("optimize", int, m_optimize);
1300 ATTR_DECODE ("opt_simplify_param", int, m_opt_simplify_param);
1301 ATTR_DECODE ("opt_constant_fold", int, m_opt_constant_fold);
1302 ATTR_DECODE ("opt_stale_assign", int, m_opt_stale_assign);
1303 ATTR_DECODE ("opt_elide_useless_ops", int, m_opt_elide_useless_ops);
1304 ATTR_DECODE ("opt_elide_unconnected_outputs", int, m_opt_elide_unconnected_outputs);
1305 ATTR_DECODE ("opt_peephole", int, m_opt_peephole);
1306 ATTR_DECODE ("opt_coalesce_temps", int, m_opt_coalesce_temps);
1307 ATTR_DECODE ("opt_assign", int, m_opt_assign);
1308 ATTR_DECODE ("opt_mix", int, m_opt_mix);
1309 ATTR_DECODE ("opt_merge_instances", int, m_opt_merge_instances);
1310 ATTR_DECODE ("opt_merge_instances_with_userdata", int, m_opt_merge_instances_with_userdata);
1311 ATTR_DECODE ("opt_fold_getattribute", int, m_opt_fold_getattribute);
1312 ATTR_DECODE ("opt_middleman", int, m_opt_middleman);
1313 ATTR_DECODE ("opt_texture_handle", int, m_opt_texture_handle);
1314 ATTR_DECODE ("opt_seed_bblock_aliases", int, m_opt_seed_bblock_aliases);
1315 ATTR_DECODE ("llvm_jit_fma", int, m_llvm_jit_fma);
1316 ATTR_DECODE ("llvm_jit_aggressive", int, m_llvm_jit_aggressive);
1317 ATTR_DECODE_STRING ("llvm_jit_target", m_llvm_jit_target);
1318 ATTR_DECODE ("vector_width", int, m_vector_width);
1319 ATTR_DECODE ("opt_passes", int, m_opt_passes);
1320 ATTR_DECODE ("optimize_nondebug", int, m_optimize_nondebug);
1321 ATTR_DECODE ("llvm_optimize", int, m_llvm_optimize);
1322 ATTR_DECODE ("debug", int, m_debug);
1323 ATTR_DECODE ("llvm_debug", int, m_llvm_debug);
1324 ATTR_DECODE ("llvm_debug_layers", int, m_llvm_debug_layers);
1325 ATTR_DECODE ("llvm_debug_ops", int, m_llvm_debug_ops);
1326 ATTR_DECODE ("llvm_target_host", int, m_llvm_target_host);
1327 ATTR_DECODE ("llvm_debugging_symbols", int, m_llvm_debugging_symbols);
1328 ATTR_DECODE ("llvm_profiling_events", int, m_llvm_profiling_events);
1329 ATTR_DECODE ("llvm_output_bitcode", int, m_llvm_output_bitcode);
1330 ATTR_DECODE ("llvm_dumpasm", int, m_llvm_dumpasm);
1331 ATTR_DECODE ("strict_messages", int, m_strict_messages);
1332 ATTR_DECODE ("error_repeats", int, m_error_repeats);
1333 ATTR_DECODE ("range_checking", int, m_range_checking);
1334 ATTR_DECODE ("unknown_coordsys_error", int, m_unknown_coordsys_error);
1335 ATTR_DECODE ("connection_error", int, m_connection_error);
1336 ATTR_DECODE ("greedyjit", int, m_greedyjit);
1337 ATTR_DECODE ("countlayerexecs", int, m_countlayerexecs);
1338 ATTR_DECODE ("relaxed_param_typecheck", int, m_relaxed_param_typecheck);
1339 ATTR_DECODE ("max_warnings_per_thread", int, m_max_warnings_per_thread);
1340 ATTR_DECODE_STRING ("commonspace", m_commonspace_synonym);
1341 ATTR_DECODE_STRING ("colorspace", m_colorspace);
1342 ATTR_DECODE_STRING ("debug_groupname", m_debug_groupname);
1343 ATTR_DECODE_STRING ("debug_layername", m_debug_layername);
1344 ATTR_DECODE_STRING ("opt_layername", m_opt_layername);
1345 ATTR_DECODE_STRING ("only_groupname", m_only_groupname);
1346 ATTR_DECODE_STRING ("archive_groupname", m_archive_groupname);
1347 ATTR_DECODE_STRING ("archive_filename", m_archive_filename);
1348 ATTR_DECODE ("max_local_mem_KB", int, m_max_local_mem_KB);
1349 ATTR_DECODE ("compile_report", int, m_compile_report);
1350 ATTR_DECODE ("buffer_printf", int, m_buffer_printf);
1351 ATTR_DECODE ("no_noise", int, m_no_noise);
1352 ATTR_DECODE ("no_pointcloud", int, m_no_pointcloud);
1353 ATTR_DECODE ("force_derivs", int, m_force_derivs);
1354 ATTR_DECODE ("allow_shader_replacement", int, m_allow_shader_replacement);
1355 ATTR_DECODE ("exec_repeat", int, m_exec_repeat);
1356 ATTR_DECODE ("opt_warnings", int, m_opt_warnings);
1357 ATTR_DECODE ("gpu_opt_error", int, m_gpu_opt_error);
1358
1359 ATTR_DECODE ("stat:masters", int, m_stat_shaders_loaded);
1360 ATTR_DECODE ("stat:groups", int, m_stat_groups);
1361 ATTR_DECODE ("stat:instances_compiled", int, m_stat_instances_compiled);
1362 ATTR_DECODE ("stat:groups_compiled", int, m_stat_groups_compiled);
1363 ATTR_DECODE ("stat:empty_instances", int, m_stat_empty_instances);
1364 ATTR_DECODE ("stat:merged_inst", int, m_stat_merged_inst);
1365 ATTR_DECODE ("stat:merged_inst_opt", int, m_stat_merged_inst_opt);
1366 ATTR_DECODE ("stat:empty_groups", int, m_stat_empty_groups);
1367 ATTR_DECODE ("stat:instances", int, m_stat_groupinstances);
1368 ATTR_DECODE ("stat:regexes", int, m_stat_regexes);
1369 ATTR_DECODE ("stat:preopt_syms", int, m_stat_preopt_syms);
1370 ATTR_DECODE ("stat:postopt_syms", int, m_stat_postopt_syms);
1371 ATTR_DECODE ("stat:syms_with_derivs", int, m_stat_syms_with_derivs);
1372 ATTR_DECODE ("stat:preopt_ops", int, m_stat_preopt_ops);
1373 ATTR_DECODE ("stat:postopt_ops", int, m_stat_postopt_ops);
1374 ATTR_DECODE ("stat:middlemen_eliminated", int, m_stat_middlemen_eliminated);
1375 ATTR_DECODE ("stat:const_connections", int, m_stat_const_connections);
1376 ATTR_DECODE ("stat:global_connections", int, m_stat_global_connections);
1377 ATTR_DECODE ("stat:tex_calls_codegened", int, m_stat_tex_calls_codegened);
1378 ATTR_DECODE ("stat:tex_calls_as_handles", int, m_stat_tex_calls_as_handles);
1379 ATTR_DECODE ("stat:master_load_time", float, m_stat_master_load_time);
1380 ATTR_DECODE ("stat:optimization_time", float, m_stat_optimization_time);
1381 ATTR_DECODE ("stat:opt_locking_time", float, m_stat_opt_locking_time);
1382 ATTR_DECODE ("stat:specialization_time", float, m_stat_specialization_time);
1383 ATTR_DECODE ("stat:total_llvm_time", float, m_stat_total_llvm_time);
1384 ATTR_DECODE ("stat:llvm_setup_time", float, m_stat_llvm_setup_time);
1385 ATTR_DECODE ("stat:llvm_irgen_time", float, m_stat_llvm_irgen_time);
1386 ATTR_DECODE ("stat:llvm_opt_time", float, m_stat_llvm_opt_time);
1387 ATTR_DECODE ("stat:llvm_jit_time", float, m_stat_llvm_jit_time);
1388 ATTR_DECODE ("stat:inst_merge_time", float, m_stat_inst_merge_time);
1389 ATTR_DECODE ("stat:getattribute_calls", long long, m_stat_getattribute_calls);
1390 ATTR_DECODE ("stat:get_userdata_calls", long long, m_stat_get_userdata_calls);
1391 ATTR_DECODE ("stat:noise_calls", long long, m_stat_noise_calls);
1392 ATTR_DECODE ("stat:pointcloud_searches", long long, m_stat_pointcloud_searches);
1393 ATTR_DECODE ("stat:pointcloud_gets", long long, m_stat_pointcloud_gets);
1394 ATTR_DECODE ("stat:pointcloud_writes", long long, m_stat_pointcloud_writes);
1395 ATTR_DECODE ("stat:pointcloud_searches_total_results", long long, m_stat_pointcloud_searches_total_results);
1396 ATTR_DECODE ("stat:pointcloud_max_results", int, m_stat_pointcloud_max_results);
1397 ATTR_DECODE ("stat:pointcloud_failures", int, m_stat_pointcloud_failures);
1398 ATTR_DECODE ("stat:memory_current", long long, m_stat_memory.current());
1399 ATTR_DECODE ("stat:memory_peak", long long, m_stat_memory.peak());
1400 ATTR_DECODE ("stat:mem_master_current", long long, m_stat_mem_master.current());
1401 ATTR_DECODE ("stat:mem_master_peak", long long, m_stat_mem_master.peak());
1402 ATTR_DECODE ("stat:mem_master_ops_current", long long, m_stat_mem_master_ops.current());
1403 ATTR_DECODE ("stat:mem_master_ops_peak", long long, m_stat_mem_master_ops.peak());
1404 ATTR_DECODE ("stat:mem_master_args_current", long long, m_stat_mem_master_args.current());
1405 ATTR_DECODE ("stat:mem_master_args_peak", long long, m_stat_mem_master_args.peak());
1406 ATTR_DECODE ("stat:mem_master_syms_current", long long, m_stat_mem_master_syms.current());
1407 ATTR_DECODE ("stat:mem_master_syms_peak", long long, m_stat_mem_master_syms.peak());
1408 ATTR_DECODE ("stat:mem_master_defaults_current", long long, m_stat_mem_master_defaults.current());
1409 ATTR_DECODE ("stat:mem_master_defaults_peak", long long, m_stat_mem_master_defaults.peak());
1410 ATTR_DECODE ("stat:mem_master_consts_current", long long, m_stat_mem_master_consts.current());
1411 ATTR_DECODE ("stat:mem_master_consts_peak", long long, m_stat_mem_master_consts.peak());
1412 ATTR_DECODE ("stat:mem_inst_current", long long, m_stat_mem_inst.current());
1413 ATTR_DECODE ("stat:mem_inst_peak", long long, m_stat_mem_inst.peak());
1414 ATTR_DECODE ("stat:mem_inst_syms_current", long long, m_stat_mem_inst_syms.current());
1415 ATTR_DECODE ("stat:mem_inst_syms_peak", long long, m_stat_mem_inst_syms.peak());
1416 ATTR_DECODE ("stat:mem_inst_paramvals_current", long long, m_stat_mem_inst_paramvals.current());
1417 ATTR_DECODE ("stat:mem_inst_paramvals_peak", long long, m_stat_mem_inst_paramvals.peak());
1418 ATTR_DECODE ("stat:mem_inst_connections_current", long long, m_stat_mem_inst_connections.current());
1419 ATTR_DECODE ("stat:mem_inst_connections_peak", long long, m_stat_mem_inst_connections.peak());
1420
1421 if (name == "colorsystem" && type.basetype == TypeDesc::PTR) {
1422 *(void**)val = &colorsystem();
1423 return true;
1424 }
1425 if (name == "colorsystem:sizes" && type.basetype == TypeDesc::LONGLONG) {
1426 if (type.arraylen != 2) {
1427 error ("Must request two colorsystem:sizes, [sizeof(pvt::ColorSystem), num-strings]");
1428 return false;
1429 }
1430 long long* lptr = (long long*) val;
1431 lptr[0] = sizeof(pvt::ColorSystem);
1432 lptr[1] = 1; // 1 string (pvt::ColorSystem::m_colorspace)
1433
1434 // Make sure everything adds up!
1435 OSL_ASSERT((((char*)&colorsystem() + lptr[0]) - sizeof(ustring)*lptr[1]) ==
1436 (char*)&colorsystem().colorspace());
1437 return true;
1438 }
1439
1440 return false;
1441 #undef ATTR_DECODE
1442 #undef ATTR_DECODE_STRING
1443 }
1444
1445
1446
1447 bool
attribute(ShaderGroup * group,string_view name,TypeDesc type,const void * val)1448 ShadingSystemImpl::attribute (ShaderGroup *group, string_view name,
1449 TypeDesc type, const void *val)
1450 {
1451 // No current group attributes to set
1452 if (! group)
1453 return attribute (name, type, val);
1454 lock_guard lock (group->m_mutex);
1455 if (name == "renderer_outputs" && type.basetype == TypeDesc::STRING) {
1456 group->m_renderer_outputs.clear ();
1457 for (size_t i = 0; i < type.numelements(); ++i)
1458 group->m_renderer_outputs.emplace_back(((const char **)val)[i]);
1459 return true;
1460 }
1461 if (name == "entry_layers" && type.basetype == TypeDesc::STRING) {
1462 group->clear_entry_layers ();
1463 for (int i = 0; i < (int)type.numelements(); ++i)
1464 group->mark_entry_layer (ustring(((const char **)val)[i]));
1465 return true;
1466 }
1467 if (name == "exec_repeat" && type == TypeDesc::TypeInt) {
1468 group->m_exec_repeat = *(const int *)val;
1469 return true;
1470 }
1471 if (name == "groupname" && type == TypeDesc::TypeString) {
1472 group->name (ustring(((const char **)val)[0]));
1473 return true;
1474 }
1475 return false;
1476 }
1477
1478
1479
1480 bool
getattribute(ShaderGroup * group,string_view name,TypeDesc type,void * val)1481 ShadingSystemImpl::getattribute (ShaderGroup *group, string_view name,
1482 TypeDesc type, void *val)
1483 {
1484 if (! group)
1485 return false;
1486
1487 if (name == "groupname" && type == TypeDesc::TypeString) {
1488 *(ustring *)val = group->name();
1489 return true;
1490 }
1491 if (name == "num_layers" && type == TypeDesc::TypeInt) {
1492 *(int *)val = group->nlayers();
1493 return true;
1494 }
1495 if (name == "layer_names" && type.basetype == TypeDesc::STRING) {
1496 size_t n = std::min (type.numelements(), (size_t)group->nlayers());
1497 for (size_t i = 0; i < n; ++i)
1498 ((ustring *)val)[i] = (*group)[i]->layername();
1499 return true;
1500 }
1501 if (name == "num_renderer_outputs" && type.basetype == TypeDesc::INT) {
1502 *(int *)val = (int) group->m_renderer_outputs.size();
1503 return true;
1504 }
1505 if (name == "renderer_outputs" && type.basetype == TypeDesc::STRING) {
1506 size_t n = std::min (type.numelements(), group->m_renderer_outputs.size());
1507 for (size_t i = 0; i < n; ++i)
1508 ((ustring *)val)[i] = group->m_renderer_outputs[i];
1509 for (size_t i = n; i < type.numelements(); ++i)
1510 ((ustring *)val)[i] = ustring();
1511 return true;
1512 }
1513 if (name == "raytype_queries" && type.basetype == TypeDesc::INT) {
1514 *(int *)val = group->raytype_queries();
1515 return true;
1516 }
1517 if (name == "num_entry_layers" && type.basetype == TypeDesc::INT) {
1518 int n = 0;
1519 for (int i = 0; i < group->nlayers(); ++i)
1520 n += group->layer(i)->entry_layer();
1521 *(int *)val = n;
1522 return true;
1523 }
1524 if (name == "entry_layers" && type.basetype == TypeDesc::STRING) {
1525 size_t n = 0;
1526 for (size_t i = 0; i < (size_t)group->nlayers() && i < type.numelements(); ++i)
1527 if (group->layer(i)->entry_layer())
1528 ((ustring *)val)[n++] = (*group)[i]->layername();
1529 for (size_t i = n; i < type.numelements(); ++i)
1530 ((ustring *)val)[i] = ustring();
1531 return true;
1532 }
1533 if (name == "group_init_name" && type.basetype == TypeDesc::STRING) {
1534 #ifdef OIIO_HAS_SPRINTF
1535 *(ustring *)val = ustring::sprintf ("__direct_callable__group_%s_%d_init",
1536 group->name(), group->id());
1537 #else
1538 *(ustring *)val = ustring::format ("__direct_callable__group_%s_%d_init",
1539 group->name(), group->id());
1540 #endif
1541 return true;
1542 }
1543 if (name == "group_entry_name" && type.basetype == TypeDesc::STRING) {
1544 int nlayers = group->nlayers ();
1545 ShaderInstance *inst = (*group)[nlayers-1];
1546 // This formulation mirrors OSOProcessorBase::layer_function_name()
1547 #ifdef OIIO_HAS_SPRINTF
1548 *(ustring *)val = ustring::sprintf ("__direct_callable__%s_%s_%d", group->name(),
1549 inst->layername(), inst->id());
1550 #else
1551 *(ustring *)val = ustring::format ("__direct_callable__%s_%s_%d", group->name(),
1552 inst->layername(), inst->id());
1553 #endif
1554 return true;
1555 }
1556 if (name == "layer_osofiles" && type.basetype == TypeDesc::STRING) {
1557 size_t n = std::min (type.numelements(), (size_t)group->nlayers());
1558 for (size_t i = 0; i < n; ++i)
1559 ((ustring *)val)[i] =(*group)[i]->master()->osofilename();
1560 return true;
1561 }
1562 if (name == "pickle" && type == TypeDesc::STRING) {
1563 *(ustring *)val = ustring(group->serialize());
1564 return true;
1565 }
1566 if (name == "exec_repeat" && type == TypeDesc::TypeInt) {
1567 *(int *)val = group->m_exec_repeat;
1568 return true;
1569 }
1570 if (name == "ptx_compiled_version" && type.basetype == TypeDesc::PTR) {
1571 bool exists = !group->m_llvm_ptx_compiled_version.empty();
1572 *(std::string *)val = exists ? group->m_llvm_ptx_compiled_version : "";
1573 return true;
1574 }
1575
1576 // All the remaining attributes require the group to already be
1577 // optimized.
1578 if (! group->optimized()) {
1579 auto threadinfo = create_thread_info();
1580 auto ctx = get_context(threadinfo);
1581 optimize_group (*group, ctx, false /*jit*/);
1582 release_context(ctx);
1583 destroy_thread_info (threadinfo);
1584 }
1585
1586 if (name == "num_textures_needed" && type == TypeDesc::TypeInt) {
1587 *(int *)val = (int)group->m_textures_needed.size();
1588 return true;
1589 }
1590 if (name == "textures_needed" && type.basetype == TypeDesc::PTR) {
1591 size_t n = group->m_textures_needed.size();
1592 *(ustring **)val = n ? &group->m_textures_needed[0] : NULL;
1593 return true;
1594 }
1595 if (name == "unknown_textures_needed" && type == TypeDesc::TypeInt) {
1596 *(int *)val = (int)group->m_unknown_textures_needed;
1597 return true;
1598 }
1599
1600 if (name == "num_closures_needed" && type == TypeDesc::TypeInt) {
1601 *(int *)val = (int)group->m_closures_needed.size();
1602 return true;
1603 }
1604 if (name == "closures_needed" && type.basetype == TypeDesc::PTR) {
1605 size_t n = group->m_closures_needed.size();
1606 *(ustring **)val = n ? &group->m_closures_needed[0] : NULL;
1607 return true;
1608 }
1609 if (name == "unknown_closures_needed" && type == TypeDesc::TypeInt) {
1610 *(int *)val = (int)group->m_unknown_closures_needed;
1611 return true;
1612 }
1613
1614 if (name == "num_globals_needed" && type == TypeDesc::TypeInt) {
1615 *(int *)val = (int)group->m_globals_needed.size();
1616 return true;
1617 }
1618 if (name == "globals_needed" && type.basetype == TypeDesc::PTR) {
1619 size_t n = group->m_globals_needed.size();
1620 *(ustring **)val = n ? &group->m_globals_needed[0] : NULL;
1621 return true;
1622 }
1623 if (name == "globals_read" && type.basetype == TypeDesc::INT) {
1624 *(int *)val = group->m_globals_read;
1625 return true;
1626 }
1627 if (name == "globals_write" && type.basetype == TypeDesc::INT) {
1628 *(int *)val = group->m_globals_write;
1629 return true;
1630 }
1631
1632 if (name == "num_userdata" && type == TypeDesc::TypeInt) {
1633 *(int *)val = (int)group->m_userdata_names.size();
1634 return true;
1635 }
1636 if (name == "userdata_names" && type.basetype == TypeDesc::PTR) {
1637 size_t n = group->m_userdata_names.size();
1638 *(ustring **)val = n ? &group->m_userdata_names[0] : NULL;
1639 return true;
1640 }
1641 if (name == "userdata_types" && type.basetype == TypeDesc::PTR) {
1642 size_t n = group->m_userdata_types.size();
1643 *(TypeDesc **)val = n ? &group->m_userdata_types[0] : NULL;
1644 return true;
1645 }
1646 if (name == "userdata_offsets" && type.basetype == TypeDesc::PTR) {
1647 size_t n = group->m_userdata_offsets.size();
1648 *(int **)val = n ? &group->m_userdata_offsets[0] : NULL;
1649 return true;
1650 }
1651 if (name == "userdata_derivs" && type.basetype == TypeDesc::PTR) {
1652 size_t n = group->m_userdata_derivs.size();
1653 *(char **)val = n ? &group->m_userdata_derivs[0] : NULL;
1654 return true;
1655 }
1656 if (name == "num_attributes_needed" && type == TypeDesc::TypeInt) {
1657 *(int *)val = (int)group->m_attributes_needed.size();
1658 return true;
1659 }
1660 if (name == "attributes_needed" && type.basetype == TypeDesc::PTR) {
1661 size_t n = group->m_attributes_needed.size();
1662 *(ustring **)val = n ? &group->m_attributes_needed[0] : NULL;
1663 return true;
1664 }
1665 if (name == "attribute_scopes" && type.basetype == TypeDesc::PTR) {
1666 size_t n = group->m_attribute_scopes.size();
1667 *(ustring **)val = n ? &group->m_attribute_scopes[0] : NULL;
1668 return true;
1669 }
1670 if (name == "unknown_attributes_needed" && type == TypeDesc::TypeInt) {
1671 *(int *)val = (int)group->m_unknown_attributes_needed;
1672 return true;
1673 }
1674 if (name == "group_id" && type == TypeDesc::TypeInt) {
1675 *(int *)val = (int) group->id();
1676 return true;
1677 }
1678
1679 // Additional atttributes useful to OptiX-based renderers
1680 if (name == "userdata_layers" && type.basetype == TypeDesc::PTR) {
1681 size_t n = group->m_userdata_layers.size();
1682 *(int **)val = n ? &group->m_userdata_layers[0] : NULL;
1683 return true;
1684 }
1685 if (name == "userdata_init_vals" && type.basetype == TypeDesc::PTR) {
1686 size_t n = group->m_userdata_init_vals.size();
1687 *(void **)val = n ? &group->m_userdata_init_vals[0] : NULL;
1688 return true;
1689 }
1690
1691 return false;
1692 }
1693
1694
1695
1696 void
error(const std::string & msg) const1697 ShadingSystemImpl::error (const std::string &msg) const
1698 {
1699 lock_guard guard (m_errmutex);
1700 int n = 0;
1701 for (auto&& s : m_errseen) {
1702 if (s == msg && !m_error_repeats)
1703 return;
1704 ++n;
1705 }
1706 if (n >= m_errseenmax)
1707 m_errseen.pop_front ();
1708 m_errseen.push_back (msg);
1709 m_err->error (msg);
1710 }
1711
1712
1713
1714 void
warning(const std::string & msg) const1715 ShadingSystemImpl::warning (const std::string &msg) const
1716 {
1717 lock_guard guard (m_errmutex);
1718 int n = 0;
1719 for (auto&& s : m_warnseen) {
1720 if (s == msg && !m_error_repeats)
1721 return;
1722 ++n;
1723 }
1724 if (n >= m_errseenmax)
1725 m_warnseen.pop_front ();
1726 m_warnseen.push_back (msg);
1727 m_err->warning (msg);
1728 }
1729
1730
1731
1732 void
info(const std::string & msg) const1733 ShadingSystemImpl::info (const std::string &msg) const
1734 {
1735 lock_guard guard (m_errmutex);
1736 m_err->info (msg);
1737 }
1738
1739
1740
1741 void
message(const std::string & msg) const1742 ShadingSystemImpl::message (const std::string &msg) const
1743 {
1744 lock_guard guard (m_errmutex);
1745 m_err->message (msg);
1746 }
1747
1748
1749
1750 void
pointcloud_stats(int search,int get,int results,int writes)1751 ShadingSystemImpl::pointcloud_stats (int search, int get, int results,
1752 int writes)
1753 {
1754 spin_lock lock (m_stat_mutex);
1755 m_stat_pointcloud_searches += search;
1756 m_stat_pointcloud_gets += get;
1757 m_stat_pointcloud_searches_total_results += results;
1758 if (search && ! results)
1759 ++m_stat_pointcloud_failures;
1760 m_stat_pointcloud_max_results = std::max (m_stat_pointcloud_max_results,
1761 results);
1762 m_stat_pointcloud_writes += writes;
1763 }
1764
1765
1766
1767 namespace {
1768 typedef std::pair<ustring,long long> GroupTimeVal;
1769 struct group_time_compare { // So looking forward to C++11 lambdas!
operator ()pvt::__anond3f550b20111::group_time_compare1770 bool operator() (const GroupTimeVal &a, const GroupTimeVal &b) {
1771 return a.second > b.second;
1772 }
1773 };
1774 }
1775
1776
1777
1778 // Return a comma-separated list of all the important SIMD/capabilities
1779 // that were enabled as a compile-time option when OSL was built.
1780 // (Keep this in sync with oiio_simd_caps in imageio.cpp).
1781 static std::string
osl_simd_caps()1782 osl_simd_caps()
1783 {
1784 // clang-format off
1785 std::vector<string_view> caps;
1786 if (OIIO_SIMD_SSE >= 2) caps.emplace_back ("sse2");
1787 if (OIIO_SIMD_SSE >= 3) caps.emplace_back ("sse3");
1788 if (OIIO_SIMD_SSE >= 3) caps.emplace_back ("ssse3");
1789 if (OIIO_SIMD_SSE >= 4) caps.emplace_back ("sse41");
1790 if (OIIO_SIMD_SSE >= 4) caps.emplace_back ("sse42");
1791 if (OIIO_SIMD_AVX) caps.emplace_back ("avx");
1792 if (OIIO_SIMD_AVX >= 2) caps.emplace_back ("avx2");
1793 if (OIIO_SIMD_AVX >= 512) caps.emplace_back ("avx512f");
1794 if (OIIO_AVX512DQ_ENABLED) caps.emplace_back ("avx512dq");
1795 if (OIIO_AVX512IFMA_ENABLED) caps.emplace_back ("avx512ifma");
1796 if (OIIO_AVX512PF_ENABLED) caps.emplace_back ("avx512pf");
1797 if (OIIO_AVX512ER_ENABLED) caps.emplace_back ("avx512er");
1798 if (OIIO_AVX512CD_ENABLED) caps.emplace_back ("avx512cd");
1799 if (OIIO_AVX512BW_ENABLED) caps.emplace_back ("avx512bw");
1800 if (OIIO_AVX512VL_ENABLED) caps.emplace_back ("avx512vl");
1801 if (OIIO_FMA_ENABLED) caps.emplace_back ("fma");
1802 if (OIIO_F16C_ENABLED) caps.emplace_back ("f16c");
1803 // if (OIIO_POPCOUNT_ENABLED) caps.emplace_back ("popcnt");
1804 return OIIO::Strutil::join (caps, ",");
1805 // clang-format on
1806 }
1807
1808
1809
// Build and return a human-readable, multi-line statistics report for this
// shading system.
//
//   level - verbosity; if <= 0 an empty string is returned. Any positive
//           value produces the same report (the value is not otherwise
//           consulted here).
//
// The report contains, in order: library/build dependency versions, build
// and runtime hardware info, the current option values, and (only if any
// shaders were requested or loaded) the shader/optimization/memory
// statistics, followed by the execution profile when profiling is on.
//
// Note: although this method is const, when m_profile is set it folds the
// not-yet-accounted shading time of every still-alive group into
// m_group_profile_times and subtracts it from the group's own counter.
std::string
ShadingSystemImpl::getstats (int level) const
{
    // Width used to word-wrap the option list further below
    int columns = OIIO::Sysutil::terminal_columns() - 2;

    if (level <= 0)
        return "";
    std::ostringstream out;
    out.imbue (std::locale::classic());  // force C locale
    out << "Open Shading Language " << OSL_LIBRARY_VERSION_STRING << "\n";
    // The Imath version is reported via OPENEXR_VERSION_STRING when that
    // macro is available.
    out << " Build deps: LLVM-" << OSL_LLVM_FULL_VERSION
        << " OIIO-" << OIIO_VERSION_STRING << " Imath-" <<
#ifdef OPENEXR_VERSION_STRING
        OPENEXR_VERSION_STRING
#else
        "(unknown)"
#endif
        << "\n";

    // Collect "name=value " for every option into one string. Each macro
    // reads the member m_<name>; STROPT skips options whose value is empty.
    std::string opt;
#define BOOLOPT(name) opt += Strutil::sprintf(#name "=%d ", m_##name)
#define INTOPT(name) opt += Strutil::sprintf(#name "=%d ", m_##name)
#define STROPT(name) if (m_##name.size()) opt += Strutil::sprintf(#name "=\"%s\" ", m_##name)
    INTOPT (optimize);
    INTOPT (llvm_optimize);
    INTOPT (debug);
    INTOPT (profile);
    INTOPT (llvm_debug);
    BOOLOPT (llvm_debug_layers);
    BOOLOPT (llvm_debug_ops);
    BOOLOPT (llvm_target_host);
    BOOLOPT (llvm_output_bitcode);
    BOOLOPT (llvm_dumpasm);
    BOOLOPT (llvm_prune_ir_strategy);
    BOOLOPT (lazylayers);
    BOOLOPT (lazyglobals);
    BOOLOPT (lazyunconnected);
    BOOLOPT (lazyerror);
    BOOLOPT (lazy_userdata);
    BOOLOPT (userdata_isconnected);
    BOOLOPT (clearmemory);
    BOOLOPT (debugnan);
    BOOLOPT (debug_uninit);
    BOOLOPT (lockgeom_default);
    BOOLOPT (strict_messages);
    BOOLOPT (error_repeats);
    BOOLOPT (range_checking);
    BOOLOPT (greedyjit);
    BOOLOPT (countlayerexecs);
    BOOLOPT (opt_simplify_param);
    BOOLOPT (opt_constant_fold);
    BOOLOPT (opt_stale_assign);
    BOOLOPT (opt_elide_useless_ops);
    BOOLOPT (opt_elide_unconnected_outputs);
    BOOLOPT (opt_peephole);
    BOOLOPT (opt_coalesce_temps);
    BOOLOPT (opt_assign);
    BOOLOPT (opt_mix);
    INTOPT (opt_merge_instances);
    BOOLOPT (opt_merge_instances_with_userdata);
    BOOLOPT (opt_fold_getattribute);
    BOOLOPT (opt_middleman);
    BOOLOPT (opt_texture_handle);
    BOOLOPT (opt_seed_bblock_aliases);
    BOOLOPT (llvm_jit_fma);
    BOOLOPT (llvm_jit_aggressive);
    INTOPT (vector_width);
    STROPT (llvm_jit_target);
    INTOPT (opt_passes);
    INTOPT (no_noise);
    INTOPT (no_pointcloud);
    INTOPT (force_derivs);
    INTOPT (allow_shader_replacement);
    INTOPT (exec_repeat);
    INTOPT (opt_warnings);
    INTOPT (gpu_opt_error);
    STROPT (debug_groupname);
    STROPT (debug_layername);
    STROPT (archive_groupname);
    STROPT (archive_filename);
#undef BOOLOPT
#undef INTOPT
#undef STROPT

    // Print the HW info
    out << " Build HW support: ";
    std::string buildsimd = osl_simd_caps();
    if (!buildsimd.size())
        buildsimd = "no SIMD";
    out << buildsimd << "\n";
    // Physical memory is divided by 2^30 to report it in GB
    OIIO::Strutil::fprintf(out, " Runtime HW: %d cores %.1fGB %s\n",
                           OIIO::Sysutil::hardware_concurrency(),
                           OIIO::Sysutil::physical_memory() / float(1 << 30),
                           OIIO::get_string_attribute("hw:simd"));
    // TODO: detect GPU info and print it here
    out << "\n";

    out << "ShadingSystem Options:\n";
    out << " " << Strutil::wordwrap(opt, columns, 4) << "\n";

    out << "\nOSL ShadingSystem statistics (" << (void*)this << ")\n";
    // Nothing more to say if no shader was ever requested or loaded
    if (m_stat_shaders_requested == 0 && m_stat_shaders_loaded == 0) {
        out << " No shaders requested or loaded\n";
        return out.str();
    }

    out << " Shaders:\n";
    out << " Requested: " << m_stat_shaders_requested << "\n";
    out << " Loaded: " << m_stat_shaders_loaded << "\n";
    // NOTE(review): "Masters" prints the same counter as "Loaded" --
    // confirm that is intended.
    out << " Masters: " << m_stat_shaders_loaded << "\n";
    out << " Instances: " << m_stat_instances << "\n";
    out << " Time loading masters: "
        << Strutil::timeintervalformat (m_stat_master_load_time, 2) << "\n";
    out << " Shading groups: " << m_stat_groups << "\n";
    out << " Total instances in all groups: " << m_stat_groupinstances << "\n";
    // max(...,1) guards against dividing by zero groups
    float iperg = (float)m_stat_groupinstances/std::max((int)m_stat_groups,1);
    out << " Avg instances per group: "
        << Strutil::sprintf ("%.1f", iperg) << "\n";
    out << " Shading contexts: " << m_stat_contexts << "\n";
    if (m_countlayerexecs)
        out << " Total layers executed: " << m_stat_layers_executed << "\n";

    // Disabled: per-category layer execution breakdown
#if 0
    long long totalexec = m_layers_executed_uncond + m_layers_executed_lazy +
                          m_layers_executed_never;
    out << Strutil::sprintf (" Total layers run: %10lld\n", totalexec);
    double inv_totalexec = 1.0 / std::max (totalexec, 1LL);  // prevent div by 0
    out << Strutil::sprintf (" Unconditional: %10lld (%.1f%%)\n",
                             (long long)m_layers_executed_uncond,
                             (100.0*m_layers_executed_uncond) * inv_totalexec);
    out << Strutil::sprintf (" On demand: %10lld (%.1f%%)\n",
                             (long long)m_layers_executed_lazy,
                             (100.0*m_layers_executed_lazy) * inv_totalexec);
    out << Strutil::sprintf (" Skipped: %10lld (%.1f%%)\n",
                             (long long)m_layers_executed_never,
                             (100.0*m_layers_executed_never) * inv_totalexec);

#endif

    // Compilation / optimization results
    out << " Compiled " << m_stat_groups_compiled << " groups, "
        << m_stat_instances_compiled << " instances\n";
    out << " Merged " << (m_stat_merged_inst+m_stat_merged_inst_opt)
        << " instances (" << m_stat_merged_inst << " initial, "
        << m_stat_merged_inst_opt << " after opt) in "
        << Strutil::timeintervalformat (m_stat_inst_merge_time, 2) << "\n";
    // Percentages are only printed when the denominator is nonzero
    if (m_stat_instances_compiled > 0)
        out << " After optimization, " << m_stat_empty_instances
            << " empty instances ("
            << (int)(100.0f*m_stat_empty_instances/m_stat_instances_compiled) << "%)\n";
    if (m_stat_groups_compiled > 0)
        out << " After optimization, " << m_stat_empty_groups << " empty groups ("
            << (int)(100.0f*m_stat_empty_groups/m_stat_groups_compiled)<< "%)\n";
    if (m_stat_instances_compiled > 0 || m_stat_groups_compiled > 0) {
        // Percentage is the relative change post/pre - 1 (negative when
        // the count shrank); max(1,...) avoids division by zero.
        out << Strutil::sprintf (" Optimized %llu ops to %llu (%.1f%%)\n",
                                 (long long)m_stat_preopt_ops,
                                 (long long)m_stat_postopt_ops,
                                 100.0*(double(m_stat_postopt_ops)/double(std::max(1,(int)m_stat_preopt_ops))-1.0));
        out << Strutil::sprintf (" Optimized %llu symbols to %llu (%.1f%%)\n",
                                 (long long)m_stat_preopt_syms,
                                 (long long)m_stat_postopt_syms,
                                 100.0*(double(m_stat_postopt_syms)/double(std::max(1,(int)m_stat_preopt_syms))-1.0));
    }
    out << Strutil::sprintf (" Constant connections eliminated: %d\n",
                             (int)m_stat_const_connections);
    out << Strutil::sprintf (" Global connections eliminated: %d\n",
                             (int)m_stat_global_connections);
    out << Strutil::sprintf (" Middlemen eliminated: %d\n",
                             (int)m_stat_middlemen_eliminated);
    out << Strutil::sprintf (" Derivatives needed on %d / %d symbols (%.1f%%)\n",
                             (int)m_stat_syms_with_derivs, (int)m_stat_postopt_syms,
                             (100.0*(int)m_stat_syms_with_derivs)/std::max((int)m_stat_postopt_syms,1));
    // Timing breakdown of runtime optimization
    out << " Runtime optimization cost: "
        << Strutil::timeintervalformat (m_stat_optimization_time, 2) << "\n";
    out << " locking: "
        << Strutil::timeintervalformat (m_stat_opt_locking_time, 2) << "\n";
    out << " runtime specialization: "
        << Strutil::timeintervalformat (m_stat_specialization_time, 2) << "\n";
    // LLVM phases are listed only if any LLVM time was recorded
    if (m_stat_total_llvm_time > 0.0) {
        out << " LLVM setup: "
            << Strutil::timeintervalformat (m_stat_llvm_setup_time, 2) << "\n";
        out << " LLVM IR gen: "
            << Strutil::timeintervalformat (m_stat_llvm_irgen_time, 2) << "\n";
        out << " LLVM optimize: "
            << Strutil::timeintervalformat (m_stat_llvm_opt_time, 2) << "\n";
        out << " LLVM JIT: "
            << Strutil::timeintervalformat (m_stat_llvm_jit_time, 2) << "\n";
    }

    out << " Texture calls compiled: "
        << (int)m_stat_tex_calls_codegened
        << " (" << (int)m_stat_tex_calls_as_handles << " used handles)\n";
    out << " Regex's compiled: " << m_stat_regexes << "\n";
    out << " Largest generated function local memory size: "
        << m_stat_max_llvm_local_mem/1024 << " KB\n";
    if (m_stat_getattribute_calls) {
        out << " getattribute calls: " << m_stat_getattribute_calls << " ("
            << Strutil::timeintervalformat (m_stat_getattribute_time, 2) << ")\n";
        out << " (fail time "
            << Strutil::timeintervalformat (m_stat_getattribute_fail_time, 2) << ")\n";
    }
    out << " Number of get_userdata calls: " << m_stat_get_userdata_calls << "\n";
    if (profile() > 1)
        out << " Number of noise calls: " << m_stat_noise_calls << "\n";
    // Point cloud section appears only if any search or write happened
    if (m_stat_pointcloud_searches || m_stat_pointcloud_writes) {
        out << " Pointcloud operations:\n";
        out << " pointcloud_search calls: " << m_stat_pointcloud_searches << "\n";
        out << " max query results: " << m_stat_pointcloud_max_results << "\n";
        // Average results per search; 0 when there were no searches
        double avg = m_stat_pointcloud_searches ?
            (double)m_stat_pointcloud_searches_total_results/(double)m_stat_pointcloud_searches : 0.0;
        out << " average query results: " << Strutil::sprintf ("%.1f", avg) << "\n";
        out << " failures: " << m_stat_pointcloud_failures << "\n";
        out << " pointcloud_get calls: " << m_stat_pointcloud_gets << "\n";
        out << " pointcloud_write calls: " << m_stat_pointcloud_writes << "\n";
    }
    // Memory usage breakdown
    out << " Memory total: " << m_stat_memory.memstat() << '\n';
    out << " Master memory: " << m_stat_mem_master.memstat() << '\n';
    out << " Master ops: " << m_stat_mem_master_ops.memstat() << '\n';
    out << " Master args: " << m_stat_mem_master_args.memstat() << '\n';
    out << " Master syms: " << m_stat_mem_master_syms.memstat() << '\n';
    out << " Master defaults: " << m_stat_mem_master_defaults.memstat() << '\n';
    out << " Master consts: " << m_stat_mem_master_consts.memstat() << '\n';
    out << " Instance memory: " << m_stat_mem_inst.memstat() << '\n';
    out << " Instance syms: " << m_stat_mem_inst_syms.memstat() << '\n';
    out << " Instance param values: " << m_stat_mem_inst_paramvals.memstat() << '\n';
    out << " Instance connections: " << m_stat_mem_inst_connections.memstat() << '\n';

    size_t jitmem = LLVM_Util::total_jit_memory_held();
    out << " LLVM JIT memory: " << Strutil::memformat(jitmem) << '\n';

    if (m_profile) {
        out << " Execution profile:\n";
        out << " Total shader execution time: "
            << Strutil::timeintervalformat(OIIO::Timer::seconds(m_stat_total_shading_time_ticks), 2)
            << " (sum of all threads)\n";
        // Account for times of any groups that haven't yet been destroyed
        {
            spin_lock lock (m_all_shader_groups_mutex);
            for (auto&& grp : m_all_shader_groups) {
                // grp.lock() yields a usable reference only if the group
                // is still alive
                if (ShaderGroupRef g = grp.lock()) {
                    // Move the group's ticks into the per-name table and
                    // subtract them from the group so a later call does
                    // not count them twice.
                    long long ticks = g->m_stat_total_shading_time_ticks;
                    m_group_profile_times[g->name()] += ticks;
                    g->m_stat_total_shading_time_ticks -= ticks;
                }
            }
        }
        {
            spin_lock lock (m_stat_mutex);
            // Copy the table into a vector so it can be sorted by time
            std::vector<GroupTimeVal> grouptimes;
            for (std::map<ustring,long long>::const_iterator m = m_group_profile_times.begin();
                 m != m_group_profile_times.end(); ++m) {
                grouptimes.emplace_back(m->first, m->second);
            }
            // Most expensive first; report at most the top 5
            std::sort (grouptimes.begin(), grouptimes.end(), group_time_compare());
            if (grouptimes.size() > 5)
                grouptimes.resize (5);
            if (grouptimes.size())
                out << " Most expensive shader groups:\n";
            for (std::vector<GroupTimeVal>::const_iterator i = grouptimes.begin();
                 i != grouptimes.end(); ++i) {
                out << " " << Strutil::timeintervalformat(OIIO::Timer::seconds(i->second),2)
                    << ' ' << (i->first.size() ? i->first.c_str() : "<unnamed group>") << "\n";
            }
        }

    }

    return out.str();
}
2078
2079
2080
2081 void
printstats() const2082 ShadingSystemImpl::printstats () const
2083 {
2084 if (m_statslevel == 0)
2085 return;
2086 m_err->message (getstats (m_statslevel));
2087 }
2088
2089
2090
2091 bool
Parameter(string_view name,TypeDesc t,const void * val,bool lockgeom)2092 ShadingSystemImpl::Parameter (string_view name, TypeDesc t, const void *val,
2093 bool lockgeom)
2094 {
2095 return Parameter (*m_curgroup, name, t, val, lockgeom);
2096 }
2097
2098
2099
2100 bool
Parameter(ShaderGroup & group,string_view name,TypeDesc t,const void * val,bool lockgeom)2101 ShadingSystemImpl::Parameter (ShaderGroup& group, string_view name,
2102 TypeDesc t, const void *val, bool lockgeom)
2103 {
2104 // We work very hard not to do extra copies of the data. First,
2105 // grow the pending list by one (empty) slot...
2106 group.m_pending_params.grow();
2107 // ...then initialize it in place
2108 group.m_pending_params.back().init (name, t, 1, val);
2109 // If we have a possible geometric override (lockgeom=false), set the
2110 // param's interpolation to VERTEX rather than the default CONSTANT.
2111 if (lockgeom == false)
2112 group.m_pending_params.back().interp (OIIO::ParamValue::INTERP_VERTEX);
2113 return true;
2114 }
2115
2116
2117
2118 ShaderGroupRef
ShaderGroupBegin(string_view groupname)2119 ShadingSystemImpl::ShaderGroupBegin (string_view groupname)
2120 {
2121 ShaderGroupRef group (new ShaderGroup(groupname));
2122 group->m_exec_repeat = m_exec_repeat;
2123 {
2124 // Record the group in the SS's census of all extant groups
2125 spin_lock lock (m_all_shader_groups_mutex);
2126 m_all_shader_groups.push_back (group);
2127 ++m_groups_to_compile_count;
2128 m_curgroup = group;
2129 }
2130 return group;
2131 }
2132
2133
2134
2135 bool
ShaderGroupEnd(void)2136 ShadingSystemImpl::ShaderGroupEnd (void)
2137 {
2138 if (! m_curgroup) {
2139 error ("ShaderGroupEnd() was called without ShaderGroupBegin()");
2140 return false;
2141 }
2142 bool ok = ShaderGroupEnd (*m_curgroup);
2143 m_curgroup.reset(); // no currently active group
2144 return ok;
2145 }
2146
2147
2148
2149 bool
ShaderGroupEnd(ShaderGroup & group)2150 ShadingSystemImpl::ShaderGroupEnd (ShaderGroup& group)
2151 {
2152 // Lock just in case we do something not thread-safe within
2153 // ShaderGroupEnd. This may be overly cautious, but unless it shows
2154 // up as a major bottleneck, I'm inclined to play it safe.
2155 lock_guard lock (m_mutex);
2156
2157 // Mark the layers that can be run lazily
2158 if (! group.m_group_use.empty()) {
2159 int nlayers = group.nlayers ();
2160 for (int layer = 0; layer < nlayers; ++layer) {
2161 ShaderInstance *inst = group[layer];
2162 if (! inst)
2163 continue;
2164 inst->last_layer (layer == nlayers-1);
2165 }
2166
2167 // Merge instances now if they really want it bad, otherwise wait
2168 // until we optimize the group.
2169 if (m_opt_merge_instances >= 2)
2170 merge_instances (group);
2171 }
2172
2173 // Merge the raytype_queries of all the individual layers
2174 group.m_raytype_queries = 0;
2175 for (int layer = 0, n = group.nlayers(); layer < n; ++layer) {
2176 if (ShaderInstance *inst = group[layer])
2177 group.m_raytype_queries |= inst->master()->raytype_queries();
2178 }
2179 // std::cout << "Group " << group.name() << " ray query bits "
2180 // << group.m_raytype_queries << "\n";
2181
2182 ustring groupname = group.name();
2183 if (groupname.size() && groupname == m_archive_groupname) {
2184 std::string filename = m_archive_filename.string();
2185 if (! filename.size())
2186 filename = OIIO::Filesystem::filename (groupname.string()) + ".tar.gz";
2187 archive_shadergroup (group, filename);
2188 }
2189
2190 group.m_complete = true;
2191 return true;
2192 }
2193
2194
2195
2196 bool
Shader(string_view shaderusage,string_view shadername,string_view layername)2197 ShadingSystemImpl::Shader (string_view shaderusage,
2198 string_view shadername,
2199 string_view layername)
2200 {
2201 // Make sure we have a current attrib state
2202 bool singleton = (! m_curgroup);
2203 if (singleton)
2204 ShaderGroupBegin ("");
2205
2206 return Shader (*m_curgroup, shaderusage, shadername, layername);
2207 }
2208
2209
2210
2211 bool
Shader(ShaderGroup & group,string_view shaderusage,string_view shadername,string_view layername)2212 ShadingSystemImpl::Shader (ShaderGroup& group, string_view shaderusage,
2213 string_view shadername, string_view layername)
2214 {
2215 ShaderMaster::ref master = loadshader (shadername);
2216 if (! master) {
2217 errorf("Could not find shader \"%s\"\n"
2218 " group: %s",
2219 shadername, group.name());
2220 return false;
2221 }
2222
2223 if (shaderusage.empty()) {
2224 errorf("Shader usage required\n"
2225 " group: %s",
2226 shadername, group.name());
2227 return false;
2228 }
2229
2230 // If a layer name was not supplied, make one up.
2231 std::string local_layername;
2232 if (layername.empty()) {
2233 local_layername = OIIO::Strutil::sprintf ("%s_%d", master->shadername(),
2234 group.nlayers());
2235 layername = string_view (local_layername);
2236 }
2237
2238 ShaderInstanceRef instance (new ShaderInstance (master, layername));
2239 instance->parameters (group.m_pending_params);
2240 group.m_pending_params.clear ();
2241 group.m_pending_params.shrink_to_fit ();
2242
2243 if (group.m_group_use.empty()) {
2244 // First in a group
2245 group.clear ();
2246 m_stat_groups += 1;
2247 group.m_group_use = shaderusage;
2248 } else if (shaderusage != group.m_group_use) {
2249 errorf("Shader usage \"%s\" does not match current group (%s)\n"
2250 " group: %s",
2251 shaderusage, group.m_group_use, group.name());
2252 return false;
2253 }
2254
2255 group.append (instance);
2256 m_stat_groupinstances += 1;
2257
2258 // FIXME -- check for duplicate layer name within the group?
2259
2260 return true;
2261 }
2262
2263
2264
2265 bool
ConnectShaders(string_view srclayer,string_view srcparam,string_view dstlayer,string_view dstparam)2266 ShadingSystemImpl::ConnectShaders (string_view srclayer, string_view srcparam,
2267 string_view dstlayer, string_view dstparam)
2268 {
2269 if (! m_curgroup) {
2270 error ("ConnectShaders can only be called within ShaderGroupBegin/End");
2271 return false;
2272 }
2273 return ConnectShaders (*m_curgroup, srclayer, srcparam, dstlayer, dstparam);
2274 }
2275
2276
2277
// Connect an output of one layer of `group` to an input of a later layer.
//
//   group    - the shader group containing both layers
//   srclayer - name of the upstream (source) layer
//   srcparam - name of the source parameter (may select a sub-element; it
//              is decoded by decode_connected_param)
//   dstlayer - name of the downstream (destination) layer
//   dstparam - name of the destination parameter (decoded likewise)
//
// Returns true if the connection was made. Returns false, after reporting
// an error (or, for invalid/unassignable connections when
// connection_error() is false, a warning), if a name is empty, a layer is
// not found, the destination does not come after the source, the
// parameters cannot be decoded, the types are not assignable, or the
// destination symbol disallows connections.
//
// A whole-struct to whole-struct connection between equivalent struct
// types is implemented as one recursive call per field; the results of
// those per-field calls are not checked and true is returned.
bool
ShadingSystemImpl::ConnectShaders (ShaderGroup& group,
                                   string_view srclayer, string_view srcparam,
                                   string_view dstlayer, string_view dstparam)
{
    // Basic sanity checks: the layer and parameter names must not be
    // empty.
    if (! srclayer.size() || ! srcparam.size()) {
        errorf("ConnectShaders: badly formed source layer/parameter\n"
               " group: %s", group.name());
        return false;
    }
    if (! dstlayer.size() || ! dstparam.size()) {
        errorf("ConnectShaders: badly formed destination layer/parameter\n"
               " group: %s", group.name());
        return false;
    }

    // Decode the layers, finding the indices within our group and
    // pointers to the instances. Error and return if they are not found,
    // or if it's not connecting an earlier src to a later dst.
    // (srcinst/dstinst are outputs of find_named_layer_in_group; they are
    // presumably set to null when the layer is not found -- see the checks
    // just below.)
    ShaderInstance *srcinst, *dstinst;
    int srcinstindex = find_named_layer_in_group (group, ustring(srclayer), srcinst);
    int dstinstindex = find_named_layer_in_group (group, ustring(dstlayer), dstinst);
    if (! srcinst) {
        errorf("ConnectShaders: source layer \"%s\" not found\n"
               " group: %s", srclayer, group.name());
        return false;
    }
    if (! dstinst) {
        errorf("ConnectShaders: destination layer \"%s\" not found\n"
               " group: %s", dstlayer, group.name());
        return false;
    }
    if (dstinstindex <= srcinstindex) {
        errorf("ConnectShaders: destination layer must follow source layer (tried to connect %s.%s -> %s.%s)\n"
               " group: %s", srclayer, srcparam, dstlayer, dstparam,
               group.name());
        return false;
    }

    // Decode the parameter names, find their symbols in their
    // respective layers, and also decode request to attach specific
    // array elements or color/vector channels.
    ConnectedParam srccon = decode_connected_param(srcparam, srclayer, srcinst);
    ConnectedParam dstcon = decode_connected_param(dstparam, dstlayer, dstinst);
    if (! (srccon.valid() && dstcon.valid())) {
        // Severity (error vs. warning) is chosen by connection_error();
        // either way the connection is refused.
        if (connection_error())
            errorf("ConnectShaders: cannot connect a %s (%s) to a %s (%s), invalid connection\n"
                   " group: %s",
                   srccon.type, srcparam, dstcon.type, dstparam, group.name());
        else
            warningf("ConnectShaders: cannot connect a %s (%s) to a %s (%s), invalid connection\n"
                     " group: %s",
                     srccon.type, srcparam, dstcon.type, dstparam, group.name());
        return false;
    }

    if (srccon.type.is_structure() && dstcon.type.is_structure() &&
        equivalent (srccon.type, dstcon.type)) {
        // If the connection is whole struct-to-struct (and they are
        // structs with equivalent data layout), implement it underneath
        // as connections between their respective fields.
        StructSpec *srcstruct = srccon.type.structspec();
        StructSpec *dststruct = dstcon.type.structspec();
        for (size_t i = 0; i < (size_t)srcstruct->numfields(); ++i) {
            // Field i of the source connects to field i of the destination
            std::string s = Strutil::sprintf("%s.%s", srcparam, srcstruct->field(i).name);
            std::string d = Strutil::sprintf("%s.%s", dstparam, dststruct->field(i).name);
            // NOTE(review): the per-field result is ignored here.
            ConnectShaders (group, srclayer, s, dstlayer, d);
        }
        return true;
    }

    if (! assignable (dstcon.type, srccon.type)) {
        // Same error-vs-warning choice as for invalid connections above
        if (connection_error())
            errorf("ConnectShaders: cannot connect a %s (%s) to a %s (%s)\n"
                   " group: %s",
                   srccon.type, srcparam, dstcon.type, dstparam, group.name());
        else
            warningf("ConnectShaders: cannot connect a %s (%s) to a %s (%s)\n"
                     " group: %s",
                     srccon.type, srcparam, dstcon.type, dstparam, group.name());
        return false;
    }

    // Refuse the connection if the destination's master symbol says
    // connections are not allowed.
    const Symbol *dstsym = dstinst->mastersymbol(dstcon.param);
    if (dstsym && !dstsym->allowconnect()) {
        std::string name = dstlayer.size() ? Strutil::sprintf("%s.%s", dstlayer, dstparam)
                                           : std::string(dstparam);
        errorf("ConnectShaders: cannot connect to %s because it has metadata allowconnect=0\n"
               " group: %s", name, group.name());
        return false;
    }

    // Record the connection on the destination instance, mark the
    // destination parameter as getting its value from a connection, and
    // flag the source parameter/instance as having a downstream connection.
    dstinst->add_connection (srcinstindex, srccon, dstcon);
    dstinst->instoverride(dstcon.param)->valuesource (Symbol::ConnectedVal);
    srcinst->instoverride(srccon.param)->connected_down (true);
    srcinst->outgoing_connections (true);

    // if (debug())
    //     message ("ConnectShaders %s %s -> %s %s\n",
    //              srclayer, srcparam, dstlayer, dstparam);

    return true;
}
2383
2384
2385
2386 ShaderGroupRef
ShaderGroupBegin(string_view groupname,string_view usage,string_view groupspec)2387 ShadingSystemImpl::ShaderGroupBegin (string_view groupname,
2388 string_view usage,
2389 string_view groupspec)
2390 {
2391 ShaderGroupRef g = ShaderGroupBegin (groupname);
2392 bool err = false;
2393 std::string errdesc;
2394 string_view errstatement;
2395 std::vector<int> intvals;
2396 std::vector<float> floatvals;
2397 std::vector<ustring> stringvals;
2398 string_view p = groupspec; // parse view
2399 // std::cout << "!!!!!\n---\n" << groupspec << "\n---\n\n";
2400 while (p.size()) {
2401 string_view pstart = p; // save where we were for error reporting
2402 Strutil::skip_whitespace (p);
2403 if (! p.size())
2404 break;
2405 while (Strutil::parse_char (p, ';')) // skip blank statements
2406 ;
2407 string_view keyword = Strutil::parse_word (p);
2408
2409 if (keyword == "shader") {
2410 string_view shadername = Strutil::parse_identifier (p);
2411 Strutil::skip_whitespace (p);
2412 string_view layername = Strutil::parse_until (p, " \t\r\n,;");
2413 bool ok = Shader (*g, usage, shadername, layername);
2414 if (!ok) {
2415 errstatement = pstart;
2416 err = true;
2417 break;
2418 }
2419 Strutil::parse_char (p, ';') || Strutil::parse_char (p, ',');
2420 Strutil::skip_whitespace (p);
2421 continue;
2422 }
2423
2424 if (keyword == "connect") {
2425 Strutil::skip_whitespace (p);
2426 string_view lay1 = Strutil::parse_until (p, " \t\r\n.");
2427 Strutil::parse_char (p, '.');
2428 string_view param1 = Strutil::parse_until (p, " \t\r\n,;");
2429 Strutil::skip_whitespace (p);
2430 string_view lay2 = Strutil::parse_until (p, " \t\r\n.");
2431 Strutil::parse_char (p, '.');
2432 string_view param2 = Strutil::parse_until (p, " \t\r\n,;");
2433 bool ok = ConnectShaders (*g, lay1, param1, lay2, param2);
2434 if (!ok) {
2435 errstatement = pstart;
2436 err = true;
2437 break;
2438 }
2439 Strutil::parse_char (p, ';') || Strutil::parse_char (p, ',');
2440 Strutil::skip_whitespace (p);
2441 continue;
2442 }
2443
2444 // Remaining case -- it should be declaring a parameter.
2445 string_view typestring;
2446 if (keyword == "param") {
2447 typestring = Strutil::parse_word (p);
2448 } else if (TypeDesc(keyword.str().c_str()) != TypeDesc::UNKNOWN) {
2449 // compatibility: let the 'param' keyword be optional, if it's
2450 // obvious that it's a type name.
2451 typestring = keyword;
2452 } else {
2453 err = true;
2454 errdesc = Strutil::sprintf ("Unknown statement (expected 'param', "
2455 "'shader', or 'connect'): \"%s\"",
2456 keyword);
2457 break;
2458 }
2459 TypeDesc type;
2460 if (typestring == "int")
2461 type = TypeDesc::TypeInt;
2462 else if (typestring == "float")
2463 type = TypeDesc::TypeFloat;
2464 else if (typestring == "color")
2465 type = TypeDesc::TypeColor;
2466 else if (typestring == "point")
2467 type = TypeDesc::TypePoint;
2468 else if (typestring == "vector")
2469 type = TypeDesc::TypeVector;
2470 else if (typestring == "normal")
2471 type = TypeDesc::TypeNormal;
2472 else if (typestring == "matrix")
2473 type = TypeDesc::TypeMatrix;
2474 else if (typestring == "string")
2475 type = TypeDesc::TypeString;
2476 else {
2477 err = true;
2478 errdesc = Strutil::sprintf ("Unknown type: %s", typestring);
2479 break; // error
2480 }
2481 if (Strutil::parse_char (p, '[')) {
2482 int arraylen = -1;
2483 Strutil::parse_int (p, arraylen);
2484 Strutil::parse_char (p, ']');
2485 type.arraylen = arraylen;
2486 }
2487 std::string paramname_string;
2488 while (1) {
2489 paramname_string += Strutil::parse_identifier (p);
2490 Strutil::skip_whitespace (p);
2491 if (Strutil::parse_char (p, '.')) {
2492 paramname_string += ".";
2493 } else {
2494 break;
2495 }
2496 }
2497 string_view paramname (paramname_string);
2498 int lockgeom = m_lockgeom_default;
2499 // For speed, reserve space. Note that for "unsized" arrays, we only
2500 // preallocate 1 slot and let it grow as needed. That's ok. For
2501 // everything else, we will reserve the right amount up front.
2502 int vals_to_preallocate = type.is_unsized_array()
2503 ? 1 : type.numelements() * type.aggregate;
2504 // Stop parsing values when we hit the limit based on the
2505 // declaration.
2506 int max_vals = type.is_unsized_array() ? 1<<28 : vals_to_preallocate;
2507 if (type.basetype == TypeDesc::INT) {
2508 intvals.clear ();
2509 intvals.reserve (vals_to_preallocate);
2510 int i;
2511 for (i = 0; i < max_vals; ++i) {
2512 int val = 0;
2513 if (Strutil::parse_int (p, val))
2514 intvals.push_back (val);
2515 else
2516 break;
2517 }
2518 if (type.is_unsized_array()) {
2519 // For unsized arrays, now set the size based on how many
2520 // values we actually read.
2521 type.arraylen = std::max (1, i/type.aggregate);
2522 }
2523 // Zero-pad if we parsed fewer values than we needed
2524 intvals.resize (type.numelements()*type.aggregate, 0);
2525 OSL_DASSERT (int(type.numelements())*type.aggregate == int(intvals.size()));
2526 } else if (type.basetype == TypeDesc::FLOAT) {
2527 floatvals.clear ();
2528 floatvals.reserve (vals_to_preallocate);
2529 int i;
2530 for (i = 0; i < max_vals; ++i) {
2531 float val = 0;
2532 if (Strutil::parse_float (p, val))
2533 floatvals.push_back (val);
2534 else
2535 break;
2536 }
2537 if (type.is_unsized_array()) {
2538 // For unsized arrays, now set the size based on how many
2539 // values we actually read.
2540 type.arraylen = std::max (1, i/type.aggregate);
2541 }
2542 // Zero-pad if we parsed fewer values than we needed
2543 floatvals.resize (type.numelements()*type.aggregate, 0);
2544 OSL_DASSERT (int(type.numelements())*type.aggregate == int(floatvals.size()));
2545 } else if (type.basetype == TypeDesc::STRING) {
2546 stringvals.clear ();
2547 stringvals.reserve (vals_to_preallocate);
2548 int i;
2549 for (i = 0; i < max_vals; ++i) {
2550 std::string unescaped;
2551 string_view s;
2552 Strutil::skip_whitespace (p);
2553 if (p.size() && p[0] == '\"') {
2554 if (! Strutil::parse_string (p, s))
2555 break;
2556 unescaped = Strutil::unescape_chars (s);
2557 s = unescaped;
2558 }
2559 else {
2560 s = Strutil::parse_until (p, " \t\r\n;");
2561 if (s.size() == 0)
2562 break;
2563 }
2564 stringvals.emplace_back(s);
2565 }
2566 if (type.is_unsized_array()) {
2567 // For unsized arrays, now set the size based on how many
2568 // values we actually read.
2569 type.arraylen = std::max (1, i/type.aggregate);
2570 }
2571 // Zero-pad if we parsed fewer values than we needed
2572 stringvals.resize (type.numelements()*type.aggregate, ustring());
2573 OSL_DASSERT (int(type.numelements())*type.aggregate == int(stringvals.size()));
2574 }
2575
2576 if (Strutil::parse_prefix (p, "[[")) { // hints
2577 do {
2578 Strutil::skip_whitespace (p);
2579 string_view hint_typename = Strutil::parse_word (p);
2580 string_view hint_name = Strutil::parse_identifier (p);
2581 TypeDesc hint_type (hint_typename.str().c_str());
2582 if (! hint_name.size() || hint_type == TypeDesc::UNKNOWN) {
2583 err = true;
2584 errdesc = "malformed hint";
2585 break;
2586 }
2587 if (! Strutil::parse_char (p, '=')) {
2588 err = true;
2589 errdesc = "hint expected value";
2590 break;
2591 }
2592 if (hint_name == "lockgeom" && hint_type == TypeDesc::INT) {
2593 if (! Strutil::parse_int (p, lockgeom)) {
2594 err = true;
2595 errdesc = Strutil::sprintf ("hint %s expected int value", hint_name);
2596 break;
2597 }
2598 } else {
2599 err = true;
2600 errdesc = Strutil::sprintf ("unknown hint '%s %s'",
2601 hint_type, hint_name);
2602 break;
2603 }
2604 } while (Strutil::parse_char (p, ','));
2605 if (err)
2606 break;
2607 if (! Strutil::parse_prefix (p, "]]")) {
2608 err = true;
2609 errdesc = "malformed hint";
2610 break;
2611 }
2612 }
2613
2614 bool ok = true;
2615 if (type.basetype == TypeDesc::INT) {
2616 ok = Parameter (*g, paramname, type, &intvals[0], lockgeom);
2617 } else if (type.basetype == TypeDesc::FLOAT) {
2618 ok = Parameter (*g, paramname, type, &floatvals[0], lockgeom);
2619 } else if (type.basetype == TypeDesc::STRING) {
2620 ok = Parameter (*g, paramname, type, &stringvals[0], lockgeom);
2621 }
2622 if (!ok) {
2623 errstatement = pstart;
2624 err = true;
2625 break;
2626 }
2627
2628 Strutil::skip_whitespace (p);
2629 if (! p.size())
2630 break;
2631
2632 if (Strutil::parse_char (p, ';') || Strutil::parse_char (p, ','))
2633 continue; // next command
2634
2635 Strutil::parse_until_char (p, ';');
2636 if (! Strutil::parse_char (p, ';')) {
2637 err = true;
2638 errdesc = "semicolon expected";
2639 }
2640 }
2641
2642 if (err) {
2643 std::string msg = Strutil::sprintf(
2644 "ShaderGroupBegin: error parsing group description: %s\n"
2645 " group: %s",
2646 errdesc, g->name());
2647 if (errstatement.empty()) {
2648 size_t offset = p.data() - groupspec.data();
2649 size_t begin_stmt = std::min (groupspec.find_last_of (';', offset),
2650 groupspec.find_last_of (',', offset));
2651 size_t end_stmt = groupspec.find_first_of (';', begin_stmt+1);
2652 errstatement = groupspec.substr (begin_stmt+1, end_stmt-begin_stmt);
2653 }
2654 if (errstatement.size())
2655 msg += Strutil::sprintf("\n problem might be here: %s",
2656 errstatement);
2657 errorf("%s", msg);
2658 if (debug())
2659 infof("Broken group was:\n---%s\n---\n", groupspec);
2660 return ShaderGroupRef();
2661 }
2662
2663 return g;
2664 }
2665
2666
2667
2668 bool
ReParameter(ShaderGroup & group,string_view layername_,string_view paramname,TypeDesc type,const void * val)2669 ShadingSystemImpl::ReParameter (ShaderGroup &group, string_view layername_,
2670 string_view paramname,
2671 TypeDesc type, const void *val)
2672 {
2673 // Find the named layer
2674 ustring layername (layername_);
2675 ShaderInstance *layer = NULL;
2676 for (int i = 0, e = group.nlayers(); i < e; ++i) {
2677 if (group[i]->layername() == layername) {
2678 layer = group[i];
2679 break;
2680 }
2681 }
2682 if (! layer)
2683 return false; // could not find the named layer
2684
2685 // Find the named parameter within the layer
2686 int paramindex = layer->findparam (ustring(paramname));
2687 if (paramindex < 0)
2688 return false; // could not find the named parameter
2689
2690 Symbol *sym = layer->symbol (paramindex);
2691 if (!sym) {
2692 // Can have a paramindex >= 0, but no symbol when it's a master-symbol
2693 OSL_DASSERT(layer->mastersymbol(paramindex) && "No symbol for paramindex");
2694 return false;
2695 }
2696
2697 // Check for mismatch versus previously-declared type
2698 if (!equivalent(sym->typespec(), type))
2699 return false;
2700
2701 // Can't change param value if the group has already been optimized,
2702 // unless that parameter is marked lockgeom=0.
2703 if (group.optimized() && sym->lockgeom())
2704 return false;
2705
2706 // Do the deed
2707 memcpy (sym->data(), val, type.size());
2708 return true;
2709 }
2710
2711
2712
2713 PerThreadInfo *
create_thread_info()2714 ShadingSystemImpl::create_thread_info()
2715 {
2716 return new PerThreadInfo;
2717 }
2718
2719
2720
2721 void
destroy_thread_info(PerThreadInfo * threadinfo)2722 ShadingSystemImpl::destroy_thread_info (PerThreadInfo *threadinfo)
2723 {
2724 delete threadinfo;
2725 }
2726
2727
2728
2729 ShadingContext *
get_context(PerThreadInfo * threadinfo,TextureSystem::Perthread * texture_threadinfo)2730 ShadingSystemImpl::get_context (PerThreadInfo *threadinfo,
2731 TextureSystem::Perthread *texture_threadinfo)
2732 {
2733 if (! threadinfo) {
2734 #if OSL_VERSION < 20200
2735 threadinfo = get_perthread_info ();
2736 warning ("ShadingSystem::get_context called without a PerThreadInfo");
2737 #else
2738 error ("ShadingSystem::get_context called without a PerThreadInfo");
2739 return nullptr;
2740 #endif
2741 }
2742 ShadingContext *ctx = threadinfo->context_pool.empty()
2743 ? new ShadingContext (*this, threadinfo)
2744 : threadinfo->pop_context ();
2745 ctx->texture_thread_info (texture_threadinfo);
2746 return ctx;
2747 }
2748
2749
2750
2751 void
release_context(ShadingContext * ctx)2752 ShadingSystemImpl::release_context (ShadingContext *ctx)
2753 {
2754 if (! ctx)
2755 return;
2756 ctx->process_errors ();
2757 ctx->thread_info()->context_pool.push (ctx);
2758 }
2759
2760
2761
2762 bool
execute(ShadingContext & ctx,ShaderGroup & group,ShaderGlobals & ssg,bool run)2763 ShadingSystemImpl::execute (ShadingContext &ctx, ShaderGroup &group,
2764 ShaderGlobals &ssg, bool run)
2765 {
2766 return ctx.execute (group, ssg, run);
2767 }
2768
2769
2770
2771 // Deprecated
2772 bool
execute(ShadingContext * ctx,ShaderGroup & group,ShaderGlobals & ssg,bool run)2773 ShadingSystemImpl::execute (ShadingContext *ctx, ShaderGroup &group,
2774 ShaderGlobals &ssg, bool run)
2775 {
2776 bool free_context = false;
2777 OSL::PerThreadInfo *thread_info = nullptr;
2778 if (! ctx) {
2779 thread_info = create_thread_info();
2780 ctx = get_context(thread_info);
2781 free_context = true;
2782 }
2783 bool result = ctx->execute (group, ssg, run);
2784 if (free_context) {
2785 release_context(ctx);
2786 destroy_thread_info(thread_info);
2787 }
2788 return result;
2789 }
2790
2791
2792
2793 const void *
get_symbol(ShadingContext & ctx,ustring layername,ustring symbolname,TypeDesc & type)2794 ShadingSystemImpl::get_symbol (ShadingContext &ctx, ustring layername,
2795 ustring symbolname, TypeDesc &type)
2796 {
2797 const Symbol *sym = ctx.symbol (layername, symbolname);
2798 if (sym) {
2799 type = sym->typespec().simpletype();
2800 return ctx.symbol_data (*sym);
2801 } else {
2802 return NULL;
2803 }
2804 }
2805
2806
2807
2808 int
find_named_layer_in_group(ShaderGroup & group,ustring layername,ShaderInstance * & inst)2809 ShadingSystemImpl::find_named_layer_in_group (ShaderGroup& group,
2810 ustring layername,
2811 ShaderInstance * &inst)
2812 {
2813 inst = NULL;
2814 if (group.m_group_use.empty())
2815 return -1;
2816 for (int i = 0; i < group.nlayers(); ++i) {
2817 if (group[i]->layername() == layername) {
2818 inst = group[i];
2819 return i;
2820 }
2821 }
2822 return -1;
2823 }
2824
2825
2826
2827 ConnectedParam
decode_connected_param(string_view connectionname,string_view layername,ShaderInstance * inst)2828 ShadingSystemImpl::decode_connected_param (string_view connectionname,
2829 string_view layername, ShaderInstance *inst)
2830 {
2831 ConnectedParam c; // initializes to "invalid"
2832
2833 // Look for a bracket in the "parameter name"
2834 size_t bracketpos = connectionname.find ('[');
2835 // Grab just the part of the param name up to the bracket
2836 ustring param (connectionname, 0, bracketpos);
2837 string_view cname_remaining = connectionname.substr (bracketpos);
2838
2839 // Search for the param with that name, fail if not found
2840 c.param = inst->findsymbol (param);
2841 if (c.param < 0) {
2842 if (connection_error())
2843 errorf("ConnectShaders: \"%s\" is not a parameter or global of layer \"%s\" (shader \"%s\")",
2844 param, layername, inst->shadername());
2845 else
2846 warningf("ConnectShaders: \"%s\" is not a parameter or global of layer \"%s\" (shader \"%s\")",
2847 param, layername, inst->shadername());
2848 return c;
2849 }
2850
2851 const Symbol *sym = inst->mastersymbol (c.param);
2852 OSL_ASSERT (sym);
2853
2854 // Only params, output params, and globals are legal for connections
2855 if (! (sym->symtype() == SymTypeParam ||
2856 sym->symtype() == SymTypeOutputParam ||
2857 sym->symtype() == SymTypeGlobal)) {
2858 errorf("ConnectShaders: \"%s\" is not a parameter or global of layer \"%s\" (shader \"%s\")",
2859 param, layername, inst->shadername());
2860 c.param = -1; // mark as invalid
2861 return c;
2862 }
2863
2864 c.type = sym->typespec();
2865
2866 if (! cname_remaining.empty() && c.type.is_array()) {
2867 // There was at least one set of brackets that appears to be
2868 // selecting an array element.
2869 int index = 0;
2870 if (! (Strutil::parse_char (cname_remaining, '[') &&
2871 Strutil::parse_int (cname_remaining, index) &&
2872 Strutil::parse_char (cname_remaining, ']'))) {
2873 errorf("ConnectShaders: malformed parameter \"%s\"", connectionname);
2874 c.param = -1; // mark as invalid
2875 return c;
2876 }
2877 c.arrayindex = index;
2878 if (c.arrayindex >= c.type.arraylength()) {
2879 errorf("ConnectShaders: cannot request array element %s from a %s",
2880 connectionname, c.type);
2881 c.arrayindex = c.type.arraylength() - 1; // clamp it
2882 }
2883 c.type.make_array (0); // chop to the element type
2884 Strutil::skip_whitespace (cname_remaining); // skip to next bracket
2885 }
2886
2887 if (! cname_remaining.empty() && cname_remaining.front() == '[' &&
2888 ! c.type.is_closure() && c.type.aggregate() != TypeDesc::SCALAR) {
2889 // There was at least one set of brackets that appears to be
2890 // selecting a color/vector component.
2891 int index = 0;
2892 if (! (Strutil::parse_char (cname_remaining, '[') &&
2893 Strutil::parse_int (cname_remaining, index) &&
2894 Strutil::parse_char (cname_remaining, ']'))) {
2895 errorf("ConnectShaders: malformed parameter \"%s\"", connectionname);
2896 c.param = -1; // mark as invalid
2897 return c;
2898 }
2899 c.channel = index;
2900 if (c.channel >= (int)c.type.aggregate()) {
2901 errorf("ConnectShaders: cannot request component %s from a %s",
2902 connectionname, c.type);
2903 c.channel = (int)c.type.aggregate() - 1; // clamp it
2904 }
2905 // chop to just the scalar part
2906 c.type = TypeSpec ((TypeDesc::BASETYPE)c.type.simpletype().basetype);
2907 Strutil::skip_whitespace (cname_remaining);
2908 }
2909
2910 // Deal with left over nonsense or unsupported param designations
2911 if (! cname_remaining.empty()) {
2912 // Still a leftover bracket, no idea what to do about that
2913 errorf("ConnectShaders: don't know how to connect '%s' when \"%s\" is a \"%s\"",
2914 connectionname, param, c.type);
2915 c.param = -1; // mark as invalid
2916 }
2917 return c;
2918 }
2919
2920
2921
2922 int
raytype_bit(ustring name)2923 ShadingSystemImpl::raytype_bit (ustring name)
2924 {
2925 for (size_t i = 0, e = m_raytypes.size(); i < e; ++i)
2926 if (name == m_raytypes[i])
2927 return (1 << i);
2928 return 0; // not found
2929 }
2930
2931
2932
2933 bool
is_renderer_output(ustring layername,ustring paramname,ShaderGroup * group) const2934 ShadingSystemImpl::is_renderer_output (ustring layername, ustring paramname,
2935 ShaderGroup *group) const
2936 {
2937 if (group) {
2938 const std::vector<ustring> &aovs (group->m_renderer_outputs);
2939 if (aovs.size() > 0) {
2940 if (std::find(aovs.begin(), aovs.end(), paramname) != aovs.end())
2941 return true;
2942 // Try "layer.name"
2943 ustring name2 = ustring::sprintf("%s.%s", layername, paramname);
2944 if (std::find(aovs.begin(), aovs.end(), name2) != aovs.end())
2945 return true;
2946 }
2947 }
2948 const std::vector<ustring> &aovs (m_renderer_outputs);
2949 if (aovs.size() > 0) {
2950 if (std::find(aovs.begin(), aovs.end(), paramname) != aovs.end())
2951 return true;
2952 ustring name2 = ustring::sprintf("%s.%s", layername, paramname);
2953 if (std::find(aovs.begin(), aovs.end(), name2) != aovs.end())
2954 return true;
2955 }
2956 return false;
2957 }
2958
2959
2960
2961 void
group_post_jit_cleanup(ShaderGroup & group)2962 ShadingSystemImpl::group_post_jit_cleanup (ShaderGroup &group)
2963 {
2964 // Once we're generated the IR, we really don't need the ops and args,
2965 // and we only need the syms that include the params.
2966 off_t symmem = 0;
2967 size_t connectionmem = 0;
2968 for (int layer = 0; layer < group.nlayers(); ++layer) {
2969 ShaderInstance *inst = group[layer];
2970 // We no longer needs ops and args -- create empty vectors and
2971 // swap with the ones in the instance.
2972 OpcodeVec emptyops;
2973 inst->ops().swap (emptyops);
2974 std::vector<int> emptyargs;
2975 inst->args().swap (emptyargs);
2976 if (inst->unused()) {
2977 // If we'll never use the layer, we don't need the syms at all
2978 SymbolVec nosyms;
2979 std::swap (inst->symbols(), nosyms);
2980 symmem += vectorbytes(nosyms);
2981 // also don't need the connection info any more
2982 connectionmem += (off_t) inst->clear_connections ();
2983 }
2984 }
2985 {
2986 // adjust memory stats
2987 spin_lock lock (m_stat_mutex);
2988 m_stat_mem_inst_syms -= symmem;
2989 m_stat_mem_inst_connections -= connectionmem;
2990 m_stat_mem_inst -= symmem + connectionmem;
2991 m_stat_memory -= symmem + connectionmem;
2992 }
2993 }
2994
2995
2996
2997 void
optimize_group(ShaderGroup & group,ShadingContext * ctx,bool do_jit)2998 ShadingSystemImpl::optimize_group (ShaderGroup &group, ShadingContext *ctx, bool do_jit)
2999 {
3000 if (group.optimized() && (!do_jit || group.jitted()))
3001 return; // already optimized and optionally jitted
3002
3003 OIIO::Timer timer;
3004 lock_guard lock (group.m_mutex);
3005 bool need_jit = do_jit && !group.jitted();
3006 if (group.optimized() && !need_jit) {
3007 // The group was somehow optimized by another thread between the
3008 // time we checked group.optimized() and now that we have the lock.
3009 // Nothing to do but record how long we waited for the lock.
3010 spin_lock stat_lock (m_stat_mutex);
3011 double t = timer();
3012 m_stat_optimization_time += t;
3013 m_stat_opt_locking_time += t;
3014 return;
3015 }
3016
3017 if (!m_only_groupname.empty() && m_only_groupname != group.name()) {
3018 // For debugging purposes, we are requested to compile only one
3019 // shader group, and this is not it. Mark it as does_nothing,
3020 // and also as optimized so nobody locks on it again, and record
3021 // how long we waited for the lock.
3022 group.does_nothing (true);
3023 group.m_optimized = true;
3024 group.m_jitted = true;
3025 spin_lock stat_lock (m_stat_mutex);
3026 double t = timer();
3027 m_stat_optimization_time += t;
3028 m_stat_opt_locking_time += t;
3029 return;
3030 }
3031
3032 double locking_time = timer();
3033
3034 bool ctx_allocated = false;
3035 PerThreadInfo *thread_info = nullptr;
3036 if (! ctx) {
3037 thread_info = create_thread_info();
3038 ctx = get_context(thread_info);
3039 ctx_allocated = true;
3040 }
3041 if (!group.optimized()) {
3042 RuntimeOptimizer rop (*this, group, ctx);
3043 rop.run ();
3044 rop.police_failed_optimizations();
3045
3046 // Copy some info recorded by the RuntimeOptimizer into the group
3047 group.m_unknown_textures_needed = rop.m_unknown_textures_needed;
3048 for (auto&& f : rop.m_textures_needed)
3049 group.m_textures_needed.push_back (f);
3050 group.m_unknown_closures_needed = rop.m_unknown_closures_needed;
3051 for (auto&& f : rop.m_closures_needed)
3052 group.m_closures_needed.push_back (f);
3053 for (auto&& f : rop.m_globals_needed)
3054 group.m_globals_needed.push_back (f);
3055 group.m_globals_read = rop.m_globals_read;
3056 group.m_globals_write = rop.m_globals_write;
3057 size_t num_userdata = rop.m_userdata_needed.size();
3058 group.m_userdata_names.reserve (num_userdata);
3059 group.m_userdata_types.reserve (num_userdata);
3060 group.m_userdata_offsets.resize (num_userdata, 0);
3061 group.m_userdata_derivs.reserve (num_userdata);
3062 group.m_userdata_layers.reserve (num_userdata);
3063 group.m_userdata_init_vals.reserve (num_userdata);
3064 for (auto&& n : rop.m_userdata_needed) {
3065 group.m_userdata_names.push_back (n.name);
3066 group.m_userdata_types.push_back (n.type);
3067 group.m_userdata_derivs.push_back (n.derivs);
3068 group.m_userdata_layers.push_back (n.layer_num);
3069 group.m_userdata_init_vals.push_back (n.data);
3070 }
3071 group.m_unknown_attributes_needed = rop.m_unknown_attributes_needed;
3072 for (auto&& f : rop.m_attributes_needed) {
3073 group.m_attributes_needed.push_back (f.name);
3074 group.m_attribute_scopes.push_back (f.scope);
3075 }
3076 group.m_optimized = true;
3077
3078 spin_lock stat_lock (m_stat_mutex);
3079 if (!need_jit) {
3080 m_stat_opt_locking_time += locking_time;
3081 m_stat_optimization_time += timer();
3082 }
3083 m_stat_opt_locking_time += rop.m_stat_opt_locking_time;
3084 m_stat_opt_locking_time += locking_time + rop.m_stat_opt_locking_time;
3085 m_stat_specialization_time += rop.m_stat_specialization_time;
3086 }
3087
3088 if (need_jit) {
3089 BackendLLVM lljitter (*this, group, ctx);
3090 lljitter.run ();
3091
3092 // NOTE: it is now possible to optimize and not JIT
3093 // which would leave the cleanup to happen
3094 // when the ShadingSystem is destroyed
3095 group_post_jit_cleanup (group);
3096
3097 group.m_jitted = true;
3098 spin_lock stat_lock (m_stat_mutex);
3099 m_stat_opt_locking_time += locking_time;
3100 m_stat_optimization_time += timer();
3101 m_stat_total_llvm_time += lljitter.m_stat_total_llvm_time;
3102 m_stat_llvm_setup_time += lljitter.m_stat_llvm_setup_time;
3103 m_stat_llvm_irgen_time += lljitter.m_stat_llvm_irgen_time;
3104 m_stat_llvm_opt_time += lljitter.m_stat_llvm_opt_time;
3105 m_stat_llvm_jit_time += lljitter.m_stat_llvm_jit_time;
3106 m_stat_max_llvm_local_mem = std::max (m_stat_max_llvm_local_mem,
3107 lljitter.m_llvm_local_mem);
3108 }
3109
3110 if (ctx_allocated) {
3111 release_context(ctx);
3112 destroy_thread_info(thread_info);
3113 }
3114
3115 m_stat_groups_compiled += 1;
3116 m_stat_instances_compiled += group.nlayers();
3117 m_groups_to_compile_count -= 1;
3118 }
3119
3120
3121
optimize_all_groups_wrapper(ShadingSystemImpl * ss,int mythread,int totalthreads,bool do_jit)3122 static void optimize_all_groups_wrapper (ShadingSystemImpl *ss, int mythread, int totalthreads, bool do_jit)
3123 {
3124 ss->optimize_all_groups (1, mythread, totalthreads, do_jit);
3125 }
3126
3127
3128
3129 void
optimize_all_groups(int nthreads,int mythread,int totalthreads,bool do_jit)3130 ShadingSystemImpl::optimize_all_groups (int nthreads, int mythread, int totalthreads, bool do_jit)
3131 {
3132 // Spawn a bunch of threads to do this in parallel -- just call this
3133 // routine again (with threads=1) for each thread.
3134 if (nthreads < 1) // threads <= 0 means use all hardware available
3135 nthreads = std::min ((int)std::thread::hardware_concurrency(),
3136 (int)m_groups_to_compile_count);
3137 if (nthreads > 1) {
3138 if (m_threads_currently_compiling)
3139 return; // never mind, somebody else spawned the JIT threads
3140 OIIO::thread_group threads;
3141 m_threads_currently_compiling += nthreads;
3142 for (int t = 0; t < nthreads; ++t)
3143 threads.add_thread (new std::thread (optimize_all_groups_wrapper, this, t, nthreads, do_jit));
3144 threads.join_all ();
3145 m_threads_currently_compiling -= nthreads;
3146 return;
3147 }
3148
3149 // And here's the single thread case
3150 size_t ngroups = 0;
3151 {
3152 spin_lock lock (m_all_shader_groups_mutex);
3153 ngroups = m_all_shader_groups.size();
3154 }
3155 PerThreadInfo* threadinfo = create_thread_info();
3156 ShadingContext* ctx = get_context(threadinfo);
3157 for (size_t i = 0; i < ngroups; ++i) {
3158 // Assign to threads based on mod of totalthreads
3159 if ((i % totalthreads) == (unsigned)mythread) {
3160 ShaderGroupRef group;
3161 {
3162 spin_lock lock (m_all_shader_groups_mutex);
3163 group = m_all_shader_groups[i].lock();
3164 }
3165 if (group && group->m_complete)
3166 optimize_group (*group, ctx, do_jit);
3167 }
3168 }
3169 release_context(ctx);
3170 destroy_thread_info(threadinfo);
3171 }
3172
3173
3174
// Find pairs of layers in `group` that ShaderInstance::mergeable() reports
// as identical, keep the earlier one, and retarget downstream connections
// from the later one to it.
//
//   group     the shader group to scan and modify
//   post_opt  selects which statistic receives the merge count:
//             m_stat_merged_inst_opt when true, m_stat_merged_inst otherwise
//
// Returns the number of merges performed.  Returns 0 without doing
// anything when m_opt_merge_instances is off or optimize() < 1.
int
ShadingSystemImpl::merge_instances (ShaderGroup &group, bool post_opt)
{
    // Look through the shader group for pairs of nodes/layers that
    // actually do exactly the same thing, and eliminate one of the
    // redundant shaders, carefully rewiring all its outgoing
    // connections to later layers to refer to the one we keep.
    //
    // It turns out that in practice, it's not uncommon to have
    // duplicate nodes.  For example, some materials are "layered" --
    // like a character skin shader that has separate sub-networks for
    // skin, oil, wetness, and so on -- and those different sub-nets
    // often reference the same texture maps or noise functions by
    // repetition.  Yes, ideally, the redundancies would be eliminated
    // before they were fed to the renderer, but in practice that's hard
    // and for many scenes we get substantial savings of time (mostly
    // because of reduced texture calls) and instance memory by finding
    // these redundancies automatically.  The amount of savings is quite
    // scene dependent, as well as probably very dependent on the
    // general shading and lookdev approach of the studio.  But it was
    // very helpful for us in many cases.
    //
    // The basic loop below looks very inefficient, O(n^2) in number of
    // instances in the group. But it's really not -- a few seconds (sum
    // of all threads) for even our very complex scenes. This is because
    // most potential pairs have a very fast rejection case if they are
    // not using the same master.  Since there's no appreciable cost to
    // the brute force approach, it seems silly to have a complex scheme
    // to try to reduce the number of pairings.

    if (! m_opt_merge_instances || optimize() < 1)
        return 0;

    OIIO::Timer timer;                // Time we spend looking for and doing merges
    int merges = 0;                   // number of merges we do
    size_t connectionmem = 0;         // Connection memory we free
    int nlayers = group.nlayers();

    // Need to quickly make sure userdata_params is up to date before any
    // mergeability tests.
    for (int layer = 0; layer < nlayers; ++layer)
        if (! group[layer]->unused())
            group[layer]->evaluate_writes_globals_and_userdata_params ();

    // Loop over all layers...
    for (int a = 0; a < nlayers-1; ++a) {
        if (group[a]->unused() || group[a]->entry_layer()) // Don't merge a layer that's not used
            continue;                                       // or if it's an entry layer
        // Check all later layers...
        for (int b = a+1; b < nlayers; ++b) {
            if (group[b]->unused())    // Don't merge a layer that's not used
                continue;
            if (b == nlayers-1)   // Don't merge the last layer -- causes
                continue;         // many tears because it's the group entry

            // Now we have two used layers, a and b, to examine.
            // See if they are mergeable (identical).  All the heavy
            // lifting is done by ShaderInstance::mergeable().
            if (! group[a]->mergeable (*group[b], group))
                continue;

            // The two nodes a and b are mergeable, so merge them.
            ShaderInstance *A = group[a];
            ShaderInstance *B = group[b];
            ++merges;

            // We'll keep A, get rid of B.  For all layers later than B,
            // check its incoming connections and replace all references
            // to B with references to A.
            for (int j = b+1; j < nlayers; ++j) {
                ShaderInstance *inst = group[j];
                if (inst->unused())  // don't bother if it's unused
                    continue;
                for (int c = 0, ce = inst->nconnections(); c < ce; ++c) {
                    Connection &con = inst->connection(c);
                    if (con.srclayer == b) {
                        con.srclayer = a;
                        // A now feeds a later layer, so flag it accordingly.
                        A->outgoing_connections (true);
                        // Debug-only sanity check (when both still have
                        // symbols): the connected source symbol has the
                        // same name in A and B.
                        if (A->symbols().size() && B->symbols().size()) {
                            OSL_DASSERT (A->symbol(con.src.param)->name() ==
                                         B->symbol(con.src.param)->name());
                        }
                    }
                }
            }

            // Mark parameters of B as no longer connected
            for (int p = B->firstparam(); p < B->lastparam(); ++p) {
                if (B->symbols().size())
                    B->symbol(p)->connected_down(false);
                if (B->m_instoverrides.size())
                    B->instoverride(p)->connected_down(false);
            }
            // B won't be used, so mark it as having no outgoing
            // connections and clear its incoming connections (which are
            // no longer used).
            OSL_DASSERT (B->merged_unused() == false);
            B->outgoing_connections (false);
            connectionmem += B->clear_connections ();
            B->m_merged_unused = true;
            OSL_DASSERT (B->unused());
        }
    }

    {
        // Adjust stats
        spin_lock lock (m_stat_mutex);
        m_stat_mem_inst_connections -= connectionmem;
        m_stat_mem_inst -= connectionmem;
        m_stat_memory -= connectionmem;
        if (post_opt)
            m_stat_merged_inst_opt += merges;
        else
            m_stat_merged_inst += merges;
        m_stat_inst_merge_time += timer();
    }

    return merges;
}
3294
3295
3296
#if OIIO_HAS_COLORPROCESSOR

// Return a color processor converting `fromspace` to `tospace`.  The most
// recently created processor is remembered along with its two space
// names; a new one is created only when either name differs from the
// remembered pair.
OIIO::ColorProcessorHandle
OCIOColorSystem::load_transform (StringParam fromspace, StringParam tospace)
{
    const bool stale = (fromspace != m_last_colorproc_fromspace ||
                        tospace != m_last_colorproc_tospace);
    if (stale) {
        m_last_colorproc = m_colorconfig.createColorProcessor (fromspace, tospace);
        m_last_colorproc_fromspace = fromspace;
        m_last_colorproc_tospace = tospace;
    }
    return m_last_colorproc;
}

#endif
3312
3313
3314
3315 template <> bool
ocio_transform(StringParam fromspace,StringParam tospace,const Color3 & C,Color3 & Cout)3316 ShadingSystemImpl::ocio_transform (StringParam fromspace, StringParam tospace,
3317 const Color3& C, Color3& Cout) {
3318 #if OIIO_HAS_COLORPROCESSOR
3319 OIIO::ColorProcessorHandle cp;
3320 {
3321 lock_guard lock (m_mutex);
3322 cp = m_ocio_system.load_transform(fromspace, tospace);
3323 }
3324 if (cp) {
3325 Cout = C;
3326 cp->apply ((float *)&Cout);
3327 return true;
3328 }
3329 #endif
3330 return false;
3331 }
3332
3333
3334
3335 template <> bool
ocio_transform(StringParam fromspace,StringParam tospace,const Dual2<Color3> & C,Dual2<Color3> & Cout)3336 ShadingSystemImpl::ocio_transform (StringParam fromspace, StringParam tospace,
3337 const Dual2<Color3>& C, Dual2<Color3>& Cout) {
3338 #if OIIO_HAS_COLORPROCESSOR
3339 OIIO::ColorProcessorHandle cp;
3340 {
3341 lock_guard lock (m_mutex);
3342 cp = m_ocio_system.load_transform(fromspace, tospace);
3343 }
3344
3345 if (cp) {
3346 // Use finite differencing to approximate the derivative. Make 3
3347 // color values to convert.
3348 const float eps = 0.001f;
3349 Color3 CC[3] = { C.val(), C.val() + eps*C.dx(), C.val() + eps*C.dy() };
3350 cp->apply ((float *)&CC, 3, 1, 3, sizeof(float), sizeof(Color3), 0);
3351 Cout.set (CC[0],
3352 (CC[1] - CC[0]) * (1.0f / eps),
3353 (CC[2] - CC[0]) * (1.0f / eps));
3354 return true;
3355 }
3356 #endif
3357 return false;
3358 }
3359
3360
3361
3362 bool
archive_shadergroup(ShaderGroup & group,string_view filename)3363 ShadingSystemImpl::archive_shadergroup (ShaderGroup& group, string_view filename)
3364 {
3365 std::string filename_base = OIIO::Filesystem::filename(filename);
3366 std::string extension;
3367 for (std::string e = OIIO::Filesystem::extension(filename);
3368 e.size() && filename.size();
3369 e = OIIO::Filesystem::extension(filename)) {
3370 extension = e + extension;
3371 filename.remove_suffix (e.size());
3372 }
3373 if (extension.size() < 2 || extension[0] != '.') {
3374 errorf("archive_shadergroup: invalid filename \"%s\"", filename);
3375 return false;
3376 }
3377 filename_base.erase (filename_base.size() - extension.size());
3378
3379 std::string pattern = OIIO::Filesystem::temp_directory_path() + "/OSL-%%%%-%%%%";
3380 if (! pattern.size()) {
3381 error ("archive_shadergroup: Could not find a temp directory");
3382 return false;
3383 }
3384 std::string tmpdir = OIIO::Filesystem::unique_path(pattern);
3385 if (! pattern.size()) {
3386 error ("archive_shadergroup: Could not find a temp filename");
3387 return false;
3388 }
3389 std::string errmessage;
3390 bool dir_ok = OIIO::Filesystem::create_directory (tmpdir, errmessage);
3391 if (! dir_ok) {
3392 errorf("archive_shadergroup: Could not create temp directory %s (%s)",
3393 tmpdir, errmessage);
3394 return false;
3395 }
3396
3397 bool ok = true;
3398 std::string groupfilename = tmpdir + "/shadergroup";
3399 std::ofstream groupfile;
3400 OIIO::Filesystem::open(groupfile, groupfilename);
3401 if (groupfile.good()) {
3402 groupfile << group.serialize();
3403 groupfile.close ();
3404 } else {
3405 error ("archive_shadergroup: Could not open shadergroup file");
3406 ok = false;
3407 }
3408
3409 std::string filename_list = "shadergroup";
3410 {
3411 std::lock_guard<ShaderGroup> lock (group);
3412 std::set<std::string> entries; // to avoid duplicates
3413 for (int i = 0, nl = group.nlayers(); i < nl; ++i) {
3414 std::string osofile = group[i]->master()->osofilename();
3415 std::string osoname = OIIO::Filesystem::filename (osofile);
3416 if (entries.find(osoname) == entries.end()) {
3417 entries.insert (osoname);
3418 std::string localfile = tmpdir + "/" + osoname;
3419 OIIO::Filesystem::copy (osofile, localfile);
3420 filename_list += " " + osoname;
3421 }
3422 }
3423 }
3424
3425 if (extension == ".tar" || extension == ".tar.gz" || extension == ".tgz") {
3426 std::string z = Strutil::ends_with (extension, "gz") ? "-z" : "";
3427 std::string cmd = Strutil::sprintf ("tar -c %s -C %s -f %s%s %s",
3428 z, tmpdir, filename, extension,
3429 filename_list);
3430 // std::cout << "Command =\n" << cmd << "\n";
3431 if (system (cmd.c_str()) != 0) {
3432 error ("archive_shadergroup: executing tar command failed");
3433 ok = false;
3434 }
3435
3436 } else if (extension == ".zip") {
3437 std::string cmd = Strutil::sprintf ("zip -q %s%s %s",
3438 filename, extension,
3439 filename_list);
3440 // std::cout << "Command =\n" << cmd << "\n";
3441 if (system (cmd.c_str()) != 0) {
3442 error ("archive_shadergroup: executing zip command failed");
3443 ok = false;
3444 }
3445 } else {
3446 error ("archive_shadergroup: no archiving/compressing command");
3447 ok = false;
3448 }
3449
3450 OIIO::Filesystem::remove_all (tmpdir);
3451
3452 return ok;
3453 }
3454
3455
3456
3457 void
register_closure(string_view name,int id,const ClosureParam * params,PrepareClosureFunc prepare,SetupClosureFunc setup)3458 ClosureRegistry::register_closure (string_view name, int id,
3459 const ClosureParam *params,
3460 PrepareClosureFunc prepare,
3461 SetupClosureFunc setup)
3462 {
3463 if (m_closure_table.size() <= (size_t)id)
3464 m_closure_table.resize(id + 1);
3465 ClosureEntry &entry = m_closure_table[id];
3466 entry.id = id;
3467 entry.name = name;
3468 entry.nformal = 0;
3469 entry.nkeyword = 0;
3470 entry.struct_size = 0; /* params could be NULL */
3471 for (int i = 0; params; ++i) {
3472 /* always push so the end marker is there */
3473 entry.params.push_back(params[i]);
3474 if (params[i].type == TypeDesc()) {
3475 entry.struct_size = params[i].offset;
3476 /* CLOSURE_FINISH_PARAM stashes the real struct alignment here
3477 * make sure that the closure struct doesn't want more alignment than ClosureComponent
3478 * because we will be allocating the real struct inside it. */
3479 OSL_ASSERT_MSG(params[i].field_size <= int(alignof(ClosureComponent)),
3480 "Closure %s wants alignment of %d which is larger than that of ClosureComponent",
3481 name.c_str(),
3482 params[i].field_size);
3483 break;
3484 }
3485 if (params[i].key == nullptr)
3486 entry.nformal ++;
3487 else
3488 entry.nkeyword ++;
3489 }
3490 entry.prepare = prepare;
3491 entry.setup = setup;
3492 m_closure_name_to_id[ustring(name)] = id;
3493 }
3494
3495
3496
3497 const ClosureRegistry::ClosureEntry *
get_entry(ustring name) const3498 ClosureRegistry::get_entry(ustring name) const
3499 {
3500 std::map<ustring, int>::const_iterator i = m_closure_name_to_id.find(name);
3501 if (i != m_closure_name_to_id.end())
3502 {
3503 OSL_DASSERT((size_t)i->second < m_closure_table.size());
3504 return &m_closure_table[i->second];
3505 }
3506 else
3507 return NULL;
3508 }
3509
3510
3511
3512 }; // namespace pvt
3513 OSL_NAMESPACE_EXIT
3514
3515
3516
3517 bool
init(const ShaderGroup * group,int layernum)3518 OSL::OSLQuery::init (const ShaderGroup *group, int layernum)
3519 {
3520 geterror(); // clear the error, we're newly initializing
3521 if (! group) {
3522 errorf("No group pointer supplied.");
3523 return false;
3524 }
3525 if (layernum < 0 || layernum >= group->nlayers()) {
3526 errorf("Invalid layer number %d (valid indices: 0-%d).",
3527 layernum, group->nlayers()-1);
3528 return false;
3529 }
3530
3531 const ShaderMaster *master = (*group)[layernum]->master();
3532 m_shadername = master->shadername();
3533 m_shadertypename = master->shadertypename();
3534 m_params.clear();
3535 if (int nparams = master->num_params()) {
3536 m_params.resize (nparams);
3537 for (int i = 0; i < nparams; ++i) {
3538 const Symbol *sym = master->symbol (i);
3539 Parameter &p (m_params[i]);
3540 p.name = sym->name().string();
3541 const TypeSpec &ts (sym->typespec());
3542 p.type = ts.simpletype();
3543 p.isoutput = (sym->symtype() == SymTypeOutputParam);
3544 p.varlenarray = ts.is_unsized_array();
3545 p.isstruct = ts.is_structure() || ts.is_structure_array();
3546 p.isclosure = ts.is_closure_based();
3547 p.data = sym->data();
3548 // In this mode, we don't fill in idefault, fdefault, sdefault,
3549 // or spacename.
3550 p.idefault.clear();
3551 p.fdefault.clear();
3552 p.sdefault.clear();
3553 p.spacename.clear();
3554 int n = int (p.type.numelements() * p.type.aggregate);
3555 if (p.type.basetype == TypeDesc::INT) {
3556 for (int i = 0; i < n; ++i)
3557 p.idefault.push_back (sym->get_int(i));
3558 }
3559 if (p.type.basetype == TypeDesc::FLOAT) {
3560 for (int i = 0; i < n; ++i)
3561 p.fdefault.push_back (sym->get_float(i));
3562 }
3563 if (p.type.basetype == TypeDesc::STRING) {
3564 for (int i = 0; i < n; ++i)
3565 p.sdefault.push_back (sym->get_string(i));
3566 }
3567 p.fields.clear(); // don't bother filling this out
3568 if (StructSpec *ss = ts.structspec()) {
3569 p.structname = ss->name().string();
3570 for (size_t i = 0, e = ss->numfields(); i < e; ++i)
3571 p.fields.push_back (ss->field(i).name);
3572 } else {
3573 p.structname.clear();
3574 }
3575 p.metadata.clear(); // FIXME?
3576 p.validdefault = (p.data != NULL);
3577 }
3578 }
3579
3580 m_meta.clear(); // no metadata available at this point
3581
3582 return true;
3583 }
3584
3585
3586
3587 // vals points to a symbol with a total of ncomps floats (ncomps ==
3588 // aggregate*arraylen). If has_derivs is true, it's actually 3 times
3589 // that length, the main values then the derivatives. We want to check
3590 // for nans in vals[firstcheck..firstcheck+nchecks-1], and also in the
3591 // derivatives if present. Note that if firstcheck==0 and nchecks==ncomps,
3592 // we are checking the entire contents of the symbol. More restrictive
3593 // firstcheck,nchecks are used to check just one element of an array.
3594 OSL_SHADEOP void
osl_naninf_check(int ncomps,const void * vals_,int has_derivs,void * sg,const void * sourcefile,int sourceline,void * symbolname,int firstcheck,int nchecks,const void * opname)3595 osl_naninf_check (int ncomps, const void *vals_, int has_derivs,
3596 void *sg, const void *sourcefile, int sourceline,
3597 void *symbolname, int firstcheck, int nchecks,
3598 const void *opname)
3599 {
3600 ShadingContext *ctx = (ShadingContext *)((ShaderGlobals *)sg)->context;
3601 const float *vals = (const float *)vals_;
3602 for (int d = 0; d < (has_derivs ? 3 : 1); ++d) {
3603 for (int c = firstcheck, e = c+nchecks; c < e; ++c) {
3604 int i = d*ncomps + c;
3605 if (! OIIO::isfinite(vals[i])) {
3606 ctx->errorf("Detected %g value in %s%s at %s:%d (op %s)",
3607 vals[i], d > 0 ? "the derivatives of " : "",
3608 USTR(symbolname), USTR(sourcefile), sourceline,
3609 USTR(opname));
3610 return;
3611 }
3612 }
3613 }
3614 }
3615
3616
3617
3618 // vals points to the data of a float-, int-, or string-based symbol.
3619 // (described by typedesc). We want to check
3620 // vals[firstcheck..firstcheck+nchecks-1] for floats that are NaN , or
3621 // ints that are -MAXINT, or strings that are "!!!uninitialized!!!"
3622 // which would indicate that the value is uninitialized if
3623 // 'debug_uninit' is turned on. Note that if firstcheck==0 and
3624 // nchecks==ncomps, we are checking the entire contents of the symbol.
3625 // More restrictive firstcheck,nchecks are used to check just one
3626 // element of an array.
3627 OSL_SHADEOP void
osl_uninit_check(long long typedesc_,void * vals_,void * sg,const void * sourcefile,int sourceline,const char * groupname,int layer,const char * layername,const char * shadername,int opnum,const char * opname,int argnum,void * symbolname,int firstcheck,int nchecks)3628 osl_uninit_check (long long typedesc_, void *vals_,
3629 void *sg, const void *sourcefile, int sourceline,
3630 const char *groupname, int layer, const char *layername,
3631 const char *shadername,
3632 int opnum, const char *opname, int argnum,
3633 void *symbolname, int firstcheck, int nchecks)
3634 {
3635 TypeDesc typedesc = TYPEDESC(typedesc_);
3636 ShadingContext *ctx = (ShadingContext *)((ShaderGlobals *)sg)->context;
3637 bool uninit = false;
3638 if (typedesc.basetype == TypeDesc::FLOAT) {
3639 float *vals = (float *)vals_;
3640 for (int c = firstcheck, e = firstcheck+nchecks; c < e; ++c)
3641 if (!OIIO::isfinite(vals[c])) {
3642 uninit = true;
3643 vals[c] = 0;
3644 }
3645 }
3646 if (typedesc.basetype == TypeDesc::INT) {
3647 int *vals = (int *)vals_;
3648 for (int c = firstcheck, e = firstcheck+nchecks; c < e; ++c)
3649 if (vals[c] == std::numeric_limits<int>::min()) {
3650 uninit = true;
3651 vals[c] = 0;
3652 }
3653 }
3654 if (typedesc.basetype == TypeDesc::STRING) {
3655 ustring *vals = (ustring *)vals_;
3656 for (int c = firstcheck, e = firstcheck+nchecks; c < e; ++c)
3657 if (vals[c] == Strings::uninitialized_string) {
3658 uninit = true;
3659 vals[c] = ustring();
3660 }
3661 }
3662 if (uninit) {
3663 ctx->errorf("Detected possible use of uninitialized value in %s %s at %s:%d (group %s, layer %d %s, shader %s, op %d '%s', arg %d)",
3664 typedesc.c_str(), USTR(symbolname), USTR(sourcefile), sourceline,
3665 (groupname && groupname[0]) ? groupname: "<unnamed group>",
3666 layer, (layername && layername[0]) ? layername : "<unnamed layer>",
3667 shadername, opnum, USTR(opname), argnum);
3668 }
3669 }
3670
3671
3672
3673 OSL_SHADEOP int
osl_range_check_err(int indexvalue,int length,const char * symname,void * sg,const void * sourcefile,int sourceline,const char * groupname,int layer,const char * layername,const char * shadername)3674 osl_range_check_err (int indexvalue, int length, const char *symname,
3675 void *sg, const void *sourcefile, int sourceline,
3676 const char *groupname, int layer, const char *layername,
3677 const char *shadername)
3678 {
3679 if (indexvalue < 0 || indexvalue >= length) {
3680 ShadingContext *ctx = (ShadingContext *)((ShaderGlobals *)sg)->context;
3681 ctx->errorf("Index [%d] out of range %s[0..%d]: %s:%d"
3682 " (group %s, layer %d %s, shader %s)",
3683 indexvalue, USTR(symname), length-1,
3684 USTR(sourcefile), sourceline,
3685 (groupname && groupname[0]) ? groupname : "<unnamed group>", layer,
3686 (layername && layername[0]) ? layername : "<unnamed layer>",
3687 USTR(shadername));
3688 if (indexvalue >= length)
3689 indexvalue = length-1;
3690 else
3691 indexvalue = 0;
3692 }
3693 return indexvalue;
3694 }
3695
3696
3697
3698 // Asked if the raytype is a name we can't know until mid-shader.
osl_raytype_name(void * sg_,void * name)3699 OSL_SHADEOP int osl_raytype_name (void *sg_, void *name)
3700 {
3701 ShaderGlobals *sg = (ShaderGlobals *)sg_;
3702 int bit = sg->context->shadingsys().raytype_bit (USTR(name));
3703 return (sg->raytype & bit) != 0;
3704 }
3705
3706
osl_get_attribute(void * sg_,int dest_derivs,void * obj_name_,void * attr_name_,int array_lookup,int index,const void * attr_type,void * attr_dest)3707 OSL_SHADEOP int osl_get_attribute(void *sg_,
3708 int dest_derivs,
3709 void *obj_name_,
3710 void *attr_name_,
3711 int array_lookup,
3712 int index,
3713 const void *attr_type,
3714 void *attr_dest)
3715 {
3716 ShaderGlobals *sg = (ShaderGlobals *)sg_;
3717 const ustring &obj_name = USTR(obj_name_);
3718 const ustring &attr_name = USTR(attr_name_);
3719
3720 return sg->context->osl_get_attribute (sg, sg->objdata,
3721 dest_derivs, obj_name, attr_name,
3722 array_lookup, index,
3723 *(const TypeDesc *)attr_type,
3724 attr_dest);
3725 }
3726
3727
3728
3729 OSL_SHADEOP int
osl_bind_interpolated_param(void * sg_,const void * name,long long type,int userdata_has_derivs,void * userdata_data,int,void * symbol_data,int symbol_data_size,char * userdata_initialized,int)3730 osl_bind_interpolated_param (void *sg_, const void *name, long long type,
3731 int userdata_has_derivs, void *userdata_data,
3732 int /*symbol_has_derivs*/, void *symbol_data,
3733 int symbol_data_size,
3734 char *userdata_initialized, int /*userdata_index*/)
3735 {
3736 char status = *userdata_initialized;
3737 if (status == 0) {
3738 // First time retrieving this userdata
3739 ShaderGlobals *sg = (ShaderGlobals *)sg_;
3740 bool ok = sg->renderer->get_userdata (userdata_has_derivs, USTR(name),
3741 TYPEDESC(type),
3742 sg, userdata_data);
3743 // printf ("Binding %s %s : index %d, ok = %d\n", name,
3744 // TYPEDESC(type).c_str(),userdata_index, ok);
3745 *userdata_initialized = status = 1 + ok; // 1 = not found, 2 = found
3746 sg->context->incr_get_userdata_calls ();
3747 }
3748 if (status == 2) {
3749 // If userdata was present, copy it to the shader variable
3750 memcpy (symbol_data, userdata_data, symbol_data_size);
3751 return 1;
3752 }
3753 return 0; // no such user data
3754 }
3755