1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4
5 #include <vector>
6 #include <cstdio>
7 #include <cmath>
8
9 #include <OpenImageIO/sysutil.h>
10 #include <OpenImageIO/timer.h>
11 #include <OpenImageIO/thread.h>
12
13 #include "oslexec_pvt.h"
14 #include "runtimeoptimize.h"
15 #include "../liboslcomp/oslcomp_pvt.h"
16 using namespace OSL;
17 using namespace OSL::pvt;
18
19
20 // names of ops we'll be using frequently
21 static ustring u_nop ("nop"),
22 u_exit ("exit"),
23 u_assign ("assign"),
24 u_add ("add"),
25 u_sub ("sub"),
26 u_mul ("mul"),
27 u_if ("if"),
28 u_for ("for"),
29 u_while ("while"),
30 u_dowhile("dowhile"),
31 u_functioncall ("functioncall"),
32 u_functioncall_nr("functioncall_nr"),
33 u_break ("break"),
34 u_continue ("continue"),
35 u_return ("return"),
36 u_useparam ("useparam"),
37 u_closure ("closure"),
38 u_pointcloud_write ("pointcloud_write"),
39 u_isconnected ("isconnected"),
40 u_setmessage ("setmessage"),
41 u_getmessage ("getmessage"),
42 u_getattribute ("getattribute");
43
44
45 OSL_NAMESPACE_ENTER
46
47 namespace pvt { // OSL::pvt
48
49 using OIIO::spin_lock;
50 using OIIO::Timer;
51
52 DECLFOLDER(constfold_assign); // forward decl
53
54
55
/// Wrapper that erases elements of c for which predicate p is true.
/// (Unlike std::remove_if, it resizes the container so that it contains
/// ONLY elements for which the predicate is false.)
59 template<class Container, class Predicate>
void erase_if (Container &c, const Predicate &p)
61 {
62 c.erase (std::remove_if (c.begin(), c.end(), p), c.end());
63 }
64
65
66
OSOProcessorBase::OSOProcessorBase (ShadingSystemImpl &shadingsys,
                                    ShaderGroup &group, ShadingContext *ctx)
69 : m_shadingsys(shadingsys),
70 m_group(group),
71 m_context(ctx),
72 m_debug(shadingsys.debug()),
73 m_inst(NULL)
74 {
75 set_debug ();
76 }
77
78
79
OSOProcessorBase::~OSOProcessorBase ()
81 {
82 }
83
84
85
RuntimeOptimizer::RuntimeOptimizer (ShadingSystemImpl &shadingsys,
                                    ShaderGroup &group, ShadingContext *ctx)
88 : OSOProcessorBase(shadingsys, group, ctx),
89 m_optimize(shadingsys.optimize()),
90 m_opt_simplify_param(shadingsys.m_opt_simplify_param),
91 m_opt_constant_fold(shadingsys.m_opt_constant_fold),
92 m_opt_stale_assign(shadingsys.m_opt_stale_assign),
93 m_opt_elide_useless_ops(shadingsys.m_opt_elide_useless_ops),
94 m_opt_elide_unconnected_outputs(shadingsys.m_opt_elide_unconnected_outputs),
95 m_opt_peephole(shadingsys.m_opt_peephole),
96 m_opt_coalesce_temps(shadingsys.m_opt_coalesce_temps),
97 m_opt_assign(shadingsys.m_opt_assign),
98 m_opt_mix(shadingsys.m_opt_mix),
99 m_opt_middleman(shadingsys.m_opt_middleman),
100 m_keep_no_return_function_calls(shadingsys.m_llvm_debugging_symbols),
101 m_pass(0),
102 m_next_newconst(0), m_next_newtemp(0),
103 m_stat_opt_locking_time(0), m_stat_specialization_time(0),
104 m_stop_optimizing(false),
105 m_raytypes_on(group.raytypes_on()), m_raytypes_off(group.raytypes_off())
106 {
107 memset ((char *)&m_shaderglobals, 0, sizeof(ShaderGlobals));
108 m_shaderglobals.context = shadingcontext();
109
110 // Disable no_function_return_calls for OptiX renderers, because we
111 // aren't yet set up to support use of debugging symbols for PTX.
112 // FIXME: some day, we are going to want debugging symbols for PTX, and
113 // will need some refactoring of the debugging symbol code.
114 if (shadingsys.renderer()->supports("OptiX"))
115 m_keep_no_return_function_calls = false;
116 }
117
118
119
RuntimeOptimizer::~RuntimeOptimizer ()
121 {
122 }
123
124
125
126 void
OSOProcessorBase::set_inst (int newlayer)
128 {
129 m_layer = newlayer;
130 m_inst = group()[m_layer];
131 OSL_DASSERT (m_inst != NULL);
132 set_debug ();
133 }
134
135
136
137 void
RuntimeOptimizer::set_inst (int newlayer)
139 {
140 OSOProcessorBase::set_inst (newlayer);
141 m_all_consts.clear ();
142 m_symbol_aliases.clear ();
143 m_block_aliases.clear ();
144 m_param_aliases.clear ();
145 m_bblockids.clear ();
146 }
147
148
149
150 void
OSOProcessorBase::set_debug ()
152 {
153 // start with the shading system's idea of debugging level
154 m_debug = shadingsys().debug();
155
156 // If either group or layer was specified for debug, surely they want
157 // debugging turned on.
158 if (!shadingsys().debug_groupname().empty() || !shadingsys().debug_layername().empty())
159 m_debug = std::max (m_debug, 1);
160
161 // Force debugging off if a specific group was selected for debug
162 // and we're not it, or a specific layer was selected for debug and
163 // we're not it.
164 bool wronggroup = (!shadingsys().debug_groupname().empty() &&
165 shadingsys().debug_groupname() != group().name());
166 bool wronglayer = (!shadingsys().debug_layername().empty() && inst() &&
167 shadingsys().debug_layername() != inst()->layername());
168 if (wronggroup || wronglayer)
169 m_debug = 0;
170 }
171
172
173
174 void
RuntimeOptimizer::set_debug ()
176 {
177 OSOProcessorBase::set_debug ();
178
// If a specific group is isolated for debugging and the
// 'optimize_nondebug' flag is on, fully optimize all other groups.
181 if (!shadingsys().debug_groupname().empty() &&
182 shadingsys().debug_groupname() != group().name()) {
183 if (shadingsys().m_optimize_nondebug) {
184 // Debugging trick: if user said to only debug one group, turn
185 // on full optimization for all others! This prevents
186 // everything from running 10x slower just because you want to
187 // debug one shader.
188 m_optimize = 3;
189 m_opt_simplify_param = true;
190 m_opt_constant_fold = true;
191 m_opt_stale_assign = true;
192 m_opt_elide_useless_ops = true;
193 m_opt_elide_unconnected_outputs = true;
194 m_opt_peephole = true;
195 m_opt_coalesce_temps = true;
196 m_opt_assign = true;
197 m_opt_mix = true;
198 m_opt_middleman = true;
199 }
200 }
201 }
202
203
204
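/// Search the instance's already-created constants for one whose type is
/// equivalent to 'type' and whose value matches the bytes at 'data'.
/// Return its symbol index, or -1 if no such constant exists yet.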
205 int
RuntimeOptimizer::find_constant (const TypeSpec &type, const void *data)
207 {
208 for (int c : m_all_consts) {
209 const Symbol &s (*inst()->symbol(c));
210 OSL_DASSERT (s.symtype() == SymTypeConst);
211 if (equivalent (s.typespec(), type) &&
212 !memcmp (s.data(), data, s.typespec().simpletype().size())) {
213 return c;
214 }
215 }
216 return -1;
217 }
218
219
220
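/// Return the index of a constant symbol of the given type holding the
/// given value, adding a new constant symbol (with storage allocated from
/// the shading system, converting int data to float or replicating a
/// scalar across an aggregate as needed) if no identical one exists yet.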
221 int
RuntimeOptimizer::add_constant (const TypeSpec &type, const void *data,
                                TypeDesc datatype)
224 {
225 int ind = find_constant (type, data);
226 if (ind < 0) {
227 // support varlen arrays
228 TypeSpec newtype = type;
229 if (type.is_unsized_array())
230 newtype.make_array (datatype.numelements());
231
232 Symbol newconst (ustring::sprintf ("$newconst%d", m_next_newconst++),
233 newtype, SymTypeConst);
234 void *newdata = nullptr;
235 TypeDesc t (newtype.simpletype());
236 size_t n = t.aggregate * t.numelements();
237 if (datatype == TypeDesc::UNKNOWN)
238 datatype = t;
239 size_t datan = datatype.aggregate * datatype.numelements();
240 if (t.basetype == TypeDesc::INT &&
241 datatype.basetype == TypeDesc::INT && n == datan) {
242 newdata = inst()->shadingsys().alloc_int_constants (n);
243 memcpy (newdata, data, t.size());
244 } else if (t.basetype == TypeDesc::FLOAT &&
245 datatype.basetype == TypeDesc::FLOAT) {
246 newdata = inst()->shadingsys().alloc_float_constants (n);
247 if (n == datan)
248 for (size_t i = 0; i < n; ++i)
249 ((float *)newdata)[i] = ((const float *)data)[i];
250 else if (datan == 1)
251 for (size_t i = 0; i < n; ++i)
252 ((float *)newdata)[i] = ((const float *)data)[0];
253 else {
254 OSL_ASSERT (0 && "unsupported type for add_constant");
255 }
256 } else if (t.basetype == TypeDesc::FLOAT &&
257 datatype.basetype == TypeDesc::INT) {
258 newdata = inst()->shadingsys().alloc_float_constants (n);
259 if (n == datan)
260 for (size_t i = 0; i < n; ++i)
261 ((float *)newdata)[i] = ((const int *)data)[i];
262 else if (datan == 1)
263 for (size_t i = 0; i < n; ++i)
264 ((float *)newdata)[i] = ((const int *)data)[0];
265 else {
266 OSL_ASSERT (0 && "unsupported type for add_constant");
267 }
268 } else if (t.basetype == TypeDesc::STRING &&
269 datatype.basetype == TypeDesc::STRING && n == datan) {
270 newdata = inst()->shadingsys().alloc_string_constants (n);
271 memcpy (newdata, data, t.size());
272 } else {
273 OSL_ASSERT (0 && "unsupported type for add_constant");
274 }
275 newconst.data (newdata);
276 ind = add_symbol (newconst);
277 m_all_consts.push_back (ind);
278 }
279 return ind;
280 }
281
282
283
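/// Add a new temporary symbol of the given type to the instance and
/// return its index.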
284 int
RuntimeOptimizer::add_temp (const TypeSpec &type)
286 {
287 return add_symbol (Symbol (ustring::sprintf ("$opttemp%d", m_next_newtemp++),
288 type, SymTypeTemp));
289 }
290
291
292
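/// Return the index of the global symbol with the given name, adding it
/// to the instance's symbol table if it isn't already present.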
293 int
RuntimeOptimizer::add_global (ustring name, const TypeSpec &type)
295 {
296 int index = inst()->findsymbol (name);
297 if (index < 0)
298 index = add_symbol (Symbol (name, type, SymTypeGlobal));
299 return index;
300 }
301
302
303
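/// Append 'sym' to the instance's symbol table and return its index.
/// The table must already have enough capacity that no reallocation
/// occurs (see make_symbol_room).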
304 int
RuntimeOptimizer::add_symbol (const Symbol &sym)
306 {
307 size_t index = inst()->symbols().size ();
308 OSL_ASSERT (inst()->symbols().capacity() > index &&
309 "we shouldn't have to realloc here");
310 inst()->symbols().push_back (sym);
311 // Mark the symbol as always read. Next time we recompute symbol
312 // lifetimes, it'll get the correct range for when it's read and
313 // written. But for now, just make sure it doesn't accidentally
314 // look entirely unused.
315 inst()->symbols().back().mark_always_used ();
316 return (int) index;
317 }
318
319
320
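// Print an optimization debug message, holding a mutex so that output
// from multiple threads doesn't interleave.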
321 void
RuntimeOptimizer::debug_opt_impl (string_view message) const
323 {
324 static OIIO::spin_mutex mutex;
325 OIIO::spin_lock lock (mutex);
326 std::cout << message;
327 }
328
329
330
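// Print an optimization debug message about the op range [opbegin,opend),
// including the source file and line of the first op.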
331 void
RuntimeOptimizer::debug_opt_ops (int opbegin, int opend, string_view message) const
333 {
334 const Opcode &op (inst()->ops()[opbegin]);
335 std::string oprange;
336 if (opbegin >= 0 && opend-opbegin > 1)
337 oprange = Strutil::sprintf ("ops %d-%d ", opbegin, opend);
338 else if (opbegin >= 0)
339 oprange = Strutil::sprintf ("op %d ", opbegin);
340 debug_optf(" %s%s (@ %s:%d)\n", oprange, message,
341 op.sourcefile(), op.sourceline());
342 }
343
344
345
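// Print a debug message recording that 'numops' ops starting at 'op' were
// turned into 'newop' (with the given replacement args), and why.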
346 void
RuntimeOptimizer::debug_turn_into (const Opcode &op, int numops,
                                   string_view newop,
                                   int newarg0, int newarg1, int newarg2,
                                   string_view why)
351 {
352 int opnum = &op - &(inst()->ops()[0]);
353 std::string msg;
354 if (numops == 1)
355 msg = Strutil::sprintf ("turned '%s' to '%s", op_string(op), newop);
356 else
357 msg = Strutil::sprintf ("turned to '%s", newop);
358 if (newarg0 >= 0)
359 msg += Strutil::sprintf (" %s", inst()->symbol(newarg0)->name());
360 if (newarg1 >= 0)
361 msg += Strutil::sprintf (" %s", inst()->symbol(newarg1)->name());
362 if (newarg2 >= 0)
363 msg += Strutil::sprintf (" %s", inst()->symbol(newarg2)->name());
364 msg += "'";
365 if (why.size())
366 msg += Strutil::sprintf (" : %s", why);
367 debug_opt_ops (opnum, opnum+numops, msg);
368 }
369
370
371
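/// Replace an op in-place with a new op of the given name taking two or
/// three arguments (arg 0 is treated as written, the others as read),
/// and update the argument symbols' read/write ranges accordingly.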
372 void
RuntimeOptimizer::turn_into_new_op (Opcode &op, ustring newop, int newarg0,
                                    int newarg1, int newarg2, string_view why)
375 {
376 int opnum = &op - &(inst()->ops()[0]);
377 OSL_DASSERT(opnum >= 0 && opnum < (int)inst()->ops().size());
378 if (debug() > 1)
379 debug_turn_into (op, 1, newop, newarg0, newarg1, newarg2, why);
380 op.reset (newop, newarg2<0 ? 2 : 3);
381 inst()->args()[op.firstarg()+0] = newarg0;
382 op.argwriteonly (0);
383 opargsym(op, 0)->mark_rw (opnum, false, true);
384 inst()->args()[op.firstarg()+1] = newarg1;
385 op.argreadonly (1);
386 opargsym(op, 1)->mark_rw (opnum, true, false);
387 if (newarg2 >= 0) {
388 inst()->args()[op.firstarg()+2] = newarg2;
389 op.argreadonly (2);
390 opargsym(op, 2)->mark_rw (opnum, true, false);
391 }
392 }
393
394
395
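/// Replace an op in-place with a simple assignment of 'newarg' to the
/// op's original first (destination) argument.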
396 void
RuntimeOptimizer::turn_into_assign (Opcode &op, int newarg, string_view why)
398 {
399 // We don't know the op num here, so we subtract the pointers
400 int opnum = &op - &(inst()->ops()[0]);
401 if (debug() > 1)
402 debug_turn_into (op, 1, "assign", oparg(op,0), newarg, -1, why);
403 op.reset (u_assign, 2);
404 inst()->args()[op.firstarg()+1] = newarg;
405 op.argwriteonly (0);
406 op.argread (1, true);
407 op.argwrite (1, false);
408 // Need to make sure the symbol we're assigning is marked as read
409 // for this op.
410 OSL_DASSERT(opnum >= 0 && opnum < (int)inst()->ops().size());
411 Symbol *arg = opargsym (op, 1);
412 arg->mark_rw (opnum, true, false);
413 }
414
415
416
417 // Turn the current op into a simple assignment to zero (of the first arg).
418 void
RuntimeOptimizer::turn_into_assign_zero (Opcode &op, string_view why)
420 {
421 static float zero[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
422 0, 0, 0, 0, 0, 0, 0, 0 };
423 Symbol &R (*(inst()->argsymbol(op.firstarg()+0)));
424 int cind = add_constant (R.typespec(), &zero);
425 turn_into_assign (op, cind, why);
426 }
427
428
429
430 // Turn the current op into a simple assignment to one (of the first arg).
431 void
RuntimeOptimizer::turn_into_assign_one (Opcode &op, string_view why)
433 {
434 Symbol &R (*(inst()->argsymbol(op.firstarg()+0)));
435 if (R.typespec().is_int()) {
436 int one = 1;
437 int cind = add_constant (R.typespec(), &one);
438 turn_into_assign (op, cind, why);
439 } else {
440 OSL_DASSERT (R.typespec().is_triple() || R.typespec().is_float());
441 static float one[3] = { 1, 1, 1 };
442 int cind = add_constant (R.typespec(), &one);
443 turn_into_assign (op, cind, why);
444 }
445 }
446
447
448
449 // Turn the op into a no-op
450 int
RuntimeOptimizer::turn_into_nop (Opcode &op, string_view why)
452 {
453 if (op.opname() != u_nop) {
454 if (debug() > 1)
455 debug_turn_into (op, 1, "nop", -1, -1, -1, why);
456 op.reset (u_nop, 0);
457 return 1;
458 }
459 return 0;
460 }
461
462
463
464 int
RuntimeOptimizer::turn_into_nop (int begin, int end, string_view why)
466 {
467 int changed = 0;
468 for (int i = begin; i < end; ++i) {
469 Opcode &op (inst()->ops()[i]);
470 if (op.opname() != u_nop) {
471 op.reset (u_nop, 0);
472 ++changed;
473 }
474 }
475 if (debug() > 1 && changed)
476 debug_turn_into (inst()->ops()[begin], end-begin, "nop", -1, -1, -1, why);
477 return changed;
478 }
479
// Turn the op into a no-op functioncall.
// We want to keep the jump indices so we can correctly
// model an inlined function call for the debugger.
483 int
RuntimeOptimizer::turn_into_functioncall_nr (Opcode &op, string_view why)
485 {
486 if (op.opname() == u_functioncall) {
487 if (debug() > 1)
488 debug_turn_into (op, 1, "functioncall_nr", -1, -1, -1, why);
489 op.transmute_opname (u_functioncall_nr);
490 return 1;
491 }
492 return 0;
493 }
494
495
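/// Insert a new op with the given name and arguments at position 'opnum'
/// of the instance's code, fixing up jump targets, param init ranges,
/// basic block IDs, conditional/loop tables, and (optionally) symbol
/// read/write ranges to account for the insertion. 'relation' selects
/// whether the new op inherits its method and source position from the
/// previous or the next instruction.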
496 void
RuntimeOptimizer::insert_code (int opnum, ustring opname,
                               const cspan<int> args_to_add,
                               RecomputeRWRangesOption recompute_rw_ranges,
                               InsertRelation relation)
501 {
502 OpcodeVec &code (inst()->ops());
503 std::vector<int> &opargs (inst()->args());
504 ustring method = (opnum < (int)code.size()) ? code[opnum].method() : OSLCompilerImpl::main_method_name();
505 int nargs = args_to_add.size();
506 Opcode op (opname, method, opargs.size(), nargs);
507 code.insert (code.begin()+opnum, op);
508 opargs.insert (opargs.end(), args_to_add.begin(), args_to_add.end());
509 if (opnum < inst()->m_maincodebegin)
510 ++inst()->m_maincodebegin;
511 ++inst()->m_maincodeend;
512 if ((relation == -1 && opnum > 0) ||
513 (relation == 1 && opnum < (int)code.size()-1)) {
514 code[opnum].method (code[opnum+relation].method());
515 code[opnum].source (code[opnum+relation].sourcefile(),
516 code[opnum+relation].sourceline());
517 }
518
519 // Unless we were inserting at the end, we may need to adjust
520 // the jump addresses of other ops and the param init ranges.
521 if (opnum < (int)code.size()-1) {
522 // Adjust jump offsets
523 for (auto& c : code) {
524 for (int j = 0; j < (int)Opcode::max_jumps && c.jump(j) >= 0; ++j) {
525 if (c.jump(j) > opnum) {
526 c.jump(j) = c.jump(j) + 1;
527 // std::cerr << "Adjusting jump target at op " << n << "\n";
528 }
529 }
530 }
531 // Adjust param init ranges
532 FOREACH_PARAM (auto&& s, inst()) {
533 if (s.initbegin() > opnum)
534 s.initbegin (s.initbegin()+1);
535 if (s.initend() > opnum)
536 s.initend (s.initend()+1);
537 }
538 }
539
540 // Inserting the instruction may change the read/write ranges of
541 // symbols. Not adjusting this can throw off other optimizations.
542 if (recompute_rw_ranges) {
543 for (auto&& s : inst()->symbols()) {
544 if (s.everread()) {
545 int first = s.firstread(), last = s.lastread();
546 if (first >= opnum)
547 ++first;
548 if (last >= opnum)
549 ++last;
550 s.set_read (first, last);
551 }
552 if (s.everwritten()) {
553 int first = s.firstwrite(), last = s.lastwrite();
554 if (first >= opnum)
555 ++first;
556 if (last >= opnum)
557 ++last;
558 s.set_write (first, last);
559 }
560 }
561 }
562
563 // Adjust the basic block IDs and which instructions are inside
564 // conditionals.
565 if (m_bblockids.size()) {
566 OSL_DASSERT (m_bblockids.size() == code.size()-1);
567 m_bblockids.insert (m_bblockids.begin()+opnum, 1, m_bblockids[opnum]);
568 }
569 if (m_in_conditional.size()) {
570 OSL_DASSERT (m_in_conditional.size() == code.size()-1);
571 m_in_conditional.insert (m_in_conditional.begin()+opnum, 1,
572 m_in_conditional[opnum]);
573 }
574 if (m_in_loop.size()) {
575 OSL_DASSERT (m_in_loop.size() == code.size()-1);
576 m_in_loop.insert (m_in_loop.begin()+opnum, 1,
577 m_in_loop[opnum]);
578 }
579 // If the first return happened after this, bump it up
580 if (m_first_return >= opnum)
581 ++m_first_return;
582
583 if (opname == u_if) {
584 // special case for 'if' -- the arg is read, not written
585 inst()->symbol(args_to_add[0])->mark_rw (opnum, true, false);
586 }
587 else if (opname != u_useparam) {
588 // Mark the args as being used for this op (assume that the
589 // first is written, the others are read).
590 for (int a = 0; a < nargs; ++a)
591 inst()->symbol(args_to_add[a])->mark_rw (opnum, a>0, a==0);
592 }
593 }
594
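/// Convenience form of insert_code that takes up to four argument symbol
/// indices (pass -1 for unused trailing arguments).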
595 void
RuntimeOptimizer::insert_code (int opnum, ustring opname,
                               InsertRelation relation,
                               int arg0, int arg1, int arg2, int arg3)
599 {
600 int args[4];
601 int nargs = 0;
602 if (arg0 >= 0) args[nargs++] = arg0;
603 if (arg1 >= 0) args[nargs++] = arg1;
604 if (arg2 >= 0) args[nargs++] = arg2;
605 if (arg3 >= 0) args[nargs++] = arg3;
606 insert_code (opnum, opname, cspan<int>(args, args + nargs), RecomputeRWRanges, relation);
607 }
608
609
610
/// Insert a 'useparam' instruction in front of instruction 'opnum', to
/// reference the symbols in 'params_to_use'.
613 void
RuntimeOptimizer::insert_useparam (size_t opnum,
                                   const std::vector<int> &params_to_use)
616 {
617 OSL_DASSERT (params_to_use.size() > 0);
618 OpcodeVec &code (inst()->ops());
619 insert_code (opnum, u_useparam, params_to_use,
620 RecomputeRWRanges, GroupWithNext);
621
622 // All ops are "read"
623 code[opnum].argwrite (0, false);
624 code[opnum].argread (0, true);
625 if (opnum < code.size()-1) {
626 // We have no parse node, but we set the new instruction's
627 // "source" to the one of the statement right after.
628 code[opnum].source (code[opnum+1].sourcefile(),
629 code[opnum+1].sourceline());
630 // Set the method id to the same as the statement right after
631 code[opnum].method (code[opnum+1].method());
632 } else {
633 // If there IS no "next" instruction, just call it main
634 code[opnum].method (OSLCompilerImpl::main_method_name());
635 }
636 }
637
638
639
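/// Insert 'useparam' ops throughout the instance: one at the start of
/// main code covering output params that are connected, renderer outputs,
/// or have init ops, and one ahead of each op that references params not
/// yet known to be initialized. Afterwards, recompute basic blocks and
/// variable lifetimes.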
640 void
RuntimeOptimizer::add_useparam (SymbolPtrVec &allsyms)
642 {
643 OpcodeVec &code (inst()->ops());
644 std::vector<int> &opargs (inst()->args());
645
646 // Mark all symbols as un-initialized
647 for (auto&& s : inst()->symbols())
648 s.initialized (false);
649
650 if (inst()->m_maincodebegin < 0)
651 inst()->m_maincodebegin = (int)code.size();
652
653 // Take care of the output params right off the bat -- as soon as the
654 // shader starts running 'main'.
655 std::vector<int> outputparams;
656 for (int i = 0; i < (int)inst()->symbols().size(); ++i) {
657 Symbol *s = inst()->symbol(i);
658 if (s->symtype() == SymTypeOutputParam &&
659 (s->connected() || s->connected_down() || s->renderer_output() ||
660 (s->valuesource() == Symbol::DefaultVal && s->has_init_ops()))) {
661 outputparams.push_back (i);
662 s->initialized (true);
663 }
664 }
665 if (outputparams.size())
666 insert_useparam (inst()->m_maincodebegin, outputparams);
667
668 // Figure out which statements are inside conditional states
669 find_conditionals ();
670
671 // Loop over all ops...
672 for (int opnum = 0; opnum < (int)code.size(); ++opnum) {
673 Opcode &op (code[opnum]); // handy ref to the op
674 if (op.opname() == u_useparam)
675 continue; // skip useparam ops themselves, if we hit one
676 bool simple_assign = is_simple_assign(op);
677 bool in_main_code = (opnum >= inst()->m_maincodebegin);
678 std::vector<int> params; // list of params referenced by this op
679 // For each argument...
680 for (int a = 0; a < op.nargs(); ++a) {
681 int argind = op.firstarg() + a;
682 SymbolPtr s = inst()->argsymbol (argind);
683 OSL_DASSERT(s->dealias() == s);
684 // If this arg is a param and is read, remember it
685 if (s->symtype() != SymTypeParam && s->symtype() != SymTypeOutputParam)
686 continue; // skip non-params
687 // skip if we've already 'usedparam'ed it unconditionally
688 if (s->initialized() && in_main_code)
689 continue;
690
691 bool inside_init = (opnum >= s->initbegin() && opnum < s->initend());
692 if (op.argread(a) || (op.argwrite(a) && !inside_init)) {
693 // Don't add it more than once
694 if (std::find (params.begin(), params.end(), opargs[argind]) == params.end()) {
695 // If this arg is the one being written to by a
696 // "simple" assignment, it doesn't need a useparam here.
697 if (! (simple_assign && a == 0))
698 params.push_back (opargs[argind]);
// If this op is executed unconditionally in main code, mark the
// param as initialized from here on.
700 if (op_is_unconditionally_executed(opnum) &&
701 op.method() == OSLCompilerImpl::main_method_name())
702 s->initialized (true);
703 }
704 }
705 }
706
// If the op we are examining read any params, insert a "useparam"
// op whose arguments are the list of params we are about to use.
709 if (params.size()) {
710 insert_useparam (opnum, params);
711 // Skip the op we just added
712 ++opnum;
713 }
714 }
715
716 // Mark all symbols as un-initialized
717 for (auto&& s : inst()->symbols())
718 s.initialized (false);
719
// Re-track variable lifetimes, since the inserted useparam
// instructions will have changed the instruction numbers.
722 find_basic_blocks ();
723 track_variable_lifetimes (allsyms);
724 }
725
726
727
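/// Return true if A is a constant float, int, or triple whose value is
/// exactly zero.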
728 bool
OSOProcessorBase::is_zero (const Symbol &A)
730 {
731 if (! A.is_constant())
732 return false;
733 const TypeSpec &Atype (A.typespec());
734 static Vec3 Vzero (0, 0, 0);
735 return (Atype.is_float() && *(const float *)A.data() == 0) ||
736 (Atype.is_int() && *(const int *)A.data() == 0) ||
737 (Atype.is_triple() && *(const Vec3 *)A.data() == Vzero);
738 }
739
740
741
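/// Return true if A is a constant float, int, triple, or matrix whose
/// value is one (the identity matrix, in the matrix case).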
742 bool
OSOProcessorBase::is_one (const Symbol &A)
744 {
745 if (! A.is_constant())
746 return false;
747 const TypeSpec &Atype (A.typespec());
748 static Vec3 Vone (1, 1, 1);
749 static Matrix44 Mone (1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1);
750 return (Atype.is_float() && *(const float *)A.data() == 1) ||
751 (Atype.is_int() && *(const int *)A.data() == 1) ||
752 (Atype.is_triple() && *(const Vec3 *)A.data() == Vone) ||
753 (Atype.is_matrix() && *(const Matrix44 *)A.data() == Mone);
754 }
755
756
757
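/// Return true if A is a constant int- or float-based value with no zero
/// components.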
758 bool
OSOProcessorBase::is_nonzero (const Symbol &A)
760 {
761 if (! A.is_constant())
762 return false;
763 const TypeSpec &Atype (A.typespec());
764 int ncomponents = Atype.numelements() * Atype.aggregate();
765 if (Atype.is_float_based()) {
766 const float *val = (const float *)A.data();
767 for (int i = 0; i < ncomponents; ++i)
768 if (val[i] == 0.0f)
769 return false;
770 return true;
771 }
772 if (Atype.is_int_based()) {
773 const int *val = (const int *)A.data();
774 for (int i = 0; i < ncomponents; ++i)
775 if (val[i] == 0)
776 return false;
777 return true;
778 }
779 return false;
780 }
781
782
783
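/// Return the value of constant symbol A as a comma-separated string
/// (or the empty string if A is not a constant).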
784 std::string
OSOProcessorBase::const_value_as_string (const Symbol &A)
786 {
787 if (! A.is_constant())
788 return std::string();
789 TypeDesc type (A.typespec().simpletype());
790 int n = type.numelements() * type.aggregate;
791 std::ostringstream s;
792 s.imbue (std::locale::classic()); // force C locale
793 if (type.basetype == TypeDesc::FLOAT) {
794 for (int i = 0; i < n; ++i)
795 s << (i ? "," : "") << ((const float *)A.data())[i];
796 } else if (type.basetype == TypeDesc::INT) {
797 for (int i = 0; i < n; ++i)
798 s << (i ? "," : "") << ((const int *)A.data())[i];
799 } else if (type.basetype == TypeDesc::STRING) {
800 for (int i = 0; i < n; ++i)
801 s << (i ? "," : "") << '\"' << ((const ustring *)A.data())[i] << '\"';
802 }
803 return s.str();
804 }
805
806
807
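/// Record that the layer being optimized sets a message with this name.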
808 void
RuntimeOptimizer::register_message (ustring name)
810 {
811 m_local_messages_sent.push_back (name);
812 }
813
814
815
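/// Record that the layer being optimized sets a message whose name isn't
/// known at optimization time.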
816 void
RuntimeOptimizer::register_unknown_message ()
818 {
819 m_local_unknown_message_sent = true;
820 }
821
822
823
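/// Return true if a message with this name may have been set -- either by
/// this layer, by an earlier layer, or because some message with an
/// unknown name was set.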
824 bool
RuntimeOptimizer::message_possibly_set (ustring name) const
826 {
827 return m_local_unknown_message_sent || m_unknown_message_sent ||
828 std::find (m_messages_sent.begin(), m_messages_sent.end(), name) != m_messages_sent.end() ||
829 std::find (m_local_messages_sent.begin(), m_local_messages_sent.end(), name) != m_local_messages_sent.end();
830 }
831
832
833
834 /// For all the instance's parameters (that can't be overridden by the
835 /// geometry), if they can be found to be effectively constants or
836 /// globals, make constants for them and alias them to the constant. If
837 /// they are connected to an earlier layer's output, if it can determine
838 /// that the output will be a constant or global, then sever the
839 /// connection and just alias our parameter to that value.
840 void
RuntimeOptimizer::simplify_params ()
842 {
843 for (int i = inst()->firstparam(); i < inst()->lastparam(); ++i) {
844 Symbol *s (inst()->symbol(i));
845 if (s->symtype() != SymTypeParam)
846 continue; // Skip non-params
847 if (! s->lockgeom())
848 continue; // Don't mess with params that can change with the geom
849 if (s->typespec().is_structure() || s->typespec().is_closure_based())
850 continue; // We don't mess with struct placeholders or closures
851
852 if (s->valuesource() == Symbol::InstanceVal) {
853 // Instance value -- turn it into a constant and remove init ops
854 make_symbol_room (1);
855 s = inst()->symbol(i); // In case make_symbol_room changed ptrs
856 int cind = add_constant (s->typespec(), s->data());
857 global_alias (i, cind); // Alias this symbol to the new const
858 turn_into_nop (s->initbegin(), s->initend(),
859 "instance value doesn't need init ops");
860 } else if (s->valuesource() == Symbol::DefaultVal && !s->has_init_ops()) {
861 // Plain default value without init ops -- turn it into a constant
862 make_symbol_room (1);
863 s = inst()->symbol(i); // In case make_symbol_room changed ptrs
864 int cind = add_constant (s->typespec(), s->data(), s->typespec().simpletype());
865 global_alias (i, cind); // Alias this symbol to the new const
866 } else if (s->valuesource() == Symbol::DefaultVal && s->has_init_ops()) {
867 // Default val comes from init ops -- special cases? Yes,
868 // if it's a simple assignment from a global whose value is
869 // not reassigned later, we can just alias it, and if we're
870 // lucky that may eliminate all uses of the parameter.
871
872 // First, trim init ops in case nops have accumulated
873 while (s->has_init_ops() && op(s->initbegin()).opname() == u_nop)
874 s->initbegin (s->initbegin()+1);
875 while (s->has_init_ops() && op(s->initend()-1).opname() == u_nop)
876 s->initend (s->initend()-1);
877 if (s->initbegin() == s->initend()-1) { // just one op
878 Opcode &op (inst()->ops()[s->initbegin()]);
879 if (op.opname() == u_assign) {
880 // The default value has init ops, but they consist of
881 // just a single assignment op...
882 Symbol *src = inst()->argsymbol(op.firstarg()+1);
883 // Is it assigning a global, or a parameter that's
884 // got a default or instance value and isn't on the geom,
885 // and its value is never changed and the types match?
886 if ((src->symtype() == SymTypeGlobal ||
887 src->symtype() == SymTypeConst ||
888 (src->symtype() == SymTypeParam && src->lockgeom() &&
889 (src->valuesource() == Symbol::DefaultVal ||
890 src->valuesource() == Symbol::InstanceVal)))
891 && !src->everwritten()
892 && equivalent(src->typespec(), s->typespec())) {
893 // Great, so let's remember the alias. We can't
894 // call global_alias() here, because we're still in
895 // init ops, that'll screw us up. So we just record
896 // it in m_param_aliases and then we'll establish
897 // the global aliases when we hit the main code.
898 m_param_aliases[i] = inst()->arg(op.firstarg()+1);
899 }
900 }
901 }
902 } else if (s->valuesource() == Symbol::ConnectedVal) {
903 // It's connected to an earlier layer. If the output var of
904 // the upstream shader is effectively constant or a global,
905 // then so is this variable.
906 for (auto&& c : inst()->connections()) {
907 if (c.dst.param != i)
908 continue;
909 if (c.dst.is_complete()) {
910 /// All components are being set through either
911 /// float->triple or triple->triple
912 /// Get rid of the un-needed init ops.
913 turn_into_nop (s->initbegin(), s->initend(),
914 "connected value doesn't need init ops");
915 }
916 if (c.is_complete()) {
917 // srcsym is the earlier group's output param, which
918 // is fully connected as the input to the param we're
919 // examining.
920 ShaderInstance *uplayer = group()[c.srclayer];
921 Symbol *srcsym = uplayer->symbol(c.src.param);
922 if (!srcsym->lockgeom())
923 continue; // Not if it can be overridden by geometry
924
925 // Is the source symbol known to be a global, from
926 // earlier analysis by find_params_holding_globals?
927 // If so, make sure the global is in this instance's
928 // symbol table, and alias the parameter to it.
929 ustringmap_t &g (m_params_holding_globals[c.srclayer]);
930 auto f = g.find (srcsym->name());
931 if (f != g.end()) {
932 if (debug() > 1)
933 debug_optf("Remapping %s.%s because it's connected to "
934 "%s.%s, which is known to be %s\n",
935 inst()->layername(), s->name(),
936 uplayer->layername(), srcsym->name(),
937 f->second);
938 make_symbol_room (1);
939 s = inst()->symbol(i); // In case make_symbol_room changed ptrs
940 int ind = add_global (f->second, srcsym->typespec());
941 global_alias (i, ind);
942 shadingsys().m_stat_global_connections += 1;
943 break;
944 }
945
946 if (!srcsym->everwritten() &&
947 (srcsym->valuesource() == Symbol::DefaultVal ||
948 srcsym->valuesource() == Symbol::InstanceVal) &&
949 !srcsym->has_init_ops()) {
950 make_symbol_room (1);
951 s = inst()->symbol(i); // In case make_symbol_room changed ptrs
952 int cind = add_constant (s->typespec(), srcsym->data(),
953 srcsym->typespec().simpletype());
954 // Alias this symbol to the new const
955 global_alias (i, cind);
956 make_param_use_instanceval (s, "- upstream layer sets it to a constant");
957 replace_param_value (s, srcsym->data(), srcsym->typespec());
958 shadingsys().m_stat_const_connections += 1;
959 break;
960 }
961 }
962 }
963 // FIXME / N.B.: We only optimize "fully complete" connections,
964 // not those involving individual components or array elements
965 // of the connected parameters, because we sure don't track the
966 // constness or aliasing of individual components/element, only
967 // whole variables. But there are two cases where the logic
968 // above fails to fully exploit the connection propagating a
969 // constant value. (a) Partial-to-whole connections, for example
970 // connecting one component of an upstream triple output to a
971 // downstream float input, should propagate the constant, but we
972 // currently neglect this case. (b) If *multiple* connections
973 // combine to fully propagate values, for example if someone was
974 // foolish enough to connect R, G, and B components of color
975 // parameters *separately*, we sure don't notice that and treat
976 // it as a full connection of the color.
977 }
978 }
979 }
980
981
982
983 /// For all the instance's parameters, if they are simply assigned globals,
984 /// record that in m_params_holding_globals.
985 void
RuntimeOptimizer::find_params_holding_globals ()
987 {
988 FOREACH_PARAM (auto&& s, inst()) {
989 // Skip if this isn't a shader output parameter that's connected
990 // to a later layer.
991 if (s.symtype() != SymTypeParam && s.symtype() != SymTypeOutputParam)
992 continue; // Skip non-params
993 if (!s.connected_down())
994 continue; // Skip unconnected params -- who cares
995 if (s.valuesource() != Symbol::DefaultVal)
996 continue; // Skip -- must be connected or an instance value
997 if (s.firstwrite() < 0 || s.firstwrite() != s.lastwrite())
998 continue; // Skip -- written more than once
999
1000 int opnum = s.firstwrite();
1001 Opcode &op (inst()->ops()[opnum]);
1002 if (op.opname() != u_assign || ! op_is_unconditionally_executed(opnum))
1003 continue; // Not a simple assignment unconditionally performed
1004
1005 // what s is assigned from (fully dealiased)
1006 Symbol *src = inst()->symbol (dealias_symbol (oparg (op, 1), opnum));
1007
1008 if (src->symtype() != SymTypeGlobal)
1009 continue; // only interested in global assignments
1010
1011 if (debug() > 1)
1012 debug_optf("I think that %s.%s will always be %s\n",
1013 inst()->layername(), s.name(), src->name());
1014 m_params_holding_globals[layer()][s.name()] = src->name();
1015 }
1016 }
1017
1018
1019
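/// Scan the instance's code and record which ops lie inside conditionals
/// (m_in_conditional), which lie inside loops (m_in_loop), and the
/// position of the first 'exit' op (m_first_return).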
1020 void
OSOProcessorBase::find_conditionals ()
1022 {
1023 OpcodeVec &code (inst()->ops());
1024
1025 m_in_conditional.clear ();
1026 m_in_conditional.resize (code.size(), false);
1027 m_in_loop.clear ();
1028 m_in_loop.resize (code.size(), false);
1029 m_first_return = (int)code.size();
1030 for (int i = 0; i < (int)code.size(); ++i) {
1031 if (code[i].jump(0) >= 0) {
1032 std::fill (m_in_conditional.begin()+i,
1033 m_in_conditional.begin()+code[i].farthest_jump(), true);
1034 if (code[i].opname() == Strings::op_dowhile ||
1035 code[i].opname() == Strings::op_for ||
1036 code[i].opname() == Strings::op_while) {
1037 std::fill (m_in_loop.begin()+i,
1038 m_in_loop.begin()+code[i].farthest_jump(), true);
1039 }
1040 }
1041 if (code[i].opname() == Strings::op_exit)
1042 m_first_return = std::min (m_first_return, i);
1043 }
1044 }
1045
1046
1047
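/// Partition the instance's code into basic blocks, giving each op a
/// block ID in m_bblockids. New blocks start at param init ops, the start
/// of main code, jump targets, the op following a conditional or loop
/// head, and the op following break/continue/return/exit.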
1048 void
OSOProcessorBase::find_basic_blocks ()
1050 {
1051 OpcodeVec &code (inst()->ops());
1052
1053 // Start by setting all basic block IDs to 0
1054 m_bblockids.clear ();
1055 m_bblockids.resize (code.size(), 0);
1056
1057 // First, keep track of all the spots where blocks begin
1058 std::vector<bool> block_begin (code.size(), false);
1059
1060 // Init ops start basic blocks
1061 FOREACH_PARAM (const Symbol &s, inst()) {
1062 if (s.has_init_ops())
1063 block_begin[s.initbegin()] = true;
1064 }
1065
1066 // Main code starts a basic block
1067 block_begin[inst()->maincodebegin()] = true;
1068
1069 for (size_t opnum = 0; opnum < code.size(); ++opnum) {
1070 Opcode &op (code[opnum]);
1071 if (op.opname() == u_functioncall_nr)
{   // Treat the 'no return' function call as if it were a nop;
    // we use it later to generate correct inline debug information.
    continue;
1075 }
1076 // Anyplace that's the target of a jump instruction starts a basic block
1077 for (int j = 0; j < (int)Opcode::max_jumps; ++j) {
1078 if (op.jump(j) >= 0)
1079 block_begin[op.jump(j)] = true;
1080 else
1081 break;
1082 }
1083 // The first instruction in a conditional or loop (which is not
1084 // itself a jump target) also begins a basic block. If the op has
1085 // any jump targets at all, it must be a conditional or loop.
1086 if (op.jump(0) >= 0)
1087 block_begin[opnum+1] = true;
1088 // 'break', 'continue', 'return', and 'exit' also cause the next
1089 // statement to begin a new basic block.
1090 if (op.opname() == u_break || op.opname() == u_continue ||
1091 op.opname() == u_return || op.opname() == u_exit)
1092 block_begin[opnum+1] = true;
1093 }
1094
1095 // Now color the blocks with unique identifiers
1096 int bbid = 1; // next basic block ID to use
1097 for (size_t opnum = 0; opnum < code.size(); ++opnum) {
1098 if (block_begin[opnum])
1099 ++bbid;
1100 m_bblockids[opnum] = bbid;
1101 }
1102 }
1103
1104
1105
1106 /// For 'R = A_const' where R and A are different, but coerceable,
1107 /// types, turn it into a constant assignment of the exact type.
1108 /// Return true if a change was made, otherwise return false.
1109 bool
RuntimeOptimizer::coerce_assigned_constant (Opcode &op)
1111 {
1112 OSL_DASSERT (op.opname() == u_assign);
1113 Symbol *R (inst()->argsymbol(op.firstarg()+0));
1114 Symbol *A (inst()->argsymbol(op.firstarg()+1));
1115
1116 if (! A->is_constant() || R->typespec().is_closure_based())
1117 return false; // we don't handle those cases
1118
1119 // turn 'R_float = A_int_const' into a float const assignment
1120 if (A->typespec().is_int() && R->typespec().is_float()) {
1121 float result = *(int *)A->data();
1122 int cind = add_constant (R->typespec(), &result);
1123 turn_into_assign (op, cind, "coerce to correct type");
1124 return true;
1125 }
1126
1127 // turn 'R_int = A_float_const' into an int const assignment
1128 if (A->typespec().is_float() && R->typespec().is_int()) {
1129 int result = (int) *(float *)A->data();
1130 int cind = add_constant (R->typespec(), &result);
1131 turn_into_assign (op, cind, "coerce to correct type");
1132 return true;
1133 }
1134
1135 // turn 'R_triple = A_int_const' into a float const assignment
1136 if (A->typespec().is_int() && R->typespec().is_triple()) {
1137 float f = *(int *)A->data();
1138 Vec3 result (f, f, f);
1139 int cind = add_constant (R->typespec(), &result);
1140 turn_into_assign (op, cind, "coerce to correct type");
1141 return true;
1142 }
1143
1144 // turn 'R_triple = A_float_const' into a triple const assignment
1145 if (A->typespec().is_float() && R->typespec().is_triple()) {
1146 float f = *(float *)A->data();
1147 Vec3 result (f, f, f);
1148 int cind = add_constant (R->typespec(), &result);
1149 turn_into_assign (op, cind, "coerce to correct type");
1150 return true;
1151 }
1152
1153 // Turn 'R_triple = A_other_triple_constant' into a triple const assign
1154 if (A->typespec().is_triple() && R->typespec().is_triple() &&
1155 A->typespec() != R->typespec()) {
1156 Vec3 *f = (Vec3 *)A->data();
1157 int cind = add_constant (R->typespec(), f);
1158 turn_into_assign (op, cind, "coerce to correct type");
1159 return true;
1160 }
1161
1162 // turn 'R_matrix = A_float_const' into a matrix const assignment
1163 if (A->typespec().is_float() && R->typespec().is_matrix()) {
1164 float f = *(float *)A->data();
1165 Matrix44 result (f, 0, 0, 0, 0, f, 0, 0, 0, 0, f, 0, 0, 0, 0, f);
1166 int cind = add_constant (R->typespec(), &result);
1167 turn_into_assign (op, cind, "coerce to correct type");
1168 return true;
1169 }
1170 // turn 'R_matrix = A_int_const' into a matrix const assignment
1171 if (A->typespec().is_int() && R->typespec().is_matrix()) {
1172 float f = *(int *)A->data();
1173 Matrix44 result (f, 0, 0, 0, 0, f, 0, 0, 0, 0, f, 0, 0, 0, 0, f);
1174 int cind = add_constant (R->typespec(), &result);
1175 turn_into_assign (op, cind, "coerce to correct type");
1176 return true;
1177 }
1178
1179 return false;
1180 }
1181
1182
1183
1184 void
RuntimeOptimizer::clear_stale_syms ()
1186 {
1187 m_stale_syms.clear ();
1188 }
1189
1190
1191
1192 void
RuntimeOptimizer::use_stale_sym (int sym)
1194 {
1195 FastIntMap::iterator i = m_stale_syms.find(sym);
1196 if (i != m_stale_syms.end())
1197 m_stale_syms.erase (i);
1198 }
1199
1200
1201
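/// Return true if op is a "simple" assignment: its only written argument
/// is arg 0 (which is write-only), its op descriptor is flagged as
/// simple_assign, and the result symbol isn't also one of the read args.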
1202 bool
RuntimeOptimizer::is_simple_assign (Opcode &op, const OpDescriptor *opd)
1204 {
1205 // Simple only if arg0 is the only write, and is write only.
1206 if (op.argwrite_bits() != 1 || op.argread(0))
1207 return false;
1208 if (! opd)
1209 opd = shadingsys().op_descriptor (op.opname());
1210 if (!opd || !opd->simple_assign)
1211 return false; // reject all other known non-simple assignments
1212 // Make sure the result isn't also read
1213 int result = oparg(op,0);
1214 for (int i = 1, e = op.nargs(); i < e; ++i)
1215 if (oparg(op,i) == result)
1216 return false;
1217 return true;
1218 }
1219
1220
1221
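/// Note that symbol 'sym' is overwritten by op 'opnum'. If stale-assignment
/// elision is enabled and a previous assignment to the symbol was never
/// read since, turn that earlier op into a nop.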
1222 void
RuntimeOptimizer::simple_sym_assign (int sym, int opnum)
1224 {
1225 if (optimize() >= 2 && m_opt_stale_assign) {
1226 FastIntMap::iterator i = m_stale_syms.find(sym);
1227 if (i != m_stale_syms.end()) {
1228 Opcode &uselessop (inst()->ops()[i->second]);
1229 if (uselessop.opname() != u_nop && uselessop.opname() != u_functioncall_nr)
1230 turn_into_nop (uselessop,
1231 debug() > 1 ? Strutil::sprintf("remove stale value assignment to %s, reassigned on op %d",
1232 opargsym(uselessop,0)->name(), opnum).c_str() : "");
1233 }
1234 }
1235 m_stale_syms[sym] = opnum;
1236 }
1237
1238
1239
1240 bool
RuntimeOptimizer::unread_after (const Symbol *A, int opnum)
1242 {
1243 // Try to figure out if this symbol is completely unused after this
1244 // op (and thus, any values written to it now will never be needed).
1245
1246 // Globals may be read by later layers
1247 if (A->symtype() == SymTypeGlobal)
1248 return false;
1249
1250 // Params may be read afterwards if connected to a downstream
1251 // layer or if "elide_unconnected_outputs" is turned off.
1252 if (A->symtype() == SymTypeOutputParam || A->symtype() == SymTypeParam) {
1253 if (! m_opt_elide_unconnected_outputs)
return false; // Asked not to do this optimization
1255 if (A->connected_down())
1256 return false; // Connected to something downstream
1257 if (A->renderer_output())
1258 return false; // This is a renderer output -- don't cull it
1259 }
1260
1261 // For all else, check if it's either never read at all in this
1262 // layer or it's only read earlier and we're not part of a loop
1263 return !A->everread() || (A->lastread() <= opnum && !m_in_loop[opnum]);
1264 }
1265
1266
1267
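/// Overwrite parameter R's default/instance value storage with 'newdata'
/// (of type 'newdata_type'), converting int data to float or replicating
/// a scalar across an aggregate where necessary.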
1268 void
RuntimeOptimizer::replace_param_value (Symbol *R, const void *newdata,
                                       const TypeSpec &newdata_type)
1271 {
1272 OSL_DASSERT (R->symtype() == SymTypeParam || R->symtype() == SymTypeOutputParam);
1273 TypeDesc Rtype = R->typespec().simpletype();
1274 OSL_DASSERT(R->dataoffset() >= 0);
1275 int Rnvals = int(Rtype.aggregate * Rtype.numelements());
1276 TypeDesc Ntype = newdata_type.simpletype();
1277 if (Ntype == TypeDesc::UNKNOWN)
1278 Ntype = Rtype;
1279 int Nnvals = int(Ntype.aggregate * Ntype.numelements());
1280 if (Rtype.basetype == TypeDesc::FLOAT &&
1281 Ntype.basetype == TypeDesc::FLOAT) {
1282 float *Rdefault = &inst()->m_fparams[R->dataoffset()];
1283 OSL_DASSERT((R->dataoffset()+Rnvals) <= (int)inst()->m_fparams.size());
1284 if (Rnvals == Nnvals) // straight copy
1285 for (int i = 0; i < Rnvals; ++i)
1286 Rdefault[i] = ((const float *)newdata)[i];
1287 else if (Nnvals == 1) // scalar -> aggregate, by replication
1288 for (int i = 0; i < Rnvals; ++i)
1289 Rdefault[i] = ((const float *)newdata)[0];
1290 else {
1291 OSL_ASSERT (0 && "replace_param_value: unexpected types");
1292 }
1293 }
1294 else if (Rtype.basetype == TypeDesc::FLOAT &&
1295 Ntype.basetype == TypeDesc::INT) {
1296 // Careful, this is an int-to-float conversion
1297 float *Rdefault = &inst()->m_fparams[R->dataoffset()];
1298 OSL_DASSERT((R->dataoffset()+Rnvals) <= (int)inst()->m_fparams.size());
1299 if (Rnvals == Nnvals) // straight copy
1300 for (int i = 0; i < Rnvals; ++i)
1301 Rdefault[i] = ((const int *)newdata)[i];
1302 else if (Nnvals == 1) // scalar -> aggregate, by replication
1303 for (int i = 0; i < Rnvals; ++i)
1304 Rdefault[i] = ((const int *)newdata)[0];
1305 else {
1306 OSL_ASSERT (0 && "replace_param_value: unexpected types");
1307 }
1308 }
1309 else if (Rtype.basetype == TypeDesc::INT &&
1310 Ntype.basetype == TypeDesc::INT && Rnvals == Nnvals) {
1311 int *Rdefault = &inst()->m_iparams[R->dataoffset()];
1312 OSL_DASSERT((R->dataoffset()+Rnvals) <= (int)inst()->m_iparams.size());
1313 for (int i = 0; i < Rnvals; ++i)
1314 Rdefault[i] = ((const int *)newdata)[i];
1315 }
1316 else if (Rtype.basetype == TypeDesc::STRING &&
1317 Ntype.basetype == TypeDesc::STRING && Rnvals == Nnvals) {
1318 ustring *Rdefault = &inst()->m_sparams[R->dataoffset()];
1319 OSL_DASSERT((R->dataoffset()+Rnvals) <= (int)inst()->m_sparams.size());
1320 for (int i = 0; i < Rnvals; ++i)
1321 Rdefault[i] = ((const ustring *)newdata)[i];
1322 } else {
1323 OSL_ASSERT (0 && "replace_param_value: unexpected types");
1324 }
1325 }
1326
1327
1328
1329 // Predicate to test if the connection's destination is never used
1330 struct ConnectionDestIs
1331 {
    ConnectionDestIs (const ShaderInstance &inst, const Symbol *sym)
1333 : m_inst(inst), m_sym(sym) { }
    bool operator() (const Connection &c) {
1335 return m_inst.symbol(c.dst.param) == m_sym;
1336 }
1337 private:
1338 const ShaderInstance &m_inst;
1339 const Symbol *m_sym;
1340 };
1341
1342
1343
/// Symbol R in the current instance has a connection or init ops we
/// no longer need; turn it into a plain old instance-value
/// parameter.
1347 void
RuntimeOptimizer::make_param_use_instanceval (Symbol *R, string_view why)
1349 {
1350 if (debug() > 1)
1351 std::cout << "Turning " << R->valuesourcename() << ' '
1352 << R->typespec() << ' ' << R->name()
1353 << " into an instance value "
1354 << why << "\n";
1355
1356 // Mark its source as the instance value, not connected
1357 R->valuesource (Symbol::InstanceVal);
1358 // If it isn't a connection or computed, it doesn't need derivs.
1359 R->has_derivs (false);
1360
1361 // Point the symbol's data pointer to its instance value
1362 // uniform
1363 void *Rdefault = NULL;
1364 OSL_DASSERT(R->dataoffset() >= 0);
1365 TypeDesc Rtype = R->typespec().simpletype();
1366 if (Rtype.basetype == TypeDesc::FLOAT)
1367 Rdefault = &inst()->m_fparams[R->dataoffset()];
1368 else if (Rtype.basetype == TypeDesc::INT)
1369 Rdefault = &inst()->m_iparams[R->dataoffset()];
1370 else if (Rtype.basetype == TypeDesc::STRING)
1371 Rdefault = &inst()->m_sparams[R->dataoffset()];
1372 OSL_DASSERT(Rdefault != NULL);
1373 R->data (Rdefault);
1374
1375 // Get rid of any init ops
1376 if (R->has_init_ops()) {
1377 turn_into_nop (R->initbegin(), R->initend(), "init ops not needed");
1378 R->initbegin (0);
1379 R->initend (0);
1380 }
1381 // Erase R's incoming connections
1382 erase_if (inst()->connections(), ConnectionDestIs(*inst(),R));
1383 }
1384
1385
1386
1387 /// Check for conditions under which assignments to output parameters
1388 /// can be removed.
1389 ///
1390 /// Return true if the assignment is removed entirely.
1391 bool
RuntimeOptimizer::outparam_assign_elision (int opnum, Opcode &op)
1393 {
1394 OSL_DASSERT (op.opname() == u_assign);
1395 Symbol *R (inst()->argsymbol(op.firstarg()+0));
1396 Symbol *A (inst()->argsymbol(op.firstarg()+1));
1397
1398 if (R->symtype() != SymTypeOutputParam)
1399 return false; // This logic is only about output params
1400
1401 // Check for assignment of output params that are written only once
1402 // in the whole shader -- on this statement -- and assigned a
1403 // constant, and the assignment is unconditional. In that case,
1404 // just alias it to the constant from here on out.
1405 if (// R is being assigned a constant of the right type:
1406 A->is_constant() && R->typespec() == A->typespec()
1407 // FIXME -- can this be equivalent() rather than == ?
1408 // and it's written only on this op, and unconditionally:
1409 && R->firstwrite() == opnum && R->lastwrite() == opnum
1410 && !m_in_conditional[opnum]
1411 // and this is not a case of an init op for an output param that
1412 // actually will get an instance value or a connection:
1413 && ! ((R->valuesource() == Symbol::InstanceVal || R->connected())
1414 && R->initbegin() <= opnum && R->initend() > opnum)
1415 ) {
1416 // Alias it to the constant it's being assigned
1417 int cind = inst()->args()[op.firstarg()+1];
1418 global_alias (inst()->args()[op.firstarg()], cind);
1419 // If it's also never read before this assignment and isn't a
1420 // designated renderer output (which we obviously must write!), just
1421 // replace its default value entirely and get rid of the assignment.
1422 if (R->firstread() > opnum && ! R->renderer_output() &&
1423 m_opt_elide_unconnected_outputs) {
1424 make_param_use_instanceval (R, Strutil::sprintf("- written once, with a constant (%s), before any reads", const_value_as_string(*A)));
1425 replace_param_value (R, A->data(), A->typespec());
1426 turn_into_nop (op, debug() > 1 ? Strutil::sprintf("oparam %s never subsequently read or connected", R->name()).c_str() : "");
1427 return true;
1428 }
1429 }
1430
1431 // If the output param will neither be read later in the shader nor
1432 // connected to a downstream layer, then we don't really need this
1433 // assignment at all. Note that unread_after() does take into
1434 // consideration whether it's a renderer output.
1435 if (unread_after(R,opnum)) {
1436 turn_into_nop (op, debug() > 1 ? Strutil::sprintf("oparam %s never subsequently read or connected", R->name()).c_str() : "");
1437 return true;
1438 }
1439
1440 return false;
1441 }
1442
1443
1444
1445
1446 /// If every potentially-written argument to this op is NEVER read, turn
1447 /// it into a nop and return true. We don't do this to ops that have no
1448 /// written args at all, since they tend to have side effects (e.g.,
1449 /// printf, setmessage).
1450 bool
RuntimeOptimizer::useless_op_elision (Opcode &op, int opnum)
1452 {
1453 if (op.nargs()) {
1454 bool writes_something = false;
1455 for (int a = 0; a < op.nargs(); ++a) {
1456 if (op.argwrite(a)) {
1457 writes_something = true;
1458 Symbol *A = opargsym (op, a);
1459 if (! unread_after(A,opnum))
1460 return false;
1461 }
1462 }
1463 // If we get this far, nothing written had any effect
1464 if (writes_something) {
1465 // Enumerate exceptions -- ops that write something, but have
1466 // side effects that means they shouldn't be eliminated.
1467 if (op.opname() == u_pointcloud_write)
1468 return false;
1469 // It's a useless op, eliminate it
1470 turn_into_nop (op, "eliminated op whose writes will never be read");
1471 return true;
1472 }
1473 }
1474 return false;
1475 }
1476
1477
1478
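/// Return the symbol index that 'symindex' should be replaced with at op
/// 'opnum', consulting block-local aliases, permanent symbol aliases,
/// and (for ops in main code) parameter aliases.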
1479 int
RuntimeOptimizer::dealias_symbol (int symindex, int opnum)
1481 {
1482 do {
1483 int i = block_alias (symindex);
1484 if (i >= 0) {
1485 // block-specific alias for the sym
1486 symindex = i;
1487 continue;
1488 }
1489 FastIntMap::const_iterator found;
1490 found = m_symbol_aliases.find (symindex);
1491 if (found != m_symbol_aliases.end()) {
1492 // permanent alias for the sym
1493 symindex = found->second;
1494 continue;
1495 }
1496 if (inst()->symbol(symindex)->symtype() == SymTypeParam &&
1497 opnum >= inst()->maincodebegin()) {
1498 // Only check parameter aliases for main code
1499 found = m_param_aliases.find (symindex);
1500 if (found != m_param_aliases.end()) {
1501 symindex = found->second;
1502 continue;
1503 }
1504 }
1505 } while (0);
1506 return symindex;
1507 }
1508
1509
1510
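/// Forget any block-local alias for 'symindex', both in the current
/// block's alias map and in any saved alias maps on the stack.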
1511 void
RuntimeOptimizer::block_unalias (int symindex)
1513 {
1514 FastIntMap::iterator i = m_block_aliases.find (symindex);
1515 if (i != m_block_aliases.end())
1516 i->second = -1;
1517 // In addition to the current block_aliases, unalias from any
1518 // saved alias lists.
1519 for (auto& ba : m_block_aliases_stack) {
1520 FastIntMap::iterator i = ba->find (symindex);
1521 if (i != ba->end())
1522 i->second = -1;
1523 }
1524 }
1525
1526
1527
1528 /// Make sure there's room for at least one more symbol, so that we can
1529 /// add a const if we need to, without worrying about the addresses of
1530 /// symbols changing if we add a new one soon.
1531 void
RuntimeOptimizer::make_symbol_room (int howmany)
1533 {
1534 inst()->make_symbol_room (howmany);
1535 }
1536
1537
1538
1539
1540 // Predicate to test if a symbol (specified by symbol index, symbol
1541 // pointer, or by the inbound Connection record) is never used within
1542 // the shader or passed along. Subtlety: you can't base the test for
1543 // params on sym->everused(), since of course it may be used within its
1544 // own init ops, but then never subsequently used, and thus be a prime
1545 // candidate for culling. Instead, for params we test whether it was
1546 // used at any point AFTER its init ops.
1547 class SymNeverUsed
1548 {
1549 public:
    SymNeverUsed (const RuntimeOptimizer &rop, const ShaderInstance *inst)
1551 : m_rop(rop), m_inst(inst)
1552 { }
    bool operator() (const Symbol &sym) const {
1554 if (sym.symtype() == SymTypeParam)
1555 return (sym.lastuse() < sym.initend()) && !sym.connected_down();
1556 if (sym.symtype() == SymTypeOutputParam) {
1557 if (! m_rop.opt_elide_unconnected_outputs())
1558 return false; // Asked not to do this optimization
1559 if (sym.connected_down())
1560 return false; // Connected to something downstream
1561 if (sym.renderer_output())
1562 return false; // This is a renderer output
1563 return (sym.lastuse() < sym.initend());
1564 }
1565 return ! sym.everused(); // all other symbol types
1566 }
    bool operator() (int symid) const {
1568 return (*this)(*m_inst->symbol(symid));
1569 }
    bool operator() (const Connection &c) const {
1571 return (*this)(c.dst.param);
1572 }
1573 private:
1574 const RuntimeOptimizer &m_rop;
1575 const ShaderInstance *m_inst;
1576 };
1577
1578
1579
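/// Return the op number of the next instruction in the same basic block
/// that isn't a nop or functioncall_nr, or 0 if the basic block ends
/// first.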
1580 int
RuntimeOptimizer::next_block_instruction (int opnum)
1582 {
1583 int end = (int)inst()->ops().size();
1584 for (int n = opnum+1; n < end && m_bblockids[n] == m_bblockids[opnum]; ++n)
1585 if (inst()->ops()[n].opname() != u_nop && inst()->ops()[n].opname() != u_functioncall_nr)
1586 return n; // Found it!
1587 return 0; // End of ops or end of basic block
1588 }
1589
1590
1591
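/// Look at the pair of adjacent (non-nop) instructions at opnum and
/// op2num within one basic block and apply local simplifications such as
/// eliminating ping-pong or daisy-chain assignments and canceling
/// add/sub pairs. Return the number of changes made.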
1592 int
RuntimeOptimizer::peephole2 (int opnum, int op2num)
1594 {
1595 Opcode &op (inst()->ops()[opnum]);
1596 Opcode &next (inst()->ops()[op2num]);
1597
1598 // N.B. Some of these transformations may look strange; you may
1599 // think "nobody will write code that does that", but (a) they do;
1600 // and (b) it can end up like that after other optimizations have
1601 // changed the code around.
1602
1603 // Ping-pong assignments can eliminate the second one:
1604 // assign a b
1605 // assign b a <-- turn into nop
1606 // But note that if a is an int and b is a float, this transformation
1607 // is not safe because of the intentional truncation.
1608 if (op.opname() == u_assign && next.opname() == u_assign) {
1609 Symbol *a = opargsym(op,0);
1610 Symbol *b = opargsym(op,1);
1611 Symbol *c = opargsym(next,0);
1612 Symbol *d = opargsym(next,1);
1613 if (a == d && b == c) {
1614 // Exclude the integer truncation case
1615 if (! (a->typespec().is_int() && b->typespec().is_float_based())) {
1616 // std::cerr << "ping-pong assignment " << opnum << " of "
1617 // << opargsym(op,0)->mangled() << " and "
1618 // << opargsym(op,1)->mangled() << "\n";
1619 turn_into_nop (next, "ping-pong assignments");
1620 return 1;
1621 }
1622 }
1623 }
1624
1625 // Daisy chain assignments -> use common source
1626 // assign a b
1627 // assign c a
1628 // turns into:
1629 // assign a b
1630 // assign c b
1631 // This may allow a to be eliminated if it's not used elsewhere.
1632 // But note that this doesn't work for float = int = float,
1633 // which intentionally truncates before the assignment to c!
1634 if (op.opname() == u_assign && next.opname() == u_assign) {
1635 Symbol *a = opargsym(op,0);
1636 Symbol *b = opargsym(op,1);
1637 Symbol *c = opargsym(next,0);
1638 Symbol *d = opargsym(next,1);
1639 if (a == d && assignable (c->typespec(), b->typespec())) {
1640 // Exclude the float=int=float case
1641 if (! (a->typespec().is_int() && b->typespec().is_float_based() &&
1642 c->typespec().is_float_based() && !c->typespec().is_array())) {
1643 turn_into_assign (next, inst()->arg(op.firstarg()+1),
1644 "daisy-chain assignments");
1645 return 1;
1646 }
1647 }
1648 }
1649
1650 // Look for adjacent add and subtract of the same value:
1651 // add a a b
1652 // sub a a b
1653 // (or vice versa)
1654 if (((op.opname() == u_add && next.opname() == u_sub) ||
1655 (op.opname() == u_sub && next.opname() == u_add)) &&
1656 opargsym(op,0) == opargsym(next,0) &&
1657 opargsym(op,1) == opargsym(next,1) &&
1658 opargsym(op,2) == opargsym(next,2) &&
1659 opargsym(op,0) == opargsym(op,1)) {
1660 // std::cerr << "dueling add/sub " << opnum << " & " << op2num << ": "
1661 // << opargsym(op,0)->mangled() << "\n";
1662 turn_into_nop (op, "simplify add/sub pair");
1663 turn_into_nop (next, "simplify add/sub pair");
1664 return 2;
1665 }
1666
1667 // Look for add of a value then subtract of the same value
1668 // add a b c or: sub a b c
1669 // sub d a c add d a c
1670 // the second instruction should be changed to
1671 // assign d b
1672 // and furthermore, if the only use of a is on these two lines or
1673 // if a == d, then the first instruction can be changed to a 'nop'.
1674 // Careful, "only used on these two lines" can be tricky if 'a' is a
1675 // global or output parameter, which are used after the shader finishes!
1676 if (((op.opname() == u_add && next.opname() == u_sub) ||
1677 (op.opname() == u_sub && next.opname() == u_add)) &&
1678 opargsym(op,0) == opargsym(next,1) &&
1679 opargsym(op,2) == opargsym(next,2) &&
1680 opargsym(op,0) != opargsym(next,2) /* a != c */) {
1681 Symbol *a = opargsym(op,0);
1682 Symbol *d = opargsym(next,0);
1683 turn_into_assign (next, oparg(op,1)/*b*/, "simplify add/sub pair");
1684 if ((a->firstuse() >= opnum && a->lastuse() <= op2num &&
1685 ((a->symtype() != SymTypeGlobal && a->symtype() != SymTypeOutputParam)))
1686 || a == d) {
1687 turn_into_nop (op, "simplify add/sub pair");
1688 return 2;
1689 }
1690 else
1691 return 1;
1692 }
1693
1694 // Look for simple functions followed by an assignment:
1695 // OP a b...
1696 // assign c a
1697 // If OP is "simple" (completely overwrites its first argument, only
1698 // reads the rest), and a and c are the same type, and a is never
1699 // used again, then we can replace those two instructions with:
1700 // OP c b...
1701 // Careful, "never used again" can be tricky if 'a' is a global or
1702 // output parameter, which are used after the shader finishes!
1703 if (next.opname() == u_assign &&
1704 op.nargs() >= 1 && opargsym(op,0) == opargsym(next,1) &&
1705 is_simple_assign(op)) {
1706 Symbol *a = opargsym(op,0);
1707 Symbol *c = opargsym(next,0);
1708 if (a->firstuse() >= opnum && a->lastuse() <= op2num &&
1709 (a->symtype() != SymTypeGlobal && a->symtype() != SymTypeOutputParam) &&
1710 equivalent (a->typespec(), c->typespec())) {
1711 if (debug() > 1)
1712 debug_opt_ops (opnum, opnum+1,
1713 Strutil::sprintf ("turned '%s %s...' to '%s %s...' as part of daisy-chain",
1714 op.opname(), a->name(), op.opname(), c->name()));
1715 inst()->args()[op.firstarg()] = inst()->args()[next.firstarg()];
1716 c->mark_rw (opnum, false, true);
1717 // Any time we write to a variable that wasn't written to at
1718 // this op previously, we need to block_unalias it, or it
1719 // can dealias to the wrong thing when examining subsequent
1720 // instructions.
1721 block_unalias (oparg(op,0)); // clear any aliases
1722 turn_into_nop (next, "daisy-chain op and assignment");
1723 return 2;
1724 }
1725 }
1726
1727 // Convert this combination
1728 // closure A name arg...
1729 // mul B A weight
1730 // into
1731 // closure B weight name arg...
1732 // That is, collapse a creation and immediate scale of a closure into
1733 // a single closure-with-scale constructor. (Valid if A is not used
1734 // elsewhere.) Further refinement: if weight = 1, no need to do
1735 // the scale, and if weight == 0, eliminate the work entirely.
1736 // We only do this optimization on pass > 1, to give a fair chance
1737 // for other optimizations to be able to turn the weight into a
1738 // constant before we do this one (since if it's 1 or 0, we can
1739 // simplify further).
1740 if (op.opname() == u_closure && next.opname() == u_mul
1741 && optimization_pass() > 1) {
1742 Symbol *a = opargsym(op,0);
1743 Symbol *name = opargsym(op,1);
1744 Symbol *aa = opargsym(next,1);
1745 Symbol *weight = opargsym(next,2);
1746 int weightarg = 2;
1747 if (weight->typespec().is_closure()) { // opposite order
1748 std::swap (aa, weight);
1749 weightarg = 1;
1750 }
1751 if (name->typespec().is_string() &&
1752 a->firstuse() >= opnum && a->lastuse() <= op2num &&
1753 a == aa && weight->typespec().is_triple()) {
1754 if (is_zero(*weight)) {
1755 turn_into_nop (op, "zero-weighted closure");
1756 turn_into_assign (next, add_constant(0.0f),
1757 "zero-weighted closure");
1758 return 1;
1759 }
1760 // FIXME - handle weight being a float as well
1761 std::vector<int> newargs;
1762 newargs.push_back (oparg(next,0)); // B
1763 if (! is_one(*weight))
1764 newargs.push_back (oparg(next,weightarg)); // weight
1765 for (int i = 1; i < op.nargs(); ++i)
1766 newargs.push_back (oparg(op,i));
1767 turn_into_nop (op, "combine closure+mul");
1768 turn_into_nop (next, "combine closure+mul");
1769 insert_code (opnum, u_closure, newargs,
1770 RecomputeRWRanges, GroupWithNext);
1771 if (debug() > 1)
1772 std::cout << "op " << opnum << "-" << (op2num)
1773 << " combined closure+mul\n";
1774 return 1;
1775 }
1776 }
1777
1778 // No changes
1779 return 0;
1780 }
1781
1782
1783
1784 /// Mark our params that feed to later layers, and whether we have any
1785 /// outgoing connections.
1786 void
1787 RuntimeOptimizer::mark_outgoing_connections ()
1788 {
1789 OSL_ASSERT (! inst()->m_instoverrides.size() &&
1790 "don't call this before copy_code_from_master");
1791 inst()->outgoing_connections (false);
1792 FOREACH_PARAM (auto&& s, inst())
1793 s.connected_down (false);
1794 for (int lay = layer()+1; lay < group().nlayers(); ++lay) {
1795 for (auto&& c : group()[lay]->m_connections)
1796 if (c.srclayer == layer()) {
1797 inst()->symbol(c.src.param)->connected_down (true);
1798 inst()->outgoing_connections (true);
1799 }
1800 }
1801 }
1802
1803
1804
1805 /// Check all params and output params to find any that are neither used
1806 /// in the shader (aside from their own init ops, which shouldn't count)
1807 /// nor connected to downstream layers, and for those, remove their init
1808 /// ops and connections.
1809 /// Precondition: mark_outgoing_connections should be up to date.
1810 int
1811 RuntimeOptimizer::remove_unused_params ()
1812 {
1813 int alterations = 0;
1814 SymNeverUsed param_never_used (*this, inst()); // handy predicate
1815
1816 // Get rid of unused params' init ops and clear their read/write ranges
1817 FOREACH_PARAM (auto&& s, inst()) {
1818 if (param_never_used(s) && s.has_init_ops()) {
1819 std::string why;
1820 if (debug() > 1)
1821 why = Strutil::sprintf ("remove init ops of unused param %s %s", s.typespec(), s.name());
1822 turn_into_nop (s.initbegin(), s.initend(), why);
1823 s.set_initrange (0, 0);
1824 s.clear_rw(); // mark as totally unused
1825 ++alterations;
1826 }
1827 }
1828
1829 // Get rid of the Connections themselves
1830 if (debug() > 1) {
1831 for (auto&& c : inst()->connections()) {
1832 if (param_never_used(c)) {
1833 debug_optf(" Connection no longer needed: %s %s\n",
1834 group()[c.srclayer]->layername(),
1835 c.str(group(), inst()));
1836 }
1837 }
1838 }
1839 erase_if (inst()->connections(), param_never_used);
1840
1841 return alterations;
1842 }
1843
1844
1845
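/// Gather into syms the indices of all symbols written by any op in the
/// instruction range [opbegin, opend).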
1846 void
1847 RuntimeOptimizer::catalog_symbol_writes (int opbegin, int opend,
1848 FastIntSet &syms)
1849 {
1850 for (int i = opbegin; i < opend; ++i) {
1851 const Opcode &op (inst()->ops()[i]);
1852 for (int a = 0, nargs = op.nargs(); a < nargs; ++a) {
1853 if (op.argwrite(a))
1854 syms.insert (oparg (op, a));
1855 }
1856 }
1857 }
1858
1859
1860
1861 /// Find situations where an output is simply a copy of a connected
1862 /// input, and eliminate the middleman.
1863 int
1864 RuntimeOptimizer::eliminate_middleman ()
1865 {
1866 int changed = 0;
1867 FOREACH_PARAM (auto&& s, inst()) {
1868 // Skip if this isn't a shader output parameter that's connected
1869 // to a later layer.
1870 if (s.symtype() != SymTypeOutputParam || !s.connected_down())
1871 continue;
1872 // If it's written more than once, or has init ops, don't bother
1873 if (s.firstwrite() != s.lastwrite() || s.has_init_ops())
1874 continue;
1875 // Ok, s is a connected output, written only once, without init ops.
1876
1877 // If the one time it's written isn't a simple assignment, never mind
1878 int opnum = s.firstwrite();
1879 Opcode &op (inst()->ops()[opnum]);
1880 if (op.opname() != u_assign)
1881 continue; // only consider direct assignments
1882 // Now what's it assigned from? If it's not a connected
1883 // parameter, or if it's not an equivalent data type, or if it's
1884 // a closure, never mind.
1885 int src_index = oparg (op, 1);
1886 Symbol *src = opargsym (op, 1);
1887
1888 if (! (src->symtype() == SymTypeParam && src->connected()) ||
1889 ! equivalent(src->typespec(), s.typespec()) ||
1890 s.typespec().is_closure())
1891 continue;
1892
1893 // Only works if the assignment is unconditional. Needs to not
1894 // be in a conditional or loop, and not have any exit or return
1895 // statement before the assignment.
1896 if (! op_is_unconditionally_executed (opnum))
1897 continue;
1898
1899 // OK, output param 's' is simply and unconditionally assigned
1900 // the value of the equivalently-typed input parameter 'src'.
1901 // Doctor downstream shaders that use s to connect directly to
1902 // src.
1903
1904 // First, find what src is connected to.
1905 int upstream_layer = -1, upstream_symbol = -1;
1906 for (int i = 0, e = inst()->nconnections(); i < e; ++i) {
1907 const Connection &c = inst()->connection(i);
1908 if (c.dst.param == src_index && // the connection we want
1909 c.src.is_complete() && c.dst.is_complete() &&
1910 equivalent(c.src.type,c.dst.type) &&
1911 !c.src.type.is_closure() && ! c.dst.type.is_closure()) {
1912 upstream_layer = c.srclayer;
1913 upstream_symbol = c.src.param;
1914 break;
1915 }
1916 }
1917 if (upstream_layer < 0 || upstream_symbol < 0)
1918 continue; // not a complete connection, forget it
1919
1920 ShaderInstance *upinst = group()[upstream_layer];
1921 if (debug() > 1)
1922 std::cout << "Noticing that " << inst()->layername() << "."
1923 << s.name() << " merely copied from " << src->name()
1924 << ", connected from " << upinst->layername() << "."
1925 << upinst->symbol(upstream_symbol)->name() << "\n";
1926
1927 // Find all the downstream connections of s, make them
1928 // connections to src.
1929 int s_index = inst()->symbolindex(&s);
1930 for (int laynum = layer()+1; laynum < group().nlayers(); ++laynum) {
1931 ShaderInstance *downinst = group()[laynum];
1932 for (int i = 0, e = downinst->nconnections(); i < e; ++i) {
1933 Connection &c = downinst->connections()[i];
1934 if (c.srclayer == layer() && // connected to our layer
1935 c.src.param == s_index && // connected to s
1936 c.src.is_complete() && c.dst.is_complete() &&
1937 equivalent(c.src.type,c.dst.type)) {
1938 // just change the connection's referrant to the
1939 // upstream source of s.
1940 c.srclayer = upstream_layer;
1941 c.src.param = upstream_symbol;
1942 ++changed;
1943 shadingsys().m_stat_middlemen_eliminated += 1;
1944 if (debug() > 1) {
1945 const Symbol *dsym = downinst->symbol(c.dst.param);
1946 if (! dsym)
1947 dsym = downinst->mastersymbol(c.dst.param);
1948 const Symbol *usym = upinst->symbol(upstream_symbol);
1949 if (! usym)
1950 usym = upinst->mastersymbol(upstream_symbol);
1951 OSL_DASSERT (dsym && usym);
1952 std::cout << "Removed " << inst()->layername() << "."
1953 << s.name() << " middleman for "
1954 << downinst->layername() << "."
1955 << dsym->name() << ", now connected to "
1956 << upinst->layername() << "."
1957 << usym->name() << "\n";
1958 }
1959 }
1960 }
1961 }
1962 }
1963 return changed;
1964 }
1965
1966
1967
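/// Apply optimizations specific to the 'assign' op at opnum: skip
/// re-assignment of a value that's already current, record block aliases,
/// replace write-once locals/temps by their constant values, and turn
/// assignments whose results are never read into nops.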
1968 int
1969 RuntimeOptimizer::optimize_assignment (Opcode &op, int opnum)
1970 {
1971 // Various optimizations specific to assignment statements
1972 OSL_DASSERT (op.opname() == u_assign);
1973 int changed = 0;
1974 Symbol *R (inst()->argsymbol(op.firstarg()+0));
1975 Symbol *A (inst()->argsymbol(op.firstarg()+1));
1976 bool R_local_or_tmp = (R->symtype() == SymTypeLocal ||
1977 R->symtype() == SymTypeTemp);
1978 if (block_alias(inst()->arg(op.firstarg())) == inst()->arg(op.firstarg()+1) ||
1979 block_alias(inst()->arg(op.firstarg()+1)) == inst()->arg(op.firstarg())) {
1980 // We're re-assigning something already aliased, skip it
1981 turn_into_nop (op, "reassignment of current value (2)");
1982 return ++changed;
1983 }
1984 if (coerce_assigned_constant (op)) {
1985 // A may have changed, so we need to reset it
1986 A = inst()->argsymbol(op.firstarg()+1);
1987 ++changed;
1988 }
1989 // NOW do assignment constant folding, only after we
1990 // have performed all the other transformations that may
1991 // turn this op into an assignment.
1992 changed += constfold_assign (*this, opnum);
1993 if (op.opname() != u_assign) {
1994 // The const fold has changed the assignment to something
1995 // other than assign (presumably nop), so skip the other
1996 // assignment transformations below.
1997 return 0;
1998 }
1999 if ((A->is_constant() || A->lastwrite() < opnum) &&
2000 equivalent(R->typespec(), A->typespec())) {
2001 // Safe to alias R to A for this block, if A is a
2002 // constant or if it's never written to again.
2003 block_alias (inst()->arg(op.firstarg()),
2004 inst()->arg(op.firstarg()+1));
2005 // std::cerr << opnum << " aliasing " << R->mangled() << " to "
2006 // << inst()->argsymbol(op.firstarg()+1)->mangled() << "\n";
2007 }
2008 if (A->is_constant() && R->typespec() == A->typespec() &&
2009 R_local_or_tmp &&
2010 R->firstwrite() == opnum && R->lastwrite() == opnum) {
2011 // This local or temp is written only once in the
2012 // whole shader -- on this statement -- and it's
2013 // assigned a constant. So just alias it to the
2014 // constant.
2015 int cind = inst()->args()[op.firstarg()+1];
2016 global_alias (inst()->args()[op.firstarg()], cind);
2017 turn_into_nop (op, "replace symbol with constant");
2018 return ++changed;
2019 }
2020 if (R_local_or_tmp && ! R->everread()) {
2021 // This local is written but NEVER READ. nop it.
2022 turn_into_nop (op, "local/tmp never read");
2023 return ++changed;
2024 }
2025 if (outparam_assign_elision (opnum, op)) {
2026 return ++changed;
2027 }
2028 if (R == A) {
2029 // Just an assignment to itself -- turn into NOP!
2030 turn_into_nop (op, "self-assignment");
2031 return ++changed;
2032 } else if (R_local_or_tmp && R->lastread() < opnum
2033 && ! m_in_loop[opnum]) {
2034 // Don't bother assigning if we never read it again
2035 turn_into_nop (op, "symbol never read again");
2036 return ++changed;
2037 }
2038 return changed;
2039 }
2040
2041
2042
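/// Copy block aliases from old_block_aliases to new_block_aliases (the
/// two must be different maps), omitting entries that have been erased,
/// entries involving symbols in the optional 'excluded' set, and (unless
/// copy_temps is true) entries involving temporaries.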
2043 void
2044 RuntimeOptimizer::copy_block_aliases (const FastIntMap &old_block_aliases,
2045 FastIntMap &new_block_aliases,
2046 const FastIntSet *excluded,
2047 bool copy_temps)
2048 {
2049 OSL_ASSERT (&old_block_aliases != &new_block_aliases &&
2050 "copy_block_aliases does not work in-place");
2051 // Find all symbols written anywhere in the instruction range
2052 new_block_aliases.clear ();
2053 new_block_aliases.reserve (old_block_aliases.size());
2054 for (auto&& oba : old_block_aliases) {
2055 if (oba.second < 0)
2056 continue; // erased alias -- don't copy
2057 if (! copy_temps && (inst()->symbol(oba.first)->is_temp() ||
2058 inst()->symbol(oba.second)->is_temp()))
2059 continue; // don't copy temp aliases unless told to
2060 if (excluded && (excluded->find(oba.first) != excluded->end() ||
2061 excluded->find(oba.second) != excluded->end()))
2062 continue; // don't copy from excluded list
2063 new_block_aliases[oba.first] = oba.second;
2064 }
2065 }
2066
2067
2068
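/// Optimize the instruction range [beginop, endop), tracking constant
/// aliases within each basic block. If seed_block_aliases is non-NULL, it
/// seeds the aliases of the first basic block encountered. Recurses into
/// the bodies of 'if', 'functioncall', and loop constructs so that aliases
/// can be carried across block boundaries safely. Returns the number of
/// changes made.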
2069 int
2070 RuntimeOptimizer::optimize_ops (int beginop, int endop,
2071 FastIntMap *seed_block_aliases)
2072 {
2073 if (beginop >= endop)
2074 return 0;
2075
2076 // Constant aliases valid for just this basic block
2077 clear_block_aliases ();
2078
2079 // Provide a place where, if we recurse, we can save prior block
2080 // aliases. Register them on the block_aliases_stack so that calls to
2081 // block_unalias() will unalias from there, too.
2082 FastIntMap saved_block_aliases;
2083 m_block_aliases_stack.push_back (&saved_block_aliases);
2084
2085 int lastblock = -1;
2086 int skipops = 0; // extra inserted ops to skip over
2087 int changed = 0;
2088 size_t num_ops = inst()->ops().size();
2089 size_t old_num_ops = num_ops; // track when it changes
2090 for (int opnum = beginop; opnum < endop; opnum += 1) {
2091 OSL_DASSERT (old_num_ops == num_ops); // better not happen unknowingly
2092 OSL_DASSERT(num_ops == inst()->ops().size());
2093 OSL_DASSERT(size_t(opnum) < inst()->ops().size());
2094 if (m_stop_optimizing)
2095 break;
2096 Opcode *op = &inst()->ops()[opnum];
2097 if (skipops) {
2098 // If a previous optimization inserted ops and told us
2099 // to skip over the new ones, we still need to unalias
2100 // any symbols written by this op, but otherwise skip
2101 // all subsequent optimizations until we run down the
2102 // skipops counter.
2103 block_unalias_written_args (*op);
2104 OSL_ASSERT (lastblock == m_bblockids[opnum] &&
2105 "this should not be a new basic block");
2106 --skipops;
2107 continue; // Move along to the next op, no optimization here
2108 }
2109 // Things to do if we've just moved to a new basic block
2110 if (lastblock != m_bblockids[opnum]) {
2111 clear_block_aliases (seed_block_aliases);
2112 seed_block_aliases = NULL; // only the first time
2113 clear_stale_syms ();
2114 lastblock = m_bblockids[opnum];
2115 }
2116 // Things to do at the start of main code:
2117 // * Alias output params to their initial values, if known.
2118 if (opnum == inst()->m_maincodebegin) {
2119 for (int i = inst()->firstparam(); i < inst()->lastparam(); ++i) {
2120 Symbol *s (inst()->symbol(i));
2121 if (s->symtype() == SymTypeOutputParam && s->lockgeom() &&
2122 (s->valuesource() == Symbol::DefaultVal ||
2123 s->valuesource() == Symbol::InstanceVal) &&
2124 ! s->has_init_ops() &&
2125 ! s->typespec().is_closure_based() &&
2126 ! s->typespec().is_structure_based()) {
2127 make_symbol_room (1); // Make sure add_constant is ok
2128 s = inst()->symbol(i);
2129 int cind = add_constant (s->typespec(), s->data());
2130 block_alias (i, cind); // Alias this symbol to the new const
2131 }
2132 }
2133 }
2134 // Nothing below here to do for no-ops, take early out.
2135 if (op->opname() == u_nop || op->opname() == u_functioncall_nr)
2136 continue;
2137 // De-alias the readable args to the op and figure out if
2138 // there are any constants involved.
2139 for (int i = 0, e = op->nargs(); i < e; ++i) {
2140 if (! op->argwrite(i)) { // Don't de-alias args that are written
2141 int argindex = op->firstarg() + i;
2142 int argsymindex = dealias_symbol (inst()->arg(argindex), opnum);
2143 inst()->args()[argindex] = argsymindex;
2144 }
2145 if (op->argread(i))
2146 use_stale_sym (oparg(*op,i));
2147 }
2148
2149 const OpDescriptor *opd = shadingsys().op_descriptor (op->opname());
2150 // If it's a simple assignment and the lvalue is "stale", go
2151 // back and eliminate its last assignment.
2152 if (is_simple_assign(*op, opd))
2153 simple_sym_assign (oparg (*op, 0), opnum);
2154 // Make sure there's room for several more symbols, so that we
2155 // can add a few consts if we need to, without worrying about
2156 // the addresses of symbols changing when we add a new one below.
2157 make_symbol_room (max_new_consts_per_fold);
2158 // For various ops that we know how to effectively
2159 // constant-fold, dispatch to the appropriate routine.
2160 if (optimize() >= 2 && m_opt_constant_fold) {
2161 if (opd && opd->folder) {
2162 int c = (*opd->folder) (*this, opnum);
2163 if (c) {
2164 changed += c;
2165 // Re-check num_ops in case the folder inserted something
2166 num_ops = inst()->ops().size();
2167 skipops = num_ops - old_num_ops;
2168 endop += num_ops - old_num_ops; // adjust how far we loop
2169 old_num_ops = num_ops;
2170 op = &inst()->ops()[opnum]; // in case ops resized
2171 }
2172 }
2173 }
2174 // Clear local block aliases for any args that were written
2175 // by this op
2176 block_unalias_written_args (*op);
2177
2178 // Now we handle assignments.
2179 if (optimize() >= 2 && op->opname() == u_assign && m_opt_assign)
2180 changed += optimize_assignment (*op, opnum);
2181 if (optimize() >= 2 && m_opt_elide_useless_ops && opd
2182 && !(opd->flags & OpDescriptor::SideEffects))
2183 changed += useless_op_elision (*op, opnum);
2184 if (m_stop_optimizing)
2185 break;
2186 // Peephole optimization involving a pair of instructions (the second
2187 // instruction will be in the same basic block).
2188 if (optimize() >= 2 && m_opt_peephole && op->opname() != u_nop && op->opname() != u_functioncall_nr) {
2189 // Find the next instruction in the same basic block
2190 int op2num = next_block_instruction (opnum);
2191 if (op2num) {
2192 int c = peephole2 (opnum, op2num);
2193 if (c) {
2194 changed += c;
2195 // Re-check num_ops in case the folder inserted something
2196 num_ops = inst()->ops().size();
2197 // skipops = num_ops - old_num_ops;
2198 endop += num_ops - old_num_ops; // adjust how far we loop
2199 old_num_ops = num_ops;
2200 op = &inst()->ops()[opnum]; // in case ops resized
2201 }
2202 }
2203 }
2204
2205 // Special cases for "if", "functioncall", and loops: Optimize the
2206 // sequences of instructions in the bodies recursively in a way that
2207 // allows us to be clever about the basic block alias tracking.
2208 ustring opname = op->opname();
2209 if ((opname == u_if || opname == u_functioncall ||
2210 opname == u_for || opname == u_while || opname == u_dowhile)
2211 && shadingsys().m_opt_seed_bblock_aliases) {
2212 // Find all symbols written anywhere in the instruction range
2213 // of the bodies.
2214 FastIntSet symwrites;
2215 catalog_symbol_writes (opnum+1, op->farthest_jump(), symwrites);
2216 // Save the aliases from the basic block we are exiting.
2217 // If & function call: save all prior aliases.
2218 // Loops: don't save aliases involving syms written in the loop.
2219 // Note that for both cases, we don't copy aliases involving
2220 // temps, because that breaks our later assumptions (for temp
2221 // coalescing) that temp uses never cross basic block boundaries.
2222 if (opname == u_if || opname == u_functioncall)
2223 copy_block_aliases (m_block_aliases, saved_block_aliases);
2224 else
2225 copy_block_aliases (m_block_aliases, saved_block_aliases,
2226 &symwrites);
2227 // 'if' has 2 blocks (then, else), function call has just
2228 // one (the body), loops have 4 (init, cond, body, incr),
2229 int njumps = (opname == u_if) ? 2 : (opname == u_functioncall ? 1 : 4);
2230 // Recursively optimize each body block.
2231 for (int j = 0; j < njumps; ++j) {
2232 changed += optimize_ops (j==0 ? opnum+1 : op->jump(j-1),
2233 op->jump(j), &saved_block_aliases);
2234 op = &inst()->ops()[opnum]; // in case ops resized
2235 }
2236 // Adjust optimization loop end if any instructions were added
2237 num_ops = inst()->ops().size();
2238 endop += num_ops - old_num_ops;
2239 old_num_ops = num_ops;
2240 // Now we can restore the original aliases to seed the basic
2241 // block that follows. For if/function, we need to remove all
2242 // aliases referencing syms written within the conditional or
2243 // function body. For loops, recall that we already excluded
2244 // the written syms from the saved_block_aliases.
2245 if (opname == u_if || opname == u_functioncall) {
2246 FastIntMap restored_aliases;
2247 restored_aliases.swap (saved_block_aliases);
2248 // catalog again, in case optimizations in those blocks
2249 // caused writes that weren't apparent before.
2250 catalog_symbol_writes (opnum+1, op->farthest_jump(), symwrites);
2251 copy_block_aliases (restored_aliases, saved_block_aliases,
2252 &symwrites);
2253 }
2254 seed_block_aliases = &saved_block_aliases;
2255 // Get ready to increment to the next instruction
2256 opnum = op->farthest_jump() - 1;
2257 }
2258 }
2259 m_block_aliases_stack.pop_back(); // Done with saved_block_aliases
2260 return changed;
2261 }
2262
2263
2264
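/// Run one round of optimization passes over the current instance:
/// simplify parameters, repeatedly fold constants and apply per-op and
/// peephole optimizations, eliminate middleman connections and unused
/// params, and finally record which messages this layer may send.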
2265 void
2266 RuntimeOptimizer::optimize_instance ()
2267 {
2268 // If "opt_layername" attribute is set, only optimize the named layer
2269 if (!shadingsys().m_opt_layername.empty() &&
2270 shadingsys().m_opt_layername != inst()->layername())
2271 return;
2272
2273 // Make a list of the indices of all constants.
2274 for (int i = 0, e = (int)inst()->symbols().size(); i < e; ++i)
2275 if (inst()->symbol(i)->symtype() == SymTypeConst)
2276 m_all_consts.push_back (i);
2277
2278 // Turn all parameters with instance or default values, and which
2279 // cannot be overridden by geometry values, into constants or
2280 // aliases for globals. Also turn connections from earlier layers'
2281 // outputs that are known to be constants or globals into constants
2282 // or global aliases without any connection.
2283 if (optimize() >= 2 && m_opt_simplify_param) {
2284 simplify_params ();
2285 }
2286
2287 #ifndef NDEBUG
2288 // Confirm that the symbols between [firstparam,lastparam] are all
2289 // input or output params.
2290 FOREACH_PARAM (const Symbol &s, inst()) {
2291 OSL_DASSERT (s.symtype() == SymTypeParam ||
2292 s.symtype() == SymTypeOutputParam);
2293 }
2294 #endif
2295
2296 // Recompute which of our params have downstream connections.
2297 mark_outgoing_connections ();
2298
2299 // Try to fold constants. We take several passes, until we get to
2300 // the point that not much is improving. It rarely goes beyond 3-4
2301 // passes, but we have a hard cutoff just to be sure we don't
2302 // ever get into an infinite loop from an unforeseen cycle where we
2303 // end up inadvertently transforming A => B => A => etc.
2304 int totalchanged = 0;
2305 int reallydone = 0; // Force a few passes after we think we're done
2306 int npasses = shadingsys().opt_passes();
2307 for (m_pass = 0; m_pass < npasses; ++m_pass) {
2308
2309 // Once we've made one pass (and therefore called
2310 // mark_outgoing_connections), we may notice that the layer is
2311 // unused, and therefore can stop doing work to optimize it.
2312 if (m_pass != 0 && inst()->unused())
2313 break;
2314
2315 if (m_stop_optimizing)
2316 break;
2317
2318 if (debug() > 1)
2319 debug_optf("layer %d \"%s\", pass %d:\n",
2320 layer(), inst()->layername(), m_pass);
2321
2322 // Track basic blocks and conditional states
2323 find_conditionals ();
2324 find_basic_blocks ();
2325
2326 // Clear local messages for this instance
2327 m_local_unknown_message_sent = false;
2328 m_local_messages_sent.clear ();
2329
2330 // Figure out which params are just aliases for globals (only
2331 // necessary to do once, on the first pass).
2332 if (m_pass == 0 && optimize() >= 2)
2333 find_params_holding_globals ();
2334
2335 // Here is the meat of the optimization, where we pass over the
2336 // code for this instance and make various transformations.
2337 int changed = optimize_ops (0, (int)inst()->ops().size());
2338
2339 // Now that we've rewritten the code, we need to re-track the
2340 // variable lifetimes.
2341 track_variable_lifetimes ();
2342
2343 // Recompute which of our params have downstream connections.
2344 mark_outgoing_connections ();
2345
2346 // Find situations where an output is simply a copy of a connected
2347 // input, and eliminate the middleman.
2348 if (optimize() >= 2 && m_opt_middleman) {
2349 int c = eliminate_middleman ();
2350 if (c)
2351 mark_outgoing_connections ();
2352 changed += c;
2353 }
2354
2355 // Elide unconnected parameters that are never read.
2356 if (optimize() >= 1)
2357 changed += remove_unused_params ();
2358
2359 // FIXME -- we should re-evaluate whether writes_globals() is still
2360 // true for this layer.
2361
2362 // If nothing changed, we're done optimizing. But wait, it may be
2363 // that after re-tracking variable lifetimes, we can notice new
2364 // optimizations! So force another pass, then we're really done.
2365 totalchanged += changed;
2366 if (changed < 1) {
2367 if (++reallydone > 3)
2368 break;
2369 } else {
2370 reallydone = 0;
2371 }
2372 }
2373
2374 // A layer that was allowed to run lazily originally, if it no
2375 // longer (post-optimized) has any outgoing connections, is no
2376 // longer needed at all.
2377 if (inst()->unused()) {
2378 // Not needed. Remove all its connections and ops.
2379 inst()->connections().clear ();
2380 turn_into_nop (0, (int)inst()->ops().size()-1,
2381 debug() > 1 ? Strutil::sprintf("eliminate layer %s with no outward connections", inst()->layername().c_str()).c_str() : "");
2382 for (auto&& s : inst()->symbols())
2383 s.clear_rw ();
2384 }
2385
2386 // Now that we've optimized this layer, walk through the ops and
2387 // note which messages may have been sent, so subsequent layers will
2388 // know.
2389 for (auto& op : inst()->ops()) {
2390 if (op.opname() == u_setmessage) {
2391 Symbol &Name (*inst()->argsymbol(op.firstarg()+0));
2392 if (Name.is_constant())
2393 m_messages_sent.push_back (*(ustring *)Name.data());
2394 else
2395 m_unknown_message_sent = true;
2396 }
2397 }
2398 }
2399
2400
2401
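/// Replace every isconnected() call in the instance with a constant
/// reflecting the final connectivity of its argument: 1 if connected
/// upstream (or bound to interpolated userdata), 2 if connected
/// downstream, 3 if both, 0 if neither.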
2402 void
2403 RuntimeOptimizer::resolve_isconnected ()
2404 {
2405 for (auto& op : inst()->ops()) {
2406 if (op.opname() == u_isconnected) {
2407 inst()->make_symbol_room (1);
2408 SymbolPtr s = inst()->argsymbol (op.firstarg() + 1);
2409 while (const StructSpec *structspec = s->typespec().structspec()) {
2410 // How to deal with structures -- just change the reference
2411 // to the first field in the struct.
2412 // FIXME -- if we ever allow separate layer connection of
2413 // individual struct members, this will need something more
2414 // sophisticated.
2415 OSL_DASSERT (structspec && structspec->numfields() >= 1);
2416 std::string fieldname = (s->name().string() + "." +
2417 structspec->field(0).name.string());
2418 int fieldsymid = inst()->findparam (ustring(fieldname));
2419 OSL_DASSERT (fieldsymid >= 0);
2420 s = inst()->symbol(fieldsymid);
2421 }
2422 bool upconnected = s->connected();
2423 if (!s->lockgeom() && shadingsys().userdata_isconnected())
2424 upconnected = true;
2425 int val = (upconnected ? 1 : 0) + (s->connected_down() ? 2 : 0);
2426 turn_into_assign (op, add_constant(TypeDesc::TypeInt, &val),
2427 "resolve isconnected()");
2428 }
2429 }
2430 }
2431
2432
2433
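/// Recompute the first/last read and write ranges of the symbols in
/// allsymptrs by re-scanning all of the instance's ops (recomputing basic
/// blocks first if needed).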
2434 void
2435 RuntimeOptimizer::track_variable_lifetimes (const SymbolPtrVec &allsymptrs)
2436 {
2437 SymbolPtrVec oparg_ptrs;
2438 oparg_ptrs.reserve (inst()->args().size());
2439 for (auto&& a : inst()->args())
2440 oparg_ptrs.push_back (inst()->symbol (a));
2441
2442 if (m_bblockids.size() != inst()->ops().size())
2443 find_basic_blocks ();
2444
2445 OSLCompilerImpl::track_variable_lifetimes (inst()->ops(), oparg_ptrs,
2446 allsymptrs, &m_bblockids);
2447 }
2448
2449
2450
2451 void
2452 RuntimeOptimizer::track_variable_lifetimes ()
2453 {
2454 SymbolPtrVec allsymptrs;
2455 allsymptrs.reserve (inst()->symbols().size());
2456 for (auto&& s : inst()->symbols())
2457 allsymptrs.push_back (&s);
2458
2459 track_variable_lifetimes (allsymptrs);
2460 }
2461
2462
2463 // This has O(n^2) memory usage, so only for debugging
2464 //#define DEBUG_SYMBOL_DEPENDENCIES
2465
2466 // Add to the dependency map that "symbol A depends on symbol B".
2467 void
2468 RuntimeOptimizer::add_dependency (SymDependency &dmap, int A, int B)
2469 {
2470 OSL_DASSERT (A < (int)inst()->symbols().size());
2471 OSL_DASSERT (B < (int)inst()->symbols().size());
2472 dmap[A].insert (B);
2473
2474 #ifdef DEBUG_SYMBOL_DEPENDENCIES
2475 // Unification -- make all of B's dependencies be dependencies of A.
2476 for (auto&& r : dmap[B])
2477 dmap[A].insert (r);
2478 #endif
2479 }
2480
2481
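/// Fill rsyms and wsyms with the deduplicated lists of symbol indices
/// that op reads and writes, respectively.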
2482 void
2483 RuntimeOptimizer::syms_used_in_op (Opcode &op, std::vector<int> &rsyms,
2484 std::vector<int> &wsyms)
2485 {
2486 rsyms.clear ();
2487 wsyms.clear ();
2488 for (int i = 0; i < op.nargs(); ++i) {
2489 int arg = inst()->arg (i + op.firstarg());
2490 if (op.argread(i))
2491 if (std::find (rsyms.begin(), rsyms.end(), arg) == rsyms.end())
2492 rsyms.push_back (arg);
2493 if (op.argwrite(i))
2494 if (std::find (wsyms.begin(), wsyms.end(), arg) == wsyms.end())
2495 wsyms.push_back (arg);
2496 }
2497 }
2498
2499
2500
2501 // Fake symbol index for "derivatives" entry in dependency map.
2502 static const int DerivSym = -1;
2503
2504
2505 // Recursively mark symbols that have derivatives from dependency map
2506 void
2507 RuntimeOptimizer::mark_symbol_derivatives (SymDependency &symdeps, SymIntSet &visited, int d)
2508 {
2509 for (auto&& r : symdeps[d]) {
2510 if (visited.find(r) == visited.end()) {
2511 visited.insert(r);
2512
2513 Symbol *s = inst()->symbol(r);
2514
2515 if (s->typespec().elementtype().is_float_based())
2516 s->has_derivs (true);
2517
2518 mark_symbol_derivatives(symdeps, visited, r);
2519 }
2520 }
2521 }
2522
2523
2524 /// Run through all the ops, for each one marking its 'written'
2525 /// arguments as dependent upon its 'read' arguments (and performing
2526 /// unification as we go), yielding a dependency map that lets us look
2527 /// up any symbol and see the set of other symbols on which it ever
2528 /// depends on during execution of the shader.
2529 void
2530 RuntimeOptimizer::track_variable_dependencies ()
2531 {
2532 SymDependency symdeps;
2533
2534 // It's important to note that this is simplistically conservative
2535 // in that it overestimates dependencies. To see why this is the
2536 // case, consider the following code:
2537 // // inputs a,b; outputs x,y; local variable t
2538 // t = a;
2539 // x = t;
2540 // t = b;
2541 // y = t;
2542 // We can see that x depends on a and y depends on b. But the
2543 // dependency analysis we do below thinks that y also depends on a
2544 // (because t depended on both a and b, but at different times).
2545 //
2546 // This naivety will never miss a dependency, but it may
2547 // overestimate dependencies. (Hence we call this "conservative"
2548 // rather than "wrong.") We deem this acceptable for now, since
2549 // it's so much easier to implement the conservative dependency
2550 // analysis, and it's not yet clear that getting it closer to
2551 // optimal will have any performance impact on final shaders. Also
2552 // because this is probably no worse than the "dependency slop" that
2553 // would happen with loops and conditionals. But we certainly may
2554 // revisit with a more sophisticated algorithm if this crops up as
2555 // a legitimate issue.
2556 //
2557 // Because of this conservative approach, it is critical that this
2558 // analysis is done BEFORE temporaries are coalesced (which would
2559 // cause them to be reassigned in exactly the way that confuses this
2560 // analysis).
2561
2562 symdeps.clear ();
2563
2564 std::vector<int> read, written;
2565 bool forcederivs = shadingsys().force_derivs();
2566 // Loop over all ops...
2567 for (auto&& op : inst()->ops()) {
2568 // Gather the list of syms read and written by the op. Reuse the
2569 // vectors defined outside the loop to cut down on malloc/free.
2570 read.clear ();
2571 written.clear ();
2572 syms_used_in_op (op, read, written);
2573
2574 // FIXME -- special cases here! like if any ops implicitly read
2575 // or write to globals without them needing to be arguments.
2576
2577 // For each symbol w written by the op...
2578 for (auto&& w : written) {
2579 // For each symbol r read by the op, make w depend on r.
2580 // (Unless r is a constant, in which case it's not necessary.)
2581 for (auto&& r : read)
2582 if (inst()->symbol(r)->symtype() != SymTypeConst)
2583 add_dependency (symdeps, w, r);
2584 // If the op takes derivs, make the pseudo-symbol DerivSym
2585 // depend on those arguments.
2586 if (op.argtakesderivs_all() || forcederivs) {
2587 for (int a = 0; a < op.nargs(); ++a)
2588 if (op.argtakesderivs(a) || forcederivs) {
2589 Symbol &s (*opargsym (op, a));
2590 // Constants can't take derivs
2591 if (s.symtype() == SymTypeConst)
2592 continue;
2593 // Non-float types can't take derivs
2594 if (s.typespec().is_closure() ||
2595 s.typespec().simpletype().basetype != TypeDesc::FLOAT)
2596 continue;
2597 // Careful -- not all globals can take derivs
2598 if (s.symtype() == SymTypeGlobal &&
2599 ! (s.mangled() == Strings::P ||
2600 s.mangled() == Strings::I ||
2601 s.mangled() == Strings::u ||
2602 s.mangled() == Strings::v ||
2603 s.mangled() == Strings::Ps))
2604 continue;
2605 add_dependency (symdeps, DerivSym,
2606 inst()->arg(a+op.firstarg()));
2607 }
2608 }
2609 }
2610 }
2611
2612 // Propagate derivative dependencies for any syms already known to
2613 // need derivs. It's probably marked that way because another layer
2614 // downstream connects to it and needs derivatives of that
2615 // connection.
2616 int snum = 0;
2617 for (auto&& s : inst()->symbols()) {
2618 // Globals that get written should always provide derivs.
2619 // Exclude N, since its derivs are unreliable anyway, so no point
2620 // making it cause the whole disp shader to need derivs.
2621 if (s.symtype() == SymTypeGlobal && s.everwritten() &&
2622 !s.typespec().is_closure_based() && s.mangled() != Strings::N)
2623 s.has_derivs(true);
2624 if (s.has_derivs())
2625 add_dependency (symdeps, DerivSym, snum);
2626 ++snum;
2627 }
2628
2629 // Mark all symbols needing derivatives as such
2630 SymIntSet visited;
2631 mark_symbol_derivatives (symdeps, visited, DerivSym);
2632
2633 // Only some globals are allowed to have derivatives
2634 for (auto&& s : inst()->symbols()) {
2635 if (s.symtype() == SymTypeGlobal &&
2636 ! (s.mangled() == Strings::P ||
2637 s.mangled() == Strings::I ||
2638 s.mangled() == Strings::u ||
2639 s.mangled() == Strings::v ||
2640 s.mangled() == Strings::Ps))
2641 s.has_derivs (false);
2642 }
2643
2644 #ifdef DEBUG_SYMBOL_DEPENDENCIES
2645 // Helpful for debugging
2646
2647 std::cerr << "track_variable_dependencies\n";
2648 std::cerr << "\nDependencies:\n";
2649 for (auto&& m : symdeps) {
2650 if (m.first == DerivSym)
2651 std::cerr << "$derivs depends on ";
2652 else
2653 std::cerr << inst->symbol(m.first)->mangled() << " depends on ";
2654 for (auto&& d : m.second) {
2655 if (d == DerivSym)
2656 std::cerr << "$derivs ";
2657 else
2658 std::cerr << inst->symbol(d)->mangled() << ' ';
2659 }
2660 std::cerr << "\n";
2661 }
2662 std::cerr << "\n\n";
2663
2664 // Invert the dependency
2665 SymDependency influences;
2666 for (auto&& m : symdeps)
2667 for (auto&& d : m.second)
2668 influences[d].insert (m.first);
2669
2670 std::cerr << "\nReverse dependencies:\n";
2671 for (auto&& m : influences) {
2672 if (m.first == DerivSym)
2673 std::cerr << "$derivs contributes to ";
2674 else
2675 std::cerr << inst->symbol(m.first)->mangled() << " contributes to ";
2676 for (auto&& d : m.second) {
2677 if (d == DerivSym)
2678 std::cerr << "$derivs ";
2679 else
2680 std::cerr << inst->symbol(d)->mangled() << ' ';
2681 }
2682 std::cerr << "\n";
2683 }
2684 std::cerr << "\n\n";
2685 #endif
2686 }
2687
2688
2689
2690 // Is the symbol coalescable?
2691 inline bool
2692 coalescable (const Symbol &s)
2693 {
2694 return (s.symtype() == SymTypeTemp && // only coalesce temporaries
2695 s.everused() && // only if they're used
2696 s.dealias() == &s && // only if not already aliased
2697 ! s.typespec().is_structure() && // only if not a struct
2698 s.fieldid() < 0); // or a struct field
2699 }
2700
2701
2702
2703 /// Coalesce temporaries. During code generation, we make a new
2704 /// temporary EVERY time we need one. Now we examine them all and merge
2705 /// ones of identical type and non-overlapping lifetimes.
2706 void
2707 RuntimeOptimizer::coalesce_temporaries ()
2708 {
2709 // We keep looping until we can't coalesce any more.
2710 int ncoalesced = 1;
2711 while (ncoalesced) {
2712 ncoalesced = 0; // assume we're done, unless we coalesce something
2713
2714 // We use a greedy algorithm that loops over each symbol, and
2715 // then examines all higher-numbered symbols (in order) and
2716 // tries to merge the first one it can find that doesn't overlap
2717 // lifetimes. The temps were created as we generated code, so
2718 // they are already sorted by their "first use". Thus, for any
2719 // pair t1 and t2 that are merged, it is guaranteed that t2 is
2720 // the symbol whose first use is the earliest of all symbols whose
2721 // lifetimes do not overlap t1.
2722
2723 SymbolVec::iterator s;
2724 for (s = inst()->symbols().begin(); s != inst()->symbols().end(); ++s) {
2725 // Skip syms that can't be (or don't need to be) coalesced
2726 if (! coalescable(*s))
2727 continue;
2728
2729 int sfirst = s->firstuse ();
2730 int slast = s->lastuse ();
2731
2732 // Loop through every other symbol
2733 for (SymbolVec::iterator t = s+1; t != inst()->symbols().end(); ++t) {
2734 // Coalesce s and t if both syms are coalescable,
2735 // equivalent types, have nonoverlapping lifetimes,
2736 // and either both do or both do not need derivatives.
2737 if (coalescable (*t) &&
2738 equivalent (s->typespec(), t->typespec()) &&
2739 s->has_derivs() == t->has_derivs() &&
2740 (slast < t->firstuse() || sfirst > t->lastuse())) {
2741 // Make all future t references alias to s
2742 t->alias (&(*s));
2743 // s gets union of the lifetimes
2744 s->union_rw (t->firstread(), t->lastread(),
2745 t->firstwrite(), t->lastwrite());
2746 sfirst = s->firstuse ();
2747 slast = s->lastuse ();
2748 // t gets marked as unused
2749 t->clear_rw ();
2750 ++ncoalesced;
2751 }
2752 }
2753 }
2754 // std::cerr << "Coalesced " << ncoalesced << "\n";
2755 }
2756
2757 // Since we may have aliased temps, now we need to make sure all
2758 // symbol refs are dealiased.
2759 for (auto&& arg : inst()->args()) {
2760 Symbol *s = inst()->symbol(arg);
2761 s = s->dealias ();
2762 arg = s - inst()->symbol(0);
2763 }
2764 }
2765
2766
2767
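/// Post-optimization cleanup for the instance: re-evaluate which globals
/// and userdata params it touches, insert 'useparam' ops, and coalesce
/// temporaries.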
2768 void
2769 RuntimeOptimizer::post_optimize_instance ()
2770 {
2771 inst()->evaluate_writes_globals_and_userdata_params ();
2772
2773 if (inst()->unused())
2774 return; // skip the expensive stuff if we're not used anyway
2775
2776 SymbolPtrVec allsymptrs;
2777 allsymptrs.reserve (inst()->symbols().size());
2778 for (auto&& s : inst()->symbols())
2779 allsymptrs.push_back (&s);
2780
2781 m_bblockids.clear (); // Keep insert_code from getting confused
2782 m_in_conditional.clear ();
2783 m_in_loop.clear ();
2784
2785 add_useparam (allsymptrs);
2786
2787 if (optimize() >= 1 && m_opt_coalesce_temps)
2788 coalesce_temporaries ();
2789 }
2790
2791
2792
2793 void
2794 RuntimeOptimizer::collapse_syms ()
2795 {
2796 //
2797 // Make a new symbol table that removes all the unused symbols.
2798 //
2799
2800 // Mark our params that feed to later layers, so that unused params
2801 // that aren't needed downstream can be removed.
2802 mark_outgoing_connections ();
2803
2804 SymbolVec new_symbols; // buffer for new symbol table
2805 std::vector<int> symbol_remap; // mapping of old sym index to new
2806 int total_syms = 0; // number of new symbols we'll need
2807 SymNeverUsed never_used (*this, inst()); // handy predicate
2808
2809 // First, just count how many we need and set up the mapping
2810 for (auto&& s : inst()->symbols()) {
2811 symbol_remap.push_back (total_syms);
2812 if (! never_used (s))
2813 ++total_syms;
2814 }
2815
2816 // Now make a new table of the right (new) size, and copy the used syms
2817 new_symbols.reserve (total_syms);
2818 for (auto&& s : inst()->symbols()) {
2819 if (! never_used (s))
2820 new_symbols.push_back (s);
2821 }
2822
2823 // Remap all the function arguments to the new indices
2824 for (auto&& arg : inst()->m_instargs)
2825 arg = symbol_remap[arg];
2826
2827 // Fix our connections from upstream shaders
2828 for (auto&& c : inst()->m_connections)
2829 c.dst.param = symbol_remap[c.dst.param];
2830
2831 // Fix downstream connections that reference us
2832 for (int lay = layer()+1; lay < group().nlayers(); ++lay) {
2833 for (auto&& c : group()[lay]->m_connections)
2834 if (c.srclayer == layer())
2835 c.src.param = symbol_remap[c.src.param];
2836 }
2837
2838 // Swap the new symbol list for the old.
2839 std::swap (inst()->m_instsymbols, new_symbols);
2840 {
2841 // adjust memory stats
2842 // Remember that they're already swapped
2843 off_t mem = vectorbytes(new_symbols) - vectorbytes(inst()->m_instsymbols);
2844 ShadingSystemImpl &ss (shadingsys());
2845 spin_lock lock (ss.m_stat_mutex);
2846 ss.m_stat_mem_inst_syms -= mem;
2847 ss.m_stat_mem_inst -= mem;
2848 ss.m_stat_memory -= mem;
2849 }
2850
2851 // Miscellaneous cleanup of other things that used symbol indices
2852 inst()->m_Psym = -1;
2853 inst()->m_Nsym = -1;
2854 inst()->m_firstparam = -1;
2855 inst()->m_lastparam = -1;
2856 int i = 0;
2857 for (auto&& s : inst()->symbols()) {
2858 if (s.symtype() == SymTypeParam || s.symtype() == SymTypeOutputParam) {
2859 if (inst()->m_firstparam < 0)
2860 inst()->m_firstparam = i;
2861 inst()->m_lastparam = i+1;
2862 }
2863 if (s.name() == Strings::P)
2864 inst()->m_Psym = i;
2865 else if (s.name() == Strings::N)
2866 inst()->m_Nsym = i;
2867 ++i;
2868 }
2869 #ifndef NDEBUG
2870 // Confirm that the symbols between [firstparam,lastparam] are all
2871 // input or output params.
2872 FOREACH_PARAM (const Symbol &s, inst()) {
2873 OSL_DASSERT (s.symtype() == SymTypeParam ||
2874 s.symtype() == SymTypeOutputParam);
2875 }
2876 #endif
2877 }
2878
2879
2880
2881 void
2882 RuntimeOptimizer::collapse_ops ()
2883 {
2884 //
2885 // Make new code that removes all the nops
2886 //
2887 OpcodeVec new_ops; // buffer for new code
2888 std::vector<int> op_remap; // mapping of old opcode indices to new
2889 int total_ops = 0; // number of new ops we'll need
2890
2891 // First, just count how many we need and set up the mapping
2892 for (auto&& op : inst()->ops()) {
2893 op_remap.push_back (total_ops);
2894 if (op.opname() != u_nop)
2895 ++total_ops;
2896 }
2897
2898 // Now make a new table of the right (new) size, copy the used ops, and
2899 // reset the jump addresses.
2900 new_ops.reserve (total_ops);
2901 for (auto&& op : inst()->ops()) {
2902 if (op.opname() != u_nop) {
2903 new_ops.push_back (op);
2904 Opcode &newop (new_ops.back());
2905 for (int i = 0; i < (int)Opcode::max_jumps; ++i)
2906 if (newop.jump(i) >= 0)
2907 newop.jump(i) = op_remap[newop.jump(i)];
2908 }
2909 }
2910
2911 // Adjust 'main' code range and init op ranges
2912 inst()->m_maincodebegin = op_remap[inst()->m_maincodebegin];
2913 inst()->m_maincodeend = (int)new_ops.size();
2914 FOREACH_PARAM (auto&& s, inst()) {
2915 if (s.has_init_ops()) {
2916 s.initbegin (op_remap[s.initbegin()]);
2917 if (s.initend() < (int)op_remap.size())
2918 s.initend (op_remap[s.initend()]);
2919 else
2920 s.initend ((int)new_ops.size());
2921 }
2922 }
2923
2924 // Swap the new code for the old.
2925 std::swap (inst()->m_instops, new_ops);
2926
2927 // These are no longer valid
2928 m_bblockids.clear ();
2929 m_in_conditional.clear ();
2930 m_in_loop.clear ();
2931 }
2932
2933
2934
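/// Print a human-readable dump of the instance -- its flags, symbols,
/// code (with jump targets and source locations), and upstream
/// connections -- to the given output stream, for debugging.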
2935 std::ostream &
2936 RuntimeOptimizer::printinst (std::ostream &out) const
2937 {
2938 out << "Shader " << inst()->shadername() << "\n";
2939 out << (inst()->unused() ? " UNUSED" : "");
2940 out << " connections in=" << inst()->nconnections();
2941 out << " out=" << inst()->outgoing_connections();
2942 out << (inst()->writes_globals() ? " writes_globals" : "");
2943 out << (inst()->userdata_params() ? " userdata_params" : "");
2944 out << (inst()->run_lazily() ? " run_lazily" : " run_unconditionally");
2945 out << (inst()->outgoing_connections() ? " outgoing_connections" : "");
2946 out << (inst()->renderer_outputs() ? " renderer_outputs" : "");
2948 out << (inst()->entry_layer() ? " entry_layer" : "");
2949 out << (inst()->last_layer() ? " last_layer" : "");
2950 out << "\n";
2951 out << " symbols:\n";
2952 for (size_t i = 0, e = inst()->symbols().size(); i < e; ++i)
2953 inst()->symbol(i)->print (out, 256);
2954 #if 0
2955 out << " int consts:\n ";
2956 for (size_t i = 0; i < inst()->m_iconsts.size(); ++i)
2957 out << inst()->m_iconsts[i] << ' ';
2958 out << "\n";
2959 out << " float consts:\n ";
2960 for (size_t i = 0; i < inst()->m_fconsts.size(); ++i)
2961 out << inst()->m_fconsts[i] << ' ';
2962 out << "\n";
2963 out << " string consts:\n ";
2964 for (size_t i = 0; i < inst()->m_sconsts.size(); ++i)
2965 out << "\"" << Strutil::escape_chars(inst()->m_sconsts[i]) << "\" ";
2966 out << "\n";
2967 #endif
2968 out << " code:\n";
2969 for (size_t i = 0, e = inst()->ops().size(); i < e; ++i) {
2970 const Opcode &op (inst()->ops()[i]);
2971 if (i == (size_t)inst()->maincodebegin())
2972 out << "(main)\n";
2973 out << " " << i << ": " << op.opname();
2974 bool allconst = true;
2975 for (int a = 0; a < op.nargs(); ++a) {
2976 const Symbol *s (inst()->argsymbol(op.firstarg()+a));
2977 out << " " << s->name();
2978 if (s->symtype() == SymTypeConst) {
2979 out << " (";
2980 s->print_vals(out,16);
2981 out << ")";
2982 }
2983 if (op.argread(a))
2984 allconst &= s->is_constant();
2985 }
2986 for (size_t j = 0; j < Opcode::max_jumps; ++j)
2987 if (op.jump(j) >= 0)
2988 out << " " << op.jump(j);
2989 out << "\t# ";
2990 // out << " rw " << Strutil::sprintf("%x",op.argread_bits())
2991 // << ' ' << op.argwrite_bits();
2992 if (op.argtakesderivs_all())
2993 out << " %derivs(" << op.argtakesderivs_all() << ") ";
2994 if (allconst)
2995 out << " CONST";
2996 if (i == 0 || bblockid(i) != bblockid(i-1))
2997 out << " BBLOCK-START";
2998 std::string filename = op.sourcefile().string();
2999 size_t slash = filename.find_last_of ("/");
3000 if (slash != std::string::npos)
3001 filename.erase (0, slash+1);
3002 if (filename.length())
3003 out << " (" << filename << ":" << op.sourceline() << ")";
3004 out << "\n";
3005 }
3006 if (inst()->nconnections()) {
3007 out << " connections upstream:\n";
3008 for (int i = 0, e = inst()->nconnections(); i < e; ++i) {
3009 const Connection &c (inst()->connection(i));
3010 out << " " << c.dst.type.c_str() << ' '
3011 << inst()->symbol(c.dst.param)->name();
3012 if (c.dst.arrayindex >= 0)
3013 out << '[' << c.dst.arrayindex << ']';
3014 out << " upconnected from layer " << c.srclayer << ' ';
3015 const ShaderInstance *up = group()[c.srclayer];
3016 out << "(" << up->layername() << ") ";
3017 out << " " << c.src.type.c_str() << ' '
3018 << up->symbol(c.src.param)->name();
3019 if (c.src.arrayindex >= 0)
3020 out << '[' << c.src.arrayindex << ']';
3021 out << "\n";
3022 }
3023 }
3024 return out;
3025 }
3026
3027
3028
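/// Top-level driver for the runtime optimizer: copy each layer's code
/// from its master, optimize every layer (forward, then backward so that
/// simplifications in later layers can expose more opportunities), merge
/// duplicate instances, propagate derivative requirements across
/// connections, perform post-optimization cleanup, and finally collapse
/// unused symbols and nop instructions while gathering statistics about
/// what the group needs (textures, closures, globals, userdata).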
3029 void
3030 RuntimeOptimizer::run ()
3031 {
3032 Timer rop_timer;
3033 int nlayers = (int) group().nlayers ();
3034 if (debug())
3035 shadingcontext()->infof("About to optimize shader group %s (%d layers):",
3036 group().name(), nlayers);
3037 if (debug())
3038 std::cout << "About to optimize shader group " << group().name() << "\n";
3039
3040 for (int layer = 0; layer < nlayers; ++layer) {
3041 set_inst (layer);
3042 // These need to happen before merge_instances
3043 inst()->copy_code_from_master (group());
3044 mark_outgoing_connections();
3045 }
3046
3047 // Inventory the network and print pre-optimized debug info
3048 size_t old_nsyms = 0, old_nops = 0;
3049 for (int layer = 0; layer < nlayers; ++layer) {
3050 set_inst (layer);
3051 if (debug() /* && optimize() >= 1*/) {
3052 find_basic_blocks ();
3053 std::cout.flush ();
3054 std::cout << "Before optimizing layer " << layer << " \""
3055 << inst()->layername() << "\" (ID " << inst()->id() << ") :\n";
3056 printinst (std::cout);
3057 std::cout << "\n--------------------------------\n" << std::endl;
3058 }
3059 old_nsyms += inst()->symbols().size();
3060 old_nops += inst()->ops().size();
3061 }
3062
3063 if (shadingsys().m_opt_merge_instances == 1)
3064 shadingsys().merge_instances (group());
3065
3066 m_params_holding_globals.resize (nlayers);
3067
3068 // Inventory for error calls so that if lazyerror=0 we don't incorrectly
3069 // assume the layer is unused.
3070 check_for_error_calls(false);
3071
3072 // Optimize each layer, from first to last
3073 for (int layer = 0; layer < nlayers; ++layer) {
3074 set_inst (layer);
3075 if (inst()->unused())
3076 continue;
3077 // N.B. we need to resolve isconnected() calls before the instance
3078 // is otherwise optimized, or else isconnected() may not reflect
3079 // the original connectivity after substitutions are made.
3080 resolve_isconnected ();
3081 optimize_instance ();
3082 }
3083 check_for_error_calls(false); // re-check
3084
3085 // Optimize each layer again, from last to first (because some
3086 // optimizations are only apparent when the subsequent shaders have
3087 // been simplified).
3088 for (int layer = nlayers-1; layer >= 0; --layer) {
3089 set_inst (layer);
3090 if (! inst()->unused())
3091 optimize_instance ();
3092 }
3093
3094 // Try merging instances again, now that we've optimized
3095 shadingsys().merge_instances (group(), true);
3096
3097 for (int layer = nlayers-1; layer >= 0; --layer) {
3098 set_inst (layer);
3099 if (inst()->unused())
3100 continue;
3101 find_basic_blocks ();
3102 track_variable_dependencies ();
3103
3104 // For our parameters that require derivatives, mark their
3105 // upstream connections as also needing derivatives.
3106 for (auto&& c : inst()->m_connections) {
3107 if (inst()->symbol(c.dst.param)->has_derivs()) {
3108 Symbol *source = group()[c.srclayer]->symbol(c.src.param);
3109 if (source->typespec().elementtype().is_float_based())
3110 source->has_derivs (true);
3111 }
3112 }
3113 }
3114
3115 // Post-opt cleanup: add useparam, coalesce temporaries, etc.
3116 for (int layer = 0; layer < nlayers; ++layer) {
3117 set_inst (layer);
3118 post_optimize_instance ();
3119 }
3120
3121 // Last chance to eliminate duplicate instances
3122 shadingsys().merge_instances (group(), true);
3123
3124 // Last inventory of error() calls, issue warnings if needed.
3125 check_for_error_calls(true);
3126
3127 // Get rid of nop instructions and unused symbols.
3128 size_t new_nsyms = 0, new_nops = 0, new_deriv_syms = 0;
3129 for (int layer = 0; layer < nlayers; ++layer) {
3130 set_inst (layer);
3131 if (inst()->unused())
3132 continue; // no need to print or gather stats for unused layers
3133 if (optimize() >= 1) {
3134 collapse_syms ();
3135 collapse_ops ();
3136 }
3137 if (debug() && !inst()->unused()) {
3138 track_variable_lifetimes ();
3139 std::cout << "After optimizing layer " << layer << " \""
3140 << inst()->layername() << "\" (ID " << inst()->id() << ") :\n";
3141 printinst (std::cout);
3142 std::cout << "\n--------------------------------\n" << std::endl;
3143 }
3144 new_nsyms += inst()->symbols().size();
3145 new_nops += inst()->ops().size();
3146 }
3147
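// Reset the group-wide inventories of render-time needs (textures,
// closures, globals, userdata, attributes); they are recomputed from the
// optimized layers below.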
3148 m_unknown_textures_needed = false;
3149 m_unknown_closures_needed = false;
3150 m_unknown_attributes_needed = false;
3151 m_textures_needed.clear();
3152 m_closures_needed.clear();
3153 m_globals_read = 0;
3154 m_globals_write = 0;
3155 m_globals_needed.clear();
3156 m_userdata_needed.clear();
3157 m_attributes_needed.clear();
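// The group "does nothing" if every used layer contains only 'end' ops
// and useparam ops whose arguments are neither connected downstream nor
// renderer outputs.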
3158 bool does_nothing = true;
3159 for (int layer = 0; layer < nlayers; ++layer) {
3160 set_inst (layer);
3161 if (inst()->unused())
3162 continue; // no need to print or gather stats for unused layers
3163 FOREACH_SYM (Symbol &s, inst()) {
3164 // set the layer numbers
3165 s.layer (layer);
3166 // Find interpolated parameters
3167 if ((s.symtype() == SymTypeParam || s.symtype() == SymTypeOutputParam)
3168 && ! s.lockgeom()) {
3169 UserDataNeeded udn (s.name(), layer, s.typespec().simpletype(),
3170 s.data(), s.has_derivs());
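// If this userdata was already recorded without derivatives but is now
// needed with derivatives, replace the entry so the stronger requirement
// is remembered.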
3171 std::set<UserDataNeeded>::iterator found;
3172 found = m_userdata_needed.find (udn);
3173 if (found == m_userdata_needed.end())
3174 m_userdata_needed.insert (udn);
3175 else if (udn.derivs && ! found->derivs) {
3176 m_userdata_needed.erase (found);
3177 m_userdata_needed.insert (udn);
3178 }
3179 }
3180 // Track which globals the group needs
3181 if (s.symtype() == SymTypeGlobal) {
3182 m_globals_needed.insert (s.name());
3183 int bit = int(ShadingSystem::globals_bit (s.name()));
3184 if (s.everread())
3185 m_globals_read |= bit;
3186 if (s.everwritten())
3187 m_globals_write |= bit;
3188 }
3189 if (s.has_derivs())
3190 ++new_deriv_syms;
3191 }
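// Now scan the ops to record which textures, closures, and attributes
// this layer may need, and whether the layer does any real work.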
3192 for (auto&& op : inst()->ops()) {
3193 const OpDescriptor *opd = shadingsys().op_descriptor (op.opname());
3194 if (! opd)
3195 continue;
3196 // a non-unused layer with a nontrivial op does something
3197 if (op.opname() != Strings::end && op.opname() != Strings::useparam)
3198 does_nothing = false;
3199 // Useparam of a down-connected or renderer output does something
3200 if (op.opname() == Strings::useparam) {
3201 for (int i = 0, e = op.nargs(); i < e; ++i) {
3202 Symbol *sym = opargsym (op, i);
3203 if (sym->connected_down() || sym->renderer_output())
3204 does_nothing = false;
3205 }
3206 }
3207 if (opd->flags & OpDescriptor::Tex) {
3208 // for all the texture ops, arg 1 is the texture name
3209 Symbol *sym = opargsym (op, 1);
3210 OSL_DASSERT (sym && sym->typespec().is_string());
3211 if (sym->is_constant()) {
3212 ustring texname = *(ustring *)sym->data();
3213 m_textures_needed.insert (texname);
3214 } else {
3215 m_unknown_textures_needed = true;
3216 }
3217 }
3218 if (op.opname() == u_closure) {
3219 // It's either 'closure result weight name' or 'closure result name'
3220 Symbol *sym = opargsym (op, 1); // arg 1 is the name (3-arg form) or the weight (4-arg form)
3221 if (sym && !sym->typespec().is_string())
3222 sym = opargsym (op, 2);
3223 OSL_DASSERT (sym && sym->typespec().is_string());
3224 if (sym->is_constant()) {
3225 ustring closurename = *(ustring *)sym->data();
3226 m_closures_needed.insert (closurename);
3227 } else {
3228 m_unknown_closures_needed = true;
3229 }
3230 } else if (op.opname() == u_getattribute) {
3231 Symbol *sym1 = opargsym (op, 1);
3232 OSL_DASSERT (sym1 && sym1->typespec().is_string());
3233 if (sym1->is_constant()) {
3234 if (op.nargs() == 3) {
3235 // getattribute( attributename, result )
3236 m_attributes_needed.insert( AttributeNeeded( *(ustring *)sym1->data() ) );
3237 } else {
3238 OSL_DASSERT (op.nargs() == 4 || op.nargs() == 5);
3239 Symbol *sym2 = opargsym (op, 2);
3240 if (sym2->typespec().is_string()) {
3241 // getattribute( scopename, attributename, result ) or
3242 // getattribute( scopename, attributename, arrayindex, result )
3243 if (sym2->is_constant()) {
3244 m_attributes_needed.insert( AttributeNeeded(
3245 *(ustring *)sym2->data(), *(ustring *)sym1->data()
3246 ) );
3247 } else {
3248 m_unknown_attributes_needed = true;
3249 }
3250 } else {
3251 // getattribute( attributename, arrayindex, result )
3252 m_attributes_needed.insert( AttributeNeeded( *(ustring *)sym1->data() ) );
3253 }
3254 }
3255 } else { // sym1 not constant
3256 m_unknown_attributes_needed = true;
3257 }
3258 }
3259 }
3260 }
3261 group().does_nothing (does_nothing);
3262
3263 m_stat_specialization_time = rop_timer();
3264 {
3265 // adjust memory stats
3266 ShadingSystemImpl &ss (shadingsys());
3267 spin_lock lock (ss.m_stat_mutex);
3268 ss.m_stat_preopt_syms += old_nsyms;
3269 ss.m_stat_preopt_ops += old_nops;
3270 ss.m_stat_postopt_syms += new_nsyms;
3271 ss.m_stat_postopt_ops += new_nops;
3272 ss.m_stat_syms_with_derivs += new_deriv_syms;
3273 if (does_nothing)
3274 ss.m_stat_empty_groups += 1;
3275 }
3276 if (shadingsys().m_compile_report) {
3277 shadingcontext()->infof("Optimized shader group %s:", group().name());
3278 shadingcontext()->infof(" spec %1.2fs, New syms %llu/%llu (%5.1f%%), ops %llu/%llu (%5.1f%%)",
3279 m_stat_specialization_time, new_nsyms, old_nsyms,
3280 100.0*double((long long)new_nsyms-(long long)old_nsyms)/double(old_nsyms),
3281 new_nops, old_nops,
3282 100.0*double((long long)new_nops-(long long)old_nops)/double(old_nops));
3283 if (does_nothing)
3284 shadingcontext()->infof("Group does nothing");
3285 if (m_textures_needed.size()) {
3286 shadingcontext()->infof("Group needs textures:");
3287 for (auto&& f : m_textures_needed)
3288 shadingcontext()->infof(" %s", f);
3289 if (m_unknown_textures_needed)
3290 shadingcontext()->infof(" Also may construct texture names on the fly.");
3291 }
3292 if (m_userdata_needed.size()) {
3293 shadingcontext()->infof("Group potentially needs userdata:");
3294 for (auto&& f : m_userdata_needed)
3295 shadingcontext()->infof(" %s %s %s", f.name, f.type,
3296 f.derivs ? "(derivs)" : "");
3297 }
3298 if (m_attributes_needed.size()) {
3299 shadingcontext()->infof("Group needs attributes:");
3300 for (auto&& f : m_attributes_needed)
3301 shadingcontext()->infof(" %s %s", f.name, f.scope);
3302 if (m_unknown_attributes_needed)
3303 shadingcontext()->infof(" Also may construct attribute names on the fly.");
3304 }
3305 }
3306 }
3307
3308
3309
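// Report an optimization issue for opcode `op`, described by `msg`.
// If `type` includes police_gpu_err and the gpu_opt_error option is on,
// the issue is reported as an error and true is returned. Otherwise, if
// `type` includes police_opt_warn and the opt_warnings option is on, a
// warning is issued; in that case (and when neither option applies)
// false is returned. Typical use, mirroring the calls later in this file:
//     err |= police (op, "some message", police_opt_warn);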
3310 bool
3311 RuntimeOptimizer::police(const Opcode& op, string_view msg, int type)
3312 {
3313 if ((type & police_gpu_err) && shadingsys().m_gpu_opt_error) {
3314 shadingcontext()->errorf("Optimization error for GPUs:\n"
3315 " group: %s\n"
3316 " layer: %s\n"
3317 " source: %s:%d\n"
3318 " issue: %s",
3319 group().name(), inst()->layername(),
3320 op.sourcefile(), op.sourceline(), msg);
3321 return true;
3322 } else if ((type & police_opt_warn) && shadingsys().m_opt_warnings) {
3323 shadingcontext()->warningf("Optimization warning:\n"
3324 " group: %s\n"
3325 " layer: %s\n"
3326 " source: %s:%d\n"
3327 " issue: %s",
3328 group().name(), inst()->layername(),
3329 op.sourcefile(), op.sourceline(), msg);
3330 }
3331 return false;
3332 }
3333
3334
3335
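// Scan all layers (even ones believed unused) for error() ops that remain
// after optimization, setting each instance's has_error_op flag. If `warn`
// is true, report each such op via police(). Returns true only if a report
// was escalated to an error. A no-op when the lazyerror option is set.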
3336 bool
3337 RuntimeOptimizer::check_for_error_calls(bool warn)
3338 {
3339 // If the "lazyerror" option is set, there's nothing to do.
3340 if (shadingsys().m_lazyerror)
3341 return false;
3342
3343 // Check all the layers (even ones we think are unused) for `error()`
3344 // calls that still remain after runtime optimization. If found, warn
3345 // and mark the layer as having error calls.
3346 bool err = false;
3347 int nlayers = (int) group().nlayers ();
3348 for (int layer = 0; layer < nlayers; ++layer) {
3349 set_inst (layer);
3350 inst()->has_error_op(false);
3351 for (auto&& op : inst()->ops()) {
3352 if (op.opname() == Strings::error) {
3353 inst()->has_error_op(true);
3354 if (warn)
3355 err |= police (op, "error() call present in optimized shader.",
3356 police_opt_warn);
3357 }
3358 }
3359 }
3360 return err;
3361 }
3362
3363
3364
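// Scan every used layer for constructs that survived optimization but may
// be problematic at render time -- currently, texture ops whose texture
// name could not be folded to a constant, which is an error for GPU
// renderers when gpu_opt_error is set. Returns true if anything was
// reported as a hard error.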
3365 bool
3366 RuntimeOptimizer::police_failed_optimizations()
3367 {
3368 bool err = false;
3369 bool do_warn = shadingsys().m_opt_warnings;
3370 bool do_gpu_err = shadingsys().m_gpu_opt_error;
3371 if (!do_warn && !do_gpu_err)
3372 return false; // no need for any of this expense
3373
3374 int nlayers = (int) group().nlayers ();
3375 for (int layer = 0; layer < nlayers; ++layer) {
3376 set_inst (layer);
3377 if (inst()->unused())
3378 continue; // no need to print or gather stats for unused layers
3379 for (auto&& op : inst()->ops()) {
3380 const OpDescriptor *opd = shadingsys().op_descriptor (op.opname());
3381 if (! opd)
3382 continue;
3383 if (opd->flags & OpDescriptor::Tex) {
3384 Symbol *sym = opargsym (op, 1); // arg 1 is texture name
3385 OSL_DASSERT(sym && sym->typespec().is_string());
3386 if (! sym->is_constant()) {
3387 err |= police (op, OIIO::Strutil::sprintf("%s(): texture name cannot be reduced to a constant.",
3388 op.opname()),
3389 police_gpu_err);
3390 }
3391 }
3392 // FIXME: Will add more tests and warnings as we go
3393 }
3394 }
3395 return err;
3396 }
3397
3398 }; // namespace pvt
3399 OSL_NAMESPACE_EXIT
3400