1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4 
5 #include <vector>
6 #include <cmath>
7 #include <cstdlib>
8 
9 #include <OpenImageIO/fmath.h>
10 #include <OpenImageIO/sysutil.h>
11 
12 #include "oslexec_pvt.h"
13 #include "opcolor.h"
14 #include "runtimeoptimize.h"
15 #include <OSL/dual.h>
16 #include <OSL/oslnoise.h>
17 using namespace OSL;
18 using namespace OSL::pvt;
19 
20 
21 // names of ops we'll be using frequently
22 static ustring u_nop    ("nop"),
23                u_assign ("assign"),
24                u_aassign ("aassign"),
25                u_compassign ("compassign"),
26                u_mxcompassign ("mxcompassign"),
27                u_add    ("add"),
28                u_sub    ("sub"),
29                u_mul    ("mul"),
30                u_sqrt   ("sqrt"),
31                u_inversesqrt ("inversesqrt"),
32                u_cbrt   ("cbrt"),
33                u_if     ("if"),
34                u_eq     ("eq"),
35                u_return ("return"),
36                u_error  ("error"),
37                u_fmterror("%s"),
38                u_fmt_range_check("Index [%d] out of range %s[0..%d]: %s:%d (group %s, layer %d %s, shader %s)");
39 
40 static ustring u_cell ("cell"), u_cellnoise ("cellnoise");
41 
42 
43 OSL_NAMESPACE_ENTER
44 
45 namespace pvt {   // OSL::pvt
46 
47 
48 inline bool
equal_consts(const Symbol & A,const Symbol & B)49 equal_consts (const Symbol &A, const Symbol &B)
50 {
51     return (&A == &B ||
52             (equivalent (A.typespec(), B.typespec()) &&
53              !memcmp (A.data(), B.data(), A.typespec().simpletype().size())));
54 }
55 
56 
57 
58 inline bool
unequal_consts(const Symbol & A,const Symbol & B)59 unequal_consts (const Symbol &A, const Symbol &B)
60 {
61     return (equivalent (A.typespec(), B.typespec()) &&
62             memcmp (A.data(), B.data(), A.typespec().simpletype().size()));
63 }
64 
65 
66 
constfold_none(RuntimeOptimizer &,int)67 int constfold_none(RuntimeOptimizer& /*rop*/, int /*opnum*/)
68 {
69     return 0;
70 }
71 
72 
73 
/// Fold 'add R A B'.
///   * 0 + B  =>  assign R B
///   * A + 0  =>  assign R A
///   * const + const (int, float, triple, or mixed float/triple)
///                =>  assign R <precomputed constant>
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_add)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    const int a_arg = op.firstarg() + 1;
    const int b_arg = op.firstarg() + 2;
    Symbol &A (*rop.inst()->argsymbol(a_arg));
    Symbol &B (*rop.inst()->argsymbol(b_arg));

    // Additive identity on either side: just copy the other operand.
    if (rop.is_zero(A)) {
        rop.turn_into_assign (op, rop.inst()->arg(b_arg), "0 + A => A");
        return 1;
    }
    if (rop.is_zero(B)) {
        rop.turn_into_assign (op, rop.inst()->arg(a_arg), "A + 0 => A");
        return 1;
    }

    // Everything below needs both operands to be constants.
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    const TypeSpec &ta (A.typespec());
    const TypeSpec &tb (B.typespec());
    int cind = -1;   // index of the folded constant, -1 if unhandled types
    if (ta.is_int() && tb.is_int()) {
        const int sum = *(const int *)A.data() + *(const int *)B.data();
        cind = rop.add_constant (ta, &sum);
    } else if (ta.is_float() && tb.is_float()) {
        const float sum = *(const float *)A.data() + *(const float *)B.data();
        cind = rop.add_constant (ta, &sum);
    } else if (ta.is_triple() && tb.is_triple()) {
        const Vec3 sum = *(const Vec3 *)A.data() + *(const Vec3 *)B.data();
        cind = rop.add_constant (ta, &sum);
    } else if (ta.is_triple() && tb.is_float()) {
        // float operand is replicated across all three components
        const Vec3 sum = *(const Vec3 *)A.data() + Vec3(*(const float *)B.data());
        cind = rop.add_constant (ta, &sum);
    } else if (ta.is_float() && tb.is_triple()) {
        const Vec3 sum = Vec3(*(const float *)A.data()) + *(const Vec3 *)B.data();
        cind = rop.add_constant (tb, &sum);
    }
    if (cind < 0)
        return 0;   // type combination we don't fold
    rop.turn_into_assign (op, cind, "const + const");
    return 1;
}
121 
122 
123 
/// Fold 'sub R A B'.
///   * A - 0  =>  assign R A
///   * const - const (int, float, triple, or mixed float/triple)
///                =>  assign R <precomputed constant>
///   * A - A  =>  assign R 0, even when A is not a constant
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_sub)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    if (rop.is_zero(B)) {
        // R = A - 0   =>   R = A
        rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+1),
                              "A - 0 => A");
        return 1;
    }
    // R = A - B, if both are constants, =>  R = C
    if (A.is_constant() && B.is_constant()) {
        const TypeSpec &ta (A.typespec());
        const TypeSpec &tb (B.typespec());
        int cind = -1;   // index of the folded constant, -1 if unhandled types
        if (ta.is_int() && tb.is_int()) {
            int result = *(int *)A.data() - *(int *)B.data();
            cind = rop.add_constant (ta, &result);
        } else if (ta.is_float() && tb.is_float()) {
            float result = *(float *)A.data() - *(float *)B.data();
            cind = rop.add_constant (ta, &result);
        } else if (ta.is_triple() && tb.is_triple()) {
            Vec3 result = *(Vec3 *)A.data() - *(Vec3 *)B.data();
            cind = rop.add_constant (ta, &result);
        } else if (ta.is_triple() && tb.is_float()) {
            Vec3 result = *(Vec3 *)A.data() - Vec3(*(float *)B.data());
            cind = rop.add_constant (ta, &result);
        } else if (ta.is_float() && tb.is_triple()) {
            Vec3 result = Vec3(*(float *)A.data()) - *(Vec3 *)B.data();
            cind = rop.add_constant (tb, &result);
        }
        if (cind >= 0) {
            rop.turn_into_assign (op, cind, "const - const");
            return 1;
        }
    }
    // R = A - A  =>  R = 0    even if not constant!
    if (&A == &B) {
        rop.turn_into_assign_zero (op, "A - A => 0");
        // The op was rewritten, so report the change.  (Previously this
        // fell through to 'return 0' and the modification went unreported.)
        return 1;
    }
    return 0;
}
170 
171 
172 
DECLFOLDER(constfold_mul)173 DECLFOLDER(constfold_mul)
174 {
175     Opcode &op (rop.inst()->ops()[opnum]);
176     Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
177     Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
178     if (rop.is_one(A)) {
179         // R = 1 * B  =>   R = B
180         rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+2),
181                               "1 * A => A");
182         return 1;
183     }
184     if (rop.is_zero(A)) {
185         // R = 0 * B  =>   R = 0
186         rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+1),
187                               "0 * A => 0");
188         return 1;
189     }
190     if (rop.is_one(B)) {
191         // R = A * 1   =>   R = A
192         rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+1),
193                               "A * 1 => A");
194         return 1;
195     }
196     if (rop.is_zero(B)) {
197         // R = A * 0   =>   R = 0
198         rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+2),
199                               "A * 0 => 0");
200         return 1;
201     }
202     if (A.is_constant() && B.is_constant()) {
203         if (A.typespec().is_int() && B.typespec().is_int()) {
204             int result = *(int *)A.data() * *(int *)B.data();
205             int cind = rop.add_constant (A.typespec(), &result);
206             rop.turn_into_assign (op, cind, "const * const");
207             return 1;
208         } else if (A.typespec().is_float() && B.typespec().is_float()) {
209             float result = (*(float *)A.data()) * (*(float *)B.data());
210             int cind = rop.add_constant (A.typespec(), &result);
211             rop.turn_into_assign (op, cind, "const * const");
212             return 1;
213         } else if (A.typespec().is_triple() && B.typespec().is_triple()) {
214             Vec3 result = (*(Vec3 *)A.data()) * (*(Vec3 *)B.data());
215             int cind = rop.add_constant (A.typespec(), &result);
216             rop.turn_into_assign (op, cind, "const * const");
217             return 1;
218         } else if (A.typespec().is_triple() && B.typespec().is_float()) {
219             Vec3 result = (*(Vec3 *)A.data()) * (*(float *)B.data());
220             int cind = rop.add_constant (A.typespec(), &result);
221             rop.turn_into_assign (op, cind, "const * const");
222             return 1;
223         } else if (A.typespec().is_float() && B.typespec().is_triple()) {
224             Vec3 result = (*(float *)A.data()) * (*(Vec3 *)B.data());
225             int cind = rop.add_constant (B.typespec(), &result);
226             rop.turn_into_assign (op, cind, "const * const");
227             return 1;
228         }
229     }
230     return 0;
231 }
232 
233 
234 
/// Fold 'div R A B'.
///   * A / 1  =>  assign R A
///   * A / 0  =>  assign R 0   (for float, triple, or int B)
///   * const / const for the int/float/triple combinations handled below
///                =>  assign R <precomputed constant of R's type>
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_div)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &R (*rop.inst()->argsymbol(op.firstarg()+0));
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));

    if (rop.is_one(B)) {
        rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+1),
                              "A / 1 => A");
        return 1;
    }
    if (rop.is_zero(B)) {
        const TypeSpec &zt (B.typespec());
        if (zt.is_float() || zt.is_triple() || zt.is_int()) {
            // Division by zero is defined to yield zero.
            rop.turn_into_assign_zero (op, "A / 0 => 0 (by OSL division rules)");
            return 1;
        }
    }

    if (! (A.is_constant() && B.is_constant()))
        return 0;

    const TypeSpec &ta (A.typespec());
    const TypeSpec &tb (B.typespec());
    int cind = -1;   // stays -1 for type pairs we don't know how to fold
    if (ta.is_int()) {
        const int a = *(const int *)A.data();
        if (tb.is_int()) {
            const int quotient = a / *(const int *)B.data();
            cind = rop.add_constant (R.typespec(), &quotient);
        } else if (tb.is_float()) {
            const float quotient = a / *(const float *)B.data();
            cind = rop.add_constant (R.typespec(), &quotient);
        }
    } else if (ta.is_float()) {
        const float a = *(const float *)A.data();
        if (tb.is_int()) {
            const float quotient = a / *(const int *)B.data();
            cind = rop.add_constant (R.typespec(), &quotient);
        } else if (tb.is_float()) {
            const float quotient = a / *(const float *)B.data();
            cind = rop.add_constant (R.typespec(), &quotient);
        } else if (tb.is_triple()) {
            // Replicate the scalar numerator across the three components.
            const Vec3 quotient = Vec3(a,a,a) / *(const Vec3 *)B.data();
            cind = rop.add_constant (R.typespec(), &quotient);
        }
    } else if (ta.is_triple()) {
        if (tb.is_triple()) {
            const Vec3 quotient = *(const Vec3 *)A.data() / *(const Vec3 *)B.data();
            cind = rop.add_constant (R.typespec(), &quotient);
        } else if (tb.is_float()) {
            const Vec3 quotient = *(const Vec3 *)A.data() / *(const float *)B.data();
            cind = rop.add_constant (R.typespec(), &quotient);
        }
    }

    if (cind < 0)
        return 0;
    rop.turn_into_assign (op, cind, "const / const");
    return 1;
}
285 
286 
287 
/// Fold 'mod R A B'.
///   * 0 % B  =>  assign R 0
///   * A % 0  =>  assign R 0
///   * int const % int const  =>  assign R <precomputed constant>
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_mod)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));

    // A zero on either side gives a zero result.
    if (rop.is_zero(A)) {
        rop.turn_into_assign_zero (op, "0 % A => 0");
        return 1;
    }
    if (rop.is_zero(B)) {
        rop.turn_into_assign_zero (op, "A % 0 => 0");
        return 1;
    }

    const bool both_const = A.is_constant() && B.is_constant();
    if (! both_const)
        return 0;
    if (! (A.typespec().is_int() && B.typespec().is_int()))
        return 0;   // only integer modulus is folded

    const int numer = A.get_int();
    const int denom = B.get_int();
    int remainder = 0;           // modulus by zero yields zero
    if (denom)
        remainder = numer % denom;
    const int cind = rop.add_constant (remainder);
    rop.turn_into_assign (op, cind, "const % const");
    return 1;
}
314 
315 
316 
/// Fold 'dot R A B'.
///   * either operand is zero  =>  assign R 0
///   * dot(const,const)        =>  assign R <precomputed float constant>
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_dot)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));

    // A zero vector on either side makes the whole product zero.
    const bool has_zero_operand = rop.is_zero(A) || rop.is_zero(B);
    if (has_zero_operand) {
        rop.turn_into_assign_zero (op, "dot(a,(0,0,0)) => 0");
        return 1;
    }

    if (! (A.is_constant() && B.is_constant()))
        return 0;

    // Both constant: evaluate the dot product now.
    OSL_DASSERT(A.typespec().is_triple() && B.typespec().is_triple());
    const Vec3 &va (*(const Vec3 *)A.data());
    const Vec3 &vb (*(const Vec3 *)B.data());
    const float product = va.dot (vb);
    const int cind = rop.add_constant (TypeDesc::TypeFloat, &product);
    rop.turn_into_assign (op, cind, "dot(const,const)");
    return 1;
}
340 
341 
342 
/// Fold 'neg R A': when A is an int, float, or triple constant, replace
/// the op with an assignment of the precomputed negated constant.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_neg)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    if (! A.is_constant())
        return 0;

    const TypeSpec &ta (A.typespec());
    int cind = -1;   // index of the folded constant, -1 if unhandled type
    if (ta.is_int()) {
        const int negated = - *(const int *)A.data();
        cind = rop.add_constant (ta, &negated);
    } else if (ta.is_float()) {
        const float negated = - *(const float *)A.data();
        cind = rop.add_constant (ta, &negated);
    } else if (ta.is_triple()) {
        const Vec3 negated = - *(const Vec3 *)A.data();
        cind = rop.add_constant (ta, &negated);
    }
    if (cind < 0)
        return 0;
    rop.turn_into_assign (op, cind, "-const");
    return 1;
}
367 
368 
369 
/// Fold 'abs R A': when A is an int, float, or triple constant, replace
/// the op with an assignment of the precomputed absolute value (taken
/// per component for triples).
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_abs)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    if (! A.is_constant())
        return 0;

    const TypeSpec &ta (A.typespec());
    int cind = -1;   // index of the folded constant, -1 if unhandled type
    if (ta.is_int()) {
        const int magnitude = std::abs(*(const int *)A.data());
        cind = rop.add_constant (ta, &magnitude);
    } else if (ta.is_float()) {
        const float magnitude = std::abs(*(const float *)A.data());
        cind = rop.add_constant (ta, &magnitude);
    } else if (ta.is_triple()) {
        const Vec3 &v (*(const Vec3 *)A.data());
        const Vec3 magnitude (std::abs(v.x), std::abs(v.y), std::abs(v.z));
        cind = rop.add_constant (ta, &magnitude);
    }
    if (cind < 0)
        return 0;
    rop.turn_into_assign (op, cind, "abs(const)");
    return 1;
}
397 
398 
399 
/// Fold 'eq R A B' with two constant operands into 'assign R X', where
/// X is the int constant 1 or 0.  Operands of equivalent type are
/// compared with equal_consts(); a float/int mix is compared by value;
/// any other type combination is left unfolded.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_eq)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    const TypeSpec &ta (A.typespec());
    const TypeSpec &tb (B.typespec());
    bool are_equal = false;
    if (equivalent (ta, tb))
        are_equal = equal_consts (A, B);
    else if (ta.is_float() && tb.is_int())
        are_equal = (*(const float *)A.data() == *(const int *)B.data());
    else if (ta.is_int() && tb.is_float())
        are_equal = (*(const int *)A.data() == *(const float *)B.data());
    else
        return 0;  // unhandled cases

    static const int int_zero = 0, int_one = 1;
    const int *truth = are_equal ? &int_one : &int_zero;
    const int cind = rop.add_constant (TypeDesc::TypeInt, truth);
    rop.turn_into_assign (op, cind, "const == const");
    return 1;
}
425 
426 
427 
/// Fold 'neq R A B' with two constant operands into 'assign R X', where
/// X is the int constant 1 or 0.  Operands of equivalent type are
/// compared with equal_consts() (negated); a float/int mix is compared
/// by value; any other type combination is left unfolded.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_neq)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    const TypeSpec &ta (A.typespec());
    const TypeSpec &tb (B.typespec());
    bool differ = false;
    if (equivalent (ta, tb))
        differ = ! equal_consts (A, B);
    else if (ta.is_float() && tb.is_int())
        differ = (*(const float *)A.data() != *(const int *)B.data());
    else if (ta.is_int() && tb.is_float())
        differ = (*(const int *)A.data() != *(const float *)B.data());
    else
        return 0;  // unhandled case

    static const int int_zero = 0, int_one = 1;
    const int *truth = differ ? &int_one : &int_zero;
    const int cind = rop.add_constant (TypeDesc::TypeInt, truth);
    rop.turn_into_assign (op, cind, "const != const");
    return 1;
}
453 
454 
455 
/// Fold 'lt R A B' with two constant int/float operands (any mix) into
/// 'assign R X', where X is the int constant 1 or 0.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_lt)
{
    static const int int_zero = 0, int_one = 1;
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    const TypeSpec &ta (A.typespec());
    const TypeSpec &tb (B.typespec());
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    bool holds = false;
    if (ta.is_float()) {
        const float a = *(const float *)A.data();
        if (tb.is_float())
            holds = (a < *(const float *)B.data());
        else if (tb.is_int())
            holds = (a < *(const int *)B.data());
        else
            return 0;  // unhandled case
    } else if (ta.is_int()) {
        const int a = *(const int *)A.data();
        if (tb.is_float())
            holds = (a < *(const float *)B.data());
        else if (tb.is_int())
            holds = (a < *(const int *)B.data());
        else
            return 0;  // unhandled case
    } else {
        return 0;  // unhandled case
    }

    const int cind = rop.add_constant (TypeDesc::TypeInt,
                                       holds ? &int_one : &int_zero);
    rop.turn_into_assign (op, cind, "const < const");
    return 1;
}
485 
486 
487 
/// Fold 'le R A B' with two constant int/float operands (any mix) into
/// 'assign R X', where X is the int constant 1 or 0.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_le)
{
    static const int int_zero = 0, int_one = 1;
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    const TypeSpec &ta (A.typespec());
    const TypeSpec &tb (B.typespec());
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    bool holds = false;
    if (ta.is_float()) {
        const float a = *(const float *)A.data();
        if (tb.is_float())
            holds = (a <= *(const float *)B.data());
        else if (tb.is_int())
            holds = (a <= *(const int *)B.data());
        else
            return 0;  // unhandled case
    } else if (ta.is_int()) {
        const int a = *(const int *)A.data();
        if (tb.is_float())
            holds = (a <= *(const float *)B.data());
        else if (tb.is_int())
            holds = (a <= *(const int *)B.data());
        else
            return 0;  // unhandled case
    } else {
        return 0;  // unhandled case
    }

    const int cind = rop.add_constant (TypeDesc::TypeInt,
                                       holds ? &int_one : &int_zero);
    rop.turn_into_assign (op, cind, "const <= const");
    return 1;
}
517 
518 
519 
/// Fold 'gt R A B' with two constant int/float operands (any mix) into
/// 'assign R X', where X is the int constant 1 or 0.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_gt)
{
    static const int int_zero = 0, int_one = 1;
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    const TypeSpec &ta (A.typespec());
    const TypeSpec &tb (B.typespec());
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    bool holds = false;
    if (ta.is_float()) {
        const float a = *(const float *)A.data();
        if (tb.is_float())
            holds = (a > *(const float *)B.data());
        else if (tb.is_int())
            holds = (a > *(const int *)B.data());
        else
            return 0;  // unhandled case
    } else if (ta.is_int()) {
        const int a = *(const int *)A.data();
        if (tb.is_float())
            holds = (a > *(const float *)B.data());
        else if (tb.is_int())
            holds = (a > *(const int *)B.data());
        else
            return 0;  // unhandled case
    } else {
        return 0;  // unhandled case
    }

    const int cind = rop.add_constant (TypeDesc::TypeInt,
                                       holds ? &int_one : &int_zero);
    rop.turn_into_assign (op, cind, "const > const");
    return 1;
}
549 
550 
551 
/// Fold 'ge R A B' with two constant int/float operands (any mix) into
/// 'assign R X', where X is the int constant 1 or 0.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_ge)
{
    static const int int_zero = 0, int_one = 1;
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    const TypeSpec &ta (A.typespec());
    const TypeSpec &tb (B.typespec());
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    bool holds = false;
    if (ta.is_float()) {
        const float a = *(const float *)A.data();
        if (tb.is_float())
            holds = (a >= *(const float *)B.data());
        else if (tb.is_int())
            holds = (a >= *(const int *)B.data());
        else
            return 0;  // unhandled case
    } else if (ta.is_int()) {
        const int a = *(const int *)A.data();
        if (tb.is_float())
            holds = (a >= *(const float *)B.data());
        else if (tb.is_int())
            holds = (a >= *(const int *)B.data());
        else
            return 0;  // unhandled case
    } else {
        return 0;  // unhandled case
    }

    const int cind = rop.add_constant (TypeDesc::TypeInt,
                                       holds ? &int_one : &int_zero);
    rop.turn_into_assign (op, cind, "const >= const");
    return 1;
}
581 
582 
583 
/// Fold 'or R A B' with two constant (int) operands into 'assign R X',
/// where X is the int constant 1 or 0.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_or)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const bool either = (*(const int *)A.data() != 0)
                     || (*(const int *)B.data() != 0);
    static const int int_zero = 0, int_one = 1;
    const int *truth = either ? &int_one : &int_zero;
    const int cind = rop.add_constant (TypeDesc::TypeInt, truth);
    rop.turn_into_assign (op, cind, "const || const");
    return 1;
}
601 
602 
603 
/// Fold 'and R A B' with two constant (int) operands into 'assign R X',
/// where X is the int constant 1 or 0.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_and)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const bool both = (*(const int *)A.data() != 0)
                   && (*(const int *)B.data() != 0);
    static const int int_zero = 0, int_one = 1;
    const int *truth = both ? &int_one : &int_zero;
    const int cind = rop.add_constant (TypeDesc::TypeInt, truth);
    rop.turn_into_assign (op, cind, "const && const");
    return 1;
}
621 
622 
623 
/// Fold 'bitand R A B' with two constant (int) operands into
/// 'assign R X', where X is the precomputed bitwise AND.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_bitand)
{
    Opcode &op (rop.op(opnum));
    Symbol &A (*rop.opargsym(op, 1));
    Symbol &B (*rop.opargsym(op, 2));
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const int lhs = A.get_int();
    const int rhs = B.get_int();
    const int cind = rop.add_constant (lhs & rhs);
    rop.turn_into_assign (op, cind, "const & const");
    return 1;
}
638 
639 
640 
/// Fold 'bitor R A B' with two constant (int) operands into
/// 'assign R X', where X is the precomputed bitwise OR.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_bitor)
{
    Opcode &op (rop.op(opnum));
    Symbol &A (*rop.opargsym(op, 1));
    Symbol &B (*rop.opargsym(op, 2));
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const int lhs = A.get_int();
    const int rhs = B.get_int();
    const int cind = rop.add_constant (lhs | rhs);
    rop.turn_into_assign (op, cind, "const | const");
    return 1;
}
655 
656 
657 
/// Fold 'xor R A B' with two constant (int) operands into
/// 'assign R X', where X is the precomputed bitwise XOR.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_xor)
{
    Opcode &op (rop.op(opnum));
    Symbol &A (*rop.opargsym(op, 1));
    Symbol &B (*rop.opargsym(op, 2));
    if (! (A.is_constant() && B.is_constant()))
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const int lhs = A.get_int();
    const int rhs = B.get_int();
    const int cind = rop.add_constant (lhs ^ rhs);
    rop.turn_into_assign (op, cind, "const ^ const");
    return 1;
}
672 
673 
674 
/// Fold 'compl R A' with a constant (int) operand into 'assign R X',
/// where X is the precomputed bitwise complement.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_compl)
{
    Opcode &op (rop.op(opnum));
    Symbol &A (*rop.opargsym(op, 1));
    if (! A.is_constant())
        return 0;

    OSL_DASSERT(A.typespec().is_int());
    const int flipped = ~(A.get_int());
    const int cind = rop.add_constant (flipped);
    rop.turn_into_assign (op, cind, "~const");
    return 1;
}
688 
689 
690 
/// Fold an 'if' op.
///   * Constant condition (int, float, triple, or string): a true
///     condition nops out the range [jump(0), jump(1)) and the 'if' op
///     itself; a false condition nops out the range [opnum, jump(0)).
///     A constant of any other type is left alone.
///   * Non-constant condition: if every op between the 'if' and its
///     farthest jump target is already a nop, the 'if' itself is turned
///     into a nop.
/// Returns the number of changes reported by the nop conversions for the
/// constant case, 1 for the empty-body case, 0 if nothing was done.
DECLFOLDER(constfold_if)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &C (*rop.inst()->argsymbol(op.firstarg()+0));

    if (C.is_constant()) {
        // Tri-state truth value: 1 = true, 0 = false, -1 = can't tell.
        int truth = -1;
        const TypeSpec &tc (C.typespec());
        if (tc.is_int()) {
            truth = (((int *)C.data())[0] != 0);
        } else if (tc.is_float()) {
            truth = (((float *)C.data())[0] != 0.0f);
        } else if (tc.is_triple()) {
            truth = (((Vec3 *)C.data())[0] != Vec3(0,0,0));
        } else if (tc.is_string()) {
            // A string counts as true when it is non-empty.
            ustring s = ((ustring *)C.data())[0];
            truth = (s.length() != 0);
        }

        int changed = 0;
        switch (truth) {
        case 1:
            // Always true: drop the 'else' range and then the 'if' op.
            changed += rop.turn_into_nop (op.jump(0), op.jump(1), "elide 'else'");
            changed += rop.turn_into_nop (op, "elide 'else'");
            break;
        case 0:
            // Always false: drop the 'if' op together with its true range.
            changed += rop.turn_into_nop (opnum, op.jump(0), "elide 'if'");
            break;
        default:
            break;   // unknown truth value, leave everything in place
        }
        return changed;
    }

    // Condition isn't constant.  The 'if' can still go away when there is
    // nothing but nops between it and its farthest jump target.
    const int body_end = op.farthest_jump ();
    bool body_is_empty = true;
    for (int i = opnum+1;  i < body_end;  ++i) {
        if (! (rop.inst()->ops()[i].opname() == u_nop)) {
            body_is_empty = false;
            break;
        }
    }
    if (! body_is_empty)
        return 0;

    rop.turn_into_nop (op, "'if' with no body");
    return 1;
}
729 
730 
731 
732 // Is an array known to have all elements having the same value?
733 static bool
array_all_elements_equal(const Symbol & s)734 array_all_elements_equal (const Symbol &s)
735 {
736     TypeDesc t = s.typespec().simpletype();
737     size_t size = t.elementsize();
738     size_t n = t.numelements();
739     for (size_t i = 1;  i < n;  ++i)
740         if (memcmp ((const char *)s.data(), (const char *)s.data()+i*size, size))
741             return false;
742     return true;
743 }
744 
745 
746 
/// Fold 'aref R A Index' (R = A[Index]).
///   * A and Index both constant: replace with an assignment of the
///     selected element, using the index clamped to [0, length-1].  If
///     range checking is enabled on the shader master and the original
///     index had to be clamped, an 'error' op reporting the out-of-range
///     index is inserted at opnum.
///   * A constant with all elements equal: replace with an assignment of
///     the first element, whatever the index.
/// Returns 1 if the op was rewritten, 0 if it was left alone.
DECLFOLDER(constfold_aref)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &R (*rop.inst()->argsymbol(op.firstarg()+0));
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Index (*rop.inst()->argsymbol(op.firstarg()+2));
    OSL_DASSERT(A.typespec().is_array() && Index.typespec().is_int());

    // Both folds below need the array itself to be a constant.
    if (! A.is_constant())
        return 0;

    if (Index.is_constant()) {
        // R = const_array[const]  =>  R = C
        TypeSpec elemtype = A.typespec().elementtype();
        OSL_ASSERT (equivalent(elemtype, R.typespec()));
        const int length = A.typespec().arraylength();
        const int orig_index = *(int *)Index.data();
        const int index = OIIO::clamp(orig_index, 0, length - 1);
        OSL_DASSERT(index >=0 && index < length);
        const char *elem = (char *)A.data() + index*elemtype.simpletype().size();
        int cind = rop.add_constant (elemtype, elem);
        rop.turn_into_assign (op, cind, "aref const fold: const_array[const]");

        if (rop.inst()->master()->range_checking() && index != orig_index) {
            // The index was out of range and range checking is on:
            // insert an 'error' op carrying the message and its arguments.
            const int args_to_add[] = {
                    rop.add_constant(u_fmt_range_check),
                    rop.add_constant(orig_index),
                    rop.add_constant(A.unmangled()),
                    rop.add_constant(length - 1),
                    rop.add_constant(op.sourcefile()),
                    rop.add_constant(op.sourceline()),
                    rop.add_constant(rop.group().name()),
                    rop.add_constant(rop.layer()),
                    rop.add_constant(rop.inst()->layername()),
                    rop.add_constant(ustring(rop.inst()->shadername()))
            };
            rop.insert_code(opnum, u_error, args_to_add,
                         RuntimeOptimizer::RecomputeRWRanges,
                         RuntimeOptimizer::GroupWithNext);
            // Re-fetch the op now at opnum (the one just inserted).
            Opcode &newop (rop.inst()->ops()[opnum]);
            newop.argreadonly(0);
        }
        return 1;
    }

    // Index isn't constant, but if every element holds the same value the
    // answer doesn't depend on it.
    if (! array_all_elements_equal(A))
        return 0;
    TypeSpec elemtype = A.typespec().elementtype();
    OSL_ASSERT (equivalent(elemtype, R.typespec()));
    int cind = rop.add_constant (elemtype, (char *)A.data());
    rop.turn_into_assign (op, cind, "aref of elements-equal array");
    return 1;
}
798 
799 
800 
DECLFOLDER(constfold_arraylength)
{
    // Fold R = arraylength(A) into R = <int constant> whenever a positive
    // element count for A is available at optimization time.
    // Returns 1 if the op was rewritten, 0 if it was left alone.
    Opcode &op = rop.inst()->ops()[opnum];
    OSL_MAYBE_UNUSED Symbol &R = *rop.inst()->argsymbol(op.firstarg()+0);
    Symbol &A = *rop.inst()->argsymbol(op.firstarg()+1);
    OSL_DASSERT (R.typespec().is_int() && A.typespec().is_array());

    // For an unsized array the count is taken from initializers();
    // otherwise it comes straight from the array type.
    int len;
    if (A.typespec().is_unsized_array())
        len = A.initializers();
    else
        len = A.typespec().arraylength();

    if (len <= 0)
        return 0;   // no usable length; nothing to fold

    int cind = rop.add_constant (TypeSpec(TypeDesc::INT), &len);
    rop.turn_into_assign (op, cind, "const fold arraylength");
    return 1;
}
818 
819 
820 
DECLFOLDER(constfold_aassign)821 DECLFOLDER(constfold_aassign)
822 {
823     // Array assignment
824     Opcode &op (rop.inst()->ops()[opnum]);
825     Symbol *R (rop.inst()->argsymbol(op.firstarg()+0));
826     Symbol *I (rop.inst()->argsymbol(op.firstarg()+1));
827     Symbol *C (rop.inst()->argsymbol(op.firstarg()+2));
828     if (! I->is_constant() || !C->is_constant())
829         return 0;  // not much we can do if not assigning constants
830     OSL_DASSERT (R->typespec().is_array() && I->typespec().is_int());
831 
832     TypeSpec elemtype = R->typespec().elementtype();
833     if (elemtype.is_closure())
834         return 0;   // don't worry about closures
835     TypeDesc elemsimpletype = elemtype.simpletype();
836 
837     // Look for patterns where all array elements are assigned in
838     // succession within the same block, in which case we can turn the
839     // result into a constant!
840     int len = R->typespec().arraylength();
841     if (len <= 0)
842         return 0;  // don't handle arrays of unknown length
843     int elemsize = (int)elemsimpletype.size();
844     std::vector<int> index_assigned (len, -1);
845     std::vector<char> filled_values (elemsize * len);  // constant storage
846     char *fill = (char *)&filled_values[0];
847     int num_assigned = 0;
848     int opindex = opnum;
849     int highestop = opindex;
850     for ( ; ; ) {
851         Opcode &opi (rop.inst()->ops()[opindex]);
852         if (opi.opname() != u_aassign)
853             break;   // not a successive aassign op
854         Symbol *Ri (rop.inst()->argsymbol(opi.firstarg()+0));
855         if (Ri != R)
856             break;   // not a compassign to the same variable
857         Symbol *Ii (rop.inst()->argsymbol(opi.firstarg()+1));
858         Symbol *Ci (rop.inst()->argsymbol(opi.firstarg()+2));
859         if (! Ii->is_constant() || !Ci->is_constant())
860             break;   // not assigning constants
861         int indexval = *(int *)Ii->data();
862         if (indexval < 0 || indexval >= len)
863             break;  // out of range index; let runtime deal with it
864         if (equivalent(elemtype, Ci->typespec())) {
865             // equivalent types
866             memcpy (fill + indexval*elemsize, Ci->data(), elemsize);
867         } else if (elemtype.is_float() && Ci->typespec().is_int()) {
868             // special case of float[i] = int
869             float c = Ci->typespec().is_int() ? *(int *)Ci->data()
870                                               : *(float *)Ci->data();
871             ((float *)fill)[indexval] = c;
872         } else {
873             break;   // a case we don't handle
874         }
875         if (index_assigned[indexval] < 0)
876             ++num_assigned;
877         index_assigned[indexval] = opindex;
878         highestop = opindex;
879         opindex = rop.next_block_instruction(opindex);
880         if (! opindex)
881             break;
882     }
883     if (num_assigned == len) {
884         // woo-hoo! we had a succession of constant aassign ops to the
885         // same variable, filling in all indices. Turn the whole shebang
886         // into a single assignment.
887         int cind = rop.add_constant (R->typespec(), fill);
888         rop.turn_into_assign (op, cind, "replaced element-by-element assignment");
889         rop.turn_into_nop (opnum+1, highestop+1, "replaced element-by-element assignment");
890         return highestop+1-opnum;
891     }
892 
893     return 0;
894 }
895 
896 
897 
DECLFOLDER(constfold_compassign)898 DECLFOLDER(constfold_compassign)
899 {
900     // Component assignment
901     Opcode &op (rop.inst()->ops()[opnum]);
902     Symbol *R (rop.inst()->argsymbol(op.firstarg()+0));
903     Symbol *I (rop.inst()->argsymbol(op.firstarg()+1));
904     Symbol *C (rop.inst()->argsymbol(op.firstarg()+2));
905     if (! I->is_constant() || !C->is_constant())
906         return 0;  // not much we can do if not assigning constants
907     OSL_DASSERT (R->typespec().is_triple() && I->typespec().is_int() &&
908                  (C->typespec().is_float() || C->typespec().is_int()));
909 
910     // We are obviously not assigning to a constant, but it could be
911     // that at this point in our current block, the value of A is known,
912     // and that will show up as a block alias.
913     int Aalias = rop.block_alias (rop.inst()->arg(op.firstarg()+0));
914     Symbol *AA = rop.inst()->symbol(Aalias);
915     // N.B. symbol returns NULL if Aalias is < 0
916 
917     // Try to simplify A[I]=C if we already know the old value of A as a
918     // constant. We can turn it into A[I] = N, where N is the old A but with
919     // the Ith component set to C. If it turns out that the old A[I] == C,
920     // and thus the assignment doesn't change A's value, we can eliminate
921     // the assignment entirely.
922     if (AA && AA->is_constant()) {
923         OSL_DASSERT (AA->typespec().is_triple());
924         int index = *(int *)I->data();
925         if (index < 0 || index >= 3) {
926             // We are indexing a const triple out of range.  But this
927             // isn't necessarily a reportable error, because it may be a
928             // code path that will never be taken.  Punt -- don't
929             // optimize this op, leave it to the execute-time range
930             // check to catch, if indeed it is a problem.
931             return 0;
932         }
933         float *aa = (float *)AA->data();
934         float c = C->typespec().is_int() ? *(int *)C->data()
935                                          : *(float *)C->data();
936         if (aa[index] == c) {
937             // If the component assignment doesn't change that component,
938             // just omit the op entirely.
939             rop.turn_into_nop (op, "useless compassign");
940             return 1;
941         }
942         // If the previous value of the triple was a constant, and we're
943         // assigning a new constant to one component (and the index is
944         // also a constant), just turn it into an assignment of a new
945         // constant triple.
946         Vec3 newval (aa[0], aa[1], aa[2]);
947         newval[index] = c;
948         int cind = rop.add_constant (AA->typespec(), &newval);
949         rop.turn_into_assign (op, cind, "fold compassign");
950         return 1;
951     }
952 
953     // Look for patterns where all three components are assigned in
954     // succession within the same block, in which case we can turn the
955     // result into a constant!
956     int index_assigned[3] = { -1, -1, -1 };
957     float filled_values[3];
958     int num_assigned = 0;
959     int opindex = opnum;
960     int highestop = opindex;
961     for ( ; ; ) {
962         Opcode &opi (rop.inst()->ops()[opindex]);
963         if (opi.opname() != u_compassign)
964             break;   // not a successive compassign op
965         Symbol *Ri (rop.inst()->argsymbol(opi.firstarg()+0));
966         if (Ri != R)
967             break;   // not a compassign to the same variable
968         Symbol *Ii (rop.inst()->argsymbol(opi.firstarg()+1));
969         Symbol *Ci (rop.inst()->argsymbol(opi.firstarg()+2));
970         if (! Ii->is_constant() || !Ci->is_constant())
971             break;   // not assigning constants
972         int indexval = *(int *)Ii->data();
973         if (indexval < 0 || indexval >= 3)
974             break;  // out of range index; let runtime deal with it
975         float c = Ci->typespec().is_int() ? *(int *)Ci->data()
976                                           : *(float *)Ci->data();
977         filled_values[indexval] = c;
978         if (index_assigned[indexval] < 0)
979             ++num_assigned;
980         index_assigned[indexval] = opindex;
981         highestop = opindex;
982         opindex = rop.next_block_instruction(opindex);
983         if (! opindex)
984             break;
985     }
986     if (num_assigned == 3) {
987         // woo-hoo! we had a succession of constant compassign ops to the
988         // same variable, filling in all indices. Turn the whole shebang
989         // into a single assignment.
990         int cind = rop.add_constant (R->typespec(), filled_values);
991         rop.turn_into_assign (op, cind, "replaced element-by-element assignment");
992         rop.turn_into_nop (opnum+1, highestop+1, "replaced element-by-element assignment");
993         return highestop+1-opnum;
994     }
995 
996     return 0;
997 }
998 
999 
1000 
DECLFOLDER(constfold_mxcompassign)1001 DECLFOLDER(constfold_mxcompassign)
1002 {
1003     // Matrix component assignment
1004     Opcode &op (rop.inst()->ops()[opnum]);
1005     Symbol *R (rop.inst()->argsymbol(op.firstarg()+0));
1006     Symbol *J (rop.inst()->argsymbol(op.firstarg()+1));
1007     Symbol *I (rop.inst()->argsymbol(op.firstarg()+2));
1008     Symbol *C (rop.inst()->argsymbol(op.firstarg()+3));
1009     if (! J->is_constant() || ! I->is_constant() || !C->is_constant())
1010         return 0;  // not much we can do if not assigning constants
1011     OSL_DASSERT (R->typespec().is_matrix() &&
1012                  J->typespec().is_int() && I->typespec().is_int() &&
1013                  (C->typespec().is_float() || C->typespec().is_int()));
1014 
1015     // We are obviously not assigning to a constant, but it could be
1016     // that at this point in our current block, the value of A is known,
1017     // and that will show up as a block alias.
1018     int Aalias = rop.block_alias (rop.inst()->arg(op.firstarg()+0));
1019     Symbol *AA = rop.inst()->symbol(Aalias);
1020     // N.B. symbol returns NULL if Aalias is < 0
1021 
1022     // Try to simplify A[J,I]=C if we already know the old value of A as a
1023     // constant. We can turn it into A[J,I] = N, where N is the old A but with
1024     // the designated component set to C. If it turns out that the old
1025     // A[J,I] == C, and thus the assignment doesn't change A's value, we can
1026     // eliminate the assignment entirely.
1027     if (AA && AA->is_constant()) {
1028         OSL_DASSERT (AA->typespec().is_matrix());
1029         int jndex = *(int *)J->data();
1030         int index = *(int *)I->data();
1031         if (index < 0 || index >= 3 || jndex < 0 || jndex >= 3) {
1032             // We are indexing a const matrix out of range.  But this
1033             // isn't necessarily a reportable error, because it may be a
1034             // code path that will never be taken.  Punt -- don't
1035             // optimize this op, leave it to the execute-time range
1036             // check to catch, if indeed it is a problem.
1037             return 0;
1038         }
1039         Matrix44 *aa = (Matrix44 *)AA->data();
1040         float c = C->typespec().is_int() ? *(int *)C->data()
1041                                          : *(float *)C->data();
1042         if ((*aa)[jndex][index] == c) {
1043             // If the component assignment doesn't change that component,
1044             // just omit the op entirely.
1045             rop.turn_into_nop (op, "useless mxcompassign");
1046             return 1;
1047         }
1048         // If the previous value of the matrix was a constant, and we're
1049         // assigning a new constant to one component (and the index is
1050         // also a constant), just turn it into an assignment of a new
1051         // constant triple.
1052         Matrix44 newval = *aa;
1053         newval[jndex][index] = c;
1054         int cind = rop.add_constant (AA->typespec(), &newval);
1055         rop.turn_into_assign (op, cind, "fold mxcompassign");
1056         return 1;
1057     }
1058 
1059     // Look for patterns where all 16 components are assigned in
1060     // succession within the same block, in which case we can turn the
1061     // result into a constant!
1062     int index_assigned[4][4] = { {-1, -1, -1, -1}, {-1, -1, -1, -1},
1063                                  {-1, -1, -1, -1}, {-1, -1, -1, -1} };
1064     float filled_values[4][4];
1065     int num_assigned = 0;
1066     int opindex = opnum;
1067     int highestop = opindex;
1068     for ( ; ; ) {
1069         Opcode &opi (rop.inst()->ops()[opindex]);
1070         if (opi.opname() != u_mxcompassign)
1071             break;   // not a successive mxcompassign op
1072         Symbol *Ri (rop.inst()->argsymbol(opi.firstarg()+0));
1073         if (Ri != R)
1074             break;   // not a mxcompassign to the same variable
1075         Symbol *Ji (rop.inst()->argsymbol(opi.firstarg()+1));
1076         Symbol *Ii (rop.inst()->argsymbol(opi.firstarg()+2));
1077         Symbol *Ci (rop.inst()->argsymbol(opi.firstarg()+3));
1078         if (! Ji->is_constant() || ! Ii->is_constant() || !Ci->is_constant())
1079             break;   // not assigning constants
1080         int jndexval = *(int *)Ji->data();
1081         int indexval = *(int *)Ii->data();
1082         if (jndexval < 0 || jndexval >= 4 || indexval < 0 || indexval >= 4)
1083             break;  // out of range index; let runtime deal with it
1084         float c = Ci->typespec().is_int() ? *(int *)Ci->data()
1085                                           : *(float *)Ci->data();
1086         filled_values[jndexval][indexval] = c;
1087         if (index_assigned[jndexval][indexval] < 0)
1088             ++num_assigned;
1089         index_assigned[jndexval][indexval] = opindex;
1090         highestop = opindex;
1091         opindex = rop.next_block_instruction(opindex);
1092         if (! opindex)
1093             break;
1094     }
1095     if (num_assigned == 16) {
1096         // woo-hoo! we had a succession of constant mxcompassign ops to the
1097         // same variable, filling in all indices. Turn the whole shebang
1098         // into a single assignment.
1099         int cind = rop.add_constant (R->typespec(), filled_values);
1100         rop.turn_into_assign (op, cind, "replaced element-by-element assignment");
1101         rop.turn_into_nop (opnum+1, highestop+1, "replaced element-by-element assignment");
1102         return highestop+1-opnum;
1103     }
1104 
1105     return 0;
1106 }
1107 
1108 
1109 
DECLFOLDER(constfold_compref)
{
    // Component reference: fold R = A[I] into R = <float constant> when
    // both the triple A and the index I are constants.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &A = *rop.inst()->argsymbol(op.firstarg()+1);
    Symbol &Index = *rop.inst()->argsymbol(op.firstarg()+2);
    if (! (A.is_constant() && Index.is_constant()))
        return 0;
    OSL_DASSERT (A.typespec().is_triple() && Index.typespec().is_int());

    const int index = *static_cast<const int *>(Index.data());
    if (index < 0 || index >= 3) {
        // Out-of-range index on a constant triple. It may sit on a code
        // path that is never taken, so it is not treated as an error
        // here: leave the op alone for the execute-time range check.
        return 0;
    }

    // The selected component is the index'th float of A's data.
    float *component = (float *)A.data() + index;
    int cind = rop.add_constant (TypeDesc::TypeFloat, component);
    rop.turn_into_assign (op, cind, "const_triple[const]");
    return 1;
}
1134 
1135 
1136 
DECLFOLDER(constfold_strlen)
{
    // Fold R = strlen(s) into R = <int constant> when s is a constant.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &S = *rop.inst()->argsymbol(op.firstarg()+1);
    if (! S.is_constant())
        return 0;
    OSL_DASSERT (S.typespec().is_string());

    const ustring &str = *static_cast<const ustring *>(S.data());
    int cind = rop.add_constant (static_cast<int>(str.length()));
    rop.turn_into_assign (op, cind, "const fold strlen");
    return 1;
}
1151 
1152 
1153 
DECLFOLDER(constfold_hash)
{
    // Fold R = hash(s) or R = hash(s,t) into R = <int constant> when all
    // the inputs are constants and the argument types are one of the
    // combinations handled below. Returns 1 if rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    const int nargs = op.nargs();
    Symbol *S = rop.inst()->argsymbol(op.firstarg()+1);
    Symbol *T = nullptr;    // optional second input
    if (nargs > 2)
        T = rop.inst()->argsymbol(op.firstarg()+2);
    if (! S->is_constant() || (T && ! T->is_constant()))
        return 0;

    const TypeSpec &stype = S->typespec();
    int cind = -1;   // stays negative if no case applies
    if (stype.is_string()) {
        const ustring &str = *static_cast<const ustring *>(S->data());
        cind = rop.add_constant (static_cast<int>(str.hash()));
    } else if (nargs == 2) {
        // Single-input forms: int, float, or triple.
        if (stype.is_int())
            cind = rop.add_constant (inthashi (S->get_int()));
        else if (stype.is_float())
            cind = rop.add_constant (inthashf (S->get_float()));
        else if (stype.is_triple())
            cind = rop.add_constant (inthashf ((const float *)S->data()));
    } else if (nargs == 3) {
        // Two-input forms: (float, float) or (triple, float).
        if (stype.is_float() && T->typespec().is_float())
            cind = rop.add_constant (inthashf (S->get_float(), T->get_float()));
        else if (stype.is_triple() && T->typespec().is_float())
            cind = rop.add_constant (inthashf ((const float *)S->data(), T->get_float()));
    }

    if (cind < 0)
        return 0;   // unsupported combination; leave the op alone
    rop.turn_into_assign (op, cind, "const fold hash");
    return 1;
}
1182 
1183 
1184 
DECLFOLDER(constfold_getchar)
{
    // Fold R = getchar(s,i) into R = <int constant> when both the string
    // and the index are constants. An index outside [0, length) folds to 0.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &S = *rop.inst()->argsymbol(op.firstarg()+1);
    Symbol &I = *rop.inst()->argsymbol(op.firstarg()+2);
    if (! (S.is_constant() && I.is_constant()))
        return 0;
    OSL_DASSERT (S.typespec().is_string() && I.typespec().is_int());

    const ustring &str = *static_cast<const ustring *>(S.data());
    const int idx = *static_cast<const int *>(I.data());
    const int len = static_cast<int>(str.length());
    int result = 0;
    if (idx >= 0 && idx < len)
        result = str.c_str()[idx];
    int cind = rop.add_constant (result);
    rop.turn_into_assign (op, cind, "const fold getchar");
    return 1;
}
1202 
1203 
1204 
DECLFOLDER(constfold_endswith)
{
    // Fold R = endswith(s,e) into R = <0 or 1> when both strings are
    // constants. Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &S = *rop.inst()->argsymbol(op.firstarg()+1);
    Symbol &E = *rop.inst()->argsymbol(op.firstarg()+2);
    if (! (S.is_constant() && E.is_constant()))
        return 0;
    OSL_DASSERT (S.typespec().is_string() && E.typespec().is_string());

    const ustring s = *static_cast<const ustring *>(S.data());
    const ustring e = *static_cast<const ustring *>(E.data());
    const size_t slen = s.length();
    const size_t elen = e.length();
    // A suffix longer than the string can never match; otherwise compare
    // the last elen characters of s against e.
    const bool matches = (elen <= slen) &&
        (strncmp (s.c_str() + (slen - elen), e.c_str(), elen) == 0);
    int result = matches ? 1 : 0;
    int cind = rop.add_constant (result);
    rop.turn_into_assign (op, cind, "const fold endswith");
    return 1;
}
1225 
1226 
1227 
DECLFOLDER(constfold_stoi)
{
    // Fold R = stoi(s) into R = <int constant> when s is a constant,
    // using Strutil::from_string<int> for the conversion.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &S = *rop.inst()->argsymbol(op.firstarg()+1);
    if (! S.is_constant())
        return 0;
    OSL_DASSERT (S.typespec().is_string());

    const ustring text = *static_cast<const ustring *>(S.data());
    int cind = rop.add_constant (Strutil::from_string<int>(text));
    rop.turn_into_assign (op, cind, "const fold stoi");
    return 1;
}
1242 
1243 
1244 
DECLFOLDER(constfold_stof)
{
    // Fold R = stof(s) into R = <float constant> when s is a constant,
    // using Strutil::from_string<float> for the conversion.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &S = *rop.inst()->argsymbol(op.firstarg()+1);
    if (! S.is_constant())
        return 0;
    OSL_DASSERT (S.typespec().is_string());

    const ustring text = *static_cast<const ustring *>(S.data());
    int cind = rop.add_constant (Strutil::from_string<float>(text));
    rop.turn_into_assign (op, cind, "const fold stof");
    return 1;
}
1259 
1260 
1261 
DECLFOLDER(constfold_split)
{
    // Fold n = split(str, results [, sep [, maxsplit]]) when the string and
    // whichever of sep/maxsplit are present are constants. The op becomes
    // an assignment of the constant count to the return value, plus an
    // inserted assignment of a constant string array to 'results'.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &Str = *rop.opargsym (op, 1);
    Symbol &Results = *rop.opargsym (op, 2);
    Symbol *Sep = rop.opargsym (op, 3);        // may be absent
    Symbol *Maxsplit = rop.opargsym (op, 4);   // may be absent
    const bool all_const = Str.is_constant()
                           && (!Sep || Sep->is_constant())
                           && (!Maxsplit || Maxsplit->is_constant());
    if (! all_const)
        return 0;

    // Never produce more pieces than the results array has elements.
    const int resultslen = Results.typespec().arraylength();
    int maxsplit = resultslen;
    if (Maxsplit)
        maxsplit = std::min (*static_cast<const int *>(Maxsplit->data()),
                             resultslen);

    // Do the split now with Strutil::split.
    ustring sep = Sep ? *static_cast<const ustring *>(Sep->data())
                      : ustring("");
    const ustring &str = *static_cast<const ustring *>(Str.data());
    std::vector<std::string> splits;
    Strutil::split (str.string(), splits, sep.string(), maxsplit);
    int n = std::min (std::max(0,maxsplit), (int)splits.size());

    // Remember which symbol holds the results before the op is rewritten.
    int resultsarg = rop.inst()->args()[op.firstarg()+2];
    // The split op itself becomes an assignment of the piece count...
    rop.turn_into_assign (op, rop.add_constant(n));

    // ...and the pieces go into a constant array as long as 'results'
    // (entries from n onward stay default-constructed).
    std::vector<ustring> usplits (resultslen);
    for (int piece = 0;  piece < n;  ++piece)
        usplits[piece] = ustring(splits[piece]);
    int cind = rop.add_constant (TypeDesc(TypeDesc::STRING,resultslen),
                                 usplits.data());

    // Insert an instruction copying the constant array to the user's
    // results array.
    const int assign_args[] = { resultsarg, cind };
    rop.insert_code (opnum, u_assign, assign_args,
                     RuntimeOptimizer::RecomputeRWRanges,
                     RuntimeOptimizer::GroupWithNext);
    return 1;
}
1303 
1304 
1305 
DECLFOLDER(constfold_concat)
{
    // Fold R = concat(s,...) into R = <string constant> when every string
    // argument is a constant. Returns 1 if rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    const int nargs = op.nargs();
    ustring result;   // concatenation of the arguments seen so far
    for (int argnum = 1;  argnum < nargs;  ++argnum) {
        Symbol &S = *rop.inst()->argsymbol(op.firstarg()+argnum);
        if (! S.is_constant())
            return 0;  // a non-constant argument: cannot fold
        const ustring piece = *static_cast<const ustring *>(S.data());
        // Guard against null character pointers on either side.
        const ustring sofar = result;
        const char *lhs = sofar.c_str();
        if (! lhs)
            lhs = "";
        const char *rhs = piece.c_str();
        if (! rhs)
            rhs = "";
        result = ustring::sprintf ("%s%s", lhs, rhs);
    }

    // All arguments were constants; 'result' holds the full concatenation.
    int cind = rop.add_constant (TypeDesc::TypeString, &result);
    rop.turn_into_assign (op, cind, "const fold concat");
    return 1;
}
1326 
1327 
1328 
DECLFOLDER(constfold_format)
{
    // Try to turn R=format(fmt,...) into R=C
    //
    // Requires a constant format string. Working backwards from the last
    // argument, each constant argument is formatted using the last format
    // specification remaining in the format string. If every argument is
    // consumed this way, the op becomes an assignment of a constant
    // string; if only some trailing arguments are consumed, the op is
    // replaced by the same op with a shorter argument list and a rewritten
    // format string. Returns 1 if the op was changed, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Format (*rop.opargsym(op, 1));
    if (! Format.is_constant())
        return 0;
    ustring fmt = *(ustring *)Format.data();

    // split fmt into the prefix (the starting part of the string that we
    // haven't yet processed) and the suffix (the ending part that we've
    // fully processed).
    std::string prefix = fmt.string();
    std::string suffix;
    int args_expanded = 0;   // number of trailing args folded so far

    // While there is still a constant argument at the end of the arg list,
    // peel it off and use it to rewrite the format string.
    // (Arg 0 is the result and arg 1 the format string, hence the >= 2.)
    for (int argnum = op.nargs()-1; argnum >= 2; --argnum) {
        Symbol &Arg (*rop.opargsym(op, argnum));
        if (! Arg.is_constant())
            break;   // no more constants

        // find the last format specification
        size_t pos = std::string::npos;
        while (1) {
            pos = prefix.find_last_of ('%', pos); // find at or before pos
            if (pos == std::string::npos) {
                // Fewer '%' tokens than arguments? Must be malformed. Punt.
                return 0;
            }
            if (pos == 0 || prefix[pos-1] != '%') {
                // we found the format specifier
                break;
            }
            // False alarm! Beware of %% which is a literal % rather than a
            // format specifier. Back up and try again.
            if (pos >= 2)
                pos -= 2;   // back up
            else {
                // This can only happen if the %% is at the start of the
                // format string, but it shouldn't be since there are still
                // args to process. Punt.
                return 0;
            }
        }
        OSL_ASSERT (pos < prefix.length() && prefix[pos] == '%');

        // cleave off the last format specification into mid
        // (mid runs from the '%' at pos to the end of prefix, so it also
        // carries any literal text that followed the specification)
        std::string mid = std::string (prefix, pos);
        std::string formatted;
        const TypeSpec &argtype = Arg.typespec();
        // Format the constant with mid as the format string, reading the
        // argument's data according to its type.
        if (argtype.is_int())
            formatted = Strutil::sprintf (mid.c_str(), *(int *)Arg.data());
        else if (argtype.is_float())
            formatted = Strutil::sprintf (mid.c_str(), *(float *)Arg.data());
        else if (argtype.is_triple())
            formatted = Strutil::sprintf (mid.c_str(), *(Vec3 *)Arg.data());
        else if (argtype.is_matrix())
            formatted = Strutil::sprintf (mid.c_str(), *(Matrix44 *)Arg.data());
        else if (argtype.is_string())
            formatted = Strutil::sprintf (mid.c_str(), *(ustring *)Arg.data());
        else
            break;   // something else we don't handle -- we're done

        // We were able to format, so rejigger the strings.
        prefix.erase (pos, std::string::npos);
        suffix = formatted + suffix;
        args_expanded += 1;
    }

    // Rewrite the op
    if (args_expanded == op.nargs()-2) {
        // Special case -- completely expanded, replace with a string
        // assignment
        // NOTE(review): prefix is used verbatim here, so any "%%" escape
        // still in it reaches the constant as two characters -- confirm
        // that this matches what the unfolded format() would produce.
        int cind = rop.add_constant (ustring(prefix + suffix));
        rop.turn_into_assign (op, cind, "fully constant fold format()");
        return 1;
    } else if (args_expanded != 0) {
        // Partially expanded -- rewrite the instruction. It's actually
        // easier to turn this instruction into a nop and insert a new one.
        // Grab the previous arguments, drop the ones we folded, and
        // replace the format string with our new one.
        // NOTE(review): suffix holds already-formatted text that now
        // becomes part of a format string again; if it can contain '%'
        // it would be reinterpreted -- confirm whether it needs escaping.
        int *argstart = &rop.inst()->args()[0] + op.firstarg();
        std::vector<int> newargs (argstart, argstart + op.nargs() - args_expanded);
        newargs[1] = rop.add_constant (ustring(prefix + suffix));
        // Save the op name before the op is turned into a nop.
        ustring opname = op.opname();
        rop.turn_into_nop (op, "partial constant fold format()");
        rop.insert_code (opnum, opname, newargs,
                         RuntimeOptimizer::RecomputeRWRanges);
        return 1;
    }

    // No trailing constant argument could be folded.
    return 0;
}
1424 
1425 
1426 
DECLFOLDER(constfold_substr)
{
    // Fold R = substr(s,start,len) into R = <string constant> when the
    // string, start and length are all constants.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &S = *rop.opargsym (op, 1);
    Symbol &Start = *rop.opargsym (op, 2);
    Symbol &Len = *rop.opargsym (op, 3);
    if (! (S.is_constant() && Start.is_constant() && Len.is_constant()))
        return 0;
    OSL_DASSERT (S.typespec().is_string() && Start.typespec().is_int() &&
                 Len.typespec().is_int());

    const ustring s = *static_cast<const ustring *>(S.data());
    const int start = *static_cast<const int *>(Start.data());
    const int len = *static_cast<const int *>(Len.data());
    const int slen = s.length();

    // A negative start counts back from the end of the string; the
    // beginning offset and the length are then clamped to [0, slen].
    int b = (start < 0) ? start + slen : start;
    b = Imath::clamp (b, 0, slen);
    int count = Imath::clamp (len, 0, slen);
    ustring r (s, b, count);
    int cind = rop.add_constant (r);
    rop.turn_into_assign (op, cind, "const fold substr");
    return 1;
}
1452 
1453 
1454 
DECLFOLDER(constfold_regex_search)
{
    // Fold R = regex_search(subj,reg) into R = <int constant> when both
    // the subject and the pattern are constants. Only the form without a
    // results argument (three op arguments in total) is handled.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &Subj = *rop.inst()->argsymbol(op.firstarg()+1);
    Symbol &Reg = *rop.inst()->argsymbol(op.firstarg()+2);
    if (op.nargs() != 3 || ! Subj.is_constant() || ! Reg.is_constant())
        return 0;
    OSL_DASSERT(Subj.typespec().is_string() && Reg.typespec().is_string());

    const ustring &subject = *static_cast<const ustring *>(Subj.data());
    const ustring &pattern = *static_cast<const ustring *>(Reg.data());
    regex reg (pattern.string());
    int result = regex_search (subject.string(), reg);
    int cind = rop.add_constant (result);
    rop.turn_into_assign (op, cind, "const fold regex_search");
    return 1;
}
1474 
1475 
1476 
// Clamp x to [minv, maxv]. The lower bound is tested first, so if the
// bounds are inverted a value below minv yields minv. A value that compares
// neither below minv nor above maxv (including NaN) is returned unchanged.
inline float clamp (float x, float minv, float maxv)
{
    return (x < minv) ? minv
                      : ((x > maxv) ? maxv : x);
}
1483 
1484 
1485 
DECLFOLDER(constfold_clamp)
{
    // Fold R = clamp(x,min,max) into R = <constant> when all three inputs
    // are constants of equivalent type and that type is float or a triple.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op = rop.inst()->ops()[opnum];
    Symbol &X = *rop.inst()->argsymbol(op.firstarg()+1);
    Symbol &Min = *rop.inst()->argsymbol(op.firstarg()+2);
    Symbol &Max = *rop.inst()->argsymbol(op.firstarg()+3);
    const bool foldable =
        X.is_constant() && Min.is_constant() && Max.is_constant() &&
        equivalent(X.typespec(), Min.typespec()) &&
        equivalent(X.typespec(), Max.typespec()) &&
        (X.typespec().is_float() || X.typespec().is_triple());
    if (! foldable)
        return 0;

    const float *x = static_cast<const float *>(X.data());
    const float *lo = static_cast<const float *>(Min.data());
    const float *hi = static_cast<const float *>(Max.data());
    // Clamp component-wise: one component for a float, three for a triple.
    float result[3];
    const int ncomps = X.typespec().is_triple() ? 3 : 1;
    for (int c = 0;  c < ncomps;  ++c)
        result[c] = clamp (x[c], lo[c], hi[c]);
    int cind = rop.add_constant (X.typespec(), &result);
    rop.turn_into_assign (op, cind, "const fold clamp");
    return 1;
}
1512 
1513 
1514 
DECLFOLDER(constfold_mix)1515 DECLFOLDER(constfold_mix)
1516 {
1517     // Try to turn R=mix(a,b,x) into
1518     //   R = c             if a,b,x are all are constant
1519     //   R = a             if x is constant and x == 0
1520     //   R = b             if x is constant and x == 1
1521     //   R = a             if a and b are the same (even if not constant)
1522     //
1523     Opcode &op (rop.inst()->ops()[opnum]);
1524     int Rind = rop.oparg(op,0);
1525     int Aind = rop.oparg(op,1);
1526     int Bind = rop.oparg(op,2);
1527     int Xind = rop.oparg(op,3);
1528     Symbol &R (*rop.inst()->symbol(Rind));
1529     Symbol &A (*rop.inst()->symbol(Aind));
1530     Symbol &B (*rop.inst()->symbol(Bind));
1531     Symbol &X (*rop.inst()->symbol(Xind));
1532     // Everything better be a float or triple
1533     if (! ((A.typespec().is_float() || A.typespec().is_triple()) &&
1534            (B.typespec().is_float() || B.typespec().is_triple()) &&
1535            (X.typespec().is_float() || X.typespec().is_triple())))
1536         return 0;
1537     if (X.is_constant() && A.is_constant() && B.is_constant()) {
1538         // All three constants
1539         float result[3];
1540         const float *a = (const float *) A.data();
1541         const float *b = (const float *) B.data();
1542         const float *x = (const float *) X.data();
1543         bool atriple = A.typespec().is_triple();
1544         bool btriple = B.typespec().is_triple();
1545         bool xtriple = X.typespec().is_triple();
1546         bool rtriple = R.typespec().is_triple();
1547         int ncomps = rtriple ? 3 : 1;
1548         for (int i = 0;  i < ncomps;  ++i) {
1549             float xval = x[xtriple*i];
1550             result[i] = (1.0f-xval) * a[atriple*i] + xval * b[btriple*i];
1551         }
1552         int cind = rop.add_constant (R.typespec(), &result);
1553         rop.turn_into_assign (op, cind, "const fold mix");
1554         return 1;
1555     }
1556 
1557     // Two special cases... X is 0, X is 1
1558     if (rop.is_zero(X)) {  // mix(A,B,0) == A
1559         rop.turn_into_assign (op, Aind, "mix(a,b,0) => a");
1560         return 1;
1561     }
1562     if (rop.is_one(X)) {  // mix(A,B,1) == B
1563         rop.turn_into_assign (op, Bind, "mix(a,b,1) => b");
1564         return 1;
1565     }
1566 
1567     if (rop.is_zero(A) &&
1568         (! B.connected() || !rop.opt_mix() || rop.optimization_pass() > 2)) {
1569         // mix(0,b,x) == b*x, but only do this if b is not connected.
1570         // Because if b is connected, it may pull on something expensive.
1571         rop.turn_into_new_op (op, u_mul, Rind, Bind, Xind, "mix(0,b,x) => b*x");
1572         return 1;
1573     }
1574 #if 0
1575     // This seems to almost never happen, so don't worry about it
1576     if (rop.is_zero(B) && ! A.connected()) {
1577         // mix(a,0,x) == (1-x)*a, but only do this if b is not connected
1578     }
1579 #endif
1580 
1581     // mix (a, a, x) is a, regardless of x and even if none are constants
1582     if (Aind == Bind) {
1583         rop.turn_into_assign (op, Aind, "const fold: mix(a,a,x) -> a");
1584     }
1585 
1586     // Special sauce: mix(a,b,x) is implemented as a*(1-x)+b*x.  But
1587     // consider cases where x is not constant (thus not foldable), but
1588     // nonetheless turns out to be 0 or 1 much of the time.  If a and b
1589     // are short local computations, it's not so bad, but if they are
1590     // shader parameters connected to other layers, this affair may
1591     // needlessly evaluate other layers for no purpose other than to
1592     // multiply their results by zero.  So we try to ameliorate that
1593     // case with some extra tests here.  N.B. we delay doing this until
1594     // a few optimization passes in, to give enough time to optimize
1595     // away the inputs in other ways before introducing the 'if'.
1596     if (rop.opt_mix() && rop.optimization_pass() > 1 &&
1597         !X.is_constant() && (A.connected() || B.connected())) {
1598         // A or B are connected, and thus presumed expensive, so turn into:
1599         //    if (X == 0)  // But eliminate this clause if B not connected
1600         //        R = A;
1601         //    else if (X == 1)  // But eliminate this clause if A not connected
1602         //        R = B;
1603         //    else
1604         //        R = A*(1-X) + B*X;
1605         int if0op = -1;  // Op where we have the 'if' for testing x==0
1606         int if1op = -1;  // Op where we have the 'if' for testing x==1
1607         if (B.connected()) {
1608             // Add the test and conditional for X==0, in which case we can
1609             // just R=A and not have to access B
1610             int cond = rop.add_temp (TypeDesc::TypeInt);
1611             int fzero = rop.add_constant (0.0f);
1612             rop.insert_code (opnum++, u_eq, RuntimeOptimizer::GroupWithNext,
1613                              cond, Xind, fzero);
1614             if0op = opnum;
1615             rop.insert_code (opnum++, u_if, RuntimeOptimizer::GroupWithNext, cond);
1616             rop.op(if0op).argreadonly (0);
1617             rop.symbol(cond)->mark_rw (if0op, true, false);
1618             // Add the true (R=A) clause
1619             rop.insert_code (opnum++, u_assign,
1620                              RuntimeOptimizer::GroupWithNext, Rind, Aind);
1621         }
1622         int if0op_false = opnum;  // Where we jump if the 'if x==0' is false
1623         if (A.connected()) {
1624             // Add the test and conditional for X==1, in which case we can
1625             // just R=B and not have to access A
1626             int cond = rop.add_temp (TypeDesc::TypeInt);
1627             int fone = rop.add_constant (1.0f);
1628             rop.insert_code (opnum++, u_eq, RuntimeOptimizer::GroupWithNext,
1629                              cond, Xind, fone);
1630             if1op = opnum;
1631             rop.insert_code (opnum++, u_if, RuntimeOptimizer::GroupWithNext, cond);
1632             rop.op(if1op).argreadonly (0);
1633             rop.symbol(cond)->mark_rw (if1op, true, false);
1634             // Add the true (R=B) clause
1635             rop.insert_code (opnum++, u_assign, RuntimeOptimizer::GroupWithNext,
1636                              Rind, Bind);
1637         }
1638         int if1op_false = opnum;  // Where we jump if the 'if x==1' is false
1639         // Add the (R=A*(1-X)+B*X) clause -- always need that
1640         int one_minus_x = rop.add_temp (X.typespec());
1641         int temp1 = rop.add_temp (A.typespec());
1642         int temp2 = rop.add_temp (B.typespec());
1643         int fone = rop.add_constant (1.0f);
1644         rop.insert_code (opnum++, u_sub, RuntimeOptimizer::GroupWithNext,
1645                          one_minus_x, fone, Xind);
1646         rop.insert_code (opnum++, u_mul, RuntimeOptimizer::GroupWithNext,
1647                          temp1, Aind, one_minus_x);
1648         rop.insert_code (opnum++, u_mul, RuntimeOptimizer::GroupWithNext,
1649                          temp2, Bind, Xind);
1650         rop.insert_code (opnum++, u_add, RuntimeOptimizer::GroupWithNext,
1651                          Rind, temp1, temp2);
1652         // Now go back and patch the 'if' ops with the right jump addresses
1653         if (if0op >= 0)
1654             rop.op(if0op).set_jump (if0op_false, opnum);
1655         if (if1op >= 0)
1656             rop.op(if1op).set_jump (if1op_false, opnum);
1657         // The next op is the original mix, make it nop
1658         rop.turn_into_nop (rop.op(opnum), "smart 'mix'");
1659         return 1;
1660     }
1661 
1662     return 0;
1663 }
1664 
1665 
1666 
DECLFOLDER(constfold_select)
{
    // Simplify R=select(a,b,cond):
    //   R = a      if cond is a constant for which rop.is_zero() holds
    //   R = b      if cond is a constant for which rop.is_nonzero() holds
    //   R = a      if a and b are the same symbol (nothing need be constant)
    // Returns 1 if the op was rewritten, 0 otherwise.
    //
    Opcode &op (rop.inst()->ops()[opnum]);
    const int a_index = rop.oparg(op,1);
    const int b_index = rop.oparg(op,2);
    const int cond_index = rop.oparg(op,3);
    Symbol &C (*rop.inst()->symbol(cond_index));

    if (C.is_constant()) {
        if (rop.is_zero(C)) {
            rop.turn_into_assign (op, a_index, "select(A,B,0) => A");
            return 1;
        }
        if (rop.is_nonzero(C)) {
            rop.turn_into_assign (op, b_index, "select(A,B,non-0) => B");
            return 1;
        }
    }

    // Both choices are the same symbol, so the condition is irrelevant.
    if (a_index == b_index) {
        rop.turn_into_assign (op, a_index, "select(c,a,a) -> a");
        return 1;
    }
    return 0;
}
1695 
1696 
1697 
DECLFOLDER(constfold_min)
{
    // Fold R=min(x,y) into R=<constant> when x and y are constants of
    // equivalent type (float, triple, or int).  Returns 1 if the op was
    // rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Y (*rop.inst()->argsymbol(op.firstarg()+2));

    if (! (X.is_constant() && Y.is_constant()))
        return 0;
    if (! equivalent(X.typespec(), Y.typespec()))
        return 0;

    if (X.typespec().is_float() || X.typespec().is_triple()) {
        // Component-wise minimum; a plain float has one component.
        const int ncomps = X.typespec().is_triple() ? 3 : 1;
        const float *xv = (const float *) X.data();
        const float *yv = (const float *) Y.data();
        float folded[3];
        for (int i = 0;  i < ncomps;  ++i)
            folded[i] = std::min (xv[i], yv[i]);
        rop.turn_into_assign (op, rop.add_constant (X.typespec(), folded),
                              "const fold min");
        return 1;
    }

    if (X.typespec().is_int()) {
        const int xi = *(const int *) X.data();
        const int yi = *(const int *) Y.data();
        int folded = std::min (xi, yi);
        rop.turn_into_assign (op, rop.add_constant (folded), "const fold min");
        return 1;
    }

    // Any other type is left for runtime.
    return 0;
}
1730 
1731 
1732 
DECLFOLDER(constfold_max)
{
    // Fold R=max(x,y) into R=<constant> when x and y are constants of
    // equivalent type (float, triple, or int).  Returns 1 if the op was
    // rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Y (*rop.inst()->argsymbol(op.firstarg()+2));

    if (! (X.is_constant() && Y.is_constant()))
        return 0;
    if (! equivalent(X.typespec(), Y.typespec()))
        return 0;

    if (X.typespec().is_float() || X.typespec().is_triple()) {
        // Component-wise maximum; a plain float has one component.
        const int ncomps = X.typespec().is_triple() ? 3 : 1;
        const float *xv = (const float *) X.data();
        const float *yv = (const float *) Y.data();
        float folded[3];
        for (int i = 0;  i < ncomps;  ++i)
            folded[i] = std::max (xv[i], yv[i]);
        rop.turn_into_assign (op, rop.add_constant (X.typespec(), folded),
                              "const fold max");
        return 1;
    }

    if (X.typespec().is_int()) {
        const int xi = *(const int *) X.data();
        const int yi = *(const int *) Y.data();
        int folded = std::max (xi, yi);
        rop.turn_into_assign (op, rop.add_constant (folded), "const fold max");
        return 1;
    }

    // Any other type is left for runtime.
    return 0;
}
1765 
1766 
1767 
// Generator macro: defines the folder constfold_<name> for a one-argument
// function that accepts a float or a triple and returns a value of the
// same type.  'impl' is the float function applied to each component when
// the argument is a constant.
#define AUTO_DECLFOLDER_FLOAT_OR_TRIPLE(name,impl)                      \
DECLFOLDER(constfold_ ## name)                                          \
{                                                                       \
    /* Fold R=f(x) into R=C when x is a constant float or triple */     \
    Opcode &op (rop.inst()->ops()[opnum]);                              \
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));                \
    if (! X.is_constant())                                              \
        return 0;                                                       \
    if (! X.typespec().is_float() && ! X.typespec().is_triple())        \
        return 0;                                                       \
    /* One component for a float, three for a triple */                 \
    const int ncomps = X.typespec().is_triple() ? 3 : 1;                \
    const float *xval = (const float *) X.data();                       \
    float folded[3];                                                    \
    for (int i = 0;  i < ncomps;  ++i)                                  \
        folded[i] = impl (xval[i]);                                     \
    int cind = rop.add_constant (X.typespec(), folded);                 \
    rop.turn_into_assign (op, cind, "const fold " # name);              \
    return 1;                                                           \
}
1792 
1793 
1794 
// Instantiate constfold_<name> for the one-argument math functions.  This
// first group uses the same implementation regardless of OSL_FAST_MATH.
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (sqrt   , OIIO::safe_sqrt)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (inversesqrt, OIIO::safe_inversesqrt)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (degrees, OIIO::degrees)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (radians, OIIO::radians)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (floor  , floorf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (ceil   , ceilf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (erf    , OIIO::fast_erf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (erfc   , OIIO::fast_erfc)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (logb   , OIIO::fast_logb)
#if OSL_FAST_MATH
// With OSL_FAST_MATH enabled, fold using OIIO's fast_* routines.
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (cos    , OIIO::fast_cos)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (sin    , OIIO::fast_sin)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (acos   , OIIO::fast_acos)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (asin   , OIIO::fast_asin)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (exp    , OIIO::fast_exp)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (exp2   , OIIO::fast_exp2)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (expm1  , OIIO::fast_expm1)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log    , OIIO::fast_log)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log10  , OIIO::fast_log10)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log2   , OIIO::fast_log2)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (cbrt   , OIIO::fast_cbrt)
#else
// Otherwise fold using the C math library or OIIO's safe_* wrappers.
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (cos    , cosf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (sin    , sinf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (acos   , OIIO::safe_acos)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (asin   , OIIO::safe_asin)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (exp    , expf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (exp2   , exp2f)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (expm1  , expm1f)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log    , OIIO::safe_log)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log10  , OIIO::safe_log10)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log2   , OIIO::safe_log2)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (cbrt   , cbrtf)
#endif
1829 
1830 DECLFOLDER(constfold_pow)
1831 {
1832     Opcode &op (rop.inst()->ops()[opnum]);
1833     Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
1834     Symbol &Y (*rop.inst()->argsymbol(op.firstarg()+2));
1835 
1836     if (rop.is_zero(Y)) {
1837         // x^0 == 1
1838         rop.turn_into_assign_one (op, "pow(x,0) => 1");
1839         return 1;
1840     }
1841     if (rop.is_one(Y)) {
1842         // x^1 == x
1843         rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+1), "pow(x,1) => x");
1844         return 1;
1845     }
1846     if (rop.is_zero(X)) {
1847         // 0^y == 0
1848         rop.turn_into_assign_zero (op, "pow(0,x) => 0");
1849         return 1;
1850     }
1851     if (X.is_constant() && Y.is_constant()) {
1852         // if x and y are both constant, pre-compute x^y
1853         const float *x = (const float *) X.data();
1854         const float *y = (const float *) Y.data();
1855         int nxcomps = X.typespec().is_triple() ? 3 : 1;
1856         int nycomps = Y.typespec().is_triple() ? 3 : 1;
1857         float result[3];
1858         for (int i = 0;  i < nxcomps;  ++i) {
1859             int j = std::min (i, nycomps-1);
1860 #if OSL_FAST_MATH
1861             result[i] = OIIO::fast_safe_pow (x[i], y[j]);
1862 #else
1863             result[i] = OIIO::safe_pow (x[i], y[j]);
1864 #endif
1865         }
1866         int cind = rop.add_constant (X.typespec(), &result);
1867         rop.turn_into_assign (op, cind, "const fold pow");
1868         return 1;
1869     }
1870 
1871     // A few special cases of constant y:
1872     if (Y.is_constant() && Y.typespec().is_float()) {
1873         int resultarg = rop.inst()->args()[op.firstarg()+0];
1874         int xarg = rop.inst()->args()[op.firstarg()+1];
1875         float yval = *(const float *)Y.data();
1876         if (yval == 2.0f) {
1877             rop.turn_into_new_op (op, u_mul, resultarg, xarg, xarg,
1878                                   "pow(x,2) => x*x");
1879             return 1;
1880         }
1881         if (yval == 0.5f) {
1882             rop.turn_into_new_op (op, u_sqrt, resultarg, xarg, -1,
1883                                   "pow(x,0.5) => sqrt(x)");
1884             return 1;
1885         }
1886         if (yval == -0.5f) {
1887             rop.turn_into_new_op (op, u_inversesqrt, resultarg, xarg, -1,
1888                                   "pow(x,-0.5) => inversesqrt(x)");
1889             return 1;
1890         }
1891         if (yval == 1.0f / 3.0f)  {
1892             rop.turn_into_new_op (op, u_cbrt, resultarg, xarg, -1,
1893                                   "pow(x,1.0/3.0) => cbrt(x)");
1894             return 1;
1895         }
1896     }
1897 
1898     return 0;
1899 }
1900 
1901 
1902 
DECLFOLDER(constfold_sincos)
{
    // Try to turn sincos(const_angle,s,c) into s=sin_a, c = cos_a.
    // The angle may be a float or a triple; for a triple, sine and cosine
    // are computed per component and the constants are emitted with the
    // angle's type.  Any other angle type is left alone.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+0));
    const bool angle_is_triple = A.typespec().is_triple();
    if (A.is_constant() && (A.typespec().is_float() || angle_is_triple)) {
        int sinarg = rop.inst()->args()[op.firstarg()+1];
        int cosarg = rop.inst()->args()[op.firstarg()+2];
        const float *angle = (const float *)A.data();
        const int ncomps = angle_is_triple ? 3 : 1;
        float s[3], c[3];
        for (int i = 0;  i < ncomps;  ++i) {
#if OSL_FAST_MATH
            OIIO::fast_sincos (angle[i], &s[i], &c[i]);
#else
            OIIO::sincos (angle[i], &s[i], &c[i]);
#endif
        }
        // A float angle yields float constants; a triple angle yields
        // constants of the angle's own type so that every component of
        // the outputs gets its own value.
        int sinconst = angle_is_triple ? rop.add_constant (A.typespec(), s)
                                       : rop.add_constant (s[0]);
        int cosconst = angle_is_triple ? rop.add_constant (A.typespec(), c)
                                       : rop.add_constant (c[0]);
        // Turn this op into the sin assignment
        rop.turn_into_new_op (op, u_assign, sinarg, sinconst, -1,
                              "const fold sincos");
        // And insert a new op for the cos assignment
        const int args_to_add[] = { cosarg, cosconst };
        rop.insert_code (opnum, u_assign, args_to_add,
                         RuntimeOptimizer::RecomputeRWRanges,
                         RuntimeOptimizer::GroupWithNext);
        return 1;
    }
    return 0;
}
1930 
1931 
1932 
DECLFOLDER(constfold_normalize)
{
    // Fold R=normalize(x) into R=<constant> when x is a constant.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
    OSL_DASSERT(X.typespec().is_triple());
    if (! X.is_constant())
        return 0;

    // Normalize a copy of the constant and store it with x's own type.
    Vec3 unit (*(const Vec3 *)X.data());
    unit.normalize();
    rop.turn_into_assign (op, rop.add_constant (X.typespec(), &unit),
                          "const fold normalize");
    return 1;
}
1948 
1949 
1950 
DECLFOLDER(constfold_triple)
{
    // Fold R=triple(a,b,c) -- or R=triple(space,a,b,c) when the space is
    // known to be the common space -- into a constant triple, provided all
    // three components are constants.  Returns 1 if the op was rewritten,
    // 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 4 || op.nargs() == 5);
    const bool has_space_arg = (op.nargs() == 5);
    // The components follow the result and, when present, the space name.
    const int first_comp = op.firstarg() + (has_space_arg ? 2 : 1);
    Symbol &R (*rop.inst()->argsymbol(op.firstarg()+0));
    Symbol &A (*rop.inst()->argsymbol(first_comp+0));
    Symbol &B (*rop.inst()->argsymbol(first_comp+1));
    Symbol &C (*rop.inst()->argsymbol(first_comp+2));

    // A space argument blocks folding unless it is a constant naming
    // "common" or the configured synonym for it.
    bool needs_transform = has_space_arg;
    if (has_space_arg) {
        Symbol &Space (*rop.inst()->argsymbol(op.firstarg()+1));
        if (Space.is_constant() && (Space.get_string() == Strings::common ||
                                    Space.get_string() == rop.shadingsys().commonspace_synonym()))
            needs_transform = false;
    }
    if (needs_transform)
        return 0;

    if (! (A.is_constant() && A.typespec().is_float() &&
           B.is_constant() && C.is_constant()))
        return 0;

    OSL_DASSERT(A.typespec().is_float() &&
             B.typespec().is_float() && C.typespec().is_float());
    const float comps[3] = { *(const float *)A.data(),
                             *(const float *)B.data(),
                             *(const float *)C.data() };
    rop.turn_into_assign (op, rop.add_constant (R.typespec(), &comps),
                          "triple(const,const,const) => triple constant");
    return 1;
}
1983 
1984 
1985 
DECLFOLDER(constfold_matrix)1986 DECLFOLDER(constfold_matrix)
1987 {
1988     Opcode &op (rop.inst()->ops()[opnum]);
1989     int nargs = op.nargs();
1990     int using_space = rop.opargsym(op,1)->typespec().is_string() ? 1 : 0;
1991     if (using_space && nargs > 2 && rop.opargsym(op,2)->typespec().is_string())
1992         using_space = 2;
1993     int nfloats = nargs - 1 - using_space;
1994     OSL_DASSERT (nfloats == 1 || nfloats == 16 || (nfloats == 0 && using_space == 2));
1995     if (nargs == 3 && using_space == 2) {
1996         // Try to simplify R=matrix(from,to) in cases of an identify
1997         // transform: if From and To are the same variable (even if not a
1998         // constant), or if their values are the same, or if one is "common"
1999         // and the other is the designated common space synonym.
2000         Symbol &From (*rop.inst()->argsymbol(op.firstarg()+1));
2001         Symbol &To (*rop.inst()->argsymbol(op.firstarg()+2));
2002         ustring from = From.is_constant() ? *(ustring *)From.data() : ustring("$unknown1$");
2003         ustring to   = To.is_constant()   ? *(ustring *)To.data()   : ustring("$unknown2$");
2004         ustring commonsyn = rop.inst()->shadingsys().commonspace_synonym();
2005         if (&From == &To || from == to ||
2006             ((from == Strings::common && to == commonsyn) ||
2007              (from == commonsyn && to == Strings::common))) {
2008             static Matrix44 ident (1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1);
2009             rop.turn_into_assign (op, rop.add_constant (ident),
2010                                   "matrix(spaceA,spaceA) => identity matrix");
2011             return 1;
2012         }
2013         // Try to simplify R=matrix(from,to) in cases of an constant (but
2014         // different) names -- do the matrix retrieval now, if not time-
2015         // varying matrices.
2016         if (! (From.is_constant() && To.is_constant()))
2017             return 0;
2018         // Shader and object spaces will vary from execution to execution,
2019         // so we can't optimize those away.
2020         if (from == Strings::shader || from == Strings::object ||
2021             to == Strings::shader || to == Strings::object)
2022             return 0;
2023         // But whatever spaces are left *may* be optimizable if they are
2024         // not time-varying.
2025         RendererServices *rs = rop.shadingsys().renderer();
2026         Matrix44 Mfrom, Mto;
2027         bool ok = true;
2028         if (from == Strings::common || from == commonsyn)
2029             Mfrom.makeIdentity ();
2030         else
2031             ok &= rs->get_matrix (rop.shaderglobals(), Mfrom, from);
2032         if (to == Strings::common || to == commonsyn)
2033             Mto.makeIdentity ();
2034         else
2035             ok &= rs->get_inverse_matrix (rop.shaderglobals(), Mto, to);
2036         if (ok) {
2037             // The from-to matrix is known and not time-varying, so just
2038             // turn it into a constant rather than calling getmatrix at
2039             // execution time.
2040             Matrix44 Mresult = Mfrom * Mto;
2041             int cind = rop.add_constant (TypeDesc::TypeMatrix, &Mresult);
2042             rop.turn_into_assign (op, cind, "const fold matrix");
2043             return 1;
2044         }
2045     }
2046     if (using_space == 1 && nfloats == 1) {
2047         // Turn matrix("common",1) info identity matrix.
2048         Symbol &From (*rop.inst()->argsymbol(op.firstarg()+1));
2049         Symbol &Val (*rop.inst()->argsymbol(op.firstarg()+2));
2050         if (From.is_constant() && Val.is_constant() && *(float *)Val.data() == 1.0f) {
2051             ustring from = *(ustring *)From.data();
2052             if (from == Strings::common ||
2053                 from == rop.inst()->shadingsys().commonspace_synonym()) {
2054                 static Matrix44 ident (1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1);
2055                 rop.turn_into_assign (op, rop.add_constant (ident),
2056                                       "matrix(\"common\",1) => identity matrix");
2057             }
2058         }
2059     }
2060     if (nfloats == 16 && using_space == 0) {
2061         // Try to turn matrix(...16 float consts...) into just a const
2062         // matrix assign.
2063         bool all_const = true;
2064         float M[16];
2065         for (int i = 0; i < 16; ++i) {
2066             Symbol &Val (*rop.inst()->argsymbol(op.firstarg()+1+i));
2067             if (Val.is_constant())
2068                 M[i] = *(const float *)Val.data();
2069             else {
2070                 all_const = false;
2071                 break;
2072             }
2073         }
2074         if (all_const) {
2075             rop.turn_into_assign (op, rop.add_constant (TypeDesc::TypeMatrix, M),
2076                                   "const fold matrix");
2077             return 1;
2078         }
2079     }
2080     if (nfloats == 1 && using_space == 0) {
2081         // Try to turn matrix(const float) into just a const matrix assign.
2082         Symbol &Val (*rop.inst()->argsymbol(op.firstarg()+1));
2083         if (Val.is_constant()) {
2084             float val = *(float *)Val.data();
2085             Matrix44 M (val,0,0,0, 0,val,0,0, 0,0,val,0, 0,0,0,val);
2086             rop.turn_into_assign (op, rop.add_constant (M), "const fold matrix");
2087             return 1;
2088         }
2089     }
2090     return 0;
2091 }
2092 
2093 
2094 
DECLFOLDER(constfold_getmatrix)2095 DECLFOLDER(constfold_getmatrix)
2096 {
2097     // Try to turn R=getmatrix(from,to,M) into R=1,M=const if it's an
2098     // identity transform or if the result is a non-time-varying matrix.
2099     Opcode &op (rop.inst()->ops()[opnum]);
2100     Symbol &From (*rop.inst()->argsymbol(op.firstarg()+1));
2101     Symbol &To (*rop.inst()->argsymbol(op.firstarg()+2));
2102     if (! (From.is_constant() && To.is_constant()))
2103         return 0;
2104     // OK, From and To are constant strings.
2105     ustring from = *(ustring *)From.data();
2106     ustring to = *(ustring *)To.data();
2107     ustring commonsyn = rop.inst()->shadingsys().commonspace_synonym();
2108 
2109     // Shader and object spaces will vary from execution to execution,
2110     // so we can't optimize those away.
2111     if (from == Strings::shader || from == Strings::object ||
2112         to == Strings::shader || to == Strings::object)
2113         return 0;
2114 
2115     // But whatever spaces are left *may* be optimizable if they are
2116     // not time-varying.
2117     RendererServices *rs = rop.shadingsys().renderer();
2118     Matrix44 Mfrom, Mto;
2119     bool ok = true;
2120     if (from == Strings::common || from == commonsyn || from == to)
2121         Mfrom.makeIdentity ();
2122     else
2123         ok &= rs->get_matrix (rop.shaderglobals(), Mfrom, from);
2124     if (to == Strings::common || to == commonsyn || from == to)
2125         Mto.makeIdentity ();
2126     else
2127         ok &= rs->get_inverse_matrix (rop.shaderglobals(), Mto, to);
2128     if (ok) {
2129         // The from-to matrix is known and not time-varying, so just
2130         // turn it into a constant rather than calling getmatrix at
2131         // execution time.
2132         int resultarg = rop.inst()->args()[op.firstarg()+0];
2133         int dataarg = rop.inst()->args()[op.firstarg()+3];
2134         // Make data the first argument
2135         rop.inst()->args()[op.firstarg()+0] = dataarg;
2136         // Now turn it into an assignment
2137         Matrix44 Mresult = Mfrom * Mto;
2138         int cind = rop.add_constant (TypeDesc::TypeMatrix, &Mresult);
2139         rop.turn_into_assign (op, cind, "getmatrix of known matrix");
2140 
2141         // Now insert a new instruction that assigns 1 to the
2142         // original return result of getmatrix.
2143         const int one = 1;
2144         const int args_to_add[] = { resultarg, rop.add_constant (TypeDesc::TypeInt, &one) };
2145         rop.insert_code (opnum, u_assign, args_to_add,
2146                          RuntimeOptimizer::RecomputeRWRanges,
2147                          RuntimeOptimizer::GroupWithNext);
2148         return 1;
2149     }
2150     return 0;
2151 }
2152 
2153 
2154 
DECLFOLDER(constfold_transform)
{
    // Replace identity transforms with plain assignments.  Two forms are
    // recognized: the 3-argument form with a matrix operand, and the
    // 4-argument form with "from" and "to" space names.  Returns 1 if the
    // op was rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &M (*rop.inst()->argsymbol(op.firstarg()+1));
    const int nargs = op.nargs();

    if (nargs == 3) {
        // transform(matrix, p): foldable when the matrix is the identity.
        if (M.typespec().is_matrix() && rop.is_one(M)) {
            rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+2),
                                  "transform by identity");
            return 1;
        }
        return 0;
    }
    if (nargs != 4)
        return 0;

    // transform(from, to, p): here M holds the "from" space name.
    Symbol &T (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (M.is_constant() && T.is_constant()))
        return 0;
    OSL_DASSERT(M.typespec().is_string() && T.typespec().is_string());

    // Map the common-space synonym onto "common" before comparing.
    ustring syn = rop.shadingsys().commonspace_synonym();
    ustring from = *(ustring *)M.data();
    ustring to = *(ustring *)T.data();
    if (from == syn)
        from = Strings::common;
    if (to == syn)
        to = Strings::common;
    if (from != to)
        return 0;

    rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+3),
                          "transform by identity");
    return 1;
}
2185 
2186 
2187 
DECLFOLDER(constfold_transformc)
{
    // Fold transformc (result, from, to, color) when both color space
    // names are constant:
    //   * identical spaces            -> plain assignment of the input color
    //   * constant input color as well -> assignment of the converted color
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &FromSpace = *rop.opargsym (op, 1);
    Symbol &ToSpace = *rop.opargsym (op, 2);
    Symbol &InColor = *rop.opargsym (op, 3);

    if (! (FromSpace.is_constant() && ToSpace.is_constant()))
        return 0;

    // "RGB" is treated as just another spelling of "rgb".
    ustring fromname = FromSpace.get_string();
    ustring toname = ToSpace.get_string();
    if (fromname == Strings::RGB)
        fromname = Strings::rgb;
    if (toname == Strings::RGB)
        toname = Strings::rgb;

    if (fromname == toname) {
        // Converting a space to itself changes nothing.
        rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+3),
                              "transformc by identity");
        return 1;
    }

    if (! InColor.is_constant())
        return 0;

    // Everything is known: do the conversion now and store the answer.
    const Color3 source (InColor.get_float(0), InColor.get_float(1),
                         InColor.get_float(2));
    const Color3 converted = rop.shadingsys().colorsystem().transformc (
        fromname, toname, source, rop.shadingcontext());
    rop.turn_into_assign (op, rop.add_constant(converted),
                          "transformc => constant");
    return 1;
}
2218 
2219 
2220 
DECLFOLDER(constfold_setmessage)
{
    // setmessage is never rewritten here (the return value is always 0);
    // this folder only records with the optimizer which message the op
    // sets, or that an unknown message is set when the name is not a
    // constant.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &MsgName (*rop.inst()->argsymbol(op.firstarg()));

    if (! MsgName.is_constant()) {
        // Name only known at run time.
        rop.register_unknown_message ();
        return 0;
    }

    OSL_DASSERT (MsgName.typespec().is_string());
    rop.register_message (*(ustring *)MsgName.data());
    return 0;
}
2236 
2237 
2238 
2239 
DECLFOLDER(constfold_getmessage)
{
    // Turn a getmessage whose (constant) message name could not possibly
    // have been set into an assignment of 0 to its status result.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);

    // The four-argument form carries an explicit source; never fold it.
    if (op.nargs() == 4)
        return 0;    // Don't optimize away sourced getmessage

    // Without a source argument, the message name is argument 1.
    Symbol &MsgName (*rop.inst()->argsymbol(op.firstarg()+1));
    if (! MsgName.is_constant())
        return 0;

    OSL_DASSERT (MsgName.typespec().is_string());
    if (rop.message_possibly_set (*(ustring *)MsgName.data()))
        return 0;    // might really be there at run time

    // The message can't have been sent: drop the getmessage op, leave
    // the destination value alone, and report status 0.
    rop.turn_into_assign_zero (op, "impossible getmessage");
    return 1;
}
2259 
2260 
2261 
2262 
DECLFOLDER(constfold_getattribute)2263 DECLFOLDER(constfold_getattribute)
2264 {
2265     if (! rop.shadingsys().fold_getattribute())
2266         return 0;
2267 
2268     // getattribute() has eight "flavors":
2269     //   * getattribute (attribute_name, value)
2270     //   * getattribute (attribute_name, value[])
2271     //   * getattribute (attribute_name, index, value)
2272     //   * getattribute (attribute_name, index, value[])
2273     //   * getattribute (object, attribute_name, value)
2274     //   * getattribute (object, attribute_name, value[])
2275     //   * getattribute (object, attribute_name, index, value)
2276     //   * getattribute (object, attribute_name, index, value[])
2277     Opcode &op (rop.inst()->ops()[opnum]);
2278     int nargs = op.nargs();
2279     OSL_DASSERT(nargs >= 3 && nargs <= 5);
2280     bool array_lookup = rop.opargsym(op,nargs-2)->typespec().is_int();
2281     bool object_lookup = rop.opargsym(op,2)->typespec().is_string() && nargs >= 4;
2282     int object_slot = (int)object_lookup;
2283     int attrib_slot = object_slot + 1;
2284     int index_slot = nargs - 2;
2285     int dest_slot = nargs - 1;
2286 
2287 //    Symbol& Result      = *rop.opargsym (op, 0);
2288     Symbol& ObjectName  = *rop.opargsym (op, object_slot); // only valid if object_slot is true
2289     Symbol& Attribute   = *rop.opargsym (op, attrib_slot);
2290     Symbol& Index       = *rop.opargsym (op, index_slot);  // only valid if array_lookup is true
2291     Symbol& Destination = *rop.opargsym (op, dest_slot);
2292 
2293     if (! Attribute.is_constant() ||
2294         (object_lookup && ! ObjectName.is_constant()) ||
2295         (array_lookup && ! Index.is_constant()))
2296         return 0;   // Non-constant things prevent a fold
2297     if (Destination.typespec().is_array())
2298         return 0;   // Punt on arrays for now
2299 
2300     ustring attr_name = *(const ustring *)Attribute.data();
2301     const size_t maxbufsize = 1024;
2302     char buf[maxbufsize];
2303     TypeDesc attr_type = Destination.typespec().simpletype();
2304     if (attr_type.size() > maxbufsize)
2305         return 0;  // Don't constant fold humongous things
2306 
2307     bool found = false;
2308 
2309     // Check global things first
2310     if (attr_name == "osl:version" && attr_type == TypeDesc::TypeInt) {
2311         int *val = (int *)(char *)buf;
2312         *val = OSL_VERSION;
2313         found = true;
2314     } else if (attr_name == "shader:shadername" && attr_type == TypeDesc::TypeString) {
2315         ustring *up = (ustring *)(char *)buf;
2316         *up = ustring(rop.inst()->shadername());
2317         found = true;
2318     } else if (attr_name == "shader:layername" && attr_type == TypeDesc::TypeString) {
2319         ustring *up = (ustring *)(char *)buf;
2320         *up = rop.inst()->layername();
2321         found = true;
2322     } else if (attr_name == "shader:groupname" && attr_type == TypeDesc::TypeString) {
2323         ustring *up = (ustring *)(char *)buf;
2324         *up = rop.group().name();
2325         found = true;
2326     }
2327 
2328     if (!found) {
2329         // If the object name is not supplied, it implies that we are
2330         // supposed to search the shaded object first, then if that fails,
2331         // the scene-wide namespace.  We can't do that yet, have to wait
2332         // until shade time.
2333         ustring obj_name;
2334         if (object_lookup)
2335             obj_name = *(const ustring *)ObjectName.data();
2336         if (obj_name.empty())
2337             return 0;
2338 
2339         found = array_lookup
2340             ? rop.renderer()->get_array_attribute (NULL, false,
2341                                                    obj_name, attr_type, attr_name,
2342                                                    *(const int *)Index.data(), buf)
2343             : rop.renderer()->get_attribute (NULL, false,
2344                                              obj_name, attr_type, attr_name,
2345                                              buf);
2346     }
2347 
2348     if (found) {
2349         // Now we turn the existing getattribute op into this for success:
2350         //       assign result 1
2351         //       assign data [retrieved values]
2352         // but if it fails, don't change anything, because we want it to
2353         // issue errors at runtime.
2354 
2355         // Make the data destination be the first argument
2356         int oldresultarg = rop.inst()->args()[op.firstarg()+0];
2357         int dataarg = rop.inst()->args()[op.firstarg()+dest_slot];
2358         rop.inst()->args()[op.firstarg()+0] = dataarg;
2359         // Now turn it into an assignment
2360         int cind = rop.add_constant (attr_type, &buf);
2361         rop.turn_into_assign (op, cind, "const fold getattribute");
2362         // Now insert a new instruction that assigns 1 to the
2363         // original return result of getattribute.
2364         const int one = 1;
2365         const int args_to_add[] = { oldresultarg, rop.add_constant (TypeDesc::TypeInt, &one) };
2366         rop.insert_code (opnum, u_assign, args_to_add,
2367                          RuntimeOptimizer::RecomputeRWRanges,
2368                          RuntimeOptimizer::GroupWithNext);
2369         return 1;
2370     } else {
2371         return 0;
2372     }
2373 }
2374 
2375 
2376 
DECLFOLDER(constfold_gettextureinfo)2377 DECLFOLDER(constfold_gettextureinfo)
2378 {
2379     Opcode &op (rop.inst()->ops()[opnum]);
2380     OSL_MAYBE_UNUSED Symbol &Result (*rop.inst()->argsymbol(op.firstarg()+0));
2381     Symbol &Filename (*rop.inst()->argsymbol(op.firstarg()+1));
2382     Symbol &Dataname (*rop.inst()->argsymbol(op.firstarg()+2));
2383     Symbol &Data (*rop.inst()->argsymbol(op.firstarg()+3));
2384     OSL_DASSERT (Result.typespec().is_int() &&
2385                  Filename.typespec().is_string() &&
2386                  Dataname.typespec().is_string());
2387 
2388     if (Filename.is_constant() && Dataname.is_constant()) {
2389         ustring filename = *(ustring *)Filename.data();
2390         ustring dataname = *(ustring *)Dataname.data();
2391         TypeDesc t = Data.typespec().simpletype();
2392         void *mydata = OIIO_ALLOCA(char, t.size());
2393         // FIXME(ptex) -- exclude folding of ptex, since these things
2394         // can vary per face.
2395         ustring errormessage;
2396         int result = rop.renderer()->get_texture_info (filename, nullptr,
2397                                                        rop.shadingcontext()->texture_thread_info(),
2398                                                        rop.shadingcontext(),
2399                                                        0 /* TODO: subimage? */,
2400                                                        dataname, t, mydata, &errormessage);
2401         // Now we turn
2402         //       gettextureinfo result filename dataname data
2403         // into this for success:
2404         //       assign data [retrieved values]
2405         //       assign result 1
2406         // into this for failure:
2407         //       error "%s" errormesage
2408         //       assign result 0
2409         if (result) {
2410             int oldresultarg = rop.inst()->args()[op.firstarg()+0];
2411             int dataarg = rop.inst()->args()[op.firstarg()+3];
2412             // Make data the first argument
2413             rop.inst()->args()[op.firstarg()+0] = dataarg;
2414             // Now turn it into an assignment
2415             int cind = rop.add_constant (Data.typespec(), mydata);
2416             rop.turn_into_assign (op, cind, "const fold gettextureinfo");
2417 
2418             // Now insert a new instruction that assigns 1 to the
2419             // original return result of gettextureinfo.
2420             int one = 1;
2421             const int args_to_add[] = {
2422                 oldresultarg,
2423                 rop.add_constant (TypeDesc::TypeInt, &one)
2424             };
2425             rop.insert_code (opnum, u_assign, args_to_add,
2426                              RuntimeOptimizer::RecomputeRWRanges,
2427                              RuntimeOptimizer::GroupWithNext);
2428             return 1;
2429         } else {
2430             // Constant fold to 0
2431             rop.turn_into_assign_zero (op, "const fold gettextureinfo");
2432             if (errormessage.size()) {
2433                 // display the error message if control flow ever reaches here
2434                 const int args_to_add[] = {
2435                     rop.add_constant(u_fmterror),
2436                     rop.add_constant(errormessage)
2437                 };
2438                 rop.insert_code(opnum, u_error, args_to_add,
2439                                  RuntimeOptimizer::RecomputeRWRanges,
2440                                  RuntimeOptimizer::GroupWithNext);
2441                 Opcode &newop (rop.inst()->ops()[opnum]);
2442                 newop.argreadonly(0);
2443                 newop.argreadonly(1);
2444             }
2445             return 1;
2446         }
2447     }
2448     return 0;
2449 }
2450 
2451 
2452 
2453 // texture -- we can eliminate a lot of superfluous setting of optional
2454 // parameters to their default values.
DECLFOLDER(constfold_texture)2455 DECLFOLDER(constfold_texture)
2456 {
2457     Opcode &op (rop.inst()->ops()[opnum]);
2458     // Symbol &Result = *rop.opargsym (op, 0);
2459     // Symbol &Filename = *rop.opargsym (op, 1);
2460     // Symbol &S = *rop.opargsym (op, 2);
2461     // Symbol &T = *rop.opargsym (op, 3);
2462 
2463     int first_optional_arg = 4;
2464     if (op.nargs() > 4 && rop.opargsym(op,4)->typespec().is_float()) {
2465         //user_derivs = true;
2466         first_optional_arg = 8;
2467         OSL_DASSERT(rop.opargsym(op,5)->typespec().is_float());
2468         OSL_DASSERT(rop.opargsym(op,6)->typespec().is_float());
2469         OSL_DASSERT(rop.opargsym(op,7)->typespec().is_float());
2470     }
2471 
2472     TextureOpt opt;  // So we can check the defaults
2473     bool swidth_set = false, twidth_set = false, rwidth_set = false;
2474     bool sblur_set = false, tblur_set = false, rblur_set = false;
2475     bool swrap_set = false, twrap_set = false, rwrap_set = false;
2476     bool firstchannel_set = false, fill_set = false, interp_set = false;
2477     bool any_elided = false;
2478     for (int i = first_optional_arg;  i < op.nargs()-1;  i += 2) {
2479         Symbol &Name = *rop.opargsym (op, i);
2480         Symbol &Value = *rop.opargsym (op, i+1);
2481         OSL_DASSERT(Name.typespec().is_string());
2482         if (Name.is_constant() && Value.is_constant()) {
2483             ustring name = *(ustring *)Name.data();
2484             bool elide = false;
2485             void *value = Value.data();
2486             TypeDesc valuetype = Value.typespec().simpletype();
2487 
2488 // Keep from repeating the same tedious code for {s,t,r, }{width,blur,wrap}
2489 #define CHECK(field,ctype,osltype)                              \
2490             if (name == Strings::field && ! field##_set) {      \
2491                 if (valuetype == osltype &&                     \
2492                          *(ctype *)value == opt.field)          \
2493                     elide = true;                               \
2494                 else if (osltype == TypeDesc::FLOAT &&          \
2495                          valuetype == TypeDesc::INT &&          \
2496                          *(int *)value == opt.field)            \
2497                     elide = true;                               \
2498                 else                                            \
2499                     field##_set = true;                         \
2500             }
2501 #define CHECK_str(field,ctype,osltype)                              \
2502             CHECK (s##field,ctype,osltype)                          \
2503             else CHECK (t##field,ctype,osltype)                     \
2504             else CHECK (r##field,ctype,osltype)                     \
2505             else if (name == Strings::field && !s##field##_set &&   \
2506                      ! t##field##_set && ! r##field##_set) {        \
2507                 if (valuetype == osltype) {                         \
2508                     ctype *v = (ctype *)value;                      \
2509                     if (*v == opt.s##field && *v == opt.t##field    \
2510                         && *v == opt.r##field)                      \
2511                         elide = true;                               \
2512                     else {                                          \
2513                         s##field##_set = true;                      \
2514                         t##field##_set = true;                      \
2515                         r##field##_set = true;                      \
2516                     }                                               \
2517                 } else if (osltype == TypeDesc::FLOAT &&            \
2518                            valuetype == TypeDesc::INT) {            \
2519                     int *v = (int *)value;                          \
2520                     if (*v == opt.s##field && *v == opt.t##field    \
2521                         && *v == opt.r##field)                      \
2522                         elide = true;                               \
2523                     else {                                          \
2524                         s##field##_set = true;                      \
2525                         t##field##_set = true;                      \
2526                         r##field##_set = true;                      \
2527                     }                                               \
2528                 }                                                   \
2529             }
2530 
2531 #ifdef __clang__
2532 #pragma clang diagnostic push
2533 #pragma clang diagnostic ignored "-Wtautological-compare"
2534 #endif
2535             CHECK_str (width, float, TypeDesc::FLOAT)
2536             else CHECK_str (blur, float, TypeDesc::FLOAT)
2537             else CHECK (firstchannel, int, TypeDesc::INT)
2538             else CHECK (fill, float, TypeDesc::FLOAT)
2539 
2540             else if ((name == Strings::wrap || name == Strings::swrap ||
2541                  name == Strings::twrap || name == Strings::rwrap)
2542                  && value && valuetype == TypeDesc::STRING) {
2543                 // Special trick is needed for wrap modes because the input
2544                 // is a string but the field we're setting is an int enum.
2545                 OIIO::Tex::Wrap wrapmode = OIIO::Tex::decode_wrapmode (*(ustring *)value);
2546                 void* value = &wrapmode;
2547                 CHECK_str (wrap, int, TypeDesc::INT);
2548             }
2549 #ifdef __clang__
2550 #pragma clang diagnostic pop
2551 #endif
2552 #undef CHECK_STR
2553 #undef CHECK
2554 
2555             // Cases that don't fit the pattern
2556             else if (name == Strings::interp && !interp_set) {
2557                 if (value && valuetype == TypeDesc::STRING &&
2558                     tex_interp_to_code(*(ustring *)value) == opt.interpmode)
2559                     elide = true;
2560                 else
2561                     interp_set = true;
2562             }
2563 
2564             if (elide) {
2565                 // Just turn the param name into empty string and it will
2566                 // be skipped.
2567                 ustring empty;
2568                 int cind = rop.add_constant (TypeDesc::TypeString, &empty);
2569                 rop.inst()->args()[op.firstarg()+i] = cind;
2570                 rop.inst()->args()[op.firstarg()+i+1] = cind;
2571                 any_elided = true;
2572             }
2573         }
2574     }
2575     return any_elided;
2576 }
2577 
2578 
2579 
DECLFOLDER(constfold_pointcloud_search)2580 DECLFOLDER(constfold_pointcloud_search)
2581 {
2582     Opcode &op (rop.inst()->ops()[opnum]);
2583     OSL_DASSERT(op.nargs() >= 5);
2584     int result_sym     = rop.oparg (op, 0);
2585     Symbol& Filename   = *rop.opargsym (op, 1);
2586     Symbol& Center     = *rop.opargsym (op, 2);
2587     Symbol& Radius     = *rop.opargsym (op, 3);
2588     Symbol& Max_points = *rop.opargsym (op, 4);
2589     OSL_DASSERT(Filename.typespec().is_string() &&
2590              Center.typespec().is_triple() && Radius.typespec().is_float() &&
2591              Max_points.typespec().is_int());
2592 
2593     // Can't constant fold unless all the required input args are constant
2594     if (! (Filename.is_constant() && Center.is_constant() &&
2595            Radius.is_constant() && Max_points.is_constant()))
2596         return 0;
2597 
2598     // Handle the optional 'sort' flag, and don't bother constant folding
2599     // if sorted results may be required.
2600     int attr_arg_offset = 5; // where the opt attrs begin
2601     if (op.nargs() > 5 && rop.opargsym(op,5)->typespec().is_int()) {
2602         // Sorting requested
2603         Symbol *Sort = rop.opargsym(op,5);
2604         if (! Sort->is_constant() || *(int *)Sort->data())
2605             return 0;  // forget it if sorted data might be requested
2606         ++attr_arg_offset;
2607     }
2608     int nattrs = (op.nargs() - attr_arg_offset) / 2;
2609 
2610     // First pass through the optional arguments: gather the query names,
2611     // types, and destinations.  If any of the query names are not known
2612     // constants, we can't optimize this call so just return.
2613     std::vector<ustring> names;
2614     std::vector<int> value_args;
2615     std::vector<TypeDesc> value_types;
2616     for (int i = 0, num_queries = 0; i < nattrs; ++i) {
2617         Symbol& Name  = *rop.opargsym (op, attr_arg_offset + i*2);
2618         Symbol& Value = *rop.opargsym (op, attr_arg_offset + i*2 + 1);
2619         OSL_ASSERT (Name.typespec().is_string());
2620         if (!Name.is_constant())
2621             return 0;  // unknown optional argument, punt
2622         if (++num_queries > RuntimeOptimizer::max_new_consts_per_fold)
2623             return 0;
2624         names.push_back (*(ustring *)Name.data());
2625         value_args.push_back (rop.oparg (op, attr_arg_offset + i*2 + 1));
2626         value_types.push_back (Value.typespec().simpletype());
2627     }
2628 
2629     // We're doing a fixed query, so instead of running at every shade,
2630     // perform the search now.
2631     const int maxconst = 256;  // Max number of points to consider a constant
2632     size_t indices[maxconst+1]; // Make room for one more!
2633     float distances[maxconst+1];
2634     int maxpoints = std::min (maxconst+1, *(int *)Max_points.data());
2635     ustring filename = *(ustring *)Filename.data();
2636     int count = 0;
2637     if (! filename.empty()) {
2638         count = rop.renderer()->pointcloud_search (rop.shaderglobals(), filename,
2639                              *(Vec3 *)Center.data(), *(float *)Radius.data(),
2640                              maxpoints, false, indices, distances, 0);
2641         rop.shadingsys().pointcloud_stats (1, 0, count);
2642     }
2643 
2644     // If it returns few enough results (256 points or less), just fold
2645     // those results into constant arrays.  If more than that, let the
2646     // query happen at runtime to avoid tying up a bunch of memory.
2647     if (count > maxconst)
2648         return 0;
2649 
2650     // If the query returned no matching points, just turn the whole
2651     // pointcloud_search call into an assignment of 0 to the 'result'.
2652     if (count < 1) {
2653         rop.turn_into_assign_zero (op, "Folded constant pointcloud_search lookup");
2654         return 1;
2655     }
2656 
2657     // From here on out, we are able to fold the query (it returned
2658     // results, but not too many).  Start by removing the original
2659     // pointcloud_search call itself from the shader code.
2660     rop.turn_into_nop (op, "Folded constant pointcloud_search lookup");
2661 
2662     // Now, for each optional individual query, do a pointcloud_get NOW
2663     // to retrieve it, create a constant array for the shader to hold
2664     // those results, and add to the shader an array copy to move it
2665     // from the constant into the place the shader wanted the query
2666     // results to go.  (This assignment can be further optimized later
2667     // on as well, depending on how it's used.)  If any of the individual
2668     // queries fail now, we will return a failed result in the end.
2669     std::vector<char> tmp;  // temporary data
2670     for (int i = 0; i < nattrs; ++i) {
2671         // We had stashed names, data types, and destinations earlier.
2672         // Retrieve them now to build a query.
2673         if (names[i].empty())
2674             continue;
2675         void *const_data = NULL;
2676         TypeDesc const_valtype = value_types[i];
2677         tmp.clear ();
2678         tmp.resize (const_valtype.size(), 0);
2679         const_data = &tmp[0];
2680         if (names[i] == "index") {
2681             // "index" is a special case -- it's retrieving the hit point
2682             // indices, not data on those hit points.
2683             //
2684             // Because the presumed Partio underneath passes indices as
2685             // size_t, but OSL only allows int parameters, we need to
2686             // copy.  But just cast if size_t and int are the same size.
2687             if (sizeof(size_t) == sizeof(int)) {
2688                 const_data = indices;
2689             } else {
2690                 int *int_indices = (int *)const_data;
2691                 for (int i = 0;  i < count;  ++i)
2692                     int_indices[i] = (int) indices[i];
2693             }
2694         } else {
2695             // Named queries.
2696             bool ok = rop.renderer()->pointcloud_get (rop.shaderglobals(),
2697                                           filename, indices, count,
2698                                           names[i], const_valtype, const_data);
2699             rop.shadingsys().pointcloud_stats (0, 1, 0);
2700             if (! ok) {
2701                 count = 0;  // Make it look like an error in the end
2702                 break;
2703             }
2704         }
2705         // Now make a constant array for those results we just retrieved...
2706         int const_array_sym = rop.add_constant (const_valtype, const_data);
2707         // ... and add an instruction to copy the constant into the
2708         // original destination for the query.
2709         const int args_to_add[] = { value_args[i], const_array_sym };
2710         rop.insert_code (opnum, u_assign, args_to_add,
2711                          RuntimeOptimizer::RecomputeRWRanges,
2712                          RuntimeOptimizer::GroupWithNext);
2713     }
2714 
2715     // Query results all copied.  The only thing left to do is to assign
2716     // status (query result count) to the original "result".
2717     const int args_to_add[] = { result_sym, rop.add_constant (TypeDesc::TypeInt, &count) };
2718     rop.insert_code (opnum, u_assign, args_to_add,
2719                      RuntimeOptimizer::RecomputeRWRanges,
2720                      RuntimeOptimizer::GroupWithNext);
2721 
2722     return 1;
2723 }
2724 
2725 
2726 
DECLFOLDER(constfold_pointcloud_get)
{
    // Fold pointcloud_get (result, filename, indices, count, attr, data)
    // when every input is constant: perform the lookup now, turn the op
    // into an assignment of the status to 'result', and add an assignment
    // of the retrieved values to 'data'.
    // Returns 1 if the op was rewritten, 0 if it was left for run time.
    Opcode &op (rop.inst()->ops()[opnum]);
    // Symbol& Result     = *rop.opargsym (op, 0);
    Symbol& Filename   = *rop.opargsym (op, 1);
    Symbol& Indices    = *rop.opargsym (op, 2);
    Symbol& Count      = *rop.opargsym (op, 3);
    Symbol& Attr_name  = *rop.opargsym (op, 4);
    Symbol& Data       = *rop.opargsym (op, 5);
    if (! (Filename.is_constant() && Indices.is_constant() &&
           Count.is_constant() && Attr_name.is_constant()))
        return 0;

    // All inputs are constants -- we can just turn this into an array
    // assignment.

    ustring filename = *(ustring *)Filename.data();
    int count = *(int *)Count.data();
    if (filename.empty() || count < 1) {
        rop.turn_into_assign_zero (op, "Folded constant pointcloud_get");
        return 1;
    }

    if (count >= 1024)  // Too many, don't bother folding
        return 0;

    // 'count' entries are read from the constant Indices data below, and
    // 'count' results are requested into a buffer sized after Data's
    // type.  If either one holds fewer than 'count' elements that would
    // run off the end, so don't fold such a call.
    const TypeDesc valtype = Data.typespec().simpletype();
    if (Indices.typespec().simpletype().numelements() < size_t(count) ||
        valtype.numelements() < size_t(count))
        return 0;

    // Remember where the results go before the op is rewritten.
    const int data_arg = rop.oparg (op, 5);

    // Must transfer to size_t array
    size_t *indices = OIIO_ALLOCA(size_t, count);
    const int *src_indices = (const int *)Indices.data();
    for (int i = 0;  i < count;  ++i)
        indices[i] = src_indices[i];

    std::vector<char> data (valtype.size());
    int ok = rop.renderer()->pointcloud_get (rop.shaderglobals(), filename,
                                             indices, count,
                                             *(ustring *)Attr_name.data(),
                                             valtype, &data[0]);
    rop.shadingsys().pointcloud_stats (0, 1, 0);

    // The op itself becomes "assign result <status>".
    rop.turn_into_assign (op, rop.add_constant (TypeDesc::TypeInt, &ok),
                          "Folded constant pointcloud_get");

    // Now make a constant array for those results we just retrieved...
    int const_array_sym = rop.add_constant (valtype, &data[0]);
    // ... and add an instruction to copy the constant into the
    // original destination for the query.
    const int args_to_add[] = { data_arg, const_array_sym };
    rop.insert_code (opnum, u_assign, args_to_add,
                     RuntimeOptimizer::RecomputeRWRanges,
                     RuntimeOptimizer::GroupWithNext);
    return 1;
}
2779 
2780 
2781 
DECLFOLDER(constfold_noise)2782 DECLFOLDER(constfold_noise)
2783 {
2784     Opcode &op (rop.inst()->ops()[opnum]);
2785 
2786     // Decode some info about which noise function we're dealing with
2787 //    bool periodic = (op.opname() == Strings::pnoise);
2788     int arg = 0;   // Next arg to read
2789     Symbol &Result = *rop.opargsym (op, arg++);
2790     int outdim = Result.typespec().is_triple() ? 3 : 1;
2791     Symbol *Name = rop.opargsym (op, arg++);
2792     ustring name;
2793     if (Name->typespec().is_string()) {
2794         if (Name->is_constant())
2795             name = *(ustring *)Name->data();
2796     } else {
2797         // Not a string, must be the old-style noise/pnoise
2798         --arg;  // forget that arg
2799         Name = NULL;
2800         name = op.opname();
2801     }
2802 
2803     // Noise with name that is not a constant at osl-compile-time was marked
2804     // as taking the derivs of its coordinate arguments. If at this point we
2805     // can determine that the name is known and not "gabor", when we can
2806     // turn its derivative taking off.
2807     if (op.argtakesderivs_all() &&  name.length() && name != "gabor")
2808         op.argtakesderivs_all(0);
2809 
2810     // Gabor noise is the only one that takes optional arguments, so
2811     // optimize them away for other noise types.
2812     if (name.length() && name != "gabor") {
2813         for (int a = arg; a < op.nargs(); ++a) {
2814             // Advance until we hit a string argument, which will be the
2815             // first optional token/value pair. Then just turn all arguments
2816             // from that point on into empty strings, which will later be
2817             // skipped, and in the mean time will eliminate the dependencies
2818             // on whatever values were previously passed.
2819             if (rop.opargsym(op,a)->typespec().is_string()) {
2820                 for ( ; a < op.nargs(); a += 2) {
2821                     OSL_ASSERT (a+1 < op.nargs());
2822                     int cind = rop.add_constant (ustring());
2823                     rop.inst()->args()[op.firstarg()+a] = cind;
2824                     rop.inst()->args()[op.firstarg()+a+1] = cind;
2825                 }
2826             }
2827         }
2828     }
2829 
2830     // Early out: for now, we only fold cell noise
2831     if (name != u_cellnoise && name != u_cell)
2832         return 0;
2833 
2834     // Take an early out if any args are not constant (other than the result)
2835     for (int i = 1; i < op.nargs(); ++i)
2836         if (! rop.opargsym(op,i)->is_constant())
2837             return 0;
2838 
2839     // Extract the constant input coordinates
2840     float input[4];
2841     int indim = 0;
2842     for ( ; arg < op.nargs() && indim < 4; ++arg) {
2843         Symbol *in = rop.opargsym(op,arg);
2844         if (in->typespec().is_float()) {
2845             input[indim++] = ((float *)in->data())[0];
2846         } else if (in->typespec().is_triple()) {
2847             input[indim++] = ((float *)in->data())[0];
2848             input[indim++] = ((float *)in->data())[1];
2849             input[indim++] = ((float *)in->data())[2];
2850         }
2851         else
2852             return 0;  // optional args starting, we don't fold them yet
2853     }
2854 
2855 #if OSL_GNUC_VERSION >= 90000
2856 #    pragma GCC diagnostic push
2857 #    pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
2858 #endif
2859     if (name == u_cellnoise || name == u_cell) {
2860         CellNoise cell;
2861         if (outdim == 1) {
2862             float n;
2863             if (indim == 1)
2864                 cell (n, input[0]);
2865             else if (indim == 2)
2866                 cell (n, input[0], input[1]);
2867             else if (indim == 3)
2868                 cell (n, Vec3(input[0], input[1], input[2]));
2869             else
2870                 cell (n, Vec3(input[0], input[1], input[2]), input[3]);
2871             int cind = rop.add_constant (n);
2872             rop.turn_into_assign (op, cind, "const fold cellnoise");
2873             return 1;
2874         } else {
2875             OSL_DASSERT (outdim == 3);
2876             Vec3 n;
2877             if (indim == 1)
2878                 cell (n, input[0]);
2879             else if (indim == 2)
2880                 cell (n, input[0], input[1]);
2881             else if (indim == 3)
2882                 cell (n, Vec3(input[0], input[1], input[2]));
2883             else
2884                 cell (n, Vec3(input[0], input[1], input[2]), input[3]);
2885             int cind = rop.add_constant (TypeDesc::TypePoint, &n);
2886             rop.turn_into_assign (op, cind, "const fold cellnoise");
2887             return 1;
2888         }
2889     }
2890 #if OSL_GNUC_VERSION >= 90000
2891 #    pragma GCC diagnostic pop
2892 #endif
2893 
2894     return 0;
2895 }
2896 
2897 
2898 
DECLFOLDER(constfold_functioncall)
{
    // Simplify a "functioncall" op according to what is left in its body
    // (the ops between opnum+1 and the op's jump(0) target):
    //   * body holds nothing but nops and/or 'return' ops: turn the whole
    //     block, including the functioncall op itself, into nops;
    //   * body does real work but has no 'return': the call op is either
    //     transmuted to its 'no return' variant or turned into a nop,
    //     depending on rop.keep_no_return_function_calls();
    //   * otherwise: leave everything alone.
    // Returns the number of ops that were changed.
    Opcode &callop (rop.inst()->ops()[opnum]);
    const int body_end = callop.jump(0);

    // Classify the contents of the function body.
    bool saw_return = false;
    bool saw_other = false;
    for (int i = opnum+1;  i < body_end;  ++i) {
        Opcode &inner (rop.inst()->ops()[i]);
        if (inner.opname() == u_return)
            saw_return = true;
        else if (inner.opname() != u_nop)
            saw_other = true;
    }

    if (! saw_other) {
        // Possibly as a result of earlier optimizations, the body is
        // empty apart from returns. Wipe out every op in the block that
        // is not already a nop, starting with the call op itself.
        int changed = 0;
        for (int i = opnum;  i < body_end;  ++i) {
            Opcode &victim (rop.inst()->ops()[i]);
            if (victim.opname() != u_nop) {
                rop.turn_into_nop (victim, "empty function");
                ++changed;
            }
        }
        return changed;
    }

    if (saw_return)
        return 0;   // body needs the call op as the target of its return

    // Straight-line body with no return statement: the "function" op
    // itself is not needed in its current form.
    if (rop.keep_no_return_function_calls())
        rop.turn_into_functioncall_nr (callop, "'functioncall' transmuted to 'no return' version");
    else
        rop.turn_into_nop (callop, "'function' not necessary");
    return 1;
}
2937 
2938 
2939 
2940 
DECLFOLDER(constfold_useparam)
{
    // 'useparam' ops only show up in shaders compiled with an old oslc;
    // unconditionally replace the op with a nop and report one change.
    rop.turn_into_nop (rop.inst()->ops()[opnum]);
    return 1;
}
2948 
2949 
2950 
DECLFOLDER(constfold_assign)
{
    // Try to turn "A = C" into a nop when A's current block alias is a
    // constant that already holds the value being assigned.
    // Returns 1 if the op was eliminated, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    const int firstarg = op.firstarg();
    Symbol *src = rop.inst()->argsymbol(firstarg+1);
    // N.B. symbol() returns NULL if alias is < 0
    Symbol *cur = rop.inst()->symbol(rop.block_alias (rop.inst()->arg(firstarg+0)));

    // Both the assigned value and the destination's alias must be constants.
    if (! src->is_constant() || ! cur || ! cur->is_constant())
        return 0;

    const auto &curtype = cur->typespec();
    const auto &srctype = src->typespec();

    // Decide whether the alias already equals the assigned value. Only
    // the type pairings below are handled; values are compared with
    // operator== on the decoded type.
    bool same_value = false;
    if (curtype.is_int() && srctype.is_int()) {
        same_value = (*(int *)cur->data() == *(int *)src->data());
    } else if (curtype.is_float() && srctype.is_float()) {
        same_value = (*(float *)cur->data() == *(float *)src->data());
    } else if (curtype.is_float() && srctype.is_int()) {
        same_value = (*(float *)cur->data() == *(int *)src->data());
    } else if (curtype.is_triple() && srctype.is_triple()) {
        same_value = (*(Vec3 *)cur->data() == *(Vec3 *)src->data());
    } else if (curtype.is_triple() && srctype.is_float()) {
        // A float assigned to a triple is compared against the float
        // replicated into all three components.
        float f = *(float *)src->data();
        same_value = (*(Vec3 *)cur->data() == Vec3(f,f,f));
    }

    if (! same_value)
        return 0;
    rop.turn_into_nop (op, "reassignment of current value");
    return 1;
}
2991 
2992 
2993 
DECLFOLDER(constfold_warning)
{
    // When the shading system's max_warnings_per_thread is 0, the
    // warning op is replaced by a nop. Returns 1 if the op was
    // eliminated, 0 if it was left in place.
    if (rop.shadingsys().max_warnings_per_thread() != 0)
        return 0;
    rop.turn_into_nop (rop.inst()->ops()[opnum],
                       "warnings disabled by max_warnings_per_thread == 0");
    return 1;
}
3003 
3004 
3005 
DECLFOLDER(constfold_deriv)
{
    // If the op's argument 1 is a constant, replace the op with an
    // assignment of zero to its result. Returns 1 if folded, else 0.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *src = rop.inst()->argsymbol(op.firstarg()+1);
    if (! src->is_constant())
        return 0;
    rop.turn_into_assign_zero (op, "deriv of constant => 0");
    return 1;
}
3016 
3017 
3018 
DECLFOLDER(constfold_isconstant)
{
    // Fold isconstant(A) to 1 once A is known to be a constant.
    //
    // The op is never folded to 0: a symbol that is not known to be
    // constant right now may still be discovered to be constant after
    // further optimization, so the non-constant case is left undecided.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *src = rop.inst()->argsymbol(op.firstarg()+1);
    if (! src->is_constant())
        return 0;
    rop.turn_into_assign_one (op, "isconstant => 1");
    return 1;
}
3033 
3034 
3035 
DECLFOLDER(constfold_raytype)
{
    // Fold raytype(name) when the name is a constant string and its bit
    // is set in either the optimizer's raytypes_on() mask (result 1) or
    // its raytypes_off() mask (result 0). Returns 1 if folded, else 0.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *name = rop.opargsym (op, 1);
    OSL_DASSERT(name->typespec().is_string());
    if (! name->is_constant())
        return 0;   // Can't optimize non-constant raytype name

    const int bit = rop.shadingsys().raytype_bit (*(ustring *)name->data());

    // The "on" mask is consulted first, then the "off" mask.
    if (bit & rop.raytypes_on()) {
        rop.turn_into_assign_one (op, "raytype => 1");
        return 1;
    } else if (bit & rop.raytypes_off()) {
        rop.turn_into_assign_zero (op, "raytype => 0");
        return 1;
    }

    // Bit is in neither mask: indeterminate until execution time.
    return 0;
}
3055 
3056 
3057 }; // namespace pvt
3058 OSL_NAMESPACE_EXIT
3059