1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4
5 #include <vector>
6 #include <cmath>
7 #include <cstdlib>
8
9 #include <OpenImageIO/fmath.h>
10 #include <OpenImageIO/sysutil.h>
11
12 #include "oslexec_pvt.h"
13 #include "opcolor.h"
14 #include "runtimeoptimize.h"
15 #include <OSL/dual.h>
16 #include <OSL/oslnoise.h>
17 using namespace OSL;
18 using namespace OSL::pvt;
19
20
21 // names of ops we'll be using frequently
22 static ustring u_nop ("nop"),
23 u_assign ("assign"),
24 u_aassign ("aassign"),
25 u_compassign ("compassign"),
26 u_mxcompassign ("mxcompassign"),
27 u_add ("add"),
28 u_sub ("sub"),
29 u_mul ("mul"),
30 u_sqrt ("sqrt"),
31 u_inversesqrt ("inversesqrt"),
32 u_cbrt ("cbrt"),
33 u_if ("if"),
34 u_eq ("eq"),
35 u_return ("return"),
36 u_error ("error"),
37 u_fmterror("%s"),
38 u_fmt_range_check("Index [%d] out of range %s[0..%d]: %s:%d (group %s, layer %d %s, shader %s)");
39
40 static ustring u_cell ("cell"), u_cellnoise ("cellnoise");
41
42
43 OSL_NAMESPACE_ENTER
44
45 namespace pvt { // OSL::pvt
46
47
48 inline bool
equal_consts(const Symbol & A,const Symbol & B)49 equal_consts (const Symbol &A, const Symbol &B)
50 {
51 return (&A == &B ||
52 (equivalent (A.typespec(), B.typespec()) &&
53 !memcmp (A.data(), B.data(), A.typespec().simpletype().size())));
54 }
55
56
57
58 inline bool
unequal_consts(const Symbol & A,const Symbol & B)59 unequal_consts (const Symbol &A, const Symbol &B)
60 {
61 return (equivalent (A.typespec(), B.typespec()) &&
62 memcmp (A.data(), B.data(), A.typespec().simpletype().size()));
63 }
64
65
66
constfold_none(RuntimeOptimizer &,int)67 int constfold_none(RuntimeOptimizer& /*rop*/, int /*opnum*/)
68 {
69 return 0;
70 }
71
72
73
/// Fold 'add R A B'.  Adding a zero operand becomes an assignment of the
/// other operand; adding two constants (int, float, triple, or a
/// float/triple mix) becomes an assignment of the precomputed sum.
/// Returns 1 if the op was rewritten, 0 otherwise.
DECLFOLDER(constfold_add)
{
    Opcode& op      = rop.inst()->ops()[opnum];
    const int a_arg = op.firstarg() + 1;
    const int b_arg = op.firstarg() + 2;
    Symbol& A       = *rop.inst()->argsymbol(a_arg);
    Symbol& B       = *rop.inst()->argsymbol(b_arg);

    // R = 0 + B  =>  R = B
    if (rop.is_zero(A)) {
        rop.turn_into_assign(op, rop.inst()->arg(b_arg), "0 + A => A");
        return 1;
    }
    // R = A + 0  =>  R = A
    if (rop.is_zero(B)) {
        rop.turn_into_assign(op, rop.inst()->arg(a_arg), "A + 0 => A");
        return 1;
    }

    // Everything below needs both operands to be known constants.
    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    int cind           = -1;
    if (ta.is_int() && tb.is_int()) {
        int sum = *(int*)A.data() + *(int*)B.data();
        cind    = rop.add_constant(ta, &sum);
    } else if (ta.is_float() && tb.is_float()) {
        float sum = *(float*)A.data() + *(float*)B.data();
        cind      = rop.add_constant(ta, &sum);
    } else if (ta.is_triple() && tb.is_triple()) {
        Vec3 sum = *(Vec3*)A.data() + *(Vec3*)B.data();
        cind     = rop.add_constant(ta, &sum);
    } else if (ta.is_triple() && tb.is_float()) {
        // The float operand is broadcast to all three components.
        Vec3 sum = *(Vec3*)A.data() + Vec3(*(float*)B.data());
        cind     = rop.add_constant(ta, &sum);
    } else if (ta.is_float() && tb.is_triple()) {
        // The result takes the triple operand's type (B here).
        Vec3 sum = Vec3(*(float*)A.data()) + *(Vec3*)B.data();
        cind     = rop.add_constant(tb, &sum);
    } else {
        return 0;  // type combination not handled
    }
    rop.turn_into_assign(op, cind, "const + const");
    return 1;
}
121
122
123
/// Fold 'sub R A B'.
///   * A - 0          => assignment of A
///   * const - const  => assignment of the precomputed difference
///   * A - A          => assignment of zero, even when A is not constant
/// Returns 1 if the op was rewritten, 0 if it was left untouched.
DECLFOLDER(constfold_sub)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));
    if (rop.is_zero(B)) {
        // R = A - 0   =>   R = A
        rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+1),
                              "A - 0 => A");
        return 1;
    }
    // R = A - B, if both are constants,  =>  R = C
    if (A.is_constant() && B.is_constant()) {
        if (A.typespec().is_int() && B.typespec().is_int()) {
            int result = *(int *)A.data() - *(int *)B.data();
            int cind = rop.add_constant (A.typespec(), &result);
            rop.turn_into_assign (op, cind, "const - const");
            return 1;
        } else if (A.typespec().is_float() && B.typespec().is_float()) {
            float result = *(float *)A.data() - *(float *)B.data();
            int cind = rop.add_constant (A.typespec(), &result);
            rop.turn_into_assign (op, cind, "const - const");
            return 1;
        } else if (A.typespec().is_triple() && B.typespec().is_triple()) {
            Vec3 result = *(Vec3 *)A.data() - *(Vec3 *)B.data();
            int cind = rop.add_constant (A.typespec(), &result);
            rop.turn_into_assign (op, cind, "const - const");
            return 1;
        } else if (A.typespec().is_triple() && B.typespec().is_float()) {
            // The float operand is broadcast to all three components.
            Vec3 result = *(Vec3 *)A.data() - Vec3(*(float *)B.data());
            int cind = rop.add_constant (A.typespec(), &result);
            rop.turn_into_assign (op, cind, "const - const");
            return 1;
        } else if (A.typespec().is_float() && B.typespec().is_triple()) {
            // The result takes the triple operand's type (B here).
            Vec3 result = Vec3(*(float *)A.data()) - *(Vec3 *)B.data();
            int cind = rop.add_constant (B.typespec(), &result);
            rop.turn_into_assign (op, cind, "const - const");
            return 1;
        }
    }
    // R = A - A  =>  R = 0    even if not constant!
    if (&A == &B) {
        rop.turn_into_assign_zero (op, "A - A => 0");
        // The op was rewritten, so report the change (previously this fell
        // through and returned 0 despite having modified the op).
        return 1;
    }
    return 0;
}
170
171
172
/// Fold 'mul R A B'.  Multiplying by one becomes an assignment of the other
/// operand, multiplying by zero becomes an assignment of the zero operand,
/// and const * const becomes an assignment of the precomputed product.
/// Returns 1 if the op was rewritten, 0 otherwise.
DECLFOLDER(constfold_mul)
{
    Opcode& op      = rop.inst()->ops()[opnum];
    const int a_arg = op.firstarg() + 1;
    const int b_arg = op.firstarg() + 2;
    Symbol& A       = *rop.inst()->argsymbol(a_arg);
    Symbol& B       = *rop.inst()->argsymbol(b_arg);

    // The order of these identity checks matters when more than one applies.
    if (rop.is_one(A)) {
        // R = 1 * B  =>  R = B
        rop.turn_into_assign(op, rop.inst()->arg(b_arg), "1 * A => A");
        return 1;
    }
    if (rop.is_zero(A)) {
        // R = 0 * B  =>  R = 0   (A itself is the zero being assigned)
        rop.turn_into_assign(op, rop.inst()->arg(a_arg), "0 * A => 0");
        return 1;
    }
    if (rop.is_one(B)) {
        // R = A * 1  =>  R = A
        rop.turn_into_assign(op, rop.inst()->arg(a_arg), "A * 1 => A");
        return 1;
    }
    if (rop.is_zero(B)) {
        // R = A * 0  =>  R = 0   (B itself is the zero being assigned)
        rop.turn_into_assign(op, rop.inst()->arg(b_arg), "A * 0 => 0");
        return 1;
    }

    // Everything below needs both operands to be known constants.
    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    int cind           = -1;
    if (ta.is_int() && tb.is_int()) {
        int product = *(int*)A.data() * *(int*)B.data();
        cind        = rop.add_constant(ta, &product);
    } else if (ta.is_float() && tb.is_float()) {
        float product = (*(float*)A.data()) * (*(float*)B.data());
        cind          = rop.add_constant(ta, &product);
    } else if (ta.is_triple() && tb.is_triple()) {
        Vec3 product = (*(Vec3*)A.data()) * (*(Vec3*)B.data());
        cind         = rop.add_constant(ta, &product);
    } else if (ta.is_triple() && tb.is_float()) {
        Vec3 product = (*(Vec3*)A.data()) * (*(float*)B.data());
        cind         = rop.add_constant(ta, &product);
    } else if (ta.is_float() && tb.is_triple()) {
        // The result takes the triple operand's type (B here).
        Vec3 product = (*(float*)A.data()) * (*(Vec3*)B.data());
        cind         = rop.add_constant(tb, &product);
    } else {
        return 0;  // type combination not handled
    }
    rop.turn_into_assign(op, cind, "const * const");
    return 1;
}
232
233
234
/// Fold 'div R A B'.  A / 1 becomes an assignment of A, A / 0 becomes an
/// assignment of zero (for int, float and triple divisors), and
/// const / const becomes an assignment of the precomputed quotient, stored
/// with R's type.  Returns 1 if the op was rewritten, 0 otherwise.
DECLFOLDER(constfold_div)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& R  = *rop.inst()->argsymbol(op.firstarg() + 0);
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);

    // R = A / 1  =>  R = A
    if (rop.is_one(B)) {
        rop.turn_into_assign(op, rop.inst()->arg(op.firstarg() + 1),
                             "A / 1 => A");
        return 1;
    }

    // R = A / 0  =>  R = 0 because of OSL div by zero rule
    if (rop.is_zero(B)) {
        const TypeSpec& tb = B.typespec();
        if (tb.is_float() || tb.is_triple() || tb.is_int()) {
            rop.turn_into_assign_zero(op, "A / 0 => 0 (by OSL division rules)");
            return 1;
        }
    }

    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    int cind           = -1;  // stays negative if the type combo is unhandled
    if (ta.is_int()) {
        const int a = *(int*)A.data();
        if (tb.is_int()) {
            // NOTE(review): INT_MIN / -1 is not guarded against here.
            int quotient = a / *(int*)B.data();
            cind         = rop.add_constant(R.typespec(), &quotient);
        } else if (tb.is_float()) {
            float quotient = a / *(float*)B.data();
            cind           = rop.add_constant(R.typespec(), &quotient);
        }
    } else if (ta.is_float()) {
        const float a = *(float*)A.data();
        if (tb.is_int()) {
            float quotient = a / *(int*)B.data();
            cind           = rop.add_constant(R.typespec(), &quotient);
        } else if (tb.is_float()) {
            float quotient = a / *(float*)B.data();
            cind           = rop.add_constant(R.typespec(), &quotient);
        } else if (tb.is_triple()) {
            // Broadcast the float numerator across all three components.
            Vec3 quotient = Vec3(a, a, a) / *(Vec3*)B.data();
            cind          = rop.add_constant(R.typespec(), &quotient);
        }
    } else if (ta.is_triple()) {
        if (tb.is_triple()) {
            Vec3 quotient = *(Vec3*)A.data() / *(Vec3*)B.data();
            cind          = rop.add_constant(R.typespec(), &quotient);
        } else if (tb.is_float()) {
            Vec3 quotient = *(Vec3*)A.data() / *(float*)B.data();
            cind          = rop.add_constant(R.typespec(), &quotient);
        }
    }

    if (cind < 0)
        return 0;
    rop.turn_into_assign(op, cind, "const / const");
    return 1;
}
285
286
287
/// Fold 'mod R A B'.
///   * 0 % B          => assignment of zero
///   * A % 0          => assignment of zero
///   * const % const  => assignment of the precomputed remainder (ints only)
/// Returns 1 if the op was rewritten, 0 otherwise.
DECLFOLDER(constfold_mod)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &B (*rop.inst()->argsymbol(op.firstarg()+2));

    if (rop.is_zero(A)) {
        // R = 0 % B  =>  R = 0
        rop.turn_into_assign_zero (op, "0 % A => 0");
        return 1;
    }
    if (rop.is_zero(B)) {
        // R = A % 0  =>  R = 0
        rop.turn_into_assign_zero (op, "A % 0 => 0");
        return 1;
    }
    if (A.is_constant() && B.is_constant() &&
        A.typespec().is_int() && B.typespec().is_int()) {
        const int a = A.get_int();
        const int b = B.get_int();
        // b == 0 folds to 0, matching the "A % 0 => 0" rule above.
        // b == -1 also folds to 0: the remainder of any integer divided by
        // -1 is 0, and evaluating INT_MIN % -1 directly is undefined
        // behavior in C++ (it can trap), so never compute it.
        const int result = (b == 0 || b == -1) ? 0 : (a % b);
        int cind = rop.add_constant (result);
        rop.turn_into_assign (op, cind, "const % const");
        return 1;
    }
    return 0;
}
314
315
316
/// Fold 'dot R A B'.  If either operand is zero the op becomes an
/// assignment of zero; if both are constant it becomes an assignment of
/// the precomputed float dot product.  Returns 1 if rewritten, else 0.
DECLFOLDER(constfold_dot)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);

    // Dot with (0,0,0) -> 0
    const bool either_zero = rop.is_zero(A) || rop.is_zero(B);
    if (either_zero) {
        rop.turn_into_assign_zero(op, "dot(a,(0,0,0)) => 0");
        return 1;
    }

    // dot(const,const) -> const
    if (!A.is_constant() || !B.is_constant())
        return 0;
    OSL_DASSERT(A.typespec().is_triple() && B.typespec().is_triple());
    const Vec3& a = *(Vec3*)A.data();
    const Vec3& b = *(Vec3*)B.data();
    float product = a.dot(b);
    int cind      = rop.add_constant(TypeDesc::TypeFloat, &product);
    rop.turn_into_assign(op, cind, "dot(const,const)");
    return 1;
}
340
341
342
/// Fold 'neg R A' when A is a constant int, float or triple: the op
/// becomes an assignment of the precomputed negation.  Returns 1 if the
/// op was rewritten, 0 otherwise.
DECLFOLDER(constfold_neg)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    if (!A.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    int cind           = -1;
    if (ta.is_int()) {
        int negated = -*(int*)A.data();
        cind        = rop.add_constant(ta, &negated);
    } else if (ta.is_float()) {
        float negated = -*(float*)A.data();
        cind          = rop.add_constant(ta, &negated);
    } else if (ta.is_triple()) {
        Vec3 negated = -*(Vec3*)A.data();
        cind         = rop.add_constant(ta, &negated);
    } else {
        return 0;  // other types are left alone
    }
    rop.turn_into_assign(op, cind, "-const");
    return 1;
}
367
368
369
/// Fold 'abs R A' when A is a constant int, float or triple: the op
/// becomes an assignment of the precomputed absolute value (taken per
/// component for triples).  Returns 1 if rewritten, 0 otherwise.
DECLFOLDER(constfold_abs)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    if (!A.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    int cind           = -1;
    if (ta.is_int()) {
        int magnitude = std::abs(*(int*)A.data());
        cind          = rop.add_constant(ta, &magnitude);
    } else if (ta.is_float()) {
        float magnitude = std::abs(*(float*)A.data());
        cind            = rop.add_constant(ta, &magnitude);
    } else if (ta.is_triple()) {
        const Vec3& v = *(Vec3*)A.data();
        Vec3 magnitude(std::abs(v.x), std::abs(v.y), std::abs(v.z));
        cind = rop.add_constant(ta, &magnitude);
    } else {
        return 0;  // other types are left alone
    }
    rop.turn_into_assign(op, cind, "abs(const)");
    return 1;
}
397
398
399
/// Fold 'eq R A B' for constant operands into 'assign R X', where X is the
/// int constant 1 or 0.  Handles operands of equivalent types (compared
/// with equal_consts) and int/float mixes; anything else is left alone.
/// Returns 1 if the op was rewritten, 0 otherwise.
DECLFOLDER(constfold_eq)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    bool is_equal      = false;
    if (equivalent(ta, tb))
        is_equal = equal_consts(A, B);
    else if (ta.is_float() && tb.is_int())
        is_equal = (*(float*)A.data() == *(int*)B.data());
    else if (ta.is_int() && tb.is_float())
        is_equal = (*(int*)A.data() == *(float*)B.data());
    else
        return 0;  // unhandled cases

    const int folded = is_equal ? 1 : 0;
    int cind         = rop.add_constant(TypeDesc::TypeInt, &folded);
    rop.turn_into_assign(op, cind, "const == const");
    return 1;
}
425
426
427
/// Fold 'neq R A B' for constant operands into 'assign R X', where X is
/// the int constant 1 or 0.  Handles operands of equivalent types (the
/// negation of equal_consts) and int/float mixes; anything else is left
/// alone.  Returns 1 if the op was rewritten, 0 otherwise.
DECLFOLDER(constfold_neq)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    bool differs       = false;
    if (equivalent(ta, tb))
        differs = !equal_consts(A, B);
    else if (ta.is_float() && tb.is_int())
        differs = (*(float*)A.data() != *(int*)B.data());
    else if (ta.is_int() && tb.is_float())
        differs = (*(int*)A.data() != *(float*)B.data());
    else
        return 0;  // unhandled case

    const int folded = differs ? 1 : 0;
    int cind         = rop.add_constant(TypeDesc::TypeInt, &folded);
    rop.turn_into_assign(op, cind, "const != const");
    return 1;
}
453
454
455
/// Fold 'lt R A B' for constant int/float operands (in any mix) into
/// 'assign R X', where X is the int constant 1 or 0.  Returns 1 if the op
/// was rewritten, 0 otherwise.
DECLFOLDER(constfold_lt)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    bool holds         = false;
    if (ta.is_float()) {
        const float a = *(float*)A.data();
        if (tb.is_float())
            holds = (a < *(float*)B.data());
        else if (tb.is_int())
            holds = (a < *(int*)B.data());
        else
            return 0;  // unhandled case
    } else if (ta.is_int()) {
        const int a = *(int*)A.data();
        if (tb.is_float())
            holds = (a < *(float*)B.data());
        else if (tb.is_int())
            holds = (a < *(int*)B.data());
        else
            return 0;  // unhandled case
    } else {
        return 0;  // unhandled case
    }

    const int folded = holds ? 1 : 0;
    int cind         = rop.add_constant(TypeDesc::TypeInt, &folded);
    rop.turn_into_assign(op, cind, "const < const");
    return 1;
}
485
486
487
/// Fold 'le R A B' for constant int/float operands (in any mix) into
/// 'assign R X', where X is the int constant 1 or 0.  Returns 1 if the op
/// was rewritten, 0 otherwise.
DECLFOLDER(constfold_le)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    bool holds         = false;
    if (ta.is_float()) {
        const float a = *(float*)A.data();
        if (tb.is_float())
            holds = (a <= *(float*)B.data());
        else if (tb.is_int())
            holds = (a <= *(int*)B.data());
        else
            return 0;  // unhandled case
    } else if (ta.is_int()) {
        const int a = *(int*)A.data();
        if (tb.is_float())
            holds = (a <= *(float*)B.data());
        else if (tb.is_int())
            holds = (a <= *(int*)B.data());
        else
            return 0;  // unhandled case
    } else {
        return 0;  // unhandled case
    }

    const int folded = holds ? 1 : 0;
    int cind         = rop.add_constant(TypeDesc::TypeInt, &folded);
    rop.turn_into_assign(op, cind, "const <= const");
    return 1;
}
517
518
519
/// Fold 'gt R A B' for constant int/float operands (in any mix) into
/// 'assign R X', where X is the int constant 1 or 0.  Returns 1 if the op
/// was rewritten, 0 otherwise.
DECLFOLDER(constfold_gt)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    bool holds         = false;
    if (ta.is_float()) {
        const float a = *(float*)A.data();
        if (tb.is_float())
            holds = (a > *(float*)B.data());
        else if (tb.is_int())
            holds = (a > *(int*)B.data());
        else
            return 0;  // unhandled case
    } else if (ta.is_int()) {
        const int a = *(int*)A.data();
        if (tb.is_float())
            holds = (a > *(float*)B.data());
        else if (tb.is_int())
            holds = (a > *(int*)B.data());
        else
            return 0;  // unhandled case
    } else {
        return 0;  // unhandled case
    }

    const int folded = holds ? 1 : 0;
    int cind         = rop.add_constant(TypeDesc::TypeInt, &folded);
    rop.turn_into_assign(op, cind, "const > const");
    return 1;
}
549
550
551
/// Fold 'ge R A B' for constant int/float operands (in any mix) into
/// 'assign R X', where X is the int constant 1 or 0.  Returns 1 if the op
/// was rewritten, 0 otherwise.
DECLFOLDER(constfold_ge)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    const TypeSpec& ta = A.typespec();
    const TypeSpec& tb = B.typespec();
    bool holds         = false;
    if (ta.is_float()) {
        const float a = *(float*)A.data();
        if (tb.is_float())
            holds = (a >= *(float*)B.data());
        else if (tb.is_int())
            holds = (a >= *(int*)B.data());
        else
            return 0;  // unhandled case
    } else if (ta.is_int()) {
        const int a = *(int*)A.data();
        if (tb.is_float())
            holds = (a >= *(float*)B.data());
        else if (tb.is_int())
            holds = (a >= *(int*)B.data());
        else
            return 0;  // unhandled case
    } else {
        return 0;  // unhandled case
    }

    const int folded = holds ? 1 : 0;
    int cind         = rop.add_constant(TypeDesc::TypeInt, &folded);
    rop.turn_into_assign(op, cind, "const >= const");
    return 1;
}
581
582
583
/// Fold 'or R A B' for constant int operands into 'assign R X', where X
/// is the int constant 1 or 0.  Returns 1 if rewritten, 0 otherwise.
DECLFOLDER(constfold_or)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const int a      = *(int*)A.data();
    const int b      = *(int*)B.data();
    const int folded = (a != 0 || b != 0) ? 1 : 0;
    int cind         = rop.add_constant(TypeDesc::TypeInt, &folded);
    rop.turn_into_assign(op, cind, "const || const");
    return 1;
}
601
602
603
/// Fold 'and R A B' for constant int operands into 'assign R X', where X
/// is the int constant 1 or 0.  Returns 1 if rewritten, 0 otherwise.
DECLFOLDER(constfold_and)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& A  = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& B  = *rop.inst()->argsymbol(op.firstarg() + 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const int a      = *(int*)A.data();
    const int b      = *(int*)B.data();
    const int folded = (a != 0 && b != 0) ? 1 : 0;
    int cind         = rop.add_constant(TypeDesc::TypeInt, &folded);
    rop.turn_into_assign(op, cind, "const && const");
    return 1;
}
621
622
623
/// Fold 'bitand R A B' for constant int operands into 'assign R X', where
/// X is the precomputed bitwise AND.  Returns 1 if rewritten, 0 otherwise.
DECLFOLDER(constfold_bitand)
{
    Opcode& op = rop.op(opnum);
    Symbol& A  = *rop.opargsym(op, 1);
    Symbol& B  = *rop.opargsym(op, 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const int folded = A.get_int() & B.get_int();
    const int cind   = rop.add_constant(folded);
    rop.turn_into_assign(op, cind, "const & const");
    return 1;
}
638
639
640
/// Fold 'bitor R A B' for constant int operands into 'assign R X', where
/// X is the precomputed bitwise OR.  Returns 1 if rewritten, 0 otherwise.
DECLFOLDER(constfold_bitor)
{
    Opcode& op = rop.op(opnum);
    Symbol& A  = *rop.opargsym(op, 1);
    Symbol& B  = *rop.opargsym(op, 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const int folded = A.get_int() | B.get_int();
    const int cind   = rop.add_constant(folded);
    rop.turn_into_assign(op, cind, "const | const");
    return 1;
}
655
656
657
/// Fold 'xor R A B' for constant int operands into 'assign R X', where X
/// is the precomputed bitwise XOR.  Returns 1 if rewritten, 0 otherwise.
DECLFOLDER(constfold_xor)
{
    Opcode& op = rop.op(opnum);
    Symbol& A  = *rop.opargsym(op, 1);
    Symbol& B  = *rop.opargsym(op, 2);
    if (!A.is_constant() || !B.is_constant())
        return 0;

    OSL_DASSERT(A.typespec().is_int() && B.typespec().is_int());
    const int folded = A.get_int() ^ B.get_int();
    const int cind   = rop.add_constant(folded);
    rop.turn_into_assign(op, cind, "const ^ const");
    return 1;
}
672
673
674
/// Fold 'compl R A' for a constant int operand into 'assign R X', where X
/// is the precomputed bitwise complement.  Returns 1 if rewritten, else 0.
DECLFOLDER(constfold_compl)
{
    Opcode& op = rop.op(opnum);
    Symbol& A  = *rop.opargsym(op, 1);
    if (!A.is_constant())
        return 0;

    OSL_DASSERT(A.typespec().is_int());
    const int folded = ~(A.get_int());
    const int cind   = rop.add_constant(folded);
    rop.turn_into_assign(op, cind, "~const");
    return 1;
}
689
690
/// Fold an 'if' op.
///   * Constant condition known to be true:  the range [jump(0), jump(1))
///     and the 'if' op itself are turned into nops.
///   * Constant condition known to be false: the range [opnum, jump(0))
///     is turned into nops.
///   * Non-constant condition: if every op between the 'if' and its
///     farthest jump target is already a nop, the 'if' itself is nop'ed.
/// Returns the number of changes reported by the nop conversions for the
/// constant cases, 1 for the empty-body case, and 0 if nothing was done.
DECLFOLDER(constfold_if)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol& C  = *rop.inst()->argsymbol(op.firstarg() + 0);

    if (!C.is_constant()) {
        // Eliminate 'if' that contains no statements to execute
        const int last = op.farthest_jump();
        for (int i = opnum + 1; i < last; ++i) {
            if (rop.inst()->ops()[i].opname() != u_nop)
                return 0;  // found a real op in the body; leave it alone
        }
        rop.turn_into_nop(op, "'if' with no body");
        return 1;
    }

    // Evaluate the constant condition: 1 = true, 0 = false,
    // -1 = type we can't evaluate.
    const TypeSpec& ct = C.typespec();
    int truth          = -1;
    if (ct.is_int()) {
        truth = (((int*)C.data())[0] != 0);
    } else if (ct.is_float()) {
        truth = (((float*)C.data())[0] != 0.0f);
    } else if (ct.is_triple()) {
        truth = (((Vec3*)C.data())[0] != Vec3(0, 0, 0));
    } else if (ct.is_string()) {
        ustring s = ((ustring*)C.data())[0];
        truth     = (s.length() != 0);
    }

    int changed = 0;
    switch (truth) {
    case 1:
        changed += rop.turn_into_nop(op.jump(0), op.jump(1), "elide 'else'");
        changed += rop.turn_into_nop(op, "elide 'else'");
        break;
    case 0:
        changed += rop.turn_into_nop(opnum, op.jump(0), "elide 'if'");
        break;
    default:
        break;  // condition value unknown; nothing to do
    }
    return changed;
}
729
730
731
732 // Is an array known to have all elements having the same value?
733 static bool
array_all_elements_equal(const Symbol & s)734 array_all_elements_equal (const Symbol &s)
735 {
736 TypeDesc t = s.typespec().simpletype();
737 size_t size = t.elementsize();
738 size_t n = t.numelements();
739 for (size_t i = 1; i < n; ++i)
740 if (memcmp ((const char *)s.data(), (const char *)s.data()+i*size, size))
741 return false;
742 return true;
743 }
744
745
746
/// Fold 'aref R A Index' (R = A[Index]) when A is a constant array.
///   * Constant index: R is assigned the selected element, with the index
///     clamped to [0, length-1].  If the shader master has range checking
///     enabled and the index had to be clamped, an 'error' op reporting
///     the out-of-range access is inserted at opnum.
///   * Non-constant index: if all elements of A are equal, R is assigned
///     the first element.
/// Returns 1 if the op was rewritten, 0 otherwise.
DECLFOLDER(constfold_aref)
{
    Opcode& op    = rop.inst()->ops()[opnum];
    Symbol& R     = *rop.inst()->argsymbol(op.firstarg() + 0);
    Symbol& A     = *rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol& Index = *rop.inst()->argsymbol(op.firstarg() + 2);
    OSL_DASSERT(A.typespec().is_array() && Index.typespec().is_int());

    // Nothing can be folded unless the array contents are known.
    if (!A.is_constant())
        return 0;

    if (Index.is_constant()) {
        // Try to turn R=A[I] into R=C, since A and I are both const.
        TypeSpec elemtype = A.typespec().elementtype();
        OSL_ASSERT(equivalent(elemtype, R.typespec()));
        const int length     = A.typespec().arraylength();
        const int orig_index = *(int*)Index.data();
        const int index      = OIIO::clamp(orig_index, 0, length - 1);
        OSL_DASSERT(index >= 0 && index < length);
        char* elemdata = (char*)A.data() + index * elemtype.simpletype().size();
        int cind       = rop.add_constant(elemtype, elemdata);
        rop.turn_into_assign(op, cind, "aref const fold: const_array[const]");

        const bool report_range_error
            = rop.inst()->master()->range_checking() && index != orig_index;
        if (report_range_error) {
            // the original index was out of range, and the user cares about
            // reporting errors
            const int args_to_add[] = {
                rop.add_constant(u_fmt_range_check),
                rop.add_constant(orig_index),
                rop.add_constant(A.unmangled()),
                rop.add_constant(length - 1),
                rop.add_constant(op.sourcefile()),
                rop.add_constant(op.sourceline()),
                rop.add_constant(rop.group().name()),
                rop.add_constant(rop.layer()),
                rop.add_constant(rop.inst()->layername()),
                rop.add_constant(ustring(rop.inst()->shadername()))
            };
            rop.insert_code(opnum, u_error, args_to_add,
                            RuntimeOptimizer::RecomputeRWRanges,
                            RuntimeOptimizer::GroupWithNext);
            // Re-fetch the op at opnum rather than reusing 'op': it is now
            // the freshly inserted 'error' op.
            Opcode& newop = rop.inst()->ops()[opnum];
            newop.argreadonly(0);
        }
        return 1;
    }

    // Even if the index isn't constant, we still know the answer if all
    // the array elements are equal!
    if (array_all_elements_equal(A)) {
        TypeSpec elemtype = A.typespec().elementtype();
        OSL_ASSERT(equivalent(elemtype, R.typespec()));
        int cind = rop.add_constant(elemtype, (char*)A.data());
        rop.turn_into_assign(op, cind, "aref of elements-equal array");
        return 1;
    }
    return 0;
}
798
799
800
/// Fold 'arraylength R A' into an assignment of an int constant when the
/// length is known to be positive.  For an unsized array the length is
/// taken from A.initializers(), otherwise from the declared array length.
/// Returns 1 if the op was rewritten, 0 otherwise.
DECLFOLDER(constfold_arraylength)
{
    Opcode& op = rop.inst()->ops()[opnum];
    OSL_MAYBE_UNUSED Symbol& R = *rop.inst()->argsymbol(op.firstarg() + 0);
    Symbol& A                  = *rop.inst()->argsymbol(op.firstarg() + 1);
    OSL_DASSERT(R.typespec().is_int() && A.typespec().is_array());

    // Try to turn R=arraylength(A) into R=C if the array length is known
    int len;
    if (A.typespec().is_unsized_array())
        len = A.initializers();
    else
        len = A.typespec().arraylength();

    if (len <= 0)
        return 0;  // length not known; leave the op alone
    int cind = rop.add_constant(TypeSpec(TypeDesc::INT), &len);
    rop.turn_into_assign(op, cind, "const fold arraylength");
    return 1;
}
818
819
820
/// Fold a run of 'aassign R I C' ops (R[I] = C).  Starting at opnum, walk
/// successive aassign ops in the same block that store constants at
/// constant, in-range indices of the same array R.  If together they
/// assign every element, the first op becomes a single assignment of a
/// constant array and the remaining ops of the run become nops.
/// Returns the number of ops covered by the rewrite, or 0 if nothing was
/// changed.
DECLFOLDER(constfold_aassign)
{
    Opcode& op = rop.inst()->ops()[opnum];
    Symbol* R  = rop.inst()->argsymbol(op.firstarg() + 0);
    Symbol* I  = rop.inst()->argsymbol(op.firstarg() + 1);
    Symbol* C  = rop.inst()->argsymbol(op.firstarg() + 2);
    if (!(I->is_constant() && C->is_constant()))
        return 0;  // not much we can do if not assigning constants
    OSL_DASSERT(R->typespec().is_array() && I->typespec().is_int());

    TypeSpec elemtype = R->typespec().elementtype();
    if (elemtype.is_closure())
        return 0;  // don't worry about closures

    const int len = R->typespec().arraylength();
    if (len <= 0)
        return 0;  // don't handle arrays of unknown length

    const int elemsize = (int)elemtype.simpletype().size();
    std::vector<char> seen(len, 0);                  // nonzero once assigned
    std::vector<char> filled_values(elemsize * len); // constant storage
    char* fill       = filled_values.data();
    int num_assigned = 0;
    int highestop    = opnum;

    int opindex = opnum;
    do {
        Opcode& opi = rop.inst()->ops()[opindex];
        if (opi.opname() != u_aassign)
            break;  // not a successive aassign op
        if (rop.inst()->argsymbol(opi.firstarg() + 0) != R)
            break;  // not an aassign to the same variable
        Symbol* Ii = rop.inst()->argsymbol(opi.firstarg() + 1);
        Symbol* Ci = rop.inst()->argsymbol(opi.firstarg() + 2);
        if (!(Ii->is_constant() && Ci->is_constant()))
            break;  // not assigning constants
        const int indexval = *(int*)Ii->data();
        if (indexval < 0 || indexval >= len)
            break;  // out of range index; let runtime deal with it

        if (equivalent(elemtype, Ci->typespec())) {
            // equivalent types: copy the raw element bytes
            memcpy(fill + indexval * elemsize, Ci->data(), elemsize);
        } else if (elemtype.is_float() && Ci->typespec().is_int()) {
            // special case of float[i] = int: convert the int to float
            float c                   = *(int*)Ci->data();
            ((float*)fill)[indexval] = c;
        } else {
            break;  // a case we don't handle
        }

        if (!seen[indexval]) {
            seen[indexval] = 1;
            ++num_assigned;
        }
        highestop = opindex;
        opindex   = rop.next_block_instruction(opindex);
    } while (opindex);

    if (num_assigned != len)
        return 0;

    // woo-hoo! we had a succession of constant aassign ops to the
    // same variable, filling in all indices. Turn the whole shebang
    // into a single assignment.
    int cind = rop.add_constant(R->typespec(), fill);
    rop.turn_into_assign(op, cind, "replaced element-by-element assignment");
    rop.turn_into_nop(opnum + 1, highestop + 1,
                      "replaced element-by-element assignment");
    return highestop + 1 - opnum;
}
895
896
897
DECLFOLDER(constfold_compassign)
{
    // Fold a triple component assignment R[I] = C.
    //
    // Two strategies are tried, in order:
    //   1. If the current value of R is known (via a block alias) to be a
    //      constant, replace the op by a whole-triple assignment of the
    //      updated constant -- or by a nop if the component is unchanged.
    //   2. Otherwise, scan forward for a run of constant compassign ops
    //      to R that together set all three components, and collapse the
    //      run into one assignment.
    // Returns 0 if the op was left alone.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *R (rop.inst()->argsymbol(op.firstarg()+0));
    Symbol *I (rop.inst()->argsymbol(op.firstarg()+1));
    Symbol *C (rop.inst()->argsymbol(op.firstarg()+2));
    if (! (I->is_constant() && C->is_constant()))
        return 0;   // index and value must both be constants
    OSL_DASSERT (R->typespec().is_triple() && I->typespec().is_int() &&
                 (C->typespec().is_float() || C->typespec().is_int()));

    // R itself is not a constant, but within the current block its value
    // may be known; that shows up as a block alias.
    // N.B. symbol() returns NULL if the alias index is < 0.
    int Aalias = rop.block_alias (rop.inst()->arg(op.firstarg()+0));
    Symbol *AA = rop.inst()->symbol(Aalias);

    // --- Strategy 1: old value of R is a known constant ---
    if (AA && AA->is_constant()) {
        OSL_DASSERT (AA->typespec().is_triple());
        const int comp = *(int *)I->data();
        if (comp < 0 || comp >= 3) {
            // Out-of-range index on a path that may never execute.
            // Don't optimize; the execute-time range check reports it
            // if it really is a problem.
            return 0;
        }
        // New component value, converting an int constant to float.
        float newcomp;
        if (C->typespec().is_int())
            newcomp = *(int *)C->data();
        else
            newcomp = *(float *)C->data();
        float *oldval = (float *)AA->data();
        if (oldval[comp] == newcomp) {
            // The assignment would not change anything: drop it.
            rop.turn_into_nop (op, "useless compassign");
            return 1;
        }
        // Build the updated triple and assign it wholesale.
        Vec3 updated (oldval[0], oldval[1], oldval[2]);
        updated[comp] = newcomp;
        int cind = rop.add_constant (AA->typespec(), &updated);
        rop.turn_into_assign (op, cind, "fold compassign");
        return 1;
    }

    // --- Strategy 2: a run of compassigns that fills every component ---
    bool seen[3] = { false, false, false };  // which components were set
    float triple[3];                         // latest value per component
    int nseen = 0;                           // number of distinct components
    int lastop = opnum;                      // last op that was part of the run
    int cur = opnum;
    while (true) {
        Opcode &opi (rop.inst()->ops()[cur]);
        if (opi.opname() != u_compassign)
            break;   // run of compassign ops ended
        if (rop.inst()->argsymbol(opi.firstarg()+0) != R)
            break;   // assigns to some other variable
        Symbol *Ii (rop.inst()->argsymbol(opi.firstarg()+1));
        Symbol *Ci (rop.inst()->argsymbol(opi.firstarg()+2));
        if (! (Ii->is_constant() && Ci->is_constant()))
            break;   // not assigning constants
        const int comp = *(int *)Ii->data();
        if (comp < 0 || comp >= 3)
            break;   // out of range index; let runtime deal with it
        if (Ci->typespec().is_int())
            triple[comp] = *(int *)Ci->data();
        else
            triple[comp] = *(float *)Ci->data();
        if (! seen[comp]) {
            seen[comp] = true;
            ++nseen;
        }
        lastop = cur;
        cur = rop.next_block_instruction(cur);
        if (cur == 0)
            break;   // no further instruction in this block
    }
    if (nseen != 3)
        return 0;

    // Every component was set by the run: replace the first op with one
    // assignment of the assembled constant and nop out the rest.
    int cind = rop.add_constant (R->typespec(), triple);
    rop.turn_into_assign (op, cind, "replaced element-by-element assignment");
    rop.turn_into_nop (opnum+1, lastop+1, "replaced element-by-element assignment");
    return lastop+1-opnum;
}
998
999
1000
DECLFOLDER(constfold_mxcompassign)
{
    // Fold a matrix component assignment R[J][I] = C.
    //
    // Two strategies are tried, in order:
    //   1. If the current value of R is known (via a block alias) to be a
    //      constant, replace the op by a whole-matrix assignment of the
    //      updated constant -- or by a nop if the component is unchanged.
    //   2. Otherwise, scan forward for a run of constant mxcompassign ops
    //      to R that together set all 16 components, and collapse the run
    //      into one assignment.
    // Returns 0 if the op was left alone.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *R (rop.inst()->argsymbol(op.firstarg()+0));
    Symbol *J (rop.inst()->argsymbol(op.firstarg()+1));
    Symbol *I (rop.inst()->argsymbol(op.firstarg()+2));
    Symbol *C (rop.inst()->argsymbol(op.firstarg()+3));
    if (! J->is_constant() || ! I->is_constant() || !C->is_constant())
        return 0;   // not much we can do if not assigning constants
    OSL_DASSERT (R->typespec().is_matrix() &&
                 J->typespec().is_int() && I->typespec().is_int() &&
                 (C->typespec().is_float() || C->typespec().is_int()));

    // We are obviously not assigning to a constant, but it could be
    // that at this point in our current block, the value of A is known,
    // and that will show up as a block alias.
    int Aalias = rop.block_alias (rop.inst()->arg(op.firstarg()+0));
    Symbol *AA = rop.inst()->symbol(Aalias);
    // N.B. symbol returns NULL if Aalias is < 0

    // Try to simplify A[J,I]=C if we already know the old value of A as a
    // constant.  We can turn it into A = N, where N is the old A but with
    // the designated component set to C. If it turns out that the old
    // A[J,I] == C, and thus the assignment doesn't change A's value, we can
    // eliminate the assignment entirely.
    if (AA && AA->is_constant()) {
        OSL_DASSERT (AA->typespec().is_matrix());
        int jndex = *(int *)J->data();
        int index = *(int *)I->data();
        // A matrix is 4x4, so the valid indices are 0..3 in both
        // dimensions (the same bound the scan loop below uses).
        if (index < 0 || index >= 4 || jndex < 0 || jndex >= 4) {
            // We are indexing a const matrix out of range.  But this
            // isn't necessarily a reportable error, because it may be a
            // code path that will never be taken.  Punt -- don't
            // optimize this op, leave it to the execute-time range
            // check to catch, if indeed it is a problem.
            return 0;
        }
        Matrix44 *aa = (Matrix44 *)AA->data();
        // New component value, converting an int constant to float.
        float c = C->typespec().is_int() ? *(int *)C->data()
                                         : *(float *)C->data();
        if ((*aa)[jndex][index] == c) {
            // If the component assignment doesn't change that component,
            // just omit the op entirely.
            rop.turn_into_nop (op, "useless mxcompassign");
            return 1;
        }
        // If the previous value of the matrix was a constant, and we're
        // assigning a new constant to one component (and the indices are
        // also constants), just turn it into an assignment of a new
        // constant matrix.
        Matrix44 newval = *aa;
        newval[jndex][index] = c;
        int cind = rop.add_constant (AA->typespec(), &newval);
        rop.turn_into_assign (op, cind, "fold mxcompassign");
        return 1;
    }

    // Look for patterns where all 16 components are assigned in
    // succession within the same block, in which case we can turn the
    // result into a constant!
    // index_assigned[j][i] holds the op that last set component [j][i],
    // or -1 if that component has not been set yet.
    int index_assigned[4][4] = { {-1, -1, -1, -1}, {-1, -1, -1, -1},
                                 {-1, -1, -1, -1}, {-1, -1, -1, -1} };
    float filled_values[4][4];   // latest value written to each component
    int num_assigned = 0;        // number of distinct components set
    int opindex = opnum;
    int highestop = opindex;     // last op that was part of the run
    for ( ; ; ) {
        Opcode &opi (rop.inst()->ops()[opindex]);
        if (opi.opname() != u_mxcompassign)
            break;   // not a successive mxcompassign op
        Symbol *Ri (rop.inst()->argsymbol(opi.firstarg()+0));
        if (Ri != R)
            break;   // not a mxcompassign to the same variable
        Symbol *Ji (rop.inst()->argsymbol(opi.firstarg()+1));
        Symbol *Ii (rop.inst()->argsymbol(opi.firstarg()+2));
        Symbol *Ci (rop.inst()->argsymbol(opi.firstarg()+3));
        if (! Ji->is_constant() || ! Ii->is_constant() || !Ci->is_constant())
            break;   // not assigning constants
        int jndexval = *(int *)Ji->data();
        int indexval = *(int *)Ii->data();
        if (jndexval < 0 || jndexval >= 4 || indexval < 0 || indexval >= 4)
            break;  // out of range index; let runtime deal with it
        float c = Ci->typespec().is_int() ? *(int *)Ci->data()
                                          : *(float *)Ci->data();
        filled_values[jndexval][indexval] = c;
        if (index_assigned[jndexval][indexval] < 0)
            ++num_assigned;
        index_assigned[jndexval][indexval] = opindex;
        highestop = opindex;
        opindex = rop.next_block_instruction(opindex);
        if (! opindex)
            break;
    }
    if (num_assigned == 16) {
        // woo-hoo! we had a succession of constant mxcompassign ops to the
        // same variable, filling in all indices. Turn the whole shebang
        // into a single assignment.
        int cind = rop.add_constant (R->typespec(), filled_values);
        rop.turn_into_assign (op, cind, "replaced element-by-element assignment");
        rop.turn_into_nop (opnum+1, highestop+1, "replaced element-by-element assignment");
        return highestop+1-opnum;
    }

    return 0;
}
1107
1108
1109
DECLFOLDER(constfold_compref)
{
    // Component reference: fold R=A[I] into R=C when both the triple A
    // and the index I are constants.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Index (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! A.is_constant() || ! Index.is_constant())
        return 0;
    OSL_DASSERT (A.typespec().is_triple() && Index.typespec().is_int());

    const int comp = *(int *)Index.data();
    if (comp < 0 || comp >= 3) {
        // Out-of-range index into a constant triple. This may sit on a
        // code path that is never taken, so it is not reported here:
        // leave the op alone and let the execute-time range check
        // catch it if it is a real problem.
        return 0;
    }

    // The selected component becomes a float constant.
    float *components = (float *)A.data();
    int cind = rop.add_constant (TypeDesc::TypeFloat, components + comp);
    rop.turn_into_assign (op, cind, "const_triple[const]");
    return 1;
}
1134
1135
1136
DECLFOLDER(constfold_strlen)
{
    // Fold R=strlen(s) into R=C when s is a constant string.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &S (*rop.inst()->argsymbol(op.firstarg()+1));
    if (! S.is_constant())
        return 0;
    OSL_DASSERT (S.typespec().is_string());

    ustring *str = (ustring *)S.data();
    int nchars = (int) str->length();
    int cind = rop.add_constant (nchars);
    rop.turn_into_assign (op, cind, "const fold strlen");
    return 1;
}
1151
1152
1153
DECLFOLDER(constfold_hash)
{
    // Fold R=hash(s) or R=hash(s,t) into R=C when every input is a
    // constant and the argument type combination is one handled below.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *S (rop.inst()->argsymbol(op.firstarg()+1));
    // T is the optional second input; it exists only past two op args.
    Symbol *T (op.nargs() > 2 ? rop.inst()->argsymbol(op.firstarg()+2) : NULL);
    if (! S->is_constant() || (T != NULL && ! T->is_constant()))
        return 0;

    const int nargs = op.nargs();
    int cind = -1;   // stays negative if no supported combination matches
    if (S->typespec().is_string()) {
        cind = rop.add_constant ((int) (*(ustring *)S->data()).hash());
    } else if (nargs == 2) {
        // Single-input forms: int, float, or triple.
        if (S->typespec().is_int())
            cind = rop.add_constant (inthashi (S->get_int()));
        else if (S->typespec().is_float())
            cind = rop.add_constant (inthashf (S->get_float()));
        else if (S->typespec().is_triple())
            cind = rop.add_constant (inthashf ((const float *)S->data()));
    } else if (nargs == 3 && T->typespec().is_float()) {
        // Two-input forms: (float, float) or (triple, float).
        if (S->typespec().is_float())
            cind = rop.add_constant (inthashf (S->get_float(), T->get_float()));
        else if (S->typespec().is_triple())
            cind = rop.add_constant (inthashf ((const float *)S->data(), T->get_float()));
    }

    if (cind < 0)
        return 0;
    rop.turn_into_assign (op, cind, "const fold hash");
    return 1;
}
1182
1183
1184
DECLFOLDER(constfold_getchar)
{
    // Fold R=getchar(s,i) into R=C when s and i are both constants.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &S (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &I (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (S.is_constant() && I.is_constant()))
        return 0;
    OSL_DASSERT (S.typespec().is_string() && I.typespec().is_int());

    ustring *str = (ustring *)S.data();
    const int pos = (int) (*(int *)I.data());
    const int nchars = (int) str->length();
    // An index outside the string yields 0.
    int ch = 0;
    if (pos >= 0 && pos < nchars)
        ch = str->c_str()[pos];
    int cind = rop.add_constant (ch);
    rop.turn_into_assign (op, cind, "const fold getchar");
    return 1;
}
1202
1203
1204
DECLFOLDER(constfold_endswith)
{
    // Fold R=endswith(s,e) into R=C when s and e are both constants.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &S (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &E (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (S.is_constant() && E.is_constant()))
        return 0;
    OSL_DASSERT (S.typespec().is_string() && E.typespec().is_string());

    ustring str = *(ustring *)S.data();
    ustring tail = *(ustring *)E.data();
    const size_t nstr = str.length();
    const size_t ntail = tail.length();
    // A tail longer than the string can never match.
    int matched = 0;
    if (ntail <= nstr) {
        // Compare the tail against the last ntail characters of str.
        const char *candidate = str.c_str() + nstr - ntail;
        matched = (strncmp (candidate, tail.c_str(), ntail) == 0);
    }
    int cind = rop.add_constant (matched);
    rop.turn_into_assign (op, cind, "const fold endswith");
    return 1;
}
1225
1226
1227
DECLFOLDER(constfold_stoi)
{
    // Fold R=stoi(s) into R=C when s is a constant string.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &S (*rop.inst()->argsymbol(op.firstarg()+1));
    if (! S.is_constant())
        return 0;
    OSL_DASSERT (S.typespec().is_string());

    ustring text = *(ustring *)S.data();
    int parsed = Strutil::from_string<int>(text);
    int cind = rop.add_constant (parsed);
    rop.turn_into_assign (op, cind, "const fold stoi");
    return 1;
}
1242
1243
1244
DECLFOLDER(constfold_stof)
{
    // Fold R=stof(s) into R=C when s is a constant string.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &S (*rop.inst()->argsymbol(op.firstarg()+1));
    if (! S.is_constant())
        return 0;
    OSL_DASSERT (S.typespec().is_string());

    ustring text = *(ustring *)S.data();
    float parsed = Strutil::from_string<float>(text);
    int cind = rop.add_constant (parsed);
    rop.turn_into_assign (op, cind, "const fold stof");
    return 1;
}
1259
1260
1261
DECLFOLDER(constfold_split)
{
    // Try to fold n = split(str, results, sep, maxsplit) when the string
    // and the optional separator and maxsplit are all constants. The op
    // is replaced by two assignments: the results array gets a constant
    // string array, and the return value gets the constant count.
    // Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    // Symbol &R (*rop.inst()->argsymbol(op.firstarg()+0));
    Symbol &Str (*rop.opargsym (op, 1));
    Symbol &Results (*rop.opargsym (op, 2));
    // Sep and Maxsplit are optional, hence held as pointers and
    // NULL-checked below.
    // NOTE(review): presumably opargsym returns NULL when the op has
    // fewer args -- confirm.
    Symbol *Sep (rop.opargsym (op, 3));
    Symbol *Maxsplit (rop.opargsym (op, 4));
    if (Str.is_constant() && (!Sep || Sep->is_constant()) &&
        (!Maxsplit || Maxsplit->is_constant())) {
        // The split string, separator string, and maxsplit are all constants.
        // Compute the results with Strutil::split.
        int resultslen = Results.typespec().arraylength();
        // With no maxsplit argument, the results array length is the limit;
        // either way, never ask for more pieces than the array can hold.
        int maxsplit = Maxsplit ? *(int *)Maxsplit->data() : resultslen;
        maxsplit = std::min (maxsplit, resultslen);
        std::vector<std::string> splits;
        // A missing separator is passed to Strutil::split as "".
        ustring sep = Sep ? (*(ustring *)Sep->data()) : ustring("");
        Strutil::split ((*(ustring *)Str.data()).string(), splits,
                        sep.string(), maxsplit);
        // Number of pieces reported and copied: never negative, and
        // never more than either maxsplit or what split() produced.
        int n = std::min (std::max(0,maxsplit), (int)splits.size());
        // Temporarily stash the index of the symbol holding results
        // (read it before the op is rewritten just below).
        int resultsarg = rop.inst()->args()[op.firstarg()+2];
        // Turn the 'split' into a straight assignment of the return value...
        rop.turn_into_assign (op, rop.add_constant(n));
        // Create a constant array holding the split results
        // (elements past n stay default-constructed ustrings).
        std::vector<ustring> usplits (resultslen);
        for (int i = 0; i < n; ++i)
            usplits[i] = ustring(splits[i]);
        int cind = rop.add_constant (TypeDesc(TypeDesc::STRING,resultslen),
                                     usplits.data());
        // And insert an instruction copying our constant array to the
        // user's results array.
        const int args[] = { resultsarg, cind };
        rop.insert_code (opnum, u_assign, args,
                         RuntimeOptimizer::RecomputeRWRanges,
                         RuntimeOptimizer::GroupWithNext);
        return 1;
    }

    return 0;
}
1303
1304
1305
DECLFOLDER(constfold_concat)
{
    // Fold R=concat(s,...) into R=C when every string argument is a
    // constant. As soon as one argument is not, give up.
    Opcode &op (rop.inst()->ops()[opnum]);
    ustring joined;
    const int nargs = op.nargs();
    int argnum = 1;   // arg 0 is the result
    while (argnum < nargs) {
        Symbol &S (*rop.inst()->argsymbol(op.firstarg()+argnum));
        if (! S.is_constant())
            return 0;  // something non-constant
        // Append this piece; a null c_str() is treated as "".
        ustring piece = *(ustring *)S.data();
        ustring sofar = joined;
        const char *left = sofar.c_str() ? sofar.c_str() : "";
        const char *right = piece.c_str() ? piece.c_str() : "";
        joined = ustring::sprintf ("%s%s", left, right);
        ++argnum;
    }
    // Reaching this point means all args were constants, and 'joined'
    // holds their concatenation.
    int cind = rop.add_constant (TypeDesc::TypeString, &joined);
    rop.turn_into_assign (op, cind, "const fold concat");
    return 1;
}
1326
1327
1328
DECLFOLDER(constfold_format)
{
    // Try to turn R=format(fmt,...) into R=C
    //
    // The format string must be constant. Working from the LAST argument
    // backwards, each trailing constant argument is substituted into its
    // format specifier. If every argument gets folded, the op becomes a
    // plain string assignment; if only some trailing arguments fold, the
    // op is re-issued with a shortened argument list and a rewritten
    // format string. Returns 1 if the op was rewritten, 0 otherwise.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Format (*rop.opargsym(op, 1));
    if (! Format.is_constant())
        return 0;
    ustring fmt = *(ustring *)Format.data();

    // split fmt into the prefix (the starting part of the string that we
    // haven't yet processed) and the suffix (the ending part that we've
    // fully processed).
    std::string prefix = fmt.string();
    std::string suffix;
    int args_expanded = 0;   // how many trailing args have been folded

    // While there is still a constant argument at the end of the arg list,
    // peel it off and use it to rewrite the format string.
    // (Args 0 and 1 are the result and the format string, so the value
    // arguments start at index 2.)
    for (int argnum = op.nargs()-1; argnum >= 2; --argnum) {
        Symbol &Arg (*rop.opargsym(op, argnum));
        if (! Arg.is_constant())
            break;  // no more constants

        // find the last format specification
        size_t pos = std::string::npos;
        while (1) {
            pos = prefix.find_last_of ('%', pos); // find at or before pos
            if (pos == std::string::npos) {
                // Fewer '%' tokens than arguments? Must be malformed. Punt.
                return 0;
            }
            if (pos == 0 || prefix[pos-1] != '%') {
                // we found the format specifier
                break;
            }
            // False alarm! Beware of %% which is a literal % rather than a
            // format specifier.  Back up and try again.
            if (pos >= 2)
                pos -= 2;  // back up
            else {
                // This can only happen if the %% is at the start of the
                // format string, but it shouldn't be since there are still
                // args to process. Punt.
                return 0;
            }
        }
        OSL_ASSERT (pos < prefix.length() && prefix[pos] == '%');

        // cleave off the last format specification into mid
        // (mid runs from the '%' to the end of the remaining prefix, so
        // it also carries any literal text after the specifier).
        std::string mid = std::string (prefix, pos);
        std::string formatted;
        const TypeSpec &argtype = Arg.typespec();
        // Format the constant according to its type.
        if (argtype.is_int())
            formatted = Strutil::sprintf (mid.c_str(), *(int *)Arg.data());
        else if (argtype.is_float())
            formatted = Strutil::sprintf (mid.c_str(), *(float *)Arg.data());
        else if (argtype.is_triple())
            formatted = Strutil::sprintf (mid.c_str(), *(Vec3 *)Arg.data());
        else if (argtype.is_matrix())
            formatted = Strutil::sprintf (mid.c_str(), *(Matrix44 *)Arg.data());
        else if (argtype.is_string())
            formatted = Strutil::sprintf (mid.c_str(), *(ustring *)Arg.data());
        else
            break;  // something else we don't handle -- we're done

        // We were able to format, so rejigger the strings.
        // The consumed specifier leaves the prefix; its expansion is
        // prepended to the already-processed suffix.
        prefix.erase (pos, std::string::npos);
        suffix = formatted + suffix;
        args_expanded += 1;
    }

    // Rewrite the op
    if (args_expanded == op.nargs()-2) {
        // Special case -- completely expanded, replace with a string
        // assignment
        int cind = rop.add_constant (ustring(prefix + suffix));
        rop.turn_into_assign (op, cind, "fully constant fold format()");
        return 1;
    } else if (args_expanded != 0) {
        // Partially expanded -- rewrite the instruction. It's actually
        // easier to turn this instruction into a nop and insert a new one.
        // Grab the previous arguments, drop the ones we folded, and
        // replace the format string with our new one.
        int *argstart = &rop.inst()->args()[0] + op.firstarg();
        std::vector<int> newargs (argstart, argstart + op.nargs() - args_expanded);
        newargs[1] = rop.add_constant (ustring(prefix + suffix));
        // Save the op name before the op is turned into a nop.
        ustring opname = op.opname();
        rop.turn_into_nop (op, "partial constant fold format()");
        rop.insert_code (opnum, opname, newargs,
                         RuntimeOptimizer::RecomputeRWRanges);
        return 1;
    }

    return 0;
}
1424
1425
1426
DECLFOLDER(constfold_substr)
{
    // Fold R=substr(s,start,len) into R=C when all three inputs are
    // constants.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &S (*rop.opargsym (op, 1));
    Symbol &Start (*rop.opargsym (op, 2));
    Symbol &Len (*rop.opargsym (op, 3));
    if (! (S.is_constant() && Start.is_constant() && Len.is_constant()))
        return 0;
    OSL_DASSERT (S.typespec().is_string() && Start.typespec().is_int() &&
                 Len.typespec().is_int());

    ustring src = *(ustring *)S.data();
    int srclen = src.length();
    // A negative start counts back from the end of the string; the
    // resulting position is then clamped into [0, srclen].
    int first = *(int *)Start.data();
    if (first < 0)
        first += srclen;
    first = Imath::clamp (first, 0, srclen);
    // The requested length is clamped into [0, srclen] as well.
    int count = Imath::clamp (*(int *)Len.data(), 0, srclen);
    ustring piece (src, first, count);
    int cind = rop.add_constant (piece);
    rop.turn_into_assign (op, cind, "const fold substr");
    return 1;
}
1452
1453
1454
DECLFOLDER(constfold_regex_search)
{
    // Fold R=regex_search(subj,reg) into R=C when both strings are
    // constants.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Subj (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Reg (*rop.inst()->argsymbol(op.firstarg()+2));
    // Only the form without a search-results argument (3 op args,
    // counting the result) is folded.
    if (op.nargs() != 3)
        return 0;
    if (! Subj.is_constant() || ! Reg.is_constant())
        return 0;
    OSL_DASSERT(Subj.typespec().is_string() && Reg.typespec().is_string());

    const ustring &subject (*(ustring *)Subj.data());
    const ustring &pattern (*(ustring *)Reg.data());
    regex compiled (pattern.string());
    int found = regex_search (subject.string(), compiled);
    int cind = rop.add_constant (found);
    rop.turn_into_assign (op, cind, "const fold regex_search");
    return 1;
}
1474
1475
1476
// Restrict x to the range [minv, maxv]. The lower bound is tested first,
// so it wins if the bounds are inverted; an x that compares neither
// below minv nor above maxv (including NaN) is returned unchanged.
inline float clamp (float x, float minv, float maxv)
{
    return (x < minv) ? minv
                      : ((x > maxv) ? maxv : x);
}
1483
1484
1485
DECLFOLDER(constfold_clamp)
{
    // Fold R=clamp(x,min,max) into R=C when all three inputs are
    // constants of equivalent float or triple type.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Min (*rop.inst()->argsymbol(op.firstarg()+2));
    Symbol &Max (*rop.inst()->argsymbol(op.firstarg()+3));
    if (! (X.is_constant() && Min.is_constant() && Max.is_constant()))
        return 0;
    if (! equivalent(X.typespec(), Min.typespec()) ||
        ! equivalent(X.typespec(), Max.typespec()))
        return 0;
    if (! X.typespec().is_float() && ! X.typespec().is_triple())
        return 0;

    const float *xv = (const float *) X.data();
    const float *lo = (const float *) Min.data();
    const float *hi = (const float *) Max.data();
    // One component for a float, three for a triple.
    const int ncomps = X.typespec().is_triple() ? 3 : 1;
    float folded[3];
    for (int i = 0; i < ncomps; ++i)
        folded[i] = clamp (xv[i], lo[i], hi[i]);
    int cind = rop.add_constant (X.typespec(), &folded);
    rop.turn_into_assign (op, cind, "const fold clamp");
    return 1;
}
1512
1513
1514
DECLFOLDER(constfold_mix)
{
    // Try to turn R=mix(a,b,x) into
    //   R = c             if a,b,x are all are constant
    //   R = a             if x is constant and x == 0
    //   R = b             if x is constant and x == 1
    //   R = b*x           if a is zero (subject to the connection/pass
    //                     checks below)
    //   R = a             if a and b are the same (even if not constant)
    // Failing all of those, a mix whose inputs are connected to other
    // layers may be expanded into explicit 'if' tests on x (see the
    // "special sauce" comment below).
    // Returns 1 if the op was rewritten, 0 if it was left alone.
    //
    Opcode &op (rop.inst()->ops()[opnum]);
    int Rind = rop.oparg(op,0);
    int Aind = rop.oparg(op,1);
    int Bind = rop.oparg(op,2);
    int Xind = rop.oparg(op,3);
    Symbol &R (*rop.inst()->symbol(Rind));
    Symbol &A (*rop.inst()->symbol(Aind));
    Symbol &B (*rop.inst()->symbol(Bind));
    Symbol &X (*rop.inst()->symbol(Xind));
    // Everything better be a float or triple
    if (! ((A.typespec().is_float() || A.typespec().is_triple()) &&
           (B.typespec().is_float() || B.typespec().is_triple()) &&
           (X.typespec().is_float() || X.typespec().is_triple())))
        return 0;
    if (X.is_constant() && A.is_constant() && B.is_constant()) {
        // All three constants
        float result[3];
        const float *a = (const float *) A.data();
        const float *b = (const float *) B.data();
        const float *x = (const float *) X.data();
        bool atriple = A.typespec().is_triple();
        bool btriple = B.typespec().is_triple();
        bool xtriple = X.typespec().is_triple();
        bool rtriple = R.typespec().is_triple();
        int ncomps = rtriple ? 3 : 1;
        for (int i = 0; i < ncomps; ++i) {
            // The (bool * i) index is 0 for a float input, so a scalar
            // is reused for every component of a triple result.
            float xval = x[xtriple*i];
            result[i] = (1.0f-xval) * a[atriple*i] + xval * b[btriple*i];
        }
        int cind = rop.add_constant (R.typespec(), &result);
        rop.turn_into_assign (op, cind, "const fold mix");
        return 1;
    }

    // Two special cases... X is 0, X is 1
    if (rop.is_zero(X)) {  // mix(A,B,0) == A
        rop.turn_into_assign (op, Aind, "mix(a,b,0) => a");
        return 1;
    }
    if (rop.is_one(X)) {  // mix(A,B,1) == B
        rop.turn_into_assign (op, Bind, "mix(a,b,1) => b");
        return 1;
    }

    if (rop.is_zero(A) &&
        (! B.connected() || !rop.opt_mix() || rop.optimization_pass() > 2)) {
        // mix(0,b,x) == b*x, but only do this if b is not connected.
        // Because if b is connected, it may pull on something expensive.
        // (The restriction is lifted when opt_mix is off, or after
        // optimization pass 2.)
        rop.turn_into_new_op (op, u_mul, Rind, Bind, Xind, "mix(0,b,x) => b*x");
        return 1;
    }
#if 0
    // This seems to almost never happen, so don't worry about it
    if (rop.is_zero(B) && ! A.connected()) {
        // mix(a,0,x) == (1-x)*a, but only do this if b is not connected
    }
#endif

    // mix (a, a, x) is a, regardless of x and even if none are constants
    if (Aind == Bind) {
        rop.turn_into_assign (op, Aind, "const fold: mix(a,a,x) -> a");
        // The op is now an assign, so stop here and report the change.
        // Falling through would let the 'smart mix' expansion below
        // operate on an op that is no longer a mix, or would return 0
        // even though the op was modified.
        return 1;
    }

    // Special sauce: mix(a,b,x) is implemented as a*(1-x)+b*x. But
    // consider cases where x is not constant (thus not foldable), but
    // nonetheless turns out to be 0 or 1 much of the time. If a and b
    // are short local computations, it's not so bad, but if they are
    // shader parameters connected to other layers, this affair may
    // needlessly evaluate other layers for no purpose other than to
    // multiply their results by zero. So we try to ameliorate that
    // case with some extra tests here. N.B. we delay doing this until
    // a few optimization passes in, to give enough time to optimize
    // away the inputs in other ways before introducing the 'if'.
    if (rop.opt_mix() && rop.optimization_pass() > 1 &&
        !X.is_constant() && (A.connected() || B.connected())) {
        // A or B are connected, and thus presumed expensive, so turn into:
        //    if (X == 0)  // But eliminate this clause if B not connected
        //        R = A;
        //    else if (X == 1)  // But eliminate this clause if A not connected
        //        R = B;
        //    else
        //        R = A*(1-X) + B*X;
        // Each insert_code below is issued at the current opnum, which
        // is then advanced; the original mix op ends up at the final
        // opnum, where it is turned into a nop.
        int if0op = -1;  // Op where we have the 'if' for testing x==0
        int if1op = -1;  // Op where we have the 'if' for testing x==1
        if (B.connected()) {
            // Add the test and conditional for X==0, in which case we can
            // just R=A and not have to access B
            int cond = rop.add_temp (TypeDesc::TypeInt);
            int fzero = rop.add_constant (0.0f);
            rop.insert_code (opnum++, u_eq, RuntimeOptimizer::GroupWithNext,
                             cond, Xind, fzero);
            if0op = opnum;
            rop.insert_code (opnum++, u_if, RuntimeOptimizer::GroupWithNext, cond);
            // The 'if' only reads its condition.
            rop.op(if0op).argreadonly (0);
            rop.symbol(cond)->mark_rw (if0op, true, false);
            // Add the true (R=A) clause
            rop.insert_code (opnum++, u_assign,
                             RuntimeOptimizer::GroupWithNext, Rind, Aind);
        }
        int if0op_false = opnum;  // Where we jump if the 'if x==0' is false
        if (A.connected()) {
            // Add the test and conditional for X==1, in which case we can
            // just R=B and not have to access A
            int cond = rop.add_temp (TypeDesc::TypeInt);
            int fone = rop.add_constant (1.0f);
            rop.insert_code (opnum++, u_eq, RuntimeOptimizer::GroupWithNext,
                             cond, Xind, fone);
            if1op = opnum;
            rop.insert_code (opnum++, u_if, RuntimeOptimizer::GroupWithNext, cond);
            // The 'if' only reads its condition.
            rop.op(if1op).argreadonly (0);
            rop.symbol(cond)->mark_rw (if1op, true, false);
            // Add the true (R=B) clause
            rop.insert_code (opnum++, u_assign, RuntimeOptimizer::GroupWithNext,
                             Rind, Bind);
        }
        int if1op_false = opnum;  // Where we jump if the 'if x==1' is false
        // Add the (R=A*(1-X)+B*X) clause -- always need that
        int one_minus_x = rop.add_temp (X.typespec());
        int temp1 = rop.add_temp (A.typespec());
        int temp2 = rop.add_temp (B.typespec());
        int fone = rop.add_constant (1.0f);
        rop.insert_code (opnum++, u_sub, RuntimeOptimizer::GroupWithNext,
                         one_minus_x, fone, Xind);
        rop.insert_code (opnum++, u_mul, RuntimeOptimizer::GroupWithNext,
                         temp1, Aind, one_minus_x);
        rop.insert_code (opnum++, u_mul, RuntimeOptimizer::GroupWithNext,
                         temp2, Bind, Xind);
        rop.insert_code (opnum++, u_add, RuntimeOptimizer::GroupWithNext,
                         Rind, temp1, temp2);
        // Now go back and patch the 'if' ops with the right jump addresses
        if (if0op >= 0)
            rop.op(if0op).set_jump (if0op_false, opnum);
        if (if1op >= 0)
            rop.op(if1op).set_jump (if1op_false, opnum);
        // The next op is the original mix, make it nop
        rop.turn_into_nop (rop.op(opnum), "smart 'mix'");
        return 1;
    }

    return 0;
}
1664
1665
1666
DECLFOLDER(constfold_select)
{
    // Simplify R=select(a,b,cond):
    //   R = a    when cond is a constant that is zero
    //   R = b    when cond is a constant that is nonzero
    //   R = a    when a and b are the same symbol, whatever cond is
    //
    Opcode &op (rop.inst()->ops()[opnum]);
    // int Rind = rop.oparg(op,0);
    int Aind = rop.oparg(op,1);
    int Bind = rop.oparg(op,2);
    int Cind = rop.oparg(op,3);
    Symbol &C (*rop.inst()->symbol(Cind));

    if (C.is_constant()) {
        // A constant condition may be neither "zero" nor "nonzero" by
        // these tests, in which case neither branch is taken.
        if (rop.is_zero(C)) {
            rop.turn_into_assign (op, Aind, "select(A,B,0) => A");
            return 1;
        }
        if (rop.is_nonzero(C)) {
            rop.turn_into_assign (op, Bind, "select(A,B,non-0) => B");
            return 1;
        }
    }
    if (Aind != Bind)
        return 0;
    // Both choices are the same symbol: the condition is irrelevant.
    rop.turn_into_assign (op, Aind, "select(c,a,a) -> a");
    return 1;
}
1695
1696
1697
DECLFOLDER(constfold_min)
{
    // Fold R=min(x,y) into R=C when x and y are constants of equivalent
    // type: float and triple are handled per component, int as a scalar.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Y (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (X.is_constant() && Y.is_constant()))
        return 0;
    if (! equivalent(X.typespec(), Y.typespec()))
        return 0;

    if (X.typespec().is_float() || X.typespec().is_triple()) {
        const float *xv = (const float *) X.data();
        const float *yv = (const float *) Y.data();
        // One component for a float, three for a triple.
        const int ncomps = X.typespec().is_triple() ? 3 : 1;
        float folded[3];
        for (int i = 0; i < ncomps; ++i)
            folded[i] = std::min (xv[i], yv[i]);
        int cind = rop.add_constant (X.typespec(), &folded);
        rop.turn_into_assign (op, cind, "const fold min");
        return 1;
    }
    if (X.typespec().is_int()) {
        const int *xv = (const int *) X.data();
        const int *yv = (const int *) Y.data();
        int folded = std::min (*xv, *yv);
        int cind = rop.add_constant (folded);
        rop.turn_into_assign (op, cind, "const fold min");
        return 1;
    }
    return 0;   // some other type that is not folded
}
1730
1731
1732
DECLFOLDER(constfold_max)
{
    // Fold R=max(x,y) into R=C when x and y are constants of equivalent
    // type: float and triple are handled per component, int as a scalar.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Y (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (X.is_constant() && Y.is_constant()))
        return 0;
    if (! equivalent(X.typespec(), Y.typespec()))
        return 0;

    if (X.typespec().is_float() || X.typespec().is_triple()) {
        const float *xv = (const float *) X.data();
        const float *yv = (const float *) Y.data();
        // One component for a float, three for a triple.
        const int ncomps = X.typespec().is_triple() ? 3 : 1;
        float folded[3];
        for (int i = 0; i < ncomps; ++i)
            folded[i] = std::max (xv[i], yv[i]);
        int cind = rop.add_constant (X.typespec(), &folded);
        rop.turn_into_assign (op, cind, "const fold max");
        return 1;
    }
    if (X.typespec().is_int()) {
        const int *xv = (const int *) X.data();
        const int *yv = (const int *) Y.data();
        int folded = std::max (*xv, *yv);
        int cind = rop.add_constant (folded);
        rop.turn_into_assign (op, cind, "const fold max");
        return 1;
    }
    return 0;   // some other type that is not folded
}
1765
1766
1767
// Convenience macro that stamps out a complete constant folder, named
// constfold_<name>, for a simple one-argument function whose operand may
// be a float or a triple and whose result has the same type as the operand.
#define AUTO_DECLFOLDER_FLOAT_OR_TRIPLE(name,impl)                          \
DECLFOLDER(constfold_ ## name)                                              \
{                                                                           \
    /* Fold R=f(x) into R=C when x is a constant float or triple */         \
    Opcode &op (rop.inst()->ops()[opnum]);                                  \
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));                    \
    const bool x_is_triple = X.typespec().is_triple();                      \
    if (! X.is_constant() ||                                                \
        ! (x_is_triple || X.typespec().is_float()))                         \
        return 0;                                                           \
    /* One component for a float, three for a triple */                     \
    const float *xcomp = (const float *) X.data();                          \
    const int ncomps = x_is_triple ? 3 : 1;                                 \
    float folded[3];                                                        \
    for (int c = 0; c < ncomps; ++c)                                        \
        folded[c] = impl (xcomp[c]);                                        \
    int cind = rop.add_constant (X.typespec(), &folded);                    \
    rop.turn_into_assign (op, cind, "const fold " # name);                  \
    return 1;                                                               \
}
1792
1793
1794
// Constant folders for unary math functions, generated with
// AUTO_DECLFOLDER_FLOAT_OR_TRIPLE. Each line defines constfold_<name>, which
// applies the listed implementation to every component of a constant float
// or triple argument and replaces the op with an assignment of the result.
//
// These use the same implementation regardless of OSL_FAST_MATH:
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (sqrt , OIIO::safe_sqrt)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (inversesqrt, OIIO::safe_inversesqrt)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (degrees, OIIO::degrees)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (radians, OIIO::radians)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (floor , floorf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (ceil , ceilf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (erf , OIIO::fast_erf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (erfc , OIIO::fast_erfc)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (logb , OIIO::fast_logb)
// The remaining folders pick their implementation based on OSL_FAST_MATH --
// presumably so the folded value matches what the shader would compute at
// run time with the same setting (TODO confirm against the code generator).
#if OSL_FAST_MATH
// Fast-math build: fold with the OIIO::fast_* implementations.
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (cos , OIIO::fast_cos)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (sin , OIIO::fast_sin)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (acos , OIIO::fast_acos)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (asin , OIIO::fast_asin)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (exp , OIIO::fast_exp)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (exp2 , OIIO::fast_exp2)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (expm1 , OIIO::fast_expm1)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log , OIIO::fast_log)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log10 , OIIO::fast_log10)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log2 , OIIO::fast_log2)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (cbrt , OIIO::fast_cbrt)
#else
// Otherwise: fold with the C math library or OIIO::safe_* implementations.
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (cos , cosf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (sin , sinf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (acos , OIIO::safe_acos)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (asin , OIIO::safe_asin)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (exp , expf)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (exp2 , exp2f)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (expm1 , expm1f)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log , OIIO::safe_log)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log10 , OIIO::safe_log10)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (log2 , OIIO::safe_log2)
AUTO_DECLFOLDER_FLOAT_OR_TRIPLE (cbrt , cbrtf)
#endif
1829
1830 DECLFOLDER(constfold_pow)
1831 {
1832 Opcode &op (rop.inst()->ops()[opnum]);
1833 Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
1834 Symbol &Y (*rop.inst()->argsymbol(op.firstarg()+2));
1835
1836 if (rop.is_zero(Y)) {
1837 // x^0 == 1
1838 rop.turn_into_assign_one (op, "pow(x,0) => 1");
1839 return 1;
1840 }
1841 if (rop.is_one(Y)) {
1842 // x^1 == x
1843 rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+1), "pow(x,1) => x");
1844 return 1;
1845 }
1846 if (rop.is_zero(X)) {
1847 // 0^y == 0
1848 rop.turn_into_assign_zero (op, "pow(0,x) => 0");
1849 return 1;
1850 }
1851 if (X.is_constant() && Y.is_constant()) {
1852 // if x and y are both constant, pre-compute x^y
1853 const float *x = (const float *) X.data();
1854 const float *y = (const float *) Y.data();
1855 int nxcomps = X.typespec().is_triple() ? 3 : 1;
1856 int nycomps = Y.typespec().is_triple() ? 3 : 1;
1857 float result[3];
1858 for (int i = 0; i < nxcomps; ++i) {
1859 int j = std::min (i, nycomps-1);
1860 #if OSL_FAST_MATH
1861 result[i] = OIIO::fast_safe_pow (x[i], y[j]);
1862 #else
1863 result[i] = OIIO::safe_pow (x[i], y[j]);
1864 #endif
1865 }
1866 int cind = rop.add_constant (X.typespec(), &result);
1867 rop.turn_into_assign (op, cind, "const fold pow");
1868 return 1;
1869 }
1870
1871 // A few special cases of constant y:
1872 if (Y.is_constant() && Y.typespec().is_float()) {
1873 int resultarg = rop.inst()->args()[op.firstarg()+0];
1874 int xarg = rop.inst()->args()[op.firstarg()+1];
1875 float yval = *(const float *)Y.data();
1876 if (yval == 2.0f) {
1877 rop.turn_into_new_op (op, u_mul, resultarg, xarg, xarg,
1878 "pow(x,2) => x*x");
1879 return 1;
1880 }
1881 if (yval == 0.5f) {
1882 rop.turn_into_new_op (op, u_sqrt, resultarg, xarg, -1,
1883 "pow(x,0.5) => sqrt(x)");
1884 return 1;
1885 }
1886 if (yval == -0.5f) {
1887 rop.turn_into_new_op (op, u_inversesqrt, resultarg, xarg, -1,
1888 "pow(x,-0.5) => inversesqrt(x)");
1889 return 1;
1890 }
1891 if (yval == 1.0f / 3.0f) {
1892 rop.turn_into_new_op (op, u_cbrt, resultarg, xarg, -1,
1893 "pow(x,1.0/3.0) => cbrt(x)");
1894 return 1;
1895 }
1896 }
1897
1898 return 0;
1899 }
1900
1901
1902
DECLFOLDER(constfold_sincos)
{
    // Try to turn sincos(const_angle,s,c) into s=sin_a, c=cos_a.
    //
    // OSL's sincos has triple overloads as well as the float one, so the
    // angle is folded per component and the resulting constants are created
    // with the angle's own type. (Reading a triple angle as a single float
    // would assign sin/cos of component 0 to every component.) Any other
    // angle type is left untouched.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &A (*rop.inst()->argsymbol(op.firstarg()+0));
    const bool a_is_triple = A.typespec().is_triple();
    if (! A.is_constant() || ! (a_is_triple || A.typespec().is_float()))
        return 0;

    const int sinarg = rop.inst()->args()[op.firstarg()+1];
    const int cosarg = rop.inst()->args()[op.firstarg()+2];
    const float *angle = (const float *)A.data();
    const int ncomps = a_is_triple ? 3 : 1;
    float s[3], c[3];
    for (int i = 0; i < ncomps; ++i) {
#if OSL_FAST_MATH
        OIIO::fast_sincos (angle[i], &s[i], &c[i]);
#else
        OIIO::sincos (angle[i], &s[i], &c[i]);
#endif
    }

    // Turn this op into the sin assignment
    const int sin_const = rop.add_constant (A.typespec(), s);
    rop.turn_into_new_op (op, u_assign, sinarg, sin_const, -1,
                          "const fold sincos");
    // And insert a new op for the cos assignment
    const int args_to_add[] = { cosarg, rop.add_constant (A.typespec(), c) };
    rop.insert_code (opnum, u_assign, args_to_add,
                     RuntimeOptimizer::RecomputeRWRanges,
                     RuntimeOptimizer::GroupWithNext);
    return 1;
}
1930
1931
1932
DECLFOLDER(constfold_normalize)
{
    // Fold R=normalize(x) into R=C when x is a constant triple.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &X (*rop.inst()->argsymbol(op.firstarg()+1));
    OSL_DASSERT(X.typespec().is_triple());
    if (! X.is_constant())
        return 0;

    // Normalize a copy of the constant and store it with X's own type.
    Vec3 unit (*(const Vec3 *)X.data());
    unit.normalize();
    int cind = rop.add_constant (X.typespec(), &unit);
    rop.turn_into_assign (op, cind, "const fold normalize");
    return 1;
}
1948
1949
1950
DECLFOLDER(constfold_triple)
{
    // Fold R=triple(a,b,c) into R=C when all three components are constant
    // and no (non-common) coordinate space is involved.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() == 4 || op.nargs() == 5);
    // With 5 args, arg 1 is a space name and the components shift by one.
    bool using_space = (op.nargs() == 5);
    const int first_comp = op.firstarg() + (using_space ? 2 : 1);
    Symbol &R (*rop.inst()->argsymbol(op.firstarg()+0));
    Symbol &A (*rop.inst()->argsymbol(first_comp+0));
    Symbol &B (*rop.inst()->argsymbol(first_comp+1));
    Symbol &C (*rop.inst()->argsymbol(first_comp+2));

    if (using_space) {
        // A constant space name equal to "common" (or its designated
        // synonym) is the same as giving no space at all.
        Symbol &Space (*rop.inst()->argsymbol(op.firstarg()+1));
        if (Space.is_constant() && (Space.get_string() == Strings::common ||
                Space.get_string() == rop.shadingsys().commonspace_synonym()))
            using_space = false;
    }

    const bool foldable = A.is_constant() && A.typespec().is_float() &&
                          B.is_constant() && C.is_constant() && !using_space;
    if (! foldable)
        return 0;

    OSL_DASSERT(A.typespec().is_float() &&
                B.typespec().is_float() && C.typespec().is_float());
    // Assemble the three floats and store them with the result's type.
    float comps[3] = { *(const float *)A.data(),
                       *(const float *)B.data(),
                       *(const float *)C.data() };
    int cind = rop.add_constant (R.typespec(), &comps);
    rop.turn_into_assign (op, cind, "triple(const,const,const) => triple constant");
    return 1;
}
1983
1984
1985
DECLFOLDER(constfold_matrix)
{
    // Try to turn R=matrix(...) into an assignment of a constant matrix.
    // The forms handled are:
    //   matrix(from,to)     -- two space names, no floats
    //   matrix(from,1)      -- one space name equivalent to "common", value 1
    //   matrix(f0,...,f15)  -- 16 float constants
    //   matrix(f)           -- a single float constant
    // Returns 1 if the op was rewritten, 0 if it was left alone.
    Opcode &op (rop.inst()->ops()[opnum]);
    int nargs = op.nargs();
    // Count the leading string (space name) arguments: 0, 1, or 2.
    int using_space = rop.opargsym(op,1)->typespec().is_string() ? 1 : 0;
    if (using_space && nargs > 2 && rop.opargsym(op,2)->typespec().is_string())
        using_space = 2;
    // Whatever is left after the result and the space names is floats.
    int nfloats = nargs - 1 - using_space;
    OSL_DASSERT (nfloats == 1 || nfloats == 16 || (nfloats == 0 && using_space == 2));
    if (nargs == 3 && using_space == 2) {
        // Try to simplify R=matrix(from,to) in cases of an identity
        // transform: if From and To are the same variable (even if not a
        // constant), or if their values are the same, or if one is "common"
        // and the other is the designated common space synonym.
        Symbol &From (*rop.inst()->argsymbol(op.firstarg()+1));
        Symbol &To (*rop.inst()->argsymbol(op.firstarg()+2));
        // Distinct placeholder names keep two non-constant spaces from
        // comparing equal by value.
        ustring from = From.is_constant() ? *(ustring *)From.data() : ustring("$unknown1$");
        ustring to = To.is_constant() ? *(ustring *)To.data() : ustring("$unknown2$");
        ustring commonsyn = rop.inst()->shadingsys().commonspace_synonym();
        if (&From == &To || from == to ||
            ((from == Strings::common && to == commonsyn) ||
             (from == commonsyn && to == Strings::common))) {
            static Matrix44 ident (1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1);
            rop.turn_into_assign (op, rop.add_constant (ident),
                                  "matrix(spaceA,spaceA) => identity matrix");
            return 1;
        }
        // Try to simplify R=matrix(from,to) in cases of constant (but
        // different) names -- do the matrix retrieval now, if not time-
        // varying matrices.
        if (! (From.is_constant() && To.is_constant()))
            return 0;
        // Shader and object spaces will vary from execution to execution,
        // so we can't optimize those away.
        if (from == Strings::shader || from == Strings::object ||
            to == Strings::shader || to == Strings::object)
            return 0;
        // But whatever spaces are left *may* be optimizable if they are
        // not time-varying.
        RendererServices *rs = rop.shadingsys().renderer();
        Matrix44 Mfrom, Mto;
        bool ok = true;
        if (from == Strings::common || from == commonsyn)
            Mfrom.makeIdentity ();
        else
            ok &= rs->get_matrix (rop.shaderglobals(), Mfrom, from);
        if (to == Strings::common || to == commonsyn)
            Mto.makeIdentity ();
        else
            ok &= rs->get_inverse_matrix (rop.shaderglobals(), Mto, to);
        if (ok) {
            // The from-to matrix is known and not time-varying, so just
            // turn it into a constant rather than calling getmatrix at
            // execution time.
            Matrix44 Mresult = Mfrom * Mto;
            int cind = rop.add_constant (TypeDesc::TypeMatrix, &Mresult);
            rop.turn_into_assign (op, cind, "const fold matrix");
            return 1;
        }
    }
    if (using_space == 1 && nfloats == 1) {
        // Turn matrix("common",1) into identity matrix.
        Symbol &From (*rop.inst()->argsymbol(op.firstarg()+1));
        Symbol &Val (*rop.inst()->argsymbol(op.firstarg()+2));
        if (From.is_constant() && Val.is_constant() && *(float *)Val.data() == 1.0f) {
            ustring from = *(ustring *)From.data();
            if (from == Strings::common ||
                from == rop.inst()->shadingsys().commonspace_synonym()) {
                static Matrix44 ident (1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1);
                rop.turn_into_assign (op, rop.add_constant (ident),
                                      "matrix(\"common\",1) => identity matrix");
                // The op has been rewritten, so report the change like
                // every other fold in this function does.
                return 1;
            }
        }
    }
    if (nfloats == 16 && using_space == 0) {
        // Try to turn matrix(...16 float consts...) into just a const
        // matrix assign.
        bool all_const = true;
        float M[16];
        for (int i = 0; i < 16; ++i) {
            Symbol &Val (*rop.inst()->argsymbol(op.firstarg()+1+i));
            if (Val.is_constant())
                M[i] = *(const float *)Val.data();
            else {
                all_const = false;
                break;
            }
        }
        if (all_const) {
            rop.turn_into_assign (op, rop.add_constant (TypeDesc::TypeMatrix, M),
                                  "const fold matrix");
            return 1;
        }
    }
    if (nfloats == 1 && using_space == 0) {
        // Try to turn matrix(const float) into just a const matrix assign:
        // the value goes on the diagonal, zeros elsewhere.
        Symbol &Val (*rop.inst()->argsymbol(op.firstarg()+1));
        if (Val.is_constant()) {
            float val = *(float *)Val.data();
            Matrix44 M (val,0,0,0, 0,val,0,0, 0,0,val,0, 0,0,0,val);
            rop.turn_into_assign (op, rop.add_constant (M), "const fold matrix");
            return 1;
        }
    }
    return 0;
}
2092
2093
2094
DECLFOLDER(constfold_getmatrix)
{
    // Attempt to replace R=getmatrix(from,to,M) with M=const, R=1 when the
    // transform is an identity or the renderer can supply a matrix for it
    // at optimization time.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &From (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &To (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! From.is_constant() || ! To.is_constant())
        return 0;

    // Both space names are known constant strings from here on.
    const ustring from = *(ustring *)From.data();
    const ustring to = *(ustring *)To.data();
    const ustring commonsyn = rop.inst()->shadingsys().commonspace_synonym();

    // "shader" and "object" spaces differ from one execution to the next,
    // so nothing involving them can be folded.
    const bool from_varies = (from == Strings::shader || from == Strings::object);
    const bool to_varies = (to == Strings::shader || to == Strings::object);
    if (from_varies || to_varies)
        return 0;

    // Any remaining space *may* be foldable, provided the renderer can
    // report a (non-time-varying) matrix for it right now.
    RendererServices *rs = rop.shadingsys().renderer();
    const bool same_space = (from == to);
    Matrix44 Mfrom, Mto;
    bool ok = true;
    if (from == Strings::common || from == commonsyn || same_space) {
        Mfrom.makeIdentity ();
    } else if (! rs->get_matrix (rop.shaderglobals(), Mfrom, from)) {
        ok = false;
    }
    // Note: the 'to' lookup is attempted even if the 'from' lookup failed.
    if (to == Strings::common || to == commonsyn || same_space) {
        Mto.makeIdentity ();
    } else if (! rs->get_inverse_matrix (rop.shaderglobals(), Mto, to)) {
        ok = false;
    }
    if (! ok)
        return 0;

    // The from-to matrix is known, so bake it in as a constant instead of
    // calling getmatrix at execution time.
    const int resultarg = rop.inst()->args()[op.firstarg()+0];
    const int dataarg = rop.inst()->args()[op.firstarg()+3];
    // Re-target the op at the matrix destination before converting it
    // into an assignment.
    rop.inst()->args()[op.firstarg()+0] = dataarg;
    Matrix44 Mresult = Mfrom * Mto;
    int cind = rop.add_constant (TypeDesc::TypeMatrix, &Mresult);
    rop.turn_into_assign (op, cind, "getmatrix of known matrix");

    // Then add a second instruction that stores 1 into what used to be
    // getmatrix's return value.
    const int one = 1;
    const int args_to_add[] = { resultarg, rop.add_constant (TypeDesc::TypeInt, &one) };
    rop.insert_code (opnum, u_assign, args_to_add,
                     RuntimeOptimizer::RecomputeRWRanges,
                     RuntimeOptimizer::GroupWithNext);
    return 1;
}
2152
2153
2154
DECLFOLDER(constfold_transform)
{
    // Replace identity transforms with a plain assignment of the operand.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &M (*rop.inst()->argsymbol(op.firstarg()+1));
    const int nargs = op.nargs();

    if (nargs == 3) {
        // transform(matrix, p): foldable when the matrix is the identity.
        if (M.typespec().is_matrix() && rop.is_one(M)) {
            rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+2),
                                  "transform by identity");
            return 1;
        }
        return 0;
    }
    if (nargs != 4)
        return 0;

    // transform(from, to, p): foldable when both space names are constant
    // and name the same space (treating the common-space synonym as
    // "common").
    Symbol &T (*rop.inst()->argsymbol(op.firstarg()+2));
    if (! (M.is_constant() && T.is_constant()))
        return 0;
    OSL_DASSERT(M.typespec().is_string() && T.typespec().is_string());
    const ustring syn = rop.shadingsys().commonspace_synonym();
    ustring from = *(ustring *)M.data();
    ustring to = *(ustring *)T.data();
    if (from == syn)
        from = Strings::common;
    if (to == syn)
        to = Strings::common;
    if (from != to)
        return 0;

    rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+3),
                          "transform by identity");
    return 1;
}
2185
2186
2187
DECLFOLDER(constfold_transformc)
{
    // Simplify R=transformc(from,to,C) when both color space names are
    // constant: an identity conversion becomes R=C, and a constant color
    // is converted right now.
    Opcode &op (rop.inst()->ops()[opnum]);
    // Symbol &Result = *rop.opargsym (op, 0);
    Symbol &From = *rop.opargsym (op, 1);
    Symbol &To = *rop.opargsym (op, 2);
    Symbol &C = *rop.opargsym (op, 3);

    if (! From.is_constant() || ! To.is_constant())
        return 0;

    // Treat "RGB" as another spelling of "rgb".
    ustring from = From.get_string();
    ustring to = To.get_string();
    if (from == Strings::RGB)
        from = Strings::rgb;
    if (to == Strings::RGB)
        to = Strings::rgb;

    if (from == to) {
        // Same space on both sides: the color passes through unchanged.
        rop.turn_into_assign (op, rop.inst()->arg(op.firstarg()+3),
                              "transformc by identity");
        return 1;
    }
    if (! C.is_constant())
        return 0;

    // Different spaces but a known color: do the conversion at fold time.
    const Color3 Cin (C.get_float(0), C.get_float(1), C.get_float(2));
    Color3 converted = rop.shadingsys().colorsystem().transformc (from, to, Cin, rop.shadingcontext());
    rop.turn_into_assign (op, rop.add_constant(converted),
                          "transformc => constant");
    return 1;
}
2218
2219
2220
DECLFOLDER(constfold_setmessage)
{
    // Never rewrites the op (always returns 0); it only records with the
    // optimizer which message this instruction sets.
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol &Name (*rop.inst()->argsymbol(op.firstarg()+0));

    if (! Name.is_constant()) {
        // Message name not known at optimization time.
        rop.register_unknown_message ();
        return 0;
    }

    OSL_DASSERT (Name.typespec().is_string());
    rop.register_message (*(ustring *)Name.data());
    return 0;
}
2236
2237
2238
2239
DECLFOLDER(constfold_getmessage)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    // The 4-argument form carries a source; never optimize that one away.
    if (op.nargs() == 4)
        return 0;

    // Without a source, the message name is argument 1.
    Symbol &Name (*rop.inst()->argsymbol(op.firstarg()+1));
    if (! Name.is_constant())
        return 0;

    OSL_DASSERT (Name.typespec().is_string());
    if (rop.message_possibly_set (*(ustring *)Name.data()))
        return 0;

    // The message cannot have been sent: drop the getmessage, leave the
    // destination value untouched, and make the returned status 0.
    rop.turn_into_assign_zero (op, "impossible getmessage");
    return 1;
}
2259
2260
2261
2262
DECLFOLDER(constfold_getattribute)
{
    // Folding of getattribute can be switched off in the shading system.
    if (! rop.shadingsys().fold_getattribute())
        return 0;

    // getattribute() comes in eight "flavors":
    //   * getattribute (attribute_name, value)
    //   * getattribute (attribute_name, value[])
    //   * getattribute (attribute_name, index, value)
    //   * getattribute (attribute_name, index, value[])
    //   * getattribute (object, attribute_name, value)
    //   * getattribute (object, attribute_name, value[])
    //   * getattribute (object, attribute_name, index, value)
    //   * getattribute (object, attribute_name, index, value[])
    // Argument 0 is the status result; the destination is always last and
    // the index (if any) is next to last.
    Opcode &op (rop.inst()->ops()[opnum]);
    const int nargs = op.nargs();
    OSL_DASSERT(nargs >= 3 && nargs <= 5);
    const int dest_slot = nargs - 1;
    const int index_slot = nargs - 2;
    const bool array_lookup = rop.opargsym(op,index_slot)->typespec().is_int();
    const bool object_lookup = rop.opargsym(op,2)->typespec().is_string() && nargs >= 4;
    const int object_slot = object_lookup ? 1 : 0;
    const int attrib_slot = object_slot + 1;

    // Symbol& Result = *rop.opargsym (op, 0);
    Symbol& ObjectName = *rop.opargsym (op, object_slot);  // meaningful only when object_lookup
    Symbol& Attribute = *rop.opargsym (op, attrib_slot);
    Symbol& Index = *rop.opargsym (op, index_slot);         // meaningful only when array_lookup
    Symbol& Destination = *rop.opargsym (op, dest_slot);

    // Every input that takes part in the lookup must be a known constant.
    if (! Attribute.is_constant())
        return 0;
    if (object_lookup && ! ObjectName.is_constant())
        return 0;
    if (array_lookup && ! Index.is_constant())
        return 0;
    // Array destinations are not handled yet.
    if (Destination.typespec().is_array())
        return 0;

    // Scratch storage for the retrieved value; refuse to fold anything
    // that would not fit in it.
    const size_t maxbufsize = 1024;
    char buf[maxbufsize];
    const ustring attr_name = *(const ustring *)Attribute.data();
    TypeDesc attr_type = Destination.typespec().simpletype();
    if (attr_type.size() > maxbufsize)
        return 0;

    // A few global attributes are answered directly from the optimizer's
    // own state.
    bool found = true;
    if (attr_name == "osl:version" && attr_type == TypeDesc::TypeInt) {
        *(int *)(char *)buf = OSL_VERSION;
    } else if (attr_name == "shader:shadername" && attr_type == TypeDesc::TypeString) {
        *(ustring *)(char *)buf = ustring(rop.inst()->shadername());
    } else if (attr_name == "shader:layername" && attr_type == TypeDesc::TypeString) {
        *(ustring *)(char *)buf = rop.inst()->layername();
    } else if (attr_name == "shader:groupname" && attr_type == TypeDesc::TypeString) {
        *(ustring *)(char *)buf = rop.group().name();
    } else {
        found = false;
    }

    if (! found) {
        // With no object name we would have to search the shaded object
        // first and then the scene-wide namespace, which can only be done
        // at shade time -- so only named-object lookups are tried here.
        ustring obj_name;
        if (object_lookup)
            obj_name = *(const ustring *)ObjectName.data();
        if (obj_name.empty())
            return 0;

        if (array_lookup)
            found = rop.renderer()->get_array_attribute (NULL, false,
                                            obj_name, attr_type, attr_name,
                                            *(const int *)Index.data(), buf);
        else
            found = rop.renderer()->get_attribute (NULL, false,
                                            obj_name, attr_type, attr_name,
                                            buf);
    }

    // On failure leave the op alone so that it still reports its errors
    // at run time.
    if (! found)
        return 0;

    // On success the getattribute op is replaced by two assignments:
    //     assign result 1
    //     assign data [retrieved values]

    // Point the op's first argument at the data destination...
    const int oldresultarg = rop.inst()->args()[op.firstarg()+0];
    const int dataarg = rop.inst()->args()[op.firstarg()+dest_slot];
    rop.inst()->args()[op.firstarg()+0] = dataarg;
    // ...and convert it into an assignment of the retrieved value.
    int cind = rop.add_constant (attr_type, &buf);
    rop.turn_into_assign (op, cind, "const fold getattribute");
    // Then add an instruction storing 1 into getattribute's original
    // return value.
    const int one = 1;
    const int args_to_add[] = { oldresultarg, rop.add_constant (TypeDesc::TypeInt, &one) };
    rop.insert_code (opnum, u_assign, args_to_add,
                     RuntimeOptimizer::RecomputeRWRanges,
                     RuntimeOptimizer::GroupWithNext);
    return 1;
}
2374
2375
2376
DECLFOLDER(constfold_gettextureinfo)
{
    // With a constant file name and data name, perform the query now and
    // bake its outcome into the shader.
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_MAYBE_UNUSED Symbol &Result (*rop.inst()->argsymbol(op.firstarg()+0));
    Symbol &Filename (*rop.inst()->argsymbol(op.firstarg()+1));
    Symbol &Dataname (*rop.inst()->argsymbol(op.firstarg()+2));
    Symbol &Data (*rop.inst()->argsymbol(op.firstarg()+3));
    OSL_DASSERT (Result.typespec().is_int() &&
                 Filename.typespec().is_string() &&
                 Dataname.typespec().is_string());

    if (! Filename.is_constant() || ! Dataname.is_constant())
        return 0;

    const ustring filename = *(ustring *)Filename.data();
    const ustring dataname = *(ustring *)Dataname.data();
    TypeDesc t = Data.typespec().simpletype();
    void *mydata = OIIO_ALLOCA(char, t.size());
    // FIXME(ptex) -- exclude folding of ptex, since these things
    // can vary per face.
    ustring errormessage;
    int result = rop.renderer()->get_texture_info (filename, nullptr,
                          rop.shadingcontext()->texture_thread_info(),
                          rop.shadingcontext(),
                          0 /* TODO: subimage? */,
                          dataname, t, mydata, &errormessage);

    // The op
    //     gettextureinfo result filename dataname data
    // becomes, on success:
    //     assign data [retrieved values]
    //     assign result 1
    // and on failure:
    //     error "%s" errormessage
    //     assign result 0

    if (! result) {
        // Failure: the status folds to 0.
        rop.turn_into_assign_zero (op, "const fold gettextureinfo");
        if (errormessage.size()) {
            // Emit the error message if control flow ever reaches here.
            const int args_to_add[] = {
                rop.add_constant(u_fmterror),
                rop.add_constant(errormessage)
            };
            rop.insert_code(opnum, u_error, args_to_add,
                            RuntimeOptimizer::RecomputeRWRanges,
                            RuntimeOptimizer::GroupWithNext);
            // Both arguments of the inserted error op are only read.
            Opcode &newop (rop.inst()->ops()[opnum]);
            newop.argreadonly(0);
            newop.argreadonly(1);
        }
        return 1;
    }

    // Success: re-target the op at the data destination and convert it
    // into an assignment of the retrieved value.
    const int oldresultarg = rop.inst()->args()[op.firstarg()+0];
    const int dataarg = rop.inst()->args()[op.firstarg()+3];
    rop.inst()->args()[op.firstarg()+0] = dataarg;
    int cind = rop.add_constant (Data.typespec(), mydata);
    rop.turn_into_assign (op, cind, "const fold gettextureinfo");

    // Then add an instruction storing 1 into gettextureinfo's original
    // return value.
    int one = 1;
    const int args_to_add[] = {
        oldresultarg,
        rop.add_constant (TypeDesc::TypeInt, &one)
    };
    rop.insert_code (opnum, u_assign, args_to_add,
                     RuntimeOptimizer::RecomputeRWRanges,
                     RuntimeOptimizer::GroupWithNext);
    return 1;
}
2450
2451
2452
2453 // texture -- we can eliminate a lot of superfluous setting of optional
2454 // parameters to their default values.
DECLFOLDER(constfold_texture)2455 DECLFOLDER(constfold_texture)
2456 {
2457 Opcode &op (rop.inst()->ops()[opnum]);
2458 // Symbol &Result = *rop.opargsym (op, 0);
2459 // Symbol &Filename = *rop.opargsym (op, 1);
2460 // Symbol &S = *rop.opargsym (op, 2);
2461 // Symbol &T = *rop.opargsym (op, 3);
2462
2463 int first_optional_arg = 4;
2464 if (op.nargs() > 4 && rop.opargsym(op,4)->typespec().is_float()) {
2465 //user_derivs = true;
2466 first_optional_arg = 8;
2467 OSL_DASSERT(rop.opargsym(op,5)->typespec().is_float());
2468 OSL_DASSERT(rop.opargsym(op,6)->typespec().is_float());
2469 OSL_DASSERT(rop.opargsym(op,7)->typespec().is_float());
2470 }
2471
2472 TextureOpt opt; // So we can check the defaults
2473 bool swidth_set = false, twidth_set = false, rwidth_set = false;
2474 bool sblur_set = false, tblur_set = false, rblur_set = false;
2475 bool swrap_set = false, twrap_set = false, rwrap_set = false;
2476 bool firstchannel_set = false, fill_set = false, interp_set = false;
2477 bool any_elided = false;
2478 for (int i = first_optional_arg; i < op.nargs()-1; i += 2) {
2479 Symbol &Name = *rop.opargsym (op, i);
2480 Symbol &Value = *rop.opargsym (op, i+1);
2481 OSL_DASSERT(Name.typespec().is_string());
2482 if (Name.is_constant() && Value.is_constant()) {
2483 ustring name = *(ustring *)Name.data();
2484 bool elide = false;
2485 void *value = Value.data();
2486 TypeDesc valuetype = Value.typespec().simpletype();
2487
2488 // Keep from repeating the same tedious code for {s,t,r, }{width,blur,wrap}
2489 #define CHECK(field,ctype,osltype) \
2490 if (name == Strings::field && ! field##_set) { \
2491 if (valuetype == osltype && \
2492 *(ctype *)value == opt.field) \
2493 elide = true; \
2494 else if (osltype == TypeDesc::FLOAT && \
2495 valuetype == TypeDesc::INT && \
2496 *(int *)value == opt.field) \
2497 elide = true; \
2498 else \
2499 field##_set = true; \
2500 }
2501 #define CHECK_str(field,ctype,osltype) \
2502 CHECK (s##field,ctype,osltype) \
2503 else CHECK (t##field,ctype,osltype) \
2504 else CHECK (r##field,ctype,osltype) \
2505 else if (name == Strings::field && !s##field##_set && \
2506 ! t##field##_set && ! r##field##_set) { \
2507 if (valuetype == osltype) { \
2508 ctype *v = (ctype *)value; \
2509 if (*v == opt.s##field && *v == opt.t##field \
2510 && *v == opt.r##field) \
2511 elide = true; \
2512 else { \
2513 s##field##_set = true; \
2514 t##field##_set = true; \
2515 r##field##_set = true; \
2516 } \
2517 } else if (osltype == TypeDesc::FLOAT && \
2518 valuetype == TypeDesc::INT) { \
2519 int *v = (int *)value; \
2520 if (*v == opt.s##field && *v == opt.t##field \
2521 && *v == opt.r##field) \
2522 elide = true; \
2523 else { \
2524 s##field##_set = true; \
2525 t##field##_set = true; \
2526 r##field##_set = true; \
2527 } \
2528 } \
2529 }
2530
2531 #ifdef __clang__
2532 #pragma clang diagnostic push
2533 #pragma clang diagnostic ignored "-Wtautological-compare"
2534 #endif
2535 CHECK_str (width, float, TypeDesc::FLOAT)
2536 else CHECK_str (blur, float, TypeDesc::FLOAT)
2537 else CHECK (firstchannel, int, TypeDesc::INT)
2538 else CHECK (fill, float, TypeDesc::FLOAT)
2539
2540 else if ((name == Strings::wrap || name == Strings::swrap ||
2541 name == Strings::twrap || name == Strings::rwrap)
2542 && value && valuetype == TypeDesc::STRING) {
2543 // Special trick is needed for wrap modes because the input
2544 // is a string but the field we're setting is an int enum.
2545 OIIO::Tex::Wrap wrapmode = OIIO::Tex::decode_wrapmode (*(ustring *)value);
2546 void* value = &wrapmode;
2547 CHECK_str (wrap, int, TypeDesc::INT);
2548 }
2549 #ifdef __clang__
2550 #pragma clang diagnostic pop
2551 #endif
2552 #undef CHECK_STR
2553 #undef CHECK
2554
2555 // Cases that don't fit the pattern
2556 else if (name == Strings::interp && !interp_set) {
2557 if (value && valuetype == TypeDesc::STRING &&
2558 tex_interp_to_code(*(ustring *)value) == opt.interpmode)
2559 elide = true;
2560 else
2561 interp_set = true;
2562 }
2563
2564 if (elide) {
2565 // Just turn the param name into empty string and it will
2566 // be skipped.
2567 ustring empty;
2568 int cind = rop.add_constant (TypeDesc::TypeString, &empty);
2569 rop.inst()->args()[op.firstarg()+i] = cind;
2570 rop.inst()->args()[op.firstarg()+i+1] = cind;
2571 any_elided = true;
2572 }
2573 }
2574 }
2575 return any_elided;
2576 }
2577
2578
2579
// Constant-fold a pointcloud_search() call whose file name, center, radius
// and max_points arguments are all constants (and which does not ask for
// sorted results): run the search once now, and replace the op with plain
// assignments of constant arrays (one per named query) plus an assignment
// of the hit count to the result.
//
// Returns 1 if the op was rewritten, 0 if it was left for runtime.
DECLFOLDER(constfold_pointcloud_search)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    OSL_DASSERT(op.nargs() >= 5);
    int result_sym     = rop.oparg (op, 0);
    Symbol& Filename   = *rop.opargsym (op, 1);
    Symbol& Center     = *rop.opargsym (op, 2);
    Symbol& Radius     = *rop.opargsym (op, 3);
    Symbol& Max_points = *rop.opargsym (op, 4);
    OSL_DASSERT(Filename.typespec().is_string() &&
                Center.typespec().is_triple() && Radius.typespec().is_float() &&
                Max_points.typespec().is_int());

    // Can't constant fold unless all the required input args are constant
    if (! (Filename.is_constant() && Center.is_constant() &&
           Radius.is_constant() && Max_points.is_constant()))
        return 0;

    // Handle the optional 'sort' flag, and don't bother constant folding
    // if sorted results may be required.
    int attr_arg_offset = 5; // where the opt attrs begin
    if (op.nargs() > 5 && rop.opargsym(op,5)->typespec().is_int()) {
        // Sorting requested
        Symbol *Sort = rop.opargsym(op,5);
        if (! Sort->is_constant() || *(int *)Sort->data())
            return 0;  // forget it if sorted data might be requested
        ++attr_arg_offset;
    }
    int nattrs = (op.nargs() - attr_arg_offset) / 2;

    const int maxconst = 256;  // Max number of points to consider a constant

    // First pass through the optional arguments: gather the query names,
    // types, and destinations.  If any of the query names are not known
    // constants, we can't optimize this call so just return.
    // Along the way, remember the smallest number of elements that any
    // destination we will actually fill can hold.
    std::vector<ustring> names;
    std::vector<int> value_args;
    std::vector<TypeDesc> value_types;
    int capacity = maxconst;
    for (int i = 0, num_queries = 0; i < nattrs; ++i) {
        Symbol& Name  = *rop.opargsym (op, attr_arg_offset + i*2);
        Symbol& Value = *rop.opargsym (op, attr_arg_offset + i*2 + 1);
        OSL_ASSERT (Name.typespec().is_string());
        if (!Name.is_constant())
            return 0;  // unknown optional argument, punt
        if (++num_queries > RuntimeOptimizer::max_new_consts_per_fold)
            return 0;
        ustring query_name = *(ustring *)Name.data();
        TypeDesc query_type = Value.typespec().simpletype();
        // Empty-named queries are skipped below, so they don't limit us.
        // A non-array destination (arraylen 0) holds a single element.
        if (! query_name.empty())
            capacity = std::min (capacity, std::max (1, query_type.arraylen));
        names.push_back (query_name);
        value_args.push_back (rop.oparg (op, attr_arg_offset + i*2 + 1));
        value_types.push_back (query_type);
    }

    // We're doing a fixed query, so instead of running at every shade,
    // perform the search now.  We ask for up to maxconst+1 points so that
    // a result of maxconst+1 tells us the query is too big to fold.
    size_t indices[maxconst+1]; // Make room for one more!
    float distances[maxconst+1];
    int maxpoints = std::min (maxconst+1, *(int *)Max_points.data());
    ustring filename = *(ustring *)Filename.data();
    int count = 0;
    if (! filename.empty()) {
        count = rop.renderer()->pointcloud_search (rop.shaderglobals(), filename,
                             *(Vec3 *)Center.data(), *(float *)Radius.data(),
                             maxpoints, false, indices, distances, 0);
        rop.shadingsys().pointcloud_stats (1, 0, count);
    }

    // If it returns few enough results (256 points or less), just fold
    // those results into constant arrays.  If more than that, let the
    // query happen at runtime to avoid tying up a bunch of memory.
    if (count > maxconst)
        return 0;

    // The per-query scratch buffer below is sized from the destination's
    // type, yet 'count' results get written into it.  If some destination
    // array is too small for the results, folding would write past the end
    // of that buffer, so leave the call alone instead.
    if (count > capacity)
        return 0;

    // If the query returned no matching points, just turn the whole
    // pointcloud_search call into an assignment of 0 to the 'result'.
    if (count < 1) {
        rop.turn_into_assign_zero (op, "Folded constant pointcloud_search lookup");
        return 1;
    }

    // From here on out, we are able to fold the query (it returned
    // results, but not too many).  Start by removing the original
    // pointcloud_search call itself from the shader code.
    rop.turn_into_nop (op, "Folded constant pointcloud_search lookup");

    // Now, for each optional individual query, do a pointcloud_get NOW
    // to retrieve it, create a constant array for the shader to hold
    // those results, and add to the shader an array copy to move it
    // from the constant into the place the shader wanted the query
    // results to go.  (This assignment can be further optimized later
    // on as well, depending on how it's used.)  If any of the individual
    // queries fail now, we will return a failed result in the end.
    std::vector<char> tmp;  // temporary data
    for (int i = 0; i < nattrs; ++i) {
        // We had stashed names, data types, and destinations earlier.
        // Retrieve them now to build a query.
        if (names[i].empty())
            continue;
        TypeDesc const_valtype = value_types[i];
        tmp.clear ();
        tmp.resize (const_valtype.size(), 0);  // zero-filled beyond 'count'
        void *const_data = &tmp[0];
        if (names[i] == "index") {
            // "index" is a special case -- it's retrieving the hit point
            // indices, not data on those hit points.
            //
            // Because the presumed Partio underneath passes indices as
            // size_t, but OSL only allows int parameters, we need to
            // copy.  But just cast if size_t and int are the same size.
            if (sizeof(size_t) == sizeof(int)) {
                const_data = indices;
            } else {
                int *int_indices = (int *)const_data;
                for (int p = 0; p < count; ++p)
                    int_indices[p] = (int) indices[p];
            }
        } else {
            // Named queries.
            bool ok = rop.renderer()->pointcloud_get (rop.shaderglobals(),
                                          filename, indices, count,
                                          names[i], const_valtype, const_data);
            rop.shadingsys().pointcloud_stats (0, 1, 0);
            if (! ok) {
                count = 0;  // Make it look like an error in the end
                break;
            }
        }
        // Now make a constant array for those results we just retrieved...
        int const_array_sym = rop.add_constant (const_valtype, const_data);
        // ... and add an instruction to copy the constant into the
        // original destination for the query.
        const int args_to_add[] = { value_args[i], const_array_sym };
        rop.insert_code (opnum, u_assign, args_to_add,
                         RuntimeOptimizer::RecomputeRWRanges,
                         RuntimeOptimizer::GroupWithNext);
    }

    // Query results all copied.  The only thing left to do is to assign
    // status (query result count) to the original "result".
    const int args_to_add[] = { result_sym, rop.add_constant (TypeDesc::TypeInt, &count) };
    rop.insert_code (opnum, u_assign, args_to_add,
                     RuntimeOptimizer::RecomputeRWRanges,
                     RuntimeOptimizer::GroupWithNext);

    return 1;
}
2724
2725
2726
// Constant-fold a pointcloud_get() call whose file name, index array,
// count and attribute name are all constants: perform the lookup now and
// replace the op with an assignment of the status to the result plus an
// assignment of a constant array to the data destination.
//
// Returns 1 if the op was rewritten, 0 if it was left for runtime.
DECLFOLDER(constfold_pointcloud_get)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    // Symbol& Result     = *rop.opargsym (op, 0);
    Symbol& Filename   = *rop.opargsym (op, 1);
    Symbol& Indices    = *rop.opargsym (op, 2);
    Symbol& Count      = *rop.opargsym (op, 3);
    Symbol& Attr_name  = *rop.opargsym (op, 4);
    Symbol& Data       = *rop.opargsym (op, 5);
    if (! (Filename.is_constant() && Indices.is_constant() &&
           Count.is_constant() && Attr_name.is_constant()))
        return 0;

    // All inputs are constants -- we can just turn this into an array
    // assignment.

    ustring filename = *(ustring *)Filename.data();
    int count = *(int *)Count.data();
    if (filename.empty() || count < 1) {
        rop.turn_into_assign_zero (op, "Folded constant pointcloud_get");
        return 1;
    }

    if (count >= 1024)  // Too many, don't bother folding
        return 0;

    // We are about to read 'count' entries out of the constant Indices
    // array and to hand the renderer a buffer that is only as large as
    // Data's type.  If 'count' is larger than either array, folding would
    // run off the end of that memory, so leave the call for runtime.
    // (A non-array symbol, arraylen 0, holds a single element.)
    TypeDesc valtype = Data.typespec().simpletype();
    int capacity = std::min (std::max (1, Indices.typespec().simpletype().arraylen),
                             std::max (1, valtype.arraylen));
    if (count > capacity)
        return 0;

    // Must transfer to size_t array
    size_t *indices = OIIO_ALLOCA(size_t, count);
    for (int i = 0; i < count; ++i)
        indices[i] = ((int *)Indices.data())[i];

    std::vector<char> data (valtype.size());
    int ok = rop.renderer()->pointcloud_get (rop.shaderglobals(), filename,
                                              indices, count,
                                              *(ustring *)Attr_name.data(),
                                              valtype, &data[0]);
    rop.shadingsys().pointcloud_stats (0, 1, 0);

    // The op itself becomes "result = ok".
    rop.turn_into_assign (op, rop.add_constant (TypeDesc::TypeInt, &ok),
                          "Folded constant pointcloud_get");

    // Now make a constant array for those results we just retrieved...
    int const_array_sym = rop.add_constant (valtype, &data[0]);
    // ... and add an instruction to copy the constant into the
    // original destination for the query.
    const int args_to_add[] = { rop.oparg(op,5) /* Data symbol*/, const_array_sym };
    rop.insert_code (opnum, u_assign, args_to_add,
                     RuntimeOptimizer::RecomputeRWRanges,
                     RuntimeOptimizer::GroupWithNext);
    return 1;
}
2779
2780
2781
// Simplify a noise op.  When the noise name is known and is not "gabor",
// turn off derivative-taking of its arguments and neutralize any optional
// token/value arguments.  In addition, cell noise of all-constant
// coordinates is evaluated now and the op replaced by an assignment of the
// resulting constant.
//
// Returns 1 only when the op was folded into an assignment; 0 otherwise
// (even if arguments were simplified in place).
DECLFOLDER(constfold_noise)
{
    Opcode &op (rop.inst()->ops()[opnum]);

    // Decode some info about which noise function we're dealing with
    int arg = 0;   // Next arg to read
    Symbol &Result = *rop.opargsym (op, arg++);
    int outdim = Result.typespec().is_triple() ? 3 : 1;
    Symbol *Name = rop.opargsym (op, arg++);
    ustring name;
    if (Name->typespec().is_string()) {
        // Generic noise("name", ...): the name is only known if constant.
        if (Name->is_constant())
            name = *(ustring *)Name->data();
    } else {
        // Not a string, must be the old-style noise/pnoise
        --arg;  // forget that arg
        name = op.opname();
    }

    // Noise with name that is not a constant at osl-compile-time was marked
    // as taking the derivs of its coordinate arguments. If at this point we
    // can determine that the name is known and not "gabor", then we can
    // turn its derivative taking off.
    if (op.argtakesderivs_all() && name.length() && name != "gabor")
        op.argtakesderivs_all(0);

    // Gabor noise is the only one that takes optional arguments, so
    // optimize them away for other noise types.
    if (name.length() && name != "gabor") {
        for (int a = arg; a < op.nargs(); ++a) {
            // Advance until we hit a string argument, which will be the
            // first optional token/value pair. Then just turn all arguments
            // from that point on into empty strings, which will later be
            // skipped, and in the mean time will eliminate the dependencies
            // on whatever values were previously passed.
            if (rop.opargsym(op,a)->typespec().is_string()) {
                for ( ; a < op.nargs(); a += 2) {
                    OSL_ASSERT (a+1 < op.nargs());
                    int cind = rop.add_constant (ustring());
                    rop.inst()->args()[op.firstarg()+a] = cind;
                    rop.inst()->args()[op.firstarg()+a+1] = cind;
                }
            }
        }
    }

    // Early out: for now, we only fold cell noise
    if (name != u_cellnoise && name != u_cell)
        return 0;

    // NOTE(review): the coordinate gathering below treats every float or
    // triple argument as a noise coordinate.  If this folder is ever run
    // on the periodic ops, their period arguments would be misread as
    // extra coordinates, so don't fold those.  Confirm against the op
    // table whether pnoise/psnoise are routed here.
    if (op.opname() == "pnoise" || op.opname() == "psnoise")
        return 0;

    // Take an early out if any args are not constant (other than the result)
    for (int i = 1; i < op.nargs(); ++i)
        if (! rop.opargsym(op,i)->is_constant())
            return 0;

    // Extract the constant input coordinates
    float input[4];
    int indim = 0;
    for ( ; arg < op.nargs() && indim < 4; ++arg) {
        Symbol *in = rop.opargsym(op,arg);
        const float *v = (const float *)in->data();
        if (in->typespec().is_float()) {
            input[indim++] = v[0];
        } else if (in->typespec().is_triple()) {
            // A triple needs three slots; refuse to fold rather than
            // write past the end of input[].
            if (indim + 3 > 4)
                return 0;
            input[indim++] = v[0];
            input[indim++] = v[1];
            input[indim++] = v[2];
        }
        else
            return 0;  // optional args starting, we don't fold them yet
    }
    // Without any coordinates there is nothing to evaluate.
    if (indim < 1)
        return 0;

#if OSL_GNUC_VERSION >= 90000
#    pragma GCC diagnostic push
#    pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
    // Evaluate the cell noise now, picking the overload that matches the
    // number of coordinates gathered (1, 2, 3, or 4) and the result type.
    CellNoise cell;
    int cind;
    if (outdim == 1) {
        float n;
        if (indim == 1)
            cell (n, input[0]);
        else if (indim == 2)
            cell (n, input[0], input[1]);
        else if (indim == 3)
            cell (n, Vec3(input[0], input[1], input[2]));
        else
            cell (n, Vec3(input[0], input[1], input[2]), input[3]);
        cind = rop.add_constant (n);
    } else {
        OSL_DASSERT (outdim == 3);
        Vec3 n;
        if (indim == 1)
            cell (n, input[0]);
        else if (indim == 2)
            cell (n, input[0], input[1]);
        else if (indim == 3)
            cell (n, Vec3(input[0], input[1], input[2]));
        else
            cell (n, Vec3(input[0], input[1], input[2]), input[3]);
        cind = rop.add_constant (TypeDesc::TypePoint, &n);
    }
    rop.turn_into_assign (op, cind, "const fold cellnoise");
    return 1;
#if OSL_GNUC_VERSION >= 90000
#    pragma GCC diagnostic pop
#endif
}
2896
2897
2898
// Simplify a "functioncall" block: if its body holds nothing but nops and
// 'return' ops, erase the whole block; if the body holds real work but no
// 'return', the function wrapper itself is unnecessary.
// Returns the number of ops that were changed.
DECLFOLDER(constfold_functioncall)
{
    Opcode &call (rop.inst()->ops()[opnum]);
    const int body_end = call.jump(0);   // one past the last op of the body

    // Classify the contents of the function body.
    bool saw_return = false;
    bool saw_real_work = false;
    for (int i = opnum+1; i < body_end; ++i) {
        ustring opname = rop.inst()->ops()[i].opname();
        if (opname == u_return)
            saw_return = true;
        else if (opname != u_nop)
            saw_real_work = true;
    }

    if (! saw_real_work) {
        // Possibly as a result of earlier optimizations, the body has
        // nothing in it except (maybe) a return.  Wipe out the call op
        // and every remaining non-nop op of the body.
        int erased = 0;
        for (int i = opnum; i < body_end; ++i) {
            Opcode &cur (rop.inst()->ops()[i]);
            if (cur.opname() == u_nop)
                continue;
            rop.turn_into_nop (cur, "empty function");
            ++erased;
        }
        return erased;
    }

    if (saw_return)
        return 0;   // body does real work and may exit early: leave as is

    // Straight-line body with no return statement: the "function" op
    // serves no purpose, so drop it (or swap in its 'no return' variant
    // if the optimizer was asked to keep those around).
    if (rop.keep_no_return_function_calls())
        rop.turn_into_functioncall_nr (call, "'functioncall' transmuted to 'no return' version");
    else
        rop.turn_into_nop (call, "'function' not necessary");
    return 1;
}
2937
2938
2939
2940
// 'useparam' ops (emitted by old versions of oslc) are unconditionally
// replaced by a nop.  Always reports one change.
DECLFOLDER(constfold_useparam)
{
    rop.turn_into_nop (rop.inst()->ops()[opnum]);
    return 1;
}
2948
2949
2950
// Remove an assignment "A = B" when B is a constant and A is currently
// aliased (within this block) to a constant that already holds exactly
// the value being assigned.  Handles int<-int, float<-float, float<-int,
// triple<-triple and triple<-float.
//
// Floating point values are compared by bit pattern rather than with
// operator==, because == considers -0.0 and +0.0 equal; dropping such an
// assignment would leave the wrong sign of zero in A.
//
// Returns 1 if the op was turned into a nop, 0 otherwise.
DECLFOLDER(constfold_assign)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *B (rop.inst()->argsymbol(op.firstarg()+1));
    int Aalias = rop.block_alias (rop.inst()->arg(op.firstarg()+0));
    Symbol *AA = rop.inst()->symbol(Aalias);
    // N.B. symbol() returns NULL if alias is < 0

    if (! (B->is_constant() && AA && AA->is_constant()))
        return 0;

    // Try to turn A=C into nop if A already is C
    bool same_value = false;
    if (AA->typespec().is_int() && B->typespec().is_int()) {
        same_value = (*(int *)AA->data() == *(int *)B->data());
    } else if (AA->typespec().is_float() && B->typespec().is_float()) {
        same_value = ! memcmp (AA->data(), B->data(), sizeof(float));
    } else if (AA->typespec().is_float() && B->typespec().is_int()) {
        // The assignment would store the int converted to float.
        float b = (float) *(int *)B->data();
        same_value = ! memcmp (AA->data(), &b, sizeof(float));
    } else if (AA->typespec().is_triple() && B->typespec().is_triple()) {
        same_value = ! memcmp (AA->data(), B->data(), 3*sizeof(float));
    } else if (AA->typespec().is_triple() && B->typespec().is_float()) {
        // The assignment would replicate the float into all 3 components.
        float b = *(float *)B->data();
        float bbb[3] = { b, b, b };
        same_value = ! memcmp (AA->data(), bbb, sizeof(bbb));
    }

    if (! same_value)
        return 0;
    rop.turn_into_nop (op, "reassignment of current value");
    return 1;
}
2991
2992
2993
// When the shading system's max_warnings_per_thread is 0, a warning op is
// replaced by a nop.  Returns 1 if that happened, else 0.
DECLFOLDER(constfold_warning)
{
    if (rop.shadingsys().max_warnings_per_thread() != 0)
        return 0;   // warnings are enabled, keep the op

    rop.turn_into_nop (rop.inst()->ops()[opnum],
                       "warnings disabled by max_warnings_per_thread == 0");
    return 1;
}
3003
3004
3005
// If the op's second argument (index 1) is a constant, replace the op by
// an assignment of zero.  Returns 1 if folded, else 0.
DECLFOLDER(constfold_deriv)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *src = rop.inst()->argsymbol (op.firstarg()+1);
    if (! src->is_constant())
        return 0;

    rop.turn_into_assign_zero (op, "deriv of constant => 0");
    return 1;
}
3016
3017
3018
// Fold isconstant(A) to 1 once A is known to be a constant.
//
// This is deliberately one-sided: an argument that is not a known
// constant right now may still turn into one after more optimization, so
// the op is never folded to 0 here.
// Returns 1 if folded, else 0.
DECLFOLDER(constfold_isconstant)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *queried = rop.inst()->argsymbol (op.firstarg()+1);
    if (! queried->is_constant())
        return 0;   // can't say yet

    rop.turn_into_assign_one (op, "isconstant => 1");
    return 1;
}
3033
3034
3035
// Fold raytype(name) when the name is a constant and its bit (as reported
// by the shading system) is among the optimizer's raytypes_on() or
// raytypes_off() sets.  The "on" set is consulted first.
// Returns 1 if folded to a constant 1 or 0, else 0.
DECLFOLDER(constfold_raytype)
{
    Opcode &op (rop.inst()->ops()[opnum]);
    Symbol *name_sym = rop.opargsym (op, 1);
    OSL_DASSERT(name_sym->typespec().is_string());
    if (! name_sym->is_constant())
        return 0;  // Can't optimize non-constant raytype name

    const int bit = rop.shadingsys().raytype_bit (*(ustring *)name_sym->data());
    if (bit & rop.raytypes_on())
        rop.turn_into_assign_one (op, "raytype => 1");
    else if (bit & rop.raytypes_off())
        rop.turn_into_assign_zero (op, "raytype => 0");
    else
        return 0;  // indeterminate until execution time
    return 1;
}
3055
3056
3057 }; // namespace pvt
3058 OSL_NAMESPACE_EXIT
3059