1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __KERNEL_TYPES_H__
18 #define __KERNEL_TYPES_H__
19 
20 #if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE)
21 #  include <embree3/rtcore.h>
22 #  include <embree3/rtcore_scene.h>
23 #  define __EMBREE__
24 #endif
25 
26 #include "kernel/kernel_math.h"
27 #include "kernel/svm/svm_types.h"
28 #include "util/util_static_assert.h"
29 
/* Any build that does not define __KERNEL_GPU__ is treated as a CPU kernel
 * build and gets __KERNEL_CPU__ defined. */
#ifndef __KERNEL_GPU__
#  define __KERNEL_CPU__
#endif

/* TODO(sergey): This is only to make it possible to include this header
 * from outside of the kernel, but this could be done somewhat cleaner?
 *
 * Falls back to an empty address space qualifier when nothing defined
 * ccl_addr_space before this point.
 */
#ifndef ccl_addr_space
#  define ccl_addr_space
#endif
40 
41 CCL_NAMESPACE_BEGIN
42 
/* Constants
 *
 * Compile-time sizes and limits. Most consumers are outside this header, so
 * the group notes below only restate what is visible here. */
#define OBJECT_MOTION_PASS_SIZE 2
#define FILTER_TABLE_SIZE 1024
#define RAMP_TABLE_SIZE 256
#define SHUTTER_TABLE_SIZE 256

/* Subsurface limits. BSSRDF_MAX_HITS sizes the state array in
 * SubsurfaceIndirectRays, LOCAL_MAX_HITS sizes the arrays in LocalIntersection. */
#define BSSRDF_MIN_RADIUS 1e-8f
#define BSSRDF_MAX_HITS 4
#define BSSRDF_MAX_BOUNCES 256
#define LOCAL_MAX_HITS 4

#define VOLUME_BOUNDS_MAX 1024

#define BECKMANN_TABLE_SIZE 256

/* "No value" sentinels: all bits set for the integer ids, zero for ID_NONE. */
#define SHADER_NONE (~0)
#define OBJECT_NONE (~0)
#define PRIM_NONE (~0)
#define LAMP_NONE (~0)
#define ID_NONE (0.0f)

/* Number of entries in PathState::volume_stack. */
#define VOLUME_STACK_SIZE 32
65 
/* Split kernel constants */

/* WORK_POOL_SIZE resolves to the GPU value when __KERNEL_GPU__ is defined
 * and to the CPU value otherwise. */
#define WORK_POOL_SIZE_GPU 64
#define WORK_POOL_SIZE_CPU 1
#ifdef __KERNEL_GPU__
#  define WORK_POOL_SIZE WORK_POOL_SIZE_GPU
#else
#  define WORK_POOL_SIZE WORK_POOL_SIZE_CPU
#endif

#define SHADER_SORT_BLOCK_SIZE 2048

/* Local size used for shader sorting: 64 for OpenCL, 32 for CUDA and 1 for
 * every other build. */
#ifdef __KERNEL_OPENCL__
#  define SHADER_SORT_LOCAL_SIZE 64
#elif defined(__KERNEL_CUDA__)
#  define SHADER_SORT_LOCAL_SIZE 32
#else
#  define SHADER_SORT_LOCAL_SIZE 1
#endif
84 
/* Kernel features
 *
 * Each macro is a feature switch that other code tests with #ifdef. The
 * switches in this first group are always defined at this point; some of them
 * are removed again by the device specific and scene-based #undef blocks
 * further down in this header. */
#define __SOBOL__
#define __DPDU__
#define __BACKGROUND__
#define __CAUSTICS_TRICKS__
#define __VISIBILITY_FLAG__
#define __RAY_DIFFERENTIALS__
#define __CAMERA_CLIPPING__
#define __INTERSECTION_REFINE__
#define __CLAMP_SAMPLE__
#define __PATCH_EVAL__
#define __SHADOW_TRICKS__
#define __DENOISING_FEATURES__
#define __SHADER_RAYTRACE__
#define __AO__
#define __PASSES__
#define __HAIR__

/* Without these we get an AO render, used by OpenCL preview kernel.
 * The whole group is skipped when __KERNEL_AO_PREVIEW__ is defined. */
#ifndef __KERNEL_AO_PREVIEW__
#  define __SVM__
#  define __EMISSION__
#  define __HOLDOUT__
#  define __MULTI_CLOSURE__
#  define __TRANSPARENT_SHADOWS__
#  define __BACKGROUND_MIS__
#  define __LAMP_MIS__
#  define __CAMERA_MOTION__
#  define __OBJECT_MOTION__
#  define __BAKING__
#  define __PRINCIPLED__
#  define __SUBSURFACE__
#  define __VOLUME__
#  define __VOLUME_SCATTER__
#  define __CMJ__
#  define __SHADOW_RECORD_ALL__
#  define __BRANCHED_PATH__
#endif
123 
/* Device specific features */

/* CPU: OSL (only when built WITH_OSL) plus the decoupled and record-all
 * volume features. */
#ifdef __KERNEL_CPU__
#  ifdef WITH_OSL
#    define __OSL__
#  endif
#  define __VOLUME_DECOUPLED__
#  define __VOLUME_RECORD_ALL__
#endif /* __KERNEL_CPU__ */

/* CUDA: branched path tracing is removed when building the split kernel. */
#ifdef __KERNEL_CUDA__
#  ifdef __SPLIT_KERNEL__
#    undef __BRANCHED_PATH__
#  endif
#endif /* __KERNEL_CUDA__ */

/* OptiX: baking, branched path tracing and shader raytracing are removed. */
#ifdef __KERNEL_OPTIX__
#  undef __BAKING__
#  undef __BRANCHED_PATH__
/* TODO(pmours): Cannot use optixTrace in non-inlined functions */
#  undef __SHADER_RAYTRACE__
#endif /* __KERNEL_OPTIX__ */

/* OpenCL: currently no device specific overrides (empty block). */
#ifdef __KERNEL_OPENCL__
#endif /* __KERNEL_OPENCL__ */
148 
/* Scene-based selective features compilation.
 *
 * Every __NO_<FEATURE>__ macro that is defined when this header is included
 * removes the matching feature switch again. Names mostly match one to one;
 * the exceptions are __NO_VOLUME__ (also removes __VOLUME_SCATTER__),
 * __NO_TRANSPARENT__ (removes __TRANSPARENT_SHADOWS__) and __NO_DENOISING__
 * (removes __DENOISING_FEATURES__). */
#ifdef __NO_CAMERA_MOTION__
#  undef __CAMERA_MOTION__
#endif
#ifdef __NO_OBJECT_MOTION__
#  undef __OBJECT_MOTION__
#endif
#ifdef __NO_HAIR__
#  undef __HAIR__
#endif
#ifdef __NO_VOLUME__
#  undef __VOLUME__
#  undef __VOLUME_SCATTER__
#endif
#ifdef __NO_SUBSURFACE__
#  undef __SUBSURFACE__
#endif
#ifdef __NO_BAKING__
#  undef __BAKING__
#endif
#ifdef __NO_BRANCHED_PATH__
#  undef __BRANCHED_PATH__
#endif
#ifdef __NO_PATCH_EVAL__
#  undef __PATCH_EVAL__
#endif
#ifdef __NO_TRANSPARENT__
#  undef __TRANSPARENT_SHADOWS__
#endif
#ifdef __NO_SHADOW_TRICKS__
#  undef __SHADOW_TRICKS__
#endif
#ifdef __NO_PRINCIPLED__
#  undef __PRINCIPLED__
#endif
#ifdef __NO_DENOISING__
#  undef __DENOISING_FEATURES__
#endif
#ifdef __NO_SHADER_RAYTRACE__
#  undef __SHADER_RAYTRACE__
#endif
190 
/* Features that enable others */

/* Debug data collection follows the WITH_CYCLES_DEBUG build option. */
#ifdef WITH_CYCLES_DEBUG
#  define __KERNEL_DEBUG__
#endif

/* __BVH_LOCAL__ is enabled as soon as either subsurface scattering or shader
 * raytracing survived the feature selection above. */
#if defined(__SUBSURFACE__) || defined(__SHADER_RAYTRACE__)
#  define __BVH_LOCAL__
#endif
199 
/* Shader Evaluation */

/* Kind of result a shader evaluation is asked to produce. The enumerators
 * are numbered explicitly; the values form the same consecutive sequence
 * starting at zero as implicit numbering would give. */
typedef enum ShaderEvalType {
  SHADER_EVAL_DISPLACE = 0,
  SHADER_EVAL_BACKGROUND = 1,

  /* Bake types. SHADER_EVAL_BAKE is no real shade, it's used in the code to
   * differentiate the type of shader eval from the ones above. */
  SHADER_EVAL_BAKE = 2,

  /* Data passes. */
  SHADER_EVAL_NORMAL = 3,
  SHADER_EVAL_UV = 4,
  SHADER_EVAL_ROUGHNESS = 5,
  SHADER_EVAL_DIFFUSE_COLOR = 6,
  SHADER_EVAL_GLOSSY_COLOR = 7,
  SHADER_EVAL_TRANSMISSION_COLOR = 8,
  SHADER_EVAL_EMISSION = 9,
  SHADER_EVAL_AOV_COLOR = 10,
  SHADER_EVAL_AOV_VALUE = 11,

  /* Light passes. */
  SHADER_EVAL_AO = 12,
  SHADER_EVAL_COMBINED = 13,
  SHADER_EVAL_SHADOW = 14,
  SHADER_EVAL_DIFFUSE = 15,
  SHADER_EVAL_GLOSSY = 16,
  SHADER_EVAL_TRANSMISSION = 17,

  /* Extra. */
  SHADER_EVAL_ENVIRONMENT = 18,
} ShaderEvalType;
231 
/* Path Tracing
 * note we need to keep the u/v pairs at even values */

/* Random number dimensions. The enum contains two overlapping numberings:
 * a first group ending in PRNG_BASE_NUM and a second group ending in
 * PRNG_BOUNCE_NUM, which starts again from zero. */
enum PathTraceDimension {
  /* First group. */
  PRNG_FILTER_U = 0,
  PRNG_FILTER_V = 1,
  PRNG_LENS_U = 2,
  PRNG_LENS_V = 3,
  PRNG_TIME = 4,
  PRNG_UNUSED_0 = 5,
  /* For some reason (6, 7) is a bad sobol pattern with a low number of
   * samples (< 64). */
  PRNG_UNUSED_1 = 6,
  PRNG_UNUSED_2 = 7,
  PRNG_BASE_NUM = 10,

  /* Second group. */
  PRNG_BSDF_U = 0,
  PRNG_BSDF_V = 1,
  PRNG_LIGHT_U = 2,
  PRNG_LIGHT_V = 3,
  PRNG_LIGHT_TERMINATE = 4,
  PRNG_TERMINATE = 5,
  PRNG_PHASE_CHANNEL = 6,
  PRNG_SCATTER_DISTANCE = 7,
  PRNG_BOUNCE_NUM = 8,

  /* Bevel reuses the volume dimensions (6 and 7), correlation won't harm. */
  PRNG_BEVEL_U = PRNG_PHASE_CHANNEL,
  PRNG_BEVEL_V = PRNG_SCATTER_DISTANCE,
};
259 
/* Sampling patterns, numbered consecutively from zero. */
enum SamplingPattern {
  SAMPLING_PATTERN_SOBOL = 0,
  SAMPLING_PATTERN_CMJ, /* 1 */
  SAMPLING_PATTERN_PMJ, /* 2 */

  /* Number of patterns above, keep last. */
  SAMPLING_NUM_PATTERNS,
};
267 
/* These flag values correspond to raytypes in osl.cpp, so keep them in sync! */

/* Bit flags describing a path ray. Bits 0..13 are visibility bits (covered
 * by PATH_RAY_ALL_VISIBILITY), bits 14 and up are path state bits. */
enum PathRayFlag {
  /* Ray visibility. */
  PATH_RAY_CAMERA = 1 << 0,
  PATH_RAY_REFLECT = 1 << 1,
  PATH_RAY_TRANSMIT = 1 << 2,
  PATH_RAY_DIFFUSE = 1 << 3,
  PATH_RAY_GLOSSY = 1 << 4,
  PATH_RAY_SINGULAR = 1 << 5,
  PATH_RAY_TRANSPARENT = 1 << 6,

  /* Shadow ray visibility, split by opaque/transparent and by whether the
   * shadow catcher is involved. */
  PATH_RAY_SHADOW_OPAQUE_NON_CATCHER = 1 << 7,
  PATH_RAY_SHADOW_OPAQUE_CATCHER = 1 << 8,
  PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER = 1 << 9,
  PATH_RAY_SHADOW_TRANSPARENT_CATCHER = 1 << 10,

  /* Combinations of the four shadow bits above. */
  PATH_RAY_SHADOW_OPAQUE = PATH_RAY_SHADOW_OPAQUE_NON_CATCHER | PATH_RAY_SHADOW_OPAQUE_CATCHER,
  PATH_RAY_SHADOW_TRANSPARENT = PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER |
                                PATH_RAY_SHADOW_TRANSPARENT_CATCHER,
  PATH_RAY_SHADOW_NON_CATCHER = PATH_RAY_SHADOW_OPAQUE_NON_CATCHER |
                                PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER,
  PATH_RAY_SHADOW = PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT,

  /* Unused, free to reuse. */
  PATH_RAY_UNUSED = 1 << 11,

  /* Ray visibility for volume scattering. */
  PATH_RAY_VOLUME_SCATTER = 1 << 12,

  /* Special flag to tag unaligned BVH nodes. */
  PATH_RAY_NODE_UNALIGNED = 1 << 13,

  /* Mask of bits 0..13. */
  PATH_RAY_ALL_VISIBILITY = (1 << 14) - 1,

  /* Don't apply multiple importance sampling weights to emission from
   * lamp or surface hits, because they were not direct light sampled. */
  PATH_RAY_MIS_SKIP = 1 << 14,
  /* Diffuse bounce earlier in the path, skip SSS to improve performance
   * and avoid branching twice with disk sampling SSS. */
  PATH_RAY_DIFFUSE_ANCESTOR = 1 << 15,
  /* Single pass has been written. */
  PATH_RAY_SINGLE_PASS_DONE = 1 << 16,
  /* Ray is behind a shadow catcher. */
  PATH_RAY_SHADOW_CATCHER = 1 << 17,
  /* Store shadow data for shadow catcher or denoising. */
  PATH_RAY_STORE_SHADOW_INFO = 1 << 18,
  /* Zero background alpha, for camera or transparent glass rays. */
  PATH_RAY_TRANSPARENT_BACKGROUND = 1 << 19,
  /* Terminate ray immediately at next bounce. */
  PATH_RAY_TERMINATE_IMMEDIATE = 1 << 20,
  /* Ray is to be terminated, but continue with transparent bounces and
   * emission as long as we encounter them. This is required to make the
   * MIS between direct and indirect light rays match, as shadow rays go
   * through transparent surfaces to reach emission too. */
  PATH_RAY_TERMINATE_AFTER_TRANSPARENT = 1 << 21,
  /* Ray is to be terminated (either of the two termination modes). */
  PATH_RAY_TERMINATE = PATH_RAY_TERMINATE_IMMEDIATE | PATH_RAY_TERMINATE_AFTER_TRANSPARENT,
  /* Path and shader is being evaluated for direct lighting emission. */
  PATH_RAY_EMISSION = 1 << 22
};
329 
/* Closure Label */

/* Bit labels for closures; every non-zero label occupies one bit. */
typedef enum ClosureLabel {
  LABEL_NONE = 0,
  LABEL_TRANSMIT = 1 << 0,
  LABEL_REFLECT = 1 << 1,
  LABEL_DIFFUSE = 1 << 2,
  LABEL_GLOSSY = 1 << 3,
  LABEL_SINGULAR = 1 << 4,
  LABEL_TRANSPARENT = 1 << 5,
  LABEL_VOLUME_SCATTER = 1 << 6,
  LABEL_TRANSMIT_TRANSPARENT = 1 << 7,
} ClosureLabel;
343 
/* Render Passes */

/* Token-pastes two identifiers with an underscore in between (a_b). */
#define PASS_NAME_JOIN(a, b) a##_##b
/* Bit mask for the pass PASS_<pass>: bit (value % 32) of an int. Passes
 * whose values differ by a multiple of 32 map to the same bit.
 * NOTE(review): a pass value with (value % 32) == 31 would shift into the
 * sign bit of int; in PassType only the PASS_CATEGORY_*_END markers have
 * such values. */
#define PASSMASK(pass) (1 << ((PASS_NAME_JOIN(PASS, pass)) % 32))

/* ORs the masks of PASS_<comp>_DIRECT, PASS_<comp>_INDIRECT and
 * PASS_<comp>_COLOR for a light component such as DIFFUSE or GLOSSY. */
#define PASSMASK_COMPONENT(comp) \
  (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) | PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \
   PASSMASK(PASS_NAME_JOIN(comp, COLOR)))
352 
/* Render pass identifiers, grouped in categories of 32 values each. Every
 * category is closed by a PASS_CATEGORY_*_END marker holding the last value
 * of its range. */
typedef enum PassType {
  PASS_NONE = 0,

  /* Main passes (1..31). Numbered implicitly because the debug passes are
   * only present when __KERNEL_DEBUG__ is defined. */
  PASS_COMBINED = 1,
  PASS_DEPTH,
  PASS_NORMAL,
  PASS_UV,
  PASS_OBJECT_ID,
  PASS_MATERIAL_ID,
  PASS_MOTION,
  PASS_MOTION_WEIGHT,
#ifdef __KERNEL_DEBUG__
  PASS_BVH_TRAVERSED_NODES,
  PASS_BVH_TRAVERSED_INSTANCES,
  PASS_BVH_INTERSECTIONS,
  PASS_RAY_BOUNCES,
#endif
  PASS_RENDER_TIME,
  PASS_CRYPTOMATTE,
  PASS_AOV_COLOR,
  PASS_AOV_VALUE,
  PASS_ADAPTIVE_AUX_BUFFER,
  PASS_SAMPLE_COUNT,
  PASS_CATEGORY_MAIN_END = 31,

  /* Light passes (32..63). */
  PASS_MIST = 32,
  PASS_EMISSION = 33,
  PASS_BACKGROUND = 34,
  PASS_AO = 35,
  PASS_SHADOW = 36,
  /* No real pass, used to force use_light_pass. */
  PASS_LIGHT = 37,
  PASS_DIFFUSE_DIRECT = 38,
  PASS_DIFFUSE_INDIRECT = 39,
  PASS_DIFFUSE_COLOR = 40,
  PASS_GLOSSY_DIRECT = 41,
  PASS_GLOSSY_INDIRECT = 42,
  PASS_GLOSSY_COLOR = 43,
  PASS_TRANSMISSION_DIRECT = 44,
  PASS_TRANSMISSION_INDIRECT = 45,
  PASS_TRANSMISSION_COLOR = 46,
  /* Values 47..49 are not assigned. */
  PASS_VOLUME_DIRECT = 50,
  PASS_VOLUME_INDIRECT = 51,
  /* No Scatter color since it's tricky to define what it would even mean. */
  PASS_CATEGORY_LIGHT_END = 63,

  /* Bake passes (64..95). */
  PASS_BAKE_PRIMITIVE = 64,
  PASS_BAKE_DIFFERENTIAL = 65,
  PASS_CATEGORY_BAKE_END = 95
} PassType;

/* All bits set. */
#define PASS_ANY (~0)
405 
/* Cryptomatte option bits; values can be OR-ed together. */
typedef enum CryptomatteType {
  CRYPT_NONE = 0,
  CRYPT_OBJECT = 1 << 0,
  CRYPT_MATERIAL = 1 << 1,
  CRYPT_ASSET = 1 << 2,
  CRYPT_ACCURATE = 1 << 3,
} CryptomatteType;
413 
/* Offsets and sizes for denoising pass data. The enum holds three groups
 * that reuse the same numeric range: offsets of the raw feature passes,
 * offsets of the prefiltered passes, and sizes.
 * NOTE(review): the unit (presumably floats per pixel) is not visible in
 * this header - confirm against the denoising code. */
typedef enum DenoisingPassOffsets {
  /* Raw feature passes. */
  DENOISING_PASS_NORMAL = 0,
  DENOISING_PASS_NORMAL_VAR = 3,
  DENOISING_PASS_ALBEDO = 6,
  DENOISING_PASS_ALBEDO_VAR = 9,
  DENOISING_PASS_DEPTH = 12,
  DENOISING_PASS_DEPTH_VAR = 13,
  DENOISING_PASS_SHADOW_A = 14,
  DENOISING_PASS_SHADOW_B = 17,
  DENOISING_PASS_COLOR = 20,
  DENOISING_PASS_COLOR_VAR = 23,
  /* Same value as DENOISING_PASS_SIZE_BASE, i.e. directly after the base passes. */
  DENOISING_PASS_CLEAN = 26,

  /* Prefiltered passes, numbered again from zero. */
  DENOISING_PASS_PREFILTERED_DEPTH = 0,
  DENOISING_PASS_PREFILTERED_NORMAL = 1,
  DENOISING_PASS_PREFILTERED_SHADOWING = 4,
  DENOISING_PASS_PREFILTERED_ALBEDO = 5,
  DENOISING_PASS_PREFILTERED_COLOR = 8,
  DENOISING_PASS_PREFILTERED_VARIANCE = 11,
  DENOISING_PASS_PREFILTERED_INTENSITY = 14,

  /* Sizes of the base passes, the clean pass and the prefiltered passes. */
  DENOISING_PASS_SIZE_BASE = 26,
  DENOISING_PASS_SIZE_CLEAN = 3,
  DENOISING_PASS_SIZE_PREFILTERED = 15,
} DenoisingPassOffsets;
439 
/* Individual bake filter bits; combined in BakePassFilterCombos. */
typedef enum eBakePassFilter {
  BAKE_FILTER_NONE = 0,
  BAKE_FILTER_DIRECT = 1 << 0,
  BAKE_FILTER_INDIRECT = 1 << 1,
  BAKE_FILTER_COLOR = 1 << 2,
  BAKE_FILTER_DIFFUSE = 1 << 3,
  BAKE_FILTER_GLOSSY = 1 << 4,
  BAKE_FILTER_TRANSMISSION = 1 << 5,
  BAKE_FILTER_EMISSION = 1 << 6,
  BAKE_FILTER_AO = 1 << 7,
} eBakePassFilter;
451 
452 typedef enum BakePassFilterCombos {
453   BAKE_FILTER_COMBINED = (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE |
454                           BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_EMISSION |
455                           BAKE_FILTER_AO),
456   BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE),
457   BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY),
458   BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION),
459   BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE),
460   BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY),
461   BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION),
462 } BakePassFilterCombos;
463 
/* Bits selecting which light passes are treated as "clean" for denoising;
 * one bit per direct/indirect component. */
typedef enum DenoiseFlag {
  DENOISING_CLEAN_DIFFUSE_DIR = 1 << 0,
  DENOISING_CLEAN_DIFFUSE_IND = 1 << 1,
  DENOISING_CLEAN_GLOSSY_DIR = 1 << 2,
  DENOISING_CLEAN_GLOSSY_IND = 1 << 3,
  DENOISING_CLEAN_TRANSMISSION_DIR = 1 << 4,
  DENOISING_CLEAN_TRANSMISSION_IND = 1 << 5,

  /* Mask of the six bits above. */
  DENOISING_CLEAN_ALL_PASSES = (1 << 6) - 1,
} DenoiseFlag;
473 
#ifdef __KERNEL_DEBUG__
/* Debug counters, only compiled when __KERNEL_DEBUG__ is defined. Embedded in
 * PathRadiance as debug_data.
 *
 * NOTE: This is a runtime-only struct, alignment is not
 * really important here.
 */
typedef struct DebugData {
  /* BVH traversal and intersection counters. */
  int num_bvh_traversed_nodes;
  int num_bvh_traversed_instances;
  int num_bvh_intersections;
  /* Ray bounce counter. */
  int num_ray_bounces;
} DebugData;
#endif
485 
/* Per-component state stored inside PathRadiance as its `state` member.
 * All members exist only when __PASSES__ is defined; otherwise the struct
 * has no members. */
typedef ccl_addr_space struct PathRadianceState {
#ifdef __PASSES__
  /* One value per light component. */
  float3 diffuse;
  float3 glossy;
  float3 transmission;
  float3 volume;

  float3 direct;
#endif
} PathRadianceState;
496 
/* Radiance accumulated along a path. Only `transparent`, `emission` and
 * `state` are always present; the remaining groups depend on the
 * __PASSES__, __SHADOW_TRICKS__, __DENOISING_FEATURES__ and
 * __KERNEL_DEBUG__ feature switches. */
typedef ccl_addr_space struct PathRadiance {
#ifdef __PASSES__
  /* NOTE(review): presumably non-zero when per-component light passes are
   * accumulated - confirm against the accumulation code. */
  int use_light_pass;
#endif

  float transparent;
  float3 emission;
#ifdef __PASSES__
  float3 background;
  float3 ao;

  float3 indirect;
  float3 direct_emission;

  /* Color per light component. */
  float3 color_diffuse;
  float3 color_glossy;
  float3 color_transmission;

  /* Direct contribution per light component. */
  float3 direct_diffuse;
  float3 direct_glossy;
  float3 direct_transmission;
  float3 direct_volume;

  /* Indirect contribution per light component. */
  float3 indirect_diffuse;
  float3 indirect_glossy;
  float3 indirect_transmission;
  float3 indirect_volume;

  float3 shadow;
  float mist;
#endif

  struct PathRadianceState state;

#ifdef __SHADOW_TRICKS__
  /* Total light reachable across the path, ignoring shadow blocked queries. */
  float3 path_total;
  /* Total light reachable across the path with shadow blocked queries
   * applied here.
   *
   * Dividing this figure by path_total will give estimate of shadow pass.
   */
  float3 path_total_shaded;

  /* Color of the background on which shadow is alpha-overed. */
  float3 shadow_background_color;

  /* Path radiance sum and throughput at the moment when ray hits shadow
   * catcher object.
   */
  float shadow_throughput;

  /* Accumulated transparency along the path after shadow catcher bounce. */
  float shadow_transparency;

  /* Indicate if any shadow catcher data is set. */
  int has_shadow_catcher;
#endif

#ifdef __DENOISING_FEATURES__
  /* Denoising feature values (normal, albedo, depth). */
  float3 denoising_normal;
  float3 denoising_albedo;
  float denoising_depth;
#endif /* __DENOISING_FEATURES__ */

#ifdef __KERNEL_DEBUG__
  /* Debug counters, see DebugData. */
  DebugData debug_data;
#endif /* __KERNEL_DEBUG__ */
} PathRadiance;
566 
/* BSDF evaluation result. Without __PASSES__ only `diffuse` is present
 * (plus `sum_no_mis` when __SHADOW_TRICKS__ is defined); with __PASSES__
 * there is one value per component. */
typedef struct BsdfEval {
#ifdef __PASSES__
  /* NOTE(review): presumably selects per-component storage versus a single
   * sum kept in `diffuse` - confirm against the accumulation code. */
  int use_light_pass;
#endif

  float3 diffuse;
#ifdef __PASSES__
  float3 glossy;
  float3 transmission;
  float3 transparent;
  float3 volume;
#endif
#ifdef __SHADOW_TRICKS__
  /* Going by the name, a sum without MIS weighting (not verifiable here). */
  float3 sum_no_mis;
#endif
} BsdfEval;
583 
/* Shader Flag */

/* Flag bits occupying the top nine bits (23..31) of a 32-bit int.
 * SHADER_MASK selects the remaining low bits 0..22.
 * NOTE(review): presumably the low bits carry the shader index - confirm
 * against the code that packs shader ids. */
typedef enum ShaderFlag {
  /* Bit 31 is the sign bit. `1 << 31` overflows a signed int, which is
   * undefined behavior in C, so the bit is produced with an unsigned shift
   * and converted back; the enumerator value (INT_MIN) is unchanged. */
  SHADER_SMOOTH_NORMAL = (int)(1u << 31),
  SHADER_CAST_SHADOW = (1 << 30),
  SHADER_AREA_LIGHT = (1 << 29),
  SHADER_USE_MIS = (1 << 28),
  SHADER_EXCLUDE_DIFFUSE = (1 << 27),
  SHADER_EXCLUDE_GLOSSY = (1 << 26),
  SHADER_EXCLUDE_TRANSMIT = (1 << 25),
  SHADER_EXCLUDE_CAMERA = (1 << 24),
  SHADER_EXCLUDE_SCATTER = (1 << 23),
  /* Union of all SHADER_EXCLUDE_* bits. */
  SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE | SHADER_EXCLUDE_GLOSSY | SHADER_EXCLUDE_TRANSMIT |
                        SHADER_EXCLUDE_CAMERA | SHADER_EXCLUDE_SCATTER),

  /* Every bit that is not one of the flags above. */
  SHADER_MASK = ~(SHADER_SMOOTH_NORMAL | SHADER_CAST_SHADOW | SHADER_AREA_LIGHT | SHADER_USE_MIS |
                  SHADER_EXCLUDE_ANY)
} ShaderFlag;
602 
/* Light Type */

/* Kinds of light, numbered consecutively from zero. */
typedef enum LightType {
  LIGHT_POINT = 0,
  LIGHT_DISTANT = 1,
  LIGHT_BACKGROUND = 2,
  LIGHT_AREA = 3,
  LIGHT_SPOT = 4,
  LIGHT_TRIANGLE = 5
} LightType;
613 
/* Camera Type */

/* Camera projection kinds, numbered consecutively from zero. */
enum CameraType {
  CAMERA_PERSPECTIVE = 0,
  CAMERA_ORTHOGRAPHIC = 1,
  CAMERA_PANORAMA = 2
};
617 
/* Panorama Type */

/* Panorama projections, numbered consecutively from zero. */
enum PanoramaType {
  PANORAMA_EQUIRECTANGULAR = 0,
  PANORAMA_FISHEYE_EQUIDISTANT, /* 1 */
  PANORAMA_FISHEYE_EQUISOLID,   /* 2 */
  PANORAMA_MIRRORBALL,          /* 3 */

  /* Number of projections above, keep last. */
  PANORAMA_NUM_TYPES,
};
628 
629 /* Differential */
630 
631 typedef struct differential3 {
632   float3 dx;
633   float3 dy;
634 } differential3;
635 
/* Scalar counterpart of differential3: a dx/dy pair of floats. */
typedef struct differential {
  float dx, dy;
} differential;
640 
/* Ray */

/* A ray with origin, direction, length and motion blur time, plus origin and
 * direction differentials when __RAY_DIFFERENTIALS__ is defined. The member
 * order depends on __KERNEL_OPENCL_AMD__, see below. */
typedef struct Ray {
/* TODO(sergey): This is only needed because current AMD
 * compiler has hard time building the kernel with this
 * reshuffle. And at the same time reshuffle will cause
 * less optimal CPU code in certain places.
 *
 * We'll get rid of this nasty exception once AMD compiler
 * is fixed.
 */
#ifndef __KERNEL_OPENCL_AMD__
  float3 P;   /* origin */
  float3 D;   /* direction */
  float t;    /* length of the ray */
  float time; /* time (for motion blur) */
#else
  /* Same members, with the two scalars placed first. */
  float t;    /* length of the ray */
  float time; /* time (for motion blur) */
  float3 P;   /* origin */
  float3 D;   /* direction */
#endif

#ifdef __RAY_DIFFERENTIALS__
  differential3 dP; /* differential of P */
  differential3 dD; /* differential of D */
#endif
} Ray;
669 
/* Intersection */

/* Result of a ray intersection query. Ng is only stored for Embree builds
 * and the counters only for debug builds. */
typedef struct Intersection {
#ifdef __EMBREE__
  float3 Ng;
#endif
  /* Hit distance followed by the two parametric coordinates of the hit. */
  float t;
  float u;
  float v;
  /* Primitive, object and type of the hit; NOTE(review): `type` presumably
   * holds PrimitiveType bits - confirm against the traversal code. */
  int prim;
  int object;
  int type;

#ifdef __KERNEL_DEBUG__
  /* Traversal statistics. */
  int num_traversed_nodes;
  int num_traversed_instances;
  int num_intersections;
#endif
} Intersection;
687 
/* Primitives */

/* Primitive type bits. The six traceable primitive types use bits 0..5;
 * a curve segment index can be packed above them with
 * PRIMITIVE_PACK_SEGMENT. */
typedef enum PrimitiveType {
  PRIMITIVE_NONE = 0,
  PRIMITIVE_TRIANGLE = (1 << 0),
  PRIMITIVE_MOTION_TRIANGLE = (1 << 1),
  PRIMITIVE_CURVE_THICK = (1 << 2),
  PRIMITIVE_MOTION_CURVE_THICK = (1 << 3),
  PRIMITIVE_CURVE_RIBBON = (1 << 4),
  PRIMITIVE_MOTION_CURVE_RIBBON = (1 << 5),
  /* Lamp primitive is not included below on purpose,
   * since it is no real traceable primitive.
   */
  PRIMITIVE_LAMP = (1 << 6),

  /* Masks over groups of the traceable primitive bits. */
  PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE | PRIMITIVE_MOTION_TRIANGLE),
  PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE_THICK | PRIMITIVE_MOTION_CURVE_THICK |
                         PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON),
  PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE_THICK |
                          PRIMITIVE_MOTION_CURVE_RIBBON),
  PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE | PRIMITIVE_ALL_CURVE),

  /* Total number of different traceable primitives.
   * NOTE: This is an actual value, not a bitflag.
   */
  PRIMITIVE_NUM_TOTAL = 6,
} PrimitiveType;

/* Pack a segment index above the PRIMITIVE_NUM_TOTAL type bits, and extract
 * it again. Arguments are fully parenthesized so that expressions containing
 * lower-precedence operators (e.g. `a | b`) are shifted as a whole. */
#define PRIMITIVE_PACK_SEGMENT(type, segment) (((segment) << PRIMITIVE_NUM_TOTAL) | (type))
#define PRIMITIVE_UNPACK_SEGMENT(type) ((type) >> PRIMITIVE_NUM_TOTAL)
718 
/* Curve shape kinds, numbered consecutively from zero. */
typedef enum CurveShapeType {
  CURVE_RIBBON = 0,
  CURVE_THICK, /* 1 */

  /* Number of shape kinds above, keep last. */
  CURVE_NUM_SHAPE_TYPES,
} CurveShapeType;
725 
/* Attributes */

/* Primitive kinds an attribute can belong to. */
typedef enum AttributePrimitive {
  ATTR_PRIM_GEOMETRY = 0,
  ATTR_PRIM_SUBD = 1,

  /* Number of kinds above, keep last. */
  ATTR_PRIM_TYPES
} AttributePrimitive;
734 
/* Element an attribute is stored on, numbered consecutively from zero. */
typedef enum AttributeElement {
  ATTR_ELEMENT_NONE = 0,
  ATTR_ELEMENT_OBJECT = 1,
  ATTR_ELEMENT_MESH = 2,
  ATTR_ELEMENT_FACE = 3,
  ATTR_ELEMENT_VERTEX = 4,
  ATTR_ELEMENT_VERTEX_MOTION = 5,
  ATTR_ELEMENT_CORNER = 6,
  ATTR_ELEMENT_CORNER_BYTE = 7,
  ATTR_ELEMENT_CURVE = 8,
  ATTR_ELEMENT_CURVE_KEY = 9,
  ATTR_ELEMENT_CURVE_KEY_MOTION = 10,
  ATTR_ELEMENT_VOXEL = 11
} AttributeElement;
749 
/* Identifiers of the standard (built-in) attributes. Values are assigned
 * implicitly from zero, so ATTR_STD_NUM equals the number of entries before
 * it and must stay directly after the last real attribute. */
typedef enum AttributeStandard {
  ATTR_STD_NONE = 0,
  /* Normals, UVs and colors. */
  ATTR_STD_VERTEX_NORMAL,
  ATTR_STD_FACE_NORMAL,
  ATTR_STD_UV,
  ATTR_STD_UV_TANGENT,
  ATTR_STD_UV_TANGENT_SIGN,
  ATTR_STD_VERTEX_COLOR,
  /* Generated coordinates and alternative positions. */
  ATTR_STD_GENERATED,
  ATTR_STD_GENERATED_TRANSFORM,
  ATTR_STD_POSITION_UNDEFORMED,
  ATTR_STD_POSITION_UNDISPLACED,
  /* Motion blur data. */
  ATTR_STD_MOTION_VERTEX_POSITION,
  ATTR_STD_MOTION_VERTEX_NORMAL,
  /* Particles and curves. */
  ATTR_STD_PARTICLE,
  ATTR_STD_CURVE_INTERCEPT,
  ATTR_STD_CURVE_RANDOM,
  /* Ptex. */
  ATTR_STD_PTEX_FACE_ID,
  ATTR_STD_PTEX_UV,
  /* Volumes. */
  ATTR_STD_VOLUME_DENSITY,
  ATTR_STD_VOLUME_COLOR,
  ATTR_STD_VOLUME_FLAME,
  ATTR_STD_VOLUME_HEAT,
  ATTR_STD_VOLUME_TEMPERATURE,
  ATTR_STD_VOLUME_VELOCITY,
  /* Miscellaneous. */
  ATTR_STD_POINTINESS,
  ATTR_STD_RANDOM_PER_ISLAND,
  /* Number of standard attributes, keep after the last one. */
  ATTR_STD_NUM,

  /* Sentinel with all bits set. */
  ATTR_STD_NOT_FOUND = ~0
} AttributeStandard;
781 
/* Bits stored in AttributeDescriptor::flags. */
typedef enum AttributeFlag {
  ATTR_FINAL_SIZE = 1 << 0,
  ATTR_SUBDIVIDED = 1 << 1,
} AttributeFlag;
786 
/* Describes one attribute: the element it is stored on, its data type,
 * flag bits and an offset. */
typedef struct AttributeDescriptor {
  AttributeElement element;
  /* NodeAttributeType is declared outside this header. */
  NodeAttributeType type;
  uint flags; /* see enum AttributeFlag */
  /* NOTE(review): presumably an offset into the attribute data - confirm
   * against the attribute lookup code. */
  int offset;
} AttributeDescriptor;
793 
/* Closure data */

/* MAX_CLOSURE is the length of the closure array in ShaderData.
 *
 * Only builds with __MULTI_CLOSURE__ and without __SPLIT_KERNEL__ get more
 * than one closure: the value of __MAX_CLOSURE__ when that is defined,
 * otherwise 64. Every other configuration uses a single closure. */
#if defined(__MULTI_CLOSURE__) && !defined(__SPLIT_KERNEL__)
#  ifdef __MAX_CLOSURE__
#    define MAX_CLOSURE __MAX_CLOSURE__
#  else
#    define MAX_CLOSURE 64
#  endif
#else
#  define MAX_CLOSURE 1
#endif
809 
/* This struct is the base class for all closures. The common members are
 * duplicated in all derived classes since we don't have C++ in the kernel
 * yet, and because it lets us lay out the members to minimize padding. The
 * weight member is located at the beginning of the struct for this reason.
 *
 * ShaderClosure has a fixed size, and any extra space must be allocated
 * with closure_alloc_extra().
 *
 * We pad the struct to align to 16 bytes. All shader closures are assumed
 * to fit in this struct size. CPU sizes are a bit larger because float3 is
 * padded to be 16 bytes, while it's only 12 bytes on the GPU. */

/* Common leading members of every closure struct. The last member has no
 * trailing semicolon; the user of the macro supplies it. */
#define SHADER_CLOSURE_BASE \
  float3 weight; \
  ClosureType type; \
  float sample_weight; \
  float3 N

typedef ccl_addr_space struct ccl_align(16) ShaderClosure
{
  SHADER_CLOSURE_BASE;

  /* Extra padding, present on CPU builds only. */
#ifdef __KERNEL_CPU__
  float pad[2];
#endif
  /* NOTE(review): presumably storage for closure specific parameters -
   * confirm against the derived closure structs. */
  float data[10];
}
ShaderClosure;
838 
/* Shader Data
 *
 * Main shader state at a point on the surface or in a volume. All coordinates
 * are in world space.
 */

/* Bits stored in ShaderData::flag. Runtime flags use bits 0..10 (bit 8 is
 * not assigned), shader flags use bits 16..28. */
enum ShaderDataFlag {
  /* Runtime flags. */

  /* Set when ray hits backside of surface. */
  SD_BACKFACING = 1 << 0,
  /* Shader has non-zero emission. */
  SD_EMISSION = 1 << 1,
  /* Shader has BSDF closure. */
  SD_BSDF = 1 << 2,
  /* Shader has non-singular BSDF closure. */
  SD_BSDF_HAS_EVAL = 1 << 3,
  /* Shader has BSSRDF closure. */
  SD_BSSRDF = 1 << 4,
  /* Shader has holdout closure. */
  SD_HOLDOUT = 1 << 5,
  /* Shader has non-zero volume extinction. */
  SD_EXTINCTION = 1 << 6,
  /* Shader has volume phase (scatter) closure. */
  SD_SCATTER = 1 << 7,
  /* Shader has transparent closure. */
  SD_TRANSPARENT = 1 << 9,
  /* BSDF requires LCG for evaluation. */
  SD_BSDF_NEEDS_LCG = 1 << 10,

  /* Mask of the closure related runtime flags. SD_BACKFACING and
   * SD_TRANSPARENT are not part of it. */
  SD_CLOSURE_FLAGS = SD_EMISSION | SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSSRDF | SD_HOLDOUT |
                     SD_EXTINCTION | SD_SCATTER | SD_BSDF_NEEDS_LCG,

  /* Shader flags. */

  /* Direct light sample. */
  SD_USE_MIS = 1 << 16,
  /* Has transparent shadow. */
  SD_HAS_TRANSPARENT_SHADOW = 1 << 17,
  /* Has volume shader. */
  SD_HAS_VOLUME = 1 << 18,
  /* Has only volume shader, no surface. */
  SD_HAS_ONLY_VOLUME = 1 << 19,
  /* Has heterogeneous volume. */
  SD_HETEROGENEOUS_VOLUME = 1 << 20,
  /* BSSRDF normal uses bump. */
  SD_HAS_BSSRDF_BUMP = 1 << 21,
  /* Use equiangular volume sampling. */
  SD_VOLUME_EQUIANGULAR = 1 << 22,
  /* Use multiple importance volume sampling. */
  SD_VOLUME_MIS = 1 << 23,
  /* Use cubic interpolation for voxels. */
  SD_VOLUME_CUBIC = 1 << 24,
  /* Has data connected to the displacement input or uses bump map. */
  SD_HAS_BUMP = 1 << 25,
  /* Has true displacement. */
  SD_HAS_DISPLACEMENT = 1 << 26,
  /* Has constant emission (value stored in __shaders). */
  SD_HAS_CONSTANT_EMISSION = 1 << 27,
  /* Needs to access attributes for volume rendering. */
  SD_NEED_VOLUME_ATTRIBUTES = 1 << 28,

  /* Mask of all shader flags above (bits 16..28). */
  SD_SHADER_FLAGS = SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME |
                    SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR |
                    SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT |
                    SD_HAS_CONSTANT_EMISSION | SD_NEED_VOLUME_ATTRIBUTES
};
906 
/* Object flags, stored in ShaderData::object_flag. */
enum ShaderDataObjectFlag {
  /* Holdout for camera rays. */
  SD_OBJECT_HOLDOUT_MASK = 1 << 0,
  /* Has object motion blur. */
  SD_OBJECT_MOTION = 1 << 1,
  /* Vertices have transform applied. */
  SD_OBJECT_TRANSFORM_APPLIED = 1 << 2,
  /* Vertices have negative scale applied. */
  SD_OBJECT_NEGATIVE_SCALE_APPLIED = 1 << 3,
  /* Object has a volume shader. */
  SD_OBJECT_HAS_VOLUME = 1 << 4,
  /* Object intersects AABB of an object with volume shader. */
  SD_OBJECT_INTERSECTS_VOLUME = 1 << 5,
  /* Has position for motion vertices. */
  SD_OBJECT_HAS_VERTEX_MOTION = 1 << 6,
  /* Object is used to catch shadows. */
  SD_OBJECT_SHADOW_CATCHER = 1 << 7,
  /* Object has volume attributes. */
  SD_OBJECT_HAS_VOLUME_ATTRIBUTES = 1 << 8,

  /* Mask of the object flags. NOTE(review): SD_OBJECT_HAS_VERTEX_MOTION is
   * not part of this mask - confirm whether that is intended. */
  SD_OBJECT_FLAGS = SD_OBJECT_HOLDOUT_MASK | SD_OBJECT_MOTION | SD_OBJECT_TRANSFORM_APPLIED |
                    SD_OBJECT_NEGATIVE_SCALE_APPLIED | SD_OBJECT_HAS_VOLUME |
                    SD_OBJECT_INTERSECTS_VOLUME | SD_OBJECT_SHADOW_CATCHER |
                    SD_OBJECT_HAS_VOLUME_ATTRIBUTES
};
933 
/* Shader state at a shading point, 16 byte aligned. Several member groups
 * depend on feature switches (__RAY_DIFFERENTIALS__, __DPDU__,
 * __OBJECT_MOTION__, __OSL__). The closure array must stay the last member:
 * ShaderDataTinyStorage is sized as this struct minus that array. */
typedef ccl_addr_space struct ccl_align(16) ShaderData
{
  /* position */
  float3 P;
  /* smooth normal for shading */
  float3 N;
  /* true geometric normal */
  float3 Ng;
  /* view/incoming direction */
  float3 I;
  /* shader id */
  int shader;
  /* booleans describing shader, see ShaderDataFlag */
  int flag;
  /* booleans describing object of the shader, see ShaderDataObjectFlag */
  int object_flag;

  /* primitive id if there is one, ~0 otherwise */
  int prim;

  /* combined type and curve segment for hair */
  int type;

  /* parametric coordinates
   * - barycentric weights for triangles */
  float u;
  float v;
  /* object id if there is one, ~0 otherwise */
  int object;
  /* lamp id if there is one, ~0 otherwise */
  int lamp;

  /* motion blur sample time */
  float time;

  /* length of the ray being shaded */
  float ray_length;

#ifdef __RAY_DIFFERENTIALS__
  /* differential of P. these are orthogonal to Ng, not N */
  differential3 dP;
  /* differential of I */
  differential3 dI;
  /* differential of u, v */
  differential du;
  differential dv;
#endif
#ifdef __DPDU__
  /* differential of P w.r.t. parametric coordinates. note that dPdu is
   * not readily suitable as a tangent for shading on triangles. */
  float3 dPdu;
  float3 dPdv;
#endif

#ifdef __OBJECT_MOTION__
  /* object <-> world space transformations, cached to avoid
   * re-interpolating them constantly for shading */
  Transform ob_tfm;
  Transform ob_itfm;
#endif

  /* ray start position, only set for backgrounds */
  float3 ray_P;
  differential3 ray_dP;

#ifdef __OSL__
  /* Pointers to the kernel globals and path state, OSL builds only. */
  struct KernelGlobals *osl_globals;
  struct PathState *osl_path_state;
#endif

  /* LCG state for closures that require additional random numbers. */
  uint lcg_state;

  /* Closure data, we store a fixed array of closures */
  int num_closure;
  int num_closure_left;
  float randb_closure;
  float3 svm_closure_weight;

  /* Closure weights summed directly, so we can evaluate
   * emission and shadow transparency with MAX_CLOSURE 0. */
  float3 closure_emission_background;
  float3 closure_transparent_extinction;

  /* At the end so we can adjust size in ShaderDataTinyStorage. */
  struct ShaderClosure closure[MAX_CLOSURE];
}
ShaderData;
1022 
1023 /* ShaderDataTinyStorage needs the same alignment as ShaderData, or else
1024  * the pointer cast in AS_SHADER_DATA invokes undefined behavior. */
1025 typedef ccl_addr_space struct ccl_align(16) ShaderDataTinyStorage
1026 {
1027   char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE];
1028 }
1029 ShaderDataTinyStorage;
1030 #define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData *)shader_data_tiny_storage)
1031 
/* Path State */

#ifdef __VOLUME__
/* Entry of PathState::volume_stack: an object id paired with a shader id. */
typedef struct VolumeStack {
  int object, shader;
} VolumeStack;
#endif
1040 
/* Per-path state. Groups the ray flags, the random number generator state,
 * the bounce counters, the multiple importance sampling PDFs and, when
 * __VOLUME__ is defined, the volume bookkeeping. Several members only exist
 * when their feature macro is defined, so the struct size depends on the
 * enabled kernel features. */
typedef struct PathState {
  /* see enum PathRayFlag */
  int flag;

  /* random number generator state */
  uint rng_hash;       /* per pixel hash */
  int rng_offset;      /* dimension offset */
  int sample;          /* path sample number */
  int num_samples;     /* total number of times this path will be sampled */
  float branch_factor; /* number of branches in indirect paths */

  /* bounce counting */
  int bounce;
  int diffuse_bounce;
  int glossy_bounce;
  int transmission_bounce;
  int transparent_bounce;

  /* Only present when denoising features are compiled in. */
#ifdef __DENOISING_FEATURES__
  float denoising_feature_weight;
  float3 denoising_feature_throughput;
#endif /* __DENOISING_FEATURES__ */

  /* multiple importance sampling */
  float min_ray_pdf; /* smallest bounce pdf over entire path up to now */
  float ray_pdf;     /* last bounce pdf */
#ifdef __LAMP_MIS__
  float ray_t; /* accumulated distance through transparent surfaces */
#endif

  /* volume rendering */
#ifdef __VOLUME__
  int volume_bounce;
  int volume_bounds_bounce;
  /* Fixed-size array with room for VOLUME_STACK_SIZE entries. */
  VolumeStack volume_stack[VOLUME_STACK_SIZE];
#endif
} PathState;
1078 
#ifdef __VOLUME__
/* Wrapper around a single PathState member named `ps`. When __SPLIT_KERNEL__
 * is defined the member is compiled out and the struct has no members.
 * NOTE(review): a struct without members is not valid ISO C; presumably all
 * compilers used for the split kernel accept it - confirm. */
typedef struct VolumeState {
#  ifdef __SPLIT_KERNEL__
#  else
  PathState ps;
#  endif
} VolumeState;
#endif
1087 
/* Struct to gather multiple nearby intersections. The per-hit arrays
 * (weight, hits, Ng) each have room for LOCAL_MAX_HITS entries. */
typedef struct LocalIntersection {
  Ray ray;
  float3 weight[LOCAL_MAX_HITS];

  /* presumably the number of valid entries in the per-hit arrays - confirm */
  int num_hits;
  struct Intersection hits[LOCAL_MAX_HITS];
  float3 Ng[LOCAL_MAX_HITS];
} LocalIntersection;
1097 
1098 /* Subsurface */
1099 
/* Struct to gather SSS indirect rays and delay tracing them.
 * All per-ray arrays (state, rays, throughputs, L_state) have room for
 * BSSRDF_MAX_HITS entries. */
typedef struct SubsurfaceIndirectRays {
  PathState state[BSSRDF_MAX_HITS];

  /* presumably the number of rays currently stored - confirm */
  int num_rays;

  struct Ray rays[BSSRDF_MAX_HITS];
  float3 throughputs[BSSRDF_MAX_HITS];
  struct PathRadianceState L_state[BSSRDF_MAX_HITS];
} SubsurfaceIndirectRays;
/* Compile-time check that BSSRDF_MAX_HITS does not exceed LOCAL_MAX_HITS. */
static_assert(BSSRDF_MAX_HITS <= LOCAL_MAX_HITS, "BSSRDF hits too high.");
1111 
1112 /* Constant Kernel Data
1113  *
1114  * These structs are passed from CPU to various devices, and the struct layout
1115  * must match exactly. Structs are padded to ensure 16 byte alignment, and we
1116  * do not use float3 because its size may not be the same on all devices. */
1117 
1118 typedef struct KernelCamera {
1119   /* type */
1120   int type;
1121 
1122   /* panorama */
1123   int panorama_type;
1124   float fisheye_fov;
1125   float fisheye_lens;
1126   float4 equirectangular_range;
1127 
1128   /* stereo */
1129   float interocular_offset;
1130   float convergence_distance;
1131   float pole_merge_angle_from;
1132   float pole_merge_angle_to;
1133 
1134   /* matrices */
1135   Transform cameratoworld;
1136   ProjectionTransform rastertocamera;
1137 
1138   /* differentials */
1139   float4 dx;
1140   float4 dy;
1141 
1142   /* depth of field */
1143   float aperturesize;
1144   float blades;
1145   float bladesrotation;
1146   float focaldistance;
1147 
1148   /* motion blur */
1149   float shuttertime;
1150   int num_motion_steps, have_perspective_motion;
1151 
1152   /* clipping */
1153   float nearclip;
1154   float cliplength;
1155 
1156   /* sensor size */
1157   float sensorwidth;
1158   float sensorheight;
1159 
1160   /* render size */
1161   float width, height;
1162   int resolution;
1163 
1164   /* anamorphic lens bokeh */
1165   float inv_aperture_ratio;
1166 
1167   int is_inside_volume;
1168 
1169   /* more matrices */
1170   ProjectionTransform screentoworld;
1171   ProjectionTransform rastertoworld;
1172   ProjectionTransform ndctoworld;
1173   ProjectionTransform worldtoscreen;
1174   ProjectionTransform worldtoraster;
1175   ProjectionTransform worldtondc;
1176   Transform worldtocamera;
1177 
1178   /* Stores changes in the projection matrix. Use for camera zoom motion
1179    * blur and motion pass output for perspective camera. */
1180   ProjectionTransform perspective_pre;
1181   ProjectionTransform perspective_post;
1182 
1183   /* Transforms for motion pass. */
1184   Transform motion_pass_pre;
1185   Transform motion_pass_post;
1186 
1187   int shutter_table_offset;
1188 
1189   /* Rolling shutter */
1190   int rolling_shutter_type;
1191   float rolling_shutter_duration;
1192 
1193   int pad;
1194 } KernelCamera;
1195 static_assert_align(KernelCamera, 16);
1196 
1197 typedef struct KernelFilm {
1198   float exposure;
1199   int pass_flag;
1200 
1201   int light_pass_flag;
1202   int pass_stride;
1203   int use_light_pass;
1204 
1205   int pass_combined;
1206   int pass_depth;
1207   int pass_normal;
1208   int pass_motion;
1209 
1210   int pass_motion_weight;
1211   int pass_uv;
1212   int pass_object_id;
1213   int pass_material_id;
1214 
1215   int pass_diffuse_color;
1216   int pass_glossy_color;
1217   int pass_transmission_color;
1218 
1219   int pass_diffuse_indirect;
1220   int pass_glossy_indirect;
1221   int pass_transmission_indirect;
1222   int pass_volume_indirect;
1223 
1224   int pass_diffuse_direct;
1225   int pass_glossy_direct;
1226   int pass_transmission_direct;
1227   int pass_volume_direct;
1228 
1229   int pass_emission;
1230   int pass_background;
1231   int pass_ao;
1232   float pass_alpha_threshold;
1233 
1234   int pass_shadow;
1235   float pass_shadow_scale;
1236   int filter_table_offset;
1237   int cryptomatte_passes;
1238   int cryptomatte_depth;
1239   int pass_cryptomatte;
1240 
1241   int pass_adaptive_aux_buffer;
1242   int pass_sample_count;
1243 
1244   int pass_mist;
1245   float mist_start;
1246   float mist_inv_depth;
1247   float mist_falloff;
1248 
1249   int pass_denoising_data;
1250   int pass_denoising_clean;
1251   int denoising_flags;
1252 
1253   int pass_aov_color;
1254   int pass_aov_value;
1255   int pass_aov_color_num;
1256   int pass_aov_value_num;
1257   int pad1, pad2, pad3;
1258 
1259   /* XYZ to rendering color space transform. float4 instead of float3 to
1260    * ensure consistent padding/alignment across devices. */
1261   float4 xyz_to_r;
1262   float4 xyz_to_g;
1263   float4 xyz_to_b;
1264   float4 rgb_to_y;
1265 
1266   int pass_bake_primitive;
1267   int pass_bake_differential;
1268   int pad;
1269 
1270 #ifdef __KERNEL_DEBUG__
1271   int pass_bvh_traversed_nodes;
1272   int pass_bvh_traversed_instances;
1273   int pass_bvh_intersections;
1274   int pass_ray_bounces;
1275 #endif
1276 
1277   /* viewport rendering options */
1278   int display_pass_stride;
1279   int display_pass_components;
1280   int display_divide_pass_stride;
1281   int use_display_exposure;
1282   int use_display_pass_alpha;
1283 
1284   int pad4, pad5, pad6;
1285 } KernelFilm;
1286 static_assert_align(KernelFilm, 16);
1287 
/* Background constants; stored as the `background` member of KernelData.
 * Part of the constant kernel data passed from the CPU to the devices, so
 * the member order and types must not change. */
typedef struct KernelBackground {
  /* only shader index */
  int surface_shader;
  int volume_shader;
  float volume_step_size;
  int transparent;
  float transparent_roughness_squared_threshold;

  /* ambient occlusion */
  float ao_factor;
  float ao_distance;
  float ao_bounces_factor;

  /* portal sampling */
  float portal_weight;
  int num_portals;
  int portal_offset;

  /* sun sampling */
  float sun_weight;
  /* xyz store direction, w the angle. float4 instead of float3 is used
   * to ensure consistent padding/alignment across devices. */
  float4 sun;

  /* map sampling */
  float map_weight;
  int map_res_x;
  int map_res_y;

  int use_mis;
} KernelBackground;
/* Compile-time alignment check against 16 (see static_assert_align). */
static_assert_align(KernelBackground, 16);
1320 
/* Integrator constants. Part of the constant kernel data passed from the CPU
 * to the devices: every member is a 4-byte int or float, and their order
 * defines the binary layout. */
typedef struct KernelIntegrator {
  /* Emission. */
  int use_direct_light;
  int use_ambient_occlusion;
  int num_distribution;
  int num_all_lights;
  float pdf_triangles;
  float pdf_lights;
  float light_inv_rr_threshold;

  /* Bounces. */
  int min_bounce;
  int max_bounce;

  int max_diffuse_bounce;
  int max_glossy_bounce;
  int max_transmission_bounce;
  int max_volume_bounce;

  int ao_bounces;

  /* Transparency. */
  int transparent_min_bounce;
  int transparent_max_bounce;
  int transparent_shadows;

  /* Caustics. */
  int caustics_reflective;
  int caustics_refractive;
  float filter_glossy;

  /* Seed. */
  int seed;

  /* Clamping. */
  float sample_clamp_direct;
  float sample_clamp_indirect;

  /* Branched path. */
  int branched;
  int volume_decoupled;
  int diffuse_samples;
  int glossy_samples;
  int transmission_samples;
  int ao_samples;
  int mesh_light_samples;
  int subsurface_samples;
  int sample_all_lights_direct;
  int sample_all_lights_indirect;

  /* Multiple importance sampling. */
  int use_lamp_mis;

  /* Sampler. */
  int sampling_pattern;
  int aa_samples;
  int adaptive_min_samples;
  int adaptive_step;
  int adaptive_stop_per_sample;
  float adaptive_threshold;

  /* Volume rendering. */
  int use_volumes;
  int volume_max_steps;
  float volume_step_rate;
  int volume_samples;

  int start_sample;

  int max_closures;

  /* Explicit padding members. */
  int pad1;
  int pad2;
} KernelIntegrator;
1394 static_assert_align(KernelIntegrator, 16);
1395 
/* BVH layout identifiers. Each concrete layout occupies its own bit, so
 * layouts can be combined into a mask. */
typedef enum KernelBVHLayout {
  BVH_LAYOUT_NONE = 0,

  BVH_LAYOUT_BVH2 = 1 << 0,
  BVH_LAYOUT_EMBREE = 1 << 1,
  BVH_LAYOUT_OPTIX = 1 << 2,

  /* Layout used by default on the CPU. */
  BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE,
  /* Every bit set. */
  BVH_LAYOUT_ALL = ~0u,
} KernelBVHLayout;
1407 
/* BVH constants. Part of the constant kernel data passed from the CPU to the
 * devices. The type of `scene` depends on the backend macros; a padding
 * member is declared next to it whenever `scene` is not a 64-bit handle. */
typedef struct KernelBVH {
  /* Own BVH. */
  int root;
  int have_motion;
  int have_curves;
  int bvh_layout;
  int use_bvh_steps;
  int curve_subdivisions;

  /* Custom BVH. */
#ifdef __KERNEL_OPTIX__
  OptixTraversableHandle scene;
#else
#  ifdef __EMBREE__
  RTCScene scene;
#    ifndef __KERNEL_64_BIT__
  int pad2;
#    endif
#  else
  int scene;
  int pad2;
#  endif
#endif
} KernelBVH;
1431 static_assert_align(KernelBVH, 16);
1432 
/* Lookup table constants: one offset followed by three explicit padding
 * members, four ints in total. */
typedef struct KernelTables {
  int beckmann_offset;
  int pad1;
  int pad2;
  int pad3;
} KernelTables;
1437 static_assert_align(KernelTables, 16);
1438 
/* Baking constants; stored as the `bake` member of KernelData. Four int
 * members, so no explicit padding is declared. */
typedef struct KernelBake {
  int object_index;
  int tri_offset;
  int type;
  int pass_filter;
} KernelBake;
/* Compile-time alignment check against 16 (see static_assert_align). */
static_assert_align(KernelBake, 16);
1446 
/* Aggregate of all constant kernel data sub-structs: camera, film,
 * background, integrator, BVH, tables and bake settings, in this order. */
typedef struct KernelData {
  KernelCamera cam;
  KernelFilm film;
  KernelBackground background;
  KernelIntegrator integrator;
  KernelBVH bvh;
  KernelTables tables;
  KernelBake bake;
} KernelData;
/* Compile-time alignment check against 16 (see static_assert_align). */
static_assert_align(KernelData, 16);
1457 
1458 /* Kernel data structures. */
1459 
1460 typedef struct KernelObject {
1461   Transform tfm;
1462   Transform itfm;
1463 
1464   float surface_area;
1465   float pass_id;
1466   float random_number;
1467   float color[3];
1468   int particle_index;
1469 
1470   float dupli_generated[3];
1471   float dupli_uv[2];
1472 
1473   int numkeys;
1474   int numsteps;
1475   int numverts;
1476 
1477   uint patch_map_offset;
1478   uint attribute_map_offset;
1479   uint motion_offset;
1480 
1481   float cryptomatte_object;
1482   float cryptomatte_asset;
1483 
1484   float shadow_terminator_offset;
1485   float pad1, pad2, pad3;
1486 } KernelObject;
1487 static_assert_align(KernelObject, 16);
1488 
/* Spot light parameters; one of the alternatives in the union inside
 * KernelLight. Eight floats in total, including the trailing pad. */
typedef struct KernelSpotLight {
  float radius;
  float invarea;
  float spot_angle;
  float spot_smooth;
  /* Three-component vector stored as a plain float array. */
  float dir[3];
  float pad;
} KernelSpotLight;
1497 
1498 /* PointLight is SpotLight with only radius and invarea being used. */
1499 
/* Area light parameters; one of the alternatives in the union inside
 * KernelLight. Each three-component array is followed by one more float
 * (invarea, pad1, pad2), giving twelve floats in total. */
typedef struct KernelAreaLight {
  float axisu[3];
  float invarea;
  float axisv[3];
  float pad1;
  float dir[3];
  float pad2;
} KernelAreaLight;
1508 
/* Distant light parameters; one of the alternatives in the union inside
 * KernelLight. Four floats in total, including the trailing pad. */
typedef struct KernelDistantLight {
  float radius;
  float cosangle;
  float invarea;
  float pad;
} KernelDistantLight;
1515 
/* Per-light kernel data: common members followed by an anonymous union with
 * the type-specific parameters.
 * NOTE(review): presumably `type` determines which union member is valid -
 * confirm against the code that fills and reads this struct. */
typedef struct KernelLight {
  int type;
  /* Three-component value stored as a plain float array. */
  float co[3];
  int shader_id;
  int samples;
  float max_bounces;
  float random;
  float strength[3];
  float pad1;
  Transform tfm;
  Transform itfm;
  /* Type-specific parameters; the members share the same storage. */
  union {
    KernelSpotLight spot;
    KernelAreaLight area;
    KernelDistantLight distant;
  };
} KernelLight;
/* Compile-time alignment check against 16 (see static_assert_align). */
static_assert_align(KernelLight, 16);
1534 
/* Entry of the light distribution: two common members followed by an
 * anonymous union holding either mesh light or lamp data. Both union
 * alternatives consist of two 4-byte members. */
typedef struct KernelLightDistribution {
  float totarea;
  int prim;
  union {
    /* Alternative for mesh lights. */
    struct {
      int shader_flag;
      int object_id;
    } mesh_light;
    /* Alternative for lamps; `pad` occupies the storage of shader_flag. */
    struct {
      float pad;
      float size;
    } lamp;
  };
} KernelLightDistribution;
/* Compile-time alignment check against 16 (see static_assert_align). */
static_assert_align(KernelLightDistribution, 16);
1550 
/* Per-particle kernel data: four scalar members followed by four float4
 * members. */
typedef struct KernelParticle {
  int index;
  float age;
  float lifetime;
  float size;
  float4 rotation;
  /* Only xyz are used of the following. float4 instead of float3 are used
   * to ensure consistent padding/alignment across devices. */
  float4 location;
  float4 velocity;
  float4 angular_velocity;
} KernelParticle;
/* Compile-time alignment check against 16 (see static_assert_align). */
static_assert_align(KernelParticle, 16);
1564 
/* Per-shader kernel data: eight 4-byte members in total, the last two being
 * explicit padding. */
typedef struct KernelShader {
  /* Three-component value stored as a plain float array. */
  float constant_emission[3];
  float cryptomatte_id;
  int flags;
  int pass_id;
  int pad2;
  int pad3;
} KernelShader;
1572 static_assert_align(KernelShader, 16);
1573 
1574 /* Declarations required for split kernel */
1575 
/* Queue macros. */
/* Sentinel value stored in a queue slot that holds no ray index. */
#define QUEUE_EMPTY_SLOT (-1)
1579 
/*
 * Overview of the first four queues:
 *  - active rays
 *  - background queue
 *  - shadow ray cast kernel, AO
 *  - shadow ray cast kernel, direct lighting
 */

/* Queue names. NUM_QUEUES comes last and therefore equals the number of
 * queues that are enabled by the feature macros. */
enum QueueNumber {
  /* Receives all active rays and all regenerated rays. */
  QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0,

  /* Receives
   * 1. rays that hit the background,
   * 2. rays that have exited path-iteration but still need to update the
   *    output buffer,
   * 3. rays that are to be regenerated.
   */
  QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS = 1,

  /* Receives all rays for which a shadow ray should be cast to determine the
   * radiance contribution for AO.
   */
  QUEUE_SHADOW_RAY_CAST_AO_RAYS = 2,

  /* Receives all rays for which a shadow ray should be cast to determine the
   * radiance contribution for direct lighting.
   */
  QUEUE_SHADOW_RAY_CAST_DL_RAYS = 3,

  /* Rays sorted according to shader->id. */
  QUEUE_SHADER_SORTED_RAYS = 4,

#ifdef __BRANCHED_PATH__
  /* Rays moving to the next iteration of the indirect loop for light. */
  QUEUE_LIGHT_INDIRECT_ITER,
  /* All inactive rays; these are candidates for sharing the work of
   * indirect loops. */
  QUEUE_INACTIVE_RAYS,
#  ifdef __VOLUME__
  /* Rays moving to the next iteration of the indirect loop for volumes. */
  QUEUE_VOLUME_INDIRECT_ITER,
#  endif
#  ifdef __SUBSURFACE__
  /* Rays moving to the next iteration of the indirect loop for subsurface. */
  QUEUE_SUBSURFACE_INDIRECT_ITER,
#  endif
#endif /* __BRANCHED_PATH__ */

  NUM_QUEUES
};
1630 
/* RAY_STATE_MASK selects the state bits (low four bits) of a ray state
 * value, RAY_FLAG_MASK selects the flag bits (bits 4 to 7). */
#define RAY_STATE_MASK 0x0F
#define RAY_FLAG_MASK 0xF0
enum RayState {
  RAY_INVALID = 0,
  /* Ray is actively involved in path-iteration. */
  RAY_ACTIVE = 1,
  /* Ray has completed processing all samples and is inactive. */
  RAY_INACTIVE = 2,
  /* Ray has exited path-iteration and needs to update the output buffer. */
  RAY_UPDATE_BUFFER = 3,
  /* Ray needs to skip most surface shader work. */
  RAY_HAS_ONLY_VOLUME = 4,
  /* Ray has hit the background. */
  RAY_HIT_BACKGROUND = 5,
  /* Ray has to be regenerated. */
  RAY_TO_REGENERATE = 6,
  /* Ray has been regenerated. */
  RAY_REGENERATED = 7,
  /* Ray is moving to the next iteration of the branched indirect loop. */
  RAY_LIGHT_INDIRECT_NEXT_ITER = 8,
  RAY_VOLUME_INDIRECT_NEXT_ITER = 9,
  RAY_SUBSURFACE_INDIRECT_NEXT_ITER = 10,

  /* Ray flags; each one is a single bit inside RAY_FLAG_MASK. */

  /* Ray is currently evaluating the branched indirect loop. */
  RAY_BRANCHED_LIGHT_INDIRECT = 0x10,
  RAY_BRANCHED_VOLUME_INDIRECT = 0x20,
  RAY_BRANCHED_SUBSURFACE_INDIRECT = 0x40,
  /* Union of the three branched indirect flags above. */
  RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT |
                           RAY_BRANCHED_SUBSURFACE_INDIRECT),

  /* Ray is evaluating an iteration of an indirect loop for another thread. */
  RAY_BRANCHED_INDIRECT_SHARED = 0x80,
};
1667 
/* Accessors for a ray state array. Each entry combines state bits (selected
 * by RAY_STATE_MASK) with flag bits (selected by RAY_FLAG_MASK).
 *
 * Every argument and every expansion is parenthesized, so compound arguments
 * such as `FLAG_A | FLAG_B` or `cond ? a : b` and surrounding operators such
 * as `IS_FLAG(...) != 0` behave as written. The ray_state and ray_index
 * arguments are evaluated more than once by the modifying macros, so they
 * must not have side effects. */

/* Replace the state of an entry while keeping its flag bits. `state` is
 * OR-ed in unmasked. */
#define ASSIGN_RAY_STATE(ray_state, ray_index, state) \
  ((ray_state)[(ray_index)] = (((ray_state)[(ray_index)] & RAY_FLAG_MASK) | (state)))
/* True when ray_index is not QUEUE_EMPTY_SLOT and the entry's state bits
 * equal `state`. */
#define IS_STATE(ray_state, ray_index, state) \
  ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
/* Set all bits of `flag` in an entry. */
#define ADD_RAY_FLAG(ray_state, ray_index, flag) \
  ((ray_state)[(ray_index)] = ((ray_state)[(ray_index)] | (flag)))
/* Clear all bits of `flag` in an entry. */
#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) \
  ((ray_state)[(ray_index)] = ((ray_state)[(ray_index)] & (~(flag))))
/* Non-zero when at least one bit of `flag` is set in an entry. */
#define IS_FLAG(ray_state, ray_index, flag) ((ray_state)[(ray_index)] & (flag))
1677 
/* Patches */

#define PATCH_MAX_CONTROL_VERTS 16

/* Patch map node flags: bit 30 and bit 31 are flag bits, the remaining low
 * 30 bits are covered by the index mask. */

#define PATCH_MAP_NODE_IS_SET (0x40000000)   /* bit 30, type int */
#define PATCH_MAP_NODE_IS_LEAF (0x80000000u) /* bit 31, type unsigned int */
#define PATCH_MAP_NODE_INDEX_MASK (~(PATCH_MAP_NODE_IS_LEAF | PATCH_MAP_NODE_IS_SET))
1687 
1688 /* Work Tiles */
1689 
1690 typedef struct WorkTile {
1691   uint x, y, w, h;
1692 
1693   uint start_sample;
1694   uint num_samples;
1695 
1696   int offset;
1697   uint stride;
1698 
1699   ccl_global float *buffer;
1700 } WorkTile;
1701 
/* Precomputed sample table sizes for PMJ02 sampler.
 * NUM_PMJ_SAMPLES is parenthesized so that it expands to a single value in
 * any expression, e.g. `x / NUM_PMJ_SAMPLES` or `x % NUM_PMJ_SAMPLES`. */
#define NUM_PMJ_SAMPLES (64 * 64)
#define NUM_PMJ_PATTERNS 48
1705 
1706 CCL_NAMESPACE_END
1707 
1708 #endif /*  __KERNEL_TYPES_H__ */
1709