1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 CCL_NAMESPACE_BEGIN
18 
19 /* Events for probalistic scattering */
20 
21 typedef enum VolumeIntegrateResult {
22   VOLUME_PATH_SCATTERED = 0,
23   VOLUME_PATH_ATTENUATED = 1,
24   VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26 
27 /* Volume shader properties
28  *
29  * extinction coefficient = absorption coefficient + scattering coefficient
30  * sigma_t = sigma_a + sigma_s */
31 
32 typedef struct VolumeShaderCoefficients {
33   float3 sigma_t;
34   float3 sigma_s;
35   float3 emission;
36 } VolumeShaderCoefficients;
37 
38 #ifdef __VOLUME__
39 
40 /* evaluate shader to get extinction coefficient at P */
volume_shader_extinction_sample(KernelGlobals * kg,ShaderData * sd,ccl_addr_space PathState * state,float3 P,float3 * extinction)41 ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
42                                                        ShaderData *sd,
43                                                        ccl_addr_space PathState *state,
44                                                        float3 P,
45                                                        float3 *extinction)
46 {
47   sd->P = P;
48   shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
49 
50   if (sd->flag & SD_EXTINCTION) {
51     const float density = object_volume_density(kg, sd->object);
52     *extinction = sd->closure_transparent_extinction * density;
53     return true;
54   }
55   else {
56     return false;
57   }
58 }
59 
60 /* evaluate shader to get absorption, scattering and emission at P */
volume_shader_sample(KernelGlobals * kg,ShaderData * sd,ccl_addr_space PathState * state,float3 P,VolumeShaderCoefficients * coeff)61 ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
62                                             ShaderData *sd,
63                                             ccl_addr_space PathState *state,
64                                             float3 P,
65                                             VolumeShaderCoefficients *coeff)
66 {
67   sd->P = P;
68   shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
69 
70   if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION)))
71     return false;
72 
73   coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
74   coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction :
75                                                 make_float3(0.0f, 0.0f, 0.0f);
76   coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background :
77                                                make_float3(0.0f, 0.0f, 0.0f);
78 
79   if (sd->flag & SD_SCATTER) {
80     for (int i = 0; i < sd->num_closure; i++) {
81       const ShaderClosure *sc = &sd->closure[i];
82 
83       if (CLOSURE_IS_VOLUME(sc->type))
84         coeff->sigma_s += sc->weight;
85     }
86   }
87 
88   const float density = object_volume_density(kg, sd->object);
89   coeff->sigma_s *= density;
90   coeff->sigma_t *= density;
91   coeff->emission *= density;
92 
93   return true;
94 }
95 
96 #endif /* __VOLUME__ */
97 
volume_color_transmittance(float3 sigma,float t)98 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
99 {
100   return exp3(-sigma * t);
101 }
102 
kernel_volume_channel_get(float3 value,int channel)103 ccl_device float kernel_volume_channel_get(float3 value, int channel)
104 {
105   return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z);
106 }
107 
108 #ifdef __VOLUME__
109 
volume_stack_step_size(KernelGlobals * kg,ccl_addr_space VolumeStack * stack)110 ccl_device float volume_stack_step_size(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
111 {
112   float step_size = FLT_MAX;
113 
114   for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
115     int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
116 
117     bool heterogeneous = false;
118 
119     if (shader_flag & SD_HETEROGENEOUS_VOLUME) {
120       heterogeneous = true;
121     }
122     else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) {
123       /* We want to render world or objects without any volume grids
124        * as homogeneous, but can only verify this at run-time since other
125        * heterogeneous volume objects may be using the same shader. */
126       int object = stack[i].object;
127       if (object != OBJECT_NONE) {
128         int object_flag = kernel_tex_fetch(__object_flag, object);
129         if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
130           heterogeneous = true;
131         }
132       }
133     }
134 
135     if (heterogeneous) {
136       float object_step_size = object_volume_step_size(kg, stack[i].object);
137       object_step_size *= kernel_data.integrator.volume_step_rate;
138       step_size = fminf(object_step_size, step_size);
139     }
140   }
141 
142   return step_size;
143 }
144 
volume_stack_sampling_method(KernelGlobals * kg,VolumeStack * stack)145 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
146 {
147   if (kernel_data.integrator.num_all_lights == 0)
148     return 0;
149 
150   int method = -1;
151 
152   for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
153     int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
154 
155     if (shader_flag & SD_VOLUME_MIS) {
156       return SD_VOLUME_MIS;
157     }
158     else if (shader_flag & SD_VOLUME_EQUIANGULAR) {
159       if (method == 0)
160         return SD_VOLUME_MIS;
161 
162       method = SD_VOLUME_EQUIANGULAR;
163     }
164     else {
165       if (method == SD_VOLUME_EQUIANGULAR)
166         return SD_VOLUME_MIS;
167 
168       method = 0;
169     }
170   }
171 
172   return method;
173 }
174 
kernel_volume_step_init(KernelGlobals * kg,ccl_addr_space PathState * state,const float object_step_size,float t,float * step_size,float * step_offset)175 ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
176                                                ccl_addr_space PathState *state,
177                                                const float object_step_size,
178                                                float t,
179                                                float *step_size,
180                                                float *step_offset)
181 {
182   const int max_steps = kernel_data.integrator.volume_max_steps;
183   float step = min(object_step_size, t);
184 
185   /* compute exact steps in advance for malloc */
186   if (t > max_steps * step) {
187     step = t / (float)max_steps;
188   }
189 
190   *step_size = step;
191   *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
192 }
193 
194 /* Volume Shadows
195  *
196  * These functions are used to attenuate shadow rays to lights. Both absorption
197  * and scattering will block light, represented by the extinction coefficient. */
198 
199 /* homogeneous volume: assume shader evaluation at the starts gives
200  * the extinction coefficient for the entire line segment */
kernel_volume_shadow_homogeneous(KernelGlobals * kg,ccl_addr_space PathState * state,Ray * ray,ShaderData * sd,float3 * throughput)201 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
202                                                  ccl_addr_space PathState *state,
203                                                  Ray *ray,
204                                                  ShaderData *sd,
205                                                  float3 *throughput)
206 {
207   float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
208 
209   if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
210     *throughput *= volume_color_transmittance(sigma_t, ray->t);
211 }
212 
213 /* heterogeneous volume: integrate stepping through the volume until we
214  * reach the end, get absorbed entirely, or run out of iterations */
kernel_volume_shadow_heterogeneous(KernelGlobals * kg,ccl_addr_space PathState * state,Ray * ray,ShaderData * sd,float3 * throughput,const float object_step_size)215 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
216                                                    ccl_addr_space PathState *state,
217                                                    Ray *ray,
218                                                    ShaderData *sd,
219                                                    float3 *throughput,
220                                                    const float object_step_size)
221 {
222   float3 tp = *throughput;
223   const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
224 
225   /* prepare for stepping */
226   int max_steps = kernel_data.integrator.volume_max_steps;
227   float step_offset, step_size;
228   kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
229 
230   /* compute extinction at the start */
231   float t = 0.0f;
232 
233   float3 sum = make_float3(0.0f, 0.0f, 0.0f);
234 
235   for (int i = 0; i < max_steps; i++) {
236     /* advance to new position */
237     float new_t = min(ray->t, (i + 1) * step_size);
238 
239     /* use random position inside this segment to sample shader, adjust
240      * for last step that is shorter than other steps. */
241     if (new_t == ray->t) {
242       step_offset *= (new_t - t) / step_size;
243     }
244 
245     float3 new_P = ray->P + ray->D * (t + step_offset);
246     float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
247 
248     /* compute attenuation over segment */
249     if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
250       /* Compute expf() only for every Nth step, to save some calculations
251        * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
252 
253       sum += (-sigma_t * (new_t - t));
254       if ((i & 0x07) == 0) { /* ToDo: Other interval? */
255         tp = *throughput * exp3(sum);
256 
257         /* stop if nearly all light is blocked */
258         if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
259           break;
260       }
261     }
262 
263     /* stop if at the end of the volume */
264     t = new_t;
265     if (t == ray->t) {
266       /* Update throughput in case we haven't done it above */
267       tp = *throughput * exp3(sum);
268       break;
269     }
270   }
271 
272   *throughput = tp;
273 }
274 
275 /* get the volume attenuation over line segment defined by ray, with the
276  * assumption that there are no surfaces blocking light between the endpoints */
kernel_volume_shadow(KernelGlobals * kg,ShaderData * shadow_sd,ccl_addr_space PathState * state,Ray * ray,float3 * throughput)277 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
278                                               ShaderData *shadow_sd,
279                                               ccl_addr_space PathState *state,
280                                               Ray *ray,
281                                               float3 *throughput)
282 {
283   shader_setup_from_volume(kg, shadow_sd, ray);
284 
285   float step_size = volume_stack_step_size(kg, state->volume_stack);
286   if (step_size != FLT_MAX)
287     kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput, step_size);
288   else
289     kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
290 }
291 
292 #endif /* __VOLUME__ */
293 
294 /* Equi-angular sampling as in:
295  * "Importance Sampling Techniques for Path Tracing in Participating Media" */
296 
kernel_volume_equiangular_sample(Ray * ray,float3 light_P,float xi,float * pdf)297 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
298 {
299   float t = ray->t;
300 
301   float delta = dot((light_P - ray->P), ray->D);
302   float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
303   if (UNLIKELY(D == 0.0f)) {
304     *pdf = 0.0f;
305     return 0.0f;
306   }
307   float theta_a = -atan2f(delta, D);
308   float theta_b = atan2f(t - delta, D);
309   float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
310   if (UNLIKELY(theta_b == theta_a)) {
311     *pdf = 0.0f;
312     return 0.0f;
313   }
314   *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
315 
316   return min(t, delta + t_); /* min is only for float precision errors */
317 }
318 
kernel_volume_equiangular_pdf(Ray * ray,float3 light_P,float sample_t)319 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
320 {
321   float delta = dot((light_P - ray->P), ray->D);
322   float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
323   if (UNLIKELY(D == 0.0f)) {
324     return 0.0f;
325   }
326 
327   float t = ray->t;
328   float t_ = sample_t - delta;
329 
330   float theta_a = -atan2f(delta, D);
331   float theta_b = atan2f(t - delta, D);
332   if (UNLIKELY(theta_b == theta_a)) {
333     return 0.0f;
334   }
335 
336   float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
337 
338   return pdf;
339 }
340 
341 /* Distance sampling */
342 
kernel_volume_distance_sample(float max_t,float3 sigma_t,int channel,float xi,float3 * transmittance,float3 * pdf)343 ccl_device float kernel_volume_distance_sample(
344     float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
345 {
346   /* xi is [0, 1[ so log(0) should never happen, division by zero is
347    * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
348   float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
349   float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
350   float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
351 
352   float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t);
353 
354   *transmittance = volume_color_transmittance(sigma_t, sample_t);
355   *pdf = safe_divide_color(sigma_t * *transmittance,
356                            make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
357 
358   /* todo: optimization: when taken together with hit/miss decision,
359    * the full_transmittance cancels out drops out and xi does not
360    * need to be remapped */
361 
362   return sample_t;
363 }
364 
kernel_volume_distance_pdf(float max_t,float3 sigma_t,float sample_t)365 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
366 {
367   float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
368   float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
369 
370   return safe_divide_color(sigma_t * transmittance,
371                            make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
372 }
373 
374 /* Emission */
375 
kernel_volume_emission_integrate(VolumeShaderCoefficients * coeff,int closure_flag,float3 transmittance,float t)376 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff,
377                                                    int closure_flag,
378                                                    float3 transmittance,
379                                                    float t)
380 {
381   /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
382    * this goes to E * t as sigma_t goes to zero
383    *
384    * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
385   float3 emission = coeff->emission;
386 
387   if (closure_flag & SD_EXTINCTION) {
388     float3 sigma_t = coeff->sigma_t;
389 
390     emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t;
391     emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t;
392     emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t;
393   }
394   else
395     emission *= t;
396 
397   return emission;
398 }
399 
400 /* Volume Path */
401 
kernel_volume_sample_channel(float3 albedo,float3 throughput,float rand,float3 * pdf)402 ccl_device int kernel_volume_sample_channel(float3 albedo,
403                                             float3 throughput,
404                                             float rand,
405                                             float3 *pdf)
406 {
407   /* Sample color channel proportional to throughput and single scattering
408    * albedo, to significantly reduce noise with many bounce, following:
409    *
410    * "Practical and Controllable Subsurface Scattering for Production Path
411    *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
412   float3 weights = fabs(throughput * albedo);
413   float sum_weights = weights.x + weights.y + weights.z;
414   float3 weights_pdf;
415 
416   if (sum_weights > 0.0f) {
417     weights_pdf = weights / sum_weights;
418   }
419   else {
420     weights_pdf = make_float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f);
421   }
422 
423   *pdf = weights_pdf;
424 
425   /* OpenCL does not support -> on float3, so don't use pdf->x. */
426   if (rand < weights_pdf.x) {
427     return 0;
428   }
429   else if (rand < weights_pdf.x + weights_pdf.y) {
430     return 1;
431   }
432   else {
433     return 2;
434   }
435 }
436 
437 #ifdef __VOLUME__
438 
439 /* homogeneous volume: assume shader evaluation at the start gives
440  * the volume shading coefficient for the entire line segment */
441 ccl_device VolumeIntegrateResult
kernel_volume_integrate_homogeneous(KernelGlobals * kg,ccl_addr_space PathState * state,Ray * ray,ShaderData * sd,PathRadiance * L,ccl_addr_space float3 * throughput,bool probalistic_scatter)442 kernel_volume_integrate_homogeneous(KernelGlobals *kg,
443                                     ccl_addr_space PathState *state,
444                                     Ray *ray,
445                                     ShaderData *sd,
446                                     PathRadiance *L,
447                                     ccl_addr_space float3 *throughput,
448                                     bool probalistic_scatter)
449 {
450   VolumeShaderCoefficients coeff ccl_optional_struct_init;
451 
452   if (!volume_shader_sample(kg, sd, state, ray->P, &coeff))
453     return VOLUME_PATH_MISSED;
454 
455   int closure_flag = sd->flag;
456   float t = ray->t;
457   float3 new_tp;
458 
459 #  ifdef __VOLUME_SCATTER__
460   /* randomly scatter, and if we do t is shortened */
461   if (closure_flag & SD_SCATTER) {
462     /* Sample channel, use MIS with balance heuristic. */
463     float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
464     float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
465     float3 channel_pdf;
466     int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
467 
468     /* decide if we will hit or miss */
469     bool scatter = true;
470     float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
471 
472     if (probalistic_scatter) {
473       float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
474       float sample_transmittance = expf(-sample_sigma_t * t);
475 
476       if (1.0f - xi >= sample_transmittance) {
477         scatter = true;
478 
479         /* rescale random number so we can reuse it */
480         xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
481       }
482       else
483         scatter = false;
484     }
485 
486     if (scatter) {
487       /* scattering */
488       float3 pdf;
489       float3 transmittance;
490       float sample_t;
491 
492       /* distance sampling */
493       sample_t = kernel_volume_distance_sample(
494           ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
495 
496       /* modify pdf for hit/miss decision */
497       if (probalistic_scatter)
498         pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
499 
500       new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
501       t = sample_t;
502     }
503     else {
504       /* no scattering */
505       float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
506       float pdf = dot(channel_pdf, transmittance);
507       new_tp = *throughput * transmittance / pdf;
508     }
509   }
510   else
511 #  endif
512       if (closure_flag & SD_EXTINCTION) {
513     /* absorption only, no sampling needed */
514     float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
515     new_tp = *throughput * transmittance;
516   }
517   else {
518     new_tp = *throughput;
519   }
520 
521   /* integrate emission attenuated by extinction */
522   if (L && (closure_flag & SD_EMISSION)) {
523     float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
524     float3 emission = kernel_volume_emission_integrate(
525         &coeff, closure_flag, transmittance, ray->t);
526     path_radiance_accum_emission(kg, L, state, *throughput, emission);
527   }
528 
529   /* modify throughput */
530   if (closure_flag & SD_EXTINCTION) {
531     *throughput = new_tp;
532 
533     /* prepare to scatter to new direction */
534     if (t < ray->t) {
535       /* adjust throughput and move to new location */
536       sd->P = ray->P + t * ray->D;
537 
538       return VOLUME_PATH_SCATTERED;
539     }
540   }
541 
542   return VOLUME_PATH_ATTENUATED;
543 }
544 
545 /* heterogeneous volume distance sampling: integrate stepping through the
546  * volume until we reach the end, get absorbed entirely, or run out of
547  * iterations. this does probabilistically scatter or get transmitted through
548  * for path tracing where we don't want to branch. */
549 ccl_device VolumeIntegrateResult
kernel_volume_integrate_heterogeneous_distance(KernelGlobals * kg,ccl_addr_space PathState * state,Ray * ray,ShaderData * sd,PathRadiance * L,ccl_addr_space float3 * throughput,const float object_step_size)550 kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
551                                                ccl_addr_space PathState *state,
552                                                Ray *ray,
553                                                ShaderData *sd,
554                                                PathRadiance *L,
555                                                ccl_addr_space float3 *throughput,
556                                                const float object_step_size)
557 {
558   float3 tp = *throughput;
559   const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
560 
561   /* prepare for stepping */
562   int max_steps = kernel_data.integrator.volume_max_steps;
563   float step_offset, step_size;
564   kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
565 
566   /* compute coefficients at the start */
567   float t = 0.0f;
568   float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
569 
570   /* pick random color channel, we use the Veach one-sample
571    * model with balance heuristic for the channels */
572   float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
573   float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
574   bool has_scatter = false;
575 
576   for (int i = 0; i < max_steps; i++) {
577     /* advance to new position */
578     float new_t = min(ray->t, (i + 1) * step_size);
579     float dt = new_t - t;
580 
581     /* use random position inside this segment to sample shader,
582      * for last shorter step we remap it to fit within the segment. */
583     if (new_t == ray->t) {
584       step_offset *= (new_t - t) / step_size;
585     }
586 
587     float3 new_P = ray->P + ray->D * (t + step_offset);
588     VolumeShaderCoefficients coeff ccl_optional_struct_init;
589 
590     /* compute segment */
591     if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
592       int closure_flag = sd->flag;
593       float3 new_tp;
594       float3 transmittance;
595       bool scatter = false;
596 
597       /* distance sampling */
598 #  ifdef __VOLUME_SCATTER__
599       if ((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
600         has_scatter = true;
601 
602         /* Sample channel, use MIS with balance heuristic. */
603         float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
604         float3 channel_pdf;
605         int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
606 
607         /* compute transmittance over full step */
608         transmittance = volume_color_transmittance(coeff.sigma_t, dt);
609 
610         /* decide if we will scatter or continue */
611         float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
612 
613         if (1.0f - xi >= sample_transmittance) {
614           /* compute sampling distance */
615           float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
616           float new_dt = -logf(1.0f - xi) / sample_sigma_t;
617           new_t = t + new_dt;
618 
619           /* transmittance and pdf */
620           float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
621           float3 pdf = coeff.sigma_t * new_transmittance;
622 
623           /* throughput */
624           new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
625           scatter = true;
626         }
627         else {
628           /* throughput */
629           float pdf = dot(channel_pdf, transmittance);
630           new_tp = tp * transmittance / pdf;
631 
632           /* remap xi so we can reuse it and keep thing stratified */
633           xi = 1.0f - (1.0f - xi) / sample_transmittance;
634         }
635       }
636       else
637 #  endif
638           if (closure_flag & SD_EXTINCTION) {
639         /* absorption only, no sampling needed */
640         transmittance = volume_color_transmittance(coeff.sigma_t, dt);
641         new_tp = tp * transmittance;
642       }
643       else {
644         transmittance = make_float3(0.0f, 0.0f, 0.0f);
645         new_tp = tp;
646       }
647 
648       /* integrate emission attenuated by absorption */
649       if (L && (closure_flag & SD_EMISSION)) {
650         float3 emission = kernel_volume_emission_integrate(
651             &coeff, closure_flag, transmittance, dt);
652         path_radiance_accum_emission(kg, L, state, tp, emission);
653       }
654 
655       /* modify throughput */
656       if (closure_flag & SD_EXTINCTION) {
657         tp = new_tp;
658 
659         /* stop if nearly all light blocked */
660         if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
661           tp = make_float3(0.0f, 0.0f, 0.0f);
662           break;
663         }
664       }
665 
666       /* prepare to scatter to new direction */
667       if (scatter) {
668         /* adjust throughput and move to new location */
669         sd->P = ray->P + new_t * ray->D;
670         *throughput = tp;
671 
672         return VOLUME_PATH_SCATTERED;
673       }
674       else {
675         /* accumulate transmittance */
676         accum_transmittance *= transmittance;
677       }
678     }
679 
680     /* stop if at the end of the volume */
681     t = new_t;
682     if (t == ray->t)
683       break;
684   }
685 
686   *throughput = tp;
687 
688   return VOLUME_PATH_ATTENUATED;
689 }
690 
691 /* get the volume attenuation and emission over line segment defined by
692  * ray, with the assumption that there are no surfaces blocking light
693  * between the endpoints. distance sampling is used to decide if we will
694  * scatter or not. */
695 ccl_device_noinline_cpu VolumeIntegrateResult
kernel_volume_integrate(KernelGlobals * kg,ccl_addr_space PathState * state,ShaderData * sd,Ray * ray,PathRadiance * L,ccl_addr_space float3 * throughput,float step_size)696 kernel_volume_integrate(KernelGlobals *kg,
697                         ccl_addr_space PathState *state,
698                         ShaderData *sd,
699                         Ray *ray,
700                         PathRadiance *L,
701                         ccl_addr_space float3 *throughput,
702                         float step_size)
703 {
704   shader_setup_from_volume(kg, sd, ray);
705 
706   if (step_size != FLT_MAX)
707     return kernel_volume_integrate_heterogeneous_distance(
708         kg, state, ray, sd, L, throughput, step_size);
709   else
710     return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
711 }
712 
713 #  ifndef __SPLIT_KERNEL__
714 /* Decoupled Volume Sampling
715  *
716  * VolumeSegment is list of coefficients and transmittance stored at all steps
717  * through a volume. This can then later be used for decoupled sampling as in:
718  * "Importance Sampling Techniques for Path Tracing in Participating Media"
719  *
720  * On the GPU this is only supported (but currently not enabled)
721  * for homogeneous volumes (1 step), due to
722  * no support for malloc/free and too much stack usage with a fix size array. */
723 
724 typedef struct VolumeStep {
725   float3 sigma_s;             /* scatter coefficient */
726   float3 sigma_t;             /* extinction coefficient */
727   float3 accum_transmittance; /* accumulated transmittance including this step */
728   float3 cdf_distance;        /* cumulative density function for distance sampling */
729   float t;                    /* distance at end of this step */
730   float shade_t;              /* jittered distance where shading was done in step */
731   int closure_flag;           /* shader evaluation closure flags */
732 } VolumeStep;
733 
734 typedef struct VolumeSegment {
735   VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */
736   VolumeStep *steps;     /* recorded steps */
737   int numsteps;          /* number of steps */
738   int closure_flag;      /* accumulated closure flags from all steps */
739 
740   float3 accum_emission;      /* accumulated emission at end of segment */
741   float3 accum_transmittance; /* accumulated transmittance at end of segment */
742   float3 accum_albedo;        /* accumulated average albedo over segment */
743 
744   int sampling_method; /* volume sampling method */
745 } VolumeSegment;
746 
747 /* record volume steps to the end of the volume.
748  *
749  * it would be nice if we could only record up to the point that we need to scatter,
750  * but the entire segment is needed to do always scattering, rather than probabilistically
751  * hitting or missing the volume. if we don't know the transmittance at the end of the
752  * volume we can't generate stratified distance samples up to that transmittance */
753 #    ifdef __VOLUME_DECOUPLED__
kernel_volume_decoupled_record(KernelGlobals * kg,PathState * state,Ray * ray,ShaderData * sd,VolumeSegment * segment,const float object_step_size)754 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
755                                                PathState *state,
756                                                Ray *ray,
757                                                ShaderData *sd,
758                                                VolumeSegment *segment,
759                                                const float object_step_size)
760 {
761   const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
762 
763   /* prepare for volume stepping */
764   int max_steps;
765   float step_size, step_offset;
766 
767   if (object_step_size != FLT_MAX) {
768     max_steps = kernel_data.integrator.volume_max_steps;
769     kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
770 
771 #      ifdef __KERNEL_CPU__
772     /* NOTE: For the branched path tracing it's possible to have direct
773      * and indirect light integration both having volume segments allocated.
774      * We detect this using index in the pre-allocated memory. Currently we
775      * only support two segments allocated at a time, if more needed some
776      * modifications to the KernelGlobals will be needed.
777      *
778      * This gives us restrictions that decoupled record should only happen
779      * in the stack manner, meaning if there's subsequent call of decoupled
780      * record it'll need to free memory before its caller frees memory.
781      */
782     const int index = kg->decoupled_volume_steps_index;
783     assert(index < sizeof(kg->decoupled_volume_steps) / sizeof(*kg->decoupled_volume_steps));
784     if (kg->decoupled_volume_steps[index] == NULL) {
785       kg->decoupled_volume_steps[index] = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
786     }
787     segment->steps = kg->decoupled_volume_steps[index];
788     ++kg->decoupled_volume_steps_index;
789 #      else
790     segment->steps = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
791 #      endif
792   }
793   else {
794     max_steps = 1;
795     step_size = ray->t;
796     step_offset = 0.0f;
797     segment->steps = &segment->stack_step;
798   }
799 
800   /* init accumulation variables */
801   float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
802   float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
803   float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
804   float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
805   float t = 0.0f;
806 
807   segment->numsteps = 0;
808   segment->closure_flag = 0;
809   bool is_last_step_empty = false;
810 
811   VolumeStep *step = segment->steps;
812 
813   for (int i = 0; i < max_steps; i++, step++) {
814     /* advance to new position */
815     float new_t = min(ray->t, (i + 1) * step_size);
816     float dt = new_t - t;
817 
818     /* use random position inside this segment to sample shader,
819      * for last shorter step we remap it to fit within the segment. */
820     if (new_t == ray->t) {
821       step_offset *= (new_t - t) / step_size;
822     }
823 
824     float3 new_P = ray->P + ray->D * (t + step_offset);
825     VolumeShaderCoefficients coeff ccl_optional_struct_init;
826 
827     /* compute segment */
828     if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
829       int closure_flag = sd->flag;
830       float3 sigma_t = coeff.sigma_t;
831 
832       /* compute average albedo for channel sampling */
833       if (closure_flag & SD_SCATTER) {
834         accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
835       }
836 
837       /* compute accumulated transmittance */
838       float3 transmittance = volume_color_transmittance(sigma_t, dt);
839 
840       /* compute emission attenuated by absorption */
841       if (closure_flag & SD_EMISSION) {
842         float3 emission = kernel_volume_emission_integrate(
843             &coeff, closure_flag, transmittance, dt);
844         accum_emission += accum_transmittance * emission;
845       }
846 
847       accum_transmittance *= transmittance;
848 
849       /* compute pdf for distance sampling */
850       float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
851       cdf_distance = cdf_distance + pdf_distance;
852 
853       /* write step data */
854       step->sigma_t = sigma_t;
855       step->sigma_s = coeff.sigma_s;
856       step->closure_flag = closure_flag;
857 
858       segment->closure_flag |= closure_flag;
859 
860       is_last_step_empty = false;
861       segment->numsteps++;
862     }
863     else {
864       if (is_last_step_empty) {
865         /* consecutive empty step, merge */
866         step--;
867       }
868       else {
869         /* store empty step */
870         step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
871         step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
872         step->closure_flag = 0;
873 
874         segment->numsteps++;
875         is_last_step_empty = true;
876       }
877     }
878 
879     step->accum_transmittance = accum_transmittance;
880     step->cdf_distance = cdf_distance;
881     step->t = new_t;
882     step->shade_t = t + step_offset;
883 
884     /* stop if at the end of the volume */
885     t = new_t;
886     if (t == ray->t)
887       break;
888 
889     /* stop if nearly all light blocked */
890     if (accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps &&
891         accum_transmittance.z < tp_eps)
892       break;
893   }
894 
895   /* store total emission and transmittance */
896   segment->accum_emission = accum_emission;
897   segment->accum_transmittance = accum_transmittance;
898   segment->accum_albedo = accum_albedo;
899 
900   /* normalize cumulative density function for distance sampling */
901   VolumeStep *last_step = segment->steps + segment->numsteps - 1;
902 
903   if (!is_zero(last_step->cdf_distance)) {
904     VolumeStep *step = &segment->steps[0];
905     int numsteps = segment->numsteps;
906     float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
907 
908     for (int i = 0; i < numsteps; i++, step++)
909       step->cdf_distance *= inv_cdf_distance_sum;
910   }
911 }
912 
kernel_volume_decoupled_free(KernelGlobals * kg,VolumeSegment * segment)913 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
914 {
915   if (segment->steps != &segment->stack_step) {
916 #      ifdef __KERNEL_CPU__
917     /* NOTE: We only allow free last allocated segment.
918      * No random order of alloc/free is supported.
919      */
920     assert(kg->decoupled_volume_steps_index > 0);
921     assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
922     --kg->decoupled_volume_steps_index;
923 #      else
924     free(segment->steps);
925 #      endif
926   }
927 }
928 #    endif /* __VOLUME_DECOUPLED__ */
929 
930 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
931  * marching.
932  *
933  * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
kernel_volume_decoupled_scatter(KernelGlobals * kg,PathState * state,Ray * ray,ShaderData * sd,float3 * throughput,float rphase,float rscatter,const VolumeSegment * segment,const float3 * light_P,bool probalistic_scatter)934 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(KernelGlobals *kg,
935                                                                  PathState *state,
936                                                                  Ray *ray,
937                                                                  ShaderData *sd,
938                                                                  float3 *throughput,
939                                                                  float rphase,
940                                                                  float rscatter,
941                                                                  const VolumeSegment *segment,
942                                                                  const float3 *light_P,
943                                                                  bool probalistic_scatter)
944 {
945   kernel_assert(segment->closure_flag & SD_SCATTER);
946 
947   /* Sample color channel, use MIS with balance heuristic. */
948   float3 channel_pdf;
949   int channel = kernel_volume_sample_channel(
950       segment->accum_albedo, *throughput, rphase, &channel_pdf);
951 
952   float xi = rscatter;
953 
954   /* probabilistic scattering decision based on transmittance */
955   if (probalistic_scatter) {
956     float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
957 
958     if (1.0f - xi >= sample_transmittance) {
959       /* rescale random number so we can reuse it */
960       xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
961     }
962     else {
963       *throughput /= sample_transmittance;
964       return VOLUME_PATH_MISSED;
965     }
966   }
967 
968   VolumeStep *step;
969   float3 transmittance;
970   float pdf, sample_t;
971   float mis_weight = 1.0f;
972   bool distance_sample = true;
973   bool use_mis = false;
974 
975   if (segment->sampling_method && light_P) {
976     if (segment->sampling_method == SD_VOLUME_MIS) {
977       /* multiple importance sample: randomly pick between
978        * equiangular and distance sampling strategy */
979       if (xi < 0.5f) {
980         xi *= 2.0f;
981       }
982       else {
983         xi = (xi - 0.5f) * 2.0f;
984         distance_sample = false;
985       }
986 
987       use_mis = true;
988     }
989     else {
990       /* only equiangular sampling */
991       distance_sample = false;
992     }
993   }
994 
995   /* distance sampling */
996   if (distance_sample) {
997     /* find step in cdf */
998     step = segment->steps;
999 
1000     float prev_t = 0.0f;
1001     float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
1002 
1003     if (segment->numsteps > 1) {
1004       float prev_cdf = 0.0f;
1005       float step_cdf = 1.0f;
1006       float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
1007 
1008       for (int i = 0;; i++, step++) {
1009         /* todo: optimize using binary search */
1010         step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
1011 
1012         if (xi < step_cdf || i == segment->numsteps - 1)
1013           break;
1014 
1015         prev_cdf = step_cdf;
1016         prev_t = step->t;
1017         prev_cdf_distance = step->cdf_distance;
1018       }
1019 
1020       /* remap xi so we can reuse it */
1021       xi = (xi - prev_cdf) / (step_cdf - prev_cdf);
1022 
1023       /* pdf for picking step */
1024       step_pdf_distance = step->cdf_distance - prev_cdf_distance;
1025     }
1026 
1027     /* determine range in which we will sample */
1028     float step_t = step->t - prev_t;
1029 
1030     /* sample distance and compute transmittance */
1031     float3 distance_pdf;
1032     sample_t = prev_t + kernel_volume_distance_sample(
1033                             step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
1034 
1035     /* modify pdf for hit/miss decision */
1036     if (probalistic_scatter)
1037       distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
1038 
1039     pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
1040 
1041     /* multiple importance sampling */
1042     if (use_mis) {
1043       float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
1044       mis_weight = 2.0f * power_heuristic(pdf, equi_pdf);
1045     }
1046   }
1047   /* equi-angular sampling */
1048   else {
1049     /* sample distance */
1050     sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
1051 
1052     /* find step in which sampled distance is located */
1053     step = segment->steps;
1054 
1055     float prev_t = 0.0f;
1056     float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
1057 
1058     if (segment->numsteps > 1) {
1059       float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
1060 
1061       int numsteps = segment->numsteps;
1062       int high = numsteps - 1;
1063       int low = 0;
1064       int mid;
1065 
1066       while (low < high) {
1067         mid = (low + high) >> 1;
1068 
1069         if (sample_t < step[mid].t)
1070           high = mid;
1071         else if (sample_t >= step[mid + 1].t)
1072           low = mid + 1;
1073         else {
1074           /* found our interval in step[mid] .. step[mid+1] */
1075           prev_t = step[mid].t;
1076           prev_cdf_distance = step[mid].cdf_distance;
1077           step += mid + 1;
1078           break;
1079         }
1080       }
1081 
1082       if (low >= numsteps - 1) {
1083         prev_t = step[numsteps - 1].t;
1084         prev_cdf_distance = step[numsteps - 1].cdf_distance;
1085         step += numsteps - 1;
1086       }
1087 
1088       /* pdf for picking step with distance sampling */
1089       step_pdf_distance = step->cdf_distance - prev_cdf_distance;
1090     }
1091 
1092     /* determine range in which we will sample */
1093     float step_t = step->t - prev_t;
1094     float step_sample_t = sample_t - prev_t;
1095 
1096     /* compute transmittance */
1097     transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
1098 
1099     /* multiple importance sampling */
1100     if (use_mis) {
1101       float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
1102       float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
1103       mis_weight = 2.0f * power_heuristic(pdf, distance_pdf);
1104     }
1105   }
1106   if (sample_t < 0.0f || pdf == 0.0f) {
1107     return VOLUME_PATH_MISSED;
1108   }
1109 
1110   /* compute transmittance up to this step */
1111   if (step != segment->steps)
1112     transmittance *= (step - 1)->accum_transmittance;
1113 
1114   /* modify throughput */
1115   *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
1116 
1117   /* evaluate shader to create closures at shading point */
1118   if (segment->numsteps > 1) {
1119     sd->P = ray->P + step->shade_t * ray->D;
1120 
1121     VolumeShaderCoefficients coeff;
1122     volume_shader_sample(kg, sd, state, sd->P, &coeff);
1123   }
1124 
1125   /* move to new position */
1126   sd->P = ray->P + sample_t * ray->D;
1127 
1128   return VOLUME_PATH_SCATTERED;
1129 }
1130 #  endif /* __SPLIT_KERNEL */
1131 
1132 /* decide if we need to use decoupled or not */
kernel_volume_use_decoupled(KernelGlobals * kg,bool heterogeneous,bool direct,int sampling_method)1133 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg,
1134                                             bool heterogeneous,
1135                                             bool direct,
1136                                             int sampling_method)
1137 {
1138   /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
1139    * which also means equiangular and multiple importance sampling is not
1140    * support for that case */
1141   if (!kernel_data.integrator.volume_decoupled)
1142     return false;
1143 
1144 #  ifdef __KERNEL_GPU__
1145   if (heterogeneous)
1146     return false;
1147 #  endif
1148 
1149   /* equiangular and multiple importance sampling only implemented for decoupled */
1150   if (sampling_method != 0)
1151     return true;
1152 
1153   /* for all light sampling use decoupled, reusing shader evaluations is
1154    * typically faster in that case */
1155   if (direct)
1156     return kernel_data.integrator.sample_all_lights_direct;
1157   else
1158     return kernel_data.integrator.sample_all_lights_indirect;
1159 }
1160 
1161 /* Volume Stack
1162  *
1163  * This is an array of object/shared ID's that the current segment of the path
1164  * is inside of. */
1165 
kernel_volume_stack_init(KernelGlobals * kg,ShaderData * stack_sd,ccl_addr_space const PathState * state,ccl_addr_space const Ray * ray,ccl_addr_space VolumeStack * stack)1166 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
1167                                          ShaderData *stack_sd,
1168                                          ccl_addr_space const PathState *state,
1169                                          ccl_addr_space const Ray *ray,
1170                                          ccl_addr_space VolumeStack *stack)
1171 {
1172   /* NULL ray happens in the baker, does it need proper initialization of
1173    * camera in volume?
1174    */
1175   if (!kernel_data.cam.is_inside_volume || ray == NULL) {
1176     /* Camera is guaranteed to be in the air, only take background volume
1177      * into account in this case.
1178      */
1179     if (kernel_data.background.volume_shader != SHADER_NONE) {
1180       stack[0].shader = kernel_data.background.volume_shader;
1181       stack[0].object = PRIM_NONE;
1182       stack[1].shader = SHADER_NONE;
1183     }
1184     else {
1185       stack[0].shader = SHADER_NONE;
1186     }
1187     return;
1188   }
1189 
1190   kernel_assert(state->flag & PATH_RAY_CAMERA);
1191 
1192   Ray volume_ray = *ray;
1193   volume_ray.t = FLT_MAX;
1194 
1195   const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
1196   int stack_index = 0, enclosed_index = 0;
1197 
1198 #  ifdef __VOLUME_RECORD_ALL__
1199   Intersection hits[2 * VOLUME_STACK_SIZE + 1];
1200   uint num_hits = scene_intersect_volume_all(
1201       kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility);
1202   if (num_hits > 0) {
1203     int enclosed_volumes[VOLUME_STACK_SIZE];
1204     Intersection *isect = hits;
1205 
1206     qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1207 
1208     for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
1209       shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1210       if (stack_sd->flag & SD_BACKFACING) {
1211         bool need_add = true;
1212         for (int i = 0; i < enclosed_index && need_add; ++i) {
1213           /* If ray exited the volume and never entered to that volume
1214            * it means that camera is inside such a volume.
1215            */
1216           if (enclosed_volumes[i] == stack_sd->object) {
1217             need_add = false;
1218           }
1219         }
1220         for (int i = 0; i < stack_index && need_add; ++i) {
1221           /* Don't add intersections twice. */
1222           if (stack[i].object == stack_sd->object) {
1223             need_add = false;
1224             break;
1225           }
1226         }
1227         if (need_add && stack_index < VOLUME_STACK_SIZE - 1) {
1228           stack[stack_index].object = stack_sd->object;
1229           stack[stack_index].shader = stack_sd->shader;
1230           ++stack_index;
1231         }
1232       }
1233       else {
1234         /* If ray from camera enters the volume, this volume shouldn't
1235          * be added to the stack on exit.
1236          */
1237         enclosed_volumes[enclosed_index++] = stack_sd->object;
1238       }
1239     }
1240   }
1241 #  else
1242   int enclosed_volumes[VOLUME_STACK_SIZE];
1243   int step = 0;
1244 
1245   while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 &&
1246          step < 2 * VOLUME_STACK_SIZE) {
1247     Intersection isect;
1248     if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
1249       break;
1250     }
1251 
1252     shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1253     if (stack_sd->flag & SD_BACKFACING) {
1254       /* If ray exited the volume and never entered to that volume
1255        * it means that camera is inside such a volume.
1256        */
1257       bool need_add = true;
1258       for (int i = 0; i < enclosed_index && need_add; ++i) {
1259         /* If ray exited the volume and never entered to that volume
1260          * it means that camera is inside such a volume.
1261          */
1262         if (enclosed_volumes[i] == stack_sd->object) {
1263           need_add = false;
1264         }
1265       }
1266       for (int i = 0; i < stack_index && need_add; ++i) {
1267         /* Don't add intersections twice. */
1268         if (stack[i].object == stack_sd->object) {
1269           need_add = false;
1270           break;
1271         }
1272       }
1273       if (need_add) {
1274         stack[stack_index].object = stack_sd->object;
1275         stack[stack_index].shader = stack_sd->shader;
1276         ++stack_index;
1277       }
1278     }
1279     else {
1280       /* If ray from camera enters the volume, this volume shouldn't
1281        * be added to the stack on exit.
1282        */
1283       enclosed_volumes[enclosed_index++] = stack_sd->object;
1284     }
1285 
1286     /* Move ray forward. */
1287     volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1288     ++step;
1289   }
1290 #  endif
1291   /* stack_index of 0 means quick checks outside of the kernel gave false
1292    * positive, nothing to worry about, just we've wasted quite a few of
1293    * ticks just to come into conclusion that camera is in the air.
1294    *
1295    * In this case we're doing the same above -- check whether background has
1296    * volume.
1297    */
1298   if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1299     stack[0].shader = kernel_data.background.volume_shader;
1300     stack[0].object = OBJECT_NONE;
1301     stack[1].shader = SHADER_NONE;
1302   }
1303   else {
1304     stack[stack_index].shader = SHADER_NONE;
1305   }
1306 }
1307 
kernel_volume_stack_enter_exit(KernelGlobals * kg,ShaderData * sd,ccl_addr_space VolumeStack * stack)1308 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg,
1309                                                ShaderData *sd,
1310                                                ccl_addr_space VolumeStack *stack)
1311 {
1312   /* todo: we should have some way for objects to indicate if they want the
1313    * world shader to work inside them. excluding it by default is problematic
1314    * because non-volume objects can't be assumed to be closed manifolds */
1315 
1316   if (!(sd->flag & SD_HAS_VOLUME))
1317     return;
1318 
1319   if (sd->flag & SD_BACKFACING) {
1320     /* exit volume object: remove from stack */
1321     for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
1322       if (stack[i].object == sd->object) {
1323         /* shift back next stack entries */
1324         do {
1325           stack[i] = stack[i + 1];
1326           i++;
1327         } while (stack[i].shader != SHADER_NONE);
1328 
1329         return;
1330       }
1331     }
1332   }
1333   else {
1334     /* enter volume object: add to stack */
1335     int i;
1336 
1337     for (i = 0; stack[i].shader != SHADER_NONE; i++) {
1338       /* already in the stack? then we have nothing to do */
1339       if (stack[i].object == sd->object)
1340         return;
1341     }
1342 
1343     /* if we exceed the stack limit, ignore */
1344     if (i >= VOLUME_STACK_SIZE - 1)
1345       return;
1346 
1347     /* add to the end of the stack */
1348     stack[i].shader = sd->shader;
1349     stack[i].object = sd->object;
1350     stack[i + 1].shader = SHADER_NONE;
1351   }
1352 }
1353 
1354 #  ifdef __SUBSURFACE__
kernel_volume_stack_update_for_subsurface(KernelGlobals * kg,ShaderData * stack_sd,Ray * ray,ccl_addr_space VolumeStack * stack)1355 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1356                                                           ShaderData *stack_sd,
1357                                                           Ray *ray,
1358                                                           ccl_addr_space VolumeStack *stack)
1359 {
1360   kernel_assert(kernel_data.integrator.use_volumes);
1361 
1362   Ray volume_ray = *ray;
1363 
1364 #    ifdef __VOLUME_RECORD_ALL__
1365   Intersection hits[2 * VOLUME_STACK_SIZE + 1];
1366   uint num_hits = scene_intersect_volume_all(
1367       kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY);
1368   if (num_hits > 0) {
1369     Intersection *isect = hits;
1370 
1371     qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1372 
1373     for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
1374       shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1375       kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1376     }
1377   }
1378 #    else
1379   Intersection isect;
1380   int step = 0;
1381   float3 Pend = ray->P + ray->D * ray->t;
1382   while (step < 2 * VOLUME_STACK_SIZE &&
1383          scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
1384     shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1385     kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1386 
1387     /* Move ray forward. */
1388     volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1389     if (volume_ray.t != FLT_MAX) {
1390       volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
1391     }
1392     ++step;
1393   }
1394 #    endif
1395 }
1396 #  endif
1397 
1398 /* Clean stack after the last bounce.
1399  *
1400  * It is expected that all volumes are closed manifolds, so at the time when ray
1401  * hits nothing (for example, it is a last bounce which goes to environment) the
1402  * only expected volume in the stack is the world's one. All the rest volume
1403  * entries should have been exited already.
1404  *
1405  * This isn't always true because of ray intersection precision issues, which
1406  * could lead us to an infinite non-world volume in the stack, causing render
1407  * artifacts.
1408  *
1409  * Use this function after the last bounce to get rid of all volumes apart from
1410  * the world's one after the last bounce to avoid render artifacts.
1411  */
kernel_volume_clean_stack(KernelGlobals * kg,ccl_addr_space VolumeStack * volume_stack)1412 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
1413                                                  ccl_addr_space VolumeStack *volume_stack)
1414 {
1415   if (kernel_data.background.volume_shader != SHADER_NONE) {
1416     /* Keep the world's volume in stack. */
1417     volume_stack[1].shader = SHADER_NONE;
1418   }
1419   else {
1420     volume_stack[0].shader = SHADER_NONE;
1421   }
1422 }
1423 
1424 #endif /* __VOLUME__ */
1425 
1426 CCL_NAMESPACE_END
1427