1 /*
2 * Copyright 2011-2013 Blender Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Events for probalistic scattering */
20
21 typedef enum VolumeIntegrateResult {
22 VOLUME_PATH_SCATTERED = 0,
23 VOLUME_PATH_ATTENUATED = 1,
24 VOLUME_PATH_MISSED = 2
25 } VolumeIntegrateResult;
26
27 /* Volume shader properties
28 *
29 * extinction coefficient = absorption coefficient + scattering coefficient
30 * sigma_t = sigma_a + sigma_s */
31
32 typedef struct VolumeShaderCoefficients {
33 float3 sigma_t;
34 float3 sigma_s;
35 float3 emission;
36 } VolumeShaderCoefficients;
37
38 #ifdef __VOLUME__
39
40 /* evaluate shader to get extinction coefficient at P */
volume_shader_extinction_sample(KernelGlobals * kg,ShaderData * sd,ccl_addr_space PathState * state,float3 P,float3 * extinction)41 ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
42 ShaderData *sd,
43 ccl_addr_space PathState *state,
44 float3 P,
45 float3 *extinction)
46 {
47 sd->P = P;
48 shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW);
49
50 if (sd->flag & SD_EXTINCTION) {
51 const float density = object_volume_density(kg, sd->object);
52 *extinction = sd->closure_transparent_extinction * density;
53 return true;
54 }
55 else {
56 return false;
57 }
58 }
59
60 /* evaluate shader to get absorption, scattering and emission at P */
volume_shader_sample(KernelGlobals * kg,ShaderData * sd,ccl_addr_space PathState * state,float3 P,VolumeShaderCoefficients * coeff)61 ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
62 ShaderData *sd,
63 ccl_addr_space PathState *state,
64 float3 P,
65 VolumeShaderCoefficients *coeff)
66 {
67 sd->P = P;
68 shader_eval_volume(kg, sd, state, state->volume_stack, state->flag);
69
70 if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION)))
71 return false;
72
73 coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
74 coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction :
75 make_float3(0.0f, 0.0f, 0.0f);
76 coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background :
77 make_float3(0.0f, 0.0f, 0.0f);
78
79 if (sd->flag & SD_SCATTER) {
80 for (int i = 0; i < sd->num_closure; i++) {
81 const ShaderClosure *sc = &sd->closure[i];
82
83 if (CLOSURE_IS_VOLUME(sc->type))
84 coeff->sigma_s += sc->weight;
85 }
86 }
87
88 const float density = object_volume_density(kg, sd->object);
89 coeff->sigma_s *= density;
90 coeff->sigma_t *= density;
91 coeff->emission *= density;
92
93 return true;
94 }
95
96 #endif /* __VOLUME__ */
97
volume_color_transmittance(float3 sigma,float t)98 ccl_device float3 volume_color_transmittance(float3 sigma, float t)
99 {
100 return exp3(-sigma * t);
101 }
102
kernel_volume_channel_get(float3 value,int channel)103 ccl_device float kernel_volume_channel_get(float3 value, int channel)
104 {
105 return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z);
106 }
107
108 #ifdef __VOLUME__
109
volume_stack_step_size(KernelGlobals * kg,ccl_addr_space VolumeStack * stack)110 ccl_device float volume_stack_step_size(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
111 {
112 float step_size = FLT_MAX;
113
114 for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
115 int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
116
117 bool heterogeneous = false;
118
119 if (shader_flag & SD_HETEROGENEOUS_VOLUME) {
120 heterogeneous = true;
121 }
122 else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) {
123 /* We want to render world or objects without any volume grids
124 * as homogeneous, but can only verify this at run-time since other
125 * heterogeneous volume objects may be using the same shader. */
126 int object = stack[i].object;
127 if (object != OBJECT_NONE) {
128 int object_flag = kernel_tex_fetch(__object_flag, object);
129 if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) {
130 heterogeneous = true;
131 }
132 }
133 }
134
135 if (heterogeneous) {
136 float object_step_size = object_volume_step_size(kg, stack[i].object);
137 object_step_size *= kernel_data.integrator.volume_step_rate;
138 step_size = fminf(object_step_size, step_size);
139 }
140 }
141
142 return step_size;
143 }
144
volume_stack_sampling_method(KernelGlobals * kg,VolumeStack * stack)145 ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack)
146 {
147 if (kernel_data.integrator.num_all_lights == 0)
148 return 0;
149
150 int method = -1;
151
152 for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
153 int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
154
155 if (shader_flag & SD_VOLUME_MIS) {
156 return SD_VOLUME_MIS;
157 }
158 else if (shader_flag & SD_VOLUME_EQUIANGULAR) {
159 if (method == 0)
160 return SD_VOLUME_MIS;
161
162 method = SD_VOLUME_EQUIANGULAR;
163 }
164 else {
165 if (method == SD_VOLUME_EQUIANGULAR)
166 return SD_VOLUME_MIS;
167
168 method = 0;
169 }
170 }
171
172 return method;
173 }
174
kernel_volume_step_init(KernelGlobals * kg,ccl_addr_space PathState * state,const float object_step_size,float t,float * step_size,float * step_offset)175 ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg,
176 ccl_addr_space PathState *state,
177 const float object_step_size,
178 float t,
179 float *step_size,
180 float *step_offset)
181 {
182 const int max_steps = kernel_data.integrator.volume_max_steps;
183 float step = min(object_step_size, t);
184
185 /* compute exact steps in advance for malloc */
186 if (t > max_steps * step) {
187 step = t / (float)max_steps;
188 }
189
190 *step_size = step;
191 *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step;
192 }
193
194 /* Volume Shadows
195 *
196 * These functions are used to attenuate shadow rays to lights. Both absorption
197 * and scattering will block light, represented by the extinction coefficient. */
198
199 /* homogeneous volume: assume shader evaluation at the starts gives
200 * the extinction coefficient for the entire line segment */
kernel_volume_shadow_homogeneous(KernelGlobals * kg,ccl_addr_space PathState * state,Ray * ray,ShaderData * sd,float3 * throughput)201 ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
202 ccl_addr_space PathState *state,
203 Ray *ray,
204 ShaderData *sd,
205 float3 *throughput)
206 {
207 float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
208
209 if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
210 *throughput *= volume_color_transmittance(sigma_t, ray->t);
211 }
212
213 /* heterogeneous volume: integrate stepping through the volume until we
214 * reach the end, get absorbed entirely, or run out of iterations */
kernel_volume_shadow_heterogeneous(KernelGlobals * kg,ccl_addr_space PathState * state,Ray * ray,ShaderData * sd,float3 * throughput,const float object_step_size)215 ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
216 ccl_addr_space PathState *state,
217 Ray *ray,
218 ShaderData *sd,
219 float3 *throughput,
220 const float object_step_size)
221 {
222 float3 tp = *throughput;
223 const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
224
225 /* prepare for stepping */
226 int max_steps = kernel_data.integrator.volume_max_steps;
227 float step_offset, step_size;
228 kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
229
230 /* compute extinction at the start */
231 float t = 0.0f;
232
233 float3 sum = make_float3(0.0f, 0.0f, 0.0f);
234
235 for (int i = 0; i < max_steps; i++) {
236 /* advance to new position */
237 float new_t = min(ray->t, (i + 1) * step_size);
238
239 /* use random position inside this segment to sample shader, adjust
240 * for last step that is shorter than other steps. */
241 if (new_t == ray->t) {
242 step_offset *= (new_t - t) / step_size;
243 }
244
245 float3 new_P = ray->P + ray->D * (t + step_offset);
246 float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
247
248 /* compute attenuation over segment */
249 if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
250 /* Compute expf() only for every Nth step, to save some calculations
251 * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */
252
253 sum += (-sigma_t * (new_t - t));
254 if ((i & 0x07) == 0) { /* ToDo: Other interval? */
255 tp = *throughput * exp3(sum);
256
257 /* stop if nearly all light is blocked */
258 if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
259 break;
260 }
261 }
262
263 /* stop if at the end of the volume */
264 t = new_t;
265 if (t == ray->t) {
266 /* Update throughput in case we haven't done it above */
267 tp = *throughput * exp3(sum);
268 break;
269 }
270 }
271
272 *throughput = tp;
273 }
274
275 /* get the volume attenuation over line segment defined by ray, with the
276 * assumption that there are no surfaces blocking light between the endpoints */
kernel_volume_shadow(KernelGlobals * kg,ShaderData * shadow_sd,ccl_addr_space PathState * state,Ray * ray,float3 * throughput)277 ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
278 ShaderData *shadow_sd,
279 ccl_addr_space PathState *state,
280 Ray *ray,
281 float3 *throughput)
282 {
283 shader_setup_from_volume(kg, shadow_sd, ray);
284
285 float step_size = volume_stack_step_size(kg, state->volume_stack);
286 if (step_size != FLT_MAX)
287 kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput, step_size);
288 else
289 kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput);
290 }
291
292 #endif /* __VOLUME__ */
293
294 /* Equi-angular sampling as in:
295 * "Importance Sampling Techniques for Path Tracing in Participating Media" */
296
kernel_volume_equiangular_sample(Ray * ray,float3 light_P,float xi,float * pdf)297 ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
298 {
299 float t = ray->t;
300
301 float delta = dot((light_P - ray->P), ray->D);
302 float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
303 if (UNLIKELY(D == 0.0f)) {
304 *pdf = 0.0f;
305 return 0.0f;
306 }
307 float theta_a = -atan2f(delta, D);
308 float theta_b = atan2f(t - delta, D);
309 float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
310 if (UNLIKELY(theta_b == theta_a)) {
311 *pdf = 0.0f;
312 return 0.0f;
313 }
314 *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
315
316 return min(t, delta + t_); /* min is only for float precision errors */
317 }
318
kernel_volume_equiangular_pdf(Ray * ray,float3 light_P,float sample_t)319 ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
320 {
321 float delta = dot((light_P - ray->P), ray->D);
322 float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta);
323 if (UNLIKELY(D == 0.0f)) {
324 return 0.0f;
325 }
326
327 float t = ray->t;
328 float t_ = sample_t - delta;
329
330 float theta_a = -atan2f(delta, D);
331 float theta_b = atan2f(t - delta, D);
332 if (UNLIKELY(theta_b == theta_a)) {
333 return 0.0f;
334 }
335
336 float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
337
338 return pdf;
339 }
340
341 /* Distance sampling */
342
kernel_volume_distance_sample(float max_t,float3 sigma_t,int channel,float xi,float3 * transmittance,float3 * pdf)343 ccl_device float kernel_volume_distance_sample(
344 float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
345 {
346 /* xi is [0, 1[ so log(0) should never happen, division by zero is
347 * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
348 float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
349 float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
350 float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
351
352 float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t);
353
354 *transmittance = volume_color_transmittance(sigma_t, sample_t);
355 *pdf = safe_divide_color(sigma_t * *transmittance,
356 make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
357
358 /* todo: optimization: when taken together with hit/miss decision,
359 * the full_transmittance cancels out drops out and xi does not
360 * need to be remapped */
361
362 return sample_t;
363 }
364
kernel_volume_distance_pdf(float max_t,float3 sigma_t,float sample_t)365 ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
366 {
367 float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
368 float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
369
370 return safe_divide_color(sigma_t * transmittance,
371 make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
372 }
373
374 /* Emission */
375
kernel_volume_emission_integrate(VolumeShaderCoefficients * coeff,int closure_flag,float3 transmittance,float t)376 ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff,
377 int closure_flag,
378 float3 transmittance,
379 float t)
380 {
381 /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
382 * this goes to E * t as sigma_t goes to zero
383 *
384 * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
385 float3 emission = coeff->emission;
386
387 if (closure_flag & SD_EXTINCTION) {
388 float3 sigma_t = coeff->sigma_t;
389
390 emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t;
391 emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t;
392 emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t;
393 }
394 else
395 emission *= t;
396
397 return emission;
398 }
399
400 /* Volume Path */
401
kernel_volume_sample_channel(float3 albedo,float3 throughput,float rand,float3 * pdf)402 ccl_device int kernel_volume_sample_channel(float3 albedo,
403 float3 throughput,
404 float rand,
405 float3 *pdf)
406 {
407 /* Sample color channel proportional to throughput and single scattering
408 * albedo, to significantly reduce noise with many bounce, following:
409 *
410 * "Practical and Controllable Subsurface Scattering for Production Path
411 * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
412 float3 weights = fabs(throughput * albedo);
413 float sum_weights = weights.x + weights.y + weights.z;
414 float3 weights_pdf;
415
416 if (sum_weights > 0.0f) {
417 weights_pdf = weights / sum_weights;
418 }
419 else {
420 weights_pdf = make_float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f);
421 }
422
423 *pdf = weights_pdf;
424
425 /* OpenCL does not support -> on float3, so don't use pdf->x. */
426 if (rand < weights_pdf.x) {
427 return 0;
428 }
429 else if (rand < weights_pdf.x + weights_pdf.y) {
430 return 1;
431 }
432 else {
433 return 2;
434 }
435 }
436
437 #ifdef __VOLUME__
438
439 /* homogeneous volume: assume shader evaluation at the start gives
440 * the volume shading coefficient for the entire line segment */
441 ccl_device VolumeIntegrateResult
kernel_volume_integrate_homogeneous(KernelGlobals * kg,ccl_addr_space PathState * state,Ray * ray,ShaderData * sd,PathRadiance * L,ccl_addr_space float3 * throughput,bool probalistic_scatter)442 kernel_volume_integrate_homogeneous(KernelGlobals *kg,
443 ccl_addr_space PathState *state,
444 Ray *ray,
445 ShaderData *sd,
446 PathRadiance *L,
447 ccl_addr_space float3 *throughput,
448 bool probalistic_scatter)
449 {
450 VolumeShaderCoefficients coeff ccl_optional_struct_init;
451
452 if (!volume_shader_sample(kg, sd, state, ray->P, &coeff))
453 return VOLUME_PATH_MISSED;
454
455 int closure_flag = sd->flag;
456 float t = ray->t;
457 float3 new_tp;
458
459 # ifdef __VOLUME_SCATTER__
460 /* randomly scatter, and if we do t is shortened */
461 if (closure_flag & SD_SCATTER) {
462 /* Sample channel, use MIS with balance heuristic. */
463 float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
464 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
465 float3 channel_pdf;
466 int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf);
467
468 /* decide if we will hit or miss */
469 bool scatter = true;
470 float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
471
472 if (probalistic_scatter) {
473 float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
474 float sample_transmittance = expf(-sample_sigma_t * t);
475
476 if (1.0f - xi >= sample_transmittance) {
477 scatter = true;
478
479 /* rescale random number so we can reuse it */
480 xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
481 }
482 else
483 scatter = false;
484 }
485
486 if (scatter) {
487 /* scattering */
488 float3 pdf;
489 float3 transmittance;
490 float sample_t;
491
492 /* distance sampling */
493 sample_t = kernel_volume_distance_sample(
494 ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf);
495
496 /* modify pdf for hit/miss decision */
497 if (probalistic_scatter)
498 pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t);
499
500 new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf);
501 t = sample_t;
502 }
503 else {
504 /* no scattering */
505 float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
506 float pdf = dot(channel_pdf, transmittance);
507 new_tp = *throughput * transmittance / pdf;
508 }
509 }
510 else
511 # endif
512 if (closure_flag & SD_EXTINCTION) {
513 /* absorption only, no sampling needed */
514 float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
515 new_tp = *throughput * transmittance;
516 }
517 else {
518 new_tp = *throughput;
519 }
520
521 /* integrate emission attenuated by extinction */
522 if (L && (closure_flag & SD_EMISSION)) {
523 float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t);
524 float3 emission = kernel_volume_emission_integrate(
525 &coeff, closure_flag, transmittance, ray->t);
526 path_radiance_accum_emission(kg, L, state, *throughput, emission);
527 }
528
529 /* modify throughput */
530 if (closure_flag & SD_EXTINCTION) {
531 *throughput = new_tp;
532
533 /* prepare to scatter to new direction */
534 if (t < ray->t) {
535 /* adjust throughput and move to new location */
536 sd->P = ray->P + t * ray->D;
537
538 return VOLUME_PATH_SCATTERED;
539 }
540 }
541
542 return VOLUME_PATH_ATTENUATED;
543 }
544
545 /* heterogeneous volume distance sampling: integrate stepping through the
546 * volume until we reach the end, get absorbed entirely, or run out of
547 * iterations. this does probabilistically scatter or get transmitted through
548 * for path tracing where we don't want to branch. */
549 ccl_device VolumeIntegrateResult
kernel_volume_integrate_heterogeneous_distance(KernelGlobals * kg,ccl_addr_space PathState * state,Ray * ray,ShaderData * sd,PathRadiance * L,ccl_addr_space float3 * throughput,const float object_step_size)550 kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
551 ccl_addr_space PathState *state,
552 Ray *ray,
553 ShaderData *sd,
554 PathRadiance *L,
555 ccl_addr_space float3 *throughput,
556 const float object_step_size)
557 {
558 float3 tp = *throughput;
559 const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
560
561 /* prepare for stepping */
562 int max_steps = kernel_data.integrator.volume_max_steps;
563 float step_offset, step_size;
564 kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
565
566 /* compute coefficients at the start */
567 float t = 0.0f;
568 float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
569
570 /* pick random color channel, we use the Veach one-sample
571 * model with balance heuristic for the channels */
572 float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
573 float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
574 bool has_scatter = false;
575
576 for (int i = 0; i < max_steps; i++) {
577 /* advance to new position */
578 float new_t = min(ray->t, (i + 1) * step_size);
579 float dt = new_t - t;
580
581 /* use random position inside this segment to sample shader,
582 * for last shorter step we remap it to fit within the segment. */
583 if (new_t == ray->t) {
584 step_offset *= (new_t - t) / step_size;
585 }
586
587 float3 new_P = ray->P + ray->D * (t + step_offset);
588 VolumeShaderCoefficients coeff ccl_optional_struct_init;
589
590 /* compute segment */
591 if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
592 int closure_flag = sd->flag;
593 float3 new_tp;
594 float3 transmittance;
595 bool scatter = false;
596
597 /* distance sampling */
598 # ifdef __VOLUME_SCATTER__
599 if ((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) {
600 has_scatter = true;
601
602 /* Sample channel, use MIS with balance heuristic. */
603 float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
604 float3 channel_pdf;
605 int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf);
606
607 /* compute transmittance over full step */
608 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
609
610 /* decide if we will scatter or continue */
611 float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
612
613 if (1.0f - xi >= sample_transmittance) {
614 /* compute sampling distance */
615 float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel);
616 float new_dt = -logf(1.0f - xi) / sample_sigma_t;
617 new_t = t + new_dt;
618
619 /* transmittance and pdf */
620 float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
621 float3 pdf = coeff.sigma_t * new_transmittance;
622
623 /* throughput */
624 new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf);
625 scatter = true;
626 }
627 else {
628 /* throughput */
629 float pdf = dot(channel_pdf, transmittance);
630 new_tp = tp * transmittance / pdf;
631
632 /* remap xi so we can reuse it and keep thing stratified */
633 xi = 1.0f - (1.0f - xi) / sample_transmittance;
634 }
635 }
636 else
637 # endif
638 if (closure_flag & SD_EXTINCTION) {
639 /* absorption only, no sampling needed */
640 transmittance = volume_color_transmittance(coeff.sigma_t, dt);
641 new_tp = tp * transmittance;
642 }
643 else {
644 transmittance = make_float3(0.0f, 0.0f, 0.0f);
645 new_tp = tp;
646 }
647
648 /* integrate emission attenuated by absorption */
649 if (L && (closure_flag & SD_EMISSION)) {
650 float3 emission = kernel_volume_emission_integrate(
651 &coeff, closure_flag, transmittance, dt);
652 path_radiance_accum_emission(kg, L, state, tp, emission);
653 }
654
655 /* modify throughput */
656 if (closure_flag & SD_EXTINCTION) {
657 tp = new_tp;
658
659 /* stop if nearly all light blocked */
660 if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) {
661 tp = make_float3(0.0f, 0.0f, 0.0f);
662 break;
663 }
664 }
665
666 /* prepare to scatter to new direction */
667 if (scatter) {
668 /* adjust throughput and move to new location */
669 sd->P = ray->P + new_t * ray->D;
670 *throughput = tp;
671
672 return VOLUME_PATH_SCATTERED;
673 }
674 else {
675 /* accumulate transmittance */
676 accum_transmittance *= transmittance;
677 }
678 }
679
680 /* stop if at the end of the volume */
681 t = new_t;
682 if (t == ray->t)
683 break;
684 }
685
686 *throughput = tp;
687
688 return VOLUME_PATH_ATTENUATED;
689 }
690
691 /* get the volume attenuation and emission over line segment defined by
692 * ray, with the assumption that there are no surfaces blocking light
693 * between the endpoints. distance sampling is used to decide if we will
694 * scatter or not. */
695 ccl_device_noinline_cpu VolumeIntegrateResult
kernel_volume_integrate(KernelGlobals * kg,ccl_addr_space PathState * state,ShaderData * sd,Ray * ray,PathRadiance * L,ccl_addr_space float3 * throughput,float step_size)696 kernel_volume_integrate(KernelGlobals *kg,
697 ccl_addr_space PathState *state,
698 ShaderData *sd,
699 Ray *ray,
700 PathRadiance *L,
701 ccl_addr_space float3 *throughput,
702 float step_size)
703 {
704 shader_setup_from_volume(kg, sd, ray);
705
706 if (step_size != FLT_MAX)
707 return kernel_volume_integrate_heterogeneous_distance(
708 kg, state, ray, sd, L, throughput, step_size);
709 else
710 return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true);
711 }
712
713 # ifndef __SPLIT_KERNEL__
714 /* Decoupled Volume Sampling
715 *
716 * VolumeSegment is list of coefficients and transmittance stored at all steps
717 * through a volume. This can then later be used for decoupled sampling as in:
718 * "Importance Sampling Techniques for Path Tracing in Participating Media"
719 *
720 * On the GPU this is only supported (but currently not enabled)
721 * for homogeneous volumes (1 step), due to
722 * no support for malloc/free and too much stack usage with a fix size array. */
723
724 typedef struct VolumeStep {
725 float3 sigma_s; /* scatter coefficient */
726 float3 sigma_t; /* extinction coefficient */
727 float3 accum_transmittance; /* accumulated transmittance including this step */
728 float3 cdf_distance; /* cumulative density function for distance sampling */
729 float t; /* distance at end of this step */
730 float shade_t; /* jittered distance where shading was done in step */
731 int closure_flag; /* shader evaluation closure flags */
732 } VolumeStep;
733
734 typedef struct VolumeSegment {
735 VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */
736 VolumeStep *steps; /* recorded steps */
737 int numsteps; /* number of steps */
738 int closure_flag; /* accumulated closure flags from all steps */
739
740 float3 accum_emission; /* accumulated emission at end of segment */
741 float3 accum_transmittance; /* accumulated transmittance at end of segment */
742 float3 accum_albedo; /* accumulated average albedo over segment */
743
744 int sampling_method; /* volume sampling method */
745 } VolumeSegment;
746
747 /* record volume steps to the end of the volume.
748 *
749 * it would be nice if we could only record up to the point that we need to scatter,
750 * but the entire segment is needed to do always scattering, rather than probabilistically
751 * hitting or missing the volume. if we don't know the transmittance at the end of the
752 * volume we can't generate stratified distance samples up to that transmittance */
753 # ifdef __VOLUME_DECOUPLED__
kernel_volume_decoupled_record(KernelGlobals * kg,PathState * state,Ray * ray,ShaderData * sd,VolumeSegment * segment,const float object_step_size)754 ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg,
755 PathState *state,
756 Ray *ray,
757 ShaderData *sd,
758 VolumeSegment *segment,
759 const float object_step_size)
760 {
761 const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
762
763 /* prepare for volume stepping */
764 int max_steps;
765 float step_size, step_offset;
766
767 if (object_step_size != FLT_MAX) {
768 max_steps = kernel_data.integrator.volume_max_steps;
769 kernel_volume_step_init(kg, state, object_step_size, ray->t, &step_size, &step_offset);
770
771 # ifdef __KERNEL_CPU__
772 /* NOTE: For the branched path tracing it's possible to have direct
773 * and indirect light integration both having volume segments allocated.
774 * We detect this using index in the pre-allocated memory. Currently we
775 * only support two segments allocated at a time, if more needed some
776 * modifications to the KernelGlobals will be needed.
777 *
778 * This gives us restrictions that decoupled record should only happen
779 * in the stack manner, meaning if there's subsequent call of decoupled
780 * record it'll need to free memory before its caller frees memory.
781 */
782 const int index = kg->decoupled_volume_steps_index;
783 assert(index < sizeof(kg->decoupled_volume_steps) / sizeof(*kg->decoupled_volume_steps));
784 if (kg->decoupled_volume_steps[index] == NULL) {
785 kg->decoupled_volume_steps[index] = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
786 }
787 segment->steps = kg->decoupled_volume_steps[index];
788 ++kg->decoupled_volume_steps_index;
789 # else
790 segment->steps = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps);
791 # endif
792 }
793 else {
794 max_steps = 1;
795 step_size = ray->t;
796 step_offset = 0.0f;
797 segment->steps = &segment->stack_step;
798 }
799
800 /* init accumulation variables */
801 float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
802 float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
803 float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f);
804 float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
805 float t = 0.0f;
806
807 segment->numsteps = 0;
808 segment->closure_flag = 0;
809 bool is_last_step_empty = false;
810
811 VolumeStep *step = segment->steps;
812
813 for (int i = 0; i < max_steps; i++, step++) {
814 /* advance to new position */
815 float new_t = min(ray->t, (i + 1) * step_size);
816 float dt = new_t - t;
817
818 /* use random position inside this segment to sample shader,
819 * for last shorter step we remap it to fit within the segment. */
820 if (new_t == ray->t) {
821 step_offset *= (new_t - t) / step_size;
822 }
823
824 float3 new_P = ray->P + ray->D * (t + step_offset);
825 VolumeShaderCoefficients coeff ccl_optional_struct_init;
826
827 /* compute segment */
828 if (volume_shader_sample(kg, sd, state, new_P, &coeff)) {
829 int closure_flag = sd->flag;
830 float3 sigma_t = coeff.sigma_t;
831
832 /* compute average albedo for channel sampling */
833 if (closure_flag & SD_SCATTER) {
834 accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t);
835 }
836
837 /* compute accumulated transmittance */
838 float3 transmittance = volume_color_transmittance(sigma_t, dt);
839
840 /* compute emission attenuated by absorption */
841 if (closure_flag & SD_EMISSION) {
842 float3 emission = kernel_volume_emission_integrate(
843 &coeff, closure_flag, transmittance, dt);
844 accum_emission += accum_transmittance * emission;
845 }
846
847 accum_transmittance *= transmittance;
848
849 /* compute pdf for distance sampling */
850 float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
851 cdf_distance = cdf_distance + pdf_distance;
852
853 /* write step data */
854 step->sigma_t = sigma_t;
855 step->sigma_s = coeff.sigma_s;
856 step->closure_flag = closure_flag;
857
858 segment->closure_flag |= closure_flag;
859
860 is_last_step_empty = false;
861 segment->numsteps++;
862 }
863 else {
864 if (is_last_step_empty) {
865 /* consecutive empty step, merge */
866 step--;
867 }
868 else {
869 /* store empty step */
870 step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
871 step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
872 step->closure_flag = 0;
873
874 segment->numsteps++;
875 is_last_step_empty = true;
876 }
877 }
878
879 step->accum_transmittance = accum_transmittance;
880 step->cdf_distance = cdf_distance;
881 step->t = new_t;
882 step->shade_t = t + step_offset;
883
884 /* stop if at the end of the volume */
885 t = new_t;
886 if (t == ray->t)
887 break;
888
889 /* stop if nearly all light blocked */
890 if (accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps &&
891 accum_transmittance.z < tp_eps)
892 break;
893 }
894
895 /* store total emission and transmittance */
896 segment->accum_emission = accum_emission;
897 segment->accum_transmittance = accum_transmittance;
898 segment->accum_albedo = accum_albedo;
899
900 /* normalize cumulative density function for distance sampling */
901 VolumeStep *last_step = segment->steps + segment->numsteps - 1;
902
903 if (!is_zero(last_step->cdf_distance)) {
904 VolumeStep *step = &segment->steps[0];
905 int numsteps = segment->numsteps;
906 float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
907
908 for (int i = 0; i < numsteps; i++, step++)
909 step->cdf_distance *= inv_cdf_distance_sum;
910 }
911 }
912
kernel_volume_decoupled_free(KernelGlobals * kg,VolumeSegment * segment)913 ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
914 {
915 if (segment->steps != &segment->stack_step) {
916 # ifdef __KERNEL_CPU__
917 /* NOTE: We only allow free last allocated segment.
918 * No random order of alloc/free is supported.
919 */
920 assert(kg->decoupled_volume_steps_index > 0);
921 assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]);
922 --kg->decoupled_volume_steps_index;
923 # else
924 free(segment->steps);
925 # endif
926 }
927 }
928 # endif /* __VOLUME_DECOUPLED__ */
929
930 /* scattering for homogeneous and heterogeneous volumes, using decoupled ray
931 * marching.
932 *
933 * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */
kernel_volume_decoupled_scatter(KernelGlobals * kg,PathState * state,Ray * ray,ShaderData * sd,float3 * throughput,float rphase,float rscatter,const VolumeSegment * segment,const float3 * light_P,bool probalistic_scatter)934 ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(KernelGlobals *kg,
935 PathState *state,
936 Ray *ray,
937 ShaderData *sd,
938 float3 *throughput,
939 float rphase,
940 float rscatter,
941 const VolumeSegment *segment,
942 const float3 *light_P,
943 bool probalistic_scatter)
944 {
945 kernel_assert(segment->closure_flag & SD_SCATTER);
946
947 /* Sample color channel, use MIS with balance heuristic. */
948 float3 channel_pdf;
949 int channel = kernel_volume_sample_channel(
950 segment->accum_albedo, *throughput, rphase, &channel_pdf);
951
952 float xi = rscatter;
953
954 /* probabilistic scattering decision based on transmittance */
955 if (probalistic_scatter) {
956 float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel);
957
958 if (1.0f - xi >= sample_transmittance) {
959 /* rescale random number so we can reuse it */
960 xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance);
961 }
962 else {
963 *throughput /= sample_transmittance;
964 return VOLUME_PATH_MISSED;
965 }
966 }
967
968 VolumeStep *step;
969 float3 transmittance;
970 float pdf, sample_t;
971 float mis_weight = 1.0f;
972 bool distance_sample = true;
973 bool use_mis = false;
974
975 if (segment->sampling_method && light_P) {
976 if (segment->sampling_method == SD_VOLUME_MIS) {
977 /* multiple importance sample: randomly pick between
978 * equiangular and distance sampling strategy */
979 if (xi < 0.5f) {
980 xi *= 2.0f;
981 }
982 else {
983 xi = (xi - 0.5f) * 2.0f;
984 distance_sample = false;
985 }
986
987 use_mis = true;
988 }
989 else {
990 /* only equiangular sampling */
991 distance_sample = false;
992 }
993 }
994
995 /* distance sampling */
996 if (distance_sample) {
997 /* find step in cdf */
998 step = segment->steps;
999
1000 float prev_t = 0.0f;
1001 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
1002
1003 if (segment->numsteps > 1) {
1004 float prev_cdf = 0.0f;
1005 float step_cdf = 1.0f;
1006 float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
1007
1008 for (int i = 0;; i++, step++) {
1009 /* todo: optimize using binary search */
1010 step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
1011
1012 if (xi < step_cdf || i == segment->numsteps - 1)
1013 break;
1014
1015 prev_cdf = step_cdf;
1016 prev_t = step->t;
1017 prev_cdf_distance = step->cdf_distance;
1018 }
1019
1020 /* remap xi so we can reuse it */
1021 xi = (xi - prev_cdf) / (step_cdf - prev_cdf);
1022
1023 /* pdf for picking step */
1024 step_pdf_distance = step->cdf_distance - prev_cdf_distance;
1025 }
1026
1027 /* determine range in which we will sample */
1028 float step_t = step->t - prev_t;
1029
1030 /* sample distance and compute transmittance */
1031 float3 distance_pdf;
1032 sample_t = prev_t + kernel_volume_distance_sample(
1033 step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
1034
1035 /* modify pdf for hit/miss decision */
1036 if (probalistic_scatter)
1037 distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance;
1038
1039 pdf = dot(channel_pdf, distance_pdf * step_pdf_distance);
1040
1041 /* multiple importance sampling */
1042 if (use_mis) {
1043 float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t);
1044 mis_weight = 2.0f * power_heuristic(pdf, equi_pdf);
1045 }
1046 }
1047 /* equi-angular sampling */
1048 else {
1049 /* sample distance */
1050 sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf);
1051
1052 /* find step in which sampled distance is located */
1053 step = segment->steps;
1054
1055 float prev_t = 0.0f;
1056 float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f);
1057
1058 if (segment->numsteps > 1) {
1059 float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
1060
1061 int numsteps = segment->numsteps;
1062 int high = numsteps - 1;
1063 int low = 0;
1064 int mid;
1065
1066 while (low < high) {
1067 mid = (low + high) >> 1;
1068
1069 if (sample_t < step[mid].t)
1070 high = mid;
1071 else if (sample_t >= step[mid + 1].t)
1072 low = mid + 1;
1073 else {
1074 /* found our interval in step[mid] .. step[mid+1] */
1075 prev_t = step[mid].t;
1076 prev_cdf_distance = step[mid].cdf_distance;
1077 step += mid + 1;
1078 break;
1079 }
1080 }
1081
1082 if (low >= numsteps - 1) {
1083 prev_t = step[numsteps - 1].t;
1084 prev_cdf_distance = step[numsteps - 1].cdf_distance;
1085 step += numsteps - 1;
1086 }
1087
1088 /* pdf for picking step with distance sampling */
1089 step_pdf_distance = step->cdf_distance - prev_cdf_distance;
1090 }
1091
1092 /* determine range in which we will sample */
1093 float step_t = step->t - prev_t;
1094 float step_sample_t = sample_t - prev_t;
1095
1096 /* compute transmittance */
1097 transmittance = volume_color_transmittance(step->sigma_t, step_sample_t);
1098
1099 /* multiple importance sampling */
1100 if (use_mis) {
1101 float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t);
1102 float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance);
1103 mis_weight = 2.0f * power_heuristic(pdf, distance_pdf);
1104 }
1105 }
1106 if (sample_t < 0.0f || pdf == 0.0f) {
1107 return VOLUME_PATH_MISSED;
1108 }
1109
1110 /* compute transmittance up to this step */
1111 if (step != segment->steps)
1112 transmittance *= (step - 1)->accum_transmittance;
1113
1114 /* modify throughput */
1115 *throughput *= step->sigma_s * transmittance * (mis_weight / pdf);
1116
1117 /* evaluate shader to create closures at shading point */
1118 if (segment->numsteps > 1) {
1119 sd->P = ray->P + step->shade_t * ray->D;
1120
1121 VolumeShaderCoefficients coeff;
1122 volume_shader_sample(kg, sd, state, sd->P, &coeff);
1123 }
1124
1125 /* move to new position */
1126 sd->P = ray->P + sample_t * ray->D;
1127
1128 return VOLUME_PATH_SCATTERED;
1129 }
1130 # endif /* __SPLIT_KERNEL */
1131
1132 /* decide if we need to use decoupled or not */
kernel_volume_use_decoupled(KernelGlobals * kg,bool heterogeneous,bool direct,int sampling_method)1133 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg,
1134 bool heterogeneous,
1135 bool direct,
1136 int sampling_method)
1137 {
1138 /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
1139 * which also means equiangular and multiple importance sampling is not
1140 * support for that case */
1141 if (!kernel_data.integrator.volume_decoupled)
1142 return false;
1143
1144 # ifdef __KERNEL_GPU__
1145 if (heterogeneous)
1146 return false;
1147 # endif
1148
1149 /* equiangular and multiple importance sampling only implemented for decoupled */
1150 if (sampling_method != 0)
1151 return true;
1152
1153 /* for all light sampling use decoupled, reusing shader evaluations is
1154 * typically faster in that case */
1155 if (direct)
1156 return kernel_data.integrator.sample_all_lights_direct;
1157 else
1158 return kernel_data.integrator.sample_all_lights_indirect;
1159 }
1160
1161 /* Volume Stack
1162 *
1163 * This is an array of object/shared ID's that the current segment of the path
1164 * is inside of. */
1165
kernel_volume_stack_init(KernelGlobals * kg,ShaderData * stack_sd,ccl_addr_space const PathState * state,ccl_addr_space const Ray * ray,ccl_addr_space VolumeStack * stack)1166 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
1167 ShaderData *stack_sd,
1168 ccl_addr_space const PathState *state,
1169 ccl_addr_space const Ray *ray,
1170 ccl_addr_space VolumeStack *stack)
1171 {
1172 /* NULL ray happens in the baker, does it need proper initialization of
1173 * camera in volume?
1174 */
1175 if (!kernel_data.cam.is_inside_volume || ray == NULL) {
1176 /* Camera is guaranteed to be in the air, only take background volume
1177 * into account in this case.
1178 */
1179 if (kernel_data.background.volume_shader != SHADER_NONE) {
1180 stack[0].shader = kernel_data.background.volume_shader;
1181 stack[0].object = PRIM_NONE;
1182 stack[1].shader = SHADER_NONE;
1183 }
1184 else {
1185 stack[0].shader = SHADER_NONE;
1186 }
1187 return;
1188 }
1189
1190 kernel_assert(state->flag & PATH_RAY_CAMERA);
1191
1192 Ray volume_ray = *ray;
1193 volume_ray.t = FLT_MAX;
1194
1195 const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY);
1196 int stack_index = 0, enclosed_index = 0;
1197
1198 # ifdef __VOLUME_RECORD_ALL__
1199 Intersection hits[2 * VOLUME_STACK_SIZE + 1];
1200 uint num_hits = scene_intersect_volume_all(
1201 kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility);
1202 if (num_hits > 0) {
1203 int enclosed_volumes[VOLUME_STACK_SIZE];
1204 Intersection *isect = hits;
1205
1206 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1207
1208 for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
1209 shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1210 if (stack_sd->flag & SD_BACKFACING) {
1211 bool need_add = true;
1212 for (int i = 0; i < enclosed_index && need_add; ++i) {
1213 /* If ray exited the volume and never entered to that volume
1214 * it means that camera is inside such a volume.
1215 */
1216 if (enclosed_volumes[i] == stack_sd->object) {
1217 need_add = false;
1218 }
1219 }
1220 for (int i = 0; i < stack_index && need_add; ++i) {
1221 /* Don't add intersections twice. */
1222 if (stack[i].object == stack_sd->object) {
1223 need_add = false;
1224 break;
1225 }
1226 }
1227 if (need_add && stack_index < VOLUME_STACK_SIZE - 1) {
1228 stack[stack_index].object = stack_sd->object;
1229 stack[stack_index].shader = stack_sd->shader;
1230 ++stack_index;
1231 }
1232 }
1233 else {
1234 /* If ray from camera enters the volume, this volume shouldn't
1235 * be added to the stack on exit.
1236 */
1237 enclosed_volumes[enclosed_index++] = stack_sd->object;
1238 }
1239 }
1240 }
1241 # else
1242 int enclosed_volumes[VOLUME_STACK_SIZE];
1243 int step = 0;
1244
1245 while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 &&
1246 step < 2 * VOLUME_STACK_SIZE) {
1247 Intersection isect;
1248 if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) {
1249 break;
1250 }
1251
1252 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1253 if (stack_sd->flag & SD_BACKFACING) {
1254 /* If ray exited the volume and never entered to that volume
1255 * it means that camera is inside such a volume.
1256 */
1257 bool need_add = true;
1258 for (int i = 0; i < enclosed_index && need_add; ++i) {
1259 /* If ray exited the volume and never entered to that volume
1260 * it means that camera is inside such a volume.
1261 */
1262 if (enclosed_volumes[i] == stack_sd->object) {
1263 need_add = false;
1264 }
1265 }
1266 for (int i = 0; i < stack_index && need_add; ++i) {
1267 /* Don't add intersections twice. */
1268 if (stack[i].object == stack_sd->object) {
1269 need_add = false;
1270 break;
1271 }
1272 }
1273 if (need_add) {
1274 stack[stack_index].object = stack_sd->object;
1275 stack[stack_index].shader = stack_sd->shader;
1276 ++stack_index;
1277 }
1278 }
1279 else {
1280 /* If ray from camera enters the volume, this volume shouldn't
1281 * be added to the stack on exit.
1282 */
1283 enclosed_volumes[enclosed_index++] = stack_sd->object;
1284 }
1285
1286 /* Move ray forward. */
1287 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1288 ++step;
1289 }
1290 # endif
1291 /* stack_index of 0 means quick checks outside of the kernel gave false
1292 * positive, nothing to worry about, just we've wasted quite a few of
1293 * ticks just to come into conclusion that camera is in the air.
1294 *
1295 * In this case we're doing the same above -- check whether background has
1296 * volume.
1297 */
1298 if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
1299 stack[0].shader = kernel_data.background.volume_shader;
1300 stack[0].object = OBJECT_NONE;
1301 stack[1].shader = SHADER_NONE;
1302 }
1303 else {
1304 stack[stack_index].shader = SHADER_NONE;
1305 }
1306 }
1307
kernel_volume_stack_enter_exit(KernelGlobals * kg,ShaderData * sd,ccl_addr_space VolumeStack * stack)1308 ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg,
1309 ShaderData *sd,
1310 ccl_addr_space VolumeStack *stack)
1311 {
1312 /* todo: we should have some way for objects to indicate if they want the
1313 * world shader to work inside them. excluding it by default is problematic
1314 * because non-volume objects can't be assumed to be closed manifolds */
1315
1316 if (!(sd->flag & SD_HAS_VOLUME))
1317 return;
1318
1319 if (sd->flag & SD_BACKFACING) {
1320 /* exit volume object: remove from stack */
1321 for (int i = 0; stack[i].shader != SHADER_NONE; i++) {
1322 if (stack[i].object == sd->object) {
1323 /* shift back next stack entries */
1324 do {
1325 stack[i] = stack[i + 1];
1326 i++;
1327 } while (stack[i].shader != SHADER_NONE);
1328
1329 return;
1330 }
1331 }
1332 }
1333 else {
1334 /* enter volume object: add to stack */
1335 int i;
1336
1337 for (i = 0; stack[i].shader != SHADER_NONE; i++) {
1338 /* already in the stack? then we have nothing to do */
1339 if (stack[i].object == sd->object)
1340 return;
1341 }
1342
1343 /* if we exceed the stack limit, ignore */
1344 if (i >= VOLUME_STACK_SIZE - 1)
1345 return;
1346
1347 /* add to the end of the stack */
1348 stack[i].shader = sd->shader;
1349 stack[i].object = sd->object;
1350 stack[i + 1].shader = SHADER_NONE;
1351 }
1352 }
1353
1354 # ifdef __SUBSURFACE__
kernel_volume_stack_update_for_subsurface(KernelGlobals * kg,ShaderData * stack_sd,Ray * ray,ccl_addr_space VolumeStack * stack)1355 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
1356 ShaderData *stack_sd,
1357 Ray *ray,
1358 ccl_addr_space VolumeStack *stack)
1359 {
1360 kernel_assert(kernel_data.integrator.use_volumes);
1361
1362 Ray volume_ray = *ray;
1363
1364 # ifdef __VOLUME_RECORD_ALL__
1365 Intersection hits[2 * VOLUME_STACK_SIZE + 1];
1366 uint num_hits = scene_intersect_volume_all(
1367 kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY);
1368 if (num_hits > 0) {
1369 Intersection *isect = hits;
1370
1371 qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
1372
1373 for (uint hit = 0; hit < num_hits; ++hit, ++isect) {
1374 shader_setup_from_ray(kg, stack_sd, isect, &volume_ray);
1375 kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1376 }
1377 }
1378 # else
1379 Intersection isect;
1380 int step = 0;
1381 float3 Pend = ray->P + ray->D * ray->t;
1382 while (step < 2 * VOLUME_STACK_SIZE &&
1383 scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) {
1384 shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray);
1385 kernel_volume_stack_enter_exit(kg, stack_sd, stack);
1386
1387 /* Move ray forward. */
1388 volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng);
1389 if (volume_ray.t != FLT_MAX) {
1390 volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t);
1391 }
1392 ++step;
1393 }
1394 # endif
1395 }
1396 # endif
1397
1398 /* Clean stack after the last bounce.
1399 *
1400 * It is expected that all volumes are closed manifolds, so at the time when ray
1401 * hits nothing (for example, it is a last bounce which goes to environment) the
1402 * only expected volume in the stack is the world's one. All the rest volume
1403 * entries should have been exited already.
1404 *
1405 * This isn't always true because of ray intersection precision issues, which
1406 * could lead us to an infinite non-world volume in the stack, causing render
1407 * artifacts.
1408 *
1409 * Use this function after the last bounce to get rid of all volumes apart from
1410 * the world's one after the last bounce to avoid render artifacts.
1411 */
kernel_volume_clean_stack(KernelGlobals * kg,ccl_addr_space VolumeStack * volume_stack)1412 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
1413 ccl_addr_space VolumeStack *volume_stack)
1414 {
1415 if (kernel_data.background.volume_shader != SHADER_NONE) {
1416 /* Keep the world's volume in stack. */
1417 volume_stack[1].shader = SHADER_NONE;
1418 }
1419 else {
1420 volume_stack[0].shader = SHADER_NONE;
1421 }
1422 }
1423
1424 #endif /* __VOLUME__ */
1425
1426 CCL_NAMESPACE_END
1427