1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 CCL_NAMESPACE_BEGIN
18 
19 /* BSSRDF using disk based importance sampling.
20  *
21  * BSSRDF Importance Sampling, SIGGRAPH 2013
22  * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
23  */
24 
25 ccl_device_inline float3
subsurface_scatter_eval(ShaderData * sd,const ShaderClosure * sc,float disk_r,float r,bool all)26 subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, float r, bool all)
27 {
28   /* this is the veach one-sample model with balance heuristic, some pdf
29    * factors drop out when using balance heuristic weighting */
30   float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
31   float pdf_sum = 0.0f;
32   float sample_weight_inv = 0.0f;
33 
34   if (!all) {
35     float sample_weight_sum = 0.0f;
36 
37     for (int i = 0; i < sd->num_closure; i++) {
38       sc = &sd->closure[i];
39 
40       if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
41         sample_weight_sum += sc->sample_weight;
42       }
43     }
44 
45     sample_weight_inv = 1.0f / sample_weight_sum;
46   }
47 
48   for (int i = 0; i < sd->num_closure; i++) {
49     sc = &sd->closure[i];
50 
51     if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
52       /* in case of branched path integrate we sample all bssrdf's once,
53        * for path trace we pick one, so adjust pdf for that */
54       float sample_weight = (all) ? 1.0f : sc->sample_weight * sample_weight_inv;
55 
56       /* compute pdf */
57       float3 eval = bssrdf_eval(sc, r);
58       float pdf = bssrdf_pdf(sc, disk_r);
59 
60       eval_sum += sc->weight * eval;
61       pdf_sum += sample_weight * pdf;
62     }
63   }
64 
65   return (pdf_sum > 0.0f) ? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
66 }
67 
68 /* replace closures with a single diffuse bsdf closure after scatter step */
subsurface_scatter_setup_diffuse_bsdf(KernelGlobals * kg,ShaderData * sd,ClosureType type,float roughness,float3 weight,float3 N)69 ccl_device void subsurface_scatter_setup_diffuse_bsdf(
70     KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N)
71 {
72   sd->flag &= ~SD_CLOSURE_FLAGS;
73   sd->num_closure = 0;
74   sd->num_closure_left = kernel_data.integrator.max_closures;
75 
76 #ifdef __PRINCIPLED__
77   if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
78     PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
79         sd, sizeof(PrincipledDiffuseBsdf), weight);
80 
81     if (bsdf) {
82       bsdf->N = N;
83       bsdf->roughness = roughness;
84       sd->flag |= bsdf_principled_diffuse_setup(bsdf);
85 
86       /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
87        * can recognize it as not being a regular Disney principled diffuse closure */
88       bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
89     }
90   }
91   else if (CLOSURE_IS_BSDF_BSSRDF(type) || CLOSURE_IS_BSSRDF(type))
92 #endif /* __PRINCIPLED__ */
93   {
94     DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
95 
96     if (bsdf) {
97       bsdf->N = N;
98       sd->flag |= bsdf_diffuse_setup(bsdf);
99 
100       /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
101        * can recognize it as not being a regular diffuse closure */
102       bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
103     }
104   }
105 }
106 
107 /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
subsurface_color_pow(float3 color,float exponent)108 ccl_device float3 subsurface_color_pow(float3 color, float exponent)
109 {
110   color = max(color, make_float3(0.0f, 0.0f, 0.0f));
111 
112   if (exponent == 1.0f) {
113     /* nothing to do */
114   }
115   else if (exponent == 0.5f) {
116     color.x = sqrtf(color.x);
117     color.y = sqrtf(color.y);
118     color.z = sqrtf(color.z);
119   }
120   else {
121     color.x = powf(color.x, exponent);
122     color.y = powf(color.y, exponent);
123     color.z = powf(color.z, exponent);
124   }
125 
126   return color;
127 }
128 
subsurface_color_bump_blur(KernelGlobals * kg,ShaderData * sd,ccl_addr_space PathState * state,float3 * eval,float3 * N)129 ccl_device void subsurface_color_bump_blur(
130     KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float3 *eval, float3 *N)
131 {
132   /* average color and texture blur at outgoing point */
133   float texture_blur;
134   float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
135 
136   /* do we have bump mapping? */
137   bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
138 
139   if (bump || texture_blur > 0.0f) {
140     /* average color and normal at incoming point */
141     shader_eval_surface(kg, sd, state, NULL, state->flag);
142     float3 in_color = shader_bssrdf_sum(sd, (bump) ? N : NULL, NULL);
143 
144     /* we simply divide out the average color and multiply with the average
145      * of the other one. we could try to do this per closure but it's quite
146      * tricky to match closures between shader evaluations, their number and
147      * order may change, this is simpler */
148     if (texture_blur > 0.0f) {
149       out_color = subsurface_color_pow(out_color, texture_blur);
150       in_color = subsurface_color_pow(in_color, texture_blur);
151 
152       *eval *= safe_divide_color(in_color, out_color);
153     }
154   }
155 }
156 
157 /* Subsurface scattering step, from a point on the surface to other
158  * nearby points on the same object.
159  */
subsurface_scatter_disk(KernelGlobals * kg,LocalIntersection * ss_isect,ShaderData * sd,const ShaderClosure * sc,uint * lcg_state,float disk_u,float disk_v,bool all)160 ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg,
161                                               LocalIntersection *ss_isect,
162                                               ShaderData *sd,
163                                               const ShaderClosure *sc,
164                                               uint *lcg_state,
165                                               float disk_u,
166                                               float disk_v,
167                                               bool all)
168 {
169   /* pick random axis in local frame and point on disk */
170   float3 disk_N, disk_T, disk_B;
171   float pick_pdf_N, pick_pdf_T, pick_pdf_B;
172 
173   disk_N = sd->Ng;
174   make_orthonormals(disk_N, &disk_T, &disk_B);
175 
176   if (disk_v < 0.5f) {
177     pick_pdf_N = 0.5f;
178     pick_pdf_T = 0.25f;
179     pick_pdf_B = 0.25f;
180     disk_v *= 2.0f;
181   }
182   else if (disk_v < 0.75f) {
183     float3 tmp = disk_N;
184     disk_N = disk_T;
185     disk_T = tmp;
186     pick_pdf_N = 0.25f;
187     pick_pdf_T = 0.5f;
188     pick_pdf_B = 0.25f;
189     disk_v = (disk_v - 0.5f) * 4.0f;
190   }
191   else {
192     float3 tmp = disk_N;
193     disk_N = disk_B;
194     disk_B = tmp;
195     pick_pdf_N = 0.25f;
196     pick_pdf_T = 0.25f;
197     pick_pdf_B = 0.5f;
198     disk_v = (disk_v - 0.75f) * 4.0f;
199   }
200 
201   /* sample point on disk */
202   float phi = M_2PI_F * disk_v;
203   float disk_height, disk_r;
204 
205   bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
206 
207   float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
208 
209   /* create ray */
210 #ifdef __SPLIT_KERNEL__
211   Ray ray_object = ss_isect->ray;
212   Ray *ray = &ray_object;
213 #else
214   Ray *ray = &ss_isect->ray;
215 #endif
216   ray->P = sd->P + disk_N * disk_height + disk_P;
217   ray->D = -disk_N;
218   ray->t = 2.0f * disk_height;
219   ray->dP = sd->dP;
220   ray->dD = differential3_zero();
221   ray->time = sd->time;
222 
223   /* intersect with the same object. if multiple intersections are found it
224    * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
225   scene_intersect_local(kg, ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
226   int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
227 
228   for (int hit = 0; hit < num_eval_hits; hit++) {
229     /* Quickly retrieve P and Ng without setting up ShaderData. */
230     float3 hit_P;
231     if (sd->type & PRIMITIVE_TRIANGLE) {
232       hit_P = triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray);
233     }
234 #ifdef __OBJECT_MOTION__
235     else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
236       float3 verts[3];
237       motion_triangle_vertices(kg,
238                                sd->object,
239                                kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
240                                sd->time,
241                                verts);
242       hit_P = motion_triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray, verts);
243     }
244 #endif /* __OBJECT_MOTION__ */
245     else {
246       ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
247       continue;
248     }
249 
250     float3 hit_Ng = ss_isect->Ng[hit];
251     if (ss_isect->hits[hit].object != OBJECT_NONE) {
252       object_normal_transform(kg, sd, &hit_Ng);
253     }
254 
255     /* Probability densities for local frame axes. */
256     float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
257     float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
258     float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
259 
260     /* Multiple importance sample between 3 axes, power heuristic
261      * found to be slightly better than balance heuristic. pdf_N
262      * in the MIS weight and denominator cancelled out. */
263     float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
264     if (ss_isect->num_hits > BSSRDF_MAX_HITS) {
265       w *= ss_isect->num_hits / (float)BSSRDF_MAX_HITS;
266     }
267 
268     /* Real distance to sampled point. */
269     float r = len(hit_P - sd->P);
270 
271     /* Evaluate profiles. */
272     float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
273 
274     ss_isect->weight[hit] = eval;
275   }
276 
277 #ifdef __SPLIT_KERNEL__
278   ss_isect->ray = *ray;
279 #endif
280 
281   return num_eval_hits;
282 }
283 
subsurface_scatter_multi_setup(KernelGlobals * kg,LocalIntersection * ss_isect,int hit,ShaderData * sd,ccl_addr_space PathState * state,ClosureType type,float roughness)284 ccl_device_noinline void subsurface_scatter_multi_setup(KernelGlobals *kg,
285                                                         LocalIntersection *ss_isect,
286                                                         int hit,
287                                                         ShaderData *sd,
288                                                         ccl_addr_space PathState *state,
289                                                         ClosureType type,
290                                                         float roughness)
291 {
292 #ifdef __SPLIT_KERNEL__
293   Ray ray_object = ss_isect->ray;
294   Ray *ray = &ray_object;
295 #else
296   Ray *ray = &ss_isect->ray;
297 #endif
298 
299   /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
300 #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
301   kernel_split_params.dummy_sd_flag = sd->flag;
302 #endif
303 
304   /* Setup new shading point. */
305   shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
306 
307   /* Optionally blur colors and bump mapping. */
308   float3 weight = ss_isect->weight[hit];
309   float3 N = sd->N;
310   subsurface_color_bump_blur(kg, sd, state, &weight, &N);
311 
312   /* Setup diffuse BSDF. */
313   subsurface_scatter_setup_diffuse_bsdf(kg, sd, type, roughness, weight, N);
314 }
315 
316 /* Random walk subsurface scattering.
317  *
318  * "Practical and Controllable Subsurface Scattering for Production Path
319  *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
320 
subsurface_random_walk_remap(const float A,const float d,float * sigma_t,float * sigma_s)321 ccl_device void subsurface_random_walk_remap(const float A,
322                                              const float d,
323                                              float *sigma_t,
324                                              float *sigma_s)
325 {
326   /* Compute attenuation and scattering coefficients from albedo. */
327   const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f)));
328   const float s = 1.9f - A + 3.5f * sqr(A - 0.8f);
329 
330   *sigma_t = 1.0f / fmaxf(d * s, 1e-16f);
331   *sigma_s = *sigma_t * a;
332 }
333 
subsurface_random_walk_coefficients(const ShaderClosure * sc,float3 * sigma_t,float3 * sigma_s,float3 * weight)334 ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc,
335                                                     float3 *sigma_t,
336                                                     float3 *sigma_s,
337                                                     float3 *weight)
338 {
339   const Bssrdf *bssrdf = (const Bssrdf *)sc;
340   const float3 A = bssrdf->albedo;
341   const float3 d = bssrdf->radius;
342   float sigma_t_x, sigma_t_y, sigma_t_z;
343   float sigma_s_x, sigma_s_y, sigma_s_z;
344 
345   subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x);
346   subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y);
347   subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z);
348 
349   *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
350   *sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z);
351 
352   /* Closure mixing and Fresnel weights separate from albedo. */
353   *weight = safe_divide_color(bssrdf->weight, A);
354 }
355 
356 #ifdef __KERNEL_OPTIX__
357 ccl_device_inline /* inline trace calls */
358 #else
359 ccl_device_noinline
360 #endif
361     bool
subsurface_random_walk(KernelGlobals * kg,LocalIntersection * ss_isect,ShaderData * sd,ccl_addr_space PathState * state,const ShaderClosure * sc,const float bssrdf_u,const float bssrdf_v)362     subsurface_random_walk(KernelGlobals *kg,
363                            LocalIntersection *ss_isect,
364                            ShaderData *sd,
365                            ccl_addr_space PathState *state,
366                            const ShaderClosure *sc,
367                            const float bssrdf_u,
368                            const float bssrdf_v)
369 {
370   /* Sample diffuse surface scatter into the object. */
371   float3 D;
372   float pdf;
373   sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf);
374   if (dot(-sd->Ng, D) <= 0.0f) {
375     return 0;
376   }
377 
378   /* Convert subsurface to volume coefficients. */
379   float3 sigma_t, sigma_s;
380   float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
381   subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput);
382 
383   /* Setup ray. */
384 #ifdef __SPLIT_KERNEL__
385   Ray ray_object = ss_isect->ray;
386   Ray *ray = &ray_object;
387 #else
388   Ray *ray = &ss_isect->ray;
389 #endif
390   ray->P = ray_offset(sd->P, -sd->Ng);
391   ray->D = D;
392   ray->t = FLT_MAX;
393   ray->time = sd->time;
394 
395   /* Modify state for RNGs, decorrelated from other paths. */
396   uint prev_rng_offset = state->rng_offset;
397   uint prev_rng_hash = state->rng_hash;
398   state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef);
399 
400   /* Random walk until we hit the surface again. */
401   bool hit = false;
402 
403   for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) {
404     /* Advance random number offset. */
405     state->rng_offset += PRNG_BOUNCE_NUM;
406 
407     if (bounce > 0) {
408       /* Sample scattering direction. */
409       const float anisotropy = 0.0f;
410       float scatter_u, scatter_v;
411       path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v);
412       ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL);
413     }
414 
415     /* Sample color channel, use MIS with balance heuristic. */
416     float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
417     float3 albedo = safe_divide_color(sigma_s, sigma_t);
418     float3 channel_pdf;
419     int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf);
420 
421     /* Distance sampling. */
422     float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
423     float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
424     float t = -logf(1.0f - rdist) / sample_sigma_t;
425 
426     ray->t = t;
427     scene_intersect_local(kg, ray, ss_isect, sd->object, NULL, 1);
428     hit = (ss_isect->num_hits > 0);
429 
430     if (hit) {
431 #ifdef __KERNEL_OPTIX__
432       /* t is always in world space with OptiX. */
433       t = ss_isect->hits[0].t;
434 #else
435       /* Compute world space distance to surface hit. */
436       float3 D = ray->D;
437       object_inverse_dir_transform(kg, sd, &D);
438       D = normalize(D) * ss_isect->hits[0].t;
439       object_dir_transform(kg, sd, &D);
440       t = len(D);
441 #endif
442     }
443 
444     /* Advance to new scatter location. */
445     ray->P += t * ray->D;
446 
447     /* Update throughput. */
448     float3 transmittance = volume_color_transmittance(sigma_t, t);
449     float pdf = dot(channel_pdf, (hit) ? transmittance : sigma_t * transmittance);
450     throughput *= ((hit) ? transmittance : sigma_s * transmittance) / pdf;
451 
452     if (hit) {
453       /* If we hit the surface, we are done. */
454       break;
455     }
456 
457     /* Russian roulette. */
458     float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
459     float probability = min(max3(fabs(throughput)), 1.0f);
460     if (terminate >= probability) {
461       break;
462     }
463     throughput /= probability;
464   }
465 
466   kernel_assert(isfinite_safe(throughput.x) && isfinite_safe(throughput.y) &&
467                 isfinite_safe(throughput.z));
468 
469   state->rng_offset = prev_rng_offset;
470   state->rng_hash = prev_rng_hash;
471 
472   /* Return number of hits in ss_isect. */
473   if (!hit) {
474     return 0;
475   }
476 
477   /* TODO: gain back performance lost from merging with disk BSSRDF. We
478    * only need to return on hit so this indirect ray push/pop overhead
479    * is not actually needed, but it does keep the code simpler. */
480   ss_isect->weight[0] = throughput;
481 #ifdef __SPLIT_KERNEL__
482   ss_isect->ray = *ray;
483 #endif
484 
485   return 1;
486 }
487 
subsurface_scatter_multi_intersect(KernelGlobals * kg,LocalIntersection * ss_isect,ShaderData * sd,ccl_addr_space PathState * state,const ShaderClosure * sc,uint * lcg_state,float bssrdf_u,float bssrdf_v,bool all)488 ccl_device_inline int subsurface_scatter_multi_intersect(KernelGlobals *kg,
489                                                          LocalIntersection *ss_isect,
490                                                          ShaderData *sd,
491                                                          ccl_addr_space PathState *state,
492                                                          const ShaderClosure *sc,
493                                                          uint *lcg_state,
494                                                          float bssrdf_u,
495                                                          float bssrdf_v,
496                                                          bool all)
497 {
498   if (CLOSURE_IS_DISK_BSSRDF(sc->type)) {
499     return subsurface_scatter_disk(kg, ss_isect, sd, sc, lcg_state, bssrdf_u, bssrdf_v, all);
500   }
501   else {
502     return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v);
503   }
504 }
505 
506 CCL_NAMESPACE_END
507