1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __KERNEL_BSSRDF_H__
18 #define __KERNEL_BSSRDF_H__
19 
20 CCL_NAMESPACE_BEGIN
21 
22 typedef ccl_addr_space struct Bssrdf {
23   SHADER_CLOSURE_BASE;
24 
25   float3 radius;
26   float3 albedo;
27   float sharpness;
28   float texture_blur;
29   float roughness;
30   float channels;
31 } Bssrdf;
32 
33 static_assert(sizeof(ShaderClosure) >= sizeof(Bssrdf), "Bssrdf is too large!");
34 
35 /* Planar Truncated Gaussian
36  *
37  * Note how this is different from the typical gaussian, this one integrates
38  * to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky
39  * that integrating x*exp(-x) gives a nice closed form solution. */
40 
41 /* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */
42 #define GAUSS_TRUNCATE 12.46f
43 
bssrdf_gaussian_eval(const float radius,float r)44 ccl_device float bssrdf_gaussian_eval(const float radius, float r)
45 {
46   /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
47    * = 1 - exp(-Rm*Rm/(2*v)) */
48   const float v = radius * radius * (0.25f * 0.25f);
49   const float Rm = sqrtf(v * GAUSS_TRUNCATE);
50 
51   if (r >= Rm)
52     return 0.0f;
53 
54   return expf(-r * r / (2.0f * v)) / (2.0f * M_PI_F * v);
55 }
56 
bssrdf_gaussian_pdf(const float radius,float r)57 ccl_device float bssrdf_gaussian_pdf(const float radius, float r)
58 {
59   /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
60   const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
61 
62   return bssrdf_gaussian_eval(radius, r) * (1.0f / (area_truncated));
63 }
64 
bssrdf_gaussian_sample(const float radius,float xi,float * r,float * h)65 ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h)
66 {
67   /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
68    * r = sqrt(-2*v*logf(xi)) */
69   const float v = radius * radius * (0.25f * 0.25f);
70   const float Rm = sqrtf(v * GAUSS_TRUNCATE);
71 
72   /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
73   const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
74 
75   /* r(xi) */
76   const float r_squared = -2.0f * v * logf(1.0f - xi * area_truncated);
77   *r = sqrtf(r_squared);
78 
79   /* h^2 + r^2 = Rm^2 */
80   *h = safe_sqrtf(Rm * Rm - r_squared);
81 }
82 
83 /* Planar Cubic BSSRDF falloff
84  *
85  * This is basically (Rm - x)^3, with some factors to normalize it. For sampling
86  * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
87  * far as I can tell has no closed form solution. So we get an iterative solution
88  * instead with newton-raphson. */
89 
bssrdf_cubic_eval(const float radius,const float sharpness,float r)90 ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r)
91 {
92   if (sharpness == 0.0f) {
93     const float Rm = radius;
94 
95     if (r >= Rm)
96       return 0.0f;
97 
98     /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
99     const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm;
100     const float f = Rm - r;
101     const float num = f * f * f;
102 
103     return (10.0f * num) / (Rm5 * M_PI_F);
104   }
105   else {
106     float Rm = radius * (1.0f + sharpness);
107 
108     if (r >= Rm)
109       return 0.0f;
110 
111     /* custom variation with extra sharpness, to match the previous code */
112     const float y = 1.0f / (1.0f + sharpness);
113     float Rmy, ry, ryinv;
114 
115     if (sharpness == 1.0f) {
116       Rmy = sqrtf(Rm);
117       ry = sqrtf(r);
118       ryinv = (ry > 0.0f) ? 1.0f / ry : 0.0f;
119     }
120     else {
121       Rmy = powf(Rm, y);
122       ry = powf(r, y);
123       ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f;
124     }
125 
126     const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy;
127     const float f = Rmy - ry;
128     const float num = f * (f * f) * (y * ryinv);
129 
130     return (10.0f * num) / (Rmy5 * M_PI_F);
131   }
132 }
133 
bssrdf_cubic_pdf(const float radius,const float sharpness,float r)134 ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r)
135 {
136   return bssrdf_cubic_eval(radius, sharpness, r);
137 }
138 
139 /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
bssrdf_cubic_quintic_root_find(float xi)140 ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi)
141 {
142   /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
143    * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
144    * should not be too bad */
145   const float tolerance = 1e-6f;
146   const int max_iteration_count = 10;
147   float x = 0.25f;
148   int i;
149 
150   for (i = 0; i < max_iteration_count; i++) {
151     float x2 = x * x;
152     float x3 = x2 * x;
153     float nx = (1.0f - x);
154 
155     float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi;
156     float f_ = 20.0f * (x * nx) * (nx * nx);
157 
158     if (fabsf(f) < tolerance || f_ == 0.0f)
159       break;
160 
161     x = saturate(x - f / f_);
162   }
163 
164   return x;
165 }
166 
bssrdf_cubic_sample(const float radius,const float sharpness,float xi,float * r,float * h)167 ccl_device void bssrdf_cubic_sample(
168     const float radius, const float sharpness, float xi, float *r, float *h)
169 {
170   float Rm = radius;
171   float r_ = bssrdf_cubic_quintic_root_find(xi);
172 
173   if (sharpness != 0.0f) {
174     r_ = powf(r_, 1.0f + sharpness);
175     Rm *= (1.0f + sharpness);
176   }
177 
178   r_ *= Rm;
179   *r = r_;
180 
181   /* h^2 + r^2 = Rm^2 */
182   *h = safe_sqrtf(Rm * Rm - r_ * r_);
183 }
184 
185 /* Approximate Reflectance Profiles
186  * http://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf
187  */
188 
189 /* This is a bit arbitrary, just need big enough radius so it matches
190  * the mean free length, but still not too big so sampling is still
191  * effective. Might need some further tweaks.
192  */
193 #define BURLEY_TRUNCATE 16.0f
194 #define BURLEY_TRUNCATE_CDF 0.9963790093708328f  // cdf(BURLEY_TRUNCATE)
195 
bssrdf_burley_fitting(float A)196 ccl_device_inline float bssrdf_burley_fitting(float A)
197 {
198   /* Diffuse surface transmission, equation (6). */
199   return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
200 }
201 
202 /* Scale mean free path length so it gives similar looking result
203  * to Cubic and Gaussian models.
204  */
bssrdf_burley_compatible_mfp(float3 r)205 ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
206 {
207   return 0.25f * M_1_PI_F * r;
208 }
209 
bssrdf_burley_setup(Bssrdf * bssrdf)210 ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
211 {
212   /* Mean free path length. */
213   const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
214   /* Surface albedo. */
215   const float3 A = bssrdf->albedo;
216   const float3 s = make_float3(
217       bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z));
218 
219   bssrdf->radius = l / s;
220 }
221 
bssrdf_burley_eval(const float d,float r)222 ccl_device float bssrdf_burley_eval(const float d, float r)
223 {
224   const float Rm = BURLEY_TRUNCATE * d;
225 
226   if (r >= Rm)
227     return 0.0f;
228 
229   /* Burley reflectance profile, equation (3).
230    *
231    * NOTES:
232    * - Surface albedo is already included into sc->weight, no need to
233    *   multiply by this term here.
234    * - This is normalized diffuse model, so the equation is multiplied
235    *   by 2*pi, which also matches cdf().
236    */
237   float exp_r_3_d = expf(-r / (3.0f * d));
238   float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
239   return (exp_r_d + exp_r_3_d) / (4.0f * d);
240 }
241 
bssrdf_burley_pdf(const float d,float r)242 ccl_device float bssrdf_burley_pdf(const float d, float r)
243 {
244   return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF);
245 }
246 
247 /* Find the radius for desired CDF value.
248  * Returns scaled radius, meaning the result is to be scaled up by d.
249  * Since there's no closed form solution we do Newton-Raphson method to find it.
250  */
bssrdf_burley_root_find(float xi)251 ccl_device_forceinline float bssrdf_burley_root_find(float xi)
252 {
253   const float tolerance = 1e-6f;
254   const int max_iteration_count = 10;
255   /* Do initial guess based on manual curve fitting, this allows us to reduce
256    * number of iterations to maximum 4 across the [0..1] range. We keep maximum
257    * number of iteration higher just to be sure we didn't miss root in some
258    * corner case.
259    */
260   float r;
261   if (xi <= 0.9f) {
262     r = expf(xi * xi * 2.4f) - 1.0f;
263   }
264   else {
265     /* TODO(sergey): Some nicer curve fit is possible here. */
266     r = 15.0f;
267   }
268   /* Solve against scaled radius. */
269   for (int i = 0; i < max_iteration_count; i++) {
270     float exp_r_3 = expf(-r / 3.0f);
271     float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
272     float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
273     float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
274 
275     if (fabsf(f) < tolerance || f_ == 0.0f) {
276       break;
277     }
278 
279     r = r - f / f_;
280     if (r < 0.0f) {
281       r = 0.0f;
282     }
283   }
284   return r;
285 }
286 
bssrdf_burley_sample(const float d,float xi,float * r,float * h)287 ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h)
288 {
289   const float Rm = BURLEY_TRUNCATE * d;
290   const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
291 
292   *r = r_;
293 
294   /* h^2 + r^2 = Rm^2 */
295   *h = safe_sqrtf(Rm * Rm - r_ * r_);
296 }
297 
298 /* None BSSRDF falloff
299  *
300  * Samples distributed over disk with no falloff, for reference. */
301 
bssrdf_none_eval(const float radius,float r)302 ccl_device float bssrdf_none_eval(const float radius, float r)
303 {
304   const float Rm = radius;
305   return (r < Rm) ? 1.0f : 0.0f;
306 }
307 
bssrdf_none_pdf(const float radius,float r)308 ccl_device float bssrdf_none_pdf(const float radius, float r)
309 {
310   /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
311   const float Rm = radius;
312   const float area = (M_PI_F * Rm * Rm);
313 
314   return bssrdf_none_eval(radius, r) / area;
315 }
316 
bssrdf_none_sample(const float radius,float xi,float * r,float * h)317 ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h)
318 {
319   /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
320    * r = sqrt(xi)*Rm */
321   const float Rm = radius;
322   const float r_ = sqrtf(xi) * Rm;
323 
324   *r = r_;
325 
326   /* h^2 + r^2 = Rm^2 */
327   *h = safe_sqrtf(Rm * Rm - r_ * r_);
328 }
329 
330 /* Generic */
331 
bssrdf_alloc(ShaderData * sd,float3 weight)332 ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
333 {
334   Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
335 
336   if (bssrdf == NULL) {
337     return NULL;
338   }
339 
340   float sample_weight = fabsf(average(weight));
341   bssrdf->sample_weight = sample_weight;
342   return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
343 }
344 
bssrdf_setup(ShaderData * sd,Bssrdf * bssrdf,ClosureType type)345 ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type)
346 {
347   int flag = 0;
348   int bssrdf_channels = 3;
349   float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
350 
351   /* Verify if the radii are large enough to sample without precision issues. */
352   if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
353     diffuse_weight.x = bssrdf->weight.x;
354     bssrdf->weight.x = 0.0f;
355     bssrdf->radius.x = 0.0f;
356     bssrdf_channels--;
357   }
358   if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
359     diffuse_weight.y = bssrdf->weight.y;
360     bssrdf->weight.y = 0.0f;
361     bssrdf->radius.y = 0.0f;
362     bssrdf_channels--;
363   }
364   if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
365     diffuse_weight.z = bssrdf->weight.z;
366     bssrdf->weight.z = 0.0f;
367     bssrdf->radius.z = 0.0f;
368     bssrdf_channels--;
369   }
370 
371   if (bssrdf_channels < 3) {
372     /* Add diffuse BSDF if any radius too small. */
373 #ifdef __PRINCIPLED__
374     if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
375       float roughness = bssrdf->roughness;
376       float3 N = bssrdf->N;
377 
378       PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
379           sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
380 
381       if (bsdf) {
382         bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
383         bsdf->N = N;
384         bsdf->roughness = roughness;
385         flag |= bsdf_principled_diffuse_setup(bsdf);
386       }
387     }
388     else
389 #endif /* __PRINCIPLED__ */
390     {
391       DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
392 
393       if (bsdf) {
394         bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
395         bsdf->N = bssrdf->N;
396         flag |= bsdf_diffuse_setup(bsdf);
397       }
398     }
399   }
400 
401   /* Setup BSSRDF if radius is large enough. */
402   if (bssrdf_channels > 0) {
403     bssrdf->type = type;
404     bssrdf->channels = bssrdf_channels;
405     bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
406     bssrdf->texture_blur = saturate(bssrdf->texture_blur);
407     bssrdf->sharpness = saturate(bssrdf->sharpness);
408 
409     if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
410         type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
411         type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
412       bssrdf_burley_setup(bssrdf);
413     }
414 
415     flag |= SD_BSSRDF;
416   }
417   else {
418     bssrdf->type = type;
419     bssrdf->sample_weight = 0.0f;
420   }
421 
422   return flag;
423 }
424 
bssrdf_sample(const ShaderClosure * sc,float xi,float * r,float * h)425 ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h)
426 {
427   const Bssrdf *bssrdf = (const Bssrdf *)sc;
428   float radius;
429 
430   /* Sample color channel and reuse random number. Only a subset of channels
431    * may be used if their radius was too small to handle as BSSRDF. */
432   xi *= bssrdf->channels;
433 
434   if (xi < 1.0f) {
435     radius = (bssrdf->radius.x > 0.0f) ?
436                  bssrdf->radius.x :
437                  (bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
438   }
439   else if (xi < 2.0f) {
440     xi -= 1.0f;
441     radius = (bssrdf->radius.x > 0.0f && bssrdf->radius.y > 0.0f) ? bssrdf->radius.y :
442                                                                     bssrdf->radius.z;
443   }
444   else {
445     xi -= 2.0f;
446     radius = bssrdf->radius.z;
447   }
448 
449   /* Sample BSSRDF. */
450   if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
451     bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
452   }
453   else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
454     bssrdf_gaussian_sample(radius, xi, r, h);
455   }
456   else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
457           *     bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) */
458     bssrdf_burley_sample(radius, xi, r, h);
459   }
460 }
461 
bssrdf_channel_pdf(const Bssrdf * bssrdf,float radius,float r)462 ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
463 {
464   if (radius == 0.0f) {
465     return 0.0f;
466   }
467   else if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
468     return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
469   }
470   else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
471     return bssrdf_gaussian_pdf(radius, r);
472   }
473   else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
474           *     bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
475     return bssrdf_burley_pdf(radius, r);
476   }
477 }
478 
bssrdf_eval(const ShaderClosure * sc,float r)479 ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r)
480 {
481   const Bssrdf *bssrdf = (const Bssrdf *)sc;
482 
483   return make_float3(bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
484                      bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
485                      bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
486 }
487 
bssrdf_pdf(const ShaderClosure * sc,float r)488 ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
489 {
490   const Bssrdf *bssrdf = (const Bssrdf *)sc;
491   float3 pdf = bssrdf_eval(sc, r);
492 
493   return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
494 }
495 
496 CCL_NAMESPACE_END
497 
498 #endif /* __KERNEL_BSSRDF_H__ */
499