1 /*
2 * Copyright 2011-2013 Blender Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef __KERNEL_BSSRDF_H__
18 #define __KERNEL_BSSRDF_H__
19
20 CCL_NAMESPACE_BEGIN
21
22 typedef ccl_addr_space struct Bssrdf {
23 SHADER_CLOSURE_BASE;
24
25 float3 radius;
26 float3 albedo;
27 float sharpness;
28 float texture_blur;
29 float roughness;
30 float channels;
31 } Bssrdf;
32
33 static_assert(sizeof(ShaderClosure) >= sizeof(Bssrdf), "Bssrdf is too large!");
34
35 /* Planar Truncated Gaussian
36 *
37 * Note how this is different from the typical gaussian, this one integrates
38 * to 1 over the plane (where you get an extra 2*pi*x factor). We are lucky
39 * that integrating x*exp(-x) gives a nice closed form solution. */
40
41 /* paper suggests 1/12.46 which is much too small, suspect it's *12.46 */
42 #define GAUSS_TRUNCATE 12.46f
43
bssrdf_gaussian_eval(const float radius,float r)44 ccl_device float bssrdf_gaussian_eval(const float radius, float r)
45 {
46 /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm
47 * = 1 - exp(-Rm*Rm/(2*v)) */
48 const float v = radius * radius * (0.25f * 0.25f);
49 const float Rm = sqrtf(v * GAUSS_TRUNCATE);
50
51 if (r >= Rm)
52 return 0.0f;
53
54 return expf(-r * r / (2.0f * v)) / (2.0f * M_PI_F * v);
55 }
56
bssrdf_gaussian_pdf(const float radius,float r)57 ccl_device float bssrdf_gaussian_pdf(const float radius, float r)
58 {
59 /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
60 const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
61
62 return bssrdf_gaussian_eval(radius, r) * (1.0f / (area_truncated));
63 }
64
bssrdf_gaussian_sample(const float radius,float xi,float * r,float * h)65 ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h)
66 {
67 /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v))
68 * r = sqrt(-2*v*logf(xi)) */
69 const float v = radius * radius * (0.25f * 0.25f);
70 const float Rm = sqrtf(v * GAUSS_TRUNCATE);
71
72 /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */
73 const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE);
74
75 /* r(xi) */
76 const float r_squared = -2.0f * v * logf(1.0f - xi * area_truncated);
77 *r = sqrtf(r_squared);
78
79 /* h^2 + r^2 = Rm^2 */
80 *h = safe_sqrtf(Rm * Rm - r_squared);
81 }
82
83 /* Planar Cubic BSSRDF falloff
84 *
85 * This is basically (Rm - x)^3, with some factors to normalize it. For sampling
86 * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
87 * far as I can tell has no closed form solution. So we get an iterative solution
88 * instead with newton-raphson. */
89
bssrdf_cubic_eval(const float radius,const float sharpness,float r)90 ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r)
91 {
92 if (sharpness == 0.0f) {
93 const float Rm = radius;
94
95 if (r >= Rm)
96 return 0.0f;
97
98 /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
99 const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm;
100 const float f = Rm - r;
101 const float num = f * f * f;
102
103 return (10.0f * num) / (Rm5 * M_PI_F);
104 }
105 else {
106 float Rm = radius * (1.0f + sharpness);
107
108 if (r >= Rm)
109 return 0.0f;
110
111 /* custom variation with extra sharpness, to match the previous code */
112 const float y = 1.0f / (1.0f + sharpness);
113 float Rmy, ry, ryinv;
114
115 if (sharpness == 1.0f) {
116 Rmy = sqrtf(Rm);
117 ry = sqrtf(r);
118 ryinv = (ry > 0.0f) ? 1.0f / ry : 0.0f;
119 }
120 else {
121 Rmy = powf(Rm, y);
122 ry = powf(r, y);
123 ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f;
124 }
125
126 const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy;
127 const float f = Rmy - ry;
128 const float num = f * (f * f) * (y * ryinv);
129
130 return (10.0f * num) / (Rmy5 * M_PI_F);
131 }
132 }
133
bssrdf_cubic_pdf(const float radius,const float sharpness,float r)134 ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r)
135 {
136 return bssrdf_cubic_eval(radius, sharpness, r);
137 }
138
139 /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
bssrdf_cubic_quintic_root_find(float xi)140 ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi)
141 {
142 /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
143 * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
144 * should not be too bad */
145 const float tolerance = 1e-6f;
146 const int max_iteration_count = 10;
147 float x = 0.25f;
148 int i;
149
150 for (i = 0; i < max_iteration_count; i++) {
151 float x2 = x * x;
152 float x3 = x2 * x;
153 float nx = (1.0f - x);
154
155 float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi;
156 float f_ = 20.0f * (x * nx) * (nx * nx);
157
158 if (fabsf(f) < tolerance || f_ == 0.0f)
159 break;
160
161 x = saturate(x - f / f_);
162 }
163
164 return x;
165 }
166
bssrdf_cubic_sample(const float radius,const float sharpness,float xi,float * r,float * h)167 ccl_device void bssrdf_cubic_sample(
168 const float radius, const float sharpness, float xi, float *r, float *h)
169 {
170 float Rm = radius;
171 float r_ = bssrdf_cubic_quintic_root_find(xi);
172
173 if (sharpness != 0.0f) {
174 r_ = powf(r_, 1.0f + sharpness);
175 Rm *= (1.0f + sharpness);
176 }
177
178 r_ *= Rm;
179 *r = r_;
180
181 /* h^2 + r^2 = Rm^2 */
182 *h = safe_sqrtf(Rm * Rm - r_ * r_);
183 }
184
185 /* Approximate Reflectance Profiles
186 * http://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf
187 */
188
189 /* This is a bit arbitrary, just need big enough radius so it matches
190 * the mean free length, but still not too big so sampling is still
191 * effective. Might need some further tweaks.
192 */
193 #define BURLEY_TRUNCATE 16.0f
194 #define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
195
bssrdf_burley_fitting(float A)196 ccl_device_inline float bssrdf_burley_fitting(float A)
197 {
198 /* Diffuse surface transmission, equation (6). */
199 return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
200 }
201
202 /* Scale mean free path length so it gives similar looking result
203 * to Cubic and Gaussian models.
204 */
bssrdf_burley_compatible_mfp(float3 r)205 ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
206 {
207 return 0.25f * M_1_PI_F * r;
208 }
209
bssrdf_burley_setup(Bssrdf * bssrdf)210 ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf)
211 {
212 /* Mean free path length. */
213 const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
214 /* Surface albedo. */
215 const float3 A = bssrdf->albedo;
216 const float3 s = make_float3(
217 bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z));
218
219 bssrdf->radius = l / s;
220 }
221
bssrdf_burley_eval(const float d,float r)222 ccl_device float bssrdf_burley_eval(const float d, float r)
223 {
224 const float Rm = BURLEY_TRUNCATE * d;
225
226 if (r >= Rm)
227 return 0.0f;
228
229 /* Burley reflectance profile, equation (3).
230 *
231 * NOTES:
232 * - Surface albedo is already included into sc->weight, no need to
233 * multiply by this term here.
234 * - This is normalized diffuse model, so the equation is multiplied
235 * by 2*pi, which also matches cdf().
236 */
237 float exp_r_3_d = expf(-r / (3.0f * d));
238 float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
239 return (exp_r_d + exp_r_3_d) / (4.0f * d);
240 }
241
bssrdf_burley_pdf(const float d,float r)242 ccl_device float bssrdf_burley_pdf(const float d, float r)
243 {
244 return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF);
245 }
246
247 /* Find the radius for desired CDF value.
248 * Returns scaled radius, meaning the result is to be scaled up by d.
249 * Since there's no closed form solution we do Newton-Raphson method to find it.
250 */
bssrdf_burley_root_find(float xi)251 ccl_device_forceinline float bssrdf_burley_root_find(float xi)
252 {
253 const float tolerance = 1e-6f;
254 const int max_iteration_count = 10;
255 /* Do initial guess based on manual curve fitting, this allows us to reduce
256 * number of iterations to maximum 4 across the [0..1] range. We keep maximum
257 * number of iteration higher just to be sure we didn't miss root in some
258 * corner case.
259 */
260 float r;
261 if (xi <= 0.9f) {
262 r = expf(xi * xi * 2.4f) - 1.0f;
263 }
264 else {
265 /* TODO(sergey): Some nicer curve fit is possible here. */
266 r = 15.0f;
267 }
268 /* Solve against scaled radius. */
269 for (int i = 0; i < max_iteration_count; i++) {
270 float exp_r_3 = expf(-r / 3.0f);
271 float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
272 float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi;
273 float f_ = 0.25f * exp_r + 0.25f * exp_r_3;
274
275 if (fabsf(f) < tolerance || f_ == 0.0f) {
276 break;
277 }
278
279 r = r - f / f_;
280 if (r < 0.0f) {
281 r = 0.0f;
282 }
283 }
284 return r;
285 }
286
bssrdf_burley_sample(const float d,float xi,float * r,float * h)287 ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h)
288 {
289 const float Rm = BURLEY_TRUNCATE * d;
290 const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d;
291
292 *r = r_;
293
294 /* h^2 + r^2 = Rm^2 */
295 *h = safe_sqrtf(Rm * Rm - r_ * r_);
296 }
297
298 /* None BSSRDF falloff
299 *
300 * Samples distributed over disk with no falloff, for reference. */
301
bssrdf_none_eval(const float radius,float r)302 ccl_device float bssrdf_none_eval(const float radius, float r)
303 {
304 const float Rm = radius;
305 return (r < Rm) ? 1.0f : 0.0f;
306 }
307
bssrdf_none_pdf(const float radius,float r)308 ccl_device float bssrdf_none_pdf(const float radius, float r)
309 {
310 /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */
311 const float Rm = radius;
312 const float area = (M_PI_F * Rm * Rm);
313
314 return bssrdf_none_eval(radius, r) / area;
315 }
316
bssrdf_none_sample(const float radius,float xi,float * r,float * h)317 ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h)
318 {
319 /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2
320 * r = sqrt(xi)*Rm */
321 const float Rm = radius;
322 const float r_ = sqrtf(xi) * Rm;
323
324 *r = r_;
325
326 /* h^2 + r^2 = Rm^2 */
327 *h = safe_sqrtf(Rm * Rm - r_ * r_);
328 }
329
330 /* Generic */
331
bssrdf_alloc(ShaderData * sd,float3 weight)332 ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
333 {
334 Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
335
336 if (bssrdf == NULL) {
337 return NULL;
338 }
339
340 float sample_weight = fabsf(average(weight));
341 bssrdf->sample_weight = sample_weight;
342 return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL;
343 }
344
bssrdf_setup(ShaderData * sd,Bssrdf * bssrdf,ClosureType type)345 ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type)
346 {
347 int flag = 0;
348 int bssrdf_channels = 3;
349 float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
350
351 /* Verify if the radii are large enough to sample without precision issues. */
352 if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
353 diffuse_weight.x = bssrdf->weight.x;
354 bssrdf->weight.x = 0.0f;
355 bssrdf->radius.x = 0.0f;
356 bssrdf_channels--;
357 }
358 if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
359 diffuse_weight.y = bssrdf->weight.y;
360 bssrdf->weight.y = 0.0f;
361 bssrdf->radius.y = 0.0f;
362 bssrdf_channels--;
363 }
364 if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
365 diffuse_weight.z = bssrdf->weight.z;
366 bssrdf->weight.z = 0.0f;
367 bssrdf->radius.z = 0.0f;
368 bssrdf_channels--;
369 }
370
371 if (bssrdf_channels < 3) {
372 /* Add diffuse BSDF if any radius too small. */
373 #ifdef __PRINCIPLED__
374 if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
375 float roughness = bssrdf->roughness;
376 float3 N = bssrdf->N;
377
378 PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc(
379 sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
380
381 if (bsdf) {
382 bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
383 bsdf->N = N;
384 bsdf->roughness = roughness;
385 flag |= bsdf_principled_diffuse_setup(bsdf);
386 }
387 }
388 else
389 #endif /* __PRINCIPLED__ */
390 {
391 DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight);
392
393 if (bsdf) {
394 bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
395 bsdf->N = bssrdf->N;
396 flag |= bsdf_diffuse_setup(bsdf);
397 }
398 }
399 }
400
401 /* Setup BSSRDF if radius is large enough. */
402 if (bssrdf_channels > 0) {
403 bssrdf->type = type;
404 bssrdf->channels = bssrdf_channels;
405 bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels;
406 bssrdf->texture_blur = saturate(bssrdf->texture_blur);
407 bssrdf->sharpness = saturate(bssrdf->sharpness);
408
409 if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
410 type == CLOSURE_BSSRDF_RANDOM_WALK_ID ||
411 type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) {
412 bssrdf_burley_setup(bssrdf);
413 }
414
415 flag |= SD_BSSRDF;
416 }
417 else {
418 bssrdf->type = type;
419 bssrdf->sample_weight = 0.0f;
420 }
421
422 return flag;
423 }
424
bssrdf_sample(const ShaderClosure * sc,float xi,float * r,float * h)425 ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h)
426 {
427 const Bssrdf *bssrdf = (const Bssrdf *)sc;
428 float radius;
429
430 /* Sample color channel and reuse random number. Only a subset of channels
431 * may be used if their radius was too small to handle as BSSRDF. */
432 xi *= bssrdf->channels;
433
434 if (xi < 1.0f) {
435 radius = (bssrdf->radius.x > 0.0f) ?
436 bssrdf->radius.x :
437 (bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z;
438 }
439 else if (xi < 2.0f) {
440 xi -= 1.0f;
441 radius = (bssrdf->radius.x > 0.0f && bssrdf->radius.y > 0.0f) ? bssrdf->radius.y :
442 bssrdf->radius.z;
443 }
444 else {
445 xi -= 2.0f;
446 radius = bssrdf->radius.z;
447 }
448
449 /* Sample BSSRDF. */
450 if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
451 bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h);
452 }
453 else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
454 bssrdf_gaussian_sample(radius, xi, r, h);
455 }
456 else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
457 * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) */
458 bssrdf_burley_sample(radius, xi, r, h);
459 }
460 }
461
bssrdf_channel_pdf(const Bssrdf * bssrdf,float radius,float r)462 ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r)
463 {
464 if (radius == 0.0f) {
465 return 0.0f;
466 }
467 else if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) {
468 return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r);
469 }
470 else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) {
471 return bssrdf_gaussian_pdf(radius, r);
472 }
473 else { /* if (bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID ||
474 * bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/
475 return bssrdf_burley_pdf(radius, r);
476 }
477 }
478
bssrdf_eval(const ShaderClosure * sc,float r)479 ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r)
480 {
481 const Bssrdf *bssrdf = (const Bssrdf *)sc;
482
483 return make_float3(bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r),
484 bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r),
485 bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r));
486 }
487
bssrdf_pdf(const ShaderClosure * sc,float r)488 ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r)
489 {
490 const Bssrdf *bssrdf = (const Bssrdf *)sc;
491 float3 pdf = bssrdf_eval(sc, r);
492
493 return (pdf.x + pdf.y + pdf.z) / bssrdf->channels;
494 }
495
496 CCL_NAMESPACE_END
497
498 #endif /* __KERNEL_BSSRDF_H__ */
499