1 /*
2 * Copyright 2011-2013 Blender Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef __UTIL_TRANSFORM_H__
18 #define __UTIL_TRANSFORM_H__
19
20 #ifndef __KERNEL_GPU__
21 # include <string.h>
22 #endif
23
24 #include "util/util_math.h"
25 #include "util/util_types.h"
26
27 CCL_NAMESPACE_BEGIN
28
29 /* Affine transformation, stored as 4x3 matrix. */
30
31 typedef struct Transform {
32 float4 x, y, z;
33
34 #ifndef __KERNEL_GPU__
35 float4 operator[](int i) const
36 {
37 return *(&x + i);
38 }
39 float4 &operator[](int i)
40 {
41 return *(&x + i);
42 }
43 #endif
44 } Transform;
45
46 /* Transform decomposed in rotation/translation/scale. we use the same data
47 * structure as Transform, and tightly pack decomposition into it. first the
48 * rotation (4), then translation (3), then 3x3 scale matrix (9). */
49
50 typedef struct DecomposedTransform {
51 float4 x, y, z, w;
52 } DecomposedTransform;
53
54 /* Functions */
55
transform_point(const Transform * t,const float3 a)56 ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
57 {
58 /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
59 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
60 ssef x, y, z, w, aa;
61 aa = a.m128;
62
63 x = _mm_loadu_ps(&t->x.x);
64 y = _mm_loadu_ps(&t->y.x);
65 z = _mm_loadu_ps(&t->z.x);
66 w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
67
68 _MM_TRANSPOSE4_PS(x, y, z, w);
69
70 ssef tmp = shuffle<0>(aa) * x;
71 tmp = madd(shuffle<1>(aa), y, tmp);
72 tmp = madd(shuffle<2>(aa), z, tmp);
73 tmp += w;
74
75 return float3(tmp.m128);
76 #else
77 float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w,
78 a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w,
79 a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w);
80
81 return c;
82 #endif
83 }
84
transform_direction(const Transform * t,const float3 a)85 ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
86 {
87 #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
88 ssef x, y, z, w, aa;
89 aa = a.m128;
90 x = _mm_loadu_ps(&t->x.x);
91 y = _mm_loadu_ps(&t->y.x);
92 z = _mm_loadu_ps(&t->z.x);
93 w = _mm_setzero_ps();
94
95 _MM_TRANSPOSE4_PS(x, y, z, w);
96
97 ssef tmp = shuffle<0>(aa) * x;
98 tmp = madd(shuffle<1>(aa), y, tmp);
99 tmp = madd(shuffle<2>(aa), z, tmp);
100
101 return float3(tmp.m128);
102 #else
103 float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z,
104 a.x * t->y.x + a.y * t->y.y + a.z * t->y.z,
105 a.x * t->z.x + a.y * t->z.y + a.z * t->z.z);
106
107 return c;
108 #endif
109 }
110
transform_direction_transposed(const Transform * t,const float3 a)111 ccl_device_inline float3 transform_direction_transposed(const Transform *t, const float3 a)
112 {
113 float3 x = make_float3(t->x.x, t->y.x, t->z.x);
114 float3 y = make_float3(t->x.y, t->y.y, t->z.y);
115 float3 z = make_float3(t->x.z, t->y.z, t->z.z);
116
117 return make_float3(dot(x, a), dot(y, a), dot(z, a));
118 }
119
make_transform(float a,float b,float c,float d,float e,float f,float g,float h,float i,float j,float k,float l)120 ccl_device_inline Transform make_transform(float a,
121 float b,
122 float c,
123 float d,
124 float e,
125 float f,
126 float g,
127 float h,
128 float i,
129 float j,
130 float k,
131 float l)
132 {
133 Transform t;
134
135 t.x.x = a;
136 t.x.y = b;
137 t.x.z = c;
138 t.x.w = d;
139 t.y.x = e;
140 t.y.y = f;
141 t.y.z = g;
142 t.y.w = h;
143 t.z.x = i;
144 t.z.y = j;
145 t.z.z = k;
146 t.z.w = l;
147
148 return t;
149 }
150
euler_to_transform(const float3 euler)151 ccl_device_inline Transform euler_to_transform(const float3 euler)
152 {
153 float cx = cosf(euler.x);
154 float cy = cosf(euler.y);
155 float cz = cosf(euler.z);
156 float sx = sinf(euler.x);
157 float sy = sinf(euler.y);
158 float sz = sinf(euler.z);
159
160 Transform t;
161 t.x.x = cy * cz;
162 t.y.x = cy * sz;
163 t.z.x = -sy;
164
165 t.x.y = sy * sx * cz - cx * sz;
166 t.y.y = sy * sx * sz + cx * cz;
167 t.z.y = cy * sx;
168
169 t.x.z = sy * cx * cz + sx * sz;
170 t.y.z = sy * cx * sz - sx * cz;
171 t.z.z = cy * cx;
172
173 t.x.w = t.y.w = t.z.w = 0.0f;
174 return t;
175 }
176
177 /* Constructs a coordinate frame from a normalized normal. */
make_transform_frame(float3 N)178 ccl_device_inline Transform make_transform_frame(float3 N)
179 {
180 const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N);
181 const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N);
182 const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? dx0 : dx1);
183 const float3 dy = normalize(cross(N, dx));
184 return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f);
185 }
186
187 #ifndef __KERNEL_GPU__
188
189 ccl_device_inline Transform operator*(const Transform a, const Transform b)
190 {
191 float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f);
192 float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f);
193 float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f);
194 float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f);
195
196 Transform t;
197 t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w));
198 t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w));
199 t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w));
200
201 return t;
202 }
203
print_transform(const char * label,const Transform & t)204 ccl_device_inline void print_transform(const char *label, const Transform &t)
205 {
206 print_float4(label, t.x);
207 print_float4(label, t.y);
208 print_float4(label, t.z);
209 printf("\n");
210 }
211
transform_translate(float3 t)212 ccl_device_inline Transform transform_translate(float3 t)
213 {
214 return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z);
215 }
216
transform_translate(float x,float y,float z)217 ccl_device_inline Transform transform_translate(float x, float y, float z)
218 {
219 return transform_translate(make_float3(x, y, z));
220 }
221
transform_scale(float3 s)222 ccl_device_inline Transform transform_scale(float3 s)
223 {
224 return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0);
225 }
226
transform_scale(float x,float y,float z)227 ccl_device_inline Transform transform_scale(float x, float y, float z)
228 {
229 return transform_scale(make_float3(x, y, z));
230 }
231
transform_rotate(float angle,float3 axis)232 ccl_device_inline Transform transform_rotate(float angle, float3 axis)
233 {
234 float s = sinf(angle);
235 float c = cosf(angle);
236 float t = 1.0f - c;
237
238 axis = normalize(axis);
239
240 return make_transform(axis.x * axis.x * t + c,
241 axis.x * axis.y * t - s * axis.z,
242 axis.x * axis.z * t + s * axis.y,
243 0.0f,
244
245 axis.y * axis.x * t + s * axis.z,
246 axis.y * axis.y * t + c,
247 axis.y * axis.z * t - s * axis.x,
248 0.0f,
249
250 axis.z * axis.x * t - s * axis.y,
251 axis.z * axis.y * t + s * axis.x,
252 axis.z * axis.z * t + c,
253 0.0f);
254 }
255
256 /* Euler is assumed to be in XYZ order. */
transform_euler(float3 euler)257 ccl_device_inline Transform transform_euler(float3 euler)
258 {
259 return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) *
260 transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) *
261 transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f));
262 }
263
transform_identity()264 ccl_device_inline Transform transform_identity()
265 {
266 return transform_scale(1.0f, 1.0f, 1.0f);
267 }
268
269 ccl_device_inline bool operator==(const Transform &A, const Transform &B)
270 {
271 return memcmp(&A, &B, sizeof(Transform)) == 0;
272 }
273
274 ccl_device_inline bool operator!=(const Transform &A, const Transform &B)
275 {
276 return !(A == B);
277 }
278
transform_get_column(const Transform * t,int column)279 ccl_device_inline float3 transform_get_column(const Transform *t, int column)
280 {
281 return make_float3(t->x[column], t->y[column], t->z[column]);
282 }
283
transform_set_column(Transform * t,int column,float3 value)284 ccl_device_inline void transform_set_column(Transform *t, int column, float3 value)
285 {
286 t->x[column] = value.x;
287 t->y[column] = value.y;
288 t->z[column] = value.z;
289 }
290
291 Transform transform_inverse(const Transform &a);
292 Transform transform_transposed_inverse(const Transform &a);
293
transform_uniform_scale(const Transform & tfm,float & scale)294 ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale)
295 {
296 /* the epsilon here is quite arbitrary, but this function is only used for
297 * surface area and bump, where we expect it to not be so sensitive */
298 float eps = 1e-6f;
299
300 float sx = len_squared(float4_to_float3(tfm.x));
301 float sy = len_squared(float4_to_float3(tfm.y));
302 float sz = len_squared(float4_to_float3(tfm.z));
303 float stx = len_squared(transform_get_column(&tfm, 0));
304 float sty = len_squared(transform_get_column(&tfm, 1));
305 float stz = len_squared(transform_get_column(&tfm, 2));
306
307 if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps &&
308 fabsf(sx - sty) < eps && fabsf(sx - stz) < eps) {
309 scale = sx;
310 return true;
311 }
312
313 return false;
314 }
315
transform_negative_scale(const Transform & tfm)316 ccl_device_inline bool transform_negative_scale(const Transform &tfm)
317 {
318 float3 c0 = transform_get_column(&tfm, 0);
319 float3 c1 = transform_get_column(&tfm, 1);
320 float3 c2 = transform_get_column(&tfm, 2);
321
322 return (dot(cross(c0, c1), c2) < 0.0f);
323 }
324
transform_clear_scale(const Transform & tfm)325 ccl_device_inline Transform transform_clear_scale(const Transform &tfm)
326 {
327 Transform ntfm = tfm;
328
329 transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0)));
330 transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1)));
331 transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2)));
332
333 return ntfm;
334 }
335
transform_empty()336 ccl_device_inline Transform transform_empty()
337 {
338 return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
339 }
340
341 #endif
342
343 /* Motion Transform */
344
quat_interpolate(float4 q1,float4 q2,float t)345 ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
346 {
347 /* Optix is using lerp to interpolate motion transformations. */
348 #ifdef __KERNEL_OPTIX__
349 return normalize((1.0f - t) * q1 + t * q2);
350 #else /* __KERNEL_OPTIX__ */
351 /* note: this does not ensure rotation around shortest angle, q1 and q2
352 * are assumed to be matched already in transform_motion_decompose */
353 float costheta = dot(q1, q2);
354
355 /* possible optimization: it might be possible to precompute theta/qperp */
356
357 if (costheta > 0.9995f) {
358 /* linear interpolation in degenerate case */
359 return normalize((1.0f - t) * q1 + t * q2);
360 }
361 else {
362 /* slerp */
363 float theta = acosf(clamp(costheta, -1.0f, 1.0f));
364 float4 qperp = normalize(q2 - q1 * costheta);
365 float thetap = theta * t;
366 return q1 * cosf(thetap) + qperp * sinf(thetap);
367 }
368 #endif /* __KERNEL_OPTIX__ */
369 }
370
transform_quick_inverse(Transform M)371 ccl_device_inline Transform transform_quick_inverse(Transform M)
372 {
373 /* possible optimization: can we avoid doing this altogether and construct
374 * the inverse matrix directly from negated translation, transposed rotation,
375 * scale can be inverted but what about shearing? */
376 Transform R;
377 float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
378 M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
379 if (det == 0.0f) {
380 M.x.x += 1e-8f;
381 M.y.y += 1e-8f;
382 M.z.z += 1e-8f;
383 det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
384 M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
385 }
386 det = (det != 0.0f) ? 1.0f / det : 0.0f;
387
388 float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z,
389 M.z.y * M.x.z - M.z.z * M.x.y,
390 M.y.z * M.x.y - M.y.y * M.x.z);
391 float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x,
392 M.z.z * M.x.x - M.z.x * M.x.z,
393 M.y.x * M.x.z - M.y.z * M.x.x);
394 float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y,
395 M.z.x * M.x.y - M.z.y * M.x.x,
396 M.y.y * M.x.x - M.y.x * M.x.y);
397 float3 T = -make_float3(M.x.w, M.y.w, M.z.w);
398
399 R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
400 R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
401 R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
402
403 return R;
404 }
405
transform_compose(Transform * tfm,const DecomposedTransform * decomp)406 ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransform *decomp)
407 {
408 /* rotation */
409 float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
410
411 q0 = M_SQRT2_F * decomp->x.w;
412 q1 = M_SQRT2_F * decomp->x.x;
413 q2 = M_SQRT2_F * decomp->x.y;
414 q3 = M_SQRT2_F * decomp->x.z;
415
416 qda = q0 * q1;
417 qdb = q0 * q2;
418 qdc = q0 * q3;
419 qaa = q1 * q1;
420 qab = q1 * q2;
421 qac = q1 * q3;
422 qbb = q2 * q2;
423 qbc = q2 * q3;
424 qcc = q3 * q3;
425
426 float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac);
427 float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc);
428 float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb);
429
430 /* scale */
431 float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y);
432 float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z);
433 float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w);
434
435 /* compose with translation */
436 tfm->x = make_float4(
437 dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
438 tfm->y = make_float4(
439 dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
440 tfm->z = make_float4(
441 dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
442 }
443
444 /* Interpolate from array of decomposed transforms. */
transform_motion_array_interpolate(Transform * tfm,const ccl_global DecomposedTransform * motion,uint numsteps,float time)445 ccl_device void transform_motion_array_interpolate(Transform *tfm,
446 const ccl_global DecomposedTransform *motion,
447 uint numsteps,
448 float time)
449 {
450 /* Figure out which steps we need to interpolate. */
451 int maxstep = numsteps - 1;
452 int step = min((int)(time * maxstep), maxstep - 1);
453 float t = time * maxstep - step;
454
455 const ccl_global DecomposedTransform *a = motion + step;
456 const ccl_global DecomposedTransform *b = motion + step + 1;
457
458 /* Interpolate rotation, translation and scale. */
459 DecomposedTransform decomp;
460 decomp.x = quat_interpolate(a->x, b->x, t);
461 decomp.y = (1.0f - t) * a->y + t * b->y;
462 decomp.z = (1.0f - t) * a->z + t * b->z;
463 decomp.w = (1.0f - t) * a->w + t * b->w;
464
465 /* Compose rotation, translation, scale into matrix. */
466 transform_compose(tfm, &decomp);
467 }
468
transform_isfinite_safe(Transform * tfm)469 ccl_device_inline bool transform_isfinite_safe(Transform *tfm)
470 {
471 return isfinite4_safe(tfm->x) && isfinite4_safe(tfm->y) && isfinite4_safe(tfm->z);
472 }
473
transform_decomposed_isfinite_safe(DecomposedTransform * decomp)474 ccl_device_inline bool transform_decomposed_isfinite_safe(DecomposedTransform *decomp)
475 {
476 return isfinite4_safe(decomp->x) && isfinite4_safe(decomp->y) && isfinite4_safe(decomp->z) &&
477 isfinite4_safe(decomp->w);
478 }
479
480 #ifndef __KERNEL_GPU__
481
482 class BoundBox2D;
483
484 ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B)
485 {
486 return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0;
487 }
488
489 float4 transform_to_quat(const Transform &tfm);
490 void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size);
491 Transform transform_from_viewplane(BoundBox2D &viewplane);
492
493 #endif
494
495 /* TODO(sergey): This is only for until we've got OpenCL 2.0
496 * on all devices we consider supported. It'll be replaced with
497 * generic address space.
498 */
499
500 #ifdef __KERNEL_OPENCL__
501
502 # define OPENCL_TRANSFORM_ADDRSPACE_GLUE(a, b) a##b
503 # define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(function) \
504 ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(function, _addrspace)( \
505 ccl_addr_space const Transform *t, const float3 a) \
506 { \
507 Transform private_tfm = *t; \
508 return function(&private_tfm, a); \
509 }
510
511 OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_point)
512 OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction)
513 OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed)
514
515 # undef OPENCL_TRANSFORM_ADDRSPACE_DECLARE
516 # undef OPENCL_TRANSFORM_ADDRSPACE_GLUE
517 # define transform_point_auto transform_point_addrspace
518 # define transform_direction_auto transform_direction_addrspace
519 # define transform_direction_transposed_auto transform_direction_transposed_addrspace
520 #else
521 # define transform_point_auto transform_point
522 # define transform_direction_auto transform_direction
523 # define transform_direction_transposed_auto transform_direction_transposed
524 #endif
525
526 CCL_NAMESPACE_END
527
528 #endif /* __UTIL_TRANSFORM_H__ */
529