1// Copyright 2019-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#include "../common/export_util.h"
5#include "UnstructuredSamplerBase.ih"
6#include "UnstructuredVolume.ih"
7
8inline bool pointInAABBTest(const uniform box3fa &box, const vec3f &point)
9{
10  bool t1 = point.x >= box.lower.x;
11  bool t2 = point.y >= box.lower.y;
12  bool t3 = point.z >= box.lower.z;
13  bool t4 = point.x <= box.upper.x;
14  bool t5 = point.y <= box.upper.y;
15  bool t6 = point.z <= box.upper.z;
16  return t1 & t2 & t3 & t4 & t5 & t6;
17}
18
19#define template_traverseEmbree(userFuncType, resultType)                      \
20  inline void traverseEmbree(uniform Node *uniform root,                       \
21                             const void *uniform userPtr,                      \
22                             uniform userFuncType userFunc,                    \
23                             resultType &result,                               \
24                             const vec3f &samplePos)                           \
25  {                                                                            \
26    uniform Node *uniform node = root;                                         \
27    uniform Node *uniform nodeStack[32]; /* xxx */                             \
28    uniform int stackPtr = 0;                                                  \
29                                                                               \
30    while (1) {                                                                \
31      uniform bool isLeaf = (node->nominalLength.x < 0);                       \
32      if (isLeaf) {                                                            \
33        uniform LeafNode *uniform leaf = (uniform LeafNode * uniform) node;    \
34        if (userFunc(userPtr, leaf->cellID, result, samplePos))                \
35          return;                                                              \
36      } else {                                                                 \
37        uniform InnerNode *uniform inner = (uniform InnerNode * uniform) node; \
38        const bool in0 = pointInAABBTest(inner->bounds[0], samplePos);         \
39        const bool in1 = pointInAABBTest(inner->bounds[1], samplePos);         \
40                                                                               \
41        if (any(in0)) {                                                        \
42          if (any(in1)) {                                                      \
43            nodeStack[stackPtr++] = inner->children[1];                        \
44            node                  = inner->children[0];                        \
45            continue;                                                          \
46          } else {                                                             \
47            node = inner->children[0];                                         \
48            continue;                                                          \
49          }                                                                    \
50        } else {                                                               \
51          if (any(in1)) {                                                      \
52            node = inner->children[1];                                         \
53            continue;                                                          \
54          } else {                                                             \
55            /* Do nothing, just pop. */                                        \
56          }                                                                    \
57        }                                                                      \
58      }                                                                        \
59      if (stackPtr == 0)                                                       \
60        return;                                                                \
61      node = nodeStack[--stackPtr];                                            \
62    }                                                                          \
63  }
64
65// #define USE_STACKLESS_TRAVERSAL
66
67#ifndef USE_STACKLESS_TRAVERSAL
68template_traverseEmbree(intersectAndSamplePrim, float);
69#endif
70
71template_traverseEmbree(intersectAndGradientPrim, vec3f);
72#undef template_traverseEmbree
73
74#ifdef USE_STACKLESS_TRAVERSAL
75// stackless traversal; this approach may be useful with traversal caches in the
76// future
77inline void traverseEmbree(uniform Node *uniform root,
78                           const void *uniform userPtr,
79                           uniform intersectAndSamplePrim userFunc,
80                           float &result,
81                           const vec3f &samplePos)
82{
83  uniform Node *uniform node = root;
84  uniform uint32 bitstack    = 0;
85
86  while (1) {
87    uniform bool isInner = (node->nominalLength.x >= 0);
88
89    if (isInner) {
90      uniform InnerNode *uniform inner = (uniform InnerNode * uniform) node;
91      const bool in0 = pointInAABBTest(inner->bounds[0], samplePos);
92      const bool in1 = pointInAABBTest(inner->bounds[1], samplePos);
93
94      if (any(in0) || any(in1)) {
95        bitstack = bitstack << 1;
96
97        if (any(in0) && any(in1)) {
98          node     = inner->children[0];  // should choose nearest child!
99          bitstack = bitstack | 1;
100        } else {
101          if (any(in0)) {
102            node = inner->children[0];
103          } else {
104            node = inner->children[1];
105          }
106        }
107
108        continue;
109      }
110    } else {
111      // leaf
112      uniform LeafNode *uniform leaf = (uniform LeafNode * uniform) node;
113      const bool in0                 = pointInAABBTest(leaf->bounds, samplePos);
114      if (in0 && userFunc(userPtr, leaf->cellID, result, samplePos))
115        return;
116    }
117
118    // backtrack
119    while ((bitstack & 1) == 0) {
120      if (!bitstack) {
121        return;
122      }
123      node     = node->parent;
124      bitstack = bitstack >> 1;
125    }
126
127    // sibling pointer could be used here if we had it
128    node = sibling(node);
129
130    bitstack = bitstack ^ 1;
131  }
132}
133#endif
134
135struct LinearSpace3f
136{
137  vec3f vx;
138  vec3f vy;
139  vec3f vz;
140};
141
142inline varying LinearSpace3f make_LinearSpace3f(const varying vec3f x,
143                                                const varying vec3f y,
144                                                const varying vec3f z)
145{
146  varying LinearSpace3f l;
147  l.vx = x;
148  l.vy = y;
149  l.vz = z;
150  return l;
151}
152
153inline varying float det(const varying LinearSpace3f l)
154{
155  return dot(l.vx, cross(l.vy, l.vz));
156}
157
158// Read 32/64-bit integer value from given array
159static inline uniform uint64 readInteger(const uniform Data1D array,
160                                         const uniform bool is32Bit,
161                                         const uniform uint64 id)
162{
163  return is32Bit ? get_uint32(array, id) : get_uint64(array, id);
164}
165
166// Get cell offset (location) in index array
167static inline uniform uint64 getCellOffset(
168    const VKLUnstructuredVolume *uniform self, const uniform uint64 id)
169{
170  return readInteger(self->cell, self->cell32Bit, id) + self->cellSkipIds;
171}
172
173// Get vertex index from index array
174static inline uniform uint64
175getVertexId(const VKLUnstructuredVolume *uniform self, const uniform uint64 id)
176{
177  return readInteger(self->index, self->index32Bit, id);
178}
179
180static inline uniform vec3f
181calcPlaneNormal(const VKLUnstructuredVolume *uniform self,
182                const uniform uint64 id,
183                const uniform uint32 plane[3])
184{
185  // Retrieve cell offset first
186  const uniform uint64 cOffset = getCellOffset(self, id);
187
188  // Get 3 vertices for normal calculation
189  const uniform vec3f v0 =
190      get_vec3f(self->vertex, getVertexId(self, cOffset + plane[0]));
191  const uniform vec3f v1 =
192      get_vec3f(self->vertex, getVertexId(self, cOffset + plane[1]));
193  const uniform vec3f v2 =
194      get_vec3f(self->vertex, getVertexId(self, cOffset + plane[2]));
195
196  // Calculate normal
197  return normalize(cross(v0 - v1, v2 - v1));
198}
199
200static inline uniform vec3f
201tetrahedronNormal(const VKLUnstructuredVolume *uniform self,
202                  const uniform uint64 id,
203                  const uniform int planeID)
204{
205  // Get precomputed normal if available
206  if (self->faceNormals)
207    return self->faceNormals[(id * 6) + planeID];
208
209  // Prepare vertex offset bys plane
210  const uniform uint32 planes[4][3] = {
211      {2, 0, 1}, {3, 1, 0}, {3, 2, 1}, {2, 3, 0}};
212  return calcPlaneNormal(self, id, planes[planeID]);
213}
214
215static inline uniform vec3f
216hexahedronNormal(const VKLUnstructuredVolume *uniform self,
217                 const uniform uint64 id,
218                 const uniform int planeID)
219{
220  // Get precomputed normal if available
221  if (self->faceNormals)
222    return self->faceNormals[(id * 6) + planeID];
223
224  // Prepare vertex offsets by plane
225  const uniform uint32 planes[6][3] = {
226      {3, 0, 1}, {5, 1, 0}, {6, 2, 1}, {7, 3, 2}, {7, 4, 0}, {6, 5, 4}};
227  return calcPlaneNormal(self, id, planes[planeID]);
228}
229
230static inline uniform vec3f
231wedgeNormal(const VKLUnstructuredVolume *uniform self,
232            const uniform uint64 id,
233            const uniform int planeID)
234{
235  // Get precomputed normal if available
236  if (self->faceNormals)
237    return self->faceNormals[(id * 6) + planeID];
238
239  // Prepare vertex offsets by plane
240  const uniform uint32 planes[5][3] = {
241      {2, 0, 1}, {4, 1, 0}, {5, 2, 1}, {5, 3, 0}, {5, 4, 3}};
242  return calcPlaneNormal(self, id, planes[planeID]);
243}
244
245static inline uniform vec3f
246pyramidNormal(const VKLUnstructuredVolume *uniform self,
247              const uniform uint64 id,
248              const uniform int planeID)
249{
250  // Get precomputed normal if available
251  if (self->faceNormals)
252    return self->faceNormals[(id * 6) + planeID];
253
254  // Prepare vertex offsets by plane
255  const uniform uint32 planes[5][3] = {
256      {3, 0, 1}, {4, 1, 0}, {4, 2, 1}, {4, 3, 2}, {3, 4, 0}};
257  return calcPlaneNormal(self, id, planes[planeID]);
258}
259
260static bool intersectAndSampleTet(const void *uniform userData,
261                                  uniform uint64 id,
262                                  uniform bool assumeInside,
263                                  float &result,
264                                  vec3f samplePos)
265{
266  const VKLUnstructuredVolume *uniform self =
267      (const VKLUnstructuredVolume *uniform)userData;
268
269  // Get cell offset in index buffer
270  const uniform uint64 cOffset = getCellOffset(self, id);
271
272  const uniform vec3f p0 =
273      get_vec3f(self->vertex, getVertexId(self, cOffset + 0));
274  const uniform vec3f p1 =
275      get_vec3f(self->vertex, getVertexId(self, cOffset + 1));
276  const uniform vec3f p2 =
277      get_vec3f(self->vertex, getVertexId(self, cOffset + 2));
278  const uniform vec3f p3 =
279      get_vec3f(self->vertex, getVertexId(self, cOffset + 3));
280
281  const uniform vec3f norm0 = tetrahedronNormal(self, id, 0);
282  const uniform vec3f norm1 = tetrahedronNormal(self, id, 1);
283  const uniform vec3f norm2 = tetrahedronNormal(self, id, 2);
284  const uniform vec3f norm3 = tetrahedronNormal(self, id, 3);
285
286  // Distance from the world point to the faces.
287  const float d0 = dot(norm0, p0 - samplePos);
288  const float d1 = dot(norm1, p1 - samplePos);
289  const float d2 = dot(norm2, p2 - samplePos);
290  const float d3 = dot(norm3, p3 - samplePos);
291
292  // Exit if samplePos is outside the cell
293  if (!assumeInside && !(d0 > 0 && d1 > 0 && d2 > 0 && d3 > 0))
294    return false;
295
296  // Skip interpolation if values are defined per cell
297  if (isValid(self->cellValue)) {
298    result = get_float(self->cellValue, id);
299    return true;
300  }
301
302  // Distance of tetrahedron corners to their opposite faces.
303  const uniform float h0 = dot(norm0, p0 - p3);
304  const uniform float h1 = dot(norm1, p1 - p2);
305  const uniform float h2 = dot(norm2, p2 - p0);
306  const uniform float h3 = dot(norm3, p3 - p1);
307
308  // Local coordinates = ratio of distances.
309  const float z0 = d0 / h0;
310  const float z1 = d1 / h1;
311  const float z2 = d2 / h2;
312  const float z3 = d3 / h3;
313
314  // Field/attribute values at the tetrahedron corners.
315  const uniform float v0 =
316      get_float(self->vertexValue, getVertexId(self, cOffset + 0));
317  const uniform float v1 =
318      get_float(self->vertexValue, getVertexId(self, cOffset + 1));
319  const uniform float v2 =
320      get_float(self->vertexValue, getVertexId(self, cOffset + 2));
321  const uniform float v3 =
322      get_float(self->vertexValue, getVertexId(self, cOffset + 3));
323
324  // Interpolated field/attribute value at the world position.
325  result = z0 * v3 + z1 * v2 + z2 * v0 + z3 * v1;
326  return true;
327}
328
329//----------------------------------------------------------------------------
330// Compute iso-parametric interpolation functions
331//
332static inline void wedgeInterpolationFunctions(float pcoords[3], float sf[6])
333{
334  sf[0] = (1.0 - pcoords[0] - pcoords[1]) * (1.0 - pcoords[2]);
335  sf[1] = pcoords[0] * (1.0 - pcoords[2]);
336  sf[2] = pcoords[1] * (1.0 - pcoords[2]);
337  sf[3] = (1.0 - pcoords[0] - pcoords[1]) * pcoords[2];
338  sf[4] = pcoords[0] * pcoords[2];
339  sf[5] = pcoords[1] * pcoords[2];
340}
341
342//----------------------------------------------------------------------------
343static inline void wedgeInterpolationDerivs(float pcoords[3], float derivs[18])
344{
345  // r-derivatives
346  derivs[0] = -1.0 + pcoords[2];
347  derivs[1] = 1.0 - pcoords[2];
348  derivs[2] = 0.0;
349  derivs[3] = -pcoords[2];
350  derivs[4] = pcoords[2];
351  derivs[5] = 0.0;
352
353  // s-derivatives
354  derivs[6]  = -1.0 + pcoords[2];
355  derivs[7]  = 0.0;
356  derivs[8]  = 1.0 - pcoords[2];
357  derivs[9]  = -pcoords[2];
358  derivs[10] = 0.0;
359  derivs[11] = pcoords[2];
360
361  // t-derivatives
362  derivs[12] = -1.0 + pcoords[0] + pcoords[1];
363  derivs[13] = -pcoords[0];
364  derivs[14] = -pcoords[1];
365  derivs[15] = 1.0 - pcoords[0] - pcoords[1];
366  derivs[16] = pcoords[0];
367  derivs[17] = pcoords[1];
368}
369
370static const uniform float WEDGE_DIVERGED               = 1.e6;
371static const uniform int WEDGE_MAX_ITERATION            = 10;
372static const uniform float WEDGE_CONVERGED              = 1.e-04;
373static const uniform float WEDGE_OUTSIDE_CELL_TOLERANCE = 1.e-06;
374
375static bool intersectAndSampleWedge(const void *uniform userData,
376                                    uniform uint64 id,
377                                    uniform bool assumeInside,
378                                    float &result,
379                                    vec3f samplePos)
380{
381  const VKLUnstructuredVolume *uniform self =
382      (const VKLUnstructuredVolume *uniform)userData;
383
384  float pcoords[3] = {0.5, 0.5, 0.5};
385  float derivs[18];
386  float weights[6];
387
388  // Get cell offset in index buffer
389  const uniform uint64 cOffset             = getCellOffset(self, id);
390  const uniform float determinantTolerance = self->iterativeTolerance[id];
391
392  // Enter iteration loop
393  bool converged = false;
394  for (uniform int iteration = 0;
395       !converged && (iteration < WEDGE_MAX_ITERATION);
396       iteration++) {
397    unmasked
398    {
399      // Calculate element interpolation functions and derivatives
400      wedgeInterpolationFunctions(pcoords, weights);
401      wedgeInterpolationDerivs(pcoords, derivs);
402
403      // Calculate newton functions
404      vec3f fcol = make_vec3f(0.f, 0.f, 0.f);
405      vec3f rcol = make_vec3f(0.f, 0.f, 0.f);
406      vec3f scol = make_vec3f(0.f, 0.f, 0.f);
407      vec3f tcol = make_vec3f(0.f, 0.f, 0.f);
408      for (uniform int i = 0; i < 6; i++) {
409        const uniform vec3f pt =
410            get_vec3f(self->vertex, getVertexId(self, cOffset + i));
411        fcol = fcol + pt * weights[i];
412        rcol = rcol + pt * derivs[i];
413        scol = scol + pt * derivs[i + 6];
414        tcol = tcol + pt * derivs[i + 12];
415      }
416
417      fcol = fcol - samplePos;
418
419      // Compute determinants and generate improvements
420      const float d = det(make_LinearSpace3f(rcol, scol, tcol));
421    }
422
423    if (absf(d) < determinantTolerance) {
424      return false;
425    }
426
427    const float d0 = det(make_LinearSpace3f(fcol, scol, tcol)) / d;
428    const float d1 = det(make_LinearSpace3f(rcol, fcol, tcol)) / d;
429    const float d2 = det(make_LinearSpace3f(rcol, scol, fcol)) / d;
430
431    pcoords[0] = pcoords[0] - d0;
432    pcoords[1] = pcoords[1] - d1;
433    pcoords[2] = pcoords[2] - d2;
434
435    // Convergence/divergence test - if neither, repeat
436    if ((absf(d0) < WEDGE_CONVERGED) & (absf(d1) < WEDGE_CONVERGED) &
437        (absf(d2) < WEDGE_CONVERGED)) {
438      converged = true;
439    } else if ((absf(pcoords[0]) > WEDGE_DIVERGED) |
440               (absf(pcoords[1]) > WEDGE_DIVERGED) |
441               (absf(pcoords[2]) > WEDGE_DIVERGED)) {
442      return false;
443    }
444  }
445
446  if (!converged) {
447    return false;
448  }
449
450  const uniform float lowerlimit = 0.0 - WEDGE_OUTSIDE_CELL_TOLERANCE;
451  const uniform float upperlimit = 1.0 + WEDGE_OUTSIDE_CELL_TOLERANCE;
452  if (assumeInside || (pcoords[0] >= lowerlimit && pcoords[0] <= upperlimit &&
453                       pcoords[1] >= lowerlimit && pcoords[1] <= upperlimit &&
454                       pcoords[2] >= lowerlimit && pcoords[2] <= upperlimit &&
455                       pcoords[0] + pcoords[1] <= upperlimit)) {
456    // Evaluation
457    if (isValid(self->cellValue)) {
458      result = get_float(self->cellValue, id);
459    } else {
460      float val = 0.f;
461      for (uniform int i = 0; i < 6; i++) {
462        val += weights[i] *
463               get_float(self->vertexValue, getVertexId(self, cOffset + i));
464      }
465      result = val;
466    }
467
468    return true;
469  }
470
471  return false;
472}
473
474static bool intersectAndSampleHexFast(const void *uniform userData,
475                                      uniform uint64 id,
476                                      float &result,
477                                      vec3f samplePos)
478{
479  const VKLUnstructuredVolume *uniform self =
480      (const VKLUnstructuredVolume *uniform)userData;
481
482  // Get cell offset in index buffer
483  const uniform uint64 cOffset = getCellOffset(self, id);
484
485  // Calculate distances from each hexahedron face
486  float dist[6];
487  for (uniform int plane = 0; plane < 6; plane++) {
488    const uniform vec3f v =
489        get_vec3f(self->vertex, getVertexId(self, cOffset + plane));
490    dist[plane] = dot(samplePos - v, hexahedronNormal(self, id, plane));
491    if (dist[plane] > 0.f)  // samplePos is outside of the cell
492      return false;
493  }
494
495  // Skip interpolation if values are defined per cell
496  if (isValid(self->cellValue)) {
497    result = get_float(self->cellValue, id);
498    return true;
499  }
500
501  // Calculate 0..1 isoparametrics
502  const float u0 = dist[2] / (dist[2] + dist[4]);
503  const float v0 = dist[5] / (dist[5] + dist[0]);
504  const float w0 = dist[3] / (dist[3] + dist[1]);
505  const float u1 = 1.f - u0;
506  const float v1 = 1.f - v0;
507  const float w1 = 1.f - w0;
508
509  // Do the trilinear interpolation
510  result = u0 * v0 * w0 *
511               get_float(self->vertexValue, getVertexId(self, cOffset + 0)) +
512           u1 * v0 * w0 *
513               get_float(self->vertexValue, getVertexId(self, cOffset + 1)) +
514           u1 * v0 * w1 *
515               get_float(self->vertexValue, getVertexId(self, cOffset + 2)) +
516           u0 * v0 * w1 *
517               get_float(self->vertexValue, getVertexId(self, cOffset + 3)) +
518           u0 * v1 * w0 *
519               get_float(self->vertexValue, getVertexId(self, cOffset + 4)) +
520           u1 * v1 * w0 *
521               get_float(self->vertexValue, getVertexId(self, cOffset + 5)) +
522           u1 * v1 * w1 *
523               get_float(self->vertexValue, getVertexId(self, cOffset + 6)) +
524           u0 * v1 * w1 *
525               get_float(self->vertexValue, getVertexId(self, cOffset + 7));
526  return true;
527}
528
529//----------------------------------------------------------------------------
530// Compute iso-parametric interpolation functions
531//
532static inline void hexInterpolationFunctions(float pcoords[3], float sf[8])
533{
534  float rm, sm, tm;
535
536  rm = 1.f - pcoords[0];
537  sm = 1.f - pcoords[1];
538  tm = 1.f - pcoords[2];
539
540  sf[0] = rm * sm * tm;
541  sf[1] = pcoords[0] * sm * tm;
542  sf[2] = pcoords[0] * pcoords[1] * tm;
543  sf[3] = rm * pcoords[1] * tm;
544  sf[4] = rm * sm * pcoords[2];
545  sf[5] = pcoords[0] * sm * pcoords[2];
546  sf[6] = pcoords[0] * pcoords[1] * pcoords[2];
547  sf[7] = rm * pcoords[1] * pcoords[2];
548}
549
550//----------------------------------------------------------------------------
551static inline void hexInterpolationDerivs(float pcoords[3], float derivs[24])
552{
553  float rm, sm, tm;
554
555  rm = 1.f - pcoords[0];
556  sm = 1.f - pcoords[1];
557  tm = 1.f - pcoords[2];
558
559  // r-derivatives
560  derivs[0] = -sm * tm;
561  derivs[1] = sm * tm;
562  derivs[2] = pcoords[1] * tm;
563  derivs[3] = -pcoords[1] * tm;
564  derivs[4] = -sm * pcoords[2];
565  derivs[5] = sm * pcoords[2];
566  derivs[6] = pcoords[1] * pcoords[2];
567  derivs[7] = -pcoords[1] * pcoords[2];
568
569  // s-derivatives
570  derivs[8]  = -rm * tm;
571  derivs[9]  = -pcoords[0] * tm;
572  derivs[10] = pcoords[0] * tm;
573  derivs[11] = rm * tm;
574  derivs[12] = -rm * pcoords[2];
575  derivs[13] = -pcoords[0] * pcoords[2];
576  derivs[14] = pcoords[0] * pcoords[2];
577  derivs[15] = rm * pcoords[2];
578
579  // t-derivatives
580  derivs[16] = -rm * sm;
581  derivs[17] = -pcoords[0] * sm;
582  derivs[18] = -pcoords[0] * pcoords[1];
583  derivs[19] = -rm * pcoords[1];
584  derivs[20] = rm * sm;
585  derivs[21] = pcoords[0] * sm;
586  derivs[22] = pcoords[0] * pcoords[1];
587  derivs[23] = rm * pcoords[1];
588}
589
590static const uniform float HEX_DIVERGED               = 1.e6;
591static const uniform int HEX_MAX_ITERATION            = 10;
592static const uniform float HEX_CONVERGED              = 1.e-04;
593static const uniform float HEX_OUTSIDE_CELL_TOLERANCE = 1.e-06;
594
595static bool intersectAndSampleHexIterative(const void *uniform userData,
596                                           uniform uint64 id,
597                                           uniform bool assumeInside,
598                                           float &result,
599                                           vec3f samplePos)
600{
601  const VKLUnstructuredVolume *uniform self =
602      (const VKLUnstructuredVolume *uniform)userData;
603
604  float pcoords[3] = {0.5, 0.5, 0.5};
605  float derivs[24];
606  float weights[8];
607
608  // Get cell offset in index buffer
609  const uniform uint64 cOffset             = getCellOffset(self, id);
610  const uniform float determinantTolerance = self->iterativeTolerance[id];
611
612  // Enter iteration loop
613  bool converged = false;
614  for (uniform int iteration = 0; !converged && (iteration < HEX_MAX_ITERATION);
615       iteration++) {
616    unmasked
617    {
618      // Calculate element interpolation functions and derivatives
619      hexInterpolationFunctions(pcoords, weights);
620      hexInterpolationDerivs(pcoords, derivs);
621
622      // Calculate newton functions
623      vec3f fcol = make_vec3f(0.f, 0.f, 0.f);
624      vec3f rcol = make_vec3f(0.f, 0.f, 0.f);
625      vec3f scol = make_vec3f(0.f, 0.f, 0.f);
626      vec3f tcol = make_vec3f(0.f, 0.f, 0.f);
627      for (uniform int i = 0; i < 8; i++) {
628        const uniform vec3f pt =
629            get_vec3f(self->vertex, getVertexId(self, cOffset + i));
630        fcol = fcol + pt * weights[i];
631        rcol = rcol + pt * derivs[i];
632        scol = scol + pt * derivs[i + 8];
633        tcol = tcol + pt * derivs[i + 16];
634      }
635
636      fcol = fcol - samplePos;
637
638      // Compute determinants and generate improvements
639      const float d = det(make_LinearSpace3f(rcol, scol, tcol));
640    }
641
642    if (absf(d) < determinantTolerance) {
643      return false;
644    }
645
646    const float d0 = det(make_LinearSpace3f(fcol, scol, tcol)) / d;
647    const float d1 = det(make_LinearSpace3f(rcol, fcol, tcol)) / d;
648    const float d2 = det(make_LinearSpace3f(rcol, scol, fcol)) / d;
649
650    pcoords[0] = pcoords[0] - d0;
651    pcoords[1] = pcoords[1] - d1;
652    pcoords[2] = pcoords[2] - d2;
653
654    // Convergence/divergence test - if neither, repeat
655    if ((absf(d0) < HEX_CONVERGED) & (absf(d1) < HEX_CONVERGED) &
656        (absf(d2) < HEX_CONVERGED)) {
657      converged = true;
658    } else if ((absf(pcoords[0]) > HEX_DIVERGED) |
659               (absf(pcoords[1]) > HEX_DIVERGED) |
660               (absf(pcoords[2]) > HEX_DIVERGED)) {
661      return false;
662    }
663  }
664
665  if (!converged) {
666    return false;
667  }
668
669  const uniform float lowerlimit = 0.0 - HEX_OUTSIDE_CELL_TOLERANCE;
670  const uniform float upperlimit = 1.0 + HEX_OUTSIDE_CELL_TOLERANCE;
671  if (assumeInside || (pcoords[0] >= lowerlimit && pcoords[0] <= upperlimit &&
672                       pcoords[1] >= lowerlimit && pcoords[1] <= upperlimit &&
673                       pcoords[2] >= lowerlimit && pcoords[2] <= upperlimit)) {
674    // Evaluation
675    if (isValid(self->cellValue)) {
676      result = get_float(self->cellValue, id);
677    } else {
678      float val = 0.f;
679      for (uniform int i = 0; i < 8; i++) {
680        val += weights[i] *
681               get_float(self->vertexValue, getVertexId(self, cOffset + i));
682      }
683      result = val;
684    }
685
686    return true;
687  }
688
689  return false;
690}
691
692//----------------------------------------------------------------------------
693// Compute iso-parametric interpolation functions
694//
695static inline void pyramidInterpolationFunctions(float pcoords[3], float sf[5])
696{
697  float rm, sm, tm;
698
699  rm = 1.f - pcoords[0];
700  sm = 1.f - pcoords[1];
701  tm = 1.f - pcoords[2];
702
703  sf[0] = rm * sm * tm;
704  sf[1] = pcoords[0] * sm * tm;
705  sf[2] = pcoords[0] * pcoords[1] * tm;
706  sf[3] = rm * pcoords[1] * tm;
707  sf[4] = pcoords[2];
708}
709
710//----------------------------------------------------------------------------
711static inline void pyramidInterpolationDerivs(float pcoords[3],
712                                              float derivs[15])
713{
714  // r-derivatives
715  derivs[0] = -(pcoords[1] - 1.f) * (pcoords[2] - 1.f);
716  derivs[1] = (pcoords[1] - 1.f) * (pcoords[2] - 1.f);
717  derivs[2] = pcoords[1] - pcoords[1] * pcoords[2];
718  derivs[3] = pcoords[1] * (pcoords[2] - 1.f);
719  derivs[4] = 0.f;
720
721  // s-derivatives
722  derivs[5] = -(pcoords[0] - 1.f) * (pcoords[2] - 1.f);
723  derivs[6] = pcoords[0] * (pcoords[2] - 1.f);
724  derivs[7] = pcoords[0] - pcoords[0] * pcoords[2];
725  derivs[8] = (pcoords[0] - 1.f) * (pcoords[2] - 1.f);
726  derivs[9] = 0.f;
727
728  // t-derivatives
729  derivs[10] = -(pcoords[0] - 1.f) * (pcoords[1] - 1.f);
730  derivs[11] = pcoords[0] * (pcoords[1] - 1.f);
731  derivs[12] = -pcoords[0] * pcoords[1];
732  derivs[13] = (pcoords[0] - 1.f) * pcoords[1];
733  derivs[14] = 1.f;
734}
735
736static const uniform float PYRAMID_DIVERGED               = 1.e6;
737static const uniform int PYRAMID_MAX_ITERATION            = 10;
738static const uniform float PYRAMID_CONVERGED              = 1.e-04;
739static const uniform float PYRAMID_OUTSIDE_CELL_TOLERANCE = 1.e-06;
740
741static bool intersectAndSamplePyramid(const void *uniform userData,
742                                      uniform uint64 id,
743                                      uniform bool assumeInside,
744                                      float &result,
745                                      vec3f samplePos)
746{
747  const VKLUnstructuredVolume *uniform self =
748      (const VKLUnstructuredVolume *uniform)userData;
749
750  float pcoords[3] = {0.5, 0.5, 0.5};
751  float derivs[15];
752  float weights[5];
753
754  // Get cell offset in index buffer
755  const uniform uint64 cOffset             = getCellOffset(self, id);
756  const uniform float determinantTolerance = self->iterativeTolerance[id];
757
758  // Enter iteration loop
759  bool converged = false;
760  for (uniform int iteration = 0;
761       !converged && (iteration < PYRAMID_MAX_ITERATION);
762       iteration++) {
763    unmasked
764    {
765      // Calculate element interpolation functions and derivatives
766      pyramidInterpolationFunctions(pcoords, weights);
767      pyramidInterpolationDerivs(pcoords, derivs);
768
769      // Calculate newton functions
770      vec3f fcol = make_vec3f(0.f, 0.f, 0.f);
771      vec3f rcol = make_vec3f(0.f, 0.f, 0.f);
772      vec3f scol = make_vec3f(0.f, 0.f, 0.f);
773      vec3f tcol = make_vec3f(0.f, 0.f, 0.f);
774      for (uniform int i = 0; i < 5; i++) {
775        const uniform vec3f pt =
776            get_vec3f(self->vertex, getVertexId(self, cOffset + i));
777        fcol = fcol + pt * weights[i];
778        rcol = rcol + pt * derivs[i];
779        scol = scol + pt * derivs[i + 5];
780        tcol = tcol + pt * derivs[i + 10];
781      }
782
783      fcol = fcol - samplePos;
784
785      // Compute determinants and generate improvements
786      const float d = det(make_LinearSpace3f(rcol, scol, tcol));
787    }
788
789    if (absf(d) < determinantTolerance) {
790      return false;
791    }
792
793    const float d0 = det(make_LinearSpace3f(fcol, scol, tcol)) / d;
794    const float d1 = det(make_LinearSpace3f(rcol, fcol, tcol)) / d;
795    const float d2 = det(make_LinearSpace3f(rcol, scol, fcol)) / d;
796
797    pcoords[0] = pcoords[0] - d0;
798    pcoords[1] = pcoords[1] - d1;
799    pcoords[2] = pcoords[2] - d2;
800
801    // Convergence/divergence test - if neither, repeat
802    if ((absf(d0) < PYRAMID_CONVERGED) & (absf(d1) < PYRAMID_CONVERGED) &
803        (absf(d2) < PYRAMID_CONVERGED)) {
804      converged = true;
805    } else if ((absf(pcoords[0]) > PYRAMID_DIVERGED) |
806               (absf(pcoords[1]) > PYRAMID_DIVERGED) |
807               (absf(pcoords[2]) > PYRAMID_DIVERGED)) {
808      return false;
809    }
810  }
811
812  if (!converged) {
813    return false;
814  }
815
816  const uniform float lowerlimit = 0.0 - PYRAMID_OUTSIDE_CELL_TOLERANCE;
817  const uniform float upperlimit = 1.0 + PYRAMID_OUTSIDE_CELL_TOLERANCE;
818  if (assumeInside || (pcoords[0] >= lowerlimit && pcoords[0] <= upperlimit &&
819                       pcoords[1] >= lowerlimit && pcoords[1] <= upperlimit &&
820                       pcoords[2] >= lowerlimit && pcoords[2] <= upperlimit)) {
821    // Evaluation
822    if (isValid(self->cellValue)) {
823      result = get_float(self->cellValue, id);
824    } else {
825      float val = 0.f;
826      for (uniform int i = 0; i < 5; i++) {
827        val += weights[i] *
828               get_float(self->vertexValue, getVertexId(self, cOffset + i));
829      }
830      result = val;
831    }
832
833    return true;
834  }
835
836  return false;
837}
838
839static bool intersectAndSampleCell(const void *uniform userData,
840                                   uniform uint64 id,
841                                   float &result,
842                                   vec3f samplePos)
843{
844  bool hit = false;
845  const VKLUnstructuredVolume *uniform self =
846      (const VKLUnstructuredVolume *uniform)userData;
847
848  switch (get_uint8(self->cellType, id)) {
849  case VKL_TETRAHEDRON:
850    hit = intersectAndSampleTet(userData, id, false, result, samplePos);
851    break;
852  case VKL_HEXAHEDRON:
853    if (!self->hexIterative)
854      hit = intersectAndSampleHexFast(userData, id, result, samplePos);
855    else
856      hit = intersectAndSampleHexIterative(
857          userData, id, false, result, samplePos);
858    break;
859  case VKL_WEDGE:
860    hit = intersectAndSampleWedge(userData, id, false, result, samplePos);
861    break;
862  case VKL_PYRAMID:
863    hit = intersectAndSamplePyramid(userData, id, false, result, samplePos);
864    break;
865  }
866
867  // Return true if samplePos is inside the cell
868  return hit;
869}
870
871#define template_stable_tri_normal(univary)                                   \
872  static inline univary vec3f stable_tri_normal(                              \
873      const univary vec3f &a, const univary vec3f &b, const univary vec3f &c) \
874  {                                                                           \
875    const univary float ab_x = a.z * b.y;                                     \
876    const univary float ab_y = a.x * b.z;                                     \
877    const univary float ab_z = a.y * b.x;                                     \
878    const univary float bc_x = b.z * c.y;                                     \
879    const univary float bc_y = b.x * c.z;                                     \
880    const univary float bc_z = b.y * c.x;                                     \
881    const univary vec3f cross_ab =                                            \
882        make_vec3f(a.y * b.z - ab_x, a.z * b.x - ab_y, a.x * b.y - ab_z);     \
883    const univary vec3f cross_bc =                                            \
884        make_vec3f(b.y * c.z - bc_x, b.z * c.x - bc_y, b.x * c.y - bc_z);     \
885    const univary bool sx = abs(ab_x) < abs(bc_x);                            \
886    const univary bool sy = abs(ab_y) < abs(bc_y);                            \
887    const univary bool sz = abs(ab_z) < abs(bc_z);                            \
888    return make_vec3f(sx ? cross_ab.x : cross_bc.x,                           \
889                      sy ? cross_ab.y : cross_bc.y,                           \
890                      sz ? cross_ab.z : cross_bc.z);                          \
891  }
892// template_stable_tri_normal(uniform);
893template_stable_tri_normal(varying);
894#undef template_stable_tri_normal
895
896#define template_intersectRayTri(univary)                        \
897  static inline void intersectRayTri_##univary(                  \
898      const univary vec3f &origin,                               \
899      const univary vec3f &direction,                            \
900      const uniform vec3f &p0,                                   \
901      const uniform vec3f &p1,                                   \
902      const uniform vec3f &p2,                                   \
903      univary bool &foundEntrance,                               \
904      univary bool &foundExit,                                   \
905      univary box1f &intersectedTRange)                          \
906  {                                                              \
907    const univary vec3f v0 = p0 - origin;                        \
908    const univary vec3f v1 = p1 - origin;                        \
909    const univary vec3f v2 = p2 - origin;                        \
910                                                                 \
911    /* calculate triangle edges */                               \
912    const univary vec3f e0 = v2 - v0;                            \
913    const univary vec3f e1 = v0 - v1;                            \
914    const univary vec3f e2 = v1 - v2;                            \
915                                                                 \
916    /* perform edge tests */                                     \
917    const univary float U = dot(cross(e0, v2 + v0), direction);  \
918    const univary float V = dot(cross(e1, v0 + v1), direction);  \
919    const univary float W = dot(cross(e2, v1 + v2), direction);  \
920    if (!foundEntrance) {                                        \
921      const univary float UVW = U + V + W;                       \
922      const univary float eps = 1e-6f * abs(UVW);                \
923      univary bool isenterhit = (min(U, min(V, W)) >= -eps);     \
924      if (isenterhit) {                                          \
925        const univary vec3f Ng  = stable_tri_normal(e0, e1, e2); \
926        const univary float den = 2.f * dot(Ng, direction);      \
927                                                                 \
928        /* perform depth test */                                 \
929        if (den != 0.f) {                                        \
930          const univary float T   = 2.f * dot(v0, Ng);           \
931          intersectedTRange.lower = rcp(den) * T;                \
932          foundEntrance           = true;                        \
933          return;                                                \
934        }                                                        \
935      }                                                          \
936    }                                                            \
937    if (!foundExit) {                                            \
938      const univary float UVW = U + V + W;                       \
939      const univary float eps = 1e-6f * abs(UVW);                \
940      univary bool isexithit  = (max(U, max(V, W)) <= eps);      \
941      if (isexithit) {                                           \
942        const univary vec3f Ng  = stable_tri_normal(e0, e1, e2); \
943        const univary float den = 2.f * dot(Ng, direction);      \
944                                                                 \
945        /* perform depth test */                                 \
946        if (den != 0.f) {                                        \
947          const univary float T   = 2.f * dot(v0, Ng);           \
948          intersectedTRange.upper = rcp(den) * T;                \
949          foundExit               = true;                        \
950          return;                                                \
951        }                                                        \
952      }                                                          \
953    }                                                            \
954    return;                                                      \
955  }
956// template_intersectRayTri(uniform);
957template_intersectRayTri(varying);
958#undef template_intersectRayTri
959
960#define template_intersectRayTet(univary)                               \
961  static inline univary box1f intersectRayTet_##univary(                \
962      const univary vec3f &origin,                                      \
963      const univary vec3f &direction,                                   \
964      const univary box1f &rangeLimit,                                  \
965      const VKLUnstructuredVolume *uniform volume,                      \
966      const uniform uint64 cellID)                                      \
967  {                                                                     \
968    /* // Get cell offset in index buffer */                            \
969    const uniform uint64 cOffset = getCellOffset(volume, cellID);       \
970                                                                        \
971    const uniform uint64 planes[4][3] = {                               \
972        {cOffset + 2, cOffset + 0, cOffset + 1},                        \
973        {cOffset + 3, cOffset + 1, cOffset + 0},                        \
974        {cOffset + 3, cOffset + 2, cOffset + 1},                        \
975        {cOffset + 2, cOffset + 3, cOffset + 0}};                       \
976    univary box1f intersectedTRange = make_box1f(neg_inf, inf);         \
977    univary bool foundExit          = false;                            \
978    univary bool foundEntrance      = false;                            \
979    for (uniform int i = 0; i < 4; ++i) {                               \
980      if (foundEntrance && foundExit)                                   \
981        break;                                                          \
982      const uniform vec3f p0 =                                          \
983          get_vec3f(volume->vertex, getVertexId(volume, planes[i][0])); \
984      const uniform vec3f p1 =                                          \
985          get_vec3f(volume->vertex, getVertexId(volume, planes[i][1])); \
986      const uniform vec3f p2 =                                          \
987          get_vec3f(volume->vertex, getVertexId(volume, planes[i][2])); \
988                                                                        \
989      intersectRayTri_##univary(origin,                                 \
990                                direction,                              \
991                                p0,                                     \
992                                p1,                                     \
993                                p2,                                     \
994                                foundEntrance,                          \
995                                foundExit,                              \
996                                intersectedTRange);                     \
997    }                                                                   \
998    if (foundEntrance && foundExit) {                                   \
999      intersectedTRange.lower =                                         \
1000          max(intersectedTRange.lower, rangeLimit.lower);               \
1001      intersectedTRange.upper =                                         \
1002          min(intersectedTRange.upper, rangeLimit.upper);               \
1003      return intersectedTRange;                                         \
1004    }                                                                   \
1005    return make_box1f(inf, neg_inf);                                    \
1006  }
1007
1008// template_intersectRayTet(uniform);
1009template_intersectRayTet(varying);
1010#undef template_intersectRayTet
1011
1012#define template_intersectRayHex(univary)                               \
1013  static inline univary box1f intersectRayHex_##univary(                \
1014      const univary vec3f &origin,                                      \
1015      const univary vec3f &direction,                                   \
1016      const univary box1f &rangeLimit,                                  \
1017      const VKLUnstructuredVolume *uniform volume,                      \
1018      const uniform uint64 cellID)                                      \
1019  {                                                                     \
1020    /* // Get cell offset in index buffer */                            \
1021    const uniform uint64 cOffset = getCellOffset(volume, cellID);       \
1022                                                                        \
1023    const uniform uint64 planes[12][3] = {                              \
1024        {cOffset + 0, cOffset + 1, cOffset + 2},                        \
1025        {cOffset + 0, cOffset + 2, cOffset + 3},                        \
1026        {cOffset + 5, cOffset + 6, cOffset + 1},                        \
1027        {cOffset + 6, cOffset + 2, cOffset + 1},                        \
1028        {cOffset + 7, cOffset + 4, cOffset + 0},                        \
1029        {cOffset + 7, cOffset + 0, cOffset + 3},                        \
1030        {cOffset + 4, cOffset + 5, cOffset + 1},                        \
1031        {cOffset + 4, cOffset + 1, cOffset + 0},                        \
1032        {cOffset + 5, cOffset + 4, cOffset + 6},                        \
1033        {cOffset + 6, cOffset + 4, cOffset + 7},                        \
1034        {cOffset + 6, cOffset + 7, cOffset + 2},                        \
1035        {cOffset + 2, cOffset + 7, cOffset + 3}};                       \
1036    univary box1f intersectedTRange = make_box1f(neg_inf, inf);         \
1037    univary bool foundExit          = false;                            \
1038    univary bool foundEntrance      = false;                            \
1039    for (uniform int i = 0; i < 12; ++i) {                              \
1040      if (foundEntrance && foundExit)                                   \
1041        break;                                                          \
1042      const uniform vec3f p0 =                                          \
1043          get_vec3f(volume->vertex, getVertexId(volume, planes[i][0])); \
1044      const uniform vec3f p1 =                                          \
1045          get_vec3f(volume->vertex, getVertexId(volume, planes[i][1])); \
1046      const uniform vec3f p2 =                                          \
1047          get_vec3f(volume->vertex, getVertexId(volume, planes[i][2])); \
1048                                                                        \
1049      intersectRayTri_##univary(origin,                                 \
1050                                direction,                              \
1051                                p0,                                     \
1052                                p1,                                     \
1053                                p2,                                     \
1054                                foundEntrance,                          \
1055                                foundExit,                              \
1056                                intersectedTRange);                     \
1057    }                                                                   \
1058    if (foundEntrance && foundExit) {                                   \
1059      intersectedTRange.lower =                                         \
1060          max(intersectedTRange.lower, rangeLimit.lower);               \
1061      intersectedTRange.upper =                                         \
1062          min(intersectedTRange.upper, rangeLimit.upper);               \
1063      return intersectedTRange;                                         \
1064    }                                                                   \
1065    return make_box1f(inf, neg_inf);                                    \
1066  }
1067
1068// template_intersectRayTet(uniform);
1069template_intersectRayHex(varying);
1070#undef template_intersectRayHex
1071
1072#define template_intersectRayWedge(univary)                             \
1073  static inline univary box1f intersectRayWedge_##univary(              \
1074      const univary vec3f &origin,                                      \
1075      const univary vec3f &direction,                                   \
1076      const univary box1f &rangeLimit,                                  \
1077      const VKLUnstructuredVolume *uniform volume,                      \
1078      const uniform uint64 cellID)                                      \
1079  {                                                                     \
1080    /* // Get cell offset in index buffer */                            \
1081    const uniform uint64 cOffset = getCellOffset(volume, cellID);       \
1082                                                                        \
1083    const uniform uint64 planes[8][3] = {                               \
1084        {cOffset + 2, cOffset + 0, cOffset + 1},                        \
1085        {cOffset + 4, cOffset + 1, cOffset + 0},                        \
1086        {cOffset + 5, cOffset + 2, cOffset + 1},                        \
1087        {cOffset + 5, cOffset + 3, cOffset + 0},                        \
1088        {cOffset + 5, cOffset + 4, cOffset + 3},                        \
1089        {cOffset + 4, cOffset + 0, cOffset + 3},                        \
1090        {cOffset + 5, cOffset + 1, cOffset + 4},                        \
1091        {cOffset + 5, cOffset + 0, cOffset + 2}};                       \
1092    univary box1f intersectedTRange = make_box1f(neg_inf, inf);         \
1093    univary bool foundExit          = false;                            \
1094    univary bool foundEntrance      = false;                            \
1095    for (uniform int i = 0; i < 8; ++i) {                               \
1096      if (foundEntrance && foundExit)                                   \
1097        break;                                                          \
1098      const uniform vec3f p0 =                                          \
1099          get_vec3f(volume->vertex, getVertexId(volume, planes[i][0])); \
1100      const uniform vec3f p1 =                                          \
1101          get_vec3f(volume->vertex, getVertexId(volume, planes[i][1])); \
1102      const uniform vec3f p2 =                                          \
1103          get_vec3f(volume->vertex, getVertexId(volume, planes[i][2])); \
1104                                                                        \
1105      intersectRayTri_##univary(origin,                                 \
1106                                direction,                              \
1107                                p0,                                     \
1108                                p1,                                     \
1109                                p2,                                     \
1110                                foundEntrance,                          \
1111                                foundExit,                              \
1112                                intersectedTRange);                     \
1113    }                                                                   \
1114    if (foundEntrance && foundExit) {                                   \
1115      intersectedTRange.lower =                                         \
1116          max(intersectedTRange.lower, rangeLimit.lower);               \
1117      intersectedTRange.upper =                                         \
1118          min(intersectedTRange.upper, rangeLimit.upper);               \
1119      return intersectedTRange;                                         \
1120    }                                                                   \
1121    return make_box1f(inf, neg_inf);                                    \
1122  }
1123
1124// template_intersectRayWedge(uniform);
1125template_intersectRayWedge(varying);
1126#undef template_intersectRayWedge
1127
1128#define template_intersectRayPyramid(univary)                           \
1129  static inline univary box1f intersectRayPyramid_##univary(            \
1130      const univary vec3f &origin,                                      \
1131      const univary vec3f &direction,                                   \
1132      const univary box1f &rangeLimit,                                  \
1133      const VKLUnstructuredVolume *uniform volume,                      \
1134      const uniform uint64 cellID)                                      \
1135  {                                                                     \
1136    /* // Get cell offset in index buffer */                            \
1137    const uniform uint64 cOffset = getCellOffset(volume, cellID);       \
1138                                                                        \
1139    const uniform uint64 planes[6][3] = {                               \
1140        {cOffset + 3, cOffset + 0, cOffset + 1},                        \
1141        {cOffset + 4, cOffset + 1, cOffset + 0},                        \
1142        {cOffset + 4, cOffset + 2, cOffset + 1},                        \
1143        {cOffset + 4, cOffset + 3, cOffset + 2},                        \
1144        {cOffset + 3, cOffset + 4, cOffset + 0},                        \
1145        {cOffset + 3, cOffset + 1, cOffset + 2}};                       \
1146    univary box1f intersectedTRange = make_box1f(neg_inf, inf);         \
1147    univary bool foundExit          = false;                            \
1148    univary bool foundEntrance      = false;                            \
1149    for (uniform int i = 0; i < 6; ++i) {                               \
1150      if (foundEntrance && foundExit)                                   \
1151        break;                                                          \
1152      const uniform vec3f p0 =                                          \
1153          get_vec3f(volume->vertex, getVertexId(volume, planes[i][0])); \
1154      const uniform vec3f p1 =                                          \
1155          get_vec3f(volume->vertex, getVertexId(volume, planes[i][1])); \
1156      const uniform vec3f p2 =                                          \
1157          get_vec3f(volume->vertex, getVertexId(volume, planes[i][2])); \
1158                                                                        \
1159      intersectRayTri_##univary(origin,                                 \
1160                                direction,                              \
1161                                p0,                                     \
1162                                p1,                                     \
1163                                p2,                                     \
1164                                foundEntrance,                          \
1165                                foundExit,                              \
1166                                intersectedTRange);                     \
1167    }                                                                   \
1168    if (foundEntrance && foundExit) {                                   \
1169      intersectedTRange.lower =                                         \
1170          max(intersectedTRange.lower, rangeLimit.lower);               \
1171      intersectedTRange.upper =                                         \
1172          min(intersectedTRange.upper, rangeLimit.upper);               \
1173      return intersectedTRange;                                         \
1174    }                                                                   \
1175    return make_box1f(inf, neg_inf);                                    \
1176  }
1177
1178// template_intersectRayPyramid(uniform);
1179template_intersectRayPyramid(varying);
1180#undef template_intersectRayPyramid
1181
1182#define template_intersectRayCell(univary)                         \
1183  inline univary box1f intersectRayCell_##univary(                 \
1184      const univary vec3f &origin,                                 \
1185      const univary vec3f &direction,                              \
1186      const univary box1f &rangeLimit,                             \
1187      const VKLUnstructuredVolume *uniform volume,                 \
1188      const uniform uint64 cellID)                                 \
1189  {                                                                \
1190    univary box1f intersectedTRange;                               \
1191    switch (get_uint8(volume->cellType, cellID)) {                 \
1192    case VKL_TETRAHEDRON:                                          \
1193      intersectedTRange = intersectRayTet_##univary(               \
1194          origin, direction, rangeLimit, volume, cellID);          \
1195      break;                                                       \
1196    case VKL_HEXAHEDRON:                                           \
1197      intersectedTRange = intersectRayHex_##univary(               \
1198          origin, direction, rangeLimit, volume, cellID);          \
1199      break;                                                       \
1200    case VKL_WEDGE:                                                \
1201      intersectedTRange = intersectRayWedge_##univary(             \
1202          origin, direction, rangeLimit, volume, cellID);          \
1203      break;                                                       \
1204    case VKL_PYRAMID:                                              \
1205      intersectedTRange = intersectRayPyramid_##univary(           \
1206          origin, direction, rangeLimit, volume, cellID);          \
1207      break;                                                       \
1208    }                                                              \
1209                                                                   \
1210    univary box1f result;                                          \
1211    result.lower = max(intersectedTRange.lower, rangeLimit.lower); \
1212    result.upper = min(intersectedTRange.upper, rangeLimit.upper); \
1213    return result;                                                 \
1214  }
1215
1216// template_intersectNode(uniform);
1217template_intersectRayCell(varying);
1218#undef template_intersectRayCell
1219
1220inline varying float VKLUnstructuredVolume_sample(
1221    const Sampler *uniform sampler,
1222    const varying vec3f &worldCoordinates,
1223    const uniform uint32 _attributeIndex,
1224    const varying float &_time)
1225{
1226  // Cast to the actual Volume subtype.
1227  const VKLUnstructuredVolume *uniform self =
1228      (const VKLUnstructuredVolume *uniform)sampler->volume;
1229
1230  float results = self->super.super.background[0];
1231
1232  traverseEmbree(self->super.bvhRoot,
1233                 self,
1234                 intersectAndSampleCell,
1235                 results,
1236                 worldCoordinates);
1237
1238  return results;
1239}
1240
1241inline varying vec3f VKLUnstructuredVolume_computeGradient(
1242    const Sampler *uniform sampler, const varying vec3f &objectCoordinates)
1243{
1244  // Cast to the actual Volume subtype.
1245  const VKLUnstructuredVolume *uniform self =
1246      (const VKLUnstructuredVolume *uniform)sampler->volume;
1247
1248  // gradient step in each dimension (object coordinates)
1249  vec3f gradientStep = self->gradientStep;
1250
1251  // fixed time
1252  float time = 0.f;
1253
1254  // compute via forward or backward differences depending on volume / cell
1255  // boundaries (as determined by NaN sample values outside any volume cell)
1256  vec3f gradient;
1257
1258  float sample = VKLUnstructuredVolume_sample(sampler, objectCoordinates, 0, time);
1259
1260  gradient.x = VKLUnstructuredVolume_sample(
1261                   sampler,
1262                   objectCoordinates + make_vec3f(gradientStep.x, 0.f, 0.f),
1263                   0,
1264                   time) -
1265               sample;
1266  gradient.y = VKLUnstructuredVolume_sample(
1267                   sampler,
1268                   objectCoordinates + make_vec3f(0.f, gradientStep.y, 0.f),
1269                   0,
1270                   time) -
1271               sample;
1272  gradient.z = VKLUnstructuredVolume_sample(
1273                   sampler,
1274                   objectCoordinates + make_vec3f(0.f, 0.f, gradientStep.z),
1275                   0,
1276                   time) -
1277               sample;
1278
1279  if (isnan(gradient.x)) {
1280    gradientStep.x *= -1.f;
1281
1282    gradient.x = VKLUnstructuredVolume_sample(
1283                     sampler,
1284                     objectCoordinates + make_vec3f(gradientStep.x, 0.f, 0.f),
1285                     0,
1286                     time) -
1287                 sample;
1288  }
1289
1290  if (isnan(gradient.y)) {
1291    gradientStep.y *= -1.f;
1292
1293    gradient.y = VKLUnstructuredVolume_sample(
1294                     sampler,
1295                     objectCoordinates + make_vec3f(0.f, gradientStep.y, 0.f),
1296                     0,
1297                     time) -
1298                 sample;
1299  }
1300
1301  if (isnan(gradient.z)) {
1302    gradientStep.z *= -1.f;
1303
1304    gradient.z = VKLUnstructuredVolume_sample(
1305                     sampler,
1306                     objectCoordinates + make_vec3f(0.f, 0.f, gradientStep.z),
1307                     0,
1308                     time) -
1309                 sample;
1310  }
1311
1312  return gradient / gradientStep;
1313}
1314
1315export void EXPORT_UNIQUE(VKLUnstructuredVolume_sample_export,
1316                          uniform const int *uniform imask,
1317                          void *uniform _sampler,
1318                          const void *uniform _objectCoordinates,
1319                          void *uniform _samples)
1320{
1321  const Sampler *uniform sampler = (const Sampler *uniform) _sampler;
1322  if (imask[programIndex]) {
1323    const varying vec3f *uniform objectCoordinates =
1324        (const varying vec3f *uniform)_objectCoordinates;
1325    varying float time             = 0.f;
1326    varying float *uniform samples = (varying float *uniform)_samples;
1327
1328    *samples = VKLUnstructuredVolume_sample(sampler, *objectCoordinates, 0, time);
1329  }
1330}
1331
1332export void EXPORT_UNIQUE(VKLUnstructuredVolume_gradient_export,
1333                          uniform const int *uniform imask,
1334                          void *uniform _sampler,
1335                          const void *uniform _objectCoordinates,
1336                          void *uniform _gradients)
1337{
1338  const Sampler *uniform sampler = (const Sampler *uniform) _sampler;
1339  if (imask[programIndex]) {
1340    const varying vec3f *uniform objectCoordinates =
1341        (const varying vec3f *uniform)_objectCoordinates;
1342    varying vec3f *uniform gradients = (varying vec3f * uniform) _gradients;
1343
1344    *gradients = VKLUnstructuredVolume_computeGradient(sampler, *objectCoordinates);
1345  }
1346}
1347
1348export void *uniform EXPORT_UNIQUE(VKLUnstructuredVolume_Constructor)
1349{
1350  uniform VKLUnstructuredVolume *uniform self =
1351      uniform new uniform VKLUnstructuredVolume;
1352  memset(self, 0, sizeof(uniform VKLUnstructuredVolume));
1353
1354  return self;
1355}
1356
1357export void EXPORT_UNIQUE(VKLUnstructuredVolume_Destructor, void *uniform _self)
1358{
1359  VKLUnstructuredVolume *uniform volume =
1360      (VKLUnstructuredVolume * uniform) _self;
1361  delete volume;
1362}
1363
1364export void EXPORT_UNIQUE(VKLUnstructuredVolume_set,
1365                          void *uniform _self,
1366                          const uniform box3f &_bbox,
1367                          const Data1D *uniform _vertex,
1368                          const Data1D *uniform _index,
1369                          const uniform bool _index32Bit,
1370                          const Data1D *uniform _vertexValue,
1371                          const Data1D *uniform _cellValue,
1372                          const Data1D *uniform _cell,
1373                          const uniform bool _cell32Bit,
1374                          const uniform uint32 _cellSkipIds,
1375                          const Data1D *uniform _cellType,
1376                          const void *uniform bvhRoot,
1377                          const vec3f *uniform _faceNormals,
1378                          const float *uniform _iterativeTolerance,
1379                          const uniform bool _hexIterative)
1380{
1381  uniform VKLUnstructuredVolume *uniform self =
1382      (uniform VKLUnstructuredVolume * uniform) _self;
1383
1384  self->vertex      = *_vertex;
1385  self->index       = *_index;
1386  self->index32Bit  = _index32Bit;
1387  self->vertexValue = *_vertexValue;
1388  self->cellValue   = *_cellValue;
1389  self->cell        = *_cell;
1390  self->cell32Bit   = _cell32Bit;
1391  self->cellSkipIds = _cellSkipIds;
1392  self->cellType    = *_cellType;
1393
1394  self->faceNormals        = _faceNormals;
1395  self->iterativeTolerance = _iterativeTolerance;
1396  self->hexIterative       = _hexIterative;
1397
1398  self->super.boundingBox = _bbox;
1399
1400  self->gradientStep =
1401      make_vec3f(0.01f * reduce_min(self->super.boundingBox.upper -
1402                                    self->super.boundingBox.lower));
1403
1404  self->super.bvhRoot          = (uniform Node * uniform) bvhRoot;
1405}
1406
1407export UnstructuredSamplerBase *uniform
1408EXPORT_UNIQUE(VKLUnstructuredSampler_Constructor, void *uniform _volume)
1409{
1410  UnstructuredSamplerBase *uniform sampler =
1411      uniform new UnstructuredSamplerBase;
1412  memset(sampler, 0, sizeof(uniform UnstructuredSamplerBase));
1413
1414  sampler->super.volume                = (const Volume *uniform)_volume;
1415  sampler->super.computeSample_varying = VKLUnstructuredVolume_sample;
1416  sampler->super.computeGradient_varying =
1417      VKLUnstructuredVolume_computeGradient;
1418
1419  return sampler;
1420}
1421
1422export void EXPORT_UNIQUE(VKLUnstructuredSampler_Destructor,
1423                          void *uniform _sampler)
1424{
1425  UnstructuredSamplerBase *uniform sampler =
1426      (UnstructuredSamplerBase * uniform) _sampler;
1427  delete sampler;
1428}
1429