1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #include "scene_instance.h"
5 #include "scene.h"
6 #include "motion_derivative.h"
7 namespace embree
8 {
9 #if defined(EMBREE_LOWEST_ISA)
10 
Instance(Device * device,Accel * object,unsigned int numTimeSteps)11   Instance::Instance (Device* device, Accel* object, unsigned int numTimeSteps)
12     : Geometry(device,Geometry::GTY_INSTANCE_CHEAP,1,numTimeSteps)
13     , object(object)
14     , local2world(nullptr)
15   {
16     if (object) object->refInc();
17     gsubtype = GTY_SUBTYPE_INSTANCE_LINEAR;
18     world2local0 = one;
19     local2world = (AffineSpace3ff*) alignedMalloc(numTimeSteps*sizeof(AffineSpace3ff),16);
20     for (size_t i = 0; i < numTimeSteps; i++)
21       local2world[i] = one;
22   }
23 
~Instance()24   Instance::~Instance()
25   {
26     alignedFree(local2world);
27     if (object) object->refDec();
28   }
29 
setNumTimeSteps(unsigned int numTimeSteps_in)30   void Instance::setNumTimeSteps (unsigned int numTimeSteps_in)
31   {
32     if (numTimeSteps_in == numTimeSteps)
33       return;
34 
35     AffineSpace3ff* local2world2 = (AffineSpace3ff*) alignedMalloc(numTimeSteps_in*sizeof(AffineSpace3ff),16);
36 
37     for (size_t i = 0; i < min(numTimeSteps, numTimeSteps_in); i++) {
38       local2world2[i] = local2world[i];
39     }
40 
41     for (size_t i = numTimeSteps; i < numTimeSteps_in; i++) {
42       local2world2[i] = one;
43     }
44 
45     alignedFree(local2world);
46     local2world = local2world2;
47 
48     Geometry::setNumTimeSteps(numTimeSteps_in);
49   }
50 
setInstancedScene(const Ref<Scene> & scene)51   void Instance::setInstancedScene(const Ref<Scene>& scene)
52   {
53     if (object) object->refDec();
54     object = scene.ptr;
55     if (object) object->refInc();
56     Geometry::update();
57   }
58 
59 #if 0
60   void Instance::preCommit()
61   {
62 #if 0 // disable expensive instance optimization for now
63     // decide whether we're an expensive instnace or not
64     auto numExpensiveGeo =  static_cast<Scene*> (object)->getNumPrimitives(CurveGeometry::geom_type, false)
65                           + static_cast<Scene*> (object)->getNumPrimitives(CurveGeometry::geom_type, true)
66                           + static_cast<Scene*> (object)->getNumPrimitives(UserGeometry::geom_type, false)
67                           + static_cast<Scene*> (object)->getNumPrimitives(UserGeometry::geom_type, true);
68     if (numExpensiveGeo > 0) {
69       this->gtype = GTY_INSTANCE_EXPENSIVE;
70     }
71 #endif
72 
73     Geometry::preCommit();
74   }
75 #endif
76 
addElementsToCount(GeometryCounts & counts) const77   void Instance::addElementsToCount (GeometryCounts & counts) const
78   {
79     if (Geometry::GTY_INSTANCE_CHEAP == this->gtype) {
80       if (1 == numTimeSteps) {
81         counts.numInstancesCheap += numPrimitives;
82       } else {
83         counts.numMBInstancesCheap += numPrimitives;
84       }
85     } else {
86       if (1 == numTimeSteps) {
87         counts.numInstancesExpensive += numPrimitives;
88       } else {
89         counts.numMBInstancesExpensive += numPrimitives;
90       }
91     }
92   }
93 
setTransform(const AffineSpace3fa & xfm,unsigned int timeStep)94   void Instance::setTransform(const AffineSpace3fa& xfm, unsigned int timeStep)
95   {
96     if (timeStep >= numTimeSteps)
97       throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid timestep");
98 
99     local2world[timeStep] = xfm;
100     gsubtype = GTY_SUBTYPE_INSTANCE_LINEAR;
101   }
102 
setQuaternionDecomposition(const AffineSpace3ff & qd,unsigned int timeStep)103   void Instance::setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep)
104   {
105     if (timeStep >= numTimeSteps)
106       throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid timestep");
107 
108     local2world[timeStep] = qd;
109     gsubtype = GTY_SUBTYPE_INSTANCE_QUATERNION;
110   }
111 
getTransform(float time)112   AffineSpace3fa Instance::getTransform(float time)
113   {
114     if (likely(numTimeSteps <= 1))
115       return getLocal2World();
116     else
117       return getLocal2World(time);
118   }
119 
setMask(unsigned mask)120   void Instance::setMask (unsigned mask)
121   {
122     this->mask = mask;
123     Geometry::update();
124   }
125 
commit()126   void Instance::commit()
127   {
128     if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION))
129       world2local0 = rcp(quaternionDecompositionToAffineSpace(local2world[0]));
130     else
131       world2local0 = rcp(local2world[0]);
132 
133     Geometry::commit();
134   }
135 
136   /*
137 
138      This function calculates the correction for the linear bounds
139      bbox0/bbox1 to properly bound the motion obtained when linearly
140      blending the transformation and applying the resulting
141      transformation to the linearly blended positions
142      lerp(xfm0,xfm1,t)*lerp(p0,p1,t). The extrema of the error to the
143      linearly blended bounds have to get calculates
144      f = lerp(xfm0,xfm1,t)*lerp(p0,p1,t) - lerp(bounds0,bounds1,t). For
145      the position where this error gets extreme we have to correct the
146      linear bounds. The derivative of this function f can get
147      calculates as
148 
149      f' = (lerp(A0,A1,t) lerp(p0,p1,t))` - (lerp(bounds0,bounds1,t))`
150         = lerp'(A0,A1,t) lerp(p0,p1,t) + lerp(A0,A1,t) lerp'(p0,p1,t) - (bounds1-bounds0)
151         = (A1-A0) lerp(p0,p1,t) + lerp(A0,A1,t) (p1-p0) - (bounds1-bounds0)
152         = (A1-A0) (p0 + t*(p1-p0)) + (A0 + t*(A1-A0)) (p1-p0) - (bounds1-bounds0)
153         = (A1-A0) * p0 + t*(A1-A0)*(p1-p0) + A0*(p1-p0) + t*(A1-A0)*(p1-p0) - (bounds1-bounds0)
154         = (A1-A0) * p0 + A0*(p1-p0) - (bounds1-bounds0) + t* ((A1-A0)*(p1-p0) + (A1-A0)*(p1-p0))
155 
156    The t value where this function has an extremal point is thus:
157 
158     => t = - ((A1-A0) * p0 + A0*(p1-p0) + (bounds1-bounds0)) / (2*(A1-A0)*(p1-p0))
159          = (2*A0*p0 - A1*p0 - A0*p1 + (bounds1-bounds0)) / (2*(A1-A0)*(p1-p0))
160 
161    */
162 
boundSegmentLinear(AffineSpace3fa const & xfm0,AffineSpace3fa const & xfm1,BBox3fa const & obbox0,BBox3fa const & obbox1,BBox3fa const & bbox0,BBox3fa const & bbox1,float tmin,float tmax)163   BBox3fa boundSegmentLinear(AffineSpace3fa const& xfm0,
164                              AffineSpace3fa const& xfm1,
165                              BBox3fa const& obbox0,
166                              BBox3fa const& obbox1,
167                              BBox3fa const& bbox0,
168                              BBox3fa const& bbox1,
169                              float tmin,
170                              float tmax)
171   {
172     BBox3fa delta(Vec3fa(0.f), Vec3fa(0.f));
173 
174     // loop over bounding box corners
175     for (int ii = 0; ii < 2; ++ii)
176     for (int jj = 0; jj < 2; ++jj)
177     for (int kk = 0; kk < 2; ++kk)
178     {
179       Vec3fa p0(ii == 0 ? obbox0.lower.x : obbox0.upper.x,
180                 jj == 0 ? obbox0.lower.y : obbox0.upper.y,
181                 kk == 0 ? obbox0.lower.z : obbox0.upper.z);
182       Vec3fa p1(ii == 0 ? obbox1.lower.x : obbox1.upper.x,
183                 jj == 0 ? obbox1.lower.y : obbox1.upper.y,
184                 kk == 0 ? obbox1.lower.z : obbox1.upper.z);
185 
186       // get extrema of motion of bounding box corner for each dimension
187       const Vec3fa denom = 2.0 * xfmVector(xfm0 - xfm1, p0 - p1);
188       const Vec3fa nom   = 2.0 * xfmPoint (xfm0, p0) - xfmPoint(xfm0, p1) - xfmPoint(xfm1, p0);
189       for (int dim = 0; dim < 3; ++dim)
190       {
191         if (!(std::abs(denom[dim]) > 0)) continue;
192 
193         const float tl = (nom[dim] + (bbox1.lower[dim] - bbox0.lower[dim])) / denom[dim];
194         if (tmin <= tl && tl <= tmax) {
195           const BBox3fa bt = lerp(bbox0, bbox1, tl);
196           const Vec3fa  pt = xfmPoint (lerp(xfm0, xfm1, tl), lerp(p0, p1, tl));
197           delta.lower[dim] = std::min(delta.lower[dim], pt[dim] - bt.lower[dim]);
198         }
199         const float tu = (nom[dim] + (bbox1.upper[dim] - bbox0.upper[dim])) / denom[dim];
200         if (tmin <= tu && tu <= tmax) {
201           const BBox3fa bt = lerp(bbox0, bbox1, tu);
202           const Vec3fa  pt = xfmPoint(lerp(xfm0, xfm1, tu), lerp(p0, p1, tu));
203           delta.upper[dim] = std::max(delta.upper[dim], pt[dim] - bt.upper[dim]);
204         }
205       }
206     }
207     return delta;
208   }
209 
210   /*
211      This function calculates the correction for the linear bounds
212      bbox0/bbox1 to properly bound the motion obtained by linearly
213      blending the quaternion transformations and applying the
214      resulting transformation to the linearly blended positions. The
215      extrema of the error to the linearly blended bounds has to get
216      calclated, the the linear bounds get corrected at the extremal
217      points. In difference to the previous function the extremal
218      points cannot get calculated analytically, thus we fall back to
219      some root solver.
220   */
221 
boundSegmentNonlinear(MotionDerivativeCoefficients const & motionDerivCoeffs,AffineSpace3fa const & xfm0,AffineSpace3fa const & xfm1,BBox3fa const & obbox0,BBox3fa const & obbox1,BBox3fa const & bbox0,BBox3fa const & bbox1,float tmin,float tmax)222   BBox3fa boundSegmentNonlinear(MotionDerivativeCoefficients const& motionDerivCoeffs,
223                                 AffineSpace3fa const& xfm0,
224                                 AffineSpace3fa const& xfm1,
225                                 BBox3fa const& obbox0,
226                                 BBox3fa const& obbox1,
227                                 BBox3fa const& bbox0,
228                                 BBox3fa const& bbox1,
229                                 float tmin,
230                                 float tmax)
231   {
232     BBox3fa delta(Vec3fa(0.f), Vec3fa(0.f));
233     float roots[32];
234     unsigned int maxNumRoots = 32;
235     unsigned int numRoots;
236     const Interval1f interval(tmin, tmax);
237 
238     // loop over bounding box corners
239     for (int ii = 0; ii < 2; ++ii)
240     for (int jj = 0; jj < 2; ++jj)
241     for (int kk = 0; kk < 2; ++kk)
242     {
243       Vec3fa p0(ii == 0 ? obbox0.lower.x : obbox0.upper.x,
244                 jj == 0 ? obbox0.lower.y : obbox0.upper.y,
245                 kk == 0 ? obbox0.lower.z : obbox0.upper.z);
246       Vec3fa p1(ii == 0 ? obbox1.lower.x : obbox1.upper.x,
247                 jj == 0 ? obbox1.lower.y : obbox1.upper.y,
248                 kk == 0 ? obbox1.lower.z : obbox1.upper.z);
249 
250       // get extrema of motion of bounding box corner for each dimension
251       for (int dim = 0; dim < 3; ++dim)
252       {
253         MotionDerivative motionDerivative(motionDerivCoeffs, dim, p0, p1);
254 
255         numRoots = motionDerivative.findRoots(interval, bbox0.lower[dim] - bbox1.lower[dim], roots, maxNumRoots);
256         for (unsigned int r = 0; r < numRoots; ++r) {
257           float t = roots[r];
258           const BBox3fa bt = lerp(bbox0, bbox1, t);
259           const Vec3fa  pt = xfmPoint(slerp(xfm0, xfm1, t), lerp(p0, p1, t));
260           delta.lower[dim] = std::min(delta.lower[dim], pt[dim] - bt.lower[dim]);
261         }
262 
263         numRoots = motionDerivative.findRoots(interval, bbox0.upper[dim] - bbox1.upper[dim], roots, maxNumRoots);
264         for (unsigned int r = 0; r < numRoots; ++r) {
265           float t = roots[r];
266           const BBox3fa bt = lerp(bbox0, bbox1, t);
267           const Vec3fa  pt = xfmPoint(slerp(xfm0, xfm1, t), lerp(p0, p1, t));
268           delta.upper[dim] = std::max(delta.upper[dim], pt[dim] - bt.upper[dim]);
269         }
270       }
271     }
272 
273     return delta;
274   }
275 
boundSegment(size_t itime,BBox3fa const & obbox0,BBox3fa const & obbox1,BBox3fa const & bbox0,BBox3fa const & bbox1,float tmin,float tmax) const276   BBox3fa Instance::boundSegment(size_t itime,
277       BBox3fa const& obbox0, BBox3fa const& obbox1,
278       BBox3fa const& bbox0, BBox3fa const& bbox1,
279       float tmin, float tmax) const
280   {
281     if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) {
282       auto const& xfm0 = local2world[itime];
283       auto const& xfm1 = local2world[itime+1];
284       MotionDerivativeCoefficients motionDerivCoeffs(local2world[itime+0], local2world[itime+1]);
285       return boundSegmentNonlinear(motionDerivCoeffs, xfm0, xfm1, obbox0, obbox1, bbox0, bbox1, tmin, tmax);
286     } else {
287       auto const& xfm0 = local2world[itime];
288       auto const& xfm1 = local2world[itime+1];
289       return boundSegmentLinear(xfm0, xfm1, obbox0, obbox1, bbox0, bbox1, tmin, tmax);
290     }
291   }
292 
nonlinearBounds(const BBox1f & time_range_in,const BBox1f & geom_time_range,float geom_time_segments) const293   LBBox3fa Instance::nonlinearBounds(const BBox1f& time_range_in,
294                                      const BBox1f& geom_time_range,
295                                      float geom_time_segments) const
296   {
297     LBBox3fa lbbox = empty;
298     /* normalize global time_range_in to local geom_time_range */
299     const BBox1f time_range((time_range_in.lower-geom_time_range.lower)/geom_time_range.size(),
300                             (time_range_in.upper-geom_time_range.lower)/geom_time_range.size());
301 
302     const float lower = time_range.lower*geom_time_segments;
303     const float upper = time_range.upper*geom_time_segments;
304     const float ilowerf = floor(lower);
305     const float iupperf = ceil(upper);
306     const float ilowerfc = max(0.0f,ilowerf);
307     const float iupperfc = min(iupperf,geom_time_segments);
308     const int   ilowerc = (int)ilowerfc;
309     const int   iupperc = (int)iupperfc;
310     assert(iupperc-ilowerc > 0);
311 
312     /* this larger iteration range guarantees that we process borders of geom_time_range is (partially) inside time_range_in */
313     const int ilower_iter = max(-1,(int)ilowerf);
314     const int iupper_iter = min((int)iupperf,(int)geom_time_segments+1);
315 
316     if (iupper_iter-ilower_iter == 1)
317     {
318       const float f0 = (ilowerc / geom_time_segments - time_range.lower) / time_range.size();
319       const float f1 = (iupperc / geom_time_segments - time_range.lower) / time_range.size();
320 
321       lbbox.bounds0 = bounds(ilowerc, iupperc, max(0.0f,lower-ilowerfc));
322       lbbox.bounds1 = bounds(iupperc, ilowerc, max(0.0f,iupperfc-upper));
323 
324       const BBox3fa d = boundSegment(ilowerc, getObjectBounds(ilowerc), getObjectBounds(iupperc),
325         lerp(lbbox.bounds0, lbbox.bounds1, f0), lerp(lbbox.bounds0, lbbox.bounds1, f1),
326         max(0.f, lower-ilowerfc), 1.f - max(0.f, iupperfc-upper));
327 
328       lbbox.bounds0.lower += d.lower; lbbox.bounds1.lower += d.lower;
329       lbbox.bounds0.upper += d.upper; lbbox.bounds1.upper += d.upper;
330     }
331     else
332     {
333       BBox3fa b0 = bounds(ilowerc, ilowerc+1, lower-ilowerfc);
334       BBox3fa b1 = bounds(iupperc, iupperc-1, iupperfc-upper);
335 
336       for (int i = ilower_iter+1; i < iupper_iter; i++)
337       {
338         const float f = (float(i) / geom_time_segments - time_range.lower) / time_range.size();
339         const BBox3fa bt = lerp(b0, b1, f);
340         const BBox3fa bi = bounds(0, i);
341         const Vec3fa dlower = min(bi.lower-bt.lower, Vec3fa(zero));
342         const Vec3fa dupper = max(bi.upper-bt.upper, Vec3fa(zero));
343         b0.lower += dlower; b1.lower += dlower;
344         b0.upper += dupper; b1.upper += dupper;
345       }
346 
347       BBox3fa delta(Vec3fa(0.f), Vec3fa(0.f));
348       for (int i = max(1, ilower_iter+1); i <= min((int)fnumTimeSegments, iupper_iter); i++)
349       {
350         // compute local times for local itimes
351         const float f0 = ((i-1) / geom_time_segments - time_range.lower) / time_range.size();
352         const float f1 = ((i  ) / geom_time_segments - time_range.lower) / time_range.size();
353         const float tmin = (i == max(1, ilower_iter+1))                           ?       max(0.f, lower-ilowerfc) : 0.f;
354         const float tmax = (i == max(1, min((int)fnumTimeSegments, iupper_iter))) ? 1.f - max(0.f, iupperfc-upper) : 1.f;
355         const BBox3fa d = boundSegment(i-1, getObjectBounds(i-1), getObjectBounds(i),
356           lerp(b0, b1, f0), lerp(b0, b1, f1), tmin, tmax);
357         delta.lower = min(delta.lower, d.lower);
358         delta.upper = max(delta.upper, d.upper);
359       }
360       b0.lower += delta.lower; b1.lower += delta.lower;
361       b0.upper += delta.upper; b1.upper += delta.upper;
362 
363       lbbox.bounds0 = b0;
364       lbbox.bounds1 = b1;
365     }
366     return lbbox;
367   }
368 #endif
369 
370   namespace isa
371   {
createInstance(Device * device)372     Instance* createInstance(Device* device) {
373       return new InstanceISA(device);
374     }
375   }
376 }
377