1 // Copyright 2009-2021 Intel Corporation 2 // SPDX-License-Identifier: Apache-2.0 3 4 #include "scene_instance.h" 5 #include "scene.h" 6 #include "motion_derivative.h" 7 namespace embree 8 { 9 #if defined(EMBREE_LOWEST_ISA) 10 Instance(Device * device,Accel * object,unsigned int numTimeSteps)11 Instance::Instance (Device* device, Accel* object, unsigned int numTimeSteps) 12 : Geometry(device,Geometry::GTY_INSTANCE_CHEAP,1,numTimeSteps) 13 , object(object) 14 , local2world(nullptr) 15 { 16 if (object) object->refInc(); 17 gsubtype = GTY_SUBTYPE_INSTANCE_LINEAR; 18 world2local0 = one; 19 local2world = (AffineSpace3ff*) alignedMalloc(numTimeSteps*sizeof(AffineSpace3ff),16); 20 for (size_t i = 0; i < numTimeSteps; i++) 21 local2world[i] = one; 22 } 23 ~Instance()24 Instance::~Instance() 25 { 26 alignedFree(local2world); 27 if (object) object->refDec(); 28 } 29 setNumTimeSteps(unsigned int numTimeSteps_in)30 void Instance::setNumTimeSteps (unsigned int numTimeSteps_in) 31 { 32 if (numTimeSteps_in == numTimeSteps) 33 return; 34 35 AffineSpace3ff* local2world2 = (AffineSpace3ff*) alignedMalloc(numTimeSteps_in*sizeof(AffineSpace3ff),16); 36 37 for (size_t i = 0; i < min(numTimeSteps, numTimeSteps_in); i++) { 38 local2world2[i] = local2world[i]; 39 } 40 41 for (size_t i = numTimeSteps; i < numTimeSteps_in; i++) { 42 local2world2[i] = one; 43 } 44 45 alignedFree(local2world); 46 local2world = local2world2; 47 48 Geometry::setNumTimeSteps(numTimeSteps_in); 49 } 50 setInstancedScene(const Ref<Scene> & scene)51 void Instance::setInstancedScene(const Ref<Scene>& scene) 52 { 53 if (object) object->refDec(); 54 object = scene.ptr; 55 if (object) object->refInc(); 56 Geometry::update(); 57 } 58 59 #if 0 60 void Instance::preCommit() 61 { 62 #if 0 // disable expensive instance optimization for now 63 // decide whether we're an expensive instnace or not 64 auto numExpensiveGeo = static_cast<Scene*> (object)->getNumPrimitives(CurveGeometry::geom_type, false) 65 + static_cast<Scene*> (object)->getNumPrimitives(CurveGeometry::geom_type, true) 66 + static_cast<Scene*> (object)->getNumPrimitives(UserGeometry::geom_type, false) 67 + static_cast<Scene*> (object)->getNumPrimitives(UserGeometry::geom_type, true); 68 if (numExpensiveGeo > 0) { 69 this->gtype = GTY_INSTANCE_EXPENSIVE; 70 } 71 #endif 72 73 Geometry::preCommit(); 74 } 75 #endif 76 addElementsToCount(GeometryCounts & counts) const77 void Instance::addElementsToCount (GeometryCounts & counts) const 78 { 79 if (Geometry::GTY_INSTANCE_CHEAP == this->gtype) { 80 if (1 == numTimeSteps) { 81 counts.numInstancesCheap += numPrimitives; 82 } else { 83 counts.numMBInstancesCheap += numPrimitives; 84 } 85 } else { 86 if (1 == numTimeSteps) { 87 counts.numInstancesExpensive += numPrimitives; 88 } else { 89 counts.numMBInstancesExpensive += numPrimitives; 90 } 91 } 92 } 93 setTransform(const AffineSpace3fa & xfm,unsigned int timeStep)94 void Instance::setTransform(const AffineSpace3fa& xfm, unsigned int timeStep) 95 { 96 if (timeStep >= numTimeSteps) 97 throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid timestep"); 98 99 local2world[timeStep] = xfm; 100 gsubtype = GTY_SUBTYPE_INSTANCE_LINEAR; 101 } 102 setQuaternionDecomposition(const AffineSpace3ff & qd,unsigned int timeStep)103 void Instance::setQuaternionDecomposition(const AffineSpace3ff& qd, unsigned int timeStep) 104 { 105 if (timeStep >= numTimeSteps) 106 throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid timestep"); 107 108 local2world[timeStep] = qd; 109 gsubtype = GTY_SUBTYPE_INSTANCE_QUATERNION; 110 } 111 getTransform(float time)112 AffineSpace3fa Instance::getTransform(float time) 113 { 114 if (likely(numTimeSteps <= 1)) 115 return getLocal2World(); 116 else 117 return getLocal2World(time); 118 } 119 setMask(unsigned mask)120 void Instance::setMask (unsigned mask) 121 { 122 this->mask = mask; 123 Geometry::update(); 124 } 125 commit()126 void Instance::commit() 127 { 128 if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) 129 world2local0 = rcp(quaternionDecompositionToAffineSpace(local2world[0])); 130 else 131 world2local0 = rcp(local2world[0]); 132 133 Geometry::commit(); 134 } 135 136 /* 137 138 This function calculates the correction for the linear bounds 139 bbox0/bbox1 to properly bound the motion obtained when linearly 140 blending the transformation and applying the resulting 141 transformation to the linearly blended positions 142 lerp(xfm0,xfm1,t)*lerp(p0,p1,t). The extrema of the error to the 143 linearly blended bounds have to get calculates 144 f = lerp(xfm0,xfm1,t)*lerp(p0,p1,t) - lerp(bounds0,bounds1,t). For 145 the position where this error gets extreme we have to correct the 146 linear bounds. The derivative of this function f can get 147 calculates as 148 149 f' = (lerp(A0,A1,t) lerp(p0,p1,t))` - (lerp(bounds0,bounds1,t))` 150 = lerp'(A0,A1,t) lerp(p0,p1,t) + lerp(A0,A1,t) lerp'(p0,p1,t) - (bounds1-bounds0) 151 = (A1-A0) lerp(p0,p1,t) + lerp(A0,A1,t) (p1-p0) - (bounds1-bounds0) 152 = (A1-A0) (p0 + t*(p1-p0)) + (A0 + t*(A1-A0)) (p1-p0) - (bounds1-bounds0) 153 = (A1-A0) * p0 + t*(A1-A0)*(p1-p0) + A0*(p1-p0) + t*(A1-A0)*(p1-p0) - (bounds1-bounds0) 154 = (A1-A0) * p0 + A0*(p1-p0) - (bounds1-bounds0) + t* ((A1-A0)*(p1-p0) + (A1-A0)*(p1-p0)) 155 156 The t value where this function has an extremal point is thus: 157 158 => t = - ((A1-A0) * p0 + A0*(p1-p0) + (bounds1-bounds0)) / (2*(A1-A0)*(p1-p0)) 159 = (2*A0*p0 - A1*p0 - A0*p1 + (bounds1-bounds0)) / (2*(A1-A0)*(p1-p0)) 160 161 */ 162 boundSegmentLinear(AffineSpace3fa const & xfm0,AffineSpace3fa const & xfm1,BBox3fa const & obbox0,BBox3fa const & obbox1,BBox3fa const & bbox0,BBox3fa const & bbox1,float tmin,float tmax)163 BBox3fa boundSegmentLinear(AffineSpace3fa const& xfm0, 164 AffineSpace3fa const& xfm1, 165 BBox3fa const& obbox0, 166 BBox3fa const& obbox1, 167 BBox3fa const& bbox0, 168 BBox3fa const& bbox1, 169 float tmin, 170 float tmax) 171 { 172 BBox3fa delta(Vec3fa(0.f), Vec3fa(0.f)); 173 174 // loop over bounding box corners 175 for (int ii = 0; ii < 2; ++ii) 176 for (int jj = 0; jj < 2; ++jj) 177 for (int kk = 0; kk < 2; ++kk) 178 { 179 Vec3fa p0(ii == 0 ? obbox0.lower.x : obbox0.upper.x, 180 jj == 0 ? obbox0.lower.y : obbox0.upper.y, 181 kk == 0 ? obbox0.lower.z : obbox0.upper.z); 182 Vec3fa p1(ii == 0 ? obbox1.lower.x : obbox1.upper.x, 183 jj == 0 ? obbox1.lower.y : obbox1.upper.y, 184 kk == 0 ? obbox1.lower.z : obbox1.upper.z); 185 186 // get extrema of motion of bounding box corner for each dimension 187 const Vec3fa denom = 2.0 * xfmVector(xfm0 - xfm1, p0 - p1); 188 const Vec3fa nom = 2.0 * xfmPoint (xfm0, p0) - xfmPoint(xfm0, p1) - xfmPoint(xfm1, p0); 189 for (int dim = 0; dim < 3; ++dim) 190 { 191 if (!(std::abs(denom[dim]) > 0)) continue; 192 193 const float tl = (nom[dim] + (bbox1.lower[dim] - bbox0.lower[dim])) / denom[dim]; 194 if (tmin <= tl && tl <= tmax) { 195 const BBox3fa bt = lerp(bbox0, bbox1, tl); 196 const Vec3fa pt = xfmPoint (lerp(xfm0, xfm1, tl), lerp(p0, p1, tl)); 197 delta.lower[dim] = std::min(delta.lower[dim], pt[dim] - bt.lower[dim]); 198 } 199 const float tu = (nom[dim] + (bbox1.upper[dim] - bbox0.upper[dim])) / denom[dim]; 200 if (tmin <= tu && tu <= tmax) { 201 const BBox3fa bt = lerp(bbox0, bbox1, tu); 202 const Vec3fa pt = xfmPoint(lerp(xfm0, xfm1, tu), lerp(p0, p1, tu)); 203 delta.upper[dim] = std::max(delta.upper[dim], pt[dim] - bt.upper[dim]); 204 } 205 } 206 } 207 return delta; 208 } 209 210 /* 211 This function calculates the correction for the linear bounds 212 bbox0/bbox1 to properly bound the motion obtained by linearly 213 blending the quaternion transformations and applying the 214 resulting transformation to the linearly blended positions. The 215 extrema of the error to the linearly blended bounds has to get 216 calclated, the the linear bounds get corrected at the extremal 217 points. In difference to the previous function the extremal 218 points cannot get calculated analytically, thus we fall back to 219 some root solver. 220 */ 221 boundSegmentNonlinear(MotionDerivativeCoefficients const & motionDerivCoeffs,AffineSpace3fa const & xfm0,AffineSpace3fa const & xfm1,BBox3fa const & obbox0,BBox3fa const & obbox1,BBox3fa const & bbox0,BBox3fa const & bbox1,float tmin,float tmax)222 BBox3fa boundSegmentNonlinear(MotionDerivativeCoefficients const& motionDerivCoeffs, 223 AffineSpace3fa const& xfm0, 224 AffineSpace3fa const& xfm1, 225 BBox3fa const& obbox0, 226 BBox3fa const& obbox1, 227 BBox3fa const& bbox0, 228 BBox3fa const& bbox1, 229 float tmin, 230 float tmax) 231 { 232 BBox3fa delta(Vec3fa(0.f), Vec3fa(0.f)); 233 float roots[32]; 234 unsigned int maxNumRoots = 32; 235 unsigned int numRoots; 236 const Interval1f interval(tmin, tmax); 237 238 // loop over bounding box corners 239 for (int ii = 0; ii < 2; ++ii) 240 for (int jj = 0; jj < 2; ++jj) 241 for (int kk = 0; kk < 2; ++kk) 242 { 243 Vec3fa p0(ii == 0 ? obbox0.lower.x : obbox0.upper.x, 244 jj == 0 ? obbox0.lower.y : obbox0.upper.y, 245 kk == 0 ? obbox0.lower.z : obbox0.upper.z); 246 Vec3fa p1(ii == 0 ? obbox1.lower.x : obbox1.upper.x, 247 jj == 0 ? obbox1.lower.y : obbox1.upper.y, 248 kk == 0 ? obbox1.lower.z : obbox1.upper.z); 249 250 // get extrema of motion of bounding box corner for each dimension 251 for (int dim = 0; dim < 3; ++dim) 252 { 253 MotionDerivative motionDerivative(motionDerivCoeffs, dim, p0, p1); 254 255 numRoots = motionDerivative.findRoots(interval, bbox0.lower[dim] - bbox1.lower[dim], roots, maxNumRoots); 256 for (unsigned int r = 0; r < numRoots; ++r) { 257 float t = roots[r]; 258 const BBox3fa bt = lerp(bbox0, bbox1, t); 259 const Vec3fa pt = xfmPoint(slerp(xfm0, xfm1, t), lerp(p0, p1, t)); 260 delta.lower[dim] = std::min(delta.lower[dim], pt[dim] - bt.lower[dim]); 261 } 262 263 numRoots = motionDerivative.findRoots(interval, bbox0.upper[dim] - bbox1.upper[dim], roots, maxNumRoots); 264 for (unsigned int r = 0; r < numRoots; ++r) { 265 float t = roots[r]; 266 const BBox3fa bt = lerp(bbox0, bbox1, t); 267 const Vec3fa pt = xfmPoint(slerp(xfm0, xfm1, t), lerp(p0, p1, t)); 268 delta.upper[dim] = std::max(delta.upper[dim], pt[dim] - bt.upper[dim]); 269 } 270 } 271 } 272 273 return delta; 274 } 275 boundSegment(size_t itime,BBox3fa const & obbox0,BBox3fa const & obbox1,BBox3fa const & bbox0,BBox3fa const & bbox1,float tmin,float tmax) const276 BBox3fa Instance::boundSegment(size_t itime, 277 BBox3fa const& obbox0, BBox3fa const& obbox1, 278 BBox3fa const& bbox0, BBox3fa const& bbox1, 279 float tmin, float tmax) const 280 { 281 if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) { 282 auto const& xfm0 = local2world[itime]; 283 auto const& xfm1 = local2world[itime+1]; 284 MotionDerivativeCoefficients motionDerivCoeffs(local2world[itime+0], local2world[itime+1]); 285 return boundSegmentNonlinear(motionDerivCoeffs, xfm0, xfm1, obbox0, obbox1, bbox0, bbox1, tmin, tmax); 286 } else { 287 auto const& xfm0 = local2world[itime]; 288 auto const& xfm1 = local2world[itime+1]; 289 return boundSegmentLinear(xfm0, xfm1, obbox0, obbox1, bbox0, bbox1, tmin, tmax); 290 } 291 } 292 nonlinearBounds(const BBox1f & time_range_in,const BBox1f & geom_time_range,float geom_time_segments) const293 LBBox3fa Instance::nonlinearBounds(const BBox1f& time_range_in, 294 const BBox1f& geom_time_range, 295 float geom_time_segments) const 296 { 297 LBBox3fa lbbox = empty; 298 /* normalize global time_range_in to local geom_time_range */ 299 const BBox1f time_range((time_range_in.lower-geom_time_range.lower)/geom_time_range.size(), 300 (time_range_in.upper-geom_time_range.lower)/geom_time_range.size()); 301 302 const float lower = time_range.lower*geom_time_segments; 303 const float upper = time_range.upper*geom_time_segments; 304 const float ilowerf = floor(lower); 305 const float iupperf = ceil(upper); 306 const float ilowerfc = max(0.0f,ilowerf); 307 const float iupperfc = min(iupperf,geom_time_segments); 308 const int ilowerc = (int)ilowerfc; 309 const int iupperc = (int)iupperfc; 310 assert(iupperc-ilowerc > 0); 311 312 /* this larger iteration range guarantees that we process borders of geom_time_range is (partially) inside time_range_in */ 313 const int ilower_iter = max(-1,(int)ilowerf); 314 const int iupper_iter = min((int)iupperf,(int)geom_time_segments+1); 315 316 if (iupper_iter-ilower_iter == 1) 317 { 318 const float f0 = (ilowerc / geom_time_segments - time_range.lower) / time_range.size(); 319 const float f1 = (iupperc / geom_time_segments - time_range.lower) / time_range.size(); 320 321 lbbox.bounds0 = bounds(ilowerc, iupperc, max(0.0f,lower-ilowerfc)); 322 lbbox.bounds1 = bounds(iupperc, ilowerc, max(0.0f,iupperfc-upper)); 323 324 const BBox3fa d = boundSegment(ilowerc, getObjectBounds(ilowerc), getObjectBounds(iupperc), 325 lerp(lbbox.bounds0, lbbox.bounds1, f0), lerp(lbbox.bounds0, lbbox.bounds1, f1), 326 max(0.f, lower-ilowerfc), 1.f - max(0.f, iupperfc-upper)); 327 328 lbbox.bounds0.lower += d.lower; lbbox.bounds1.lower += d.lower; 329 lbbox.bounds0.upper += d.upper; lbbox.bounds1.upper += d.upper; 330 } 331 else 332 { 333 BBox3fa b0 = bounds(ilowerc, ilowerc+1, lower-ilowerfc); 334 BBox3fa b1 = bounds(iupperc, iupperc-1, iupperfc-upper); 335 336 for (int i = ilower_iter+1; i < iupper_iter; i++) 337 { 338 const float f = (float(i) / geom_time_segments - time_range.lower) / time_range.size(); 339 const BBox3fa bt = lerp(b0, b1, f); 340 const BBox3fa bi = bounds(0, i); 341 const Vec3fa dlower = min(bi.lower-bt.lower, Vec3fa(zero)); 342 const Vec3fa dupper = max(bi.upper-bt.upper, Vec3fa(zero)); 343 b0.lower += dlower; b1.lower += dlower; 344 b0.upper += dupper; b1.upper += dupper; 345 } 346 347 BBox3fa delta(Vec3fa(0.f), Vec3fa(0.f)); 348 for (int i = max(1, ilower_iter+1); i <= min((int)fnumTimeSegments, iupper_iter); i++) 349 { 350 // compute local times for local itimes 351 const float f0 = ((i-1) / geom_time_segments - time_range.lower) / time_range.size(); 352 const float f1 = ((i ) / geom_time_segments - time_range.lower) / time_range.size(); 353 const float tmin = (i == max(1, ilower_iter+1)) ? max(0.f, lower-ilowerfc) : 0.f; 354 const float tmax = (i == max(1, min((int)fnumTimeSegments, iupper_iter))) ? 1.f - max(0.f, iupperfc-upper) : 1.f; 355 const BBox3fa d = boundSegment(i-1, getObjectBounds(i-1), getObjectBounds(i), 356 lerp(b0, b1, f0), lerp(b0, b1, f1), tmin, tmax); 357 delta.lower = min(delta.lower, d.lower); 358 delta.upper = max(delta.upper, d.upper); 359 } 360 b0.lower += delta.lower; b1.lower += delta.lower; 361 b0.upper += delta.upper; b1.upper += delta.upper; 362 363 lbbox.bounds0 = b0; 364 lbbox.bounds1 = b1; 365 } 366 return lbbox; 367 } 368 #endif 369 370 namespace isa 371 { createInstance(Device * device)372 Instance* createInstance(Device* device) { 373 return new InstanceISA(device); 374 } 375 } 376 } 377