1 // Copyright 2019-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3
4 #include "CPUDevice.h"
5 #include "../common/Data.h"
6 #include "../common/export_util.h"
7 #include "../iterator/Iterator.h"
8 #include "../observer/Observer.h"
9 #include "../sampler/Sampler.h"
10 #include "../volume/Volume.h"
11 #include "CPUDevice_ispc.h"
12
13 namespace openvkl {
14 namespace cpu_device {
15
16 template <int W>
supportsWidth(int width)17 bool CPUDevice<W>::supportsWidth(int width)
18 {
19 return width == W || width == 4 || width == 8 || width == 16;
20 }
21
22 template <int W>
getNativeSIMDWidth()23 int CPUDevice<W>::getNativeSIMDWidth()
24 {
25 return CALL_ISPC(ISPC_getProgramCount);
26 }
27
28 template <int W>
commit()29 void CPUDevice<W>::commit()
30 {
31 Device::commit();
32 }
33
34 template <int W>
commit(VKLObject object)35 void CPUDevice<W>::commit(VKLObject object)
36 {
37 ManagedObject *managedObject = (ManagedObject *)object;
38 managedObject->commit();
39 }
40
41 template <int W>
release(VKLObject object)42 void CPUDevice<W>::release(VKLObject object)
43 {
44 ManagedObject *managedObject = (ManagedObject *)object;
45 managedObject->refDec();
46 }
47
48 ///////////////////////////////////////////////////////////////////////////
49 // Data ///////////////////////////////////////////////////////////////////
50 ///////////////////////////////////////////////////////////////////////////
51
52 template <int W>
newData(size_t numItems,VKLDataType dataType,const void * source,VKLDataCreationFlags dataCreationFlags,size_t byteStride)53 VKLData CPUDevice<W>::newData(size_t numItems,
54 VKLDataType dataType,
55 const void *source,
56 VKLDataCreationFlags dataCreationFlags,
57 size_t byteStride)
58 {
59 Data *data =
60 new Data(numItems, dataType, source, dataCreationFlags, byteStride);
61 return (VKLData)data;
62 }
63
64 ///////////////////////////////////////////////////////////////////////////
65 // Observer ///////////////////////////////////////////////////////////////
66 ///////////////////////////////////////////////////////////////////////////
67
68 template <int W>
newObserver(VKLVolume volume,const char * type)69 VKLObserver CPUDevice<W>::newObserver(VKLVolume volume, const char *type)
70 {
71 auto &object = referenceFromHandle<Volume<W>>(volume);
72 Observer<W> *observer = object.newObserver(type);
73 return (VKLObserver)observer;
74 }
75
76 template <int W>
newObserver(VKLSampler sampler,const char * type)77 VKLObserver CPUDevice<W>::newObserver(VKLSampler sampler, const char *type)
78 {
79 auto &object = referenceFromHandle<Sampler<W>>(sampler);
80 Observer<W> *observer = object.newObserver(type);
81 return (VKLObserver)observer;
82 }
83
84 template <int W>
mapObserver(VKLObserver observer)85 const void *CPUDevice<W>::mapObserver(VKLObserver observer)
86 {
87 auto &observerObject = referenceFromHandle<Observer<W>>(observer);
88 return observerObject.map();
89 }
90
91 template <int W>
unmapObserver(VKLObserver observer)92 void CPUDevice<W>::unmapObserver(VKLObserver observer)
93 {
94 auto &observerObject = referenceFromHandle<Observer<W>>(observer);
95 observerObject.unmap();
96 }
97
98 template <int W>
getObserverElementType(VKLObserver observer) const99 VKLDataType CPUDevice<W>::getObserverElementType(VKLObserver observer) const
100 {
101 auto &observerObject = referenceFromHandle<Observer<W>>(observer);
102 return observerObject.getElementType();
103 }
104
105 template <int W>
getObserverElementSize(VKLObserver observer) const106 size_t CPUDevice<W>::getObserverElementSize(VKLObserver observer) const
107 {
108 auto &observerObject = referenceFromHandle<Observer<W>>(observer);
109 return observerObject.getElementSize();
110 }
111
112 template <int W>
getObserverNumElements(VKLObserver observer) const113 size_t CPUDevice<W>::getObserverNumElements(VKLObserver observer) const
114 {
115 auto &observerObject = referenceFromHandle<Observer<W>>(observer);
116 return observerObject.getNumElements();
117 }
118
119 ///////////////////////////////////////////////////////////////////////////
120 // Interval iterator //////////////////////////////////////////////////////
121 ///////////////////////////////////////////////////////////////////////////
122
123 template <int W>
newIntervalIteratorContext(VKLSampler sampler)124 VKLIntervalIteratorContext CPUDevice<W>::newIntervalIteratorContext(
125 VKLSampler sampler)
126 {
127 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
128 return (VKLIntervalIteratorContext)samplerObject
129 .getIntervalIteratorFactory()
130 .newContext(samplerObject);
131 }
132
133 ///////////////////////////////////////////////////////////////////////////
134 // Hit iterator ///////////////////////////////////////////////////////////
135 ///////////////////////////////////////////////////////////////////////////
136
137 template <int W>
newHitIteratorContext(VKLSampler sampler)138 VKLHitIteratorContext CPUDevice<W>::newHitIteratorContext(
139 VKLSampler sampler)
140 {
141 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
142 return (VKLHitIteratorContext)samplerObject.getHitIteratorFactory()
143 .newContext(samplerObject);
144 }
145
146 ///////////////////////////////////////////////////////////////////////////
147 // Parameters /////////////////////////////////////////////////////////////
148 ///////////////////////////////////////////////////////////////////////////
149
150 template <int W>
setBool(VKLObject object,const char * name,const bool b)151 void CPUDevice<W>::setBool(VKLObject object, const char *name, const bool b)
152 {
153 ManagedObject *managedObject = (ManagedObject *)object;
154 managedObject->setParam(name, b);
155 }
156
157 template <int W>
set1f(VKLObject object,const char * name,const float x)158 void CPUDevice<W>::set1f(VKLObject object, const char *name, const float x)
159 {
160 ManagedObject *managedObject = (ManagedObject *)object;
161 managedObject->setParam(name, x);
162 }
163
164 template <int W>
set1i(VKLObject object,const char * name,const int x)165 void CPUDevice<W>::set1i(VKLObject object, const char *name, const int x)
166 {
167 ManagedObject *managedObject = (ManagedObject *)object;
168 managedObject->setParam(name, x);
169 }
170
171 template <int W>
setVec3f(VKLObject object,const char * name,const vec3f & v)172 void CPUDevice<W>::setVec3f(VKLObject object,
173 const char *name,
174 const vec3f &v)
175 {
176 ManagedObject *managedObject = (ManagedObject *)object;
177 managedObject->setParam(name, v);
178 }
179
180 template <int W>
setVec3i(VKLObject object,const char * name,const vec3i & v)181 void CPUDevice<W>::setVec3i(VKLObject object,
182 const char *name,
183 const vec3i &v)
184 {
185 ManagedObject *managedObject = (ManagedObject *)object;
186 managedObject->setParam(name, v);
187 }
188
189 template <int W>
setObject(VKLObject object,const char * name,VKLObject setObject)190 void CPUDevice<W>::setObject(VKLObject object,
191 const char *name,
192 VKLObject setObject)
193 {
194 ManagedObject *target = (ManagedObject *)object;
195 ManagedObject *value = (ManagedObject *)setObject;
196 target->setParam(name, value);
197 }
198
199 template <int W>
setString(VKLObject object,const char * name,const std::string & s)200 void CPUDevice<W>::setString(VKLObject object,
201 const char *name,
202 const std::string &s)
203 {
204 ManagedObject *managedObject = (ManagedObject *)object;
205 managedObject->setParam(name, s);
206 }
207
208 template <int W>
setVoidPtr(VKLObject object,const char * name,void * v)209 void CPUDevice<W>::setVoidPtr(VKLObject object, const char *name, void *v)
210 {
211 ManagedObject *managedObject = (ManagedObject *)object;
212 managedObject->setParam(name, v);
213 }
214
215 ///////////////////////////////////////////////////////////////////////////
216 // Sampler ////////////////////////////////////////////////////////////////
217 ///////////////////////////////////////////////////////////////////////////
218
219 template <int W>
newSampler(VKLVolume volume)220 VKLSampler CPUDevice<W>::newSampler(VKLVolume volume)
221 {
222 auto &volumeObject = referenceFromHandle<Volume<W>>(volume);
223 return (VKLSampler)volumeObject.newSampler();
224 }
225
226 #define __define_computeSampleN(WIDTH) \
227 template <int W> \
228 void CPUDevice<W>::computeSample##WIDTH( \
229 const int *valid, \
230 VKLSampler sampler, \
231 const vvec3fn<WIDTH> &objectCoordinates, \
232 float *samples, \
233 unsigned int attributeIndex, \
234 const float *times) \
235 { \
236 computeSampleAnyWidth<WIDTH>( \
237 valid, sampler, objectCoordinates, samples, attributeIndex, times); \
238 }
239
240 __define_computeSampleN(4);
241 __define_computeSampleN(8);
242 __define_computeSampleN(16);
243
244 #undef __define_computeSampleN
245
246 // support a fast path for scalar sampling
247 template <int W>
computeSample1(const int * valid,VKLSampler sampler,const vvec3fn<1> & objectCoordinates,float * sample,unsigned int attributeIndex,const float * time)248 void CPUDevice<W>::computeSample1(const int *valid,
249 VKLSampler sampler,
250 const vvec3fn<1> &objectCoordinates,
251 float *sample,
252 unsigned int attributeIndex,
253 const float *time)
254 {
255 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
256 vfloatn<1> timeW(time, 1);
257 vfloatn<1> sampleW;
258 samplerObject.computeSample(
259 objectCoordinates, sampleW, attributeIndex, timeW);
260 *sample = sampleW[0];
261 }
262
263 template <int W>
computeSampleN(VKLSampler sampler,unsigned int N,const vvec3fn<1> * objectCoordinates,float * samples,unsigned int attributeIndex,const float * times)264 void CPUDevice<W>::computeSampleN(VKLSampler sampler,
265 unsigned int N,
266 const vvec3fn<1> *objectCoordinates,
267 float *samples,
268 unsigned int attributeIndex,
269 const float *times)
270 {
271 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
272 samplerObject.computeSampleN(
273 N, objectCoordinates, samples, attributeIndex, times);
274 }
275
276 #define __define_computeSampleMN(WIDTH) \
277 template <int W> \
278 void CPUDevice<W>::computeSampleM##WIDTH( \
279 const int *valid, \
280 VKLSampler sampler, \
281 const vvec3fn<WIDTH> &objectCoordinates, \
282 float *samples, \
283 unsigned int M, \
284 const unsigned int *attributeIndices, \
285 const float *times) \
286 { \
287 computeSampleMAnyWidth<WIDTH>(valid, \
288 sampler, \
289 objectCoordinates, \
290 samples, \
291 M, \
292 attributeIndices, \
293 times); \
294 }
295
296 __define_computeSampleMN(4);
297 __define_computeSampleMN(8);
298 __define_computeSampleMN(16);
299
300 #undef __define_computeSampleMN
301
302 // support a fast path for scalar sampling
303 template <int W>
computeSampleM1(const int * valid,VKLSampler sampler,const vvec3fn<1> & objectCoordinates,float * samples,unsigned int M,const unsigned int * attributeIndices,const float * time)304 void CPUDevice<W>::computeSampleM1(const int *valid,
305 VKLSampler sampler,
306 const vvec3fn<1> &objectCoordinates,
307 float *samples,
308 unsigned int M,
309 const unsigned int *attributeIndices,
310 const float *time)
311 {
312 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
313 vfloatn<1> timeW(time, 1);
314 samplerObject.computeSampleM(
315 objectCoordinates, samples, M, attributeIndices, timeW);
316 }
317
318 template <int W>
computeSampleMN(VKLSampler sampler,unsigned int N,const vvec3fn<1> * objectCoordinates,float * samples,unsigned int M,const unsigned int * attributeIndices,const float * times)319 void CPUDevice<W>::computeSampleMN(VKLSampler sampler,
320 unsigned int N,
321 const vvec3fn<1> *objectCoordinates,
322 float *samples,
323 unsigned int M,
324 const unsigned int *attributeIndices,
325 const float *times)
326 {
327 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
328 samplerObject.computeSampleMN(
329 N, objectCoordinates, samples, M, attributeIndices, times);
330 }
331
332 #define __define_computeGradientN(WIDTH) \
333 template <int W> \
334 void CPUDevice<W>::computeGradient##WIDTH( \
335 const int *valid, \
336 VKLSampler sampler, \
337 const vvec3fn<WIDTH> &objectCoordinates, \
338 vvec3fn<WIDTH> &gradients, \
339 unsigned int attributeIndex, \
340 const float *times) \
341 { \
342 computeGradientAnyWidth<WIDTH>( \
343 valid, sampler, objectCoordinates, gradients, attributeIndex, times); \
344 }
345
346 __define_computeGradientN(1);
347 __define_computeGradientN(4);
348 __define_computeGradientN(8);
349 __define_computeGradientN(16);
350
351 #undef __define_computeGradientN
352
353 template <int W>
computeGradientN(VKLSampler sampler,unsigned int N,const vvec3fn<1> * objectCoordinates,vvec3fn<1> * gradients,unsigned int attributeIndex,const float * times)354 void CPUDevice<W>::computeGradientN(VKLSampler sampler,
355 unsigned int N,
356 const vvec3fn<1> *objectCoordinates,
357 vvec3fn<1> *gradients,
358 unsigned int attributeIndex,
359 const float *times)
360 {
361 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
362 samplerObject.computeGradientN(
363 N, objectCoordinates, gradients, attributeIndex, times);
364 }
365
366 ///////////////////////////////////////////////////////////////////////////
367 // Volume /////////////////////////////////////////////////////////////////
368 ///////////////////////////////////////////////////////////////////////////
369
370 template <int W>
newVolume(const char * type)371 VKLVolume CPUDevice<W>::newVolume(const char *type)
372 {
373 // warn for deprecated snake case volume types
374 std::string typeStr(type);
375
376 if (typeStr.find("_") != std::string::npos) {
377 postLogMessage(this, VKL_LOG_WARNING)
378 << "volume type name '" << typeStr
379 << "' may be deprecated; volume type names are now camelCase (no "
380 "underscores)";
381 }
382
383 std::stringstream ss;
384 ss << type << "_" << W;
385
386 return (VKLVolume)Volume<W>::createInstance(this, ss.str());
387 }
388
389 template <int W>
getBoundingBox(VKLVolume volume)390 box3f CPUDevice<W>::getBoundingBox(VKLVolume volume)
391 {
392 auto &volumeObject = referenceFromHandle<Volume<W>>(volume);
393 return volumeObject.getBoundingBox();
394 }
395
396 template <int W>
getNumAttributes(VKLVolume volume)397 unsigned int CPUDevice<W>::getNumAttributes(VKLVolume volume)
398 {
399 auto &volumeObject = referenceFromHandle<Volume<W>>(volume);
400 return volumeObject.getNumAttributes();
401 }
402
403 template <int W>
getValueRange(VKLVolume volume,unsigned int attributeIndex)404 range1f CPUDevice<W>::getValueRange(VKLVolume volume,
405 unsigned int attributeIndex)
406 {
407 auto &volumeObject = referenceFromHandle<Volume<W>>(volume);
408 return volumeObject.getValueRange(attributeIndex);
409 }
410
411 ///////////////////////////////////////////////////////////////////////////
412 // Private methods ////////////////////////////////////////////////////////
413 ///////////////////////////////////////////////////////////////////////////
414
415 template <int W>
416 template <int OW>
417 typename std::enable_if<(OW < W), void>::type
computeSampleAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,float * samples,unsigned int attributeIndex,const float * times)418 CPUDevice<W>::computeSampleAnyWidth(const int *valid,
419 VKLSampler sampler,
420 const vvec3fn<OW> &objectCoordinates,
421 float *samples,
422 unsigned int attributeIndex,
423 const float *times)
424 {
425 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
426
427 vvec3fn<W> ocW = static_cast<vvec3fn<W>>(objectCoordinates);
428 vfloatn<W> tW(times, OW);
429
430 vintn<W> validW;
431 for (int i = 0; i < W; i++)
432 validW[i] = i < OW ? valid[i] : 0;
433
434 ocW.fill_inactive_lanes(validW);
435 tW.fill_inactive_lanes(validW);
436
437 vfloatn<W> samplesW;
438
439 samplerObject.computeSampleV(validW, ocW, samplesW, attributeIndex, tW);
440
441 for (int i = 0; i < OW; i++)
442 samples[i] = samplesW[i];
443 }
444
445 template <int W>
446 template <int OW>
447 typename std::enable_if<(OW == W), void>::type
computeSampleAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,float * samples,unsigned int attributeIndex,const float * times)448 CPUDevice<W>::computeSampleAnyWidth(const int *valid,
449 VKLSampler sampler,
450 const vvec3fn<OW> &objectCoordinates,
451 float *samples,
452 unsigned int attributeIndex,
453 const float *times)
454 {
455 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
456
457 vfloatn<W> tW(times, W);
458
459 vintn<W> validW;
460 for (int i = 0; i < W; i++)
461 validW[i] = valid[i];
462
463 vfloatn<W> samplesW;
464
465 samplerObject.computeSampleV(
466 validW, objectCoordinates, samplesW, attributeIndex, tW);
467
468 for (int i = 0; i < W; i++)
469 samples[i] = samplesW[i];
470 }
471
472 template <int W>
473 template <int OW>
474 typename std::enable_if<(OW > W), void>::type
computeSampleAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,float * samples,unsigned int attributeIndex,const float * times)475 CPUDevice<W>::computeSampleAnyWidth(const int *valid,
476 VKLSampler sampler,
477 const vvec3fn<OW> &objectCoordinates,
478 float *samples,
479 unsigned int attributeIndex,
480 const float *times)
481 {
482 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
483
484 vfloatn<OW> tOW(times, OW);
485
486 const int numPacks = OW / W + (OW % W != 0);
487
488 for (int packIndex = 0; packIndex < numPacks; packIndex++) {
489 vvec3fn<W> ocW = objectCoordinates.template extract_pack<W>(packIndex);
490 vfloatn<W> tW = tOW.template extract_pack<W>(packIndex);
491
492 vintn<W> validW;
493 for (int i = packIndex * W; i < (packIndex + 1) * W && i < OW; i++)
494 validW[i - packIndex * W] = i < OW ? valid[i] : 0;
495
496 ocW.fill_inactive_lanes(validW);
497 tW.fill_inactive_lanes(validW);
498
499 vfloatn<W> samplesW;
500
501 samplerObject.computeSampleV(validW, ocW, samplesW, attributeIndex, tW);
502
503 for (int i = packIndex * W; i < (packIndex + 1) * W && i < OW; i++)
504 samples[i] = samplesW[i - packIndex * W];
505 }
506 }
507
508 template <int W>
509 template <int OW>
510 typename std::enable_if<(OW < W), void>::type
computeSampleMAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,float * samples,unsigned int M,const unsigned int * attributeIndices,const float * times)511 CPUDevice<W>::computeSampleMAnyWidth(const int *valid,
512 VKLSampler sampler,
513 const vvec3fn<OW> &objectCoordinates,
514 float *samples,
515 unsigned int M,
516 const unsigned int *attributeIndices,
517 const float *times)
518 {
519 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
520
521 vvec3fn<W> ocW = static_cast<vvec3fn<W>>(objectCoordinates);
522 vfloatn<W> tW(times, OW);
523
524 vintn<W> validW;
525 for (int i = 0; i < W; i++)
526 validW[i] = i < OW ? valid[i] : 0;
527
528 ocW.fill_inactive_lanes(validW);
529 tW.fill_inactive_lanes(validW);
530
531 float *samplesW = (float *)alloca(M * W * sizeof(float));
532
533 samplerObject.computeSampleMV(
534 validW, ocW, samplesW, M, attributeIndices, tW);
535
536 for (unsigned int a = 0; a < M; a++) {
537 for (int i = 0; i < OW; i++) {
538 samples[a * OW + i] = samplesW[a * W + i];
539 }
540 }
541 }
542
543 template <int W>
544 template <int OW>
545 typename std::enable_if<(OW == W), void>::type
computeSampleMAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,float * samples,unsigned int M,const unsigned int * attributeIndices,const float * times)546 CPUDevice<W>::computeSampleMAnyWidth(const int *valid,
547 VKLSampler sampler,
548 const vvec3fn<OW> &objectCoordinates,
549 float *samples,
550 unsigned int M,
551 const unsigned int *attributeIndices,
552 const float *times)
553 {
554 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
555
556 vfloatn<W> timesW(times, W);
557
558 vintn<W> validW;
559 for (int i = 0; i < W; i++)
560 validW[i] = valid[i];
561
562 samplerObject.computeSampleMV(
563 validW, objectCoordinates, samples, M, attributeIndices, timesW);
564 }
565
566 template <int W>
567 template <int OW>
568 typename std::enable_if<(OW > W), void>::type
computeSampleMAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,float * samples,unsigned int M,const unsigned int * attributeIndices,const float * times)569 CPUDevice<W>::computeSampleMAnyWidth(const int *valid,
570 VKLSampler sampler,
571 const vvec3fn<OW> &objectCoordinates,
572 float *samples,
573 unsigned int M,
574 const unsigned int *attributeIndices,
575 const float *times)
576 {
577 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
578
579 vfloatn<OW> tOW(times, OW);
580
581 const int numPacks = OW / W + (OW % W != 0);
582
583 for (int packIndex = 0; packIndex < numPacks; packIndex++) {
584 vvec3fn<W> ocW = objectCoordinates.template extract_pack<W>(packIndex);
585 vfloatn<W> tW = tOW.template extract_pack<W>(packIndex);
586
587 vintn<W> validW;
588 for (int i = packIndex * W; i < (packIndex + 1) * W && i < OW; i++)
589 validW[i - packIndex * W] = i < OW ? valid[i] : 0;
590
591 ocW.fill_inactive_lanes(validW);
592 tW.fill_inactive_lanes(validW);
593
594 float *samplesW = (float *)alloca(M * W * sizeof(float));
595
596 samplerObject.computeSampleMV(
597 validW, ocW, samplesW, M, attributeIndices, tW);
598
599 for (unsigned int a = 0; a < M; a++) {
600 for (int i = packIndex * W; i < (packIndex + 1) * W && i < OW; i++)
601 samples[a * OW + i] = samplesW[a * W + (i - packIndex * W)];
602 }
603 }
604 }
605
606 template <int W>
607 template <int OW>
608 typename std::enable_if<(OW < W), void>::type
computeGradientAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,vvec3fn<OW> & gradients,unsigned int attributeIndex,const float * times)609 CPUDevice<W>::computeGradientAnyWidth(const int *valid,
610 VKLSampler sampler,
611 const vvec3fn<OW> &objectCoordinates,
612 vvec3fn<OW> &gradients,
613 unsigned int attributeIndex,
614 const float *times)
615 {
616 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
617
618 vvec3fn<W> ocW = static_cast<vvec3fn<W>>(objectCoordinates);
619 vfloatn<W> tW(times, OW);
620
621 vintn<W> validW;
622 for (int i = 0; i < W; i++)
623 validW[i] = i < OW ? valid[i] : 0;
624
625 ocW.fill_inactive_lanes(validW);
626 tW.fill_inactive_lanes(validW);
627
628 vvec3fn<W> gradientsW;
629
630 samplerObject.computeGradientV(
631 validW, ocW, gradientsW, attributeIndex, tW);
632
633 for (int i = 0; i < OW; i++) {
634 gradients.x[i] = gradientsW.x[i];
635 gradients.y[i] = gradientsW.y[i];
636 gradients.z[i] = gradientsW.z[i];
637 }
638 }
639
640 template <int W>
641 template <int OW>
642 typename std::enable_if<(OW == W), void>::type
computeGradientAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,vvec3fn<OW> & gradients,unsigned int attributeIndex,const float * times)643 CPUDevice<W>::computeGradientAnyWidth(const int *valid,
644 VKLSampler sampler,
645 const vvec3fn<OW> &objectCoordinates,
646 vvec3fn<OW> &gradients,
647 unsigned int attributeIndex,
648 const float *times)
649 {
650 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
651
652 vfloatn<W> tW(times, W);
653
654 vintn<W> validW;
655 for (int i = 0; i < W; i++)
656 validW[i] = valid[i];
657
658 samplerObject.computeGradientV(
659 validW, objectCoordinates, gradients, attributeIndex, tW);
660 }
661
662 template <int W>
663 template <int OW>
664 typename std::enable_if<(OW > W), void>::type
computeGradientAnyWidth(const int * valid,VKLSampler sampler,const vvec3fn<OW> & objectCoordinates,vvec3fn<OW> & gradients,unsigned int attributeIndex,const float * times)665 CPUDevice<W>::computeGradientAnyWidth(const int *valid,
666 VKLSampler sampler,
667 const vvec3fn<OW> &objectCoordinates,
668 vvec3fn<OW> &gradients,
669 unsigned int attributeIndex,
670 const float *times)
671 {
672 auto &samplerObject = referenceFromHandle<Sampler<W>>(sampler);
673
674 vfloatn<OW> tOW(times, OW);
675
676 const int numPacks = OW / W + (OW % W != 0);
677
678 for (int packIndex = 0; packIndex < numPacks; packIndex++) {
679 vvec3fn<W> ocW = objectCoordinates.template extract_pack<W>(packIndex);
680 vfloatn<W> tW = tOW.template extract_pack<W>(packIndex);
681
682 vintn<W> validW;
683 for (int i = packIndex * W; i < (packIndex + 1) * W && i < OW; i++)
684 validW[i - packIndex * W] = i < OW ? valid[i] : 0;
685
686 ocW.fill_inactive_lanes(validW);
687 tW.fill_inactive_lanes(validW);
688
689 vvec3fn<W> gradientsW;
690
691 samplerObject.computeGradientV(
692 validW, ocW, gradientsW, attributeIndex, tW);
693
694 for (int i = packIndex * W; i < (packIndex + 1) * W && i < OW; i++) {
695 gradients.x[i] = gradientsW.x[i - packIndex * W];
696 gradients.y[i] = gradientsW.y[i - packIndex * W];
697 gradients.z[i] = gradientsW.z[i - packIndex * W];
698 }
699 }
700 }
701
// Registers the CPU device instantiated at the build's target width
// (VKL_TARGET_WIDTH).
// NOTE(review): presumably CONCAT1 pastes the width onto "internal_cpu_" to
// form the registered device name; both macros are defined elsewhere —
// confirm how the name is consumed.
VKL_REGISTER_DEVICE(CPUDevice<VKL_TARGET_WIDTH>,
                    CONCAT1(internal_cpu_, VKL_TARGET_WIDTH))
704
705 } // namespace cpu_device
706 } // namespace openvkl
707
// Exported, C-linkage module entry point. Its name is built by CONCAT1 from
// "openvkl_init_module_cpu_device_" and VKL_TARGET_WIDTH. The body is empty.
// NOTE(review): presumably the symbol is resolved by name when the module is
// loaded, so it has to exist even though it does nothing — confirm against
// the loader.
extern "C" OPENVKL_DLLEXPORT void CONCAT1(openvkl_init_module_cpu_device_,
                                          VKL_TARGET_WIDTH)()
{
}
712