1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #include "../common/math/random_sampler.h"
5 #include "../common/math/sampling.h"
6 #include "../common/tutorial/tutorial_device.h"
7 #include "../common/tutorial/scene_device.h"
8 
9 namespace embree {
10 
/* selects which Embree query entry points the rendering and shading code below calls */
#define USE_INTERFACE 0 // 0 = stream, 1 = single rays/packets, 2 = single rays/packets using stream interface
/* number of occlusion rays traced per call to ambientOcclusionShading */
#define AMBIENT_OCCLUSION_SAMPLES 64
/* left disabled: would redirect the occlusion queries to the intersection entry points */
//#define rtcOccluded rtcIntersect
//#define rtcOccluded1M rtcIntersect1M

/* 1 = shade hits inline in renderTileStandard, any other value = use ambientOcclusionShading */
#define SIMPLE_SHADING 1
/* 1 = inline shading looks up the material and uses Kd of OBJ materials, otherwise a plain grey value */
#define OBJ_MATERIAL 1

/* scene description and instancing mode; both are defined outside this file */
extern "C" ISPCScene* g_ispc_scene;
extern "C" int g_instancing_mode;

/* scene data */
/* Embree scene handle: created on the first device_render call, released in device_cleanup */
RTCScene g_scene = nullptr;

/* clamp range (MIN/MAX) and scale factor used by updateEdgeLevel for per-edge subdivision levels */
#define MAX_EDGE_LEVEL 64.0f
#define MIN_EDGE_LEVEL  4.0f
#define LEVEL_FACTOR   64.0f
28 
updateEdgeLevel(ISPCSubdivMesh * mesh,const Vec3fa & cam_pos,const unsigned int e0,const unsigned int e1)29 inline float updateEdgeLevel( ISPCSubdivMesh* mesh, const Vec3fa& cam_pos, const unsigned int e0, const unsigned int e1)
30 {
31   const Vec3fa v0 = mesh->positions[0][mesh->position_indices[e0]];
32   const Vec3fa v1 = mesh->positions[0][mesh->position_indices[e1]];
33   const Vec3fa edge = v1-v0;
34   const Vec3fa P = 0.5f*(v1+v0);
35   const Vec3fa dist = cam_pos - P;
36   return max(min(LEVEL_FACTOR*(0.5f*length(edge)/length(dist)),MAX_EDGE_LEVEL),MIN_EDGE_LEVEL);
37 }
38 
39 
updateEdgeLevelBuffer(ISPCSubdivMesh * mesh,const Vec3fa & cam_pos,unsigned int startID,unsigned int endID)40 void updateEdgeLevelBuffer( ISPCSubdivMesh* mesh, const Vec3fa& cam_pos, unsigned int startID, unsigned int endID )
41 {
42   for (unsigned int f=startID; f<endID;f++) {
43     unsigned int e = mesh->face_offsets[f];
44     unsigned int N = mesh->verticesPerFace[f];
45     if (N == 4) /* fast path for quads */
46       for (unsigned int i=0; i<4; i++)
47         mesh->subdivlevel[e+i] =  updateEdgeLevel(mesh,cam_pos,e+(i+0),e+(i+1)%4);
48     else if (N == 3) /* fast path for triangles */
49       for (unsigned int i=0; i<3; i++)
50         mesh->subdivlevel[e+i] =  updateEdgeLevel(mesh,cam_pos,e+(i+0),e+(i+1)%3);
51     else /* fast path for general polygons */
52       for (unsigned int i=0; i<N; i++)
53         mesh->subdivlevel[e+i] =  updateEdgeLevel(mesh,cam_pos,e+(i+0),e+(i+1)%N);
54   }
55 }
56 
57 #if defined(ISPC)
updateSubMeshEdgeLevelBufferTask(int taskIndex,int threadIndex,ISPCSubdivMesh * mesh,const Vec3fa & cam_pos)58 void updateSubMeshEdgeLevelBufferTask (int taskIndex, int threadIndex,  ISPCSubdivMesh* mesh, const Vec3fa& cam_pos )
59 {
60   const unsigned int size = mesh->numFaces;
61   const unsigned int startID = ((taskIndex+0)*size)/taskCount;
62   const unsigned int endID   = ((taskIndex+1)*size)/taskCount;
63   updateEdgeLevelBuffer(mesh,cam_pos,startID,endID);
64 }
updateMeshEdgeLevelBufferTask(int taskIndex,int threadIndex,ISPCScene * scene_in,const Vec3fa & cam_pos)65 void updateMeshEdgeLevelBufferTask (int taskIndex, int threadIndex,  ISPCScene* scene_in, const Vec3fa& cam_pos )
66 {
67   ISPCGeometry* geometry = g_ispc_scene->geometries[taskIndex];
68   if (geometry->type != SUBDIV_MESH) return;
69   ISPCSubdivMesh* mesh = (ISPCSubdivMesh*) geometry;
70   if (mesh->numFaces < 10000) {
71     updateEdgeLevelBuffer(mesh,cam_pos,0,mesh->numFaces);
72     rtcUpdateGeometryBuffer(geometry->geometry,RTC_BUFFER_TYPE_LEVEL,0);
73     rtcCommitGeometry(geometry->geometry);
74   }
75 }
76 #endif
77 
updateEdgeLevels(ISPCScene * scene_in,const Vec3fa & cam_pos)78 void updateEdgeLevels(ISPCScene* scene_in, const Vec3fa& cam_pos)
79 {
80   /* first update small meshes */
81 #if defined(ISPC)
82   parallel_for(size_t(0),size_t( scene_in->numGeometries ),[&](const range<size_t>& range) {
83     const int threadIndex = (int)TaskScheduler::threadIndex();
84     for (size_t i=range.begin(); i<range.end(); i++)
85       updateMeshEdgeLevelBufferTask((int)i,threadIndex,scene_in,cam_pos);
86   });
87 #endif
88 
89   /* now update large meshes */
90   for (unsigned int g=0; g<scene_in->numGeometries; g++)
91   {
92     ISPCGeometry* geometry = g_ispc_scene->geometries[g];
93     if (geometry->type != SUBDIV_MESH) continue;
94     ISPCSubdivMesh* mesh = (ISPCSubdivMesh*) geometry;
95 #if defined(ISPC)
96     if (mesh->numFaces < 10000) continue;
97     parallel_for(size_t(0),size_t( (mesh->numFaces+4095)/4096 ),[&](const range<size_t>& range) {
98     const int threadIndex = (int)TaskScheduler::threadIndex();
99     for (size_t i=range.begin(); i<range.end(); i++)
100       updateSubMeshEdgeLevelBufferTask((int)i,threadIndex,mesh,cam_pos);
101   });
102 #else
103     updateEdgeLevelBuffer(mesh,cam_pos,0,mesh->numFaces);
104 #endif
105     rtcUpdateGeometryBuffer(geometry->geometry,RTC_BUFFER_TYPE_LEVEL,0);
106     rtcCommitGeometry(geometry->geometry);
107   }
108 }
109 
convertScene(ISPCScene * scene_in)110 RTCScene convertScene(ISPCScene* scene_in)
111 {
112   RTCScene scene_out = ConvertScene(g_device, scene_in,RTC_BUILD_QUALITY_MEDIUM);
113   return scene_out;
114 }
115 
116 /* renders a single pixel casting with ambient occlusion */
ambientOcclusionShading(int x,int y,Ray & ray,RayStats & stats)117 Vec3fa ambientOcclusionShading(int x, int y, Ray& ray, RayStats& stats)
118 {
119   Ray rays[AMBIENT_OCCLUSION_SAMPLES];
120 
121   Vec3fa Ng = normalize(ray.Ng);
122   if (dot(ray.dir,Ng) > 0.0f) Ng = neg(Ng);
123 
124   Vec3fa col = Vec3fa(min(1.0f,0.3f+0.8f*abs(dot(Ng,normalize(ray.dir)))));
125 
126   /* calculate hit point */
127   float intensity = 0;
128   Vec3fa hitPos = ray.org + ray.tfar * ray.dir;
129 
130   RandomSampler sampler;
131   RandomSampler_init(sampler,x,y,0);
132 
133   /* enable only valid rays */
134   for (int i=0; i<AMBIENT_OCCLUSION_SAMPLES; i++)
135   {
136     /* sample random direction */
137     Vec2f s = RandomSampler_get2D(sampler);
138     Sample3f dir;
139     dir.v = cosineSampleHemisphere(s);
140     dir.pdf = cosineSampleHemispherePDF(dir.v);
141     dir.v = frame(Ng) * dir.v;
142 
143     /* initialize shadow ray */
144     Ray& shadow = rays[i];
145     bool mask = 1; { // invalidate inactive rays
146       shadow.tnear() = mask ? 0.001f       : (float)(pos_inf);
147       shadow.tfar  = mask ? (float)(inf) : (float)(neg_inf);
148     }
149     init_Ray(shadow, hitPos, dir.v, shadow.tnear(), shadow.tfar);
150 
151     RayStats_addShadowRay(stats);
152   }
153 
154   RTCIntersectContext context;
155   rtcInitIntersectContext(&context);
156   context.flags = g_iflags_incoherent;
157 
158   /* trace occlusion rays */
159 #if USE_INTERFACE == 0
160   rtcOccluded1M(g_scene,&context,(RTCRay*)&rays,AMBIENT_OCCLUSION_SAMPLES,sizeof(Ray));
161 #elif USE_INTERFACE == 1
162   for (unsigned int i=0; i<AMBIENT_OCCLUSION_SAMPLES; i++)
163     rtcOccluded1(g_scene,RTCRay_(rays[i]));
164 #else
165   for (unsigned int i=0; i<AMBIENT_OCCLUSION_SAMPLES; i++)
166     rtcOccluded1M(g_scene,&context,(RTCRay*)&rays[i],1,sizeof(Ray));
167 #endif
168 
169   /* accumulate illumination */
170   for (int i=0; i<AMBIENT_OCCLUSION_SAMPLES; i++) {
171     if (rays[i].tfar >= 0.0f)
172       intensity += 1.0f;
173   }
174 
175   /* shade pixel */
176   return col * (intensity/AMBIENT_OCCLUSION_SAMPLES);
177 }
178 
179 
postIntersectGeometry(const Ray & ray,DifferentialGeometry & dg,ISPCGeometry * geometry,int & materialID)180 void postIntersectGeometry(const Ray& ray, DifferentialGeometry& dg, ISPCGeometry* geometry, int& materialID)
181 {
182   if (geometry->type == TRIANGLE_MESH)
183   {
184     ISPCTriangleMesh* mesh = (ISPCTriangleMesh*) geometry;
185     materialID = mesh->geom.materialID;
186   }
187   else if (geometry->type == QUAD_MESH)
188   {
189     ISPCQuadMesh* mesh = (ISPCQuadMesh*) geometry;
190     materialID = mesh->geom.materialID;
191   }
192   else if (geometry->type == GRID_MESH)
193   {
194     ISPCGridMesh* mesh = (ISPCGridMesh*) geometry;
195     materialID = mesh->geom.materialID;
196   }
197   else if (geometry->type == SUBDIV_MESH)
198   {
199     ISPCSubdivMesh* mesh = (ISPCSubdivMesh*) geometry;
200     materialID = mesh->geom.materialID;
201   }
202   else if (geometry->type == CURVES)
203   {
204     ISPCHairSet* mesh = (ISPCHairSet*) geometry;
205     materialID = mesh->geom.materialID;
206   }
207   else if (geometry->type == POINTS)
208   {
209     ISPCPointSet* mesh = (ISPCPointSet*) geometry;
210     materialID = mesh->geom.materialID;
211   }
212   else if (geometry->type == GROUP) {
213     auto geomID = ray.geomID; {
214       postIntersectGeometry(ray,dg,((ISPCGroup*) geometry)->geometries[geomID],materialID);
215     }
216   }
217   else
218     assert(false);
219 }
220 
calculate_interpolated_space(ISPCInstance * instance,float gtime)221 AffineSpace3fa calculate_interpolated_space (ISPCInstance* instance, float gtime)
222 {
223   if (instance->numTimeSteps == 1)
224     return AffineSpace3fa(instance->spaces[0]);
225 
226    /* calculate time segment itime and fractional time ftime */
227   const int time_segments = instance->numTimeSteps-1;
228   const float time = gtime*(float)(time_segments);
229   const int itime = clamp((int)(floor(time)),(int)0,time_segments-1);
230   const float ftime = time - (float)(itime);
231   return (1.0f-ftime)*AffineSpace3fa(instance->spaces[itime+0]) + ftime*AffineSpace3fa(instance->spaces[itime+1]);
232 }
233 
234 typedef ISPCInstance* ISPCInstancePtr;
235 
postIntersect(const Ray & ray,DifferentialGeometry & dg)236 inline int postIntersect(const Ray& ray, DifferentialGeometry& dg)
237 {
238   int materialID = 0;
239   auto instID = ray.instID[0]; {
240     auto geomID = ray.geomID; {
241       ISPCGeometry* geometry = nullptr;
242       if (g_instancing_mode != ISPC_INSTANCING_NONE) {
243         ISPCInstance* instance = (ISPCInstancePtr) g_ispc_scene->geometries[instID];
244         geometry = instance->child;
245       } else {
246         geometry = g_ispc_scene->geometries[geomID];
247       }
248       postIntersectGeometry(ray,dg,geometry,materialID);
249     }
250   }
251 
252   if (g_instancing_mode != ISPC_INSTANCING_NONE)
253   {
254     auto instID = ray.instID[0];
255     {
256       /* get instance and geometry pointers */
257       ISPCInstance* instance = (ISPCInstancePtr) g_ispc_scene->geometries[instID];
258 
259       /* convert normals */
260       //AffineSpace3fa space = (1.0f-ray.time())*AffineSpace3fa(instance->space0) + ray.time()*AffineSpace3fa(instance->space1);
261       AffineSpace3fa space = calculate_interpolated_space(instance,ray.time());
262       dg.Ng = xfmVector(space,dg.Ng);
263       dg.Ns = xfmVector(space,dg.Ns);
264     }
265   }
266 
267   return materialID;
268 }
269 
/* Returns _Ng if it points against dir (negative dot product), otherwise
 * the negated normal. */
inline Vec3fa face_forward(const Vec3fa& dir, const Vec3fa& _Ng) {
  const Vec3fa normal = _Ng;
  if (dot(dir,normal) < 0.0f)
    return normal;
  return neg(normal);
}
274 
275 /* renders a single screen tile */
renderTileStandard(int taskIndex,int threadIndex,int * pixels,const unsigned int width,const unsigned int height,const float time,const ISPCCamera & camera,const int numTilesX,const int numTilesY)276 void renderTileStandard(int taskIndex,
277                         int threadIndex,
278                         int* pixels,
279                         const unsigned int width,
280                         const unsigned int height,
281                         const float time,
282                         const ISPCCamera& camera,
283                         const int numTilesX,
284                         const int numTilesY)
285 {
286   const unsigned int tileY = taskIndex / numTilesX;
287   const unsigned int tileX = taskIndex - tileY * numTilesX;
288   const unsigned int x0 = tileX * TILE_SIZE_X;
289   const unsigned int x1 = min(x0+TILE_SIZE_X,width);
290   const unsigned int y0 = tileY * TILE_SIZE_Y;
291   const unsigned int y1 = min(y0+TILE_SIZE_Y,height);
292 
293   RayStats& stats = g_stats[threadIndex];
294 
295   Ray rays[TILE_SIZE_X*TILE_SIZE_Y];
296 
297   /* generate stream of primary rays */
298   int N = 0;
299   for (unsigned int y=y0; y<y1; y++) for (unsigned int x=x0; x<x1; x++)
300   {
301     /* ISPC workaround for mask == 0 */
302 
303 
304     RandomSampler sampler;
305     RandomSampler_init(sampler, x, y, 0);
306 
307     /* initialize ray */
308     Ray& ray = rays[N++];
309     bool mask = 1; { // invalidates inactive rays
310       ray.tnear() = mask ? 0.0f         : (float)(pos_inf);
311       ray.tfar  = mask ? (float)(inf) : (float)(neg_inf);
312     }
313     init_Ray(ray, Vec3fa(camera.xfm.p), Vec3fa(normalize((float)x*camera.xfm.l.vx + (float)y*camera.xfm.l.vy + camera.xfm.l.vz)), ray.tnear(), ray.tfar, RandomSampler_get1D(sampler));
314 
315     RayStats_addRay(stats);
316   }
317 
318   RTCIntersectContext context;
319   rtcInitIntersectContext(&context);
320   context.flags = g_iflags_coherent;
321 
322   /* trace stream of rays */
323 #if USE_INTERFACE == 0
324   rtcIntersect1M(g_scene,&context,(RTCRayHit*)&rays[0],N,sizeof(Ray));
325 #elif USE_INTERFACE == 1
326   for (unsigned int i=0; i<N; i++)
327     rtcIntersect1(g_scene,&context,RTCRayHit_(rays[i]));
328 #else
329   for (unsigned int i=0; i<N; i++)
330     rtcIntersect1M(g_scene,&context,(RTCRayHit*)&rays[i],1,sizeof(Ray));
331 #endif
332 
333   /* shade stream of rays */
334   N = 0;
335   for (unsigned int y=y0; y<y1; y++) for (unsigned int x=x0; x<x1; x++)
336   {
337     /* ISPC workaround for mask == 0 */
338 
339     Ray& ray = rays[N++];
340 
341     /* eyelight shading */
342     Vec3fa color = Vec3fa(0.0f);
343     if (ray.geomID != RTC_INVALID_GEOMETRY_ID)
344 #if SIMPLE_SHADING == 1
345     {
346 #if OBJ_MATERIAL == 1
347       Vec3fa Kd = Vec3fa(0.5f);
348       DifferentialGeometry dg;
349       dg.geomID = ray.geomID;
350       dg.primID = ray.primID;
351       dg.u = ray.u;
352       dg.v = ray.v;
353       dg.P  = ray.org+ray.tfar*ray.dir;
354       dg.Ng = ray.Ng;
355       dg.Ns = ray.Ng;
356       int materialID = postIntersect(ray,dg);
357       dg.Ng = face_forward(ray.dir,normalize(dg.Ng));
358       dg.Ns = face_forward(ray.dir,normalize(dg.Ns));
359 
360       /* shade */
361       if (g_ispc_scene->materials[materialID]->type == MATERIAL_OBJ) {
362         ISPCOBJMaterial* material = (ISPCOBJMaterial*) g_ispc_scene->materials[materialID];
363         Kd = Vec3fa(material->Kd);
364       }
365 
366       color = Kd*dot(neg(ray.dir),dg.Ns);
367 #else
368       color = Vec3fa(abs(dot(ray.dir,normalize(ray.Ng))));
369 #endif
370     }
371 #else
372       color = ambientOcclusionShading(x,y,ray,g_stats[threadIndex]);
373 #endif
374 
375     /* write color to framebuffer */
376     unsigned int r = (unsigned int) (255.0f * clamp(color.x,0.0f,1.0f));
377     unsigned int g = (unsigned int) (255.0f * clamp(color.y,0.0f,1.0f));
378     unsigned int b = (unsigned int) (255.0f * clamp(color.z,0.0f,1.0f));
379     pixels[y*width+x] = (b << 16) + (g << 8) + r;
380   }
381 }
382 
383 /* task that renders a single screen tile */
renderTileTask(int taskIndex,int threadIndex,int * pixels,const unsigned int width,const unsigned int height,const float time,const ISPCCamera & camera,const int numTilesX,const int numTilesY)384 void renderTileTask (int taskIndex, int threadIndex, int* pixels,
385                          const unsigned int width,
386                          const unsigned int height,
387                          const float time,
388                          const ISPCCamera& camera,
389                          const int numTilesX,
390                          const int numTilesY)
391 {
392   renderTileStandard(taskIndex,threadIndex,pixels,width,height,time,camera,numTilesX,numTilesY);
393 }
394 
/* Initialization hook called by the C++ code. Nothing is set up here and
 * cfg is not used. */
extern "C" void device_init (char* cfg)
{
  /* intentionally empty */
}
399 
renderFrameStandard(int * pixels,const unsigned int width,const unsigned int height,const float time,const ISPCCamera & camera)400 extern "C" void renderFrameStandard (int* pixels,
401                           const unsigned int width,
402                           const unsigned int height,
403                           const float time,
404                           const ISPCCamera& camera)
405 {
406   const int numTilesX = (width +TILE_SIZE_X-1)/TILE_SIZE_X;
407   const int numTilesY = (height+TILE_SIZE_Y-1)/TILE_SIZE_Y;
408   parallel_for(size_t(0),size_t(numTilesX*numTilesY),[&](const range<size_t>& range) {
409     const int threadIndex = (int)TaskScheduler::threadIndex();
410     for (size_t i=range.begin(); i<range.end(); i++)
411       renderTileTask((int)i,threadIndex,pixels,width,height,time,camera,numTilesX,numTilesY);
412   });
413 }
414 
415 /* called by the C++ code to render */
device_render(int * pixels,const unsigned int width,const unsigned int height,const float time,const ISPCCamera & camera)416 extern "C" void device_render (int* pixels,
417                            const unsigned int width,
418                            const unsigned int height,
419                            const float time,
420                            const ISPCCamera& camera)
421 {
422   /* create scene */
423   if (!g_scene) {
424     g_scene = convertScene(g_ispc_scene);
425     updateEdgeLevels(g_ispc_scene, camera.xfm.p);
426     rtcCommitScene (g_scene);
427   }
428 }
429 
430 /* called by the C++ code for cleanup */
device_cleanup()431 extern "C" void device_cleanup ()
432 {
433   rtcReleaseScene (g_scene); g_scene = nullptr;
434 }
435 
436 } // namespace embree
437