1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3
4 #include "../common/math/random_sampler.h"
5 #include "../common/math/sampling.h"
6 #include "../common/tutorial/tutorial_device.h"
7 #include "../common/tutorial/scene_device.h"
8
9 namespace embree {
10
/* selects which Embree entry points the USE_INTERFACE preprocessor branches in
   this file use to trace rays */
#define USE_INTERFACE 0 // 0 = stream, 1 = single rays/packets, 2 = single rays/packets using stream interface

/* number of shadow rays ambientOcclusionShading casts per shaded point */
#define AMBIENT_OCCLUSION_SAMPLES 64

/* disabled: remaps the occlusion queries onto the intersection queries */
//#define rtcOccluded rtcIntersect
//#define rtcOccluded1M rtcIntersect1M

/* shading selection in renderTileStandard: with SIMPLE_SHADING == 1 the
   ambient occlusion path is compiled out; with OBJ_MATERIAL == 1 the diffuse
   color Kd of OBJ materials is looked up for the simple shading */
#define SIMPLE_SHADING 1
#define OBJ_MATERIAL 1

/* scene description and instancing mode, both defined outside of this file */
extern "C" ISPCScene* g_ispc_scene;
extern "C" int g_instancing_mode;

/* scene data */
RTCScene g_scene = nullptr;

/* clamp bounds and scale factor of the edge levels computed by updateEdgeLevel */
#define MAX_EDGE_LEVEL 64.0f
#define MIN_EDGE_LEVEL 4.0f
#define LEVEL_FACTOR 64.0f
28
updateEdgeLevel(ISPCSubdivMesh * mesh,const Vec3fa & cam_pos,const unsigned int e0,const unsigned int e1)29 inline float updateEdgeLevel( ISPCSubdivMesh* mesh, const Vec3fa& cam_pos, const unsigned int e0, const unsigned int e1)
30 {
31 const Vec3fa v0 = mesh->positions[0][mesh->position_indices[e0]];
32 const Vec3fa v1 = mesh->positions[0][mesh->position_indices[e1]];
33 const Vec3fa edge = v1-v0;
34 const Vec3fa P = 0.5f*(v1+v0);
35 const Vec3fa dist = cam_pos - P;
36 return max(min(LEVEL_FACTOR*(0.5f*length(edge)/length(dist)),MAX_EDGE_LEVEL),MIN_EDGE_LEVEL);
37 }
38
39
updateEdgeLevelBuffer(ISPCSubdivMesh * mesh,const Vec3fa & cam_pos,unsigned int startID,unsigned int endID)40 void updateEdgeLevelBuffer( ISPCSubdivMesh* mesh, const Vec3fa& cam_pos, unsigned int startID, unsigned int endID )
41 {
42 for (unsigned int f=startID; f<endID;f++) {
43 unsigned int e = mesh->face_offsets[f];
44 unsigned int N = mesh->verticesPerFace[f];
45 if (N == 4) /* fast path for quads */
46 for (unsigned int i=0; i<4; i++)
47 mesh->subdivlevel[e+i] = updateEdgeLevel(mesh,cam_pos,e+(i+0),e+(i+1)%4);
48 else if (N == 3) /* fast path for triangles */
49 for (unsigned int i=0; i<3; i++)
50 mesh->subdivlevel[e+i] = updateEdgeLevel(mesh,cam_pos,e+(i+0),e+(i+1)%3);
51 else /* fast path for general polygons */
52 for (unsigned int i=0; i<N; i++)
53 mesh->subdivlevel[e+i] = updateEdgeLevel(mesh,cam_pos,e+(i+0),e+(i+1)%N);
54 }
55 }
56
57 #if defined(ISPC)
updateSubMeshEdgeLevelBufferTask(int taskIndex,int threadIndex,ISPCSubdivMesh * mesh,const Vec3fa & cam_pos)58 void updateSubMeshEdgeLevelBufferTask (int taskIndex, int threadIndex, ISPCSubdivMesh* mesh, const Vec3fa& cam_pos )
59 {
60 const unsigned int size = mesh->numFaces;
61 const unsigned int startID = ((taskIndex+0)*size)/taskCount;
62 const unsigned int endID = ((taskIndex+1)*size)/taskCount;
63 updateEdgeLevelBuffer(mesh,cam_pos,startID,endID);
64 }
updateMeshEdgeLevelBufferTask(int taskIndex,int threadIndex,ISPCScene * scene_in,const Vec3fa & cam_pos)65 void updateMeshEdgeLevelBufferTask (int taskIndex, int threadIndex, ISPCScene* scene_in, const Vec3fa& cam_pos )
66 {
67 ISPCGeometry* geometry = g_ispc_scene->geometries[taskIndex];
68 if (geometry->type != SUBDIV_MESH) return;
69 ISPCSubdivMesh* mesh = (ISPCSubdivMesh*) geometry;
70 if (mesh->numFaces < 10000) {
71 updateEdgeLevelBuffer(mesh,cam_pos,0,mesh->numFaces);
72 rtcUpdateGeometryBuffer(geometry->geometry,RTC_BUFFER_TYPE_LEVEL,0);
73 rtcCommitGeometry(geometry->geometry);
74 }
75 }
76 #endif
77
updateEdgeLevels(ISPCScene * scene_in,const Vec3fa & cam_pos)78 void updateEdgeLevels(ISPCScene* scene_in, const Vec3fa& cam_pos)
79 {
80 /* first update small meshes */
81 #if defined(ISPC)
82 parallel_for(size_t(0),size_t( scene_in->numGeometries ),[&](const range<size_t>& range) {
83 const int threadIndex = (int)TaskScheduler::threadIndex();
84 for (size_t i=range.begin(); i<range.end(); i++)
85 updateMeshEdgeLevelBufferTask((int)i,threadIndex,scene_in,cam_pos);
86 });
87 #endif
88
89 /* now update large meshes */
90 for (unsigned int g=0; g<scene_in->numGeometries; g++)
91 {
92 ISPCGeometry* geometry = g_ispc_scene->geometries[g];
93 if (geometry->type != SUBDIV_MESH) continue;
94 ISPCSubdivMesh* mesh = (ISPCSubdivMesh*) geometry;
95 #if defined(ISPC)
96 if (mesh->numFaces < 10000) continue;
97 parallel_for(size_t(0),size_t( (mesh->numFaces+4095)/4096 ),[&](const range<size_t>& range) {
98 const int threadIndex = (int)TaskScheduler::threadIndex();
99 for (size_t i=range.begin(); i<range.end(); i++)
100 updateSubMeshEdgeLevelBufferTask((int)i,threadIndex,mesh,cam_pos);
101 });
102 #else
103 updateEdgeLevelBuffer(mesh,cam_pos,0,mesh->numFaces);
104 #endif
105 rtcUpdateGeometryBuffer(geometry->geometry,RTC_BUFFER_TYPE_LEVEL,0);
106 rtcCommitGeometry(geometry->geometry);
107 }
108 }
109
convertScene(ISPCScene * scene_in)110 RTCScene convertScene(ISPCScene* scene_in)
111 {
112 RTCScene scene_out = ConvertScene(g_device, scene_in,RTC_BUILD_QUALITY_MEDIUM);
113 return scene_out;
114 }
115
116 /* renders a single pixel casting with ambient occlusion */
ambientOcclusionShading(int x,int y,Ray & ray,RayStats & stats)117 Vec3fa ambientOcclusionShading(int x, int y, Ray& ray, RayStats& stats)
118 {
119 Ray rays[AMBIENT_OCCLUSION_SAMPLES];
120
121 Vec3fa Ng = normalize(ray.Ng);
122 if (dot(ray.dir,Ng) > 0.0f) Ng = neg(Ng);
123
124 Vec3fa col = Vec3fa(min(1.0f,0.3f+0.8f*abs(dot(Ng,normalize(ray.dir)))));
125
126 /* calculate hit point */
127 float intensity = 0;
128 Vec3fa hitPos = ray.org + ray.tfar * ray.dir;
129
130 RandomSampler sampler;
131 RandomSampler_init(sampler,x,y,0);
132
133 /* enable only valid rays */
134 for (int i=0; i<AMBIENT_OCCLUSION_SAMPLES; i++)
135 {
136 /* sample random direction */
137 Vec2f s = RandomSampler_get2D(sampler);
138 Sample3f dir;
139 dir.v = cosineSampleHemisphere(s);
140 dir.pdf = cosineSampleHemispherePDF(dir.v);
141 dir.v = frame(Ng) * dir.v;
142
143 /* initialize shadow ray */
144 Ray& shadow = rays[i];
145 bool mask = 1; { // invalidate inactive rays
146 shadow.tnear() = mask ? 0.001f : (float)(pos_inf);
147 shadow.tfar = mask ? (float)(inf) : (float)(neg_inf);
148 }
149 init_Ray(shadow, hitPos, dir.v, shadow.tnear(), shadow.tfar);
150
151 RayStats_addShadowRay(stats);
152 }
153
154 RTCIntersectContext context;
155 rtcInitIntersectContext(&context);
156 context.flags = g_iflags_incoherent;
157
158 /* trace occlusion rays */
159 #if USE_INTERFACE == 0
160 rtcOccluded1M(g_scene,&context,(RTCRay*)&rays,AMBIENT_OCCLUSION_SAMPLES,sizeof(Ray));
161 #elif USE_INTERFACE == 1
162 for (unsigned int i=0; i<AMBIENT_OCCLUSION_SAMPLES; i++)
163 rtcOccluded1(g_scene,RTCRay_(rays[i]));
164 #else
165 for (unsigned int i=0; i<AMBIENT_OCCLUSION_SAMPLES; i++)
166 rtcOccluded1M(g_scene,&context,(RTCRay*)&rays[i],1,sizeof(Ray));
167 #endif
168
169 /* accumulate illumination */
170 for (int i=0; i<AMBIENT_OCCLUSION_SAMPLES; i++) {
171 if (rays[i].tfar >= 0.0f)
172 intensity += 1.0f;
173 }
174
175 /* shade pixel */
176 return col * (intensity/AMBIENT_OCCLUSION_SAMPLES);
177 }
178
179
postIntersectGeometry(const Ray & ray,DifferentialGeometry & dg,ISPCGeometry * geometry,int & materialID)180 void postIntersectGeometry(const Ray& ray, DifferentialGeometry& dg, ISPCGeometry* geometry, int& materialID)
181 {
182 if (geometry->type == TRIANGLE_MESH)
183 {
184 ISPCTriangleMesh* mesh = (ISPCTriangleMesh*) geometry;
185 materialID = mesh->geom.materialID;
186 }
187 else if (geometry->type == QUAD_MESH)
188 {
189 ISPCQuadMesh* mesh = (ISPCQuadMesh*) geometry;
190 materialID = mesh->geom.materialID;
191 }
192 else if (geometry->type == GRID_MESH)
193 {
194 ISPCGridMesh* mesh = (ISPCGridMesh*) geometry;
195 materialID = mesh->geom.materialID;
196 }
197 else if (geometry->type == SUBDIV_MESH)
198 {
199 ISPCSubdivMesh* mesh = (ISPCSubdivMesh*) geometry;
200 materialID = mesh->geom.materialID;
201 }
202 else if (geometry->type == CURVES)
203 {
204 ISPCHairSet* mesh = (ISPCHairSet*) geometry;
205 materialID = mesh->geom.materialID;
206 }
207 else if (geometry->type == POINTS)
208 {
209 ISPCPointSet* mesh = (ISPCPointSet*) geometry;
210 materialID = mesh->geom.materialID;
211 }
212 else if (geometry->type == GROUP) {
213 auto geomID = ray.geomID; {
214 postIntersectGeometry(ray,dg,((ISPCGroup*) geometry)->geometries[geomID],materialID);
215 }
216 }
217 else
218 assert(false);
219 }
220
calculate_interpolated_space(ISPCInstance * instance,float gtime)221 AffineSpace3fa calculate_interpolated_space (ISPCInstance* instance, float gtime)
222 {
223 if (instance->numTimeSteps == 1)
224 return AffineSpace3fa(instance->spaces[0]);
225
226 /* calculate time segment itime and fractional time ftime */
227 const int time_segments = instance->numTimeSteps-1;
228 const float time = gtime*(float)(time_segments);
229 const int itime = clamp((int)(floor(time)),(int)0,time_segments-1);
230 const float ftime = time - (float)(itime);
231 return (1.0f-ftime)*AffineSpace3fa(instance->spaces[itime+0]) + ftime*AffineSpace3fa(instance->spaces[itime+1]);
232 }
233
234 typedef ISPCInstance* ISPCInstancePtr;
235
postIntersect(const Ray & ray,DifferentialGeometry & dg)236 inline int postIntersect(const Ray& ray, DifferentialGeometry& dg)
237 {
238 int materialID = 0;
239 auto instID = ray.instID[0]; {
240 auto geomID = ray.geomID; {
241 ISPCGeometry* geometry = nullptr;
242 if (g_instancing_mode != ISPC_INSTANCING_NONE) {
243 ISPCInstance* instance = (ISPCInstancePtr) g_ispc_scene->geometries[instID];
244 geometry = instance->child;
245 } else {
246 geometry = g_ispc_scene->geometries[geomID];
247 }
248 postIntersectGeometry(ray,dg,geometry,materialID);
249 }
250 }
251
252 if (g_instancing_mode != ISPC_INSTANCING_NONE)
253 {
254 auto instID = ray.instID[0];
255 {
256 /* get instance and geometry pointers */
257 ISPCInstance* instance = (ISPCInstancePtr) g_ispc_scene->geometries[instID];
258
259 /* convert normals */
260 //AffineSpace3fa space = (1.0f-ray.time())*AffineSpace3fa(instance->space0) + ray.time()*AffineSpace3fa(instance->space1);
261 AffineSpace3fa space = calculate_interpolated_space(instance,ray.time());
262 dg.Ng = xfmVector(space,dg.Ng);
263 dg.Ns = xfmVector(space,dg.Ns);
264 }
265 }
266
267 return materialID;
268 }
269
/* Returns _Ng when it points against dir (negative dot product) and the
   negated _Ng otherwise. */
inline Vec3fa face_forward(const Vec3fa& dir, const Vec3fa& _Ng) {
  if (dot(dir,_Ng) < 0.0f)
    return _Ng;
  return neg(_Ng);
}
274
275 /* renders a single screen tile */
renderTileStandard(int taskIndex,int threadIndex,int * pixels,const unsigned int width,const unsigned int height,const float time,const ISPCCamera & camera,const int numTilesX,const int numTilesY)276 void renderTileStandard(int taskIndex,
277 int threadIndex,
278 int* pixels,
279 const unsigned int width,
280 const unsigned int height,
281 const float time,
282 const ISPCCamera& camera,
283 const int numTilesX,
284 const int numTilesY)
285 {
286 const unsigned int tileY = taskIndex / numTilesX;
287 const unsigned int tileX = taskIndex - tileY * numTilesX;
288 const unsigned int x0 = tileX * TILE_SIZE_X;
289 const unsigned int x1 = min(x0+TILE_SIZE_X,width);
290 const unsigned int y0 = tileY * TILE_SIZE_Y;
291 const unsigned int y1 = min(y0+TILE_SIZE_Y,height);
292
293 RayStats& stats = g_stats[threadIndex];
294
295 Ray rays[TILE_SIZE_X*TILE_SIZE_Y];
296
297 /* generate stream of primary rays */
298 int N = 0;
299 for (unsigned int y=y0; y<y1; y++) for (unsigned int x=x0; x<x1; x++)
300 {
301 /* ISPC workaround for mask == 0 */
302
303
304 RandomSampler sampler;
305 RandomSampler_init(sampler, x, y, 0);
306
307 /* initialize ray */
308 Ray& ray = rays[N++];
309 bool mask = 1; { // invalidates inactive rays
310 ray.tnear() = mask ? 0.0f : (float)(pos_inf);
311 ray.tfar = mask ? (float)(inf) : (float)(neg_inf);
312 }
313 init_Ray(ray, Vec3fa(camera.xfm.p), Vec3fa(normalize((float)x*camera.xfm.l.vx + (float)y*camera.xfm.l.vy + camera.xfm.l.vz)), ray.tnear(), ray.tfar, RandomSampler_get1D(sampler));
314
315 RayStats_addRay(stats);
316 }
317
318 RTCIntersectContext context;
319 rtcInitIntersectContext(&context);
320 context.flags = g_iflags_coherent;
321
322 /* trace stream of rays */
323 #if USE_INTERFACE == 0
324 rtcIntersect1M(g_scene,&context,(RTCRayHit*)&rays[0],N,sizeof(Ray));
325 #elif USE_INTERFACE == 1
326 for (unsigned int i=0; i<N; i++)
327 rtcIntersect1(g_scene,&context,RTCRayHit_(rays[i]));
328 #else
329 for (unsigned int i=0; i<N; i++)
330 rtcIntersect1M(g_scene,&context,(RTCRayHit*)&rays[i],1,sizeof(Ray));
331 #endif
332
333 /* shade stream of rays */
334 N = 0;
335 for (unsigned int y=y0; y<y1; y++) for (unsigned int x=x0; x<x1; x++)
336 {
337 /* ISPC workaround for mask == 0 */
338
339 Ray& ray = rays[N++];
340
341 /* eyelight shading */
342 Vec3fa color = Vec3fa(0.0f);
343 if (ray.geomID != RTC_INVALID_GEOMETRY_ID)
344 #if SIMPLE_SHADING == 1
345 {
346 #if OBJ_MATERIAL == 1
347 Vec3fa Kd = Vec3fa(0.5f);
348 DifferentialGeometry dg;
349 dg.geomID = ray.geomID;
350 dg.primID = ray.primID;
351 dg.u = ray.u;
352 dg.v = ray.v;
353 dg.P = ray.org+ray.tfar*ray.dir;
354 dg.Ng = ray.Ng;
355 dg.Ns = ray.Ng;
356 int materialID = postIntersect(ray,dg);
357 dg.Ng = face_forward(ray.dir,normalize(dg.Ng));
358 dg.Ns = face_forward(ray.dir,normalize(dg.Ns));
359
360 /* shade */
361 if (g_ispc_scene->materials[materialID]->type == MATERIAL_OBJ) {
362 ISPCOBJMaterial* material = (ISPCOBJMaterial*) g_ispc_scene->materials[materialID];
363 Kd = Vec3fa(material->Kd);
364 }
365
366 color = Kd*dot(neg(ray.dir),dg.Ns);
367 #else
368 color = Vec3fa(abs(dot(ray.dir,normalize(ray.Ng))));
369 #endif
370 }
371 #else
372 color = ambientOcclusionShading(x,y,ray,g_stats[threadIndex]);
373 #endif
374
375 /* write color to framebuffer */
376 unsigned int r = (unsigned int) (255.0f * clamp(color.x,0.0f,1.0f));
377 unsigned int g = (unsigned int) (255.0f * clamp(color.y,0.0f,1.0f));
378 unsigned int b = (unsigned int) (255.0f * clamp(color.z,0.0f,1.0f));
379 pixels[y*width+x] = (b << 16) + (g << 8) + r;
380 }
381 }
382
383 /* task that renders a single screen tile */
renderTileTask(int taskIndex,int threadIndex,int * pixels,const unsigned int width,const unsigned int height,const float time,const ISPCCamera & camera,const int numTilesX,const int numTilesY)384 void renderTileTask (int taskIndex, int threadIndex, int* pixels,
385 const unsigned int width,
386 const unsigned int height,
387 const float time,
388 const ISPCCamera& camera,
389 const int numTilesX,
390 const int numTilesY)
391 {
392 renderTileStandard(taskIndex,threadIndex,pixels,width,height,time,camera,numTilesX,numTilesY);
393 }
394
/* Initialization hook called by the C++ code. */
extern "C" void device_init (char* cfg)
{
  /* nothing to initialize here; cfg is ignored */
}
399
renderFrameStandard(int * pixels,const unsigned int width,const unsigned int height,const float time,const ISPCCamera & camera)400 extern "C" void renderFrameStandard (int* pixels,
401 const unsigned int width,
402 const unsigned int height,
403 const float time,
404 const ISPCCamera& camera)
405 {
406 const int numTilesX = (width +TILE_SIZE_X-1)/TILE_SIZE_X;
407 const int numTilesY = (height+TILE_SIZE_Y-1)/TILE_SIZE_Y;
408 parallel_for(size_t(0),size_t(numTilesX*numTilesY),[&](const range<size_t>& range) {
409 const int threadIndex = (int)TaskScheduler::threadIndex();
410 for (size_t i=range.begin(); i<range.end(); i++)
411 renderTileTask((int)i,threadIndex,pixels,width,height,time,camera,numTilesX,numTilesY);
412 });
413 }
414
415 /* called by the C++ code to render */
device_render(int * pixels,const unsigned int width,const unsigned int height,const float time,const ISPCCamera & camera)416 extern "C" void device_render (int* pixels,
417 const unsigned int width,
418 const unsigned int height,
419 const float time,
420 const ISPCCamera& camera)
421 {
422 /* create scene */
423 if (!g_scene) {
424 g_scene = convertScene(g_ispc_scene);
425 updateEdgeLevels(g_ispc_scene, camera.xfm.p);
426 rtcCommitScene (g_scene);
427 }
428 }
429
430 /* called by the C++ code for cleanup */
device_cleanup()431 extern "C" void device_cleanup ()
432 {
433 rtcReleaseScene (g_scene); g_scene = nullptr;
434 }
435
436 } // namespace embree
437