1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4
5 #include <vector>
6
7 #include <OpenImageIO/filesystem.h>
8 #include <OpenImageIO/sysutil.h>
9
10 #include <OSL/oslconfig.h>
11
12 #include "optixraytracer.h"
13 #include "render_params.h"
14
15 #ifdef OSL_USE_OPTIX
16 # if (OPTIX_VERSION >= 70000)
17 # include <optix_function_table_definition.h>
18 # include <optix_stack_size.h>
19 # include <optix_stubs.h>
20 # include <cuda.h>
21 # include <nvrtc.h>
22 # endif
23 #endif
24
25 // The pre-compiled renderer support library LLVM bitcode is embedded
26 // into the executable and made available through these variables.
27 extern int rend_llvm_compiled_ops_size;
28 extern unsigned char rend_llvm_compiled_ops_block[];
29
30
31 OSL_NAMESPACE_ENTER
32
33 #if (OPTIX_VERSION >= 70000)
34
35 #define CUDA_CHECK(call) \
36 { \
37 cudaError_t error = call; \
38 if (error != cudaSuccess) \
39 { \
40 std::stringstream ss; \
41 ss << "CUDA call (" << #call << " ) failed with error: '" \
42 << cudaGetErrorString( error ) \
43 << "' (" __FILE__ << ":" << __LINE__ << ")\n"; \
44 fprintf(stderr, "[CUDA ERROR] %s", ss.str().c_str() ); \
45 } \
46 }
47
48 #define NVRTC_CHECK(call) \
49 { \
50 nvrtcResult error = call; \
51 if (error != NVRTC_SUCCESS) \
52 { \
53 std::stringstream ss; \
54 ss << "NVRTC call (" << #call << " ) failed with error: '" \
55 << nvrtcGetErrorString( error ) \
56 << "' (" __FILE__ << ":" << __LINE__ << ")\n"; \
57 fprintf(stderr, "[NVRTC ERROR] %s", ss.str().c_str() ); \
58 } \
59 }
60
61
62 #define OPTIX_CHECK(call) \
63 { \
64 OptixResult res = call; \
65 if (res != OPTIX_SUCCESS) \
66 { \
67 std::stringstream ss; \
68 ss << "Optix call '" << #call << "' failed with error: " \
69 << optixGetErrorName( res ) \
70 << " (" __FILE__ ":" << __LINE__ << ")\n"; \
71 fprintf(stderr,"[OPTIX ERROR] %s", ss.str().c_str() ); \
72 exit(1); \
73 } \
74 }
75 #endif
76
77 #define CUDA_SYNC_CHECK() \
78 { \
79 cudaDeviceSynchronize(); \
80 cudaError_t error = cudaGetLastError(); \
81 if (error != cudaSuccess) { \
82 fprintf( stderr, "error (%s: line %d): %s\n", __FILE__, __LINE__, cudaGetErrorString( error ) ); \
83 exit(1); \
84 } \
85 }
86
87 #ifdef OSL_USE_OPTIX
88 #if (OPTIX_VERSION >= 70000)
context_log_cb(unsigned int level,const char * tag,const char * message,void *)89 static void context_log_cb( unsigned int level, const char* tag, const char* message, void* /*cbdata */ )
90 {
91 // std::cerr << "[ ** LOGCALLBACK** " << std::setw( 2 ) << level << "][" << std::setw( 12 ) << tag << "]: " << message << "\n";
92 }
93 #endif
94 #endif
95
OptixRaytracer()96 OptixRaytracer::OptixRaytracer ()
97 {
98 #ifdef OSL_USE_OPTIX
99 #if (OPTIX_VERSION >= 70000)
100 // Initialize CUDA
101 cudaFree(0);
102
103 CUcontext cuCtx = nullptr; // zero means take the current context
104
105 OptixDeviceContextOptions ctx_options = {};
106 ctx_options.logCallbackFunction = context_log_cb;
107 ctx_options.logCallbackLevel = 4;
108
109 OPTIX_CHECK (optixInit ());
110 OPTIX_CHECK (optixDeviceContextCreate (cuCtx, &ctx_options, &m_optix_ctx));
111
112 CUDA_CHECK (cudaSetDevice (0));
113 CUDA_CHECK (cudaStreamCreate (&m_cuda_stream));
114
115 // Set up the string table. This allocates a block of CUDA device memory to
116 // hold all of the static strings used by the OSL shaders. The strings can
117 // be accessed via OptiX variables that hold pointers to the table entries.
118 m_str_table.init(m_optix_ctx);
119
120 // Register all of our string table entries
121 for (auto &&gvar : m_str_table.contents())
122 register_global (gvar.first.c_str(), gvar.second);
123 #endif
124 #endif
125 }
126
~OptixRaytracer()127 OptixRaytracer::~OptixRaytracer ()
128 {
129 #ifdef OSL_USE_OPTIX
130 m_str_table.freetable();
131 #if (OPTIX_VERSION < 70000)
132 if (m_optix_ctx)
133 m_optix_ctx->destroy();
134 #else
135 if (m_optix_ctx)
136 OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
137 #endif
138 #endif
139 }
140
141
142 uint64_t
register_global(const std::string & str,uint64_t value)143 OptixRaytracer::register_global (const std::string &str, uint64_t value)
144 {
145 auto it = m_globals_map.find (ustring(str));
146
147 if (it != m_globals_map.end()) {
148 return it->second;
149 }
150 m_globals_map[ustring(str)] = value;
151
152 return value;
153 }
154
155 bool
fetch_global(const std::string & str,uint64_t * value)156 OptixRaytracer::fetch_global (const std::string &str, uint64_t *value)
157 {
158 auto it = m_globals_map.find (ustring(str));
159
160 if (it != m_globals_map.end()) {
161 *value = it->second;
162 return true;
163 }
164 return false;
165 }
166
167 std::string
load_ptx_file(string_view filename)168 OptixRaytracer::load_ptx_file (string_view filename)
169 {
170 #ifdef OSL_USE_OPTIX
171 std::vector<std::string> paths = {
172 OIIO::Filesystem::parent_path(OIIO::Sysutil::this_program_path()),
173 PTX_PATH
174 };
175 std::string filepath = OIIO::Filesystem::searchpath_find (filename, paths,
176 false);
177 if (OIIO::Filesystem::exists(filepath)) {
178 std::string ptx_string;
179 if (OIIO::Filesystem::read_text_file (filepath, ptx_string))
180 return ptx_string;
181 }
182 #endif
183 errhandler().severe ("Unable to load %s", filename);
184 return {};
185 }
186
187
188
189 bool
init_optix_context(int xres OSL_MAYBE_UNUSED,int yres OSL_MAYBE_UNUSED)190 OptixRaytracer::init_optix_context (int xres OSL_MAYBE_UNUSED,
191 int yres OSL_MAYBE_UNUSED)
192 {
193 #ifdef OSL_USE_OPTIX
194
195 shadingsys->attribute ("lib_bitcode", {OSL::TypeDesc::UINT8, rend_llvm_compiled_ops_size},
196 rend_llvm_compiled_ops_block);
197
198
199 #if (OPTIX_VERSION < 70000)
200
201 // Set up the OptiX context
202 m_optix_ctx = optix::Context::create();
203
204 // Set up the string table. This allocates a block of CUDA device memory to
205 // hold all of the static strings used by the OSL shaders. The strings can
206 // be accessed via OptiX variables that hold pointers to the table entries.
207 m_str_table.init(m_optix_ctx);
208
209 if (m_optix_ctx->getEnabledDeviceCount() != 1)
210 errhandler().warning ("Only one CUDA device is currently supported");
211
212 m_optix_ctx->setRayTypeCount (2);
213 m_optix_ctx->setEntryPointCount (1);
214 m_optix_ctx->setStackSize (2048);
215 m_optix_ctx->setPrintEnabled (true);
216
217 // Load the renderer CUDA source and generate PTX for it
218 std::string progName = "optix_raytracer.ptx";
219 std::string renderer_ptx = load_ptx_file(progName);
220 if (renderer_ptx.empty()) {
221 errhandler().severe ("Could not find PTX for the raygen program");
222 return false;
223 }
224
225 // Create the OptiX programs and set them on the optix::Context
226 m_program = m_optix_ctx->createProgramFromPTXString(renderer_ptx, "raygen");
227 m_optix_ctx->setRayGenerationProgram(0, m_program);
228
229 if (scene.num_prims()) {
230 m_optix_ctx["radiance_ray_type"]->setUint (0u);
231 m_optix_ctx["shadow_ray_type" ]->setUint (1u);
232 m_optix_ctx["bg_color" ]->setFloat (0.0f, 0.0f, 0.0f);
233 m_optix_ctx["bad_color" ]->setFloat (1.0f, 0.0f, 1.0f);
234
235 // Create the OptiX programs and set them on the optix::Context
236 if (renderer_ptx.size()) {
237 m_optix_ctx->setMissProgram (0, m_optix_ctx->createProgramFromPTXString (renderer_ptx, "miss"));
238 m_optix_ctx->setExceptionProgram (0, m_optix_ctx->createProgramFromPTXString (renderer_ptx, "exception"));
239 }
240
241 // Load the PTX for the wrapper program. It will be used to
242 // create OptiX Materials from the OSL ShaderGroups
243 m_materials_ptx = load_ptx_file ("wrapper.ptx");
244 if (m_materials_ptx.empty())
245 return false;
246
247 // Load the PTX for the primitives
248 std::string sphere_ptx = load_ptx_file ("sphere.ptx");
249 std::string quad_ptx = load_ptx_file ("quad.ptx");
250 if (sphere_ptx.empty() || quad_ptx.empty())
251 return false;
252
253 // Create the sphere and quad intersection programs.
254 sphere_bounds = m_optix_ctx->createProgramFromPTXString (sphere_ptx, "bounds");
255 quad_bounds = m_optix_ctx->createProgramFromPTXString (quad_ptx, "bounds");
256 sphere_intersect = m_optix_ctx->createProgramFromPTXString (sphere_ptx, "intersect");
257 quad_intersect = m_optix_ctx->createProgramFromPTXString (quad_ptx, "intersect");
258 }
259
260 #endif //#if (OPTIX_VERSION < 70000)
261
262 #endif
263 return true;
264 }
265
266
267
268 bool
synch_attributes()269 OptixRaytracer::synch_attributes ()
270 {
271 #ifdef OSL_USE_OPTIX
272
273 #if (OPTIX_VERSION < 70000)
274 // FIXME -- this is for testing only
275 // Make some device strings to test userdata parameters
276 uint64_t addr1 = register_string ("ud_str_1", "");
277 uint64_t addr2 = register_string ("userdata string", "");
278 m_optix_ctx["test_str_1"]->setUserData (sizeof(char*), &addr1);
279 m_optix_ctx["test_str_2"]->setUserData (sizeof(char*), &addr2);
280
281 {
282 const char* name = OSL_NAMESPACE_STRING "::pvt::s_color_system";
283
284 char* colorSys = nullptr;
285 long long cpuDataSizes[2] = {0,0};
286 if (!shadingsys->getattribute("colorsystem", TypeDesc::PTR, (void*)&colorSys) ||
287 !shadingsys->getattribute("colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG,2), (void*)&cpuDataSizes) ||
288 !colorSys || !cpuDataSizes[0]) {
289 errhandler().error ("No colorsystem available.");
290 return false;
291 }
292 auto cpuDataSize = cpuDataSizes[0];
293 auto numStrings = cpuDataSizes[1];
294
295 // Get the size data-size, minus the ustring size
296 const size_t podDataSize = cpuDataSize - sizeof(StringParam)*numStrings;
297
298 optix::Buffer buffer = m_optix_ctx->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_USER);
299 if (!buffer) {
300 errhandler().error ("Could not create buffer for '%s'.", name);
301 return false;
302 }
303
304 // set the element size to char
305 buffer->setElementSize(sizeof(char));
306
307 // and number of elements to the actual size needed.
308 buffer->setSize(podDataSize + sizeof(DeviceString)*numStrings);
309
310 // copy the base data
311 char* gpuData = (char*) buffer->map();
312 if (!gpuData) {
313 errhandler().error ("Could not map buffer for '%s' (size: %lu).",
314 name, podDataSize + sizeof(DeviceString)*numStrings);
315 return false;
316 }
317 ::memcpy(gpuData, colorSys, podDataSize);
318
319 // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
320 // FIXME -- Should probably handle alignment better.
321 const ustring* cpuString = (const ustring*)(colorSys + (cpuDataSize - sizeof(StringParam)*numStrings));
322 char* gpuStrings = gpuData + podDataSize;
323 for (const ustring* end = cpuString + numStrings; cpuString < end; ++cpuString) {
324 // convert the ustring to a device string
325 uint64_t devStr = register_string (cpuString->string(), "");
326 ::memcpy(gpuStrings, &devStr, sizeof(devStr));
327 gpuStrings += sizeof(DeviceString);
328 }
329
330 buffer->unmap();
331 m_optix_ctx[name]->setBuffer(buffer);
332 }
333 #else // #if (OPTIX_VERSION < 70000)
334
335 // FIXME -- this is for testing only
336 // Make some device strings to test userdata parameters
337 uint64_t addr1 = register_string ("ud_str_1", "");
338 uint64_t addr2 = register_string ("userdata string", "");
339
340 // Store the user-data
341 register_global("test_str_1", addr1);
342 register_global("test_str_2", addr2);
343
344 {
345 char* colorSys = nullptr;
346 long long cpuDataSizes[2] = {0,0};
347 if (!shadingsys->getattribute("colorsystem", TypeDesc::PTR, (void*)&colorSys) ||
348 !shadingsys->getattribute("colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG,2), (void*)&cpuDataSizes) ||
349 !colorSys || !cpuDataSizes[0]) {
350 errhandler().error ("No colorsystem available.");
351 return false;
352 }
353
354 const char* name = OSL_NAMESPACE_STRING "::pvt::s_color_system";
355
356 auto cpuDataSize = cpuDataSizes[0];
357 auto numStrings = cpuDataSizes[1];
358
359 // Get the size data-size, minus the ustring size
360 const size_t podDataSize = cpuDataSize - sizeof(StringParam)*numStrings;
361
362 CUdeviceptr d_buffer;
363 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_buffer), podDataSize + sizeof (DeviceString)*numStrings));
364 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_buffer), colorSys, podDataSize, cudaMemcpyHostToDevice));
365
366 // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
367 // FIXME -- Should probably handle alignment better.
368 const ustring* cpuString = (const ustring*)(colorSys + (cpuDataSize - sizeof (StringParam)*numStrings));
369 CUdeviceptr gpuStrings = d_buffer + podDataSize;
370 for (const ustring* end = cpuString + numStrings; cpuString < end; ++cpuString) {
371 // convert the ustring to a device string
372 uint64_t devStr = register_string (cpuString->string(), "");
373 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(gpuStrings), &devStr, sizeof (devStr), cudaMemcpyHostToDevice));
374 gpuStrings += sizeof (DeviceString);
375 }
376 register_global(name, d_buffer);
377 }
378 #endif // #if (OPTIX_VERSION < 70000)
379 #endif
380 return true;
381 }
382
383 #if (OPTIX_VERSION >= 70000)
384 bool
load_optix_module(const char * filename,const OptixModuleCompileOptions * module_compile_options,const OptixPipelineCompileOptions * pipeline_compile_options,OptixModule * program_module)385 OptixRaytracer::load_optix_module (const char* filename,
386 const OptixModuleCompileOptions* module_compile_options,
387 const OptixPipelineCompileOptions* pipeline_compile_options,
388 OptixModule* program_module)
389 {
390 char msg_log[8192];
391
392 // Load the renderer CUDA source and generate PTX for it
393 std::string program_ptx = load_ptx_file(filename);
394 if (program_ptx.empty()) {
395 errhandler().severe ("Could not find PTX file: %s", filename);
396 return false;
397 }
398
399 size_t sizeof_msg_log = sizeof(msg_log);
400 OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
401 module_compile_options,
402 pipeline_compile_options,
403 program_ptx.c_str(),
404 program_ptx.size(),
405 msg_log, &sizeof_msg_log,
406 program_module));
407 //if (sizeof_msg_log > 1)
408 // printf ("Creating Module from PTX-file %s:\n%s\n", filename, msg_log);
409 return true;
410 }
411
412 bool
create_optix_pg(const OptixProgramGroupDesc * pg_desc,const int num_pg,OptixProgramGroupOptions * program_options,OptixProgramGroup * pg)413 OptixRaytracer::create_optix_pg(const OptixProgramGroupDesc* pg_desc,
414 const int num_pg,
415 OptixProgramGroupOptions* program_options,
416 OptixProgramGroup* pg)
417 {
418 char msg_log[8192];
419 size_t sizeof_msg_log = sizeof(msg_log);
420 OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
421 pg_desc,
422 num_pg,
423 program_options,
424 msg_log, &sizeof_msg_log,
425 pg));
426 //if (sizeof_msg_log > 1)
427 // printf ("Creating program group:\n%s\n", msg_log);
428
429 return true;
430 }
431 #endif
432
433 bool
make_optix_materials()434 OptixRaytracer::make_optix_materials ()
435 {
436 #ifdef OSL_USE_OPTIX
437
438 #if (OPTIX_VERSION < 70000)
439
440 optix::Program closest_hit, any_hit;
441 if (scene.num_prims()) {
442 closest_hit = m_optix_ctx->createProgramFromPTXString(
443 m_materials_ptx, "closest_hit_osl");
444 any_hit = m_optix_ctx->createProgramFromPTXString(
445 m_materials_ptx, "any_hit_shadow");
446 }
447
448 // Stand-in: names of shader outputs to preserve
449 // FIXME
450 std::vector<const char*> outputs { "Cout" };
451
452 // Optimize each ShaderGroup in the scene, and use the resulting
453 // PTX to create OptiX Programs which can be called by the closest
454 // hit program in the wrapper to execute the compiled OSL shader.
455 int mtl_id = 0;
456 for (const auto& groupref : shaders()) {
457 shadingsys->attribute (groupref.get(), "renderer_outputs",
458 TypeDesc(TypeDesc::STRING, outputs.size()),
459 outputs.data());
460
461 shadingsys->optimize_group (groupref.get(), nullptr);
462
463 if (!scene.num_prims()) {
464 if (!shadingsys->find_symbol (*groupref.get(), ustring(outputs[0]))) {
465 errhandler().warning ("Requested output '%s', which wasn't found",
466 outputs[0]);
467 }
468 }
469
470 std::string group_name, init_name, entry_name;
471 shadingsys->getattribute (groupref.get(), "groupname", group_name);
472 shadingsys->getattribute (groupref.get(), "group_init_name", init_name);
473 shadingsys->getattribute (groupref.get(), "group_entry_name", entry_name);
474
475 // Retrieve the compiled ShaderGroup PTX
476 std::string osl_ptx;
477 shadingsys->getattribute (groupref.get(), "ptx_compiled_version",
478 OSL::TypeDesc::PTR, &osl_ptx);
479
480 if (osl_ptx.empty()) {
481 errhandler().error ("Failed to generate PTX for ShaderGroup %s",
482 group_name);
483 return false;
484 }
485
486 if (options.get_int("saveptx")) {
487 std::string filename = OIIO::Strutil::sprintf("%s_%d.ptx", group_name,
488 mtl_id++);
489 OIIO::ofstream out;
490 OIIO::Filesystem::open (out, filename);
491 out << osl_ptx;
492 }
493
494 // Create Programs from the init and group_entry functions,
495 // and set the OSL functions as Callable Programs so that they
496 // can be executed by the closest hit program in the wrapper
497 optix::Program osl_init = m_optix_ctx->createProgramFromPTXString (
498 osl_ptx, init_name);
499 optix::Program osl_group = m_optix_ctx->createProgramFromPTXString (
500 osl_ptx, entry_name);
501 if (scene.num_prims()) {
502 // Create a new Material using the wrapper PTX
503 optix::Material mtl = m_optix_ctx->createMaterial();
504 mtl->setClosestHitProgram (0, closest_hit);
505 mtl->setAnyHitProgram (1, any_hit);
506
507 // Set the OSL functions as Callable Programs so that they can be
508 // executed by the closest hit program in the wrapper
509 mtl["osl_init_func" ]->setProgramId (osl_init );
510 mtl["osl_group_func"]->setProgramId (osl_group);
511 scene.optix_mtls.push_back(mtl);
512 } else {
513 // Grid shading
514 m_program["osl_init_func" ]->setProgramId (osl_init );
515 m_program["osl_group_func"]->setProgramId (osl_group);
516 }
517 }
518 if (!synch_attributes())
519 return false;
520
521 #else //#if (OPTIX_VERSION < 70000)
522 // Stand-in: names of shader outputs to preserve
523 std::vector<const char*> outputs { "Cout" };
524
525 std::vector<OptixModule> modules;
526
527 // Space for mesage logging
528 char msg_log[8192];
529 size_t sizeof_msg_log;
530
531 // Make module that contains programs we'll use in this scene
532 OptixModuleCompileOptions module_compile_options = {};
533
534 module_compile_options.maxRegisterCount = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT;
535 module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT;
536 module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
537
538 OptixPipelineCompileOptions pipeline_compile_options = {};
539
540 pipeline_compile_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
541 pipeline_compile_options.usesMotionBlur = false;
542 pipeline_compile_options.numPayloadValues = 3;
543 pipeline_compile_options.numAttributeValues = 3;
544 pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW;
545 pipeline_compile_options.pipelineLaunchParamsVariableName = "render_params";
546
547 // Create 'raygen' program
548
549 // Load the renderer CUDA source and generate PTX for it
550 OptixModule program_module;
551 load_optix_module("optix_raytracer.ptx", &module_compile_options,
552 &pipeline_compile_options,
553 &program_module);
554
555 // Record it so we can destroy it later
556 modules.push_back(program_module);
557
558 OptixModule quad_module;
559 load_optix_module("quad.ptx", &module_compile_options,
560 &pipeline_compile_options,
561 &quad_module);
562
563 OptixModule sphere_module;
564 load_optix_module("sphere.ptx", &module_compile_options,
565 &pipeline_compile_options,
566 &sphere_module);
567
568
569 OptixModule wrapper_module;
570 load_optix_module("wrapper.ptx", &module_compile_options,
571 &pipeline_compile_options,
572 &wrapper_module);
573
574
575 OptixProgramGroupOptions program_options = {};
576 std::vector<OptixProgramGroup> shader_groups;
577
578 // Raygen group
579 OptixProgramGroupDesc raygen_desc = {};
580 raygen_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
581 raygen_desc.raygen.module = program_module;
582 raygen_desc.raygen.entryFunctionName = "__raygen__";
583
584 OptixProgramGroup raygen_group;
585 create_optix_pg(&raygen_desc, 1, &program_options, &raygen_group);
586
587 // Miss group
588 OptixProgramGroupDesc miss_desc = {};
589 miss_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
590 miss_desc.miss.module = program_module; // raygen file/module contains miss program
591 miss_desc.miss.entryFunctionName = "__miss__";
592
593 OptixProgramGroup miss_group;
594 create_optix_pg(&miss_desc, 1, &program_options, &miss_group);
595
596 // Hitgroup -- quads
597 OptixProgramGroupDesc quad_hitgroup_desc = {};
598 quad_hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
599 quad_hitgroup_desc.hitgroup.moduleCH = wrapper_module;
600 quad_hitgroup_desc.hitgroup.entryFunctionNameCH = "__closesthit__closest_hit_osl";
601 quad_hitgroup_desc.hitgroup.moduleAH = wrapper_module;
602 quad_hitgroup_desc.hitgroup.entryFunctionNameAH = "__anyhit__any_hit_shadow";
603 quad_hitgroup_desc.hitgroup.moduleIS = quad_module;
604 quad_hitgroup_desc.hitgroup.entryFunctionNameIS = "__intersection__quad";
605 OptixProgramGroup quad_hitgroup;
606 create_optix_pg(&quad_hitgroup_desc, 1, &program_options, &quad_hitgroup);
607
608 // Direct-callable -- fills in ShaderGlobals for Quads
609 OptixProgramGroupDesc quad_fillSG_desc = {};
610 quad_fillSG_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
611 quad_fillSG_desc.callables.moduleDC = quad_module;
612 quad_fillSG_desc.callables.entryFunctionNameDC = "__direct_callable__quad_shaderglobals";
613 quad_fillSG_desc.callables.moduleCC = 0;
614 quad_fillSG_desc.callables.entryFunctionNameCC = nullptr;
615 OptixProgramGroup quad_fillSG_dc;
616 create_optix_pg(&quad_fillSG_desc, 1, &program_options, &quad_fillSG_dc);
617
618 // Hitgroup -- sphere
619 OptixProgramGroupDesc sphere_hitgroup_desc = {};
620 sphere_hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
621 sphere_hitgroup_desc.hitgroup.moduleCH = wrapper_module;
622 sphere_hitgroup_desc.hitgroup.entryFunctionNameCH = "__closesthit__closest_hit_osl";
623 sphere_hitgroup_desc.hitgroup.moduleAH = wrapper_module;
624 sphere_hitgroup_desc.hitgroup.entryFunctionNameAH = "__anyhit__any_hit_shadow";
625 sphere_hitgroup_desc.hitgroup.moduleIS = sphere_module;
626 sphere_hitgroup_desc.hitgroup.entryFunctionNameIS = "__intersection__sphere";
627 OptixProgramGroup sphere_hitgroup;
628 create_optix_pg(&sphere_hitgroup_desc, 1, &program_options, &sphere_hitgroup);
629
630 // Direct-callable -- fills in ShaderGlobals for Sphere
631 OptixProgramGroupDesc sphere_fillSG_desc = {};
632 sphere_fillSG_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
633 sphere_fillSG_desc.callables.moduleDC = sphere_module;
634 sphere_fillSG_desc.callables.entryFunctionNameDC = "__direct_callable__sphere_shaderglobals";
635 sphere_fillSG_desc.callables.moduleCC = 0;
636 sphere_fillSG_desc.callables.entryFunctionNameCC = nullptr;
637 OptixProgramGroup sphere_fillSG_dc;
638 create_optix_pg(&sphere_fillSG_desc, 1, &program_options, &sphere_fillSG_dc);
639
640 // Create materials
641 int mtl_id = 0;
642 for (const auto& groupref : shaders()) {
643 std::string group_name, init_name, entry_name;
644 shadingsys->getattribute (groupref.get(), "groupname", group_name);
645 shadingsys->getattribute (groupref.get(), "group_init_name", init_name);
646 shadingsys->getattribute (groupref.get(), "group_entry_name", entry_name);
647
648 shadingsys->attribute (groupref.get(), "renderer_outputs",
649 TypeDesc(TypeDesc::STRING, outputs.size()),
650 outputs.data());
651 shadingsys->optimize_group (groupref.get(), nullptr);
652
653 if (!shadingsys->find_symbol (*groupref.get(), ustring(outputs[0]))) {
654 // FIXME: This is for cases where testshade is run with 1x1 resolution
655 // Those tests may not have a Cout parameter to write to.
656 if (m_xres > 1 && m_yres > 1) {
657 errhandler().warning ("Requested output '%s', which wasn't found",
658 outputs[0]);
659 }
660 }
661
662 // Retrieve the compiled ShaderGroup PTX
663 std::string osl_ptx;
664 shadingsys->getattribute (groupref.get(), "ptx_compiled_version",
665 OSL::TypeDesc::PTR, &osl_ptx);
666
667 if (osl_ptx.empty()) {
668 errhandler().error ("Failed to generate PTX for ShaderGroup %s",
669 group_name);
670 return false;
671 }
672
673 if (options.get_int ("saveptx")) {
674 std::string filename = OIIO::Strutil::sprintf("%s_%d.ptx", group_name,
675 mtl_id++);
676 OIIO::ofstream out;
677 OIIO::Filesystem::open (out, filename);
678 out << osl_ptx;
679 }
680
681 OptixModule optix_module;
682
683 // Create Programs from the init and group_entry functions,
684 // and set the OSL functions as Callable Programs so that they
685 // can be executed by the closest hit program in the wrapper
686 sizeof_msg_log = sizeof(msg_log);
687 OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
688 &module_compile_options,
689 &pipeline_compile_options,
690 osl_ptx.c_str(),
691 osl_ptx.size(),
692 msg_log, &sizeof_msg_log,
693 &optix_module));
694 //if (sizeof_msg_log > 1)
695 // printf ("Creating module for PTX group '%s':\n%s\n", group_name.c_str(), msg_log);
696 modules.push_back(optix_module);
697
698 // Create 2x program groups (for direct callables)
699 OptixProgramGroupDesc pgDesc[2] = {};
700 pgDesc[0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
701 pgDesc[0].callables.moduleDC = optix_module;
702 pgDesc[0].callables.entryFunctionNameDC = init_name.c_str();
703 pgDesc[0].callables.moduleCC = 0;
704 pgDesc[0].callables.entryFunctionNameCC = nullptr;
705 pgDesc[1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
706 pgDesc[1].callables.moduleDC = optix_module;
707 pgDesc[1].callables.entryFunctionNameDC = entry_name.c_str();
708 pgDesc[1].callables.moduleCC = 0;
709 pgDesc[1].callables.entryFunctionNameCC = nullptr;
710
711 shader_groups.resize(shader_groups.size() + 2);
712 sizeof_msg_log = sizeof(msg_log);
713 OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
714 &pgDesc[0],
715 2,
716 &program_options,
717 msg_log, &sizeof_msg_log,
718 &shader_groups[shader_groups.size() - 2]));
719 //if (sizeof_msg_log > 1)
720 // printf ("Creating 'shader' group for group '%s':\n%s\n", group_name.c_str(), msg_log);
721 }
722
723
724 OptixPipelineLinkOptions pipeline_link_options;
725 pipeline_link_options.maxTraceDepth = 1;
726 pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
727 #if (OPTIX_VERSION < 70100)
728 pipeline_link_options.overrideUsesMotionBlur = false;
729 #endif
730
731 // Build string-table "library"
732 nvrtcProgram str_lib;
733
734 auto extractNamespaces = [] (const OIIO::ustring &s) {
735 const char *str = s.c_str();
736 std::vector<std::string> ns;
737 do {
738 const char *begin = str;
739 // get to first ':'
740 while (*str != ':' && *str)
741 str++;
742 ns.push_back (std::string (begin, str));
743 // advance to second ':'
744 if (*str && *str == ':')
745 str++;
746 } while (*str++ != 0);
747 return ns;
748 };
749
750 std::stringstream strlib_ss;
751
752 strlib_ss << "// so things name-mangle properly\n";
753 strlib_ss << "struct DeviceString {\n";
754 strlib_ss << " const char *data;\n";
755 strlib_ss << "};\n";
756 strlib_ss << "\n";
757
758 // write out all the global strings
759 for (auto &&gvar : m_globals_map) {
760 std::vector<std::string> var_ns = extractNamespaces(gvar.first);
761
762 // build namespace
763 for (size_t i = 0; i < var_ns.size() - 1; i++)
764 strlib_ss << "namespace " << var_ns[i] << " {\n";
765
766 strlib_ss << "__device__ DeviceString " << var_ns.back() << " = { (const char *)" << gvar.second << "};\n";
767 // close namespace up
768 for (size_t i = 0; i < var_ns.size() - 1; i++)
769 strlib_ss << "}\n";
770 }
771
772 strlib_ss << "extern \"C\" __global__ void __direct_callable__strlib_dummy(int *j)\n";
773 strlib_ss << "{\n";
774 strlib_ss << " // must have a __direct_callable__ function for the module to compile\n";
775 strlib_ss << "}\n";
776
777 // XXX: Should this move to compute_60 (compute_35-compute_50 is now deprecated)
778 const char *cuda_compile_options[] = { "--gpu-architecture=compute_35" ,
779 "--use_fast_math" ,
780 "-dc" ,
781 #if OSL_CPLUSPLUS_VERSION >= 14
782 "--std=c++14"
783 #else
784 "--std=c++11"
785 #endif
786 };
787
788 int num_compile_flags = int(sizeof(cuda_compile_options) / sizeof(cuda_compile_options[0]));
789 size_t str_lib_size, cuda_log_size;
790
791
792 std::string cuda_string = strlib_ss.str();
793
794 NVRTC_CHECK (nvrtcCreateProgram (&str_lib,
795 cuda_string.c_str(),
796 "cuda_strng_library",
797 0, // number of headers
798 nullptr, // header paths
799 nullptr)); // header files
800 nvrtcResult compileResult = nvrtcCompileProgram (str_lib, num_compile_flags, cuda_compile_options);
801 if (compileResult != NVRTC_SUCCESS) {
802 NVRTC_CHECK (nvrtcGetProgramLogSize (str_lib, &cuda_log_size));
803 std::vector<char> cuda_log(cuda_log_size+1);
804 NVRTC_CHECK (nvrtcGetProgramLog (str_lib, cuda_log.data()));
805 cuda_log.back() = 0;
806 errhandler().error ("nvrtcCompileProgram failure for:\n%s\n"
807 "====================================\n"
808 "%s\n", cuda_string.c_str(), cuda_log.data());
809 return false;
810 }
811
812
813 NVRTC_CHECK (nvrtcGetPTXSize (str_lib, &str_lib_size));
814 std::vector<char> str_lib_ptx (str_lib_size);
815 NVRTC_CHECK (nvrtcGetPTX (str_lib, str_lib_ptx.data()));
816 NVRTC_CHECK (nvrtcDestroyProgram (&str_lib));
817
818 std::string strlib_string (str_lib_ptx.begin(), str_lib_ptx.end());
819
820 OptixModule strlib_module;
821 sizeof_msg_log = sizeof(msg_log);
822 OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
823 &module_compile_options,
824 &pipeline_compile_options,
825 str_lib_ptx.data(),
826 str_lib_ptx.size(),
827 msg_log, &sizeof_msg_log,
828 &strlib_module));
829 //if (sizeof_msg_log > 1)
830 // printf ("Creating module from string-library PTX:\n%s\n", msg_log);
831
832 OptixProgramGroupDesc strlib_pg_desc = {};
833 strlib_pg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
834 strlib_pg_desc.callables.moduleDC = strlib_module;
835 strlib_pg_desc.callables.entryFunctionNameDC = "__direct_callable__strlib_dummy";
836 strlib_pg_desc.callables.moduleCC = 0;
837 strlib_pg_desc.callables.entryFunctionNameCC = nullptr;
838
839 OptixProgramGroup strlib_group;
840
841 sizeof_msg_log = sizeof(msg_log);
842 OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
843 &strlib_pg_desc,
844 1,
845 &program_options,
846 msg_log, &sizeof_msg_log,
847 &strlib_group));
848 //if (sizeof_msg_log > 1)
849 // printf ("Creating program group for string-library:\n%s\n", msg_log);
850
851 // Set up OptiX pipeline
852 std::vector<OptixProgramGroup> final_groups = {
853 strlib_group, // string globals
854 raygen_group,
855 miss_group
856 };
857
858 if (scene.quads.size() > 0)
859 final_groups.push_back (quad_hitgroup);
860 if (scene.spheres.size() > 0)
861 final_groups.push_back (sphere_hitgroup);
862
863 final_groups.push_back (quad_fillSG_dc);
864 final_groups.push_back (sphere_fillSG_dc);
865
866 // append the shader groups to our "official" list of program groups
867 final_groups.insert (final_groups.end(), shader_groups.begin(), shader_groups.end());
868
869 sizeof_msg_log = sizeof(msg_log);
870 OPTIX_CHECK (optixPipelineCreate (m_optix_ctx,
871 &pipeline_compile_options,
872 &pipeline_link_options,
873 final_groups.data(),
874 int(final_groups.size()),
875 msg_log, &sizeof_msg_log,
876 &m_optix_pipeline));
877 //if (sizeof_msg_log > 1)
878 // printf ("Creating optix pipeline:\n%s\n", msg_log);
879
880 // Set the pipeline stack size
881 OptixStackSizes stack_sizes = {};
882 for( OptixProgramGroup& program_group : final_groups )
883 OPTIX_CHECK (optixUtilAccumulateStackSizes (program_group, &stack_sizes));
884
885 uint32_t max_trace_depth = 1;
886 uint32_t max_cc_depth = 1;
887 uint32_t max_dc_depth = 1;
888 uint32_t direct_callable_stack_size_from_traversal;
889 uint32_t direct_callable_stack_size_from_state;
890 uint32_t continuation_stack_size;
891 OPTIX_CHECK (optixUtilComputeStackSizes (&stack_sizes,
892 max_trace_depth,
893 max_cc_depth,
894 max_dc_depth,
895 &direct_callable_stack_size_from_traversal,
896 &direct_callable_stack_size_from_state,
897 &continuation_stack_size ) );
898
899 const uint32_t max_traversal_depth = 1;
900 OPTIX_CHECK (optixPipelineSetStackSize (m_optix_pipeline,
901 direct_callable_stack_size_from_traversal,
902 direct_callable_stack_size_from_state,
903 continuation_stack_size,
904 max_traversal_depth ));
905
906 // Build OptiX Shader Binding Table (SBT)
907
908 std::vector<GenericRecord> sbt_records(final_groups.size());
909
910 CUdeviceptr d_raygen_record;
911 CUdeviceptr d_miss_record;
912 CUdeviceptr d_hitgroup_records;
913 CUdeviceptr d_callable_records;
914
915 std::vector<CUdeviceptr> d_sbt_records(final_groups.size());
916
917 for (size_t i = 0; i < final_groups.size(); i++) {
918 OPTIX_CHECK (optixSbtRecordPackHeader (final_groups[i], &sbt_records[i]));
919 }
920
921 int sbtIndex = 3;
922 const int hitRecordStart = sbtIndex;
923
924 // Copy geometry data to appropriate SBT records
925 if (scene.quads.size() > 0 ) {
926 sbt_records[sbtIndex].data = reinterpret_cast<void *>(d_quads_list);
927 sbt_records[sbtIndex].sbtGeoIndex = 0; // DC index for filling in Quad ShaderGlobals
928 ++sbtIndex;
929 }
930
931 if (scene.spheres.size() > 0 ) {
932 sbt_records[sbtIndex].data = reinterpret_cast<void *>(d_spheres_list);
933 sbt_records[sbtIndex].sbtGeoIndex = 1; // DC index for filling in Sphere ShaderGlobals
934 ++sbtIndex;
935 }
936
937 const int callableRecordStart = sbtIndex;
938
939 // Copy geometry data to our DC (direct-callable) funcs that fill ShaderGlobals
940 sbt_records[sbtIndex++].data = reinterpret_cast<void *>(d_quads_list);
941 sbt_records[sbtIndex++].data = reinterpret_cast<void *>(d_spheres_list);
942
943 const int nshaders = int(shader_groups.size());
944 const int nhitgroups = (scene.quads.size() > 0) + (scene.spheres.size() > 0);
945
946 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_raygen_record) , sizeof(GenericRecord)));
947 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_miss_record) , sizeof(GenericRecord)));
948 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_hitgroup_records) , nhitgroups * sizeof(GenericRecord)));
949 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_callable_records) , (2 + nshaders) * sizeof(GenericRecord)));
950
951 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_raygen_record) , &sbt_records[1], sizeof(GenericRecord), cudaMemcpyHostToDevice));
952 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_miss_record) , &sbt_records[2], sizeof(GenericRecord), cudaMemcpyHostToDevice));
953 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_hitgroup_records), &sbt_records[hitRecordStart], nhitgroups * sizeof(GenericRecord), cudaMemcpyHostToDevice));
954 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_callable_records), &sbt_records[callableRecordStart], (2 + nshaders) * sizeof(GenericRecord), cudaMemcpyHostToDevice));
955
956 // Looks like OptixShadingTable needs to be filled out completely
957 m_optix_sbt.raygenRecord = d_raygen_record;
958 m_optix_sbt.missRecordBase = d_miss_record;
959 m_optix_sbt.missRecordStrideInBytes = sizeof(GenericRecord);
960 m_optix_sbt.missRecordCount = 1;
961 m_optix_sbt.hitgroupRecordBase = d_hitgroup_records;
962 m_optix_sbt.hitgroupRecordStrideInBytes = sizeof(GenericRecord);
963 m_optix_sbt.hitgroupRecordCount = nhitgroups;
964 m_optix_sbt.callablesRecordBase = d_callable_records;
965 m_optix_sbt.callablesRecordStrideInBytes = sizeof(GenericRecord);
966 m_optix_sbt.callablesRecordCount = 2 + nshaders;
967
968 // Pipeline has been created so we can clean some things up
969 for (auto &&i : final_groups) {
970 optixProgramGroupDestroy(i);
971 }
972 for (auto &&i : modules) {
973 optixModuleDestroy(i);
974 }
975 modules.clear();
976
977
978 #endif //#if (OPTIX_VERSION < 70000)
979
980 #endif
981 return true;
982 }
983
984 bool
finalize_scene()985 OptixRaytracer::finalize_scene()
986 {
987 #ifdef OSL_USE_OPTIX
988
989 #if (OPTIX_VERSION < 70000)
990 make_optix_materials();
991
992 // Create a GeometryGroup to contain the scene geometry
993 optix::GeometryGroup geom_group = m_optix_ctx->createGeometryGroup();
994
995 m_optix_ctx["top_object" ]->set (geom_group);
996 m_optix_ctx["top_shadower"]->set (geom_group);
997
998 // NB: Since the scenes in the test suite consist of only a few primitives,
999 // using 'NoAccel' instead of 'Trbvh' might yield a slight performance
1000 // improvement. For more complex scenes (e.g., scenes with meshes),
1001 // using 'Trbvh' is recommended to achieve maximum performance.
1002 geom_group->setAcceleration (m_optix_ctx->createAcceleration ("Trbvh"));
1003
1004 // Translate the primitives parsed from the scene description into OptiX scene
1005 // objects
1006 for (const auto& sphere : scene.spheres) {
1007 optix::Geometry sphere_geom = m_optix_ctx->createGeometry();
1008 sphere.setOptixVariables (sphere_geom, sphere_bounds, sphere_intersect);
1009
1010 optix::GeometryInstance sphere_gi = m_optix_ctx->createGeometryInstance (
1011 sphere_geom, &scene.optix_mtls[sphere.shaderid()], &scene.optix_mtls[sphere.shaderid()]+1);
1012
1013 geom_group->addChild (sphere_gi);
1014 }
1015
1016 for (const auto& quad : scene.quads) {
1017 optix::Geometry quad_geom = m_optix_ctx->createGeometry();
1018 quad.setOptixVariables (quad_geom, quad_bounds, quad_intersect);
1019
1020 optix::GeometryInstance quad_gi = m_optix_ctx->createGeometryInstance (
1021 quad_geom, &scene.optix_mtls[quad.shaderid()], &scene.optix_mtls[quad.shaderid()]+1);
1022
1023 geom_group->addChild (quad_gi);
1024 }
1025
1026 // Set the camera variables on the OptiX Context, to be used by the ray gen program
1027 m_optix_ctx["eye" ]->setFloat (vec3_to_float3 (camera.eye));
1028 m_optix_ctx["dir" ]->setFloat (vec3_to_float3 (camera.dir));
1029 m_optix_ctx["cx" ]->setFloat (vec3_to_float3 (camera.cx));
1030 m_optix_ctx["cy" ]->setFloat (vec3_to_float3 (camera.cy));
1031 m_optix_ctx["invw"]->setFloat (camera.invw);
1032 m_optix_ctx["invh"]->setFloat (camera.invh);
1033
1034 // Create the output buffer
1035 optix::Buffer buffer = m_optix_ctx->createBuffer(RT_BUFFER_OUTPUT, RT_FORMAT_FLOAT3, camera.xres, camera.yres);
1036 m_optix_ctx["output_buffer"]->set(buffer);
1037
1038 m_optix_ctx->validate();
1039
1040 #else //#if (OPTIX_VERSION < 70000)
1041
1042 // Build acceleration structures
1043 OptixAccelBuildOptions accelOptions;
1044 OptixBuildInput buildInputs[2];
1045
1046 memset(&accelOptions, 0, sizeof(OptixAccelBuildOptions));
1047 accelOptions.buildFlags = OPTIX_BUILD_FLAG_NONE;
1048 accelOptions.operation = OPTIX_BUILD_OPERATION_BUILD;
1049 accelOptions.motionOptions.numKeys = 0;
1050 memset(buildInputs, 0, sizeof(OptixBuildInput) * 2);
1051
1052 // Set up quads input
1053 void* d_quadsAabb;
1054 std::vector<OptixAabb> quadsAabb;
1055 std::vector<QuadParams> quadsParams;
1056 quadsAabb.reserve(scene.quads.size());
1057 quadsParams.reserve(scene.quads.size());
1058 std::vector<int> quadShaders;
1059 quadShaders.reserve(scene.quads.size());
1060 for (const auto& quad : scene.quads) {
1061 OptixAabb aabb;
1062 quad.getBounds(aabb.minX, aabb.minY, aabb.minZ,
1063 aabb.maxX, aabb.maxY, aabb.maxZ);
1064 quadsAabb.push_back(aabb);
1065 QuadParams quad_params;
1066 quad.setOptixVariables(&quad_params);
1067 quadsParams.push_back(quad_params);
1068 }
1069 // Copy Quads bounding boxes to cuda device
1070 CUDA_CHECK (cudaMalloc (&d_quadsAabb, sizeof(OptixAabb) * scene.quads.size()));
1071 CUDA_CHECK (cudaMemcpy ( d_quadsAabb, quadsAabb.data(), sizeof(OptixAabb) * scene.quads.size(), cudaMemcpyHostToDevice));
1072
1073 // Copy Quads to cuda device
1074 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_quads_list) , sizeof(QuadParams ) * scene.quads.size()));
1075 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_quads_list) , quadsParams.data(), sizeof(QuadParams ) * scene.quads.size(), cudaMemcpyHostToDevice));
1076
1077 // Fill in Quad shaders
1078 CUdeviceptr d_quadsIndexOffsetBuffer;
1079 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_quadsIndexOffsetBuffer), scene.quads.size() * sizeof(int)));
1080
1081 int numBuildInputs = 0;
1082
1083 unsigned int quadSbtRecord;
1084 quadSbtRecord = OPTIX_GEOMETRY_FLAG_NONE;
1085 if (scene.quads.size() > 0) {
1086 #if (OPTIX_VERSION < 70100)
1087 OptixBuildInputCustomPrimitiveArray& quadsInput = buildInputs[numBuildInputs].aabbArray;
1088 #else
1089 OptixBuildInputCustomPrimitiveArray& quadsInput = buildInputs[numBuildInputs].customPrimitiveArray;
1090 #endif
1091 buildInputs[numBuildInputs].type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1092 quadsInput.flags = &quadSbtRecord;
1093 quadsInput.aabbBuffers = reinterpret_cast<CUdeviceptr *>(&d_quadsAabb);
1094 quadsInput.numPrimitives = scene.quads.size();
1095 quadsInput.numSbtRecords = 1;
1096 quadsInput.sbtIndexOffsetSizeInBytes = sizeof(int);
1097 quadsInput.sbtIndexOffsetStrideInBytes = sizeof(int);
1098 quadsInput.sbtIndexOffsetBuffer = 0; // d_quadsIndexOffsetBuffer;
1099 ++numBuildInputs;
1100 }
1101
1102 // Set up spheres input
1103 void* d_spheresAabb;
1104 std::vector<OptixAabb> spheresAabb;
1105 std::vector<SphereParams> spheresParams;
1106 spheresAabb.reserve(scene.spheres.size());
1107 spheresParams.reserve(scene.spheres.size());
1108 std::vector<int> sphereShaders;
1109 sphereShaders.reserve(scene.spheres.size());
1110 for (const auto& sphere : scene.spheres) {
1111 OptixAabb aabb;
1112 sphere.getBounds(aabb.minX, aabb.minY, aabb.minZ,
1113 aabb.maxX, aabb.maxY, aabb.maxZ);
1114 spheresAabb.push_back(aabb);
1115
1116 SphereParams sphere_params;
1117 sphere.setOptixVariables(&sphere_params);
1118 spheresParams.push_back(sphere_params);
1119 }
1120 // Copy Spheres bounding boxes to cuda device
1121 CUDA_CHECK (cudaMalloc (&d_spheresAabb, sizeof(OptixAabb) * scene.spheres.size()));
1122 CUDA_CHECK (cudaMemcpy ( d_spheresAabb, spheresAabb.data(), sizeof(OptixAabb) * scene.spheres.size(), cudaMemcpyHostToDevice));
1123
1124 // Copy Spheres to cuda device
1125 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_spheres_list), sizeof(SphereParams) * scene.spheres.size()));
1126 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_spheres_list), spheresParams.data(), sizeof(SphereParams) * scene.spheres.size(), cudaMemcpyHostToDevice));
1127
1128 // Fill in Sphere shaders
1129 CUdeviceptr d_spheresIndexOffsetBuffer;
1130 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_spheresIndexOffsetBuffer), scene.spheres.size() * sizeof(int)));
1131
1132 unsigned int sphereSbtRecord;
1133 sphereSbtRecord = OPTIX_GEOMETRY_FLAG_NONE;
1134 if (scene.spheres.size() > 0) {
1135 #if (OPTIX_VERSION < 70100)
1136 OptixBuildInputCustomPrimitiveArray& spheresInput = buildInputs[numBuildInputs].aabbArray;
1137 #else
1138 OptixBuildInputCustomPrimitiveArray& spheresInput = buildInputs[numBuildInputs].customPrimitiveArray;
1139 #endif
1140 buildInputs[numBuildInputs].type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1141 spheresInput.flags = &sphereSbtRecord;
1142 spheresInput.aabbBuffers = reinterpret_cast<CUdeviceptr *>(&d_spheresAabb);
1143 spheresInput.numPrimitives = scene.spheres.size();
1144 spheresInput.numSbtRecords = 1;
1145 spheresInput.sbtIndexOffsetSizeInBytes = sizeof(int);
1146 spheresInput.sbtIndexOffsetStrideInBytes = sizeof(int);
1147 spheresInput.sbtIndexOffsetBuffer = 0; // d_spheresIndexOffsetBuffer;
1148 ++numBuildInputs;
1149 }
1150
1151 // Compute memory usage by acceleration structures
1152 OptixAccelBufferSizes bufferSizes;
1153 optixAccelComputeMemoryUsage(m_optix_ctx, &accelOptions, buildInputs, numBuildInputs, &bufferSizes);
1154
1155 void *d_output, *d_temp;
1156 CUDA_CHECK (cudaMalloc (&d_output, bufferSizes.outputSizeInBytes));
1157 CUDA_CHECK (cudaMalloc (&d_temp , bufferSizes.tempSizeInBytes ));
1158
1159 // Get the bounding box for the AS
1160 void *d_aabb;
1161 CUDA_CHECK (cudaMalloc (&d_aabb, sizeof(OptixAabb)));
1162
1163 OptixAccelEmitDesc property;
1164 property.type = OPTIX_PROPERTY_TYPE_AABBS;
1165 property.result = (CUdeviceptr) d_aabb;
1166
1167 OPTIX_CHECK (optixAccelBuild (m_optix_ctx,
1168 m_cuda_stream,
1169 &accelOptions,
1170 buildInputs,
1171 numBuildInputs,
1172 reinterpret_cast<CUdeviceptr>(d_temp), bufferSizes.tempSizeInBytes,
1173 reinterpret_cast<CUdeviceptr>(d_output), bufferSizes.outputSizeInBytes,
1174 &m_travHandle,
1175 &property,
1176 1));
1177
1178 OptixAabb h_aabb;
1179 CUDA_CHECK (cudaMemcpy ((void*)&h_aabb, reinterpret_cast<void *>(d_aabb), sizeof(OptixAabb), cudaMemcpyDeviceToHost));
1180 cudaFree (d_aabb);
1181
1182 // Sanity check the AS bounds
1183 // printf ("AABB min: [%0.6f, %0.6f, %0.6f], max: [%0.6f, %0.6f, %0.6f]\n",
1184 // h_aabb.minX, h_aabb.minY, h_aabb.minZ,
1185 // h_aabb.maxX, h_aabb.maxY, h_aabb.maxZ );
1186
1187 make_optix_materials();
1188
1189 #endif //#if (OPTIX_VERSION < 70000)
1190 #endif //#ifdef OSL_USE_OPTIX
1191 return true;
1192 }
1193
1194
1195
1196 /// Return true if the texture handle (previously returned by
1197 /// get_texture_handle()) is a valid texture that can be subsequently
1198 /// read or sampled.
1199 bool
good(TextureHandle * handle OSL_MAYBE_UNUSED)1200 OptixRaytracer::good(TextureHandle *handle OSL_MAYBE_UNUSED)
1201 {
1202 #ifdef OSL_USE_OPTIX
1203
1204 #if (OPTIX_VERSION < 70000)
1205 return intptr_t(handle) != RT_TEXTURE_ID_NULL;
1206 #else
1207 return handle != nullptr;
1208 #endif
1209
1210 #else
1211 return false;
1212 #endif
1213 }
1214
1215
1216
1217 /// Given the name of a texture, return an opaque handle that can be
1218 /// used with texture calls to avoid the name lookups.
1219 RendererServices::TextureHandle*
get_texture_handle(ustring filename OSL_MAYBE_UNUSED,ShadingContext * shading_context OSL_MAYBE_UNUSED)1220 OptixRaytracer::get_texture_handle (ustring filename OSL_MAYBE_UNUSED,
1221 ShadingContext* shading_context OSL_MAYBE_UNUSED)
1222 {
1223 #ifdef OSL_USE_OPTIX
1224
1225 #if (OPTIX_VERSION < 70000)
1226 auto itr = m_samplers.find(filename);
1227 if (itr == m_samplers.end()) {
1228 optix::TextureSampler sampler = context()->createTextureSampler();
1229 sampler->setWrapMode(0, RT_WRAP_REPEAT);
1230 sampler->setWrapMode(1, RT_WRAP_REPEAT);
1231 sampler->setWrapMode(2, RT_WRAP_REPEAT);
1232
1233 sampler->setFilteringModes(RT_FILTER_LINEAR, RT_FILTER_LINEAR, RT_FILTER_NONE);
1234 sampler->setIndexingMode(false ? RT_TEXTURE_INDEX_ARRAY_INDEX : RT_TEXTURE_INDEX_NORMALIZED_COORDINATES);
1235 sampler->setReadMode(RT_TEXTURE_READ_NORMALIZED_FLOAT);
1236 sampler->setMaxAnisotropy(1.0f);
1237
1238
1239 OIIO::ImageBuf image;
1240 if (!image.init_spec(filename, 0, 0)) {
1241 errhandler().error ("Could not load: %s", filename);
1242 return (TextureHandle*)(intptr_t(RT_TEXTURE_ID_NULL));
1243 }
1244 int nchan = image.spec().nchannels;
1245
1246 OIIO::ROI roi = OIIO::get_roi_full(image.spec());
1247 int width = roi.width(), height = roi.height();
1248 std::vector<float> pixels(width * height * nchan);
1249 image.get_pixels(roi, OIIO::TypeDesc::FLOAT, pixels.data());
1250
1251 optix::Buffer buffer = context()->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_FLOAT4, width, height);
1252
1253 float* device_ptr = static_cast<float*>(buffer->map());
1254 unsigned int pixel_idx = 0;
1255 for (int y = 0; y < height; ++y) {
1256 for (int x = 0; x < width; ++x) {
1257 memcpy(device_ptr, &pixels[pixel_idx], sizeof(float) * nchan);
1258 device_ptr += 4;
1259 pixel_idx += nchan;
1260 }
1261 }
1262 buffer->unmap();
1263 sampler->setBuffer(buffer);
1264 itr = m_samplers.emplace(std::move(filename), std::move(sampler)).first;
1265
1266 }
1267 return (RendererServices::TextureHandle*) intptr_t(itr->second->getId());
1268
1269 #else //#if (OPTIX_VERSION < 70000)
1270
1271 auto itr = m_samplers.find(filename);
1272 if (itr == m_samplers.end()) {
1273
1274 // Open image
1275 OIIO::ImageBuf image;
1276 if (!image.init_spec(filename, 0, 0)) {
1277 errhandler().error ("Could not load: %s", filename);
1278 return (TextureHandle*)(intptr_t(nullptr));
1279 }
1280
1281 OIIO::ROI roi = OIIO::get_roi_full(image.spec());
1282 int32_t width = roi.width(), height = roi.height();
1283 std::vector<float> pixels(width * height * 4);
1284
1285 for (int j = 0; j < height; j++) {
1286 for (int i = 0; i < width; i++) {
1287 image.getpixel(i, j, 0, &pixels[((j*width) + i) * 4 + 0]);
1288 }
1289 }
1290 cudaResourceDesc res_desc = {};
1291
1292 // hard-code textures to 4 channels
1293 int32_t pitch = width * 4 * sizeof(float);
1294 cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
1295
1296 cudaArray_t pixelArray;
1297 CUDA_CHECK (cudaMallocArray (&pixelArray,
1298 &channel_desc,
1299 width,height));
1300
1301 CUDA_CHECK (cudaMemcpy2DToArray (pixelArray,
1302 0, 0,
1303 pixels.data(),
1304 pitch,pitch,height,
1305 cudaMemcpyHostToDevice));
1306
1307 res_desc.resType = cudaResourceTypeArray;
1308 res_desc.res.array.array = pixelArray;
1309
1310 cudaTextureDesc tex_desc = {};
1311 tex_desc.addressMode[0] = cudaAddressModeWrap;
1312 tex_desc.addressMode[1] = cudaAddressModeWrap;
1313 tex_desc.filterMode = cudaFilterModeLinear;
1314 tex_desc.readMode = cudaReadModeElementType; //cudaReadModeNormalizedFloat;
1315 tex_desc.normalizedCoords = 1;
1316 tex_desc.maxAnisotropy = 1;
1317 tex_desc.maxMipmapLevelClamp = 99;
1318 tex_desc.minMipmapLevelClamp = 0;
1319 tex_desc.mipmapFilterMode = cudaFilterModePoint;
1320 tex_desc.borderColor[0] = 1.0f;
1321 tex_desc.sRGB = 0;
1322
1323 // Create texture object
1324 cudaTextureObject_t cuda_tex = 0;
1325 CUDA_CHECK (cudaCreateTextureObject (&cuda_tex, &res_desc, &tex_desc, nullptr));
1326 itr = m_samplers.emplace (std::move(filename), std::move(cuda_tex)).first;
1327 }
1328 return reinterpret_cast<RendererServices::TextureHandle *>(itr->second);
1329
1330 #endif //#if (OPTIX_VERSION < 70000)
1331
1332 #else
1333 return nullptr;
1334 #endif
1335 }
1336
1337
1338
1339 void
prepare_render()1340 OptixRaytracer::prepare_render()
1341 {
1342 #ifdef OSL_USE_OPTIX
1343 // Set up the OptiX Context
1344 init_optix_context (camera.xres, camera.yres);
1345
1346 // Set up the OptiX scene graph
1347 finalize_scene ();
1348 #endif
1349 }
1350
1351
1352
1353 void
warmup()1354 OptixRaytracer::warmup()
1355 {
1356 #ifdef OSL_USE_OPTIX
1357 // Perform a tiny launch to warm up the OptiX context
1358 #if (OPTIX_VERSION < 70000)
1359 m_optix_ctx->launch (0, 1, 1);
1360 #else
1361 OPTIX_CHECK (optixLaunch (m_optix_pipeline,
1362 m_cuda_stream,
1363 d_launch_params,
1364 sizeof(RenderParams),
1365 &m_optix_sbt,
1366 0, 0, 1));
1367 CUDA_SYNC_CHECK();
1368 #endif
1369 #endif
1370 }
1371
1372
1373
1374 void
render(int xres OSL_MAYBE_UNUSED,int yres OSL_MAYBE_UNUSED)1375 OptixRaytracer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED)
1376 {
1377 #ifdef OSL_USE_OPTIX
1378 #if (OPTIX_VERSION < 70000)
1379 m_optix_ctx->launch (0, xres, yres);
1380 #else
1381 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_output_buffer), xres * yres * 4 * sizeof(float)));
1382 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_launch_params), sizeof(RenderParams)));
1383
1384
1385 m_xres = xres;
1386 m_yres = yres;
1387
1388 RenderParams params;
1389 params.eye.x = camera.eye.x;
1390 params.eye.y = camera.eye.y;
1391 params.eye.z = camera.eye.z;
1392 params.dir.x = camera.dir.x;
1393 params.dir.y = camera.dir.y;
1394 params.dir.z = camera.dir.z;
1395 params.cx.x = camera.cx.x;
1396 params.cx.y = camera.cx.y;
1397 params.cx.z = camera.cx.z;
1398 params.cy.x = camera.cy.x;
1399 params.cy.y = camera.cy.y;
1400 params.cy.z = camera.cy.z;
1401 params.invw = 1.0f / m_xres;
1402 params.invh = 1.0f / m_yres;
1403 params.output_buffer = d_output_buffer;
1404 params.traversal_handle = m_travHandle;
1405
1406 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_launch_params), ¶ms, sizeof(RenderParams), cudaMemcpyHostToDevice));
1407
1408 OPTIX_CHECK (optixLaunch (m_optix_pipeline,
1409 m_cuda_stream,
1410 d_launch_params,
1411 sizeof(RenderParams),
1412 &m_optix_sbt,
1413 xres, yres, 1));
1414 CUDA_SYNC_CHECK();
1415 #endif
1416 #endif
1417 }
1418
1419
1420
1421 void
finalize_pixel_buffer()1422 OptixRaytracer::finalize_pixel_buffer ()
1423 {
1424 #ifdef OSL_USE_OPTIX
1425 std::string buffer_name = "output_buffer";
1426 #if (OPTIX_VERSION < 70000)
1427 const void* buffer_ptr = m_optix_ctx[buffer_name]->getBuffer()->map();
1428 if (! buffer_ptr)
1429 errhandler().severe ("Unable to map buffer %s", buffer_name);
1430 pixelbuf.set_pixels (OIIO::ROI::All(), OIIO::TypeFloat, buffer_ptr);
1431 #else
1432 std::vector<float> tmp_buff(m_xres * m_yres * 3);
1433 CUDA_CHECK (cudaMemcpy (tmp_buff.data(), reinterpret_cast<void *>(d_output_buffer), m_xres * m_yres * 3 * sizeof(float), cudaMemcpyDeviceToHost));
1434 pixelbuf.set_pixels (OIIO::ROI::All(), OIIO::TypeFloat, tmp_buff.data());
1435 #endif
1436 #endif
1437 }
1438
1439
1440
1441 void
clear()1442 OptixRaytracer::clear()
1443 {
1444 shaders().clear();
1445 #ifdef OSL_USE_OPTIX
1446 #if (OPTIX_VERSION < 70000)
1447 if (m_optix_ctx) {
1448 m_optix_ctx->destroy();
1449 m_optix_ctx = nullptr;
1450 }
1451 #else
1452 OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
1453 m_optix_ctx = 0;
1454 #endif
1455 #endif
1456 }
1457
1458 OSL_NAMESPACE_EXIT
1459