1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4 
5 #include <vector>
6 
7 #include <OpenImageIO/filesystem.h>
8 #include <OpenImageIO/sysutil.h>
9 
10 #include <OSL/oslconfig.h>
11 
12 #include "optixraytracer.h"
13 #include "render_params.h"
14 
15 #ifdef OSL_USE_OPTIX
16 #  if (OPTIX_VERSION >= 70000)
17 #  include <optix_function_table_definition.h>
18 #  include <optix_stack_size.h>
19 #  include <optix_stubs.h>
20 #  include <cuda.h>
21 #  include <nvrtc.h>
22 #  endif
23 #endif
24 
25 // The pre-compiled renderer support library LLVM bitcode is embedded
26 // into the executable and made available through these variables.
27 extern int rend_llvm_compiled_ops_size;
28 extern unsigned char rend_llvm_compiled_ops_block[];
29 
30 
31 OSL_NAMESPACE_ENTER
32 
33 #if (OPTIX_VERSION >= 70000)
34 
35 #define CUDA_CHECK(call)                                                  \
36 {                                                                         \
37     cudaError_t error = call;                                             \
38     if (error != cudaSuccess)                                             \
39     {                                                                     \
40         std::stringstream ss;                                             \
41         ss << "CUDA call (" << #call << " ) failed with error: '"         \
42            << cudaGetErrorString( error )                                 \
43            << "' (" __FILE__ << ":" << __LINE__ << ")\n";                 \
44            fprintf(stderr, "[CUDA ERROR]  %s", ss.str().c_str() );        \
45     }                                                                     \
46 }
47 
48 #define NVRTC_CHECK(call)                                                 \
49 {                                                                         \
50     nvrtcResult error = call;                                             \
51     if (error != NVRTC_SUCCESS)                                           \
52     {                                                                     \
53         std::stringstream ss;                                             \
54         ss << "NVRTC call (" << #call << " ) failed with error: '"        \
55            << nvrtcGetErrorString( error )                                \
56            << "' (" __FILE__ << ":" << __LINE__ << ")\n";                 \
57            fprintf(stderr, "[NVRTC ERROR]  %s", ss.str().c_str() );       \
58     }                                                                     \
59 }
60 
61 
62 #define OPTIX_CHECK(call)                                                 \
63 {                                                                         \
64     OptixResult res = call;                                               \
65     if (res != OPTIX_SUCCESS)                                             \
66     {                                                                     \
67         std::stringstream ss;                                             \
68         ss  << "Optix call '" << #call << "' failed with error: "         \
69             << optixGetErrorName( res )                                   \
70             << " (" __FILE__ ":"   << __LINE__ << ")\n";                  \
71         fprintf(stderr,"[OPTIX ERROR]  %s", ss.str().c_str() );           \
72         exit(1);                                                          \
73     }                                                                     \
74 }
75 #endif
76 
77 #define CUDA_SYNC_CHECK()                                               \
78 {                                                                       \
79     cudaDeviceSynchronize();                                            \
80     cudaError_t error = cudaGetLastError();                             \
81     if (error != cudaSuccess) {                                         \
82         fprintf( stderr, "error (%s: line %d): %s\n", __FILE__, __LINE__, cudaGetErrorString( error ) ); \
83         exit(1);                                                        \
84     }                                                                   \
85 }
86 
87 #ifdef OSL_USE_OPTIX
88 #if (OPTIX_VERSION >= 70000)
context_log_cb(unsigned int level,const char * tag,const char * message,void *)89 static void context_log_cb( unsigned int level, const char* tag, const char* message, void* /*cbdata */ )
90 {
91 //    std::cerr << "[ ** LOGCALLBACK** " << std::setw( 2 ) << level << "][" << std::setw( 12 ) << tag << "]: " << message << "\n";
92 }
93 #endif
94 #endif
95 
OptixRaytracer()96 OptixRaytracer::OptixRaytracer ()
97 {
98 #ifdef OSL_USE_OPTIX
99 #if (OPTIX_VERSION >= 70000)
100     // Initialize CUDA
101     cudaFree(0);
102 
103     CUcontext cuCtx = nullptr;  // zero means take the current context
104 
105     OptixDeviceContextOptions ctx_options = {};
106     ctx_options.logCallbackFunction = context_log_cb;
107     ctx_options.logCallbackLevel    = 4;
108 
109     OPTIX_CHECK (optixInit ());
110     OPTIX_CHECK (optixDeviceContextCreate (cuCtx, &ctx_options, &m_optix_ctx));
111 
112     CUDA_CHECK (cudaSetDevice (0));
113     CUDA_CHECK (cudaStreamCreate (&m_cuda_stream));
114 
115     // Set up the string table. This allocates a block of CUDA device memory to
116     // hold all of the static strings used by the OSL shaders. The strings can
117     // be accessed via OptiX variables that hold pointers to the table entries.
118     m_str_table.init(m_optix_ctx);
119 
120     // Register all of our string table entries
121     for (auto &&gvar : m_str_table.contents())
122         register_global (gvar.first.c_str(), gvar.second);
123 #endif
124 #endif
125 }
126 
~OptixRaytracer()127 OptixRaytracer::~OptixRaytracer ()
128 {
129 #ifdef OSL_USE_OPTIX
130     m_str_table.freetable();
131 #if (OPTIX_VERSION < 70000)
132     if (m_optix_ctx)
133         m_optix_ctx->destroy();
134 #else
135     if (m_optix_ctx)
136         OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
137 #endif
138 #endif
139 }
140 
141 
142 uint64_t
register_global(const std::string & str,uint64_t value)143 OptixRaytracer::register_global (const std::string &str, uint64_t value)
144 {
145     auto it = m_globals_map.find (ustring(str));
146 
147     if (it != m_globals_map.end()) {
148        return it->second;
149     }
150     m_globals_map[ustring(str)] = value;
151 
152     return value;
153 }
154 
155 bool
fetch_global(const std::string & str,uint64_t * value)156 OptixRaytracer::fetch_global (const std::string &str, uint64_t *value)
157 {
158     auto it = m_globals_map.find (ustring(str));
159 
160     if (it != m_globals_map.end()) {
161        *value = it->second;
162        return true;
163     }
164     return false;
165 }
166 
167 std::string
load_ptx_file(string_view filename)168 OptixRaytracer::load_ptx_file (string_view filename)
169 {
170 #ifdef OSL_USE_OPTIX
171     std::vector<std::string> paths = {
172         OIIO::Filesystem::parent_path(OIIO::Sysutil::this_program_path()),
173         PTX_PATH
174     };
175     std::string filepath = OIIO::Filesystem::searchpath_find (filename, paths,
176                                                               false);
177     if (OIIO::Filesystem::exists(filepath)) {
178         std::string ptx_string;
179         if (OIIO::Filesystem::read_text_file (filepath, ptx_string))
180             return ptx_string;
181     }
182 #endif
183     errhandler().severe ("Unable to load %s", filename);
184     return {};
185 }
186 
187 
188 
189 bool
init_optix_context(int xres OSL_MAYBE_UNUSED,int yres OSL_MAYBE_UNUSED)190 OptixRaytracer::init_optix_context (int xres OSL_MAYBE_UNUSED,
191                                     int yres OSL_MAYBE_UNUSED)
192 {
193 #ifdef OSL_USE_OPTIX
194 
195     shadingsys->attribute ("lib_bitcode", {OSL::TypeDesc::UINT8, rend_llvm_compiled_ops_size},
196                            rend_llvm_compiled_ops_block);
197 
198 
199 #if (OPTIX_VERSION < 70000)
200 
201     // Set up the OptiX context
202     m_optix_ctx = optix::Context::create();
203 
204     // Set up the string table. This allocates a block of CUDA device memory to
205     // hold all of the static strings used by the OSL shaders. The strings can
206     // be accessed via OptiX variables that hold pointers to the table entries.
207     m_str_table.init(m_optix_ctx);
208 
209     if (m_optix_ctx->getEnabledDeviceCount() != 1)
210         errhandler().warning ("Only one CUDA device is currently supported");
211 
212     m_optix_ctx->setRayTypeCount (2);
213     m_optix_ctx->setEntryPointCount (1);
214     m_optix_ctx->setStackSize (2048);
215     m_optix_ctx->setPrintEnabled (true);
216 
217     // Load the renderer CUDA source and generate PTX for it
218     std::string progName = "optix_raytracer.ptx";
219     std::string renderer_ptx = load_ptx_file(progName);
220     if (renderer_ptx.empty()) {
221         errhandler().severe ("Could not find PTX for the raygen program");
222         return false;
223     }
224 
225     // Create the OptiX programs and set them on the optix::Context
226     m_program = m_optix_ctx->createProgramFromPTXString(renderer_ptx, "raygen");
227     m_optix_ctx->setRayGenerationProgram(0, m_program);
228 
229     if (scene.num_prims()) {
230         m_optix_ctx["radiance_ray_type"]->setUint  (0u);
231         m_optix_ctx["shadow_ray_type"  ]->setUint  (1u);
232         m_optix_ctx["bg_color"         ]->setFloat (0.0f, 0.0f, 0.0f);
233         m_optix_ctx["bad_color"        ]->setFloat (1.0f, 0.0f, 1.0f);
234 
235         // Create the OptiX programs and set them on the optix::Context
236         if (renderer_ptx.size()) {
237             m_optix_ctx->setMissProgram (0, m_optix_ctx->createProgramFromPTXString (renderer_ptx, "miss"));
238             m_optix_ctx->setExceptionProgram (0, m_optix_ctx->createProgramFromPTXString (renderer_ptx, "exception"));
239         }
240 
241         // Load the PTX for the wrapper program. It will be used to
242         // create OptiX Materials from the OSL ShaderGroups
243         m_materials_ptx = load_ptx_file ("wrapper.ptx");
244         if (m_materials_ptx.empty())
245             return false;
246 
247         // Load the PTX for the primitives
248         std::string sphere_ptx = load_ptx_file ("sphere.ptx");
249         std::string quad_ptx = load_ptx_file ("quad.ptx");
250         if (sphere_ptx.empty() || quad_ptx.empty())
251             return false;
252 
253         // Create the sphere and quad intersection programs.
254         sphere_bounds    = m_optix_ctx->createProgramFromPTXString (sphere_ptx, "bounds");
255         quad_bounds      = m_optix_ctx->createProgramFromPTXString (quad_ptx,   "bounds");
256         sphere_intersect = m_optix_ctx->createProgramFromPTXString (sphere_ptx, "intersect");
257         quad_intersect   = m_optix_ctx->createProgramFromPTXString (quad_ptx,   "intersect");
258     }
259 
260 #endif //#if (OPTIX_VERSION < 70000)
261 
262 #endif
263     return true;
264 }
265 
266 
267 
268 bool
synch_attributes()269 OptixRaytracer::synch_attributes ()
270 {
271 #ifdef OSL_USE_OPTIX
272 
273 #if (OPTIX_VERSION < 70000)
274     // FIXME -- this is for testing only
275     // Make some device strings to test userdata parameters
276     uint64_t addr1 = register_string ("ud_str_1", "");
277     uint64_t addr2 = register_string ("userdata string", "");
278     m_optix_ctx["test_str_1"]->setUserData (sizeof(char*), &addr1);
279     m_optix_ctx["test_str_2"]->setUserData (sizeof(char*), &addr2);
280 
281     {
282         const char* name = OSL_NAMESPACE_STRING "::pvt::s_color_system";
283 
284         char* colorSys = nullptr;
285         long long cpuDataSizes[2] = {0,0};
286         if (!shadingsys->getattribute("colorsystem", TypeDesc::PTR, (void*)&colorSys) ||
287             !shadingsys->getattribute("colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG,2), (void*)&cpuDataSizes) ||
288             !colorSys || !cpuDataSizes[0]) {
289             errhandler().error ("No colorsystem available.");
290             return false;
291         }
292         auto cpuDataSize = cpuDataSizes[0];
293         auto numStrings = cpuDataSizes[1];
294 
295         // Get the size data-size, minus the ustring size
296         const size_t podDataSize = cpuDataSize - sizeof(StringParam)*numStrings;
297 
298         optix::Buffer buffer = m_optix_ctx->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_USER);
299         if (!buffer) {
300             errhandler().error ("Could not create buffer for '%s'.", name);
301             return false;
302         }
303 
304         // set the element size to char
305         buffer->setElementSize(sizeof(char));
306 
307         // and number of elements to the actual size needed.
308         buffer->setSize(podDataSize + sizeof(DeviceString)*numStrings);
309 
310         // copy the base data
311         char* gpuData = (char*) buffer->map();
312         if (!gpuData) {
313             errhandler().error ("Could not map buffer for '%s' (size: %lu).",
314                                 name, podDataSize + sizeof(DeviceString)*numStrings);
315             return false;
316         }
317         ::memcpy(gpuData, colorSys, podDataSize);
318 
319         // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
320         // FIXME -- Should probably handle alignment better.
321         const ustring* cpuString = (const ustring*)(colorSys + (cpuDataSize - sizeof(StringParam)*numStrings));
322         char* gpuStrings = gpuData + podDataSize;
323         for (const ustring* end = cpuString + numStrings; cpuString < end; ++cpuString) {
324             // convert the ustring to a device string
325             uint64_t devStr = register_string (cpuString->string(), "");
326             ::memcpy(gpuStrings, &devStr, sizeof(devStr));
327             gpuStrings += sizeof(DeviceString);
328         }
329 
330         buffer->unmap();
331         m_optix_ctx[name]->setBuffer(buffer);
332     }
333 #else // #if (OPTIX_VERSION < 70000)
334 
335     // FIXME -- this is for testing only
336     // Make some device strings to test userdata parameters
337     uint64_t addr1 = register_string ("ud_str_1", "");
338     uint64_t addr2 = register_string ("userdata string", "");
339 
340     // Store the user-data
341     register_global("test_str_1", addr1);
342     register_global("test_str_2", addr2);
343 
344     {
345         char* colorSys = nullptr;
346         long long cpuDataSizes[2] = {0,0};
347         if (!shadingsys->getattribute("colorsystem", TypeDesc::PTR, (void*)&colorSys) ||
348             !shadingsys->getattribute("colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG,2), (void*)&cpuDataSizes) ||
349             !colorSys || !cpuDataSizes[0]) {
350             errhandler().error ("No colorsystem available.");
351             return false;
352         }
353 
354         const char* name = OSL_NAMESPACE_STRING "::pvt::s_color_system";
355 
356         auto cpuDataSize = cpuDataSizes[0];
357         auto numStrings = cpuDataSizes[1];
358 
359         // Get the size data-size, minus the ustring size
360         const size_t podDataSize = cpuDataSize - sizeof(StringParam)*numStrings;
361 
362         CUdeviceptr d_buffer;
363         CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_buffer), podDataSize + sizeof (DeviceString)*numStrings));
364         CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_buffer), colorSys, podDataSize, cudaMemcpyHostToDevice));
365 
366         // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
367         // FIXME -- Should probably handle alignment better.
368         const ustring* cpuString = (const ustring*)(colorSys + (cpuDataSize - sizeof (StringParam)*numStrings));
369         CUdeviceptr gpuStrings = d_buffer + podDataSize;
370         for (const ustring* end = cpuString + numStrings; cpuString < end; ++cpuString) {
371             // convert the ustring to a device string
372             uint64_t devStr = register_string (cpuString->string(), "");
373             CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(gpuStrings), &devStr, sizeof (devStr), cudaMemcpyHostToDevice));
374             gpuStrings += sizeof (DeviceString);
375         }
376         register_global(name, d_buffer);
377     }
378 #endif // #if (OPTIX_VERSION < 70000)
379 #endif
380     return true;
381 }
382 
383 #if (OPTIX_VERSION >= 70000)
384 bool
load_optix_module(const char * filename,const OptixModuleCompileOptions * module_compile_options,const OptixPipelineCompileOptions * pipeline_compile_options,OptixModule * program_module)385 OptixRaytracer::load_optix_module (const char*                        filename,
386                                    const OptixModuleCompileOptions*   module_compile_options,
387                                    const OptixPipelineCompileOptions* pipeline_compile_options,
388                                    OptixModule*                       program_module)
389 {
390     char msg_log[8192];
391 
392     // Load the renderer CUDA source and generate PTX for it
393     std::string program_ptx = load_ptx_file(filename);
394     if (program_ptx.empty()) {
395         errhandler().severe ("Could not find PTX file:  %s", filename);
396         return false;
397     }
398 
399     size_t sizeof_msg_log = sizeof(msg_log);
400     OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
401                                            module_compile_options,
402                                            pipeline_compile_options,
403                                            program_ptx.c_str(),
404                                            program_ptx.size(),
405                                            msg_log, &sizeof_msg_log,
406                                            program_module));
407     //if (sizeof_msg_log > 1)
408     //    printf ("Creating Module from PTX-file %s:\n%s\n", filename, msg_log);
409     return true;
410 }
411 
412 bool
create_optix_pg(const OptixProgramGroupDesc * pg_desc,const int num_pg,OptixProgramGroupOptions * program_options,OptixProgramGroup * pg)413 OptixRaytracer::create_optix_pg(const OptixProgramGroupDesc* pg_desc,
414                                 const int                    num_pg,
415                                 OptixProgramGroupOptions*    program_options,
416                                 OptixProgramGroup*           pg)
417 {
418     char msg_log[8192];
419     size_t sizeof_msg_log = sizeof(msg_log);
420     OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
421                                           pg_desc,
422                                           num_pg,
423                                           program_options,
424                                           msg_log, &sizeof_msg_log,
425                                           pg));
426     //if (sizeof_msg_log > 1)
427     //    printf ("Creating program group:\n%s\n", msg_log);
428 
429     return true;
430 }
431 #endif
432 
433 bool
make_optix_materials()434 OptixRaytracer::make_optix_materials ()
435 {
436 #ifdef OSL_USE_OPTIX
437 
438 #if (OPTIX_VERSION < 70000)
439 
440     optix::Program closest_hit, any_hit;
441     if (scene.num_prims()) {
442         closest_hit = m_optix_ctx->createProgramFromPTXString(
443             m_materials_ptx, "closest_hit_osl");
444         any_hit = m_optix_ctx->createProgramFromPTXString(
445             m_materials_ptx, "any_hit_shadow");
446     }
447 
448     // Stand-in: names of shader outputs to preserve
449     // FIXME
450     std::vector<const char*> outputs { "Cout" };
451 
452     // Optimize each ShaderGroup in the scene, and use the resulting
453     // PTX to create OptiX Programs which can be called by the closest
454     // hit program in the wrapper to execute the compiled OSL shader.
455     int mtl_id = 0;
456     for (const auto& groupref : shaders()) {
457         shadingsys->attribute (groupref.get(), "renderer_outputs",
458                                TypeDesc(TypeDesc::STRING, outputs.size()),
459                                outputs.data());
460 
461         shadingsys->optimize_group (groupref.get(), nullptr);
462 
463         if (!scene.num_prims()) {
464             if (!shadingsys->find_symbol (*groupref.get(), ustring(outputs[0]))) {
465                 errhandler().warning ("Requested output '%s', which wasn't found",
466                                       outputs[0]);
467             }
468         }
469 
470         std::string group_name, init_name, entry_name;
471         shadingsys->getattribute (groupref.get(), "groupname",        group_name);
472         shadingsys->getattribute (groupref.get(), "group_init_name",  init_name);
473         shadingsys->getattribute (groupref.get(), "group_entry_name", entry_name);
474 
475         // Retrieve the compiled ShaderGroup PTX
476         std::string osl_ptx;
477         shadingsys->getattribute (groupref.get(), "ptx_compiled_version",
478                                   OSL::TypeDesc::PTR, &osl_ptx);
479 
480         if (osl_ptx.empty()) {
481             errhandler().error ("Failed to generate PTX for ShaderGroup %s",
482                                 group_name);
483             return false;
484         }
485 
486         if (options.get_int("saveptx")) {
487             std::string filename = OIIO::Strutil::sprintf("%s_%d.ptx", group_name,
488                                                           mtl_id++);
489             OIIO::ofstream out;
490             OIIO::Filesystem::open (out, filename);
491             out << osl_ptx;
492         }
493 
494         // Create Programs from the init and group_entry functions,
495         // and set the OSL functions as Callable Programs so that they
496         // can be executed by the closest hit program in the wrapper
497         optix::Program osl_init = m_optix_ctx->createProgramFromPTXString (
498             osl_ptx, init_name);
499         optix::Program osl_group = m_optix_ctx->createProgramFromPTXString (
500             osl_ptx, entry_name);
501         if (scene.num_prims()) {
502             // Create a new Material using the wrapper PTX
503             optix::Material mtl = m_optix_ctx->createMaterial();
504             mtl->setClosestHitProgram (0, closest_hit);
505             mtl->setAnyHitProgram (1, any_hit);
506 
507             // Set the OSL functions as Callable Programs so that they can be
508             // executed by the closest hit program in the wrapper
509             mtl["osl_init_func" ]->setProgramId (osl_init );
510             mtl["osl_group_func"]->setProgramId (osl_group);
511             scene.optix_mtls.push_back(mtl);
512         } else {
513             // Grid shading
514             m_program["osl_init_func" ]->setProgramId (osl_init );
515             m_program["osl_group_func"]->setProgramId (osl_group);
516         }
517     }
518     if (!synch_attributes())
519         return false;
520 
521 #else //#if (OPTIX_VERSION < 70000)
522     // Stand-in: names of shader outputs to preserve
523     std::vector<const char*> outputs { "Cout" };
524 
525     std::vector<OptixModule> modules;
526 
527     // Space for mesage logging
528     char msg_log[8192];
529     size_t sizeof_msg_log;
530 
531     // Make module that contains programs we'll use in this scene
532     OptixModuleCompileOptions module_compile_options = {};
533 
534     module_compile_options.maxRegisterCount  = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT;
535     module_compile_options.optLevel          = OPTIX_COMPILE_OPTIMIZATION_DEFAULT;
536     module_compile_options.debugLevel        = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
537 
538     OptixPipelineCompileOptions pipeline_compile_options = {};
539 
540     pipeline_compile_options.traversableGraphFlags      = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
541     pipeline_compile_options.usesMotionBlur             = false;
542     pipeline_compile_options.numPayloadValues           = 3;
543     pipeline_compile_options.numAttributeValues         = 3;
544     pipeline_compile_options.exceptionFlags             = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW;
545     pipeline_compile_options.pipelineLaunchParamsVariableName = "render_params";
546 
547     // Create 'raygen' program
548 
549     // Load the renderer CUDA source and generate PTX for it
550     OptixModule program_module;
551     load_optix_module("optix_raytracer.ptx", &module_compile_options,
552                                              &pipeline_compile_options,
553                                              &program_module);
554 
555     // Record it so we can destroy it later
556     modules.push_back(program_module);
557 
558     OptixModule quad_module;
559     load_optix_module("quad.ptx", &module_compile_options,
560                                  &pipeline_compile_options,
561                                  &quad_module);
562 
563     OptixModule sphere_module;
564     load_optix_module("sphere.ptx", &module_compile_options,
565                                    &pipeline_compile_options,
566                                    &sphere_module);
567 
568 
569     OptixModule wrapper_module;
570     load_optix_module("wrapper.ptx", &module_compile_options,
571                                     &pipeline_compile_options,
572                                     &wrapper_module);
573 
574 
575     OptixProgramGroupOptions program_options = {};
576     std::vector<OptixProgramGroup> shader_groups;
577 
578     // Raygen group
579     OptixProgramGroupDesc raygen_desc = {};
580     raygen_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
581     raygen_desc.raygen.module            = program_module;
582     raygen_desc.raygen.entryFunctionName = "__raygen__";
583 
584     OptixProgramGroup  raygen_group;
585     create_optix_pg(&raygen_desc, 1, &program_options, &raygen_group);
586 
587     // Miss group
588     OptixProgramGroupDesc miss_desc = {};
589     miss_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
590     miss_desc.miss.module            = program_module;  // raygen file/module contains miss program
591     miss_desc.miss.entryFunctionName = "__miss__";
592 
593     OptixProgramGroup  miss_group;
594     create_optix_pg(&miss_desc, 1, &program_options, &miss_group);
595 
596     // Hitgroup -- quads
597     OptixProgramGroupDesc quad_hitgroup_desc = {};
598     quad_hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
599     quad_hitgroup_desc.hitgroup.moduleCH            = wrapper_module;
600     quad_hitgroup_desc.hitgroup.entryFunctionNameCH = "__closesthit__closest_hit_osl";
601     quad_hitgroup_desc.hitgroup.moduleAH            = wrapper_module;
602     quad_hitgroup_desc.hitgroup.entryFunctionNameAH = "__anyhit__any_hit_shadow";
603     quad_hitgroup_desc.hitgroup.moduleIS            = quad_module;
604     quad_hitgroup_desc.hitgroup.entryFunctionNameIS = "__intersection__quad";
605     OptixProgramGroup quad_hitgroup;
606     create_optix_pg(&quad_hitgroup_desc, 1, &program_options, &quad_hitgroup);
607 
608     // Direct-callable -- fills in ShaderGlobals for Quads
609     OptixProgramGroupDesc quad_fillSG_desc = {};
610     quad_fillSG_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
611     quad_fillSG_desc.callables.moduleDC            = quad_module;
612     quad_fillSG_desc.callables.entryFunctionNameDC = "__direct_callable__quad_shaderglobals";
613     quad_fillSG_desc.callables.moduleCC            = 0;
614     quad_fillSG_desc.callables.entryFunctionNameCC = nullptr;
615     OptixProgramGroup quad_fillSG_dc;
616     create_optix_pg(&quad_fillSG_desc, 1, &program_options, &quad_fillSG_dc);
617 
618     // Hitgroup -- sphere
619     OptixProgramGroupDesc sphere_hitgroup_desc = {};
620     sphere_hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
621     sphere_hitgroup_desc.hitgroup.moduleCH            = wrapper_module;
622     sphere_hitgroup_desc.hitgroup.entryFunctionNameCH = "__closesthit__closest_hit_osl";
623     sphere_hitgroup_desc.hitgroup.moduleAH            = wrapper_module;
624     sphere_hitgroup_desc.hitgroup.entryFunctionNameAH = "__anyhit__any_hit_shadow";
625     sphere_hitgroup_desc.hitgroup.moduleIS            = sphere_module;
626     sphere_hitgroup_desc.hitgroup.entryFunctionNameIS = "__intersection__sphere";
627     OptixProgramGroup sphere_hitgroup;
628     create_optix_pg(&sphere_hitgroup_desc, 1, &program_options, &sphere_hitgroup);
629 
630     // Direct-callable -- fills in ShaderGlobals for Sphere
631     OptixProgramGroupDesc sphere_fillSG_desc = {};
632     sphere_fillSG_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
633     sphere_fillSG_desc.callables.moduleDC            = sphere_module;
634     sphere_fillSG_desc.callables.entryFunctionNameDC = "__direct_callable__sphere_shaderglobals";
635     sphere_fillSG_desc.callables.moduleCC            = 0;
636     sphere_fillSG_desc.callables.entryFunctionNameCC = nullptr;
637     OptixProgramGroup sphere_fillSG_dc;
638     create_optix_pg(&sphere_fillSG_desc, 1, &program_options, &sphere_fillSG_dc);
639 
640     // Create materials
641     int mtl_id = 0;
642     for (const auto& groupref : shaders()) {
643         std::string group_name, init_name, entry_name;
644         shadingsys->getattribute (groupref.get(), "groupname",        group_name);
645         shadingsys->getattribute (groupref.get(), "group_init_name",  init_name);
646         shadingsys->getattribute (groupref.get(), "group_entry_name", entry_name);
647 
648         shadingsys->attribute (groupref.get(), "renderer_outputs",
649                                TypeDesc(TypeDesc::STRING, outputs.size()),
650                                outputs.data());
651         shadingsys->optimize_group (groupref.get(), nullptr);
652 
653         if (!shadingsys->find_symbol (*groupref.get(), ustring(outputs[0]))) {
654             // FIXME: This is for cases where testshade is run with 1x1 resolution
655             //        Those tests may not have a Cout parameter to write to.
656             if (m_xres > 1 && m_yres > 1) {
657                 errhandler().warning ("Requested output '%s', which wasn't found",
658                                       outputs[0]);
659             }
660         }
661 
662         // Retrieve the compiled ShaderGroup PTX
663         std::string osl_ptx;
664         shadingsys->getattribute (groupref.get(), "ptx_compiled_version",
665                                   OSL::TypeDesc::PTR, &osl_ptx);
666 
667         if (osl_ptx.empty()) {
668             errhandler().error ("Failed to generate PTX for ShaderGroup %s",
669                                 group_name);
670             return false;
671         }
672 
673         if (options.get_int ("saveptx")) {
674             std::string filename = OIIO::Strutil::sprintf("%s_%d.ptx", group_name,
675                                                           mtl_id++);
676             OIIO::ofstream out;
677             OIIO::Filesystem::open (out, filename);
678             out << osl_ptx;
679         }
680 
681         OptixModule optix_module;
682 
683         // Create Programs from the init and group_entry functions,
684         // and set the OSL functions as Callable Programs so that they
685         // can be executed by the closest hit program in the wrapper
686         sizeof_msg_log = sizeof(msg_log);
687         OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
688                                                &module_compile_options,
689                                                &pipeline_compile_options,
690                                                osl_ptx.c_str(),
691                                                osl_ptx.size(),
692                                                msg_log, &sizeof_msg_log,
693                                                &optix_module));
694         //if (sizeof_msg_log > 1)
695         //    printf ("Creating module for PTX group '%s':\n%s\n", group_name.c_str(), msg_log);
696         modules.push_back(optix_module);
697 
698         // Create 2x program groups (for direct callables)
699         OptixProgramGroupDesc pgDesc[2] = {};
700         pgDesc[0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
701         pgDesc[0].callables.moduleDC            = optix_module;
702         pgDesc[0].callables.entryFunctionNameDC = init_name.c_str();
703         pgDesc[0].callables.moduleCC            = 0;
704         pgDesc[0].callables.entryFunctionNameCC = nullptr;
705         pgDesc[1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
706         pgDesc[1].callables.moduleDC            = optix_module;
707         pgDesc[1].callables.entryFunctionNameDC = entry_name.c_str();
708         pgDesc[1].callables.moduleCC            = 0;
709         pgDesc[1].callables.entryFunctionNameCC = nullptr;
710 
711         shader_groups.resize(shader_groups.size() + 2);
712         sizeof_msg_log = sizeof(msg_log);
713         OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
714                                               &pgDesc[0],
715                                               2,
716                                               &program_options,
717                                               msg_log, &sizeof_msg_log,
718                                               &shader_groups[shader_groups.size() - 2]));
719         //if (sizeof_msg_log > 1)
720         //    printf ("Creating 'shader' group for group '%s':\n%s\n", group_name.c_str(), msg_log);
721     }
722 
723 
724     OptixPipelineLinkOptions pipeline_link_options;
725     pipeline_link_options.maxTraceDepth          = 1;
726     pipeline_link_options.debugLevel             = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
727 #if (OPTIX_VERSION < 70100)
728     pipeline_link_options.overrideUsesMotionBlur = false;
729 #endif
730 
731     // Build string-table "library"
732     nvrtcProgram str_lib;
733 
734     auto extractNamespaces = [] (const OIIO::ustring &s) {
735         const char *str = s.c_str();
736         std::vector<std::string> ns;
737         do {
738             const char *begin = str;
739             // get to first ':'
740             while (*str != ':' && *str)
741                 str++;
742             ns.push_back (std::string (begin, str));
743             // advance to second ':'
744             if (*str && *str == ':')
745                 str++;
746         } while (*str++ != 0);
747         return ns;
748     };
749 
750     std::stringstream strlib_ss;
751 
752     strlib_ss << "// so things name-mangle properly\n";
753     strlib_ss << "struct DeviceString {\n";
754     strlib_ss << "   const char *data;\n";
755     strlib_ss << "};\n";
756     strlib_ss << "\n";
757 
758     // write out all the global strings
759     for (auto &&gvar : m_globals_map) {
760         std::vector<std::string> var_ns = extractNamespaces(gvar.first);
761 
762         // build namespace
763         for (size_t i = 0; i < var_ns.size() - 1; i++)
764             strlib_ss << "namespace " << var_ns[i] << " {\n";
765 
766         strlib_ss << "__device__ DeviceString " << var_ns.back() << " = { (const char *)" << gvar.second << "};\n";
767         // close namespace up
768         for (size_t i = 0; i < var_ns.size() - 1; i++)
769             strlib_ss << "}\n";
770     }
771 
772     strlib_ss << "extern \"C\" __global__ void __direct_callable__strlib_dummy(int *j)\n";
773     strlib_ss << "{\n";
774     strlib_ss << "   // must have a __direct_callable__ function for the module to compile\n";
775     strlib_ss << "}\n";
776 
777     // XXX: Should this move to compute_60 (compute_35-compute_50 is now deprecated)
778     const char *cuda_compile_options[] = { "--gpu-architecture=compute_35"  ,
779                                            "--use_fast_math"                ,
780                                            "-dc"                            ,
781 #if OSL_CPLUSPLUS_VERSION >= 14
782                                            "--std=c++14"
783 #else
784                                            "--std=c++11"
785 #endif
786                                          };
787 
788     int num_compile_flags = int(sizeof(cuda_compile_options) / sizeof(cuda_compile_options[0]));
789     size_t str_lib_size, cuda_log_size;
790 
791 
792     std::string cuda_string = strlib_ss.str();
793 
794     NVRTC_CHECK (nvrtcCreateProgram (&str_lib,
795                                      cuda_string.c_str(),
796                                      "cuda_strng_library",
797                                      0,         // number of headers
798                                      nullptr,   // header paths
799                                      nullptr)); // header files
800     nvrtcResult compileResult = nvrtcCompileProgram (str_lib,  num_compile_flags, cuda_compile_options);
801     if (compileResult != NVRTC_SUCCESS) {
802         NVRTC_CHECK (nvrtcGetProgramLogSize (str_lib, &cuda_log_size));
803         std::vector<char> cuda_log(cuda_log_size+1);
804         NVRTC_CHECK (nvrtcGetProgramLog (str_lib, cuda_log.data()));
805         cuda_log.back() = 0;
806         errhandler().error ("nvrtcCompileProgram failure for:\n%s\n"
807                             "====================================\n"
808                             "%s\n", cuda_string.c_str(), cuda_log.data());
809         return false;
810     }
811 
812 
813     NVRTC_CHECK (nvrtcGetPTXSize (str_lib, &str_lib_size));
814     std::vector<char> str_lib_ptx (str_lib_size);
815     NVRTC_CHECK (nvrtcGetPTX (str_lib, str_lib_ptx.data()));
816     NVRTC_CHECK (nvrtcDestroyProgram (&str_lib));
817 
818     std::string strlib_string (str_lib_ptx.begin(), str_lib_ptx.end());
819 
820     OptixModule strlib_module;
821     sizeof_msg_log = sizeof(msg_log);
822     OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
823                                            &module_compile_options,
824                                            &pipeline_compile_options,
825                                            str_lib_ptx.data(),
826                                            str_lib_ptx.size(),
827                                            msg_log, &sizeof_msg_log,
828                                            &strlib_module));
829     //if (sizeof_msg_log > 1)
830     //    printf ("Creating module from string-library PTX:\n%s\n", msg_log);
831 
832     OptixProgramGroupDesc strlib_pg_desc = {};
833     strlib_pg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
834     strlib_pg_desc.callables.moduleDC              = strlib_module;
835     strlib_pg_desc.callables.entryFunctionNameDC   = "__direct_callable__strlib_dummy";
836     strlib_pg_desc.callables.moduleCC              = 0;
837     strlib_pg_desc.callables.entryFunctionNameCC   = nullptr;
838 
839     OptixProgramGroup strlib_group;
840 
841     sizeof_msg_log = sizeof(msg_log);
842     OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
843                                           &strlib_pg_desc,
844                                           1,
845                                           &program_options,
846                                           msg_log, &sizeof_msg_log,
847                                           &strlib_group));
848     //if (sizeof_msg_log > 1)
849     //    printf ("Creating program group for string-library:\n%s\n", msg_log);
850 
851     // Set up OptiX pipeline
852     std::vector<OptixProgramGroup> final_groups = {
853          strlib_group,     // string globals
854          raygen_group,
855          miss_group
856     };
857 
858     if (scene.quads.size() > 0)
859         final_groups.push_back (quad_hitgroup);
860     if (scene.spheres.size() > 0)
861         final_groups.push_back (sphere_hitgroup);
862 
863     final_groups.push_back (quad_fillSG_dc);
864     final_groups.push_back (sphere_fillSG_dc);
865 
866     // append the shader groups to our "official" list of program groups
867     final_groups.insert (final_groups.end(), shader_groups.begin(), shader_groups.end());
868 
869     sizeof_msg_log = sizeof(msg_log);
870     OPTIX_CHECK (optixPipelineCreate (m_optix_ctx,
871                                       &pipeline_compile_options,
872                                       &pipeline_link_options,
873                                       final_groups.data(),
874                                       int(final_groups.size()),
875                                       msg_log, &sizeof_msg_log,
876                                       &m_optix_pipeline));
877     //if (sizeof_msg_log > 1)
878     //    printf ("Creating optix pipeline:\n%s\n", msg_log);
879 
880     // Set the pipeline stack size
881     OptixStackSizes stack_sizes = {};
882     for( OptixProgramGroup& program_group : final_groups )
883         OPTIX_CHECK (optixUtilAccumulateStackSizes (program_group, &stack_sizes));
884 
885     uint32_t max_trace_depth = 1;
886     uint32_t max_cc_depth    = 1;
887     uint32_t max_dc_depth    = 1;
888     uint32_t direct_callable_stack_size_from_traversal;
889     uint32_t direct_callable_stack_size_from_state;
890     uint32_t continuation_stack_size;
891     OPTIX_CHECK (optixUtilComputeStackSizes (&stack_sizes,
892                                              max_trace_depth,
893                                              max_cc_depth,
894                                              max_dc_depth,
895                                              &direct_callable_stack_size_from_traversal,
896                                              &direct_callable_stack_size_from_state,
897                                              &continuation_stack_size ) );
898 
899     const uint32_t max_traversal_depth = 1;
900     OPTIX_CHECK (optixPipelineSetStackSize (m_optix_pipeline,
901                                             direct_callable_stack_size_from_traversal,
902                                             direct_callable_stack_size_from_state,
903                                             continuation_stack_size,
904                                             max_traversal_depth ));
905 
906     // Build OptiX Shader Binding Table (SBT)
907 
908     std::vector<GenericRecord>   sbt_records(final_groups.size());
909 
910     CUdeviceptr d_raygen_record;
911     CUdeviceptr d_miss_record;
912     CUdeviceptr d_hitgroup_records;
913     CUdeviceptr d_callable_records;
914 
915     std::vector<CUdeviceptr> d_sbt_records(final_groups.size());
916 
917     for (size_t i = 0; i < final_groups.size(); i++) {
918         OPTIX_CHECK (optixSbtRecordPackHeader (final_groups[i], &sbt_records[i]));
919     }
920 
921     int       sbtIndex       = 3;
922     const int hitRecordStart = sbtIndex;
923 
924     // Copy geometry data to appropriate SBT records
925     if (scene.quads.size() > 0 ) {
926         sbt_records[sbtIndex].data        = reinterpret_cast<void *>(d_quads_list);
927         sbt_records[sbtIndex].sbtGeoIndex = 0;   // DC index for filling in Quad ShaderGlobals
928         ++sbtIndex;
929     }
930 
931     if (scene.spheres.size() > 0 ) {
932         sbt_records[sbtIndex].data        = reinterpret_cast<void *>(d_spheres_list);
933         sbt_records[sbtIndex].sbtGeoIndex = 1;   // DC index for filling in Sphere ShaderGlobals
934         ++sbtIndex;
935     }
936 
937     const int callableRecordStart = sbtIndex;
938 
939     // Copy geometry data to our DC (direct-callable) funcs that fill ShaderGlobals
940     sbt_records[sbtIndex++].data = reinterpret_cast<void *>(d_quads_list);
941     sbt_records[sbtIndex++].data = reinterpret_cast<void *>(d_spheres_list);
942 
943     const int nshaders   = int(shader_groups.size());
944     const int nhitgroups = (scene.quads.size() > 0) + (scene.spheres.size() > 0);
945 
946     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_raygen_record)    ,     sizeof(GenericRecord)));
947     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_miss_record)      ,     sizeof(GenericRecord)));
948     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_hitgroup_records) , nhitgroups * sizeof(GenericRecord)));
949     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_callable_records) , (2 + nshaders) * sizeof(GenericRecord)));
950 
951     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_raygen_record)   , &sbt_records[1],     sizeof(GenericRecord), cudaMemcpyHostToDevice));
952     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_miss_record)     , &sbt_records[2],     sizeof(GenericRecord), cudaMemcpyHostToDevice));
953     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_hitgroup_records), &sbt_records[hitRecordStart], nhitgroups * sizeof(GenericRecord), cudaMemcpyHostToDevice));
954     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_callable_records), &sbt_records[callableRecordStart], (2 + nshaders) * sizeof(GenericRecord), cudaMemcpyHostToDevice));
955 
956     // Looks like OptixShadingTable needs to be filled out completely
957     m_optix_sbt.raygenRecord                 = d_raygen_record;
958     m_optix_sbt.missRecordBase               = d_miss_record;
959     m_optix_sbt.missRecordStrideInBytes      = sizeof(GenericRecord);
960     m_optix_sbt.missRecordCount              = 1;
961     m_optix_sbt.hitgroupRecordBase           = d_hitgroup_records;
962     m_optix_sbt.hitgroupRecordStrideInBytes  = sizeof(GenericRecord);
963     m_optix_sbt.hitgroupRecordCount          = nhitgroups;
964     m_optix_sbt.callablesRecordBase          = d_callable_records;
965     m_optix_sbt.callablesRecordStrideInBytes = sizeof(GenericRecord);
966     m_optix_sbt.callablesRecordCount         = 2 + nshaders;
967 
968     // Pipeline has been created so we can clean some things up
969     for (auto &&i : final_groups) {
970         optixProgramGroupDestroy(i);
971     }
972     for (auto &&i : modules) {
973         optixModuleDestroy(i);
974     }
975     modules.clear();
976 
977 
978 #endif //#if (OPTIX_VERSION < 70000)
979 
980 #endif
981     return true;
982 }
983 
984 bool
finalize_scene()985 OptixRaytracer::finalize_scene()
986 {
987 #ifdef OSL_USE_OPTIX
988 
989 #if (OPTIX_VERSION < 70000)
990     make_optix_materials();
991 
992     // Create a GeometryGroup to contain the scene geometry
993     optix::GeometryGroup geom_group = m_optix_ctx->createGeometryGroup();
994 
995     m_optix_ctx["top_object"  ]->set (geom_group);
996     m_optix_ctx["top_shadower"]->set (geom_group);
997 
998     // NB: Since the scenes in the test suite consist of only a few primitives,
999     //     using 'NoAccel' instead of 'Trbvh' might yield a slight performance
1000     //     improvement. For more complex scenes (e.g., scenes with meshes),
1001     //     using 'Trbvh' is recommended to achieve maximum performance.
1002     geom_group->setAcceleration (m_optix_ctx->createAcceleration ("Trbvh"));
1003 
1004     // Translate the primitives parsed from the scene description into OptiX scene
1005     // objects
1006     for (const auto& sphere : scene.spheres) {
1007         optix::Geometry sphere_geom = m_optix_ctx->createGeometry();
1008         sphere.setOptixVariables (sphere_geom, sphere_bounds, sphere_intersect);
1009 
1010         optix::GeometryInstance sphere_gi = m_optix_ctx->createGeometryInstance (
1011             sphere_geom, &scene.optix_mtls[sphere.shaderid()], &scene.optix_mtls[sphere.shaderid()]+1);
1012 
1013         geom_group->addChild (sphere_gi);
1014     }
1015 
1016     for (const auto& quad : scene.quads) {
1017         optix::Geometry quad_geom = m_optix_ctx->createGeometry();
1018         quad.setOptixVariables (quad_geom, quad_bounds, quad_intersect);
1019 
1020         optix::GeometryInstance quad_gi = m_optix_ctx->createGeometryInstance (
1021             quad_geom, &scene.optix_mtls[quad.shaderid()], &scene.optix_mtls[quad.shaderid()]+1);
1022 
1023         geom_group->addChild (quad_gi);
1024     }
1025 
1026     // Set the camera variables on the OptiX Context, to be used by the ray gen program
1027     m_optix_ctx["eye" ]->setFloat (vec3_to_float3 (camera.eye));
1028     m_optix_ctx["dir" ]->setFloat (vec3_to_float3 (camera.dir));
1029     m_optix_ctx["cx"  ]->setFloat (vec3_to_float3 (camera.cx));
1030     m_optix_ctx["cy"  ]->setFloat (vec3_to_float3 (camera.cy));
1031     m_optix_ctx["invw"]->setFloat (camera.invw);
1032     m_optix_ctx["invh"]->setFloat (camera.invh);
1033 
1034     // Create the output buffer
1035     optix::Buffer buffer = m_optix_ctx->createBuffer(RT_BUFFER_OUTPUT, RT_FORMAT_FLOAT3, camera.xres, camera.yres);
1036     m_optix_ctx["output_buffer"]->set(buffer);
1037 
1038     m_optix_ctx->validate();
1039 
1040 #else //#if (OPTIX_VERSION < 70000)
1041 
1042     // Build acceleration structures
1043     OptixAccelBuildOptions accelOptions;
1044     OptixBuildInput buildInputs[2];
1045 
1046     memset(&accelOptions, 0, sizeof(OptixAccelBuildOptions));
1047     accelOptions.buildFlags = OPTIX_BUILD_FLAG_NONE;
1048     accelOptions.operation  = OPTIX_BUILD_OPERATION_BUILD;
1049     accelOptions.motionOptions.numKeys = 0;
1050     memset(buildInputs, 0, sizeof(OptixBuildInput) * 2);
1051 
1052     // Set up quads input
1053     void* d_quadsAabb;
1054     std::vector<OptixAabb> quadsAabb;
1055     std::vector<QuadParams> quadsParams;
1056     quadsAabb.reserve(scene.quads.size());
1057     quadsParams.reserve(scene.quads.size());
1058     std::vector<int> quadShaders;
1059     quadShaders.reserve(scene.quads.size());
1060     for (const auto& quad : scene.quads) {
1061        OptixAabb aabb;
1062        quad.getBounds(aabb.minX, aabb.minY, aabb.minZ,
1063                       aabb.maxX, aabb.maxY, aabb.maxZ);
1064        quadsAabb.push_back(aabb);
1065        QuadParams quad_params;
1066        quad.setOptixVariables(&quad_params);
1067        quadsParams.push_back(quad_params);
1068     }
1069     // Copy Quads bounding boxes to cuda device
1070     CUDA_CHECK (cudaMalloc (&d_quadsAabb,                   sizeof(OptixAabb) * scene.quads.size()));
1071     CUDA_CHECK (cudaMemcpy ( d_quadsAabb, quadsAabb.data(), sizeof(OptixAabb) * scene.quads.size(), cudaMemcpyHostToDevice));
1072 
1073     // Copy Quads to cuda device
1074     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_quads_list)  ,                     sizeof(QuadParams  ) * scene.quads.size()));
1075     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void  *>( d_quads_list)  , quadsParams.data(), sizeof(QuadParams  ) * scene.quads.size(), cudaMemcpyHostToDevice));
1076 
1077     // Fill in Quad shaders
1078     CUdeviceptr d_quadsIndexOffsetBuffer;
1079     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_quadsIndexOffsetBuffer), scene.quads.size() * sizeof(int)));
1080 
1081     int numBuildInputs = 0;
1082 
1083     unsigned int  quadSbtRecord;
1084     quadSbtRecord = OPTIX_GEOMETRY_FLAG_NONE;
1085     if (scene.quads.size() > 0) {
1086 #if (OPTIX_VERSION < 70100)
1087         OptixBuildInputCustomPrimitiveArray& quadsInput = buildInputs[numBuildInputs].aabbArray;
1088 #else
1089         OptixBuildInputCustomPrimitiveArray& quadsInput = buildInputs[numBuildInputs].customPrimitiveArray;
1090 #endif
1091         buildInputs[numBuildInputs].type       = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1092         quadsInput.flags                       = &quadSbtRecord;
1093         quadsInput.aabbBuffers                 = reinterpret_cast<CUdeviceptr *>(&d_quadsAabb);
1094         quadsInput.numPrimitives               = scene.quads.size();
1095         quadsInput.numSbtRecords               = 1;
1096         quadsInput.sbtIndexOffsetSizeInBytes   = sizeof(int);
1097         quadsInput.sbtIndexOffsetStrideInBytes = sizeof(int);
1098         quadsInput.sbtIndexOffsetBuffer        = 0; // d_quadsIndexOffsetBuffer;
1099         ++numBuildInputs;
1100     }
1101 
1102     //  Set up spheres input
1103     void* d_spheresAabb;
1104     std::vector<OptixAabb> spheresAabb;
1105     std::vector<SphereParams> spheresParams;
1106     spheresAabb.reserve(scene.spheres.size());
1107     spheresParams.reserve(scene.spheres.size());
1108     std::vector<int> sphereShaders;
1109     sphereShaders.reserve(scene.spheres.size());
1110     for (const auto& sphere : scene.spheres) {
1111        OptixAabb aabb;
1112        sphere.getBounds(aabb.minX, aabb.minY, aabb.minZ,
1113                         aabb.maxX, aabb.maxY, aabb.maxZ);
1114        spheresAabb.push_back(aabb);
1115 
1116        SphereParams sphere_params;
1117        sphere.setOptixVariables(&sphere_params);
1118        spheresParams.push_back(sphere_params);
1119     }
1120     // Copy Spheres bounding boxes to cuda device
1121     CUDA_CHECK (cudaMalloc (&d_spheresAabb,                     sizeof(OptixAabb) * scene.spheres.size()));
1122     CUDA_CHECK (cudaMemcpy ( d_spheresAabb, spheresAabb.data(), sizeof(OptixAabb) * scene.spheres.size(), cudaMemcpyHostToDevice));
1123 
1124     // Copy Spheres to cuda device
1125     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_spheres_list),                       sizeof(SphereParams) * scene.spheres.size()));
1126     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void  *>( d_spheres_list), spheresParams.data(), sizeof(SphereParams) * scene.spheres.size(), cudaMemcpyHostToDevice));
1127 
1128     // Fill in Sphere shaders
1129     CUdeviceptr d_spheresIndexOffsetBuffer;
1130     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_spheresIndexOffsetBuffer), scene.spheres.size() * sizeof(int)));
1131 
1132     unsigned int sphereSbtRecord;
1133     sphereSbtRecord = OPTIX_GEOMETRY_FLAG_NONE;
1134     if (scene.spheres.size() > 0) {
1135 #if (OPTIX_VERSION < 70100)
1136         OptixBuildInputCustomPrimitiveArray& spheresInput = buildInputs[numBuildInputs].aabbArray;
1137 #else
1138         OptixBuildInputCustomPrimitiveArray& spheresInput = buildInputs[numBuildInputs].customPrimitiveArray;
1139 #endif
1140         buildInputs[numBuildInputs].type         = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1141         spheresInput.flags                       = &sphereSbtRecord;
1142         spheresInput.aabbBuffers                 = reinterpret_cast<CUdeviceptr *>(&d_spheresAabb);
1143         spheresInput.numPrimitives               = scene.spheres.size();
1144         spheresInput.numSbtRecords               = 1;
1145         spheresInput.sbtIndexOffsetSizeInBytes   = sizeof(int);
1146         spheresInput.sbtIndexOffsetStrideInBytes = sizeof(int);
1147         spheresInput.sbtIndexOffsetBuffer        = 0; // d_spheresIndexOffsetBuffer;
1148         ++numBuildInputs;
1149     }
1150 
1151     // Compute memory usage by acceleration structures
1152     OptixAccelBufferSizes bufferSizes;
1153     optixAccelComputeMemoryUsage(m_optix_ctx, &accelOptions, buildInputs, numBuildInputs, &bufferSizes);
1154 
1155     void *d_output, *d_temp;
1156     CUDA_CHECK (cudaMalloc (&d_output, bufferSizes.outputSizeInBytes));
1157     CUDA_CHECK (cudaMalloc (&d_temp  , bufferSizes.tempSizeInBytes  ));
1158 
1159     // Get the bounding box for the AS
1160     void *d_aabb;
1161     CUDA_CHECK (cudaMalloc (&d_aabb, sizeof(OptixAabb)));
1162 
1163     OptixAccelEmitDesc property;
1164     property.type = OPTIX_PROPERTY_TYPE_AABBS;
1165     property.result = (CUdeviceptr) d_aabb;
1166 
1167     OPTIX_CHECK (optixAccelBuild (m_optix_ctx,
1168                                   m_cuda_stream,
1169                                   &accelOptions,
1170                                   buildInputs,
1171                                   numBuildInputs,
1172                                   reinterpret_cast<CUdeviceptr>(d_temp),   bufferSizes.tempSizeInBytes,
1173                                   reinterpret_cast<CUdeviceptr>(d_output), bufferSizes.outputSizeInBytes,
1174                                   &m_travHandle,
1175                                   &property,
1176                                   1));
1177 
1178     OptixAabb h_aabb;
1179     CUDA_CHECK (cudaMemcpy ((void*)&h_aabb, reinterpret_cast<void *>(d_aabb), sizeof(OptixAabb), cudaMemcpyDeviceToHost));
1180     cudaFree (d_aabb);
1181 
1182     // Sanity check the AS bounds
1183     // printf ("AABB min: [%0.6f, %0.6f, %0.6f], max: [%0.6f, %0.6f, %0.6f]\n",
1184     //         h_aabb.minX, h_aabb.minY, h_aabb.minZ,
1185     //         h_aabb.maxX, h_aabb.maxY, h_aabb.maxZ );
1186 
1187     make_optix_materials();
1188 
1189 #endif //#if (OPTIX_VERSION < 70000)
1190 #endif //#ifdef OSL_USE_OPTIX
1191     return true;
1192 }
1193 
1194 
1195 
1196 /// Return true if the texture handle (previously returned by
1197 /// get_texture_handle()) is a valid texture that can be subsequently
1198 /// read or sampled.
1199 bool
good(TextureHandle * handle OSL_MAYBE_UNUSED)1200 OptixRaytracer::good(TextureHandle *handle OSL_MAYBE_UNUSED)
1201 {
1202 #ifdef OSL_USE_OPTIX
1203 
1204 #if (OPTIX_VERSION < 70000)
1205     return intptr_t(handle) != RT_TEXTURE_ID_NULL;
1206 #else
1207     return handle != nullptr;
1208 #endif
1209 
1210 #else
1211     return false;
1212 #endif
1213 }
1214 
1215 
1216 
1217 /// Given the name of a texture, return an opaque handle that can be
1218 /// used with texture calls to avoid the name lookups.
1219 RendererServices::TextureHandle*
get_texture_handle(ustring filename OSL_MAYBE_UNUSED,ShadingContext * shading_context OSL_MAYBE_UNUSED)1220 OptixRaytracer::get_texture_handle (ustring filename OSL_MAYBE_UNUSED,
1221                                     ShadingContext* shading_context OSL_MAYBE_UNUSED)
1222 {
1223 #ifdef OSL_USE_OPTIX
1224 
1225 #if (OPTIX_VERSION < 70000)
1226     auto itr = m_samplers.find(filename);
1227     if (itr == m_samplers.end()) {
1228         optix::TextureSampler sampler = context()->createTextureSampler();
1229         sampler->setWrapMode(0, RT_WRAP_REPEAT);
1230         sampler->setWrapMode(1, RT_WRAP_REPEAT);
1231         sampler->setWrapMode(2, RT_WRAP_REPEAT);
1232 
1233         sampler->setFilteringModes(RT_FILTER_LINEAR, RT_FILTER_LINEAR, RT_FILTER_NONE);
1234         sampler->setIndexingMode(false ? RT_TEXTURE_INDEX_ARRAY_INDEX : RT_TEXTURE_INDEX_NORMALIZED_COORDINATES);
1235         sampler->setReadMode(RT_TEXTURE_READ_NORMALIZED_FLOAT);
1236         sampler->setMaxAnisotropy(1.0f);
1237 
1238 
1239         OIIO::ImageBuf image;
1240         if (!image.init_spec(filename, 0, 0)) {
1241             errhandler().error ("Could not load: %s", filename);
1242             return (TextureHandle*)(intptr_t(RT_TEXTURE_ID_NULL));
1243         }
1244         int nchan = image.spec().nchannels;
1245 
1246         OIIO::ROI roi = OIIO::get_roi_full(image.spec());
1247         int width = roi.width(), height = roi.height();
1248         std::vector<float> pixels(width * height * nchan);
1249         image.get_pixels(roi, OIIO::TypeDesc::FLOAT, pixels.data());
1250 
1251         optix::Buffer buffer = context()->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_FLOAT4, width, height);
1252 
1253         float* device_ptr = static_cast<float*>(buffer->map());
1254         unsigned int pixel_idx = 0;
1255         for (int y = 0; y < height; ++y) {
1256             for (int x = 0; x < width; ++x) {
1257                 memcpy(device_ptr, &pixels[pixel_idx], sizeof(float) * nchan);
1258                 device_ptr += 4;
1259                 pixel_idx += nchan;
1260             }
1261         }
1262         buffer->unmap();
1263         sampler->setBuffer(buffer);
1264         itr = m_samplers.emplace(std::move(filename), std::move(sampler)).first;
1265 
1266     }
1267     return (RendererServices::TextureHandle*) intptr_t(itr->second->getId());
1268 
1269 #else //#if (OPTIX_VERSION < 70000)
1270 
1271     auto itr = m_samplers.find(filename);
1272     if (itr == m_samplers.end()) {
1273 
1274         // Open image
1275         OIIO::ImageBuf image;
1276         if (!image.init_spec(filename, 0, 0)) {
1277             errhandler().error ("Could not load: %s", filename);
1278             return (TextureHandle*)(intptr_t(nullptr));
1279         }
1280 
1281         OIIO::ROI roi = OIIO::get_roi_full(image.spec());
1282         int32_t width = roi.width(), height = roi.height();
1283         std::vector<float> pixels(width * height * 4);
1284 
1285         for (int j = 0; j < height; j++) {
1286             for (int i = 0; i < width; i++) {
1287                 image.getpixel(i, j, 0, &pixels[((j*width) + i) * 4 + 0]);
1288             }
1289         }
1290         cudaResourceDesc res_desc = {};
1291 
1292         // hard-code textures to 4 channels
1293         int32_t pitch  = width * 4 * sizeof(float);
1294         cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
1295 
1296         cudaArray_t   pixelArray;
1297         CUDA_CHECK (cudaMallocArray (&pixelArray,
1298                                      &channel_desc,
1299                                      width,height));
1300 
1301         CUDA_CHECK (cudaMemcpy2DToArray (pixelArray,
1302                                          0, 0,
1303                                          pixels.data(),
1304                                          pitch,pitch,height,
1305                                          cudaMemcpyHostToDevice));
1306 
1307         res_desc.resType          = cudaResourceTypeArray;
1308         res_desc.res.array.array  = pixelArray;
1309 
1310         cudaTextureDesc tex_desc     = {};
1311         tex_desc.addressMode[0]      = cudaAddressModeWrap;
1312         tex_desc.addressMode[1]      = cudaAddressModeWrap;
1313         tex_desc.filterMode          = cudaFilterModeLinear;
1314         tex_desc.readMode            = cudaReadModeElementType; //cudaReadModeNormalizedFloat;
1315         tex_desc.normalizedCoords    = 1;
1316         tex_desc.maxAnisotropy       = 1;
1317         tex_desc.maxMipmapLevelClamp = 99;
1318         tex_desc.minMipmapLevelClamp = 0;
1319         tex_desc.mipmapFilterMode    = cudaFilterModePoint;
1320         tex_desc.borderColor[0]      = 1.0f;
1321         tex_desc.sRGB                = 0;
1322 
1323         // Create texture object
1324         cudaTextureObject_t cuda_tex = 0;
1325         CUDA_CHECK (cudaCreateTextureObject (&cuda_tex, &res_desc, &tex_desc, nullptr));
1326         itr = m_samplers.emplace (std::move(filename), std::move(cuda_tex)).first;
1327     }
1328     return reinterpret_cast<RendererServices::TextureHandle *>(itr->second);
1329 
1330 #endif //#if (OPTIX_VERSION < 70000)
1331 
1332 #else
1333     return nullptr;
1334 #endif
1335 }
1336 
1337 
1338 
1339 void
prepare_render()1340 OptixRaytracer::prepare_render()
1341 {
1342 #ifdef OSL_USE_OPTIX
1343     // Set up the OptiX Context
1344     init_optix_context (camera.xres, camera.yres);
1345 
1346     // Set up the OptiX scene graph
1347     finalize_scene ();
1348 #endif
1349 }
1350 
1351 
1352 
1353 void
warmup()1354 OptixRaytracer::warmup()
1355 {
1356 #ifdef OSL_USE_OPTIX
1357     // Perform a tiny launch to warm up the OptiX context
1358 #if (OPTIX_VERSION < 70000)
1359     m_optix_ctx->launch (0, 1, 1);
1360 #else
1361     OPTIX_CHECK (optixLaunch (m_optix_pipeline,
1362                               m_cuda_stream,
1363                               d_launch_params,
1364                               sizeof(RenderParams),
1365                               &m_optix_sbt,
1366                               0, 0, 1));
1367     CUDA_SYNC_CHECK();
1368 #endif
1369 #endif
1370 }
1371 
1372 
1373 
1374 void
render(int xres OSL_MAYBE_UNUSED,int yres OSL_MAYBE_UNUSED)1375 OptixRaytracer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED)
1376 {
1377 #ifdef OSL_USE_OPTIX
1378 #if (OPTIX_VERSION < 70000)
1379     m_optix_ctx->launch (0, xres, yres);
1380 #else
1381     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_output_buffer), xres * yres * 4 * sizeof(float)));
1382     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_launch_params), sizeof(RenderParams)));
1383 
1384 
1385     m_xres = xres;
1386     m_yres = yres;
1387 
1388     RenderParams params;
1389     params.eye.x = camera.eye.x;
1390     params.eye.y = camera.eye.y;
1391     params.eye.z = camera.eye.z;
1392     params.dir.x = camera.dir.x;
1393     params.dir.y = camera.dir.y;
1394     params.dir.z = camera.dir.z;
1395     params.cx.x = camera.cx.x;
1396     params.cx.y = camera.cx.y;
1397     params.cx.z = camera.cx.z;
1398     params.cy.x = camera.cy.x;
1399     params.cy.y = camera.cy.y;
1400     params.cy.z = camera.cy.z;
1401     params.invw = 1.0f / m_xres;
1402     params.invh = 1.0f / m_yres;
1403     params.output_buffer = d_output_buffer;
1404     params.traversal_handle = m_travHandle;
1405 
1406     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_launch_params), &params, sizeof(RenderParams), cudaMemcpyHostToDevice));
1407 
1408     OPTIX_CHECK (optixLaunch (m_optix_pipeline,
1409                               m_cuda_stream,
1410                               d_launch_params,
1411                               sizeof(RenderParams),
1412                               &m_optix_sbt,
1413                               xres, yres, 1));
1414     CUDA_SYNC_CHECK();
1415 #endif
1416 #endif
1417 }
1418 
1419 
1420 
1421 void
finalize_pixel_buffer()1422 OptixRaytracer::finalize_pixel_buffer ()
1423 {
1424 #ifdef OSL_USE_OPTIX
1425     std::string buffer_name = "output_buffer";
1426 #if (OPTIX_VERSION < 70000)
1427     const void* buffer_ptr = m_optix_ctx[buffer_name]->getBuffer()->map();
1428     if (! buffer_ptr)
1429         errhandler().severe ("Unable to map buffer %s", buffer_name);
1430     pixelbuf.set_pixels (OIIO::ROI::All(), OIIO::TypeFloat, buffer_ptr);
1431 #else
1432     std::vector<float> tmp_buff(m_xres * m_yres * 3);
1433     CUDA_CHECK (cudaMemcpy (tmp_buff.data(), reinterpret_cast<void *>(d_output_buffer), m_xres * m_yres * 3 * sizeof(float), cudaMemcpyDeviceToHost));
1434     pixelbuf.set_pixels (OIIO::ROI::All(), OIIO::TypeFloat, tmp_buff.data());
1435 #endif
1436 #endif
1437 }
1438 
1439 
1440 
1441 void
clear()1442 OptixRaytracer::clear()
1443 {
1444     shaders().clear();
1445 #ifdef OSL_USE_OPTIX
1446 #if (OPTIX_VERSION < 70000)
1447     if (m_optix_ctx) {
1448         m_optix_ctx->destroy();
1449         m_optix_ctx = nullptr;
1450     }
1451 #else
1452     OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
1453     m_optix_ctx = 0;
1454 #endif
1455 #endif
1456 }
1457 
1458 OSL_NAMESPACE_EXIT
1459