1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4 
5 #include <vector>
6 
7 #include <OpenImageIO/filesystem.h>
8 #include <OpenImageIO/sysutil.h>
9 
10 #include <OSL/oslconfig.h>
11 
12 #include "optixgridrender.h"
13 #include "render_params.h"
14 
15 #ifdef OSL_USE_OPTIX
16 #  if (OPTIX_VERSION >= 70000)
17 #  include <optix_function_table_definition.h>
18 #  include <optix_stack_size.h>
19 #  include <optix_stubs.h>
20 #  include <cuda.h>
21 #  include <nvrtc.h>
22 #  endif
23 #endif
24 
25 // The pre-compiled renderer support library LLVM bitcode is embedded
26 // into the executable and made available through these variables.
27 extern int rend_llvm_compiled_ops_size;
28 extern unsigned char rend_llvm_compiled_ops_block[];
29 
30 
31 
32 OSL_NAMESPACE_ENTER
33 
34 
35 #if (OPTIX_VERSION >= 70000)
36 
37 #define CUDA_CHECK(call)                                                  \
38 {                                                                         \
39     cudaError_t error = call;                                             \
40     if (error != cudaSuccess)                                             \
41     {                                                                     \
42         std::stringstream ss;                                             \
43         ss << "CUDA call (" << #call << " ) failed with error: '"         \
44            << cudaGetErrorString( error )                                 \
45            << "' (" __FILE__ << ":" << __LINE__ << ")\n";                 \
46            fprintf (stderr, "[CUDA ERROR]  %s", ss.str().c_str() );       \
47         exit(1);                                                          \
48     }                                                                     \
49 }
50 
51 #define NVRTC_CHECK(call)                                                 \
52 {                                                                         \
53     nvrtcResult error = call;                                             \
54     if (error != NVRTC_SUCCESS)                                           \
55     {                                                                     \
56         std::stringstream ss;                                             \
57         ss << "NVRTC call (" << #call << " ) failed with error: '"        \
58            << nvrtcGetErrorString( error )                                \
59            << "' (" __FILE__ << ":" << __LINE__ << ")\n";                 \
60            fprintf (stderr, "[NVRTC ERROR]  %s", ss.str().c_str() );      \
61         exit(1);                                                          \
62     }                                                                     \
63 }
64 
65 
66 #define OPTIX_CHECK(call)                                                 \
67 {                                                                         \
68     OptixResult res = call;                                               \
69     if (res != OPTIX_SUCCESS)                                             \
70     {                                                                     \
71         std::stringstream ss;                                             \
72         ss  << "Optix call '" << #call << "' failed with error: "         \
73             << optixGetErrorName( res )                                   \
74             << " (" __FILE__ ":"   << __LINE__ << ")\n";                  \
75         fprintf (stderr,"[OPTIX ERROR]  %s", ss.str().c_str() );          \
76         exit(1);                                                          \
77     }                                                                     \
78 }
79 #endif
80 
81 #define CUDA_SYNC_CHECK()                                               \
82 {                                                                       \
83     cudaDeviceSynchronize();                                            \
84     cudaError_t error = cudaGetLastError();                             \
85     if (error != cudaSuccess) {                                         \
86         fprintf ( stderr, "error (%s: line %d): %s\n", __FILE__, __LINE__, cudaGetErrorString( error ) ); \
87         exit(1);                                                        \
88     }                                                                   \
89 }
90 
91 #ifdef OSL_USE_OPTIX
92 #if (OPTIX_VERSION >= 70000)
context_log_cb(unsigned int level,const char * tag,const char * message,void *)93 static void context_log_cb (unsigned int level, const char* tag, const char* message, void* /*cbdata */)
94 {
95 //    std::cerr << "[" << std::setw( 2 ) << level << "][" << std::setw( 12 ) << tag << "]: " << message << "\n";
96 }
97 #endif
98 #endif
99 
OptixGridRenderer()100 OptixGridRenderer::OptixGridRenderer ()
101 {
102 #ifdef OSL_USE_OPTIX
103 
104 #if (OPTIX_VERSION < 70000)
105     // Set up the OptiX context
106     m_optix_ctx = optix::Context::create();
107     if (m_optix_ctx->getEnabledDeviceCount() != 1)
108         errhandler().warning ("Only one CUDA device is currently supported");
109 
110     // Set up the string table. This allocates a block of CUDA device memory to
111     // hold all of the static strings used by the OSL shaders. The strings can
112     // be accessed via OptiX variables that hold pointers to the table entries.
113     m_str_table.init(m_optix_ctx);
114 #else
115     // Initialize CUDA
116     cudaFree(0);
117 
118     CUcontext cuCtx = nullptr;  // zero means take the current context
119 
120     OptixDeviceContextOptions ctx_options = {};
121     ctx_options.logCallbackFunction = context_log_cb;
122     ctx_options.logCallbackLevel    = 4;
123 
124     OPTIX_CHECK (optixInit());
125     OPTIX_CHECK (optixDeviceContextCreate (cuCtx, &ctx_options, &m_optix_ctx));
126 
127     CUDA_CHECK (cudaSetDevice (0));
128     CUDA_CHECK (cudaStreamCreate (&m_cuda_stream));
129 
130     // Set up the string table. This allocates a block of CUDA device memory to
131     // hold all of the static strings used by the OSL shaders. The strings can
132     // be accessed via OptiX variables that hold pointers to the table entries.
133     m_str_table.init(m_optix_ctx);
134     // Register all of our string table entries
135     for (auto &&gvar : m_str_table.contents())
136         register_global(gvar.first.c_str(), gvar.second);
137 #endif //#if (OPTIX_VERSION < 70000)
138 
139 #endif //#ifdef OSL_USE_OPTIX
140 }
141 
142 uint64_t
register_global(const std::string & str,uint64_t value)143 OptixGridRenderer::register_global (const std::string &str, uint64_t value)
144 {
145     auto it = m_globals_map.find (ustring(str));
146 
147     if (it != m_globals_map.end()) {
148        return it->second;
149     }
150     m_globals_map[ustring(str)] = value;
151     return value;
152 }
153 
154 bool
fetch_global(const std::string & str,uint64_t * value)155 OptixGridRenderer::fetch_global (const std::string &str, uint64_t *value)
156 {
157     auto it = m_globals_map.find (ustring(str));
158 
159     if (it != m_globals_map.end()) {
160        *value = it->second;
161        return true;
162     }
163     return false;
164 }
165 
166 
167 
168 std::string
load_ptx_file(string_view filename)169 OptixGridRenderer::load_ptx_file (string_view filename)
170 {
171 #ifdef OSL_USE_OPTIX
172     std::vector<std::string> paths = {
173         OIIO::Filesystem::parent_path(OIIO::Sysutil::this_program_path()),
174         PTX_PATH
175     };
176     std::string filepath = OIIO::Filesystem::searchpath_find (filename, paths,
177                                                               false);
178     if (OIIO::Filesystem::exists(filepath)) {
179         std::string ptx_string;
180         if (OIIO::Filesystem::read_text_file (filepath, ptx_string))
181             return ptx_string;
182     }
183 #endif
184     errhandler().severe ("Unable to load %s", filename);
185     return {};
186 }
187 
188 
189 
~OptixGridRenderer()190 OptixGridRenderer::~OptixGridRenderer ()
191 {
192 #ifdef OSL_USE_OPTIX
193     m_str_table.freetable();
194 #if (OPTIX_VERSION < 70000)
195     if (m_optix_ctx)
196         m_optix_ctx->destroy();
197 #else
198     if (m_optix_ctx)
199         OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
200 #endif
201 #endif
202 }
203 
204 
205 
206 void
init_shadingsys(ShadingSystem * ss)207 OptixGridRenderer::init_shadingsys (ShadingSystem *ss)
208 {
209     shadingsys = ss;
210 
211 #ifdef OSL_USE_OPTIX
212     shadingsys->attribute ("lib_bitcode", {OSL::TypeDesc::UINT8, rend_llvm_compiled_ops_size},
213                            rend_llvm_compiled_ops_block);
214 #endif
215 }
216 
217 
218 
219 bool
init_optix_context(int xres OSL_MAYBE_UNUSED,int yres OSL_MAYBE_UNUSED)220 OptixGridRenderer::init_optix_context (int xres OSL_MAYBE_UNUSED,
221                                        int yres OSL_MAYBE_UNUSED)
222 {
223 #ifdef OSL_USE_OPTIX
224 #if (OPTIX_VERSION < 70000)
225     m_optix_ctx->setRayTypeCount (2);
226     m_optix_ctx->setEntryPointCount (1);
227     m_optix_ctx->setStackSize (2048);
228     m_optix_ctx->setPrintEnabled (true);
229 
230     // Load the renderer CUDA source and generate PTX for it
231     std::string progName = "optix_grid_renderer.ptx";
232     std::string renderer_ptx = load_ptx_file(progName);
233     if (renderer_ptx.empty()) {
234         errhandler().severe ("Could not find PTX for the raygen program");
235         return false;
236     }
237 
238     // Create the OptiX programs and set them on the optix::Context
239     m_program = m_optix_ctx->createProgramFromPTXString(renderer_ptx, "raygen");
240     m_optix_ctx->setRayGenerationProgram(0, m_program);
241 #endif //#if (OPTIX_VERSION < 70000)
242 #endif
243     return true;
244 }
245 
246 
247 
248 bool
synch_attributes()249 OptixGridRenderer::synch_attributes ()
250 {
251 #ifdef OSL_USE_OPTIX
252 
253 #if (OPTIX_VERSION < 70000)
254     // FIXME -- this is for testing only
255     // Make some device strings to test userdata parameters
256     uint64_t addr1 = register_string ("ud_str_1", "");
257     uint64_t addr2 = register_string ("userdata string", "");
258     m_optix_ctx["test_str_1"]->setUserData (sizeof(char*), &addr1);
259     m_optix_ctx["test_str_2"]->setUserData (sizeof(char*), &addr2);
260 
261     {
262         const char* name = OSL_NAMESPACE_STRING "::pvt::s_color_system";
263 
264         char* colorSys = nullptr;
265         long long cpuDataSizes[2] = {0,0};
266         if (!shadingsys->getattribute("colorsystem", TypeDesc::PTR, (void*)&colorSys) ||
267             !shadingsys->getattribute("colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG,2), (void*)&cpuDataSizes) ||
268             !colorSys || !cpuDataSizes[0]) {
269             errhandler().error ("No colorsystem available.");
270             return false;
271         }
272 
273         auto cpuDataSize = cpuDataSizes[0];
274         auto numStrings = cpuDataSizes[1];
275 
276         // Get the size data-size, minus the ustring size
277         const size_t podDataSize = cpuDataSize - sizeof(StringParam)*numStrings;
278 
279         optix::Buffer buffer = m_optix_ctx->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_USER);
280         if (!buffer) {
281             errhandler().error ("Could not create buffer for '%s'.", name);
282             return false;
283         }
284 
285         // set the element size to char
286         buffer->setElementSize(sizeof(char));
287 
288         // and number of elements to the actual size needed.
289         buffer->setSize(podDataSize + sizeof(DeviceString)*numStrings);
290 
291         // copy the base data
292         char* gpuData = (char*) buffer->map();
293         if (!gpuData) {
294             errhandler().error ("Could not map buffer for '%s' (size: %lu).",
295                                 name, podDataSize + sizeof(DeviceString)*numStrings);
296             return false;
297         }
298         ::memcpy(gpuData, colorSys, podDataSize);
299         // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
300         // FIXME -- Should probably handle alignment better.
301         const ustring* cpuString = (const ustring*)(colorSys + (cpuDataSize - sizeof(StringParam)*numStrings));
302         char* gpuStrings = gpuData + podDataSize;
303         for (const ustring* end = cpuString + numStrings; cpuString < end; ++cpuString) {
304             // convert the ustring to a device string
305             uint64_t devStr = register_string (cpuString->string(), "");
306             ::memcpy(gpuStrings, &devStr, sizeof(devStr));
307             gpuStrings += sizeof(DeviceString);
308         }
309 
310         buffer->unmap();
311         m_optix_ctx[name]->setBuffer(buffer);
312 
313 #else // #if (OPTIX_VERSION < 70000)
314 
315     // FIXME -- this is for testing only
316     // Make some device strings to test userdata parameters
317     uint64_t addr1 = register_string ("ud_str_1", "");
318     uint64_t addr2 = register_string ("userdata string", "");
319 
320     // Store the user-data
321     register_global("test_str_1", addr1);
322     register_global("test_str_2", addr2);
323 
324     {
325         char* colorSys = nullptr;
326         long long cpuDataSizes[2] = {0,0};
327         if (!shadingsys->getattribute("colorsystem", TypeDesc::PTR, (void*)&colorSys) ||
328             !shadingsys->getattribute("colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG,2), (void*)&cpuDataSizes) ||
329             !colorSys || !cpuDataSizes[0]) {
330             errhandler().error ("No colorsystem available.");
331             return false;
332         }
333 
334         const char* name = OSL_NAMESPACE_STRING "::pvt::s_color_system";
335 
336         auto cpuDataSize = cpuDataSizes[0];
337         auto numStrings = cpuDataSizes[1];
338 
339         // Get the size data-size, minus the ustring size
340         const size_t podDataSize = cpuDataSize - sizeof(StringParam)*numStrings;
341 
342         CUdeviceptr d_buffer;
343 
344         CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_buffer), podDataSize + sizeof(DeviceString)*numStrings));
345 
346         CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_buffer), colorSys, podDataSize, cudaMemcpyHostToDevice));
347 
348         // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
349         // FIXME -- Should probably handle alignment better.
350         const ustring* cpuString = (const ustring*)(colorSys + (cpuDataSize - sizeof(StringParam)*numStrings));
351         CUdeviceptr gpuStrings = d_buffer + podDataSize;
352         for (const ustring* end = cpuString + numStrings; cpuString < end; ++cpuString) {
353             // convert the ustring to a device string
354             uint64_t devStr = register_string (cpuString->string(), "");
355             CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(gpuStrings), &devStr, sizeof(devStr), cudaMemcpyHostToDevice));
356             gpuStrings += sizeof(DeviceString);
357         }
358         register_global (name, d_buffer);
359 
360 #endif
361     }
362 #endif
363     return true;
364 }
365 
366 
367 
368 bool
369 OptixGridRenderer::make_optix_materials ()
370 {
371 #ifdef OSL_USE_OPTIX
372 
373     // Stand-in: names of shader outputs to preserve
374     // FIXME
375     std::vector<const char*> outputs { "Cout" };
376 
377     // Optimize each ShaderGroup in the scene, and use the resulting
378     // PTX to create OptiX Programs which can be called by the closest
379     // hit program in the wrapper to execute the compiled OSL shader.
380     int mtl_id = 0;
381 
382 #if (OPTIX_VERSION < 70000)
383     for (const auto& groupref : shaders()) {
384         shadingsys->attribute (groupref.get(), "renderer_outputs",
385                                TypeDesc(TypeDesc::STRING, outputs.size()),
386                                outputs.data());
387 
388         shadingsys->optimize_group (groupref.get(), nullptr);
389 
390         if (!shadingsys->find_symbol (*groupref.get(), ustring(outputs[0]))) {
391             // FIXME: This is for cases where testshade is run with 1x1 resolution
392             //        Those tests may not have a Cout parameter to write to.
393             if (m_xres > 1 && m_yres > 1) {
394                 errhandler().warning ("Requested output '%s', which wasn't found",
395                                       outputs[0]);
396             }
397         }
398 
399         std::string group_name, init_name, entry_name;
400         shadingsys->getattribute (groupref.get(), "groupname",        group_name);
401         shadingsys->getattribute (groupref.get(), "group_init_name",  init_name);
402         shadingsys->getattribute (groupref.get(), "group_entry_name", entry_name);
403 
404         // Retrieve the compiled ShaderGroup PTX
405         std::string osl_ptx;
406         shadingsys->getattribute (groupref.get(), "ptx_compiled_version",
407                                   OSL::TypeDesc::PTR, &osl_ptx);
408 
409         if (osl_ptx.empty()) {
410             errhandler().error ("Failed to generate PTX for ShaderGroup %s",
411                                 group_name);
412             return false;
413         }
414 
415         if (options.get_int("saveptx")) {
416             std::string filename = OIIO::Strutil::sprintf("%s_%d.ptx",
417                                                           group_name, mtl_id++);
418             OIIO::ofstream out;
419             OIIO::Filesystem::open (out, filename);
420             out << osl_ptx;
421         }
422 
423         // Create Programs from the init and group_entry functions,
424         // and set the OSL functions as Callable Programs so that they
425         // can be executed by the closest hit program in the wrapper
426         optix::Program osl_init = m_optix_ctx->createProgramFromPTXString (
427             osl_ptx, init_name);
428         optix::Program osl_group = m_optix_ctx->createProgramFromPTXString (
429             osl_ptx, entry_name);
430 
431         // Grid shading
432         m_program["osl_init_func" ]->setProgramId (osl_init );
433         m_program["osl_group_func"]->setProgramId (osl_group);
434     }
435 
436 #else //#if (OPTIX_VERSION < 70000)
437 
438     std::vector<OptixModule> modules;
439 
440     // Space for mesage logging
441     char msg_log[8192];
442     size_t sizeof_msg_log;
443 
444     // Make module that contains programs we'll use in this scene
445     OptixModuleCompileOptions module_compile_options = {};
446 
447     module_compile_options.maxRegisterCount  = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT;
448     module_compile_options.optLevel          = OPTIX_COMPILE_OPTIMIZATION_DEFAULT;
449     module_compile_options.debugLevel        = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
450 
451     OptixPipelineCompileOptions pipeline_compile_options = {};
452 
453     pipeline_compile_options.traversableGraphFlags      = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
454     pipeline_compile_options.usesMotionBlur             = false;
455     pipeline_compile_options.numPayloadValues           = 0;
456     pipeline_compile_options.numAttributeValues         = 0;
457     pipeline_compile_options.exceptionFlags             = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW;
458     pipeline_compile_options.pipelineLaunchParamsVariableName = "render_params";
459 
460     // Create 'raygen' program
461 
462     // Load the renderer CUDA source and generate PTX for it
463     std::string progName = "optix_grid_renderer.ptx";
464     std::string program_ptx = load_ptx_file(progName);
465     if (program_ptx.empty()) {
466         errhandler().severe ("Could not find PTX for the raygen program");
467         return false;
468     }
469 
470     sizeof_msg_log = sizeof(msg_log);
471     OptixModule program_module;
472     OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
473                                            &module_compile_options,
474                                            &pipeline_compile_options,
475                                            program_ptx.c_str(),
476                                            program_ptx.size(),
477                                            msg_log, &sizeof_msg_log,
478                                            &program_module));
479     //if (sizeof_msg_log > 1)
480     //    printf ("Creating module from PTX-file %s:\n%s\n", progName.c_str(), msg_log);
481 
482     // Record it so we can destroy it later
483     modules.push_back(program_module);
484 
485     OptixProgramGroupOptions program_options = {};
486     std::vector<OptixProgramGroup> program_groups;
487 
488     // Raygen group
489     OptixProgramGroupDesc raygen_desc = {};
490     raygen_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
491     raygen_desc.raygen.module            = program_module;
492     raygen_desc.raygen.entryFunctionName = "__raygen__";
493 
494     OptixProgramGroup  raygen_group;
495     sizeof_msg_log = sizeof (msg_log);
496     OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
497                                           &raygen_desc,
498                                           1, // number of program groups
499                                           &program_options, // program options
500                                           msg_log, &sizeof_msg_log,
501                                           &raygen_group));
502     //if (sizeof_msg_log > 1)
503     //    printf ("Creating 'ray-gen' program group:\n%s\n", msg_log);
504 
505     // Miss group
506     OptixProgramGroupDesc miss_desc = {};
507     miss_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
508     miss_desc.miss.module            = program_module;
509     miss_desc.miss.entryFunctionName = "__miss__";
510 
511     OptixProgramGroup  miss_group;
512     sizeof_msg_log = sizeof(msg_log);
513     OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
514                                           &miss_desc,
515                                           1,
516                                           &program_options,
517                                           msg_log, &sizeof_msg_log,
518                                           &miss_group));
519     //if (sizeof_msg_log > 1)
520     //    printf ("Creating 'miss' program group:\n%s\n", msg_log);
521 
522     // Hitgroup
523     OptixProgramGroupDesc hitgroup_desc = {};
524     hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
525     hitgroup_desc.hitgroup.moduleCH = program_module;
526     hitgroup_desc.hitgroup.entryFunctionNameCH = "__closesthit__";
527     hitgroup_desc.hitgroup.moduleAH = program_module;
528     hitgroup_desc.hitgroup.entryFunctionNameAH = "__anyhit__";
529 
530     OptixProgramGroup  hitgroup_group;
531 
532     sizeof_msg_log = sizeof(msg_log);
533     OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
534                                           &hitgroup_desc,
535                                           1, // number of program groups
536                                           &program_options, // program options
537                                           msg_log, &sizeof_msg_log,
538                                           &hitgroup_group));
539     //if (sizeof_msg_log > 1)
540     //    printf ("Creating 'hitgroup' program group:\n%s\n", msg_log);
541 
542     // Create materials
543     for (const auto& groupref : shaders()) {
544         shadingsys->attribute (groupref.get(), "renderer_outputs",
545                                TypeDesc(TypeDesc::STRING, outputs.size()),
546                                outputs.data());
547 
548         shadingsys->optimize_group (groupref.get(), nullptr);
549 
550         if (!shadingsys->find_symbol (*groupref.get(), ustring(outputs[0]))) {
551             // FIXME: This is for cases where testshade is run with 1x1 resolution
552             //        Those tests may not have a Cout parameter to write to.
553             if (m_xres > 1 && m_yres > 1) {
554                 errhandler().warning ("Requested output '%s', which wasn't found",
555                                       outputs[0]);
556             }
557         }
558 
559         std::string group_name, init_name, entry_name;
560         shadingsys->getattribute (groupref.get(), "groupname",        group_name);
561         shadingsys->getattribute (groupref.get(), "group_init_name",  init_name);
562         shadingsys->getattribute (groupref.get(), "group_entry_name", entry_name);
563 
564         // Retrieve the compiled ShaderGroup PTX
565         std::string osl_ptx;
566         shadingsys->getattribute (groupref.get(), "ptx_compiled_version",
567                                   OSL::TypeDesc::PTR, &osl_ptx);
568 
569         if (osl_ptx.empty()) {
570             errhandler().error ("Failed to generate PTX for ShaderGroup %s",
571                                 group_name);
572             return false;
573         }
574 
575         if (options.get_int("saveptx")) {
576             std::string filename = OIIO::Strutil::sprintf("%s_%d.ptx", group_name,
577                                                           mtl_id++);
578             OIIO::ofstream out;
579             OIIO::Filesystem::open (out, filename);
580             out << osl_ptx;
581         }
582 
583         OptixModule optix_module;
584 
585         // Create Programs from the init and group_entry functions,
586         // and set the OSL functions as Callable Programs so that they
587         // can be executed by the closest hit program in the wrapper
588         sizeof_msg_log = sizeof(msg_log);
589         OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
590                                                &module_compile_options,
591                                                &pipeline_compile_options,
592                                                osl_ptx.c_str(),
593                                                osl_ptx.size(),
594                                                msg_log, &sizeof_msg_log,
595                                                &optix_module));
596         //if (sizeof_msg_log > 1)
597         //    printf ("Creating module from PTX group '%s':\n%s\n", group_name.c_str(), msg_log);
598 
599         modules.push_back (optix_module);
600 
601         // Create 2x program groups (for direct callables)
602         OptixProgramGroupOptions program_options = {};
603         OptixProgramGroupDesc pgDesc[3] = {};
604         pgDesc[0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
605         pgDesc[0].callables.moduleDC            = optix_module;
606         pgDesc[0].callables.entryFunctionNameDC = init_name.c_str();
607         pgDesc[0].callables.moduleCC            = 0;
608         pgDesc[0].callables.entryFunctionNameCC = nullptr;
609         pgDesc[1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
610         pgDesc[1].callables.moduleDC            = optix_module;
611         pgDesc[1].callables.entryFunctionNameDC = entry_name.c_str();
612         pgDesc[1].callables.moduleCC            = 0;
613         pgDesc[1].callables.entryFunctionNameCC = nullptr;
614 
615         program_groups.resize (program_groups.size() + 2);
616 
617         sizeof_msg_log = sizeof(msg_log);
618         OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
619                                               &pgDesc[0],
620                                               2, // number of program groups
621                                               &program_options, // program options
622                                               msg_log, &sizeof_msg_log,
623                                               &program_groups[program_groups.size() - 2]));
624         //if (sizeof_msg_log > 1)
625         //    printf ("Creating 'shader' group for group '%s':\n%s\n", group_name.c_str(), msg_log);
626     }
627 
628 
629     OptixPipelineLinkOptions pipeline_link_options;
630     pipeline_link_options.maxTraceDepth          = 1;
631     pipeline_link_options.debugLevel             = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
632 #if (OPTIX_VERSION < 70100)
633     pipeline_link_options.overrideUsesMotionBlur = false;
634 #endif
635 
636     // Build string-table "library"
637     nvrtcProgram str_lib;
638 
639     auto extractNamespaces = [] (const OIIO::ustring &s) {
640         const char *str = s.c_str();
641         std::vector<std::string> ns;
642         do {
643             const char *begin = str;
644             // get to first ':'
645             while (*str != ':' && *str)
646                 str++;
647             ns.push_back(std::string(begin, str));
648             // advance to second ':'
649             if (*str && *str == ':')
650                 str++;
651         } while (*str++ != 0);
652         return ns;
653     };
654 
655     std::stringstream strlib_ss;
656 
657     strlib_ss << "// so things name-mangle properly\n";
658     strlib_ss << "struct DeviceString {\n";
659     strlib_ss << "    const char* m_chars;\n";
660     strlib_ss << "};\n";
661 
662     // write out all the global strings
663     for (auto &&gvar : m_globals_map) {
664         std::vector<std::string> var_ns = extractNamespaces(gvar.first);
665 
666         // build namespace
667         for (size_t i = 0; i < var_ns.size() - 1; i++)
668             strlib_ss << "namespace " << var_ns[i] << " {\n";
669 
670         strlib_ss << "__device__ DeviceString " << var_ns.back() << " = { (const char *)" << gvar.second << "};\n";
671         // close namespace up
672         for (size_t i = 0; i < var_ns.size() - 1; i++)
673             strlib_ss << "}\n";
674     }
675 
676     strlib_ss << "\n";
677     strlib_ss << "extern \"C\" __global__ void __direct_callable__strlib_dummy(int *j)\n";
678     strlib_ss << "{\n";
679     strlib_ss << "   // must have a __direct_callable__ function for the module to compile\n";
680     strlib_ss << "}\n";
681 
682     // XXX: Should this move to compute_60 (compute_35-compute_50 is now deprecated)
683     const char *cuda_compile_options[] = { "--gpu-architecture=compute_35"  ,
684                                            "--use_fast_math"                ,
685                                            "-dc"                            ,
686 #if OSL_CPLUSPLUS_VERSION >= 14
687                                            "--std=c++14"
688 #else
689                                            "--std=c++11"
690 #endif
691                                          };
692 
693     int num_compile_flags = int (sizeof (cuda_compile_options) / sizeof (cuda_compile_options[0]));
694     size_t str_lib_size, cuda_log_size;
695 
696     std::string cuda_string = strlib_ss.str();
697 
698     NVRTC_CHECK (nvrtcCreateProgram (&str_lib,
699                                      cuda_string.c_str(),
700                                      "cuda_strng_library",
701                                      0,         // number of headers
702                                      nullptr,   // header paths
703                                      nullptr)); // header files
704     nvrtcResult compileResult = nvrtcCompileProgram (str_lib,  num_compile_flags, cuda_compile_options);
705     if (compileResult != NVRTC_SUCCESS) {
706         NVRTC_CHECK (nvrtcGetProgramLogSize (str_lib, &cuda_log_size));
707         std::vector<char> cuda_log(cuda_log_size+1);
708         NVRTC_CHECK (nvrtcGetProgramLog (str_lib, cuda_log.data()));
709         cuda_log.back() = 0;
710         errhandler().error ("nvrtcCompileProgram failure for:\n%s\n"
711                             "====================================\n"
712                             "%s\n", cuda_string.c_str(), cuda_log.data());
713         return false;
714     }
715     NVRTC_CHECK (nvrtcGetPTXSize (str_lib, &str_lib_size));
716     std::vector<char> str_lib_ptx (str_lib_size);
717     NVRTC_CHECK (nvrtcGetPTX(str_lib, str_lib_ptx.data()));
718     NVRTC_CHECK (nvrtcDestroyProgram (&str_lib));
719 
720     std::string strlib_string (str_lib_ptx.begin (), str_lib_ptx.end ());
721 
722     OptixModule strlib_module;
723     sizeof_msg_log = sizeof(msg_log);
724     OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
725                                            &module_compile_options,
726                                            &pipeline_compile_options,
727                                            str_lib_ptx.data(),
728                                            str_lib_ptx.size(),
729                                            msg_log, &sizeof_msg_log,
730                                            &strlib_module));
731     //if (sizeof_msg_log > 1)
732     //    printf ("Creating module from string-library PTX:\n%s\n", msg_log);
733 
734     OptixProgramGroupDesc strlib_pg_desc = {};
735     strlib_pg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
736     strlib_pg_desc.callables.moduleDC              = strlib_module;
737     strlib_pg_desc.callables.entryFunctionNameDC   = "__direct_callable__strlib_dummy";
738     strlib_pg_desc.callables.moduleCC              = 0;
739     strlib_pg_desc.callables.entryFunctionNameCC   = nullptr;
740 
741     OptixProgramGroup strlib_group;
742 
743     sizeof_msg_log = sizeof(msg_log);
744     OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
745                                           &strlib_pg_desc,
746                                           1,
747                                           &program_options,
748                                           msg_log, &sizeof_msg_log,
749                                           &strlib_group));
750     //if (sizeof_msg_log > 1)
751     //    printf ("Creating program group for string-library:\n%s\n", msg_log);
752 
753     // Set up OptiX pipeline
754     std::vector<OptixProgramGroup> final_groups = {
755         strlib_group,      // string globals
756         raygen_group,
757         miss_group,
758         hitgroup_group,
759         program_groups[0], // init
760         program_groups[1], // entry
761     };
762 
763     sizeof_msg_log = sizeof(msg_log);
764     OPTIX_CHECK (optixPipelineCreate (m_optix_ctx,
765                                       &pipeline_compile_options,
766                                       &pipeline_link_options,
767                                       final_groups.data(),
768                                       int(final_groups.size()),
769                                       msg_log, &sizeof_msg_log,
770                                       &m_optix_pipeline));
771     //if (sizeof_msg_log > 1)
772     //    printf ("Creating optix pipeline:\n%s\n", msg_log);
773 
774     // Set the pipeline stack size
775     OptixStackSizes stack_sizes = {};
776     for( OptixProgramGroup& program_group : final_groups )
777         OPTIX_CHECK (optixUtilAccumulateStackSizes (program_group, &stack_sizes));
778 
779     uint32_t max_trace_depth = 1;
780     uint32_t max_cc_depth    = 1;
781     uint32_t max_dc_depth    = 1;
782     uint32_t direct_callable_stack_size_from_traversal;
783     uint32_t direct_callable_stack_size_from_state;
784     uint32_t continuation_stack_size;
785     OPTIX_CHECK (optixUtilComputeStackSizes (&stack_sizes,
786                                              max_trace_depth,
787                                              max_cc_depth,
788                                              max_dc_depth,
789                                              &direct_callable_stack_size_from_traversal,
790                                              &direct_callable_stack_size_from_state,
791                                              &continuation_stack_size ) );
792 
793     const uint32_t max_traversal_depth = 1;
794     OPTIX_CHECK (optixPipelineSetStackSize (m_optix_pipeline,
795                                             direct_callable_stack_size_from_traversal,
796                                             direct_callable_stack_size_from_state,
797                                             continuation_stack_size,
798                                             max_traversal_depth ));
799 
800     // Build OptiX Shader Binding Table (SBT)
801     CUdeviceptr d_raygenRecord;
802     CUdeviceptr d_missRecord;
803     CUdeviceptr d_hitgroupRecord;
804     CUdeviceptr d_callablesRecord;
805 
806     EmptyRecord raygenRecord, missRecord, hitgroupRecord, callablesRecord[2];
807 
808     OPTIX_CHECK (optixSbtRecordPackHeader (raygen_group     , &raygenRecord  ));
809     OPTIX_CHECK (optixSbtRecordPackHeader (miss_group       , &missRecord    ));
810     OPTIX_CHECK (optixSbtRecordPackHeader (hitgroup_group   , &hitgroupRecord));
811     OPTIX_CHECK (optixSbtRecordPackHeader (program_groups[0], &callablesRecord[0]));
812     OPTIX_CHECK (optixSbtRecordPackHeader (program_groups[1], &callablesRecord[1]));
813 
814     raygenRecord.data       = reinterpret_cast<void *>(5);
815     missRecord.data         = nullptr;
816     hitgroupRecord.data     = nullptr;
817     callablesRecord[0].data = reinterpret_cast<void *>(1);
818     callablesRecord[1].data = reinterpret_cast<void *>(2);
819 
820     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **> (&d_raygenRecord)   ,     sizeof(EmptyRecord)));
821     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **> (&d_missRecord)     ,     sizeof(EmptyRecord)));
822     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **> (&d_hitgroupRecord) ,     sizeof(EmptyRecord)));
823     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **> (&d_callablesRecord), 2 * sizeof(EmptyRecord)));
824 
825     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_raygenRecord)   , &raygenRecord      , sizeof(EmptyRecord), cudaMemcpyHostToDevice));
826     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_missRecord)     , &missRecord        , sizeof(EmptyRecord), cudaMemcpyHostToDevice));
827     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_hitgroupRecord) , &hitgroupRecord    , sizeof(EmptyRecord), cudaMemcpyHostToDevice));
828     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_callablesRecord), &callablesRecord[0], 2 * sizeof(EmptyRecord), cudaMemcpyHostToDevice));
829 
830     // Looks like OptixShadingTable needs to be filled out completely
831     m_optix_sbt.raygenRecord                 = d_raygenRecord;
832     m_optix_sbt.missRecordBase               = d_missRecord;
833     m_optix_sbt.missRecordStrideInBytes      = sizeof(EmptyRecord);
834     m_optix_sbt.missRecordCount              = 1;
835     m_optix_sbt.hitgroupRecordBase           = d_hitgroupRecord;
836     m_optix_sbt.hitgroupRecordStrideInBytes  = sizeof(EmptyRecord);
837     m_optix_sbt.hitgroupRecordCount          = 1;
838     m_optix_sbt.callablesRecordBase          = d_callablesRecord;
839     m_optix_sbt.callablesRecordStrideInBytes = sizeof(EmptyRecord);
840     m_optix_sbt.callablesRecordCount         = 2;
841 
842 #endif //#if (OPTIX_VERSION < 70000)
843 
844 #endif //#ifdef OSL_USE_OPTIX
845     return true;
846 }
847 
848 
849 
850 bool
851 OptixGridRenderer::finalize_scene()
852 {
853 #ifdef OSL_USE_OPTIX
854     make_optix_materials();
855 
856 #if (OPTIX_VERSION < 70000)
857 
858     m_optix_ctx["invw"]->setFloat (1.0f/m_xres);
859     m_optix_ctx["invh"]->setFloat (1.0f/m_yres);
860 
861     // Create the output buffer
862     optix::Buffer buffer = m_optix_ctx->createBuffer(RT_BUFFER_OUTPUT, RT_FORMAT_FLOAT3, m_xres, m_yres);
863     m_optix_ctx["output_buffer"]->set(buffer);
864 #else
865 
866 #endif //#if (OPTIX_VERSION < 70000)
867 
868 #if (OPTIX_VERSION < 70000)
869     if (!synch_attributes ())
870         return false;
871 #endif
872 
873 #if (OPTIX_VERSION < 70000)
874     m_optix_ctx->validate();
875 #else
876 #endif
877 
878 #endif
879     return true;
880 }
881 
882 
883 
884 /// Return true if the texture handle (previously returned by
885 /// get_texture_handle()) is a valid texture that can be subsequently
886 /// read or sampled.
887 bool
888 OptixGridRenderer::good(TextureHandle *handle OSL_MAYBE_UNUSED)
889 {
890 #ifdef OSL_USE_OPTIX
891 #if (OPTIX_VERSION < 70000)
892     return intptr_t(handle) != RT_TEXTURE_ID_NULL;
893 #else
894     return handle != nullptr;
895 #endif
896 #else
897     return false;
898 #endif
899 }
900 
901 
902 
903 /// Given the name of a texture, return an opaque handle that can be
904 /// used with texture calls to avoid the name lookups.
905 RendererServices::TextureHandle*
906 OptixGridRenderer::get_texture_handle (ustring filename OSL_MAYBE_UNUSED,
907                                        ShadingContext* shading_context OSL_MAYBE_UNUSED)
908 {
909 #ifdef OSL_USE_OPTIX
910 
911 #if (OPTIX_VERSION < 70000)
912     auto itr = m_samplers.find(filename);
913     if (itr == m_samplers.end()) {
914         optix::TextureSampler sampler = context()->createTextureSampler();
915         sampler->setWrapMode(0, RT_WRAP_REPEAT);
916         sampler->setWrapMode(1, RT_WRAP_REPEAT);
917         sampler->setWrapMode(2, RT_WRAP_REPEAT);
918 
919         sampler->setFilteringModes(RT_FILTER_LINEAR, RT_FILTER_LINEAR, RT_FILTER_NONE);
920         sampler->setIndexingMode(false ? RT_TEXTURE_INDEX_ARRAY_INDEX : RT_TEXTURE_INDEX_NORMALIZED_COORDINATES);
921         sampler->setReadMode(RT_TEXTURE_READ_NORMALIZED_FLOAT);
922         sampler->setMaxAnisotropy(1.0f);
923 
924 
925         OIIO::ImageBuf image;
926         if (!image.init_spec(filename, 0, 0)) {
927             errhandler().error ("Could not load: %s", filename);
928             return (TextureHandle*)(intptr_t(RT_TEXTURE_ID_NULL));
929         }
930         int nchan = image.spec().nchannels;
931 
932         OIIO::ROI roi = OIIO::get_roi_full(image.spec());
933         int width = roi.width(), height = roi.height();
934         std::vector<float> pixels(width * height * nchan);
935         image.get_pixels(roi, OIIO::TypeDesc::FLOAT, pixels.data());
936 
937         optix::Buffer buffer = context()->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_FLOAT4, width, height);
938 
939         float* device_ptr = static_cast<float*>(buffer->map());
940         unsigned int pixel_idx = 0;
941         for (int y = 0; y < height; ++y) {
942             for (int x = 0; x < width; ++x) {
943                 memcpy(device_ptr, &pixels[pixel_idx], sizeof(float) * nchan);
944                 device_ptr += 4;
945                 pixel_idx += nchan;
946             }
947         }
948         buffer->unmap();
949         sampler->setBuffer(buffer);
950         itr = m_samplers.emplace(std::move(filename), std::move(sampler)).first;
951 
952     }
953     return (RendererServices::TextureHandle*) intptr_t(itr->second->getId());
954 #else //#if (OPTIX_VERSION < 70000)
955     auto itr = m_samplers.find(filename);
956     if (itr == m_samplers.end()) {
957 
958         // Open image
959         OIIO::ImageBuf image;
960         if (!image.init_spec(filename, 0, 0)) {
961             errhandler().error ("Could not load: %s", filename);
962             return (TextureHandle*)(intptr_t(nullptr));
963         }
964 
965         OIIO::ROI roi = OIIO::get_roi_full(image.spec());
966         int32_t width = roi.width(), height = roi.height();
967         std::vector<float> pixels(width * height * 4);
968 
969         for (int j = 0; j < height; j++) {
970             for (int i = 0; i < width; i++) {
971                 image.getpixel(i, j, 0, &pixels[((j*width) + i) * 4 + 0]);
972             }
973         }
974         cudaResourceDesc res_desc = {};
975 
976         // hard-code textures to 4 channels
977         int32_t pitch  = width * 4 * sizeof(float);
978         cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
979 
980         cudaArray_t   pixelArray;
981         CUDA_CHECK (cudaMallocArray (&pixelArray,
982                                      &channel_desc,
983                                      width,height));
984 
985         CUDA_CHECK (cudaMemcpy2DToArray (pixelArray,
986                                          /* offset */0,0,
987                                          pixels.data(),
988                                          pitch,pitch,height,
989                                          cudaMemcpyHostToDevice));
990 
991         res_desc.resType          = cudaResourceTypeArray;
992         res_desc.res.array.array  = pixelArray;
993 
994         cudaTextureDesc tex_desc     = {};
995         tex_desc.addressMode[0]      = cudaAddressModeWrap;
996         tex_desc.addressMode[1]      = cudaAddressModeWrap;
997         tex_desc.filterMode          = cudaFilterModeLinear;
998         tex_desc.readMode            = cudaReadModeElementType; //cudaReadModeNormalizedFloat;
999         tex_desc.normalizedCoords    = 1;
1000         tex_desc.maxAnisotropy       = 1;
1001         tex_desc.maxMipmapLevelClamp = 99;
1002         tex_desc.minMipmapLevelClamp = 0;
1003         tex_desc.mipmapFilterMode    = cudaFilterModePoint;
1004         tex_desc.borderColor[0]      = 1.0f;
1005         tex_desc.sRGB                = 0;
1006 
1007         // Create texture object
1008         cudaTextureObject_t cuda_tex = 0;
1009         CUDA_CHECK (cudaCreateTextureObject (&cuda_tex, &res_desc, &tex_desc, nullptr));
1010         itr = m_samplers.emplace (std::move(filename), std::move(cuda_tex)).first;
1011     }
1012     return reinterpret_cast<RendererServices::TextureHandle *>(itr->second);
1013 
1014 #endif //#if (OPTIX_VERSION < 70000)
1015 
1016 #else
1017     return nullptr;
1018 #endif
1019 }
1020 
1021 
1022 
1023 void
1024 OptixGridRenderer::prepare_render()
1025 {
1026 #ifdef OSL_USE_OPTIX
1027     // Set up the OptiX Context
1028     init_optix_context(m_xres, m_yres);
1029 
1030     // Set up the OptiX scene graph
1031     finalize_scene ();
1032 #endif
1033 }
1034 
1035 
1036 
1037 void
1038 OptixGridRenderer::warmup()
1039 {
1040 #ifdef OSL_USE_OPTIX
1041     // Perform a tiny launch to warm up the OptiX context
1042 #if (OPTIX_VERSION < 70000)
1043     m_optix_ctx->launch (0, 1, 1);
1044 #else
1045     OPTIX_CHECK (optixLaunch (m_optix_pipeline,
1046                               m_cuda_stream,
1047                               d_launch_params,
1048                               sizeof(RenderParams),
1049                               &m_optix_sbt,
1050                               0, 0, 1));
1051     CUDA_SYNC_CHECK();
1052 #endif
1053 
1054 #endif
1055 }
1056 
1057 
1058 //extern "C" void setTestshadeGlobals(float h_invw, float h_invh, CUdeviceptr d_output_buffer, bool h_flipv);
1059 
1060 void
1061 OptixGridRenderer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED)
1062 {
1063 #ifdef OSL_USE_OPTIX
1064 #if (OPTIX_VERSION < 70000)
1065     m_optix_ctx->launch (0, xres, yres);
1066 #else
1067     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_output_buffer), xres * yres * 4 * sizeof(float)));
1068     CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_launch_params), sizeof(RenderParams)));
1069 
1070 
1071     m_xres = xres;
1072     m_yres = yres;
1073 
1074     RenderParams params;
1075     params.invw = 1.0f / m_xres;
1076     params.invh = 1.0f / m_yres;
1077     params.flipv = false; /* I don't see flipv being initialized anywhere */
1078     params.output_buffer = d_output_buffer;
1079 
1080     CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_launch_params), &params, sizeof(RenderParams), cudaMemcpyHostToDevice));
1081 
1082     OPTIX_CHECK (optixLaunch (m_optix_pipeline,
1083                               m_cuda_stream,
1084                               d_launch_params,
1085                               sizeof(RenderParams),
1086                               &m_optix_sbt,
1087                               xres, yres, 1));
1088     CUDA_SYNC_CHECK();
1089 #endif
1090 #endif
1091 }
1092 
1093 
1094 
1095 void
1096 OptixGridRenderer::finalize_pixel_buffer ()
1097 {
1098 #ifdef OSL_USE_OPTIX
1099 
1100     std::string buffer_name = "output_buffer";
1101 #if (OPTIX_VERSION < 70000)
1102     const void* buffer_ptr = m_optix_ctx[buffer_name]->getBuffer()->map();
1103     if (! buffer_ptr)
1104         errhandler().severe ("Unable to map buffer %s", buffer_name);
1105     outputbuf(0)->set_pixels (OIIO::ROI::All(), OIIO::TypeFloat, buffer_ptr);
1106 #else
1107     std::vector<float> tmp_buff(m_xres * m_yres * 3);
1108     CUDA_CHECK (cudaMemcpy (tmp_buff.data(), reinterpret_cast<void *>(d_output_buffer), m_xres * m_yres * 3 * sizeof(float), cudaMemcpyDeviceToHost));
1109     outputbuf(0)->set_pixels (OIIO::ROI::All(), OIIO::TypeFloat, tmp_buff.data());
1110 #endif
1111 #endif
1112 }
1113 
1114 
1115 
1116 void
1117 OptixGridRenderer::clear()
1118 {
1119     shaders().clear();
1120 #ifdef OSL_USE_OPTIX
1121 #if (OPTIX_VERSION < 70000)
1122     if (m_optix_ctx) {
1123         m_optix_ctx->destroy();
1124         m_optix_ctx = nullptr;
1125     }
1126 #else
1127     if (m_optix_ctx) {
1128         OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
1129         m_optix_ctx = 0;
1130     }
1131 #endif
1132 
1133 #endif
1134 }
1135 
1136 OSL_NAMESPACE_EXIT
1137 
1138