1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4
5 #include <vector>
6
7 #include <OpenImageIO/filesystem.h>
8 #include <OpenImageIO/sysutil.h>
9
10 #include <OSL/oslconfig.h>
11
12 #include "optixgridrender.h"
13 #include "render_params.h"
14
15 #ifdef OSL_USE_OPTIX
16 # if (OPTIX_VERSION >= 70000)
17 # include <optix_function_table_definition.h>
18 # include <optix_stack_size.h>
19 # include <optix_stubs.h>
20 # include <cuda.h>
21 # include <nvrtc.h>
22 # endif
23 #endif
24
25 // The pre-compiled renderer support library LLVM bitcode is embedded
26 // into the executable and made available through these variables.
27 extern int rend_llvm_compiled_ops_size;
28 extern unsigned char rend_llvm_compiled_ops_block[];
29
30
31
32 OSL_NAMESPACE_ENTER
33
34
35 #if (OPTIX_VERSION >= 70000)
36
37 #define CUDA_CHECK(call) \
38 { \
39 cudaError_t error = call; \
40 if (error != cudaSuccess) \
41 { \
42 std::stringstream ss; \
43 ss << "CUDA call (" << #call << " ) failed with error: '" \
44 << cudaGetErrorString( error ) \
45 << "' (" __FILE__ << ":" << __LINE__ << ")\n"; \
46 fprintf (stderr, "[CUDA ERROR] %s", ss.str().c_str() ); \
47 exit(1); \
48 } \
49 }
50
51 #define NVRTC_CHECK(call) \
52 { \
53 nvrtcResult error = call; \
54 if (error != NVRTC_SUCCESS) \
55 { \
56 std::stringstream ss; \
57 ss << "NVRTC call (" << #call << " ) failed with error: '" \
58 << nvrtcGetErrorString( error ) \
59 << "' (" __FILE__ << ":" << __LINE__ << ")\n"; \
60 fprintf (stderr, "[NVRTC ERROR] %s", ss.str().c_str() ); \
61 exit(1); \
62 } \
63 }
64
65
66 #define OPTIX_CHECK(call) \
67 { \
68 OptixResult res = call; \
69 if (res != OPTIX_SUCCESS) \
70 { \
71 std::stringstream ss; \
72 ss << "Optix call '" << #call << "' failed with error: " \
73 << optixGetErrorName( res ) \
74 << " (" __FILE__ ":" << __LINE__ << ")\n"; \
75 fprintf (stderr,"[OPTIX ERROR] %s", ss.str().c_str() ); \
76 exit(1); \
77 } \
78 }
79 #endif
80
81 #define CUDA_SYNC_CHECK() \
82 { \
83 cudaDeviceSynchronize(); \
84 cudaError_t error = cudaGetLastError(); \
85 if (error != cudaSuccess) { \
86 fprintf ( stderr, "error (%s: line %d): %s\n", __FILE__, __LINE__, cudaGetErrorString( error ) ); \
87 exit(1); \
88 } \
89 }
90
91 #ifdef OSL_USE_OPTIX
92 #if (OPTIX_VERSION >= 70000)
context_log_cb(unsigned int level,const char * tag,const char * message,void *)93 static void context_log_cb (unsigned int level, const char* tag, const char* message, void* /*cbdata */)
94 {
95 // std::cerr << "[" << std::setw( 2 ) << level << "][" << std::setw( 12 ) << tag << "]: " << message << "\n";
96 }
97 #endif
98 #endif
99
OptixGridRenderer()100 OptixGridRenderer::OptixGridRenderer ()
101 {
102 #ifdef OSL_USE_OPTIX
103
104 #if (OPTIX_VERSION < 70000)
105 // Set up the OptiX context
106 m_optix_ctx = optix::Context::create();
107 if (m_optix_ctx->getEnabledDeviceCount() != 1)
108 errhandler().warning ("Only one CUDA device is currently supported");
109
110 // Set up the string table. This allocates a block of CUDA device memory to
111 // hold all of the static strings used by the OSL shaders. The strings can
112 // be accessed via OptiX variables that hold pointers to the table entries.
113 m_str_table.init(m_optix_ctx);
114 #else
115 // Initialize CUDA
116 cudaFree(0);
117
118 CUcontext cuCtx = nullptr; // zero means take the current context
119
120 OptixDeviceContextOptions ctx_options = {};
121 ctx_options.logCallbackFunction = context_log_cb;
122 ctx_options.logCallbackLevel = 4;
123
124 OPTIX_CHECK (optixInit());
125 OPTIX_CHECK (optixDeviceContextCreate (cuCtx, &ctx_options, &m_optix_ctx));
126
127 CUDA_CHECK (cudaSetDevice (0));
128 CUDA_CHECK (cudaStreamCreate (&m_cuda_stream));
129
130 // Set up the string table. This allocates a block of CUDA device memory to
131 // hold all of the static strings used by the OSL shaders. The strings can
132 // be accessed via OptiX variables that hold pointers to the table entries.
133 m_str_table.init(m_optix_ctx);
134 // Register all of our string table entries
135 for (auto &&gvar : m_str_table.contents())
136 register_global(gvar.first.c_str(), gvar.second);
137 #endif //#if (OPTIX_VERSION < 70000)
138
139 #endif //#ifdef OSL_USE_OPTIX
140 }
141
142 uint64_t
register_global(const std::string & str,uint64_t value)143 OptixGridRenderer::register_global (const std::string &str, uint64_t value)
144 {
145 auto it = m_globals_map.find (ustring(str));
146
147 if (it != m_globals_map.end()) {
148 return it->second;
149 }
150 m_globals_map[ustring(str)] = value;
151 return value;
152 }
153
154 bool
fetch_global(const std::string & str,uint64_t * value)155 OptixGridRenderer::fetch_global (const std::string &str, uint64_t *value)
156 {
157 auto it = m_globals_map.find (ustring(str));
158
159 if (it != m_globals_map.end()) {
160 *value = it->second;
161 return true;
162 }
163 return false;
164 }
165
166
167
168 std::string
load_ptx_file(string_view filename)169 OptixGridRenderer::load_ptx_file (string_view filename)
170 {
171 #ifdef OSL_USE_OPTIX
172 std::vector<std::string> paths = {
173 OIIO::Filesystem::parent_path(OIIO::Sysutil::this_program_path()),
174 PTX_PATH
175 };
176 std::string filepath = OIIO::Filesystem::searchpath_find (filename, paths,
177 false);
178 if (OIIO::Filesystem::exists(filepath)) {
179 std::string ptx_string;
180 if (OIIO::Filesystem::read_text_file (filepath, ptx_string))
181 return ptx_string;
182 }
183 #endif
184 errhandler().severe ("Unable to load %s", filename);
185 return {};
186 }
187
188
189
~OptixGridRenderer()190 OptixGridRenderer::~OptixGridRenderer ()
191 {
192 #ifdef OSL_USE_OPTIX
193 m_str_table.freetable();
194 #if (OPTIX_VERSION < 70000)
195 if (m_optix_ctx)
196 m_optix_ctx->destroy();
197 #else
198 if (m_optix_ctx)
199 OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
200 #endif
201 #endif
202 }
203
204
205
206 void
init_shadingsys(ShadingSystem * ss)207 OptixGridRenderer::init_shadingsys (ShadingSystem *ss)
208 {
209 shadingsys = ss;
210
211 #ifdef OSL_USE_OPTIX
212 shadingsys->attribute ("lib_bitcode", {OSL::TypeDesc::UINT8, rend_llvm_compiled_ops_size},
213 rend_llvm_compiled_ops_block);
214 #endif
215 }
216
217
218
219 bool
init_optix_context(int xres OSL_MAYBE_UNUSED,int yres OSL_MAYBE_UNUSED)220 OptixGridRenderer::init_optix_context (int xres OSL_MAYBE_UNUSED,
221 int yres OSL_MAYBE_UNUSED)
222 {
223 #ifdef OSL_USE_OPTIX
224 #if (OPTIX_VERSION < 70000)
225 m_optix_ctx->setRayTypeCount (2);
226 m_optix_ctx->setEntryPointCount (1);
227 m_optix_ctx->setStackSize (2048);
228 m_optix_ctx->setPrintEnabled (true);
229
230 // Load the renderer CUDA source and generate PTX for it
231 std::string progName = "optix_grid_renderer.ptx";
232 std::string renderer_ptx = load_ptx_file(progName);
233 if (renderer_ptx.empty()) {
234 errhandler().severe ("Could not find PTX for the raygen program");
235 return false;
236 }
237
238 // Create the OptiX programs and set them on the optix::Context
239 m_program = m_optix_ctx->createProgramFromPTXString(renderer_ptx, "raygen");
240 m_optix_ctx->setRayGenerationProgram(0, m_program);
241 #endif //#if (OPTIX_VERSION < 70000)
242 #endif
243 return true;
244 }
245
246
247
248 bool
synch_attributes()249 OptixGridRenderer::synch_attributes ()
250 {
251 #ifdef OSL_USE_OPTIX
252
253 #if (OPTIX_VERSION < 70000)
254 // FIXME -- this is for testing only
255 // Make some device strings to test userdata parameters
256 uint64_t addr1 = register_string ("ud_str_1", "");
257 uint64_t addr2 = register_string ("userdata string", "");
258 m_optix_ctx["test_str_1"]->setUserData (sizeof(char*), &addr1);
259 m_optix_ctx["test_str_2"]->setUserData (sizeof(char*), &addr2);
260
261 {
262 const char* name = OSL_NAMESPACE_STRING "::pvt::s_color_system";
263
264 char* colorSys = nullptr;
265 long long cpuDataSizes[2] = {0,0};
266 if (!shadingsys->getattribute("colorsystem", TypeDesc::PTR, (void*)&colorSys) ||
267 !shadingsys->getattribute("colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG,2), (void*)&cpuDataSizes) ||
268 !colorSys || !cpuDataSizes[0]) {
269 errhandler().error ("No colorsystem available.");
270 return false;
271 }
272
273 auto cpuDataSize = cpuDataSizes[0];
274 auto numStrings = cpuDataSizes[1];
275
276 // Get the size data-size, minus the ustring size
277 const size_t podDataSize = cpuDataSize - sizeof(StringParam)*numStrings;
278
279 optix::Buffer buffer = m_optix_ctx->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_USER);
280 if (!buffer) {
281 errhandler().error ("Could not create buffer for '%s'.", name);
282 return false;
283 }
284
285 // set the element size to char
286 buffer->setElementSize(sizeof(char));
287
288 // and number of elements to the actual size needed.
289 buffer->setSize(podDataSize + sizeof(DeviceString)*numStrings);
290
291 // copy the base data
292 char* gpuData = (char*) buffer->map();
293 if (!gpuData) {
294 errhandler().error ("Could not map buffer for '%s' (size: %lu).",
295 name, podDataSize + sizeof(DeviceString)*numStrings);
296 return false;
297 }
298 ::memcpy(gpuData, colorSys, podDataSize);
299 // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
300 // FIXME -- Should probably handle alignment better.
301 const ustring* cpuString = (const ustring*)(colorSys + (cpuDataSize - sizeof(StringParam)*numStrings));
302 char* gpuStrings = gpuData + podDataSize;
303 for (const ustring* end = cpuString + numStrings; cpuString < end; ++cpuString) {
304 // convert the ustring to a device string
305 uint64_t devStr = register_string (cpuString->string(), "");
306 ::memcpy(gpuStrings, &devStr, sizeof(devStr));
307 gpuStrings += sizeof(DeviceString);
308 }
309
310 buffer->unmap();
311 m_optix_ctx[name]->setBuffer(buffer);
312
313 #else // #if (OPTIX_VERSION < 70000)
314
315 // FIXME -- this is for testing only
316 // Make some device strings to test userdata parameters
317 uint64_t addr1 = register_string ("ud_str_1", "");
318 uint64_t addr2 = register_string ("userdata string", "");
319
320 // Store the user-data
321 register_global("test_str_1", addr1);
322 register_global("test_str_2", addr2);
323
324 {
325 char* colorSys = nullptr;
326 long long cpuDataSizes[2] = {0,0};
327 if (!shadingsys->getattribute("colorsystem", TypeDesc::PTR, (void*)&colorSys) ||
328 !shadingsys->getattribute("colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG,2), (void*)&cpuDataSizes) ||
329 !colorSys || !cpuDataSizes[0]) {
330 errhandler().error ("No colorsystem available.");
331 return false;
332 }
333
334 const char* name = OSL_NAMESPACE_STRING "::pvt::s_color_system";
335
336 auto cpuDataSize = cpuDataSizes[0];
337 auto numStrings = cpuDataSizes[1];
338
339 // Get the size data-size, minus the ustring size
340 const size_t podDataSize = cpuDataSize - sizeof(StringParam)*numStrings;
341
342 CUdeviceptr d_buffer;
343
344 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_buffer), podDataSize + sizeof(DeviceString)*numStrings));
345
346 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_buffer), colorSys, podDataSize, cudaMemcpyHostToDevice));
347
348 // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
349 // FIXME -- Should probably handle alignment better.
350 const ustring* cpuString = (const ustring*)(colorSys + (cpuDataSize - sizeof(StringParam)*numStrings));
351 CUdeviceptr gpuStrings = d_buffer + podDataSize;
352 for (const ustring* end = cpuString + numStrings; cpuString < end; ++cpuString) {
353 // convert the ustring to a device string
354 uint64_t devStr = register_string (cpuString->string(), "");
355 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(gpuStrings), &devStr, sizeof(devStr), cudaMemcpyHostToDevice));
356 gpuStrings += sizeof(DeviceString);
357 }
358 register_global (name, d_buffer);
359
360 #endif
361 }
362 #endif
363 return true;
364 }
365
366
367
368 bool
369 OptixGridRenderer::make_optix_materials ()
370 {
371 #ifdef OSL_USE_OPTIX
372
373 // Stand-in: names of shader outputs to preserve
374 // FIXME
375 std::vector<const char*> outputs { "Cout" };
376
377 // Optimize each ShaderGroup in the scene, and use the resulting
378 // PTX to create OptiX Programs which can be called by the closest
379 // hit program in the wrapper to execute the compiled OSL shader.
380 int mtl_id = 0;
381
382 #if (OPTIX_VERSION < 70000)
383 for (const auto& groupref : shaders()) {
384 shadingsys->attribute (groupref.get(), "renderer_outputs",
385 TypeDesc(TypeDesc::STRING, outputs.size()),
386 outputs.data());
387
388 shadingsys->optimize_group (groupref.get(), nullptr);
389
390 if (!shadingsys->find_symbol (*groupref.get(), ustring(outputs[0]))) {
391 // FIXME: This is for cases where testshade is run with 1x1 resolution
392 // Those tests may not have a Cout parameter to write to.
393 if (m_xres > 1 && m_yres > 1) {
394 errhandler().warning ("Requested output '%s', which wasn't found",
395 outputs[0]);
396 }
397 }
398
399 std::string group_name, init_name, entry_name;
400 shadingsys->getattribute (groupref.get(), "groupname", group_name);
401 shadingsys->getattribute (groupref.get(), "group_init_name", init_name);
402 shadingsys->getattribute (groupref.get(), "group_entry_name", entry_name);
403
404 // Retrieve the compiled ShaderGroup PTX
405 std::string osl_ptx;
406 shadingsys->getattribute (groupref.get(), "ptx_compiled_version",
407 OSL::TypeDesc::PTR, &osl_ptx);
408
409 if (osl_ptx.empty()) {
410 errhandler().error ("Failed to generate PTX for ShaderGroup %s",
411 group_name);
412 return false;
413 }
414
415 if (options.get_int("saveptx")) {
416 std::string filename = OIIO::Strutil::sprintf("%s_%d.ptx",
417 group_name, mtl_id++);
418 OIIO::ofstream out;
419 OIIO::Filesystem::open (out, filename);
420 out << osl_ptx;
421 }
422
423 // Create Programs from the init and group_entry functions,
424 // and set the OSL functions as Callable Programs so that they
425 // can be executed by the closest hit program in the wrapper
426 optix::Program osl_init = m_optix_ctx->createProgramFromPTXString (
427 osl_ptx, init_name);
428 optix::Program osl_group = m_optix_ctx->createProgramFromPTXString (
429 osl_ptx, entry_name);
430
431 // Grid shading
432 m_program["osl_init_func" ]->setProgramId (osl_init );
433 m_program["osl_group_func"]->setProgramId (osl_group);
434 }
435
436 #else //#if (OPTIX_VERSION < 70000)
437
438 std::vector<OptixModule> modules;
439
440 // Space for mesage logging
441 char msg_log[8192];
442 size_t sizeof_msg_log;
443
444 // Make module that contains programs we'll use in this scene
445 OptixModuleCompileOptions module_compile_options = {};
446
447 module_compile_options.maxRegisterCount = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT;
448 module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT;
449 module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
450
451 OptixPipelineCompileOptions pipeline_compile_options = {};
452
453 pipeline_compile_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
454 pipeline_compile_options.usesMotionBlur = false;
455 pipeline_compile_options.numPayloadValues = 0;
456 pipeline_compile_options.numAttributeValues = 0;
457 pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW;
458 pipeline_compile_options.pipelineLaunchParamsVariableName = "render_params";
459
460 // Create 'raygen' program
461
462 // Load the renderer CUDA source and generate PTX for it
463 std::string progName = "optix_grid_renderer.ptx";
464 std::string program_ptx = load_ptx_file(progName);
465 if (program_ptx.empty()) {
466 errhandler().severe ("Could not find PTX for the raygen program");
467 return false;
468 }
469
470 sizeof_msg_log = sizeof(msg_log);
471 OptixModule program_module;
472 OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
473 &module_compile_options,
474 &pipeline_compile_options,
475 program_ptx.c_str(),
476 program_ptx.size(),
477 msg_log, &sizeof_msg_log,
478 &program_module));
479 //if (sizeof_msg_log > 1)
480 // printf ("Creating module from PTX-file %s:\n%s\n", progName.c_str(), msg_log);
481
482 // Record it so we can destroy it later
483 modules.push_back(program_module);
484
485 OptixProgramGroupOptions program_options = {};
486 std::vector<OptixProgramGroup> program_groups;
487
488 // Raygen group
489 OptixProgramGroupDesc raygen_desc = {};
490 raygen_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
491 raygen_desc.raygen.module = program_module;
492 raygen_desc.raygen.entryFunctionName = "__raygen__";
493
494 OptixProgramGroup raygen_group;
495 sizeof_msg_log = sizeof (msg_log);
496 OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
497 &raygen_desc,
498 1, // number of program groups
499 &program_options, // program options
500 msg_log, &sizeof_msg_log,
501 &raygen_group));
502 //if (sizeof_msg_log > 1)
503 // printf ("Creating 'ray-gen' program group:\n%s\n", msg_log);
504
505 // Miss group
506 OptixProgramGroupDesc miss_desc = {};
507 miss_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
508 miss_desc.miss.module = program_module;
509 miss_desc.miss.entryFunctionName = "__miss__";
510
511 OptixProgramGroup miss_group;
512 sizeof_msg_log = sizeof(msg_log);
513 OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
514 &miss_desc,
515 1,
516 &program_options,
517 msg_log, &sizeof_msg_log,
518 &miss_group));
519 //if (sizeof_msg_log > 1)
520 // printf ("Creating 'miss' program group:\n%s\n", msg_log);
521
522 // Hitgroup
523 OptixProgramGroupDesc hitgroup_desc = {};
524 hitgroup_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
525 hitgroup_desc.hitgroup.moduleCH = program_module;
526 hitgroup_desc.hitgroup.entryFunctionNameCH = "__closesthit__";
527 hitgroup_desc.hitgroup.moduleAH = program_module;
528 hitgroup_desc.hitgroup.entryFunctionNameAH = "__anyhit__";
529
530 OptixProgramGroup hitgroup_group;
531
532 sizeof_msg_log = sizeof(msg_log);
533 OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
534 &hitgroup_desc,
535 1, // number of program groups
536 &program_options, // program options
537 msg_log, &sizeof_msg_log,
538 &hitgroup_group));
539 //if (sizeof_msg_log > 1)
540 // printf ("Creating 'hitgroup' program group:\n%s\n", msg_log);
541
542 // Create materials
543 for (const auto& groupref : shaders()) {
544 shadingsys->attribute (groupref.get(), "renderer_outputs",
545 TypeDesc(TypeDesc::STRING, outputs.size()),
546 outputs.data());
547
548 shadingsys->optimize_group (groupref.get(), nullptr);
549
550 if (!shadingsys->find_symbol (*groupref.get(), ustring(outputs[0]))) {
551 // FIXME: This is for cases where testshade is run with 1x1 resolution
552 // Those tests may not have a Cout parameter to write to.
553 if (m_xres > 1 && m_yres > 1) {
554 errhandler().warning ("Requested output '%s', which wasn't found",
555 outputs[0]);
556 }
557 }
558
559 std::string group_name, init_name, entry_name;
560 shadingsys->getattribute (groupref.get(), "groupname", group_name);
561 shadingsys->getattribute (groupref.get(), "group_init_name", init_name);
562 shadingsys->getattribute (groupref.get(), "group_entry_name", entry_name);
563
564 // Retrieve the compiled ShaderGroup PTX
565 std::string osl_ptx;
566 shadingsys->getattribute (groupref.get(), "ptx_compiled_version",
567 OSL::TypeDesc::PTR, &osl_ptx);
568
569 if (osl_ptx.empty()) {
570 errhandler().error ("Failed to generate PTX for ShaderGroup %s",
571 group_name);
572 return false;
573 }
574
575 if (options.get_int("saveptx")) {
576 std::string filename = OIIO::Strutil::sprintf("%s_%d.ptx", group_name,
577 mtl_id++);
578 OIIO::ofstream out;
579 OIIO::Filesystem::open (out, filename);
580 out << osl_ptx;
581 }
582
583 OptixModule optix_module;
584
585 // Create Programs from the init and group_entry functions,
586 // and set the OSL functions as Callable Programs so that they
587 // can be executed by the closest hit program in the wrapper
588 sizeof_msg_log = sizeof(msg_log);
589 OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
590 &module_compile_options,
591 &pipeline_compile_options,
592 osl_ptx.c_str(),
593 osl_ptx.size(),
594 msg_log, &sizeof_msg_log,
595 &optix_module));
596 //if (sizeof_msg_log > 1)
597 // printf ("Creating module from PTX group '%s':\n%s\n", group_name.c_str(), msg_log);
598
599 modules.push_back (optix_module);
600
601 // Create 2x program groups (for direct callables)
602 OptixProgramGroupOptions program_options = {};
603 OptixProgramGroupDesc pgDesc[3] = {};
604 pgDesc[0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
605 pgDesc[0].callables.moduleDC = optix_module;
606 pgDesc[0].callables.entryFunctionNameDC = init_name.c_str();
607 pgDesc[0].callables.moduleCC = 0;
608 pgDesc[0].callables.entryFunctionNameCC = nullptr;
609 pgDesc[1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
610 pgDesc[1].callables.moduleDC = optix_module;
611 pgDesc[1].callables.entryFunctionNameDC = entry_name.c_str();
612 pgDesc[1].callables.moduleCC = 0;
613 pgDesc[1].callables.entryFunctionNameCC = nullptr;
614
615 program_groups.resize (program_groups.size() + 2);
616
617 sizeof_msg_log = sizeof(msg_log);
618 OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
619 &pgDesc[0],
620 2, // number of program groups
621 &program_options, // program options
622 msg_log, &sizeof_msg_log,
623 &program_groups[program_groups.size() - 2]));
624 //if (sizeof_msg_log > 1)
625 // printf ("Creating 'shader' group for group '%s':\n%s\n", group_name.c_str(), msg_log);
626 }
627
628
629 OptixPipelineLinkOptions pipeline_link_options;
630 pipeline_link_options.maxTraceDepth = 1;
631 pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
632 #if (OPTIX_VERSION < 70100)
633 pipeline_link_options.overrideUsesMotionBlur = false;
634 #endif
635
636 // Build string-table "library"
637 nvrtcProgram str_lib;
638
639 auto extractNamespaces = [] (const OIIO::ustring &s) {
640 const char *str = s.c_str();
641 std::vector<std::string> ns;
642 do {
643 const char *begin = str;
644 // get to first ':'
645 while (*str != ':' && *str)
646 str++;
647 ns.push_back(std::string(begin, str));
648 // advance to second ':'
649 if (*str && *str == ':')
650 str++;
651 } while (*str++ != 0);
652 return ns;
653 };
654
655 std::stringstream strlib_ss;
656
657 strlib_ss << "// so things name-mangle properly\n";
658 strlib_ss << "struct DeviceString {\n";
659 strlib_ss << " const char* m_chars;\n";
660 strlib_ss << "};\n";
661
662 // write out all the global strings
663 for (auto &&gvar : m_globals_map) {
664 std::vector<std::string> var_ns = extractNamespaces(gvar.first);
665
666 // build namespace
667 for (size_t i = 0; i < var_ns.size() - 1; i++)
668 strlib_ss << "namespace " << var_ns[i] << " {\n";
669
670 strlib_ss << "__device__ DeviceString " << var_ns.back() << " = { (const char *)" << gvar.second << "};\n";
671 // close namespace up
672 for (size_t i = 0; i < var_ns.size() - 1; i++)
673 strlib_ss << "}\n";
674 }
675
676 strlib_ss << "\n";
677 strlib_ss << "extern \"C\" __global__ void __direct_callable__strlib_dummy(int *j)\n";
678 strlib_ss << "{\n";
679 strlib_ss << " // must have a __direct_callable__ function for the module to compile\n";
680 strlib_ss << "}\n";
681
682 // XXX: Should this move to compute_60 (compute_35-compute_50 is now deprecated)
683 const char *cuda_compile_options[] = { "--gpu-architecture=compute_35" ,
684 "--use_fast_math" ,
685 "-dc" ,
686 #if OSL_CPLUSPLUS_VERSION >= 14
687 "--std=c++14"
688 #else
689 "--std=c++11"
690 #endif
691 };
692
693 int num_compile_flags = int (sizeof (cuda_compile_options) / sizeof (cuda_compile_options[0]));
694 size_t str_lib_size, cuda_log_size;
695
696 std::string cuda_string = strlib_ss.str();
697
698 NVRTC_CHECK (nvrtcCreateProgram (&str_lib,
699 cuda_string.c_str(),
700 "cuda_strng_library",
701 0, // number of headers
702 nullptr, // header paths
703 nullptr)); // header files
704 nvrtcResult compileResult = nvrtcCompileProgram (str_lib, num_compile_flags, cuda_compile_options);
705 if (compileResult != NVRTC_SUCCESS) {
706 NVRTC_CHECK (nvrtcGetProgramLogSize (str_lib, &cuda_log_size));
707 std::vector<char> cuda_log(cuda_log_size+1);
708 NVRTC_CHECK (nvrtcGetProgramLog (str_lib, cuda_log.data()));
709 cuda_log.back() = 0;
710 errhandler().error ("nvrtcCompileProgram failure for:\n%s\n"
711 "====================================\n"
712 "%s\n", cuda_string.c_str(), cuda_log.data());
713 return false;
714 }
715 NVRTC_CHECK (nvrtcGetPTXSize (str_lib, &str_lib_size));
716 std::vector<char> str_lib_ptx (str_lib_size);
717 NVRTC_CHECK (nvrtcGetPTX(str_lib, str_lib_ptx.data()));
718 NVRTC_CHECK (nvrtcDestroyProgram (&str_lib));
719
720 std::string strlib_string (str_lib_ptx.begin (), str_lib_ptx.end ());
721
722 OptixModule strlib_module;
723 sizeof_msg_log = sizeof(msg_log);
724 OPTIX_CHECK (optixModuleCreateFromPTX (m_optix_ctx,
725 &module_compile_options,
726 &pipeline_compile_options,
727 str_lib_ptx.data(),
728 str_lib_ptx.size(),
729 msg_log, &sizeof_msg_log,
730 &strlib_module));
731 //if (sizeof_msg_log > 1)
732 // printf ("Creating module from string-library PTX:\n%s\n", msg_log);
733
734 OptixProgramGroupDesc strlib_pg_desc = {};
735 strlib_pg_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
736 strlib_pg_desc.callables.moduleDC = strlib_module;
737 strlib_pg_desc.callables.entryFunctionNameDC = "__direct_callable__strlib_dummy";
738 strlib_pg_desc.callables.moduleCC = 0;
739 strlib_pg_desc.callables.entryFunctionNameCC = nullptr;
740
741 OptixProgramGroup strlib_group;
742
743 sizeof_msg_log = sizeof(msg_log);
744 OPTIX_CHECK (optixProgramGroupCreate (m_optix_ctx,
745 &strlib_pg_desc,
746 1,
747 &program_options,
748 msg_log, &sizeof_msg_log,
749 &strlib_group));
750 //if (sizeof_msg_log > 1)
751 // printf ("Creating program group for string-library:\n%s\n", msg_log);
752
753 // Set up OptiX pipeline
754 std::vector<OptixProgramGroup> final_groups = {
755 strlib_group, // string globals
756 raygen_group,
757 miss_group,
758 hitgroup_group,
759 program_groups[0], // init
760 program_groups[1], // entry
761 };
762
763 sizeof_msg_log = sizeof(msg_log);
764 OPTIX_CHECK (optixPipelineCreate (m_optix_ctx,
765 &pipeline_compile_options,
766 &pipeline_link_options,
767 final_groups.data(),
768 int(final_groups.size()),
769 msg_log, &sizeof_msg_log,
770 &m_optix_pipeline));
771 //if (sizeof_msg_log > 1)
772 // printf ("Creating optix pipeline:\n%s\n", msg_log);
773
774 // Set the pipeline stack size
775 OptixStackSizes stack_sizes = {};
776 for( OptixProgramGroup& program_group : final_groups )
777 OPTIX_CHECK (optixUtilAccumulateStackSizes (program_group, &stack_sizes));
778
779 uint32_t max_trace_depth = 1;
780 uint32_t max_cc_depth = 1;
781 uint32_t max_dc_depth = 1;
782 uint32_t direct_callable_stack_size_from_traversal;
783 uint32_t direct_callable_stack_size_from_state;
784 uint32_t continuation_stack_size;
785 OPTIX_CHECK (optixUtilComputeStackSizes (&stack_sizes,
786 max_trace_depth,
787 max_cc_depth,
788 max_dc_depth,
789 &direct_callable_stack_size_from_traversal,
790 &direct_callable_stack_size_from_state,
791 &continuation_stack_size ) );
792
793 const uint32_t max_traversal_depth = 1;
794 OPTIX_CHECK (optixPipelineSetStackSize (m_optix_pipeline,
795 direct_callable_stack_size_from_traversal,
796 direct_callable_stack_size_from_state,
797 continuation_stack_size,
798 max_traversal_depth ));
799
800 // Build OptiX Shader Binding Table (SBT)
801 CUdeviceptr d_raygenRecord;
802 CUdeviceptr d_missRecord;
803 CUdeviceptr d_hitgroupRecord;
804 CUdeviceptr d_callablesRecord;
805
806 EmptyRecord raygenRecord, missRecord, hitgroupRecord, callablesRecord[2];
807
808 OPTIX_CHECK (optixSbtRecordPackHeader (raygen_group , &raygenRecord ));
809 OPTIX_CHECK (optixSbtRecordPackHeader (miss_group , &missRecord ));
810 OPTIX_CHECK (optixSbtRecordPackHeader (hitgroup_group , &hitgroupRecord));
811 OPTIX_CHECK (optixSbtRecordPackHeader (program_groups[0], &callablesRecord[0]));
812 OPTIX_CHECK (optixSbtRecordPackHeader (program_groups[1], &callablesRecord[1]));
813
814 raygenRecord.data = reinterpret_cast<void *>(5);
815 missRecord.data = nullptr;
816 hitgroupRecord.data = nullptr;
817 callablesRecord[0].data = reinterpret_cast<void *>(1);
818 callablesRecord[1].data = reinterpret_cast<void *>(2);
819
820 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **> (&d_raygenRecord) , sizeof(EmptyRecord)));
821 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **> (&d_missRecord) , sizeof(EmptyRecord)));
822 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **> (&d_hitgroupRecord) , sizeof(EmptyRecord)));
823 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **> (&d_callablesRecord), 2 * sizeof(EmptyRecord)));
824
825 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_raygenRecord) , &raygenRecord , sizeof(EmptyRecord), cudaMemcpyHostToDevice));
826 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_missRecord) , &missRecord , sizeof(EmptyRecord), cudaMemcpyHostToDevice));
827 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_hitgroupRecord) , &hitgroupRecord , sizeof(EmptyRecord), cudaMemcpyHostToDevice));
828 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>( d_callablesRecord), &callablesRecord[0], 2 * sizeof(EmptyRecord), cudaMemcpyHostToDevice));
829
830 // Looks like OptixShadingTable needs to be filled out completely
831 m_optix_sbt.raygenRecord = d_raygenRecord;
832 m_optix_sbt.missRecordBase = d_missRecord;
833 m_optix_sbt.missRecordStrideInBytes = sizeof(EmptyRecord);
834 m_optix_sbt.missRecordCount = 1;
835 m_optix_sbt.hitgroupRecordBase = d_hitgroupRecord;
836 m_optix_sbt.hitgroupRecordStrideInBytes = sizeof(EmptyRecord);
837 m_optix_sbt.hitgroupRecordCount = 1;
838 m_optix_sbt.callablesRecordBase = d_callablesRecord;
839 m_optix_sbt.callablesRecordStrideInBytes = sizeof(EmptyRecord);
840 m_optix_sbt.callablesRecordCount = 2;
841
842 #endif //#if (OPTIX_VERSION < 70000)
843
844 #endif //#ifdef OSL_USE_OPTIX
845 return true;
846 }
847
848
849
850 bool
851 OptixGridRenderer::finalize_scene()
852 {
853 #ifdef OSL_USE_OPTIX
854 make_optix_materials();
855
856 #if (OPTIX_VERSION < 70000)
857
858 m_optix_ctx["invw"]->setFloat (1.0f/m_xres);
859 m_optix_ctx["invh"]->setFloat (1.0f/m_yres);
860
861 // Create the output buffer
862 optix::Buffer buffer = m_optix_ctx->createBuffer(RT_BUFFER_OUTPUT, RT_FORMAT_FLOAT3, m_xres, m_yres);
863 m_optix_ctx["output_buffer"]->set(buffer);
864 #else
865
866 #endif //#if (OPTIX_VERSION < 70000)
867
868 #if (OPTIX_VERSION < 70000)
869 if (!synch_attributes ())
870 return false;
871 #endif
872
873 #if (OPTIX_VERSION < 70000)
874 m_optix_ctx->validate();
875 #else
876 #endif
877
878 #endif
879 return true;
880 }
881
882
883
884 /// Return true if the texture handle (previously returned by
885 /// get_texture_handle()) is a valid texture that can be subsequently
886 /// read or sampled.
887 bool
888 OptixGridRenderer::good(TextureHandle *handle OSL_MAYBE_UNUSED)
889 {
890 #ifdef OSL_USE_OPTIX
891 #if (OPTIX_VERSION < 70000)
892 return intptr_t(handle) != RT_TEXTURE_ID_NULL;
893 #else
894 return handle != nullptr;
895 #endif
896 #else
897 return false;
898 #endif
899 }
900
901
902
903 /// Given the name of a texture, return an opaque handle that can be
904 /// used with texture calls to avoid the name lookups.
905 RendererServices::TextureHandle*
906 OptixGridRenderer::get_texture_handle (ustring filename OSL_MAYBE_UNUSED,
907 ShadingContext* shading_context OSL_MAYBE_UNUSED)
908 {
909 #ifdef OSL_USE_OPTIX
910
911 #if (OPTIX_VERSION < 70000)
912 auto itr = m_samplers.find(filename);
913 if (itr == m_samplers.end()) {
914 optix::TextureSampler sampler = context()->createTextureSampler();
915 sampler->setWrapMode(0, RT_WRAP_REPEAT);
916 sampler->setWrapMode(1, RT_WRAP_REPEAT);
917 sampler->setWrapMode(2, RT_WRAP_REPEAT);
918
919 sampler->setFilteringModes(RT_FILTER_LINEAR, RT_FILTER_LINEAR, RT_FILTER_NONE);
920 sampler->setIndexingMode(false ? RT_TEXTURE_INDEX_ARRAY_INDEX : RT_TEXTURE_INDEX_NORMALIZED_COORDINATES);
921 sampler->setReadMode(RT_TEXTURE_READ_NORMALIZED_FLOAT);
922 sampler->setMaxAnisotropy(1.0f);
923
924
925 OIIO::ImageBuf image;
926 if (!image.init_spec(filename, 0, 0)) {
927 errhandler().error ("Could not load: %s", filename);
928 return (TextureHandle*)(intptr_t(RT_TEXTURE_ID_NULL));
929 }
930 int nchan = image.spec().nchannels;
931
932 OIIO::ROI roi = OIIO::get_roi_full(image.spec());
933 int width = roi.width(), height = roi.height();
934 std::vector<float> pixels(width * height * nchan);
935 image.get_pixels(roi, OIIO::TypeDesc::FLOAT, pixels.data());
936
937 optix::Buffer buffer = context()->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_FLOAT4, width, height);
938
939 float* device_ptr = static_cast<float*>(buffer->map());
940 unsigned int pixel_idx = 0;
941 for (int y = 0; y < height; ++y) {
942 for (int x = 0; x < width; ++x) {
943 memcpy(device_ptr, &pixels[pixel_idx], sizeof(float) * nchan);
944 device_ptr += 4;
945 pixel_idx += nchan;
946 }
947 }
948 buffer->unmap();
949 sampler->setBuffer(buffer);
950 itr = m_samplers.emplace(std::move(filename), std::move(sampler)).first;
951
952 }
953 return (RendererServices::TextureHandle*) intptr_t(itr->second->getId());
954 #else //#if (OPTIX_VERSION < 70000)
955 auto itr = m_samplers.find(filename);
956 if (itr == m_samplers.end()) {
957
958 // Open image
959 OIIO::ImageBuf image;
960 if (!image.init_spec(filename, 0, 0)) {
961 errhandler().error ("Could not load: %s", filename);
962 return (TextureHandle*)(intptr_t(nullptr));
963 }
964
965 OIIO::ROI roi = OIIO::get_roi_full(image.spec());
966 int32_t width = roi.width(), height = roi.height();
967 std::vector<float> pixels(width * height * 4);
968
969 for (int j = 0; j < height; j++) {
970 for (int i = 0; i < width; i++) {
971 image.getpixel(i, j, 0, &pixels[((j*width) + i) * 4 + 0]);
972 }
973 }
974 cudaResourceDesc res_desc = {};
975
976 // hard-code textures to 4 channels
977 int32_t pitch = width * 4 * sizeof(float);
978 cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
979
980 cudaArray_t pixelArray;
981 CUDA_CHECK (cudaMallocArray (&pixelArray,
982 &channel_desc,
983 width,height));
984
985 CUDA_CHECK (cudaMemcpy2DToArray (pixelArray,
986 /* offset */0,0,
987 pixels.data(),
988 pitch,pitch,height,
989 cudaMemcpyHostToDevice));
990
991 res_desc.resType = cudaResourceTypeArray;
992 res_desc.res.array.array = pixelArray;
993
994 cudaTextureDesc tex_desc = {};
995 tex_desc.addressMode[0] = cudaAddressModeWrap;
996 tex_desc.addressMode[1] = cudaAddressModeWrap;
997 tex_desc.filterMode = cudaFilterModeLinear;
998 tex_desc.readMode = cudaReadModeElementType; //cudaReadModeNormalizedFloat;
999 tex_desc.normalizedCoords = 1;
1000 tex_desc.maxAnisotropy = 1;
1001 tex_desc.maxMipmapLevelClamp = 99;
1002 tex_desc.minMipmapLevelClamp = 0;
1003 tex_desc.mipmapFilterMode = cudaFilterModePoint;
1004 tex_desc.borderColor[0] = 1.0f;
1005 tex_desc.sRGB = 0;
1006
1007 // Create texture object
1008 cudaTextureObject_t cuda_tex = 0;
1009 CUDA_CHECK (cudaCreateTextureObject (&cuda_tex, &res_desc, &tex_desc, nullptr));
1010 itr = m_samplers.emplace (std::move(filename), std::move(cuda_tex)).first;
1011 }
1012 return reinterpret_cast<RendererServices::TextureHandle *>(itr->second);
1013
1014 #endif //#if (OPTIX_VERSION < 70000)
1015
1016 #else
1017 return nullptr;
1018 #endif
1019 }
1020
1021
1022
1023 void
1024 OptixGridRenderer::prepare_render()
1025 {
1026 #ifdef OSL_USE_OPTIX
1027 // Set up the OptiX Context
1028 init_optix_context(m_xres, m_yres);
1029
1030 // Set up the OptiX scene graph
1031 finalize_scene ();
1032 #endif
1033 }
1034
1035
1036
1037 void
1038 OptixGridRenderer::warmup()
1039 {
1040 #ifdef OSL_USE_OPTIX
1041 // Perform a tiny launch to warm up the OptiX context
1042 #if (OPTIX_VERSION < 70000)
1043 m_optix_ctx->launch (0, 1, 1);
1044 #else
1045 OPTIX_CHECK (optixLaunch (m_optix_pipeline,
1046 m_cuda_stream,
1047 d_launch_params,
1048 sizeof(RenderParams),
1049 &m_optix_sbt,
1050 0, 0, 1));
1051 CUDA_SYNC_CHECK();
1052 #endif
1053
1054 #endif
1055 }
1056
1057
1058 //extern "C" void setTestshadeGlobals(float h_invw, float h_invh, CUdeviceptr d_output_buffer, bool h_flipv);
1059
1060 void
1061 OptixGridRenderer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED)
1062 {
1063 #ifdef OSL_USE_OPTIX
1064 #if (OPTIX_VERSION < 70000)
1065 m_optix_ctx->launch (0, xres, yres);
1066 #else
1067 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_output_buffer), xres * yres * 4 * sizeof(float)));
1068 CUDA_CHECK (cudaMalloc (reinterpret_cast<void **>(&d_launch_params), sizeof(RenderParams)));
1069
1070
1071 m_xres = xres;
1072 m_yres = yres;
1073
1074 RenderParams params;
1075 params.invw = 1.0f / m_xres;
1076 params.invh = 1.0f / m_yres;
1077 params.flipv = false; /* I don't see flipv being initialized anywhere */
1078 params.output_buffer = d_output_buffer;
1079
1080 CUDA_CHECK (cudaMemcpy (reinterpret_cast<void *>(d_launch_params), ¶ms, sizeof(RenderParams), cudaMemcpyHostToDevice));
1081
1082 OPTIX_CHECK (optixLaunch (m_optix_pipeline,
1083 m_cuda_stream,
1084 d_launch_params,
1085 sizeof(RenderParams),
1086 &m_optix_sbt,
1087 xres, yres, 1));
1088 CUDA_SYNC_CHECK();
1089 #endif
1090 #endif
1091 }
1092
1093
1094
1095 void
1096 OptixGridRenderer::finalize_pixel_buffer ()
1097 {
1098 #ifdef OSL_USE_OPTIX
1099
1100 std::string buffer_name = "output_buffer";
1101 #if (OPTIX_VERSION < 70000)
1102 const void* buffer_ptr = m_optix_ctx[buffer_name]->getBuffer()->map();
1103 if (! buffer_ptr)
1104 errhandler().severe ("Unable to map buffer %s", buffer_name);
1105 outputbuf(0)->set_pixels (OIIO::ROI::All(), OIIO::TypeFloat, buffer_ptr);
1106 #else
1107 std::vector<float> tmp_buff(m_xres * m_yres * 3);
1108 CUDA_CHECK (cudaMemcpy (tmp_buff.data(), reinterpret_cast<void *>(d_output_buffer), m_xres * m_yres * 3 * sizeof(float), cudaMemcpyDeviceToHost));
1109 outputbuf(0)->set_pixels (OIIO::ROI::All(), OIIO::TypeFloat, tmp_buff.data());
1110 #endif
1111 #endif
1112 }
1113
1114
1115
1116 void
1117 OptixGridRenderer::clear()
1118 {
1119 shaders().clear();
1120 #ifdef OSL_USE_OPTIX
1121 #if (OPTIX_VERSION < 70000)
1122 if (m_optix_ctx) {
1123 m_optix_ctx->destroy();
1124 m_optix_ctx = nullptr;
1125 }
1126 #else
1127 if (m_optix_ctx) {
1128 OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
1129 m_optix_ctx = 0;
1130 }
1131 #endif
1132
1133 #endif
1134 }
1135
1136 OSL_NAMESPACE_EXIT
1137
1138