1 #ifndef _AFJHAYYTA_PYOPENCL_HEADER_SEEN_WRAP_CL_HPP 2 #define _AFJHAYYTA_PYOPENCL_HEADER_SEEN_WRAP_CL_HPP 3 4 // CL 1.2 undecided: 5 // clSetPrintfCallback 6 7 // {{{ includes 8 9 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS 10 // #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION 11 12 #ifdef __APPLE__ 13 14 // Mac ------------------------------------------------------------------------ 15 #include <OpenCL/opencl.h> 16 #include "pyopencl_ext.h" 17 #ifdef HAVE_GL 18 19 #define PYOPENCL_GL_SHARING_VERSION 1 20 21 #include <OpenGL/OpenGL.h> 22 #include <OpenCL/cl_gl.h> 23 #include <OpenCL/cl_gl_ext.h> 24 #endif 25 26 #else 27 28 // elsewhere ------------------------------------------------------------------ 29 #define CL_TARGET_OPENCL_VERSION 220 30 31 #include <CL/cl.h> 32 #include "pyopencl_ext.h" 33 34 #if defined(_WIN32) 35 #define NOMINMAX 36 #include <windows.h> 37 #endif 38 39 #ifdef HAVE_GL 40 #include <GL/gl.h> 41 #include <CL/cl_gl.h> 42 #endif 43 44 #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) 45 #define PYOPENCL_GL_SHARING_VERSION cl_khr_gl_sharing 46 #endif 47 48 #endif 49 50 #include <thread> 51 #include <mutex> 52 #include <condition_variable> 53 54 #include <cstdio> 55 #include <stdexcept> 56 #include <iostream> 57 #include <vector> 58 #include <utility> 59 #include <numeric> 60 #include "wrap_helpers.hpp" 61 #include "numpy_init.hpp" 62 #include "tools.hpp" 63 64 #ifdef PYOPENCL_PRETEND_CL_VERSION 65 #define PYOPENCL_CL_VERSION PYOPENCL_PRETEND_CL_VERSION 66 #else 67 68 #if defined(CL_VERSION_2_2) 69 #define PYOPENCL_CL_VERSION 0x2020 70 #elif defined(CL_VERSION_2_1) 71 #define PYOPENCL_CL_VERSION 0x2010 72 #elif defined(CL_VERSION_2_0) 73 #define PYOPENCL_CL_VERSION 0x2000 74 #elif defined(CL_VERSION_1_2) 75 #define PYOPENCL_CL_VERSION 0x1020 76 #elif defined(CL_VERSION_1_1) 77 #define PYOPENCL_CL_VERSION 0x1010 78 #else 79 #define PYOPENCL_CL_VERSION 0x1000 80 #endif 81 82 #endif 83 84 85 #if (PY_VERSION_HEX >= 0x03000000) or defined(PYPY_VERSION) 86 #define PYOPENCL_USE_NEW_BUFFER_INTERFACE 87 #define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) std::move(s) 88 #else 89 #define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) (s) 90 #endif 91 92 93 94 // }}} 95 96 97 98 99 100 // {{{ tools 101 #if PY_VERSION_HEX >= 0x02050000 102 typedef Py_ssize_t PYOPENCL_BUFFER_SIZE_T; 103 #else 104 typedef int PYOPENCL_BUFFER_SIZE_T; 105 #endif 106 107 #define PYOPENCL_CAST_BOOL(B) ((B) ? CL_TRUE : CL_FALSE) 108 109 110 111 112 113 #define PYOPENCL_DEPRECATED(WHAT, KILL_VERSION, EXTRA_MSG) \ 114 { \ 115 PyErr_Warn( \ 116 PyExc_DeprecationWarning, \ 117 WHAT " is deprecated and will stop working in PyOpenCL " KILL_VERSION". " \ 118 EXTRA_MSG); \ 119 } 120 121 #if PYOPENCL_CL_VERSION >= 0x1020 122 123 #define PYOPENCL_GET_EXT_FUN(PLATFORM, NAME, VAR) \ 124 NAME##_fn VAR \ 125 = (NAME##_fn) \ 126 clGetExtensionFunctionAddressForPlatform(PLATFORM, #NAME); \ 127 \ 128 if (!VAR) \ 129 throw error(#NAME, CL_INVALID_VALUE, #NAME \ 130 "not available"); 131 132 #else 133 134 #define PYOPENCL_GET_EXT_FUN(PLATFORM, NAME, VAR) \ 135 NAME##_fn VAR \ 136 = (NAME##_fn) \ 137 clGetExtensionFunctionAddress(#NAME); \ 138 \ 139 if (!VAR) \ 140 throw error(#NAME, CL_INVALID_VALUE, #NAME \ 141 "not available"); 142 143 #endif 144 145 146 #define PYOPENCL_PARSE_PY_DEVICES \ 147 std::vector<cl_device_id> devices_vec; \ 148 cl_uint num_devices; \ 149 cl_device_id *devices; \ 150 \ 151 if (py_devices.ptr() == Py_None) \ 152 { \ 153 num_devices = 0; \ 154 devices = 0; \ 155 } \ 156 else \ 157 { \ 158 for (py::handle py_dev: py_devices) \ 159 devices_vec.push_back( \ 160 (py_dev).cast<device &>().data()); \ 161 num_devices = devices_vec.size(); \ 162 devices = devices_vec.empty( ) ? nullptr : &devices_vec.front(); \ 163 } \ 164 165 166 #define PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(OPERATION) \ 167 try \ 168 { \ 169 OPERATION \ 170 } \ 171 catch (pyopencl::error &e) \ 172 { \ 173 if (!e.is_out_of_memory()) \ 174 throw; \ 175 } \ 176 \ 177 /* If we get here, we got an error from CL. 178 * We should run the Python GC to try and free up 179 * some memory references. */ \ 180 run_python_gc(); \ 181 \ 182 /* Now retry the allocation. If it fails again, 183 * let it fail. */ \ 184 { \ 185 OPERATION \ 186 } 187 188 189 190 191 #define PYOPENCL_RETRY_IF_MEM_ERROR(OPERATION) \ 192 { \ 193 bool failed_with_mem_error = false; \ 194 try \ 195 { \ 196 OPERATION \ 197 } \ 198 catch (pyopencl::error &e) \ 199 { \ 200 failed_with_mem_error = true; \ 201 if (!e.is_out_of_memory()) \ 202 throw; \ 203 } \ 204 \ 205 if (failed_with_mem_error) \ 206 { \ 207 /* If we get here, we got an error from CL. 208 * We should run the Python GC to try and free up 209 * some memory references. */ \ 210 run_python_gc(); \ 211 \ 212 /* Now retry the allocation. If it fails again, 213 * let it fail. */ \ 214 { \ 215 OPERATION \ 216 } \ 217 } \ 218 } 219 220 // }}} 221 222 // {{{ tracing and error reporting 223 #ifdef PYOPENCL_TRACE 224 #define PYOPENCL_PRINT_CALL_TRACE(NAME) \ 225 std::cerr << NAME << std::endl; 226 #define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) \ 227 std::cerr << NAME << " (" << EXTRA_INFO << ')' << std::endl; 228 #else 229 #define PYOPENCL_PRINT_CALL_TRACE(NAME) /*nothing*/ 230 #define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) /*nothing*/ 231 #endif 232 233 #define PYOPENCL_CALL_GUARDED_THREADED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \ 234 { \ 235 PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \ 236 cl_int status_code; \ 237 { \ 238 py::gil_scoped_release release; \ 239 status_code = NAME ARGLIST; \ 240 } \ 241 if (status_code != CL_SUCCESS) \ 242 throw pyopencl::error(#NAME, status_code);\ 243 } 244 245 #define PYOPENCL_CALL_GUARDED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \ 246 { \ 247 PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \ 248 cl_int status_code; \ 249 status_code = NAME ARGLIST; \ 250 if (status_code != CL_SUCCESS) \ 251 throw pyopencl::error(#NAME, status_code);\ 252 } 253 254 #define PYOPENCL_CALL_GUARDED_THREADED(NAME, ARGLIST) \ 255 { \ 256 PYOPENCL_PRINT_CALL_TRACE(#NAME); \ 257 cl_int status_code; \ 258 { \ 259 py::gil_scoped_release release; \ 260 status_code = NAME ARGLIST; \ 261 } \ 262 if (status_code != CL_SUCCESS) \ 263 throw pyopencl::error(#NAME, status_code);\ 264 } 265 266 #define PYOPENCL_CALL_GUARDED(NAME, ARGLIST) \ 267 { \ 268 PYOPENCL_PRINT_CALL_TRACE(#NAME); \ 269 cl_int status_code; \ 270 status_code = NAME ARGLIST; \ 271 if (status_code != CL_SUCCESS) \ 272 throw pyopencl::error(#NAME, status_code);\ 273 } 274 #define PYOPENCL_CALL_GUARDED_CLEANUP(NAME, ARGLIST) \ 275 { \ 276 PYOPENCL_PRINT_CALL_TRACE(#NAME); \ 277 cl_int status_code; \ 278 status_code = NAME ARGLIST; \ 279 if (status_code != CL_SUCCESS) \ 280 std::cerr \ 281 << "PyOpenCL WARNING: a clean-up operation failed (dead context maybe?)" \ 282 << std::endl \ 283 << #NAME " failed with code " << status_code \ 284 << std::endl; \ 285 } 286 287 // }}} 288 289 // {{{ get_info helpers 290 #define PYOPENCL_GET_OPAQUE_INFO(WHAT, FIRST_ARG, SECOND_ARG, CL_TYPE, TYPE) \ 291 { \ 292 CL_TYPE param_value; \ 293 PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ 294 (FIRST_ARG, SECOND_ARG, sizeof(param_value), ¶m_value, 0)); \ 295 if (param_value) \ 296 return py::object(handle_from_new_ptr( \ 297 new TYPE(param_value, /*retain*/ true))); \ 298 else \ 299 return py::none(); \ 300 } 301 302 #define PYOPENCL_GET_VEC_INFO(WHAT, FIRST_ARG, SECOND_ARG, RES_VEC) \ 303 { \ 304 size_t size; \ 305 PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ 306 (FIRST_ARG, SECOND_ARG, 0, 0, &size)); \ 307 \ 308 RES_VEC.resize(size / sizeof(RES_VEC.front())); \ 309 \ 310 PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ 311 (FIRST_ARG, SECOND_ARG, size, \ 312 RES_VEC.empty( ) ? nullptr : &RES_VEC.front(), &size)); \ 313 } 314 315 #define PYOPENCL_GET_STR_INFO(WHAT, FIRST_ARG, SECOND_ARG) \ 316 { \ 317 size_t param_value_size; \ 318 PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ 319 (FIRST_ARG, SECOND_ARG, 0, 0, ¶m_value_size)); \ 320 \ 321 std::vector<char> param_value(param_value_size); \ 322 PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ 323 (FIRST_ARG, SECOND_ARG, param_value_size, \ 324 param_value.empty( ) ? nullptr : ¶m_value.front(), ¶m_value_size)); \ 325 \ 326 return py::cast( \ 327 param_value.empty( ) ? "" : std::string(¶m_value.front(), param_value_size-1)); \ 328 } 329 330 331 332 333 #define PYOPENCL_GET_INTEGRAL_INFO(WHAT, FIRST_ARG, SECOND_ARG, TYPE) \ 334 { \ 335 TYPE param_value; \ 336 PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ 337 (FIRST_ARG, SECOND_ARG, sizeof(param_value), ¶m_value, 0)); \ 338 return py::cast(param_value); \ 339 } 340 341 // }}} 342 343 // {{{ event helpers -------------------------------------------------------------- 344 #define PYOPENCL_PARSE_WAIT_FOR \ 345 cl_uint num_events_in_wait_list = 0; \ 346 std::vector<cl_event> event_wait_list; \ 347 \ 348 if (py_wait_for.ptr() != Py_None) \ 349 { \ 350 event_wait_list.resize(len(py_wait_for)); \ 351 for (py::handle evt: py_wait_for) \ 352 event_wait_list[num_events_in_wait_list++] = \ 353 evt.cast<const event &>().data(); \ 354 } 355 356 #define PYOPENCL_WAITLIST_ARGS \ 357 num_events_in_wait_list, event_wait_list.empty( ) ? nullptr : &event_wait_list.front() 358 359 #define PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, obj) \ 360 try \ 361 { \ 362 return new nanny_event(evt, false, obj); \ 363 } \ 364 catch (...) \ 365 { \ 366 clReleaseEvent(evt); \ 367 throw; \ 368 } 369 370 #define PYOPENCL_RETURN_NEW_EVENT(evt) \ 371 try \ 372 { \ 373 return new event(evt, false); \ 374 } \ 375 catch (...) \ 376 { \ 377 clReleaseEvent(evt); \ 378 throw; \ 379 } 380 381 // }}} 382 383 // {{{ equality testing 384 #define PYOPENCL_EQUALITY_TESTS(cls) \ 385 bool operator==(cls const &other) const \ 386 { return data() == other.data(); } \ 387 bool operator!=(cls const &other) const \ 388 { return data() != other.data(); } \ 389 long hash() const \ 390 { return (long) (intptr_t) data(); } 391 // }}} 392 393 394 395 namespace pyopencl 396 { 397 class program; 398 399 // {{{ error 400 class error : public std::runtime_error 401 { 402 private: 403 std::string m_routine; 404 cl_int m_code; 405 406 // This is here because clLinkProgram returns a program 407 // object *just* so that there is somewhere for it to 408 // stuff the linker logs. :/ 409 bool m_program_initialized; 410 cl_program m_program; 411 412 public: error(const char * routine,cl_int c,const char * msg="")413 error(const char *routine, cl_int c, const char *msg="") 414 : std::runtime_error(msg), m_routine(routine), m_code(c), 415 m_program_initialized(false), m_program(nullptr) 416 { } 417 error(const char * routine,cl_program prg,cl_int c,const char * msg="")418 error(const char *routine, cl_program prg, cl_int c, 419 const char *msg="") 420 : std::runtime_error(msg), m_routine(routine), m_code(c), 421 m_program_initialized(true), m_program(prg) 422 { } 423 ~error()424 virtual ~error() 425 { 426 if (m_program_initialized) 427 clReleaseProgram(m_program); 428 } 429 routine() const430 const std::string &routine() const 431 { 432 return m_routine; 433 } 434 code() const435 cl_int code() const 436 { 437 return m_code; 438 } 439 is_out_of_memory() const440 bool is_out_of_memory() const 441 { 442 return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE 443 || code() == CL_OUT_OF_RESOURCES 444 || code() == CL_OUT_OF_HOST_MEMORY); 445 } 446 447 program *get_program() const; 448 449 }; 450 451 // }}} 452 453 454 // {{{ buffer interface helper 455 // 456 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 457 class py_buffer_wrapper : public noncopyable 458 { 459 private: 460 bool m_initialized; 461 462 public: 463 Py_buffer m_buf; 464 py_buffer_wrapper()465 py_buffer_wrapper() 466 : m_initialized(false) 467 {} 468 get(PyObject * obj,int flags)469 void get(PyObject *obj, int flags) 470 { 471 #ifdef PYPY_VERSION 472 // work around https://bitbucket.org/pypy/pypy/issues/2873 473 if (flags & PyBUF_ANY_CONTIGUOUS) 474 { 475 int flags_wo_cont = flags & ~PyBUF_ANY_CONTIGUOUS; 476 if (PyObject_GetBuffer(obj, &m_buf, flags_wo_cont | PyBUF_C_CONTIGUOUS)) 477 { 478 PyErr_Clear(); 479 if (PyObject_GetBuffer(obj, &m_buf, flags_wo_cont | PyBUF_F_CONTIGUOUS)) 480 throw py::error_already_set(); 481 } 482 } 483 else 484 #endif 485 if (PyObject_GetBuffer(obj, &m_buf, flags)) 486 throw py::error_already_set(); 487 488 m_initialized = true; 489 } 490 ~py_buffer_wrapper()491 virtual ~py_buffer_wrapper() 492 { 493 if (m_initialized) 494 PyBuffer_Release(&m_buf); 495 } 496 }; 497 #endif 498 499 // }}} 500 501 inline get_cl_header_version()502 py::tuple get_cl_header_version() 503 { 504 return py::make_tuple( 505 PYOPENCL_CL_VERSION >> (3*4), 506 (PYOPENCL_CL_VERSION >> (1*4)) & 0xff 507 ); 508 } 509 510 511 // {{{ platform 512 513 class platform : noncopyable 514 { 515 private: 516 cl_platform_id m_platform; 517 518 public: platform(cl_platform_id pid)519 platform(cl_platform_id pid) 520 : m_platform(pid) 521 { } 522 platform(cl_platform_id pid,bool)523 platform(cl_platform_id pid, bool /*retain (ignored)*/) 524 : m_platform(pid) 525 { } 526 data() const527 cl_platform_id data() const 528 { 529 return m_platform; 530 } 531 532 PYOPENCL_EQUALITY_TESTS(platform); 533 get_info(cl_platform_info param_name) const534 py::object get_info(cl_platform_info param_name) const 535 { 536 switch (param_name) 537 { 538 case CL_PLATFORM_PROFILE: 539 case CL_PLATFORM_VERSION: 540 case CL_PLATFORM_NAME: 541 case CL_PLATFORM_VENDOR: 542 #if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) 543 case CL_PLATFORM_EXTENSIONS: 544 #endif 545 PYOPENCL_GET_STR_INFO(Platform, m_platform, param_name); 546 547 default: 548 throw error("Platform.get_info", CL_INVALID_VALUE); 549 } 550 } 551 552 py::list get_devices(cl_device_type devtype); 553 }; 554 555 556 557 558 inline get_platforms()559 py::list get_platforms() 560 { 561 cl_uint num_platforms = 0; 562 PYOPENCL_CALL_GUARDED(clGetPlatformIDs, (0, 0, &num_platforms)); 563 564 std::vector<cl_platform_id> platforms(num_platforms); 565 PYOPENCL_CALL_GUARDED(clGetPlatformIDs, 566 (num_platforms, platforms.empty( ) ? nullptr : &platforms.front(), &num_platforms)); 567 568 py::list result; 569 for (cl_platform_id pid: platforms) 570 result.append(handle_from_new_ptr( 571 new platform(pid))); 572 573 return result; 574 } 575 576 // }}} 577 578 579 // {{{ device 580 581 class device : noncopyable 582 { 583 public: 584 enum reference_type_t { 585 REF_NOT_OWNABLE, 586 #if PYOPENCL_CL_VERSION >= 0x1020 587 REF_CL_1_2, 588 #endif 589 }; 590 private: 591 cl_device_id m_device; 592 reference_type_t m_ref_type; 593 594 public: device(cl_device_id did)595 device(cl_device_id did) 596 : m_device(did), m_ref_type(REF_NOT_OWNABLE) 597 { } 598 device(cl_device_id did,bool retain,reference_type_t ref_type=REF_NOT_OWNABLE)599 device(cl_device_id did, bool retain, reference_type_t ref_type=REF_NOT_OWNABLE) 600 : m_device(did), m_ref_type(ref_type) 601 { 602 if (retain && ref_type != REF_NOT_OWNABLE) 603 { 604 if (false) 605 { } 606 607 #if PYOPENCL_CL_VERSION >= 0x1020 608 else if (ref_type == REF_CL_1_2) 609 { 610 PYOPENCL_CALL_GUARDED(clRetainDevice, (did)); 611 } 612 #endif 613 614 else 615 throw error("Device", CL_INVALID_VALUE, 616 "cannot own references to devices when device fission or CL 1.2 is not available"); 617 } 618 } 619 ~device()620 ~device() 621 { 622 #if PYOPENCL_CL_VERSION >= 0x1020 623 if (m_ref_type == REF_CL_1_2) 624 PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseDevice, (m_device)); 625 #endif 626 } 627 data() const628 cl_device_id data() const 629 { 630 return m_device; 631 } 632 633 PYOPENCL_EQUALITY_TESTS(device); 634 get_info(cl_device_info param_name) const635 py::object get_info(cl_device_info param_name) const 636 { 637 #define DEV_GET_INT_INF(TYPE) \ 638 PYOPENCL_GET_INTEGRAL_INFO(Device, m_device, param_name, TYPE); 639 640 switch (param_name) 641 { 642 case CL_DEVICE_TYPE: DEV_GET_INT_INF(cl_device_type); 643 case CL_DEVICE_VENDOR_ID: DEV_GET_INT_INF(cl_uint); 644 case CL_DEVICE_MAX_COMPUTE_UNITS: DEV_GET_INT_INF(cl_uint); 645 case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: DEV_GET_INT_INF(cl_uint); 646 case CL_DEVICE_MAX_WORK_GROUP_SIZE: DEV_GET_INT_INF(size_t); 647 648 case CL_DEVICE_MAX_WORK_ITEM_SIZES: 649 { 650 std::vector<size_t> result; 651 PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); 652 PYOPENCL_RETURN_VECTOR(size_t, result); 653 } 654 655 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint); 656 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint); 657 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint); 658 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint); 659 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint); 660 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint); 661 662 case CL_DEVICE_MAX_CLOCK_FREQUENCY: DEV_GET_INT_INF(cl_uint); 663 case CL_DEVICE_ADDRESS_BITS: DEV_GET_INT_INF(cl_uint); 664 case CL_DEVICE_MAX_READ_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); 665 case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); 666 case CL_DEVICE_MAX_MEM_ALLOC_SIZE: DEV_GET_INT_INF(cl_ulong); 667 case CL_DEVICE_IMAGE2D_MAX_WIDTH: DEV_GET_INT_INF(size_t); 668 case CL_DEVICE_IMAGE2D_MAX_HEIGHT: DEV_GET_INT_INF(size_t); 669 case CL_DEVICE_IMAGE3D_MAX_WIDTH: DEV_GET_INT_INF(size_t); 670 case CL_DEVICE_IMAGE3D_MAX_HEIGHT: DEV_GET_INT_INF(size_t); 671 case CL_DEVICE_IMAGE3D_MAX_DEPTH: DEV_GET_INT_INF(size_t); 672 case CL_DEVICE_IMAGE_SUPPORT: DEV_GET_INT_INF(cl_bool); 673 case CL_DEVICE_MAX_PARAMETER_SIZE: DEV_GET_INT_INF(size_t); 674 case CL_DEVICE_MAX_SAMPLERS: DEV_GET_INT_INF(cl_uint); 675 case CL_DEVICE_MEM_BASE_ADDR_ALIGN: DEV_GET_INT_INF(cl_uint); 676 case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: DEV_GET_INT_INF(cl_uint); 677 case CL_DEVICE_SINGLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); 678 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG 679 case CL_DEVICE_DOUBLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); 680 #endif 681 #ifdef CL_DEVICE_HALF_FP_CONFIG 682 case CL_DEVICE_HALF_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); 683 #endif 684 685 case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: DEV_GET_INT_INF(cl_device_mem_cache_type); 686 case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: DEV_GET_INT_INF(cl_uint); 687 case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: DEV_GET_INT_INF(cl_ulong); 688 case CL_DEVICE_GLOBAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong); 689 690 case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: DEV_GET_INT_INF(cl_ulong); 691 case CL_DEVICE_MAX_CONSTANT_ARGS: DEV_GET_INT_INF(cl_uint); 692 case CL_DEVICE_LOCAL_MEM_TYPE: DEV_GET_INT_INF(cl_device_local_mem_type); 693 case CL_DEVICE_LOCAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong); 694 case CL_DEVICE_ERROR_CORRECTION_SUPPORT: DEV_GET_INT_INF(cl_bool); 695 case CL_DEVICE_PROFILING_TIMER_RESOLUTION: DEV_GET_INT_INF(size_t); 696 case CL_DEVICE_ENDIAN_LITTLE: DEV_GET_INT_INF(cl_bool); 697 case CL_DEVICE_AVAILABLE: DEV_GET_INT_INF(cl_bool); 698 case CL_DEVICE_COMPILER_AVAILABLE: DEV_GET_INT_INF(cl_bool); 699 case CL_DEVICE_EXECUTION_CAPABILITIES: DEV_GET_INT_INF(cl_device_exec_capabilities); 700 #if PYOPENCL_CL_VERSION >= 0x2000 701 case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); 702 #else 703 case CL_DEVICE_QUEUE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); 704 #endif 705 706 case CL_DEVICE_NAME: 707 case CL_DEVICE_VENDOR: 708 case CL_DRIVER_VERSION: 709 case CL_DEVICE_PROFILE: 710 case CL_DEVICE_VERSION: 711 case CL_DEVICE_EXTENSIONS: 712 PYOPENCL_GET_STR_INFO(Device, m_device, param_name); 713 714 case CL_DEVICE_PLATFORM: 715 PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_platform_id, platform); 716 717 #if PYOPENCL_CL_VERSION >= 0x1010 718 case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint); 719 720 case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint); 721 case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint); 722 case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint); 723 case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint); 724 case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint); 725 case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint); 726 case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint); 727 728 case CL_DEVICE_HOST_UNIFIED_MEMORY: DEV_GET_INT_INF(cl_bool); 729 case CL_DEVICE_OPENCL_C_VERSION: 730 PYOPENCL_GET_STR_INFO(Device, m_device, param_name); 731 #endif 732 #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 733 case CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV: 734 case CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV: 735 case CL_DEVICE_REGISTERS_PER_BLOCK_NV: 736 case CL_DEVICE_WARP_SIZE_NV: 737 DEV_GET_INT_INF(cl_uint); 738 case CL_DEVICE_GPU_OVERLAP_NV: 739 case CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV: 740 case CL_DEVICE_INTEGRATED_MEMORY_NV: 741 DEV_GET_INT_INF(cl_bool); 742 #endif 743 #ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 744 case CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV: 745 DEV_GET_INT_INF(cl_uint); 746 #endif 747 #ifdef CL_DEVICE_PCI_BUS_ID_NV 748 case CL_DEVICE_PCI_BUS_ID_NV: 749 DEV_GET_INT_INF(cl_uint); 750 #endif 751 #ifdef CL_DEVICE_PCI_SLOT_ID_NV 752 case CL_DEVICE_PCI_SLOT_ID_NV: 753 DEV_GET_INT_INF(cl_uint); 754 #endif 755 #ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 756 case CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD: DEV_GET_INT_INF(cl_bool); 757 #endif 758 #ifdef CL_DEVICE_GFXIP_MAJOR_AMD 759 case CL_DEVICE_GFXIP_MAJOR_AMD: DEV_GET_INT_INF(cl_uint); 760 #endif 761 #ifdef CL_DEVICE_GFXIP_MINOR_AMD 762 case CL_DEVICE_GFXIP_MINOR_AMD: DEV_GET_INT_INF(cl_uint); 763 #endif 764 #ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 765 case CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD: DEV_GET_INT_INF(cl_uint); 766 #endif 767 #if PYOPENCL_CL_VERSION >= 0x1020 768 case CL_DEVICE_LINKER_AVAILABLE: DEV_GET_INT_INF(cl_bool); 769 case CL_DEVICE_BUILT_IN_KERNELS: 770 PYOPENCL_GET_STR_INFO(Device, m_device, param_name); 771 case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: DEV_GET_INT_INF(size_t); 772 case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: DEV_GET_INT_INF(size_t); 773 case CL_DEVICE_PARENT_DEVICE: 774 PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_device_id, device); 775 case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: DEV_GET_INT_INF(cl_uint); 776 case CL_DEVICE_PARTITION_TYPE: 777 case CL_DEVICE_PARTITION_PROPERTIES: 778 { 779 std::vector<cl_device_partition_property> result; 780 PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); 781 PYOPENCL_RETURN_VECTOR(cl_device_partition_property, result); 782 } 783 case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: 784 { 785 #if defined(__GNUG__) && !defined(__clang__) 786 #pragma GCC diagnostic push 787 // what's being ignored here is an alignment attribute to native size, which 788 // shouldn't matter on the relevant ABIs that I'm aware of. 789 #pragma GCC diagnostic ignored "-Wignored-attributes" 790 #endif 791 std::vector<cl_device_affinity_domain> result; 792 #if defined(__GNUG__) && !defined(__clang__) 793 #pragma GCC diagnostic pop 794 #endif 795 PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); 796 PYOPENCL_RETURN_VECTOR(cl_device_affinity_domain, result); 797 } 798 case CL_DEVICE_REFERENCE_COUNT: DEV_GET_INT_INF(cl_uint); 799 case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: DEV_GET_INT_INF(cl_bool); 800 case CL_DEVICE_PRINTF_BUFFER_SIZE: DEV_GET_INT_INF(cl_bool); 801 #endif 802 // {{{ AMD dev attrs cl_amd_device_attribute_query 803 // 804 // types of AMD dev attrs divined from 805 // https://www.khronos.org/registry/cl/api/1.2/cl.hpp 806 #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 807 case CL_DEVICE_PROFILING_TIMER_OFFSET_AMD: DEV_GET_INT_INF(cl_ulong); 808 #endif 809 /* FIXME 810 #ifdef CL_DEVICE_TOPOLOGY_AMD 811 case CL_DEVICE_TOPOLOGY_AMD: 812 #endif 813 */ 814 #ifdef CL_DEVICE_BOARD_NAME_AMD 815 case CL_DEVICE_BOARD_NAME_AMD: ; 816 PYOPENCL_GET_STR_INFO(Device, m_device, param_name); 817 #endif 818 #ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 819 case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: 820 { 821 std::vector<size_t> result; 822 PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); 823 PYOPENCL_RETURN_VECTOR(size_t, result); 824 } 825 #endif 826 #ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 827 case CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); 828 #endif 829 #ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 830 case CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD: DEV_GET_INT_INF(cl_uint); 831 #endif 832 #ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 833 case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD: DEV_GET_INT_INF(cl_uint); 834 #endif 835 #ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 836 case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); 837 #endif 838 #ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 839 case CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); 840 #endif 841 #ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD 842 case CL_DEVICE_LOCAL_MEM_BANKS_AMD: DEV_GET_INT_INF(cl_uint); 843 #endif 844 // }}} 845 846 #ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 847 case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT: DEV_GET_INT_INF(cl_uint); 848 #endif 849 #if PYOPENCL_CL_VERSION >= 0x2000 850 case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); 851 case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: DEV_GET_INT_INF(size_t); 852 case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); 853 case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: DEV_GET_INT_INF(cl_uint); 854 case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: DEV_GET_INT_INF(cl_uint); 855 case CL_DEVICE_MAX_ON_DEVICE_QUEUES: DEV_GET_INT_INF(cl_uint); 856 case CL_DEVICE_MAX_ON_DEVICE_EVENTS: DEV_GET_INT_INF(cl_uint); 857 case CL_DEVICE_SVM_CAPABILITIES: DEV_GET_INT_INF(cl_device_svm_capabilities); 858 case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: DEV_GET_INT_INF(size_t); 859 case CL_DEVICE_MAX_PIPE_ARGS: DEV_GET_INT_INF(cl_uint); 860 case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: DEV_GET_INT_INF(cl_uint); 861 case CL_DEVICE_PIPE_MAX_PACKET_SIZE: DEV_GET_INT_INF(cl_uint); 862 case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); 863 case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); 864 case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); 865 #endif 866 #if PYOPENCL_CL_VERSION >= 0x2010 867 case CL_DEVICE_IL_VERSION: 868 PYOPENCL_GET_STR_INFO(Device, m_device, param_name); 869 case CL_DEVICE_MAX_NUM_SUB_GROUPS: DEV_GET_INT_INF(cl_uint); 870 case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: DEV_GET_INT_INF(cl_bool); 871 #endif 872 #ifdef CL_DEVICE_ME_VERSION_INTEL 873 case CL_DEVICE_ME_VERSION_INTEL: DEV_GET_INT_INF(cl_uint); 874 #endif 875 #ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 876 case CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM: DEV_GET_INT_INF(cl_uint); 877 #endif 878 #ifdef CL_DEVICE_PAGE_SIZE_QCOM 879 case CL_DEVICE_PAGE_SIZE_QCOM: DEV_GET_INT_INF(cl_uint); 880 #endif 881 #ifdef CL_DEVICE_SPIR_VERSIONS 882 case CL_DEVICE_SPIR_VERSIONS: 883 PYOPENCL_GET_STR_INFO(Device, m_device, param_name); 884 #endif 885 #ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA 886 case CL_DEVICE_CORE_TEMPERATURE_ALTERA: DEV_GET_INT_INF(cl_int); 887 #endif 888 889 #ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 890 case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: 891 { 892 std::vector<cl_uint> result; 893 PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); 894 PYOPENCL_RETURN_VECTOR(cl_uint, result); 895 } 896 #endif 897 #ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 898 case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: DEV_GET_INT_INF(cl_uint); 899 #endif 900 901 default: 902 throw error("Device.get_info", CL_INVALID_VALUE); 903 } 904 } 905 906 #if PYOPENCL_CL_VERSION >= 0x1020 create_sub_devices(py::object py_properties)907 py::list create_sub_devices(py::object py_properties) 908 { 909 std::vector<cl_device_partition_property> properties; 910 911 COPY_PY_LIST(cl_device_partition_property, properties); 912 properties.push_back(0); 913 914 cl_device_partition_property *props_ptr 915 = properties.empty( ) ? nullptr : &properties.front(); 916 917 cl_uint num_entries; 918 PYOPENCL_CALL_GUARDED(clCreateSubDevices, 919 (m_device, props_ptr, 0, nullptr, &num_entries)); 920 921 std::vector<cl_device_id> result; 922 result.resize(num_entries); 923 924 PYOPENCL_CALL_GUARDED(clCreateSubDevices, 925 (m_device, props_ptr, num_entries, &result.front(), nullptr)); 926 927 py::list py_result; 928 for (cl_device_id did: result) 929 py_result.append(handle_from_new_ptr( 930 new pyopencl::device(did, /*retain*/true, 931 device::REF_CL_1_2))); 932 return py_result; 933 } 934 #endif 935 936 }; 937 938 939 940 get_devices(cl_device_type devtype)941 inline py::list platform::get_devices(cl_device_type devtype) 942 { 943 cl_uint num_devices = 0; 944 PYOPENCL_PRINT_CALL_TRACE("clGetDeviceIDs"); 945 { 946 cl_int status_code; 947 status_code = clGetDeviceIDs(m_platform, devtype, 0, 0, &num_devices); 948 if (status_code == CL_DEVICE_NOT_FOUND) 949 num_devices = 0; 950 else if (status_code != CL_SUCCESS) \ 951 throw pyopencl::error("clGetDeviceIDs", status_code); 952 } 953 954 if (num_devices == 0) 955 return py::list(); 956 957 std::vector<cl_device_id> devices(num_devices); 958 PYOPENCL_CALL_GUARDED(clGetDeviceIDs, 959 (m_platform, devtype, 960 num_devices, devices.empty( ) ? nullptr : &devices.front(), &num_devices)); 961 962 py::list result; 963 for (cl_device_id did: devices) 964 result.append(handle_from_new_ptr( 965 new device(did))); 966 967 return result; 968 } 969 970 // }}} 971 972 973 // {{{ context 974 975 class context : public noncopyable 976 { 977 private: 978 cl_context m_context; 979 980 public: context(cl_context ctx,bool retain)981 context(cl_context ctx, bool retain) 982 : m_context(ctx) 983 { 984 if (retain) 985 PYOPENCL_CALL_GUARDED(clRetainContext, (ctx)); 986 } 987 ~context()988 ~context() 989 { 990 PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseContext, 991 (m_context)); 992 } 993 data() const994 cl_context data() const 995 { 996 return m_context; 997 } 998 999 PYOPENCL_EQUALITY_TESTS(context); 1000 get_info(cl_context_info param_name) const1001 py::object get_info(cl_context_info param_name) const 1002 { 1003 switch (param_name) 1004 { 1005 case CL_CONTEXT_REFERENCE_COUNT: 1006 PYOPENCL_GET_INTEGRAL_INFO( 1007 Context, m_context, param_name, cl_uint); 1008 1009 case CL_CONTEXT_DEVICES: 1010 { 1011 std::vector<cl_device_id> result; 1012 PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); 1013 1014 py::list py_result; 1015 for (cl_device_id did: result) 1016 py_result.append(handle_from_new_ptr( 1017 new pyopencl::device(did))); 1018 return py_result; 1019 } 1020 1021 case CL_CONTEXT_PROPERTIES: 1022 { 1023 std::vector<cl_context_properties> result; 1024 PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); 1025 1026 py::list py_result; 1027 for (size_t i = 0; i < result.size(); i+=2) 1028 { 1029 cl_context_properties key = result[i]; 1030 py::object value; 1031 switch (key) 1032 { 1033 case CL_CONTEXT_PLATFORM: 1034 { 1035 value = py::object( 1036 handle_from_new_ptr(new platform( 1037 reinterpret_cast<cl_platform_id>(result[i+1])))); 1038 break; 1039 } 1040 1041 #if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) 1042 #if defined(__APPLE__) && defined(HAVE_GL) 1043 case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE: 1044 #else 1045 case CL_GL_CONTEXT_KHR: 1046 case CL_EGL_DISPLAY_KHR: 1047 case CL_GLX_DISPLAY_KHR: 1048 case CL_WGL_HDC_KHR: 1049 case CL_CGL_SHAREGROUP_KHR: 1050 #endif 1051 value = py::cast(result[i+1]); 1052 break; 1053 1054 #endif 1055 case 0: 1056 break; 1057 1058 default: 1059 throw error("Context.get_info", CL_INVALID_VALUE, 1060 "unknown context_property key encountered"); 1061 } 1062 1063 py_result.append(py::make_tuple(result[i], value)); 1064 } 1065 return py_result; 1066 } 1067 1068 #if PYOPENCL_CL_VERSION >= 0x1010 1069 case CL_CONTEXT_NUM_DEVICES: 1070 PYOPENCL_GET_INTEGRAL_INFO( 1071 Context, m_context, param_name, cl_uint); 1072 #endif 1073 1074 default: 1075 throw error("Context.get_info", CL_INVALID_VALUE); 1076 } 1077 } 1078 1079 1080 // not exposed to python get_hex_platform_version() const1081 int get_hex_platform_version() const 1082 { 1083 std::vector<cl_device_id> devices; 1084 PYOPENCL_GET_VEC_INFO(Context, m_context, CL_CONTEXT_DEVICES, devices); 1085 1086 if (devices.size() == 0) 1087 throw error("Context._get_hex_version", CL_INVALID_VALUE, 1088 "platform has no devices"); 1089 1090 cl_platform_id plat; 1091 1092 PYOPENCL_CALL_GUARDED(clGetDeviceInfo, 1093 (devices[0], CL_DEVICE_PLATFORM, sizeof(plat), &plat, nullptr)); 1094 1095 std::string plat_version; 1096 { 1097 size_t param_value_size; 1098 PYOPENCL_CALL_GUARDED(clGetPlatformInfo, 1099 (plat, CL_PLATFORM_VERSION, 0, 0, ¶m_value_size)); 1100 1101 std::vector<char> param_value(param_value_size); 1102 PYOPENCL_CALL_GUARDED(clGetPlatformInfo, 1103 (plat, CL_PLATFORM_VERSION, param_value_size, 1104 param_value.empty( ) ? nullptr : ¶m_value.front(), ¶m_value_size)); 1105 1106 plat_version = 1107 param_value.empty( ) ? "" : std::string(¶m_value.front(), param_value_size-1); 1108 } 1109 1110 int major_ver, minor_ver; 1111 errno = 0; 1112 int match_count = sscanf(plat_version.c_str(), "OpenCL %d.%d ", &major_ver, &minor_ver); 1113 if (errno || match_count != 2) 1114 throw error("Context._get_hex_version", CL_INVALID_VALUE, 1115 "Platform version string did not have expected format"); 1116 1117 return major_ver << 12 | minor_ver << 4; 1118 } 1119 }; 1120 1121 1122 inline parse_context_properties(py::object py_properties)1123 std::vector<cl_context_properties> parse_context_properties( 1124 py::object py_properties) 1125 { 1126 std::vector<cl_context_properties> props; 1127 1128 if (py_properties.ptr() != Py_None) 1129 { 1130 for (py::handle prop_tuple_py: py_properties) 1131 { 1132 py::tuple prop_tuple(prop_tuple_py.cast<py::tuple>()); 1133 1134 if (len(prop_tuple) != 2) 1135 throw error("Context", CL_INVALID_VALUE, "property tuple must have length 2"); 1136 cl_context_properties prop = prop_tuple[0].cast<cl_context_properties>(); 1137 props.push_back(prop); 1138 1139 if (prop == CL_CONTEXT_PLATFORM) 1140 { 1141 props.push_back( 1142 reinterpret_cast<cl_context_properties>( 1143 prop_tuple[1].cast<const platform &>().data())); 1144 } 1145 #if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) 1146 #if defined(_WIN32) 1147 else if (prop == CL_WGL_HDC_KHR) 1148 { 1149 // size_t is a stand-in for HANDLE, hopefully has the same size. 1150 size_t hnd = (prop_tuple[1]).cast<size_t>(); 1151 props.push_back(hnd); 1152 } 1153 #endif 1154 else if ( 1155 #if defined(__APPLE__) && defined(HAVE_GL) 1156 prop == CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE 1157 #else 1158 prop == CL_GL_CONTEXT_KHR 1159 || prop == CL_EGL_DISPLAY_KHR 1160 || prop == CL_GLX_DISPLAY_KHR 1161 || prop == CL_CGL_SHAREGROUP_KHR 1162 #endif 1163 ) 1164 { 1165 py::object ctypes = py::module::import("ctypes"); 1166 py::object prop = prop_tuple[1], c_void_p = ctypes.attr("c_void_p"); 1167 py::object ptr = ctypes.attr("cast")(prop, c_void_p); 1168 props.push_back(ptr.attr("value").cast<cl_context_properties>()); 1169 } 1170 #endif 1171 else 1172 throw error("Context", CL_INVALID_VALUE, "invalid context property"); 1173 } 1174 props.push_back(0); 1175 } 1176 1177 return props; 1178 } 1179 1180 1181 inline create_context_inner(py::object py_devices,py::object py_properties,py::object py_dev_type)1182 context *create_context_inner(py::object py_devices, py::object py_properties, 1183 py::object py_dev_type) 1184 { 1185 std::vector<cl_context_properties> props 1186 = parse_context_properties(py_properties); 1187 1188 cl_context_properties *props_ptr 1189 = props.empty( ) ? nullptr : &props.front(); 1190 1191 cl_int status_code; 1192 1193 cl_context ctx; 1194 1195 // from device list 1196 if (py_devices.ptr() != Py_None) 1197 { 1198 if (py_dev_type.ptr() != Py_None) 1199 throw error("Context", CL_INVALID_VALUE, 1200 "one of 'devices' or 'dev_type' must be None"); 1201 1202 std::vector<cl_device_id> devices; 1203 for (py::handle py_dev: py_devices) 1204 devices.push_back(py_dev.cast<const device &>().data()); 1205 1206 PYOPENCL_PRINT_CALL_TRACE("clCreateContext"); 1207 ctx = clCreateContext( 1208 props_ptr, 1209 devices.size(), 1210 devices.empty( ) ? nullptr : &devices.front(), 1211 0, 0, &status_code); 1212 } 1213 // from dev_type 1214 else 1215 { 1216 cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT; 1217 if (py_dev_type.ptr() != Py_None) 1218 dev_type = py_dev_type.cast<cl_device_type>(); 1219 1220 PYOPENCL_PRINT_CALL_TRACE("clCreateContextFromType"); 1221 ctx = clCreateContextFromType(props_ptr, dev_type, 0, 0, &status_code); 1222 } 1223 1224 if (status_code != CL_SUCCESS) 1225 throw pyopencl::error("Context", status_code); 1226 1227 try 1228 { 1229 return new context(ctx, false); 1230 } 1231 catch (...) 1232 { 1233 PYOPENCL_CALL_GUARDED(clReleaseContext, (ctx)); 1234 throw; 1235 } 1236 } 1237 1238 1239 inline create_context(py::object py_devices,py::object py_properties,py::object py_dev_type)1240 context *create_context(py::object py_devices, py::object py_properties, 1241 py::object py_dev_type) 1242 { 1243 PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( 1244 return create_context_inner(py_devices, py_properties, py_dev_type); 1245 ) 1246 } 1247 1248 // }}} 1249 1250 1251 // {{{ command_queue 1252 1253 class command_queue 1254 { 1255 private: 1256 cl_command_queue m_queue; 1257 1258 public: command_queue(cl_command_queue q,bool retain)1259 command_queue(cl_command_queue q, bool retain) 1260 : m_queue(q) 1261 { 1262 if (retain) 1263 PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (q)); 1264 } 1265 command_queue(command_queue const & src)1266 command_queue(command_queue const &src) 1267 : m_queue(src.m_queue) 1268 { 1269 PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); 1270 } 1271 command_queue(const context & ctx,const device * py_dev=nullptr,py::object py_props=py::none ())1272 command_queue( 1273 const context &ctx, 1274 const device *py_dev=nullptr, 1275 py::object py_props=py::none()) 1276 { 1277 cl_device_id dev; 1278 if (py_dev) 1279 dev = py_dev->data(); 1280 else 1281 { 1282 std::vector<cl_device_id> devs; 1283 PYOPENCL_GET_VEC_INFO(Context, ctx.data(), CL_CONTEXT_DEVICES, devs); 1284 if (devs.size() == 0) 1285 throw pyopencl::error("CommandQueue", CL_INVALID_VALUE, 1286 "context doesn't have any devices? -- don't know which one to default to"); 1287 dev = devs[0]; 1288 } 1289 1290 int hex_plat_version = ctx.get_hex_platform_version(); 1291 1292 bool props_given_as_numeric; 1293 cl_command_queue_properties num_props; 1294 if (py_props.is_none()) 1295 { 1296 num_props = 0; 1297 props_given_as_numeric = true; 1298 } 1299 else 1300 { 1301 try 1302 { 1303 num_props = py::cast<cl_command_queue_properties>(py_props); 1304 props_given_as_numeric = true; 1305 } 1306 catch (py::cast_error &) 1307 { 1308 props_given_as_numeric = false; 1309 } 1310 } 1311 1312 if (props_given_as_numeric) 1313 { 1314 #if PYOPENCL_CL_VERSION >= 0x2000 1315 if (hex_plat_version >= 0x2000) 1316 { 1317 cl_queue_properties props_list[] = { CL_QUEUE_PROPERTIES, num_props, 0 }; 1318 1319 cl_int status_code; 1320 1321 PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueueWithProperties"); 1322 m_queue = clCreateCommandQueueWithProperties( 1323 ctx.data(), dev, props_list, &status_code); 1324 1325 if (status_code != CL_SUCCESS) 1326 throw pyopencl::error("CommandQueue", status_code); 1327 } 1328 else 1329 #endif 1330 { 1331 cl_int status_code; 1332 1333 PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue"); 1334 #if defined(__GNUG__) && !defined(__clang__) 1335 #pragma GCC diagnostic push 1336 #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 1337 #endif 1338 m_queue = clCreateCommandQueue( 1339 ctx.data(), dev, num_props, &status_code); 1340 #if defined(__GNUG__) && !defined(__clang__) 1341 #pragma GCC diagnostic pop 1342 #endif 1343 if (status_code != CL_SUCCESS) 1344 throw pyopencl::error("CommandQueue", status_code); 1345 } 1346 } 1347 else 1348 { 1349 #if PYOPENCL_CL_VERSION >= 0x2000 1350 throw error("CommandQueue", CL_INVALID_VALUE, 1351 "queue properties given as an iterable, " 1352 "which is only allowed when PyOpenCL was built " 1353 "against an OpenCL 2+ header"); 1354 1355 if (hex_plat_version < 0x2000) 1356 { 1357 std::cerr << 1358 "queue properties given as an iterable, " 1359 "which uses an OpenCL 2+-only interface, " 1360 "but the context's platform does not " 1361 "declare OpenCL 2 support. Proceeding " 1362 "as requested, but the next thing you see " 1363 "may be a crash." << std:: endl; 1364 } 1365 1366 cl_queue_properties props[py::len(py_props) + 1]; 1367 { 1368 size_t i = 0; 1369 for (auto prop: py_props) 1370 props[i++] = py::cast<cl_queue_properties>(prop); 1371 props[i++] = 0; 1372 } 1373 1374 cl_int status_code; 1375 PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueueWithProperties"); 1376 m_queue = clCreateCommandQueueWithProperties( 1377 ctx.data(), dev, props, &status_code); 1378 1379 if (status_code != CL_SUCCESS) 1380 throw pyopencl::error("CommandQueue", status_code); 1381 #endif 1382 } 1383 } 1384 ~command_queue()1385 ~command_queue() 1386 { 1387 PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, 1388 (m_queue)); 1389 } 1390 data() const1391 const cl_command_queue data() const 1392 { return m_queue; } 1393 1394 PYOPENCL_EQUALITY_TESTS(command_queue); 1395 get_info(cl_command_queue_info param_name) const1396 py::object get_info(cl_command_queue_info param_name) const 1397 { 1398 switch (param_name) 1399 { 1400 case CL_QUEUE_CONTEXT: 1401 PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name, 1402 cl_context, context); 1403 case CL_QUEUE_DEVICE: 1404 PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name, 1405 cl_device_id, device); 1406 case CL_QUEUE_REFERENCE_COUNT: 1407 PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name, 1408 cl_uint); 1409 case CL_QUEUE_PROPERTIES: 1410 PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name, 1411 cl_command_queue_properties); 1412 1413 default: 1414 throw error("CommandQueue.get_info", CL_INVALID_VALUE); 1415 } 1416 } 1417 get_context() const1418 std::unique_ptr<context> get_context() const 1419 { 1420 cl_context param_value; 1421 PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, 1422 (m_queue, CL_QUEUE_CONTEXT, sizeof(param_value), ¶m_value, 0)); 1423 return std::unique_ptr<context>( 1424 new context(param_value, /*retain*/ true)); 1425 } 1426 1427 #if PYOPENCL_CL_VERSION < 0x1010 set_property(cl_command_queue_properties prop,bool enable)1428 cl_command_queue_properties set_property( 1429 cl_command_queue_properties prop, 1430 bool enable) 1431 { 1432 cl_command_queue_properties old_prop; 1433 PYOPENCL_CALL_GUARDED(clSetCommandQueueProperty, 1434 (m_queue, prop, PYOPENCL_CAST_BOOL(enable), &old_prop)); 1435 return old_prop; 1436 } 1437 #endif 1438 flush()1439 void flush() 1440 { PYOPENCL_CALL_GUARDED(clFlush, (m_queue)); } finish()1441 void finish() 1442 { PYOPENCL_CALL_GUARDED_THREADED(clFinish, (m_queue)); } 1443 }; 1444 1445 // }}} 1446 1447 1448 // {{{ event/synchronization 1449 1450 class event : noncopyable 1451 { 1452 private: 1453 cl_event m_event; 1454 1455 public: event(cl_event event,bool retain)1456 event(cl_event event, bool retain) 1457 : m_event(event) 1458 { 1459 if (retain) 1460 PYOPENCL_CALL_GUARDED(clRetainEvent, (event)); 1461 } 1462 event(event const & src)1463 event(event const &src) 1464 : m_event(src.m_event) 1465 { PYOPENCL_CALL_GUARDED(clRetainEvent, (m_event)); } 1466 ~event()1467 virtual ~event() 1468 { 1469 PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseEvent, 1470 (m_event)); 1471 } 1472 data() const1473 const cl_event data() const 1474 { return m_event; } 1475 1476 PYOPENCL_EQUALITY_TESTS(event); 1477 get_info(cl_event_info param_name) const1478 py::object get_info(cl_event_info param_name) const 1479 { 1480 switch (param_name) 1481 { 1482 case CL_EVENT_COMMAND_QUEUE: 1483 PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name, 1484 cl_command_queue, command_queue); 1485 case CL_EVENT_COMMAND_TYPE: 1486 PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, 1487 cl_command_type); 1488 case CL_EVENT_COMMAND_EXECUTION_STATUS: 1489 PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, 1490 cl_int); 1491 case CL_EVENT_REFERENCE_COUNT: 1492 PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, 1493 cl_uint); 1494 #if PYOPENCL_CL_VERSION >= 0x1010 1495 case CL_EVENT_CONTEXT: 1496 PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name, 1497 cl_context, context); 1498 #endif 1499 1500 default: 1501 throw error("Event.get_info", CL_INVALID_VALUE); 1502 } 1503 } 1504 get_profiling_info(cl_profiling_info param_name) const1505 py::object get_profiling_info(cl_profiling_info param_name) const 1506 { 1507 switch (param_name) 1508 { 1509 case CL_PROFILING_COMMAND_QUEUED: 1510 case CL_PROFILING_COMMAND_SUBMIT: 1511 case CL_PROFILING_COMMAND_START: 1512 case CL_PROFILING_COMMAND_END: 1513 #if PYOPENCL_CL_VERSION >= 0x2000 1514 case CL_PROFILING_COMMAND_COMPLETE: 1515 #endif 1516 PYOPENCL_GET_INTEGRAL_INFO(EventProfiling, m_event, param_name, 1517 cl_ulong); 1518 default: 1519 throw error("Event.get_profiling_info", CL_INVALID_VALUE); 1520 } 1521 } 1522 wait()1523 virtual void wait() 1524 { 1525 PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (1, &m_event)); 1526 } 1527 1528 #if PYOPENCL_CL_VERSION >= 0x1010 1529 // {{{ set_callback, by way of a a thread-based construction 1530 1531 private: 1532 struct event_callback_info_t 1533 { 1534 std::mutex m_mutex; 1535 std::condition_variable m_condvar; 1536 1537 py::object m_py_event; 1538 py::object m_py_callback; 1539 1540 bool m_set_callback_suceeded; 1541 1542 bool m_notify_thread_wakeup_is_genuine; 1543 1544 cl_event m_event; 1545 cl_int m_command_exec_status; 1546 event_callback_info_tpyopencl::event::event_callback_info_t1547 event_callback_info_t(py::object py_event, py::object py_callback) 1548 : m_py_event(py_event), m_py_callback(py_callback), m_set_callback_suceeded(true), 1549 m_notify_thread_wakeup_is_genuine(false) 1550 {} 1551 }; 1552 evt_callback(cl_event evt,cl_int command_exec_status,void * user_data)1553 static void evt_callback(cl_event evt, cl_int command_exec_status, void *user_data) 1554 { 1555 event_callback_info_t *cb_info = reinterpret_cast<event_callback_info_t *>(user_data); 1556 { 1557 std::lock_guard<std::mutex> lg(cb_info->m_mutex); 1558 cb_info->m_event = evt; 1559 cb_info->m_command_exec_status = command_exec_status; 1560 cb_info->m_notify_thread_wakeup_is_genuine = true; 1561 } 1562 1563 cb_info->m_condvar.notify_one(); 1564 } 1565 1566 public: set_callback(cl_int command_exec_callback_type,py::object pfn_event_notify)1567 void set_callback(cl_int command_exec_callback_type, py::object pfn_event_notify) 1568 { 1569 // The reason for doing this via a thread is that we're able to wait on 1570 // acquiring the GIL. (which we can't in the callback) 1571 1572 std::unique_ptr<event_callback_info_t> cb_info_holder( 1573 new event_callback_info_t( 1574 handle_from_new_ptr(new event(*this)), 1575 pfn_event_notify)); 1576 event_callback_info_t *cb_info = cb_info_holder.get(); 1577 1578 std::thread notif_thread([cb_info]() 1579 { 1580 { 1581 std::unique_lock<std::mutex> ulk(cb_info->m_mutex); 1582 cb_info->m_condvar.wait( 1583 ulk, 1584 [&](){ return cb_info->m_notify_thread_wakeup_is_genuine; }); 1585 1586 // ulk no longer held here, cb_info ready for deletion 1587 } 1588 1589 { 1590 py::gil_scoped_acquire acquire; 1591 1592 if (cb_info->m_set_callback_suceeded) 1593 { 1594 try { 1595 cb_info->m_py_callback( 1596 // cb_info->m_py_event, 1597 cb_info->m_command_exec_status); 1598 } 1599 catch (std::exception &exc) 1600 { 1601 std::cerr 1602 << "[pyopencl] event callback handler threw an exception, ignoring: " 1603 << exc.what() 1604 << std::endl; 1605 } 1606 } 1607 1608 // Need to hold GIL to delete py::object instances in 1609 // event_callback_info_t 1610 delete cb_info; 1611 } 1612 }); 1613 // Thread is away--it is now its responsibility to free cb_info. 1614 cb_info_holder.release(); 1615 1616 // notif_thread should no longer be coupled to the lifetime of the thread. 1617 notif_thread.detach(); 1618 1619 try 1620 { 1621 PYOPENCL_CALL_GUARDED(clSetEventCallback, ( 1622 data(), command_exec_callback_type, &event::evt_callback, cb_info)); 1623 } 1624 catch (...) { 1625 // Setting the callback did not succeed. The thread would never 1626 // be woken up. Wake it up to let it know that it can stop. 1627 { 1628 std::lock_guard<std::mutex> lg(cb_info->m_mutex); 1629 cb_info->m_set_callback_suceeded = false; 1630 cb_info->m_notify_thread_wakeup_is_genuine = true; 1631 } 1632 cb_info->m_condvar.notify_one(); 1633 throw; 1634 } 1635 } 1636 // }}} 1637 #endif 1638 }; 1639 1640 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 1641 class nanny_event : public event 1642 { 1643 // In addition to everything an event does, the nanny event holds a reference 1644 // to a Python object and waits for its own completion upon destruction. 1645 1646 protected: 1647 std::unique_ptr<py_buffer_wrapper> m_ward; 1648 1649 public: 1650 nanny_event(cl_event evt,bool retain,std::unique_ptr<py_buffer_wrapper> & ward)1651 nanny_event(cl_event evt, bool retain, std::unique_ptr<py_buffer_wrapper> &ward) 1652 : event(evt, retain), m_ward(std::move(ward)) 1653 { } 1654 ~nanny_event()1655 ~nanny_event() 1656 { wait(); } 1657 get_ward() const1658 py::object get_ward() const 1659 { 1660 if (m_ward.get()) 1661 { 1662 return py::reinterpret_borrow<py::object>(m_ward->m_buf.obj); 1663 } 1664 else 1665 return py::none(); 1666 } 1667 wait()1668 virtual void wait() 1669 { 1670 event::wait(); 1671 m_ward.reset(); 1672 } 1673 }; 1674 #else 1675 class nanny_event : public event 1676 { 1677 // In addition to everything an event does, the nanny event holds a reference 1678 // to a Python object and waits for its own completion upon destruction. 1679 1680 protected: 1681 py::object m_ward; 1682 1683 public: 1684 nanny_event(cl_event evt,bool retain,py::object ward)1685 nanny_event(cl_event evt, bool retain, py::object ward) 1686 : event(evt, retain), m_ward(ward) 1687 { } 1688 nanny_event(nanny_event const & src)1689 nanny_event(nanny_event const &src) 1690 : event(src), m_ward(src.m_ward) 1691 { } 1692 ~nanny_event()1693 ~nanny_event() 1694 { wait(); } 1695 get_ward() const1696 py::object get_ward() const 1697 { return m_ward; } 1698 wait()1699 virtual void wait() 1700 { 1701 event::wait(); 1702 m_ward = py::none(); 1703 } 1704 }; 1705 #endif 1706 1707 1708 1709 1710 inline wait_for_events(py::object events)1711 void wait_for_events(py::object events) 1712 { 1713 cl_uint num_events_in_wait_list = 0; 1714 std::vector<cl_event> event_wait_list(len(events)); 1715 1716 for (py::handle evt: events) 1717 event_wait_list[num_events_in_wait_list++] = 1718 evt.cast<event &>().data(); 1719 1720 PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, ( 1721 PYOPENCL_WAITLIST_ARGS)); 1722 } 1723 1724 1725 1726 1727 #if PYOPENCL_CL_VERSION >= 0x1020 1728 inline enqueue_marker_with_wait_list(command_queue & cq,py::object py_wait_for)1729 event *enqueue_marker_with_wait_list(command_queue &cq, 1730 py::object py_wait_for) 1731 { 1732 PYOPENCL_PARSE_WAIT_FOR; 1733 cl_event evt; 1734 1735 PYOPENCL_CALL_GUARDED(clEnqueueMarkerWithWaitList, ( 1736 cq.data(), PYOPENCL_WAITLIST_ARGS, &evt)); 1737 1738 PYOPENCL_RETURN_NEW_EVENT(evt); 1739 } 1740 1741 inline enqueue_barrier_with_wait_list(command_queue & cq,py::object py_wait_for)1742 event *enqueue_barrier_with_wait_list(command_queue &cq, 1743 py::object py_wait_for) 1744 { 1745 PYOPENCL_PARSE_WAIT_FOR; 1746 cl_event evt; 1747 1748 PYOPENCL_CALL_GUARDED(clEnqueueBarrierWithWaitList, 1749 (cq.data(), PYOPENCL_WAITLIST_ARGS, &evt)); 1750 1751 PYOPENCL_RETURN_NEW_EVENT(evt); 1752 } 1753 #endif 1754 1755 1756 // {{{ used internally for pre-OpenCL-1.2 contexts 1757 1758 inline enqueue_marker(command_queue & cq)1759 event *enqueue_marker(command_queue &cq) 1760 { 1761 cl_event evt; 1762 1763 PYOPENCL_CALL_GUARDED(clEnqueueMarker, ( 1764 cq.data(), &evt)); 1765 1766 PYOPENCL_RETURN_NEW_EVENT(evt); 1767 } 1768 1769 inline enqueue_wait_for_events(command_queue & cq,py::object py_events)1770 void enqueue_wait_for_events(command_queue &cq, py::object py_events) 1771 { 1772 cl_uint num_events = 0; 1773 std::vector<cl_event> event_list(len(py_events)); 1774 1775 for (py::handle py_evt: py_events) 1776 event_list[num_events++] = py_evt.cast<event &>().data(); 1777 1778 PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, ( 1779 cq.data(), num_events, event_list.empty( ) ? nullptr : &event_list.front())); 1780 } 1781 1782 inline enqueue_barrier(command_queue & cq)1783 void enqueue_barrier(command_queue &cq) 1784 { 1785 PYOPENCL_CALL_GUARDED(clEnqueueBarrier, (cq.data())); 1786 } 1787 1788 // }}} 1789 1790 1791 #if PYOPENCL_CL_VERSION >= 0x1010 1792 class user_event : public event 1793 { 1794 public: user_event(cl_event evt,bool retain)1795 user_event(cl_event evt, bool retain) 1796 : event(evt, retain) 1797 { } 1798 set_status(cl_int execution_status)1799 void set_status(cl_int execution_status) 1800 { 1801 PYOPENCL_CALL_GUARDED(clSetUserEventStatus, (data(), execution_status)); 1802 } 1803 }; 1804 1805 1806 1807 1808 inline create_user_event(context & ctx)1809 user_event *create_user_event(context &ctx) 1810 { 1811 cl_int status_code; 1812 PYOPENCL_PRINT_CALL_TRACE("clCreateUserEvent"); 1813 cl_event evt = clCreateUserEvent(ctx.data(), &status_code); 1814 1815 if (status_code != CL_SUCCESS) 1816 throw pyopencl::error("UserEvent", status_code); 1817 1818 try 1819 { 1820 return new user_event(evt, false); 1821 } 1822 catch (...) 1823 { 1824 clReleaseEvent(evt); 1825 throw; 1826 } 1827 } 1828 1829 #endif 1830 1831 // }}} 1832 1833 1834 // {{{ memory_object 1835 1836 py::object create_mem_object_wrapper(cl_mem mem, bool retain); 1837 1838 class memory_object_holder 1839 { 1840 public: 1841 virtual const cl_mem data() const = 0; 1842 1843 PYOPENCL_EQUALITY_TESTS(memory_object_holder); 1844 size() const1845 size_t size() const 1846 { 1847 size_t param_value; 1848 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, 1849 (data(), CL_MEM_SIZE, sizeof(param_value), ¶m_value, 0)); 1850 return param_value; 1851 } 1852 1853 py::object get_info(cl_mem_info param_name) const; 1854 }; 1855 1856 1857 1858 1859 class memory_object : noncopyable, public memory_object_holder 1860 { 1861 public: 1862 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 1863 typedef std::unique_ptr<py_buffer_wrapper> hostbuf_t; 1864 #else 1865 typedef py::object hostbuf_t; 1866 #endif 1867 1868 private: 1869 bool m_valid; 1870 cl_mem m_mem; 1871 hostbuf_t m_hostbuf; 1872 1873 public: memory_object(cl_mem mem,bool retain,hostbuf_t hostbuf=hostbuf_t ())1874 memory_object(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) 1875 : m_valid(true), m_mem(mem) 1876 { 1877 if (retain) 1878 PYOPENCL_CALL_GUARDED(clRetainMemObject, (mem)); 1879 1880 m_hostbuf = PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf); 1881 } 1882 memory_object(memory_object & src)1883 memory_object(memory_object &src) 1884 : m_valid(true), m_mem(src.m_mem), 1885 m_hostbuf(PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(src.m_hostbuf)) 1886 { 1887 PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); 1888 } 1889 memory_object(memory_object_holder const & src)1890 memory_object(memory_object_holder const &src) 1891 : m_valid(true), m_mem(src.data()) 1892 { 1893 PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); 1894 } 1895 release()1896 void release() 1897 { 1898 if (!m_valid) 1899 throw error("MemoryObject.free", CL_INVALID_VALUE, 1900 "trying to double-unref mem object"); 1901 PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseMemObject, (m_mem)); 1902 m_valid = false; 1903 } 1904 ~memory_object()1905 virtual ~memory_object() 1906 { 1907 if (m_valid) 1908 release(); 1909 } 1910 hostbuf()1911 py::object hostbuf() 1912 { 1913 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 1914 if (m_hostbuf.get()) 1915 return py::reinterpret_borrow<py::object>(m_hostbuf->m_buf.obj); 1916 else 1917 return py::none(); 1918 #else 1919 return m_hostbuf; 1920 #endif 1921 } 1922 data() const1923 const cl_mem data() const 1924 { return m_mem; } 1925 1926 }; 1927 1928 #if PYOPENCL_CL_VERSION >= 0x1020 1929 inline enqueue_migrate_mem_objects(command_queue & cq,py::object py_mem_objects,cl_mem_migration_flags flags,py::object py_wait_for)1930 event *enqueue_migrate_mem_objects( 1931 command_queue &cq, 1932 py::object py_mem_objects, 1933 cl_mem_migration_flags flags, 1934 py::object py_wait_for) 1935 { 1936 PYOPENCL_PARSE_WAIT_FOR; 1937 1938 std::vector<cl_mem> mem_objects; 1939 for (py::handle mo: py_mem_objects) 1940 mem_objects.push_back(mo.cast<const memory_object &>().data()); 1941 1942 cl_event evt; 1943 PYOPENCL_RETRY_IF_MEM_ERROR( 1944 PYOPENCL_CALL_GUARDED(clEnqueueMigrateMemObjects, ( 1945 cq.data(), 1946 mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), 1947 flags, 1948 PYOPENCL_WAITLIST_ARGS, &evt 1949 )); 1950 ); 1951 PYOPENCL_RETURN_NEW_EVENT(evt); 1952 } 1953 #endif 1954 1955 // }}} 1956 1957 1958 // {{{ buffer 1959 create_buffer(cl_context ctx,cl_mem_flags flags,size_t size,void * host_ptr)1960 inline cl_mem create_buffer( 1961 cl_context ctx, 1962 cl_mem_flags flags, 1963 size_t size, 1964 void *host_ptr) 1965 { 1966 cl_int status_code; 1967 PYOPENCL_PRINT_CALL_TRACE("clCreateBuffer"); 1968 cl_mem mem = clCreateBuffer(ctx, flags, size, host_ptr, &status_code); 1969 1970 if (status_code != CL_SUCCESS) 1971 throw pyopencl::error("create_buffer", status_code); 1972 1973 return mem; 1974 } 1975 1976 1977 1978 create_buffer_gc(cl_context ctx,cl_mem_flags flags,size_t size,void * host_ptr)1979 inline cl_mem create_buffer_gc( 1980 cl_context ctx, 1981 cl_mem_flags flags, 1982 size_t size, 1983 void *host_ptr) 1984 { 1985 PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( 1986 return create_buffer(ctx, flags, size, host_ptr); 1987 ); 1988 } 1989 1990 1991 1992 #if PYOPENCL_CL_VERSION >= 0x1010 create_sub_buffer(cl_mem buffer,cl_mem_flags flags,cl_buffer_create_type bct,const void * buffer_create_info)1993 inline cl_mem create_sub_buffer( 1994 cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct, 1995 const void *buffer_create_info) 1996 { 1997 cl_int status_code; 1998 PYOPENCL_PRINT_CALL_TRACE("clCreateSubBuffer"); 1999 cl_mem mem = clCreateSubBuffer(buffer, flags, 2000 bct, buffer_create_info, &status_code); 2001 2002 if (status_code != CL_SUCCESS) 2003 throw pyopencl::error("clCreateSubBuffer", status_code); 2004 2005 return mem; 2006 } 2007 2008 2009 2010 create_sub_buffer_gc(cl_mem buffer,cl_mem_flags flags,cl_buffer_create_type bct,const void * buffer_create_info)2011 inline cl_mem create_sub_buffer_gc( 2012 cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct, 2013 const void *buffer_create_info) 2014 { 2015 PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( 2016 return create_sub_buffer(buffer, flags, bct, buffer_create_info); 2017 ); 2018 } 2019 #endif 2020 2021 2022 2023 class buffer : public memory_object 2024 { 2025 public: buffer(cl_mem mem,bool retain,hostbuf_t hostbuf=hostbuf_t ())2026 buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) 2027 : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) 2028 { } 2029 2030 #if PYOPENCL_CL_VERSION >= 0x1010 get_sub_region(size_t origin,size_t size,cl_mem_flags flags) const2031 buffer *get_sub_region( 2032 size_t origin, size_t size, cl_mem_flags flags) const 2033 { 2034 cl_buffer_region region = { origin, size}; 2035 2036 cl_mem mem = create_sub_buffer_gc( 2037 data(), flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion); 2038 2039 try 2040 { 2041 return new buffer(mem, false); 2042 } 2043 catch (...) 2044 { 2045 PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); 2046 throw; 2047 } 2048 } 2049 getitem(py::slice slc) const2050 buffer *getitem(py::slice slc) const 2051 { 2052 PYOPENCL_BUFFER_SIZE_T start, end, stride, length; 2053 2054 size_t my_length; 2055 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, 2056 (data(), CL_MEM_SIZE, sizeof(my_length), &my_length, 0)); 2057 2058 #if PY_VERSION_HEX >= 0x03020000 2059 if (PySlice_GetIndicesEx(slc.ptr(), 2060 #else 2061 if (PySlice_GetIndicesEx(reinterpret_cast<PySliceObject *>(slc.ptr()), 2062 #endif 2063 my_length, &start, &end, &stride, &length) != 0) 2064 throw py::error_already_set(); 2065 2066 if (stride != 1) 2067 throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE, 2068 "Buffer slice must have stride 1"); 2069 2070 cl_mem_flags my_flags; 2071 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, 2072 (data(), CL_MEM_FLAGS, sizeof(my_flags), &my_flags, 0)); 2073 2074 my_flags &= ~CL_MEM_COPY_HOST_PTR; 2075 2076 if (end <= start) 2077 throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE, 2078 "Buffer slice have end > start"); 2079 2080 return get_sub_region(start, end-start, my_flags); 2081 } 2082 #endif 2083 }; 2084 2085 // {{{ buffer creation 2086 2087 inline create_buffer_py(context & ctx,cl_mem_flags flags,size_t size,py::object py_hostbuf)2088 buffer *create_buffer_py( 2089 context &ctx, 2090 cl_mem_flags flags, 2091 size_t size, 2092 py::object py_hostbuf 2093 ) 2094 { 2095 if (py_hostbuf.ptr() != Py_None && 2096 !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) 2097 PyErr_Warn(PyExc_UserWarning, "'hostbuf' was passed, " 2098 "but no memory flags to make use of it."); 2099 2100 void *buf = 0; 2101 2102 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2103 std::unique_ptr<py_buffer_wrapper> retained_buf_obj; 2104 if (py_hostbuf.ptr() != Py_None) 2105 { 2106 retained_buf_obj = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper); 2107 2108 int py_buf_flags = PyBUF_ANY_CONTIGUOUS; 2109 if ((flags & CL_MEM_USE_HOST_PTR) 2110 && ((flags & CL_MEM_READ_WRITE) 2111 || (flags & CL_MEM_WRITE_ONLY))) 2112 py_buf_flags |= PyBUF_WRITABLE; 2113 2114 retained_buf_obj->get(py_hostbuf.ptr(), py_buf_flags); 2115 2116 buf = retained_buf_obj->m_buf.buf; 2117 2118 if (size > size_t(retained_buf_obj->m_buf.len)) 2119 throw pyopencl::error("Buffer", CL_INVALID_VALUE, 2120 "specified size is greater than host buffer size"); 2121 if (size == 0) 2122 size = retained_buf_obj->m_buf.len; 2123 } 2124 #else 2125 py::object retained_buf_obj; 2126 if (py_hostbuf.ptr() != Py_None) 2127 { 2128 PYOPENCL_BUFFER_SIZE_T len; 2129 if ((flags & CL_MEM_USE_HOST_PTR) 2130 && ((flags & CL_MEM_READ_WRITE) 2131 || (flags & CL_MEM_WRITE_ONLY))) 2132 { 2133 if (PyObject_AsWriteBuffer(py_hostbuf.ptr(), &buf, &len)) 2134 throw py::error_already_set(); 2135 } 2136 else 2137 { 2138 if (PyObject_AsReadBuffer( 2139 py_hostbuf.ptr(), const_cast<const void **>(&buf), &len)) 2140 throw py::error_already_set(); 2141 } 2142 2143 if (flags & CL_MEM_USE_HOST_PTR) 2144 retained_buf_obj = py_hostbuf; 2145 2146 if (size > size_t(len)) 2147 throw pyopencl::error("Buffer", CL_INVALID_VALUE, 2148 "specified size is greater than host buffer size"); 2149 if (size == 0) 2150 size = len; 2151 } 2152 #endif 2153 2154 cl_mem mem = create_buffer_gc(ctx.data(), flags, size, buf); 2155 2156 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2157 if (!(flags & CL_MEM_USE_HOST_PTR)) 2158 retained_buf_obj.reset(); 2159 #endif 2160 2161 try 2162 { 2163 return new buffer(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); 2164 } 2165 catch (...) 2166 { 2167 PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); 2168 throw; 2169 } 2170 } 2171 2172 // }}} 2173 2174 // {{{ buffer transfers 2175 2176 // {{{ byte-for-byte transfers 2177 2178 inline enqueue_read_buffer(command_queue & cq,memory_object_holder & mem,py::object buffer,size_t device_offset,py::object py_wait_for,bool is_blocking)2179 event *enqueue_read_buffer( 2180 command_queue &cq, 2181 memory_object_holder &mem, 2182 py::object buffer, 2183 size_t device_offset, 2184 py::object py_wait_for, 2185 bool is_blocking) 2186 { 2187 PYOPENCL_PARSE_WAIT_FOR; 2188 2189 void *buf; 2190 PYOPENCL_BUFFER_SIZE_T len; 2191 2192 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2193 std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper); 2194 2195 ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); 2196 2197 buf = ward->m_buf.buf; 2198 len = ward->m_buf.len; 2199 #else 2200 py::object ward = buffer; 2201 if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) 2202 throw py::error_already_set(); 2203 #endif 2204 2205 cl_event evt; 2206 PYOPENCL_RETRY_IF_MEM_ERROR( 2207 PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBuffer, ( 2208 cq.data(), 2209 mem.data(), 2210 PYOPENCL_CAST_BOOL(is_blocking), 2211 device_offset, len, buf, 2212 PYOPENCL_WAITLIST_ARGS, &evt 2213 )) 2214 ); 2215 PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); 2216 } 2217 2218 2219 2220 2221 inline enqueue_write_buffer(command_queue & cq,memory_object_holder & mem,py::object buffer,size_t device_offset,py::object py_wait_for,bool is_blocking)2222 event *enqueue_write_buffer( 2223 command_queue &cq, 2224 memory_object_holder &mem, 2225 py::object buffer, 2226 size_t device_offset, 2227 py::object py_wait_for, 2228 bool is_blocking) 2229 { 2230 PYOPENCL_PARSE_WAIT_FOR; 2231 2232 const void *buf; 2233 PYOPENCL_BUFFER_SIZE_T len; 2234 2235 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2236 std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper); 2237 2238 ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); 2239 2240 buf = ward->m_buf.buf; 2241 len = ward->m_buf.len; 2242 #else 2243 py::object ward = buffer; 2244 if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) 2245 throw py::error_already_set(); 2246 #endif 2247 2248 cl_event evt; 2249 PYOPENCL_RETRY_IF_MEM_ERROR( 2250 PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBuffer, ( 2251 cq.data(), 2252 mem.data(), 2253 PYOPENCL_CAST_BOOL(is_blocking), 2254 device_offset, len, buf, 2255 PYOPENCL_WAITLIST_ARGS, &evt 2256 )) 2257 ); 2258 PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); 2259 } 2260 2261 2262 2263 2264 inline enqueue_copy_buffer(command_queue & cq,memory_object_holder & src,memory_object_holder & dst,ptrdiff_t byte_count,size_t src_offset,size_t dst_offset,py::object py_wait_for)2265 event *enqueue_copy_buffer( 2266 command_queue &cq, 2267 memory_object_holder &src, 2268 memory_object_holder &dst, 2269 ptrdiff_t byte_count, 2270 size_t src_offset, 2271 size_t dst_offset, 2272 py::object py_wait_for) 2273 { 2274 PYOPENCL_PARSE_WAIT_FOR; 2275 2276 if (byte_count < 0) 2277 { 2278 size_t byte_count_src = 0; 2279 size_t byte_count_dst = 0; 2280 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, 2281 (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_src, 0)); 2282 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, 2283 (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_dst, 0)); 2284 byte_count = std::min(byte_count_src, byte_count_dst); 2285 } 2286 2287 cl_event evt; 2288 PYOPENCL_RETRY_IF_MEM_ERROR( 2289 PYOPENCL_CALL_GUARDED(clEnqueueCopyBuffer, ( 2290 cq.data(), 2291 src.data(), dst.data(), 2292 src_offset, dst_offset, 2293 byte_count, 2294 PYOPENCL_WAITLIST_ARGS, 2295 &evt 2296 )) 2297 ); 2298 2299 PYOPENCL_RETURN_NEW_EVENT(evt); 2300 } 2301 2302 // }}} 2303 2304 // {{{ rectangular transfers 2305 #if PYOPENCL_CL_VERSION >= 0x1010 2306 inline enqueue_read_buffer_rect(command_queue & cq,memory_object_holder & mem,py::object buffer,py::object py_buffer_origin,py::object py_host_origin,py::object py_region,py::sequence py_buffer_pitches,py::sequence py_host_pitches,py::object py_wait_for,bool is_blocking)2307 event *enqueue_read_buffer_rect( 2308 command_queue &cq, 2309 memory_object_holder &mem, 2310 py::object buffer, 2311 py::object py_buffer_origin, 2312 py::object py_host_origin, 2313 py::object py_region, 2314 py::sequence py_buffer_pitches, 2315 py::sequence py_host_pitches, 2316 py::object py_wait_for, 2317 bool is_blocking 2318 ) 2319 { 2320 PYOPENCL_PARSE_WAIT_FOR; 2321 COPY_PY_COORD_TRIPLE(buffer_origin); 2322 COPY_PY_COORD_TRIPLE(host_origin); 2323 COPY_PY_REGION_TRIPLE(region); 2324 COPY_PY_PITCH_TUPLE(buffer_pitches); 2325 COPY_PY_PITCH_TUPLE(host_pitches); 2326 2327 void *buf; 2328 2329 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2330 std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper); 2331 2332 ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); 2333 2334 buf = ward->m_buf.buf; 2335 #else 2336 py::object ward = buffer; 2337 2338 PYOPENCL_BUFFER_SIZE_T len; 2339 if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) 2340 throw py::error_already_set(); 2341 #endif 2342 2343 cl_event evt; 2344 PYOPENCL_RETRY_IF_MEM_ERROR( 2345 PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBufferRect, ( 2346 cq.data(), 2347 mem.data(), 2348 PYOPENCL_CAST_BOOL(is_blocking), 2349 buffer_origin, host_origin, region, 2350 buffer_pitches[0], buffer_pitches[1], 2351 host_pitches[0], host_pitches[1], 2352 buf, 2353 PYOPENCL_WAITLIST_ARGS, &evt 2354 )) 2355 ); 2356 PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); 2357 } 2358 2359 2360 2361 2362 inline enqueue_write_buffer_rect(command_queue & cq,memory_object_holder & mem,py::object buffer,py::object py_buffer_origin,py::object py_host_origin,py::object py_region,py::sequence py_buffer_pitches,py::sequence py_host_pitches,py::object py_wait_for,bool is_blocking)2363 event *enqueue_write_buffer_rect( 2364 command_queue &cq, 2365 memory_object_holder &mem, 2366 py::object buffer, 2367 py::object py_buffer_origin, 2368 py::object py_host_origin, 2369 py::object py_region, 2370 py::sequence py_buffer_pitches, 2371 py::sequence py_host_pitches, 2372 py::object py_wait_for, 2373 bool is_blocking 2374 ) 2375 { 2376 PYOPENCL_PARSE_WAIT_FOR; 2377 COPY_PY_COORD_TRIPLE(buffer_origin); 2378 COPY_PY_COORD_TRIPLE(host_origin); 2379 COPY_PY_REGION_TRIPLE(region); 2380 COPY_PY_PITCH_TUPLE(buffer_pitches); 2381 COPY_PY_PITCH_TUPLE(host_pitches); 2382 2383 const void *buf; 2384 2385 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2386 std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper); 2387 2388 ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); 2389 2390 buf = ward->m_buf.buf; 2391 #else 2392 py::object ward = buffer; 2393 PYOPENCL_BUFFER_SIZE_T len; 2394 if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) 2395 throw py::error_already_set(); 2396 #endif 2397 2398 cl_event evt; 2399 PYOPENCL_RETRY_IF_MEM_ERROR( 2400 PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBufferRect, ( 2401 cq.data(), 2402 mem.data(), 2403 PYOPENCL_CAST_BOOL(is_blocking), 2404 buffer_origin, host_origin, region, 2405 buffer_pitches[0], buffer_pitches[1], 2406 host_pitches[0], host_pitches[1], 2407 buf, 2408 PYOPENCL_WAITLIST_ARGS, &evt 2409 )) 2410 ); 2411 PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); 2412 } 2413 2414 2415 2416 2417 inline enqueue_copy_buffer_rect(command_queue & cq,memory_object_holder & src,memory_object_holder & dst,py::object py_src_origin,py::object py_dst_origin,py::object py_region,py::sequence py_src_pitches,py::sequence py_dst_pitches,py::object py_wait_for)2418 event *enqueue_copy_buffer_rect( 2419 command_queue &cq, 2420 memory_object_holder &src, 2421 memory_object_holder &dst, 2422 py::object py_src_origin, 2423 py::object py_dst_origin, 2424 py::object py_region, 2425 py::sequence py_src_pitches, 2426 py::sequence py_dst_pitches, 2427 py::object py_wait_for) 2428 { 2429 PYOPENCL_PARSE_WAIT_FOR; 2430 COPY_PY_COORD_TRIPLE(src_origin); 2431 COPY_PY_COORD_TRIPLE(dst_origin); 2432 COPY_PY_REGION_TRIPLE(region); 2433 COPY_PY_PITCH_TUPLE(src_pitches); 2434 COPY_PY_PITCH_TUPLE(dst_pitches); 2435 2436 cl_event evt; 2437 PYOPENCL_RETRY_IF_MEM_ERROR( 2438 PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferRect, ( 2439 cq.data(), 2440 src.data(), dst.data(), 2441 src_origin, dst_origin, region, 2442 src_pitches[0], src_pitches[1], 2443 dst_pitches[0], dst_pitches[1], 2444 PYOPENCL_WAITLIST_ARGS, 2445 &evt 2446 )) 2447 ); 2448 2449 PYOPENCL_RETURN_NEW_EVENT(evt); 2450 } 2451 2452 #endif 2453 2454 // }}} 2455 2456 // }}} 2457 2458 #if PYOPENCL_CL_VERSION >= 0x1020 2459 inline enqueue_fill_buffer(command_queue & cq,memory_object_holder & mem,py::object pattern,size_t offset,size_t size,py::object py_wait_for)2460 event *enqueue_fill_buffer( 2461 command_queue &cq, 2462 memory_object_holder &mem, 2463 py::object pattern, 2464 size_t offset, 2465 size_t size, 2466 py::object py_wait_for 2467 ) 2468 { 2469 PYOPENCL_PARSE_WAIT_FOR; 2470 2471 const void *pattern_buf; 2472 PYOPENCL_BUFFER_SIZE_T pattern_len; 2473 2474 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2475 std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper); 2476 2477 ward->get(pattern.ptr(), PyBUF_ANY_CONTIGUOUS); 2478 2479 pattern_buf = ward->m_buf.buf; 2480 pattern_len = ward->m_buf.len; 2481 #else 2482 if (PyObject_AsReadBuffer(pattern.ptr(), &pattern_buf, &pattern_len)) 2483 throw py::error_already_set(); 2484 #endif 2485 2486 cl_event evt; 2487 PYOPENCL_RETRY_IF_MEM_ERROR( 2488 PYOPENCL_CALL_GUARDED(clEnqueueFillBuffer, ( 2489 cq.data(), 2490 mem.data(), 2491 pattern_buf, pattern_len, offset, size, 2492 PYOPENCL_WAITLIST_ARGS, &evt 2493 )) 2494 ); 2495 PYOPENCL_RETURN_NEW_EVENT(evt); 2496 } 2497 #endif 2498 2499 // }}} 2500 2501 2502 // {{{ image 2503 2504 class image : public memory_object 2505 { 2506 public: image(cl_mem mem,bool retain,hostbuf_t hostbuf=hostbuf_t ())2507 image(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) 2508 : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) 2509 { } 2510 get_image_info(cl_image_info param_name) const2511 py::object get_image_info(cl_image_info param_name) const 2512 { 2513 switch (param_name) 2514 { 2515 case CL_IMAGE_FORMAT: 2516 PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, 2517 cl_image_format); 2518 case CL_IMAGE_ELEMENT_SIZE: 2519 case CL_IMAGE_ROW_PITCH: 2520 case CL_IMAGE_SLICE_PITCH: 2521 case CL_IMAGE_WIDTH: 2522 case CL_IMAGE_HEIGHT: 2523 case CL_IMAGE_DEPTH: 2524 #if PYOPENCL_CL_VERSION >= 0x1020 2525 case CL_IMAGE_ARRAY_SIZE: 2526 #endif 2527 PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, size_t); 2528 2529 #if PYOPENCL_CL_VERSION >= 0x1020 2530 case CL_IMAGE_BUFFER: 2531 { 2532 cl_mem param_value; 2533 PYOPENCL_CALL_GUARDED(clGetImageInfo, \ 2534 (data(), param_name, sizeof(param_value), ¶m_value, 0)); 2535 if (param_value == 0) 2536 { 2537 // no associated memory object? no problem. 2538 return py::none(); 2539 } 2540 2541 return create_mem_object_wrapper(param_value, /* retain */ true); 2542 } 2543 2544 case CL_IMAGE_NUM_MIP_LEVELS: 2545 case CL_IMAGE_NUM_SAMPLES: 2546 PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, cl_uint); 2547 #endif 2548 2549 default: 2550 throw error("MemoryObject.get_image_info", CL_INVALID_VALUE); 2551 } 2552 } 2553 }; 2554 2555 2556 2557 2558 // {{{ image formats 2559 2560 inline make_image_format(cl_channel_order ord,cl_channel_type tp)2561 cl_image_format *make_image_format(cl_channel_order ord, cl_channel_type tp) 2562 { 2563 std::unique_ptr<cl_image_format> result(new cl_image_format); 2564 result->image_channel_order = ord; 2565 result->image_channel_data_type = tp; 2566 return result.release(); 2567 } 2568 2569 inline get_supported_image_formats(context const & ctx,cl_mem_flags flags,cl_mem_object_type image_type)2570 py::list get_supported_image_formats( 2571 context const &ctx, 2572 cl_mem_flags flags, 2573 cl_mem_object_type image_type) 2574 { 2575 cl_uint num_image_formats; 2576 PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, ( 2577 ctx.data(), flags, image_type, 2578 0, nullptr, &num_image_formats)); 2579 2580 std::vector<cl_image_format> formats(num_image_formats); 2581 PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, ( 2582 ctx.data(), flags, image_type, 2583 formats.size(), formats.empty( ) ? nullptr : &formats.front(), nullptr)); 2584 2585 PYOPENCL_RETURN_VECTOR(cl_image_format, formats); 2586 } 2587 2588 inline get_image_format_channel_count(cl_image_format const & fmt)2589 cl_uint get_image_format_channel_count(cl_image_format const &fmt) 2590 { 2591 switch (fmt.image_channel_order) 2592 { 2593 case CL_R: return 1; 2594 case CL_A: return 1; 2595 case CL_RG: return 2; 2596 case CL_RA: return 2; 2597 case CL_RGB: return 3; 2598 case CL_RGBA: return 4; 2599 case CL_BGRA: return 4; 2600 case CL_INTENSITY: return 1; 2601 case CL_LUMINANCE: return 1; 2602 default: 2603 throw pyopencl::error("ImageFormat.channel_dtype_size", 2604 CL_INVALID_VALUE, 2605 "unrecognized channel order"); 2606 } 2607 } 2608 2609 inline get_image_format_channel_dtype_size(cl_image_format const & fmt)2610 cl_uint get_image_format_channel_dtype_size(cl_image_format const &fmt) 2611 { 2612 switch (fmt.image_channel_data_type) 2613 { 2614 case CL_SNORM_INT8: return 1; 2615 case CL_SNORM_INT16: return 2; 2616 case CL_UNORM_INT8: return 1; 2617 case CL_UNORM_INT16: return 2; 2618 case CL_UNORM_SHORT_565: return 2; 2619 case CL_UNORM_SHORT_555: return 2; 2620 case CL_UNORM_INT_101010: return 4; 2621 case CL_SIGNED_INT8: return 1; 2622 case CL_SIGNED_INT16: return 2; 2623 case CL_SIGNED_INT32: return 4; 2624 case CL_UNSIGNED_INT8: return 1; 2625 case CL_UNSIGNED_INT16: return 2; 2626 case CL_UNSIGNED_INT32: return 4; 2627 case CL_HALF_FLOAT: return 2; 2628 case CL_FLOAT: return 4; 2629 default: 2630 throw pyopencl::error("ImageFormat.channel_dtype_size", 2631 CL_INVALID_VALUE, 2632 "unrecognized channel data type"); 2633 } 2634 } 2635 2636 inline get_image_format_item_size(cl_image_format const & fmt)2637 cl_uint get_image_format_item_size(cl_image_format const &fmt) 2638 { 2639 return get_image_format_channel_count(fmt) 2640 * get_image_format_channel_dtype_size(fmt); 2641 } 2642 2643 // }}} 2644 2645 // {{{ image creation 2646 2647 inline create_image(context const & ctx,cl_mem_flags flags,cl_image_format const & fmt,py::sequence shape,py::sequence pitches,py::object buffer)2648 image *create_image( 2649 context const &ctx, 2650 cl_mem_flags flags, 2651 cl_image_format const &fmt, 2652 py::sequence shape, 2653 py::sequence pitches, 2654 py::object buffer) 2655 { 2656 if (shape.ptr() == Py_None) 2657 throw pyopencl::error("Image", CL_INVALID_VALUE, 2658 "'shape' must be given"); 2659 2660 void *buf = 0; 2661 PYOPENCL_BUFFER_SIZE_T len = 0; 2662 2663 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2664 std::unique_ptr<py_buffer_wrapper> retained_buf_obj; 2665 if (buffer.ptr() != Py_None) 2666 { 2667 retained_buf_obj = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper); 2668 2669 int py_buf_flags = PyBUF_ANY_CONTIGUOUS; 2670 if ((flags & CL_MEM_USE_HOST_PTR) 2671 && ((flags & CL_MEM_READ_WRITE) 2672 || (flags & CL_MEM_WRITE_ONLY))) 2673 py_buf_flags |= PyBUF_WRITABLE; 2674 2675 retained_buf_obj->get(buffer.ptr(), py_buf_flags); 2676 2677 buf = retained_buf_obj->m_buf.buf; 2678 len = retained_buf_obj->m_buf.len; 2679 } 2680 #else 2681 py::object retained_buf_obj; 2682 if (buffer.ptr() != Py_None) 2683 { 2684 if ((flags & CL_MEM_USE_HOST_PTR) 2685 && ((flags & CL_MEM_READ_WRITE) 2686 || (flags & CL_MEM_WRITE_ONLY))) 2687 { 2688 if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) 2689 throw py::error_already_set(); 2690 } 2691 else 2692 { 2693 if (PyObject_AsReadBuffer( 2694 buffer.ptr(), const_cast<const void **>(&buf), &len)) 2695 throw py::error_already_set(); 2696 } 2697 2698 if (flags & CL_MEM_USE_HOST_PTR) 2699 retained_buf_obj = buffer; 2700 } 2701 #endif 2702 2703 unsigned dims = py::len(shape); 2704 cl_int status_code; 2705 cl_mem mem; 2706 if (dims == 2) 2707 { 2708 size_t width = (shape[0]).cast<size_t>(); 2709 size_t height = (shape[1]).cast<size_t>(); 2710 2711 size_t pitch = 0; 2712 if (pitches.ptr() != Py_None) 2713 { 2714 if (py::len(pitches) != 1) 2715 throw pyopencl::error("Image", CL_INVALID_VALUE, 2716 "invalid length of pitch tuple"); 2717 pitch = (pitches[0]).cast<size_t>(); 2718 } 2719 2720 // check buffer size 2721 cl_int itemsize = get_image_format_item_size(fmt); 2722 if (buf && std::max(pitch, width*itemsize)*height > cl_uint(len)) 2723 throw pyopencl::error("Image", CL_INVALID_VALUE, 2724 "buffer too small"); 2725 2726 PYOPENCL_PRINT_CALL_TRACE("clCreateImage2D"); 2727 PYOPENCL_RETRY_IF_MEM_ERROR( 2728 { 2729 mem = clCreateImage2D(ctx.data(), flags, &fmt, 2730 width, height, pitch, buf, &status_code); 2731 if (status_code != CL_SUCCESS) 2732 throw pyopencl::error("clCreateImage2D", status_code); 2733 } ); 2734 2735 } 2736 else if (dims == 3) 2737 { 2738 size_t width = (shape[0]).cast<size_t>(); 2739 size_t height = (shape[1]).cast<size_t>(); 2740 size_t depth = (shape[2]).cast<size_t>(); 2741 2742 size_t pitch_x = 0; 2743 size_t pitch_y = 0; 2744 2745 if (pitches.ptr() != Py_None) 2746 { 2747 if (py::len(pitches) != 2) 2748 throw pyopencl::error("Image", CL_INVALID_VALUE, 2749 "invalid length of pitch tuple"); 2750 2751 pitch_x = (pitches[0]).cast<size_t>(); 2752 pitch_y = (pitches[1]).cast<size_t>(); 2753 } 2754 2755 // check buffer size 2756 cl_int itemsize = get_image_format_item_size(fmt); 2757 if (buf && 2758 std::max(std::max(pitch_x, width*itemsize)*height, pitch_y) 2759 * depth > cl_uint(len)) 2760 throw pyopencl::error("Image", CL_INVALID_VALUE, 2761 "buffer too small"); 2762 2763 PYOPENCL_PRINT_CALL_TRACE("clCreateImage3D"); 2764 PYOPENCL_RETRY_IF_MEM_ERROR( 2765 { 2766 mem = clCreateImage3D(ctx.data(), flags, &fmt, 2767 width, height, depth, pitch_x, pitch_y, buf, &status_code); 2768 if (status_code != CL_SUCCESS) 2769 throw pyopencl::error("clCreateImage3D", status_code); 2770 } ); 2771 } 2772 else 2773 throw pyopencl::error("Image", CL_INVALID_VALUE, 2774 "invalid dimension"); 2775 2776 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2777 if (!(flags & CL_MEM_USE_HOST_PTR)) 2778 retained_buf_obj.reset(); 2779 #endif 2780 2781 try 2782 { 2783 return new image(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); 2784 } 2785 catch (...) 2786 { 2787 PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); 2788 throw; 2789 } 2790 } 2791 2792 #if PYOPENCL_CL_VERSION >= 0x1020 2793 2794 inline create_image_from_desc(context const & ctx,cl_mem_flags flags,cl_image_format const & fmt,cl_image_desc & desc,py::object buffer)2795 image *create_image_from_desc( 2796 context const &ctx, 2797 cl_mem_flags flags, 2798 cl_image_format const &fmt, 2799 cl_image_desc &desc, 2800 py::object buffer) 2801 { 2802 if (buffer.ptr() != Py_None && 2803 !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) 2804 PyErr_Warn(PyExc_UserWarning, "'hostbuf' was passed, " 2805 "but no memory flags to make use of it."); 2806 2807 void *buf = 0; 2808 2809 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2810 std::unique_ptr<py_buffer_wrapper> retained_buf_obj; 2811 if (buffer.ptr() != Py_None) 2812 { 2813 retained_buf_obj = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper); 2814 2815 int py_buf_flags = PyBUF_ANY_CONTIGUOUS; 2816 if ((flags & CL_MEM_USE_HOST_PTR) 2817 && ((flags & CL_MEM_READ_WRITE) 2818 || (flags & CL_MEM_WRITE_ONLY))) 2819 py_buf_flags |= PyBUF_WRITABLE; 2820 2821 retained_buf_obj->get(buffer.ptr(), py_buf_flags); 2822 2823 buf = retained_buf_obj->m_buf.buf; 2824 } 2825 #else 2826 py::object retained_buf_obj; 2827 PYOPENCL_BUFFER_SIZE_T len; 2828 if (buffer.ptr() != Py_None) 2829 { 2830 if ((flags & CL_MEM_USE_HOST_PTR) 2831 && ((flags & CL_MEM_READ_WRITE) 2832 || (flags & CL_MEM_WRITE_ONLY))) 2833 { 2834 if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) 2835 throw py::error_already_set(); 2836 } 2837 else 2838 { 2839 if (PyObject_AsReadBuffer( 2840 buffer.ptr(), const_cast<const void **>(&buf), &len)) 2841 throw py::error_already_set(); 2842 } 2843 2844 if (flags & CL_MEM_USE_HOST_PTR) 2845 retained_buf_obj = buffer; 2846 } 2847 #endif 2848 2849 PYOPENCL_PRINT_CALL_TRACE("clCreateImage"); 2850 cl_int status_code; 2851 cl_mem mem = clCreateImage(ctx.data(), flags, &fmt, &desc, buf, &status_code); 2852 if (status_code != CL_SUCCESS) 2853 throw pyopencl::error("clCreateImage", status_code); 2854 2855 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2856 if (!(flags & CL_MEM_USE_HOST_PTR)) 2857 retained_buf_obj.reset(); 2858 #endif 2859 2860 try 2861 { 2862 return new image(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); 2863 } 2864 catch (...) 2865 { 2866 PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); 2867 throw; 2868 } 2869 } 2870 2871 #endif 2872 2873 // }}} 2874 2875 // {{{ image transfers 2876 2877 inline enqueue_read_image(command_queue & cq,image & img,py::object py_origin,py::object py_region,py::object buffer,size_t row_pitch,size_t slice_pitch,py::object py_wait_for,bool is_blocking)2878 event *enqueue_read_image( 2879 command_queue &cq, 2880 image &img, 2881 py::object py_origin, py::object py_region, 2882 py::object buffer, 2883 size_t row_pitch, size_t slice_pitch, 2884 py::object py_wait_for, 2885 bool is_blocking) 2886 { 2887 PYOPENCL_PARSE_WAIT_FOR; 2888 COPY_PY_COORD_TRIPLE(origin); 2889 COPY_PY_REGION_TRIPLE(region); 2890 2891 void *buf; 2892 2893 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2894 std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper); 2895 2896 ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); 2897 2898 buf = ward->m_buf.buf; 2899 #else 2900 py::object ward = buffer; 2901 PYOPENCL_BUFFER_SIZE_T len; 2902 if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) 2903 throw py::error_already_set(); 2904 #endif 2905 2906 cl_event evt; 2907 2908 PYOPENCL_RETRY_IF_MEM_ERROR( 2909 PYOPENCL_CALL_GUARDED(clEnqueueReadImage, ( 2910 cq.data(), 2911 img.data(), 2912 PYOPENCL_CAST_BOOL(is_blocking), 2913 origin, region, row_pitch, slice_pitch, buf, 2914 PYOPENCL_WAITLIST_ARGS, &evt 2915 )); 2916 ); 2917 PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); 2918 } 2919 2920 2921 2922 2923 inline enqueue_write_image(command_queue & cq,image & img,py::object py_origin,py::object py_region,py::object buffer,size_t row_pitch,size_t slice_pitch,py::object py_wait_for,bool is_blocking)2924 event *enqueue_write_image( 2925 command_queue &cq, 2926 image &img, 2927 py::object py_origin, py::object py_region, 2928 py::object buffer, 2929 size_t row_pitch, size_t slice_pitch, 2930 py::object py_wait_for, 2931 bool is_blocking) 2932 { 2933 PYOPENCL_PARSE_WAIT_FOR; 2934 COPY_PY_COORD_TRIPLE(origin); 2935 COPY_PY_REGION_TRIPLE(region); 2936 2937 const void *buf; 2938 2939 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 2940 std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper); 2941 2942 ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); 2943 2944 buf = ward->m_buf.buf; 2945 #else 2946 py::object ward = buffer; 2947 PYOPENCL_BUFFER_SIZE_T len; 2948 if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) 2949 throw py::error_already_set(); 2950 #endif 2951 2952 cl_event evt; 2953 PYOPENCL_RETRY_IF_MEM_ERROR( 2954 PYOPENCL_CALL_GUARDED(clEnqueueWriteImage, ( 2955 cq.data(), 2956 img.data(), 2957 PYOPENCL_CAST_BOOL(is_blocking), 2958 origin, region, row_pitch, slice_pitch, buf, 2959 PYOPENCL_WAITLIST_ARGS, &evt 2960 )); 2961 ); 2962 PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); 2963 } 2964 2965 2966 2967 2968 inline enqueue_copy_image(command_queue & cq,memory_object_holder & src,memory_object_holder & dest,py::object py_src_origin,py::object py_dest_origin,py::object py_region,py::object py_wait_for)2969 event *enqueue_copy_image( 2970 command_queue &cq, 2971 memory_object_holder &src, 2972 memory_object_holder &dest, 2973 py::object py_src_origin, 2974 py::object py_dest_origin, 2975 py::object py_region, 2976 py::object py_wait_for 2977 ) 2978 { 2979 PYOPENCL_PARSE_WAIT_FOR; 2980 COPY_PY_COORD_TRIPLE(src_origin); 2981 COPY_PY_COORD_TRIPLE(dest_origin); 2982 COPY_PY_REGION_TRIPLE(region); 2983 2984 cl_event evt; 2985 PYOPENCL_RETRY_IF_MEM_ERROR( 2986 PYOPENCL_CALL_GUARDED(clEnqueueCopyImage, ( 2987 cq.data(), src.data(), dest.data(), 2988 src_origin, dest_origin, region, 2989 PYOPENCL_WAITLIST_ARGS, &evt 2990 )); 2991 ); 2992 PYOPENCL_RETURN_NEW_EVENT(evt); 2993 } 2994 2995 2996 2997 2998 inline enqueue_copy_image_to_buffer(command_queue & cq,memory_object_holder & src,memory_object_holder & dest,py::object py_origin,py::object py_region,size_t offset,py::object py_wait_for)2999 event *enqueue_copy_image_to_buffer( 3000 command_queue &cq, 3001 memory_object_holder &src, 3002 memory_object_holder &dest, 3003 py::object py_origin, 3004 py::object py_region, 3005 size_t offset, 3006 py::object py_wait_for 3007 ) 3008 { 3009 PYOPENCL_PARSE_WAIT_FOR; 3010 COPY_PY_COORD_TRIPLE(origin); 3011 COPY_PY_REGION_TRIPLE(region); 3012 3013 cl_event evt; 3014 PYOPENCL_RETRY_IF_MEM_ERROR( 3015 PYOPENCL_CALL_GUARDED(clEnqueueCopyImageToBuffer, ( 3016 cq.data(), src.data(), dest.data(), 3017 origin, region, offset, 3018 PYOPENCL_WAITLIST_ARGS, &evt 3019 )); 3020 ); 3021 PYOPENCL_RETURN_NEW_EVENT(evt); 3022 } 3023 3024 3025 3026 3027 inline enqueue_copy_buffer_to_image(command_queue & cq,memory_object_holder & src,memory_object_holder & dest,size_t offset,py::object py_origin,py::object py_region,py::object py_wait_for)3028 event *enqueue_copy_buffer_to_image( 3029 command_queue &cq, 3030 memory_object_holder &src, 3031 memory_object_holder &dest, 3032 size_t offset, 3033 py::object py_origin, 3034 py::object py_region, 3035 py::object py_wait_for 3036 ) 3037 { 3038 PYOPENCL_PARSE_WAIT_FOR; 3039 COPY_PY_COORD_TRIPLE(origin); 3040 COPY_PY_REGION_TRIPLE(region); 3041 3042 cl_event evt; 3043 PYOPENCL_RETRY_IF_MEM_ERROR( 3044 PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferToImage, ( 3045 cq.data(), src.data(), dest.data(), 3046 offset, origin, region, 3047 PYOPENCL_WAITLIST_ARGS, &evt 3048 )); 3049 ); 3050 PYOPENCL_RETURN_NEW_EVENT(evt); 3051 } 3052 3053 // }}} 3054 3055 #if PYOPENCL_CL_VERSION >= 0x1020 3056 inline enqueue_fill_image(command_queue & cq,memory_object_holder & mem,py::object color,py::object py_origin,py::object py_region,py::object py_wait_for)3057 event *enqueue_fill_image( 3058 command_queue &cq, 3059 memory_object_holder &mem, 3060 py::object color, 3061 py::object py_origin, py::object py_region, 3062 py::object py_wait_for 3063 ) 3064 { 3065 PYOPENCL_PARSE_WAIT_FOR; 3066 3067 COPY_PY_COORD_TRIPLE(origin); 3068 COPY_PY_REGION_TRIPLE(region); 3069 3070 const void *color_buf; 3071 3072 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 3073 std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper); 3074 3075 ward->get(color.ptr(), PyBUF_ANY_CONTIGUOUS); 3076 3077 color_buf = ward->m_buf.buf; 3078 #else 3079 PYOPENCL_BUFFER_SIZE_T color_len; 3080 if (PyObject_AsReadBuffer(color.ptr(), &color_buf, &color_len)) 3081 throw py::error_already_set(); 3082 #endif 3083 3084 cl_event evt; 3085 PYOPENCL_RETRY_IF_MEM_ERROR( 3086 PYOPENCL_CALL_GUARDED(clEnqueueFillImage, ( 3087 cq.data(), 3088 mem.data(), 3089 color_buf, origin, region, 3090 PYOPENCL_WAITLIST_ARGS, &evt 3091 )); 3092 ); 3093 PYOPENCL_RETURN_NEW_EVENT(evt); 3094 } 3095 #endif 3096 3097 // }}} 3098 3099 3100 // {{{ maps 3101 class memory_map 3102 { 3103 private: 3104 bool m_valid; 3105 std::shared_ptr<command_queue> m_queue; 3106 memory_object m_mem; 3107 void *m_ptr; 3108 3109 public: memory_map(std::shared_ptr<command_queue> cq,memory_object const & mem,void * ptr)3110 memory_map(std::shared_ptr<command_queue> cq, memory_object const &mem, void *ptr) 3111 : m_valid(true), m_queue(cq), m_mem(mem), m_ptr(ptr) 3112 { 3113 } 3114 ~memory_map()3115 ~memory_map() 3116 { 3117 if (m_valid) 3118 delete release(0, py::none()); 3119 } 3120 release(command_queue * cq,py::object py_wait_for)3121 event *release(command_queue *cq, py::object py_wait_for) 3122 { 3123 PYOPENCL_PARSE_WAIT_FOR; 3124 3125 if (cq == 0) 3126 cq = m_queue.get(); 3127 3128 cl_event evt; 3129 PYOPENCL_CALL_GUARDED(clEnqueueUnmapMemObject, ( 3130 cq->data(), m_mem.data(), m_ptr, 3131 PYOPENCL_WAITLIST_ARGS, &evt 3132 )); 3133 3134 m_valid = false; 3135 3136 PYOPENCL_RETURN_NEW_EVENT(evt); 3137 } 3138 }; 3139 3140 3141 3142 3143 // FIXME: Reenable in pypy 3144 #ifndef PYPY_VERSION 3145 inline enqueue_map_buffer(std::shared_ptr<command_queue> cq,memory_object_holder & buf,cl_map_flags flags,size_t offset,py::object py_shape,py::object dtype,py::object py_order,py::object py_strides,py::object py_wait_for,bool is_blocking)3146 py::object enqueue_map_buffer( 3147 std::shared_ptr<command_queue> cq, 3148 memory_object_holder &buf, 3149 cl_map_flags flags, 3150 size_t offset, 3151 py::object py_shape, py::object dtype, 3152 py::object py_order, py::object py_strides, 3153 py::object py_wait_for, 3154 bool is_blocking 3155 ) 3156 { 3157 PYOPENCL_PARSE_WAIT_FOR; 3158 PYOPENCL_PARSE_NUMPY_ARRAY_SPEC; 3159 3160 npy_uintp size_in_bytes = tp_descr->elsize; 3161 for (npy_intp sdim: shape) 3162 size_in_bytes *= sdim; 3163 3164 py::object result; 3165 3166 cl_event evt; 3167 cl_int status_code; 3168 PYOPENCL_PRINT_CALL_TRACE("clEnqueueMapBuffer"); 3169 void *mapped; 3170 3171 PYOPENCL_RETRY_IF_MEM_ERROR( 3172 { 3173 { 3174 py::gil_scoped_release release; 3175 mapped = clEnqueueMapBuffer( 3176 cq->data(), buf.data(), 3177 PYOPENCL_CAST_BOOL(is_blocking), flags, 3178 offset, size_in_bytes, 3179 PYOPENCL_WAITLIST_ARGS, &evt, 3180 &status_code); 3181 } 3182 if (status_code != CL_SUCCESS) 3183 throw pyopencl::error("clEnqueueMapBuffer", status_code); 3184 } ); 3185 3186 event evt_handle(evt, false); 3187 3188 std::unique_ptr<memory_map> map; 3189 try 3190 { 3191 result = py::object(py::reinterpret_steal<py::object>(PyArray_NewFromDescr( 3192 &PyArray_Type, tp_descr, 3193 shape.size(), 3194 shape.empty() ? nullptr : &shape.front(), 3195 strides.empty() ? nullptr : &strides.front(), 3196 mapped, ary_flags, /*obj*/nullptr))); 3197 3198 if (size_in_bytes != (npy_uintp) PyArray_NBYTES(result.ptr())) 3199 throw pyopencl::error("enqueue_map_buffer", CL_INVALID_VALUE, 3200 "miscalculated numpy array size (not contiguous?)"); 3201 3202 map = std::unique_ptr<memory_map>(new memory_map(cq, buf, mapped)); 3203 } 3204 catch (...) 3205 { 3206 PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, ( 3207 cq->data(), buf.data(), mapped, 0, 0, 0)); 3208 throw; 3209 } 3210 3211 py::object map_py(handle_from_new_ptr(map.release())); 3212 PyArray_BASE(result.ptr()) = map_py.ptr(); 3213 Py_INCREF(map_py.ptr()); 3214 3215 return py::make_tuple( 3216 result, 3217 handle_from_new_ptr(new event(evt_handle))); 3218 } 3219 #endif 3220 3221 3222 3223 3224 // FIXME: Reenable in pypy 3225 #ifndef PYPY_VERSION 3226 inline enqueue_map_image(std::shared_ptr<command_queue> cq,memory_object_holder & img,cl_map_flags flags,py::object py_origin,py::object py_region,py::object py_shape,py::object dtype,py::object py_order,py::object py_strides,py::object py_wait_for,bool is_blocking)3227 py::object enqueue_map_image( 3228 std::shared_ptr<command_queue> cq, 3229 memory_object_holder &img, 3230 cl_map_flags flags, 3231 py::object py_origin, 3232 py::object py_region, 3233 py::object py_shape, py::object dtype, 3234 py::object py_order, py::object py_strides, 3235 py::object py_wait_for, 3236 bool is_blocking 3237 ) 3238 { 3239 PYOPENCL_PARSE_WAIT_FOR; 3240 PYOPENCL_PARSE_NUMPY_ARRAY_SPEC; 3241 COPY_PY_COORD_TRIPLE(origin); 3242 COPY_PY_REGION_TRIPLE(region); 3243 3244 cl_event evt; 3245 cl_int status_code; 3246 PYOPENCL_PRINT_CALL_TRACE("clEnqueueMapImage"); 3247 size_t row_pitch, slice_pitch; 3248 void *mapped; 3249 PYOPENCL_RETRY_IF_MEM_ERROR( 3250 { 3251 { 3252 py::gil_scoped_release release; 3253 mapped = clEnqueueMapImage( 3254 cq->data(), img.data(), 3255 PYOPENCL_CAST_BOOL(is_blocking), flags, 3256 origin, region, &row_pitch, &slice_pitch, 3257 PYOPENCL_WAITLIST_ARGS, &evt, 3258 &status_code); 3259 } 3260 if (status_code != CL_SUCCESS) 3261 throw pyopencl::error("clEnqueueMapImage", status_code); 3262 } ); 3263 3264 event evt_handle(evt, false); 3265 3266 std::unique_ptr<memory_map> map; 3267 try 3268 { 3269 map = std::unique_ptr<memory_map>(new memory_map(cq, img, mapped)); 3270 } 3271 catch (...) 3272 { 3273 PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, ( 3274 cq->data(), img.data(), mapped, 0, 0, 0)); 3275 throw; 3276 } 3277 3278 py::object result = py::reinterpret_steal<py::object>(PyArray_NewFromDescr( 3279 &PyArray_Type, tp_descr, 3280 shape.size(), 3281 shape.empty() ? nullptr : &shape.front(), 3282 strides.empty() ? nullptr : &strides.front(), 3283 mapped, ary_flags, /*obj*/nullptr)); 3284 3285 py::object map_py(handle_from_new_ptr(map.release())); 3286 PyArray_BASE(result.ptr()) = map_py.ptr(); 3287 Py_INCREF(map_py.ptr()); 3288 3289 return py::make_tuple( 3290 result, 3291 handle_from_new_ptr(new event(evt_handle)), 3292 row_pitch, slice_pitch); 3293 } 3294 #endif 3295 3296 // }}} 3297 3298 3299 // {{{ svm 3300 3301 #if PYOPENCL_CL_VERSION >= 0x2000 3302 3303 class svm_arg_wrapper 3304 { 3305 private: 3306 void *m_ptr; 3307 PYOPENCL_BUFFER_SIZE_T m_size; 3308 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 3309 std::unique_ptr<py_buffer_wrapper> ward; 3310 #endif 3311 3312 public: svm_arg_wrapper(py::object holder)3313 svm_arg_wrapper(py::object holder) 3314 { 3315 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 3316 ward = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper); 3317 #ifdef PYPY_VERSION 3318 // FIXME: get a read-only buffer 3319 // Not quite honest, but Pypy doesn't consider numpy arrays 3320 // created from objects with the __aray_interface__ writeable. 3321 ward->get(holder.ptr(), PyBUF_ANY_CONTIGUOUS); 3322 #else 3323 ward->get(holder.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); 3324 #endif 3325 m_ptr = ward->m_buf.buf; 3326 m_size = ward->m_buf.len; 3327 #else 3328 py::object ward = holder; 3329 if (PyObject_AsWriteBuffer(holder.ptr(), &m_ptr, &m_size)) 3330 throw py::error_already_set(); 3331 #endif 3332 } 3333 ptr() const3334 void *ptr() const 3335 { 3336 return m_ptr; 3337 } size() const3338 size_t size() const 3339 { 3340 return m_size; 3341 } 3342 }; 3343 3344 3345 class svm_allocation : noncopyable 3346 { 3347 private: 3348 std::shared_ptr<context> m_context; 3349 void *m_allocation; 3350 3351 public: svm_allocation(std::shared_ptr<context> const & ctx,size_t size,cl_uint alignment,cl_svm_mem_flags flags)3352 svm_allocation(std::shared_ptr<context> const &ctx, size_t size, cl_uint alignment, cl_svm_mem_flags flags) 3353 : m_context(ctx) 3354 { 3355 PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); 3356 m_allocation = clSVMAlloc( 3357 ctx->data(), 3358 flags, size, alignment); 3359 3360 if (!m_allocation) 3361 throw pyopencl::error("clSVMAlloc", CL_OUT_OF_RESOURCES); 3362 } 3363 ~svm_allocation()3364 ~svm_allocation() 3365 { 3366 if (m_allocation) 3367 release(); 3368 } 3369 release()3370 void release() 3371 { 3372 if (!m_allocation) 3373 throw error("SVMAllocation.release", CL_INVALID_VALUE, 3374 "trying to double-unref svm allocation"); 3375 3376 clSVMFree(m_context->data(), m_allocation); 3377 m_allocation = nullptr; 3378 } 3379 enqueue_release(command_queue & queue,py::object py_wait_for)3380 void enqueue_release(command_queue &queue, py::object py_wait_for) 3381 { 3382 PYOPENCL_PARSE_WAIT_FOR; 3383 3384 if (!m_allocation) 3385 throw error("SVMAllocation.release", CL_INVALID_VALUE, 3386 "trying to double-unref svm allocation"); 3387 3388 cl_event evt; 3389 3390 PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( 3391 queue.data(), 1, &m_allocation, 3392 nullptr, nullptr, 3393 PYOPENCL_WAITLIST_ARGS, &evt)); 3394 3395 m_allocation = nullptr; 3396 } 3397 ptr() const3398 void *ptr() const 3399 { 3400 return m_allocation; 3401 } 3402 ptr_as_int() const3403 intptr_t ptr_as_int() const 3404 { 3405 return (intptr_t) m_allocation; 3406 } 3407 operator ==(svm_allocation const & other) const3408 bool operator==(svm_allocation const &other) const 3409 { 3410 return m_allocation == other.m_allocation; 3411 } 3412 operator !=(svm_allocation const & other) const3413 bool operator!=(svm_allocation const &other) const 3414 { 3415 return m_allocation != other.m_allocation; 3416 } 3417 }; 3418 3419 3420 inline enqueue_svm_memcpy(command_queue & cq,cl_bool is_blocking,svm_arg_wrapper & dst,svm_arg_wrapper & src,py::object py_wait_for)3421 event *enqueue_svm_memcpy( 3422 command_queue &cq, 3423 cl_bool is_blocking, 3424 svm_arg_wrapper &dst, svm_arg_wrapper &src, 3425 py::object py_wait_for 3426 ) 3427 { 3428 PYOPENCL_PARSE_WAIT_FOR; 3429 3430 if (src.size() != dst.size()) 3431 throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, 3432 "sizes of source and destination buffer do not match"); 3433 3434 cl_event evt; 3435 PYOPENCL_CALL_GUARDED( 3436 clEnqueueSVMMemcpy, 3437 ( 3438 cq.data(), 3439 is_blocking, 3440 dst.ptr(), src.ptr(), 3441 dst.size(), 3442 PYOPENCL_WAITLIST_ARGS, 3443 &evt 3444 )); 3445 3446 PYOPENCL_RETURN_NEW_EVENT(evt); 3447 } 3448 3449 3450 inline enqueue_svm_memfill(command_queue & cq,svm_arg_wrapper & dst,py::object py_pattern,py::object byte_count,py::object py_wait_for)3451 event *enqueue_svm_memfill( 3452 command_queue &cq, 3453 svm_arg_wrapper &dst, py::object py_pattern, 3454 py::object byte_count, 3455 py::object py_wait_for 3456 ) 3457 { 3458 PYOPENCL_PARSE_WAIT_FOR; 3459 3460 const void *pattern_ptr; 3461 PYOPENCL_BUFFER_SIZE_T pattern_len; 3462 3463 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 3464 std::unique_ptr<py_buffer_wrapper> pattern_ward(new py_buffer_wrapper); 3465 3466 pattern_ward->get(py_pattern.ptr(), PyBUF_ANY_CONTIGUOUS); 3467 3468 pattern_ptr = pattern_ward->m_buf.buf; 3469 pattern_len = pattern_ward->m_buf.len; 3470 #else 3471 py::object pattern_ward = py_pattern; 3472 if (PyObject_AsReadBuffer(py_pattern.ptr(), &pattern_ptr, &pattern_len)) 3473 throw py::error_already_set(); 3474 #endif 3475 3476 size_t fill_size = dst.size(); 3477 if (!byte_count.is_none()) 3478 fill_size = py::cast<size_t>(byte_count); 3479 3480 cl_event evt; 3481 PYOPENCL_CALL_GUARDED( 3482 clEnqueueSVMMemFill, 3483 ( 3484 cq.data(), 3485 dst.ptr(), pattern_ptr, 3486 pattern_len, 3487 fill_size, 3488 PYOPENCL_WAITLIST_ARGS, 3489 &evt 3490 )); 3491 3492 PYOPENCL_RETURN_NEW_EVENT(evt); 3493 } 3494 3495 3496 inline enqueue_svm_map(command_queue & cq,cl_bool is_blocking,cl_map_flags flags,svm_arg_wrapper & svm,py::object py_wait_for)3497 event *enqueue_svm_map( 3498 command_queue &cq, 3499 cl_bool is_blocking, 3500 cl_map_flags flags, 3501 svm_arg_wrapper &svm, 3502 py::object py_wait_for 3503 ) 3504 { 3505 PYOPENCL_PARSE_WAIT_FOR; 3506 3507 cl_event evt; 3508 PYOPENCL_CALL_GUARDED( 3509 clEnqueueSVMMap, 3510 ( 3511 cq.data(), 3512 is_blocking, 3513 flags, 3514 svm.ptr(), svm.size(), 3515 PYOPENCL_WAITLIST_ARGS, 3516 &evt 3517 )); 3518 3519 PYOPENCL_RETURN_NEW_EVENT(evt); 3520 } 3521 3522 3523 inline enqueue_svm_unmap(command_queue & cq,svm_arg_wrapper & svm,py::object py_wait_for)3524 event *enqueue_svm_unmap( 3525 command_queue &cq, 3526 svm_arg_wrapper &svm, 3527 py::object py_wait_for 3528 ) 3529 { 3530 PYOPENCL_PARSE_WAIT_FOR; 3531 3532 cl_event evt; 3533 PYOPENCL_CALL_GUARDED( 3534 clEnqueueSVMUnmap, 3535 ( 3536 cq.data(), 3537 svm.ptr(), 3538 PYOPENCL_WAITLIST_ARGS, 3539 &evt 3540 )); 3541 3542 PYOPENCL_RETURN_NEW_EVENT(evt); 3543 } 3544 #endif 3545 3546 3547 #if PYOPENCL_CL_VERSION >= 0x2010 3548 inline enqueue_svm_migratemem(command_queue & cq,py::sequence svms,cl_mem_migration_flags flags,py::object py_wait_for)3549 event *enqueue_svm_migratemem( 3550 command_queue &cq, 3551 py::sequence svms, 3552 cl_mem_migration_flags flags, 3553 py::object py_wait_for 3554 ) 3555 { 3556 PYOPENCL_PARSE_WAIT_FOR; 3557 3558 std::vector<const void *> svm_pointers; 3559 std::vector<size_t> sizes; 3560 3561 for (py::handle py_svm: svms) 3562 { 3563 svm_arg_wrapper &svm(py::cast<svm_arg_wrapper &>(py_svm)); 3564 3565 svm_pointers.push_back(svm.ptr()); 3566 sizes.push_back(svm.size()); 3567 } 3568 3569 cl_event evt; 3570 PYOPENCL_CALL_GUARDED( 3571 clEnqueueSVMMigrateMem, 3572 ( 3573 cq.data(), 3574 svm_pointers.size(), 3575 svm_pointers.empty() ? nullptr : &svm_pointers.front(), 3576 sizes.empty() ? nullptr : &sizes.front(), 3577 flags, 3578 PYOPENCL_WAITLIST_ARGS, 3579 &evt 3580 )); 3581 3582 PYOPENCL_RETURN_NEW_EVENT(evt); 3583 } 3584 #endif 3585 3586 // }}} 3587 3588 3589 // {{{ sampler 3590 3591 class sampler : noncopyable 3592 { 3593 private: 3594 cl_sampler m_sampler; 3595 3596 public: 3597 #if PYOPENCL_CL_VERSION >= 0x2000 sampler(context const & ctx,py::sequence py_props)3598 sampler(context const &ctx, py::sequence py_props) 3599 { 3600 int hex_plat_version = ctx.get_hex_platform_version(); 3601 3602 if (hex_plat_version < 0x2000) 3603 { 3604 std::cerr << 3605 "sampler properties given as an iterable, " 3606 "which uses an OpenCL 2+-only interface, " 3607 "but the context's platform does not " 3608 "declare OpenCL 2 support. Proceeding " 3609 "as requested, but the next thing you see " 3610 "may be a crash." << std:: endl; 3611 } 3612 3613 cl_sampler_properties props[py::len(py_props) + 1]; 3614 { 3615 size_t i = 0; 3616 for (auto prop: py_props) 3617 props[i++] = py::cast<cl_sampler_properties>(prop); 3618 props[i++] = 0; 3619 } 3620 3621 cl_int status_code; 3622 PYOPENCL_PRINT_CALL_TRACE("clCreateSamplerWithProperties"); 3623 3624 m_sampler = clCreateSamplerWithProperties( 3625 ctx.data(), 3626 props, 3627 &status_code); 3628 3629 if (status_code != CL_SUCCESS) 3630 throw pyopencl::error("Sampler", status_code); 3631 } 3632 #endif 3633 sampler(context const & ctx,bool normalized_coordinates,cl_addressing_mode am,cl_filter_mode fm)3634 sampler(context const &ctx, bool normalized_coordinates, 3635 cl_addressing_mode am, cl_filter_mode fm) 3636 { 3637 PYOPENCL_PRINT_CALL_TRACE("clCreateSampler"); 3638 3639 int hex_plat_version = ctx.get_hex_platform_version(); 3640 #if PYOPENCL_CL_VERSION >= 0x2000 3641 if (hex_plat_version >= 0x2000) 3642 { 3643 cl_sampler_properties props_list[] = { 3644 CL_SAMPLER_NORMALIZED_COORDS, normalized_coordinates, 3645 CL_SAMPLER_ADDRESSING_MODE, am, 3646 CL_SAMPLER_FILTER_MODE, fm, 3647 0, 3648 }; 3649 3650 cl_int status_code; 3651 3652 PYOPENCL_PRINT_CALL_TRACE("clCreateSamplerWithProperties"); 3653 m_sampler = clCreateSamplerWithProperties( 3654 ctx.data(), props_list, &status_code); 3655 3656 if (status_code != CL_SUCCESS) 3657 throw pyopencl::error("Sampler", status_code); 3658 } 3659 else 3660 #endif 3661 { 3662 cl_int status_code; 3663 3664 #if defined(__GNUG__) && !defined(__clang__) 3665 #pragma GCC diagnostic push 3666 #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 3667 #endif 3668 m_sampler = clCreateSampler( 3669 ctx.data(), 3670 normalized_coordinates, 3671 am, fm, &status_code); 3672 #if defined(__GNUG__) && !defined(__clang__) 3673 #pragma GCC diagnostic pop 3674 #endif 3675 3676 if (status_code != CL_SUCCESS) 3677 throw pyopencl::error("Sampler", status_code); 3678 } 3679 } 3680 sampler(cl_sampler samp,bool retain)3681 sampler(cl_sampler samp, bool retain) 3682 : m_sampler(samp) 3683 { 3684 if (retain) 3685 PYOPENCL_CALL_GUARDED(clRetainSampler, (samp)); 3686 } 3687 ~sampler()3688 ~sampler() 3689 { 3690 PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseSampler, (m_sampler)); 3691 } 3692 data() const3693 cl_sampler data() const 3694 { 3695 return m_sampler; 3696 } 3697 3698 PYOPENCL_EQUALITY_TESTS(sampler); 3699 get_info(cl_sampler_info param_name) const3700 py::object get_info(cl_sampler_info param_name) const 3701 { 3702 switch (param_name) 3703 { 3704 case CL_SAMPLER_REFERENCE_COUNT: 3705 PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, 3706 cl_uint); 3707 case CL_SAMPLER_CONTEXT: 3708 PYOPENCL_GET_OPAQUE_INFO(Sampler, m_sampler, param_name, 3709 cl_context, context); 3710 case CL_SAMPLER_ADDRESSING_MODE: 3711 PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, 3712 cl_addressing_mode); 3713 case CL_SAMPLER_FILTER_MODE: 3714 PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, 3715 cl_filter_mode); 3716 case CL_SAMPLER_NORMALIZED_COORDS: 3717 PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, 3718 cl_bool); 3719 3720 default: 3721 throw error("Sampler.get_info", CL_INVALID_VALUE); 3722 } 3723 } 3724 }; 3725 3726 // }}} 3727 3728 3729 // {{{ program 3730 3731 class program : noncopyable 3732 { 3733 public: 3734 enum program_kind_type { KND_UNKNOWN, KND_SOURCE, KND_BINARY }; 3735 3736 private: 3737 cl_program m_program; 3738 program_kind_type m_program_kind; 3739 3740 public: program(cl_program prog,bool retain,program_kind_type progkind=KND_UNKNOWN)3741 program(cl_program prog, bool retain, program_kind_type progkind=KND_UNKNOWN) 3742 : m_program(prog), m_program_kind(progkind) 3743 { 3744 if (retain) 3745 PYOPENCL_CALL_GUARDED(clRetainProgram, (prog)); 3746 } 3747 ~program()3748 ~program() 3749 { 3750 PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseProgram, (m_program)); 3751 } 3752 data() const3753 cl_program data() const 3754 { 3755 return m_program; 3756 } 3757 kind() const3758 program_kind_type kind() const 3759 { 3760 return m_program_kind; 3761 } 3762 3763 PYOPENCL_EQUALITY_TESTS(program); 3764 get_info(cl_program_info param_name) const3765 py::object get_info(cl_program_info param_name) const 3766 { 3767 switch (param_name) 3768 { 3769 case CL_PROGRAM_REFERENCE_COUNT: 3770 PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, 3771 cl_uint); 3772 case CL_PROGRAM_CONTEXT: 3773 PYOPENCL_GET_OPAQUE_INFO(Program, m_program, param_name, 3774 cl_context, context); 3775 case CL_PROGRAM_NUM_DEVICES: 3776 PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, 3777 cl_uint); 3778 case CL_PROGRAM_DEVICES: 3779 { 3780 std::vector<cl_device_id> result; 3781 PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); 3782 3783 py::list py_result; 3784 for (cl_device_id did: result) 3785 py_result.append(handle_from_new_ptr( 3786 new pyopencl::device(did))); 3787 return py_result; 3788 } 3789 case CL_PROGRAM_SOURCE: 3790 PYOPENCL_GET_STR_INFO(Program, m_program, param_name); 3791 case CL_PROGRAM_BINARY_SIZES: 3792 { 3793 std::vector<size_t> result; 3794 PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); 3795 PYOPENCL_RETURN_VECTOR(size_t, result); 3796 } 3797 case CL_PROGRAM_BINARIES: 3798 // {{{ 3799 { 3800 std::vector<size_t> sizes; 3801 PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_BINARY_SIZES, sizes); 3802 3803 size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0); 3804 3805 std::unique_ptr<unsigned char []> result( 3806 new unsigned char[total_size]); 3807 std::vector<unsigned char *> result_ptrs; 3808 3809 unsigned char *ptr = result.get(); 3810 for (unsigned i = 0; i < sizes.size(); ++i) 3811 { 3812 result_ptrs.push_back(ptr); 3813 ptr += sizes[i]; 3814 } 3815 3816 PYOPENCL_CALL_GUARDED(clGetProgramInfo, 3817 (m_program, param_name, sizes.size()*sizeof(unsigned char *), 3818 result_ptrs.empty( ) ? nullptr : &result_ptrs.front(), 0)); \ 3819 3820 py::list py_result; 3821 ptr = result.get(); 3822 for (unsigned i = 0; i < sizes.size(); ++i) 3823 { 3824 py::object binary_pyobj( 3825 py::reinterpret_steal<py::object>( 3826 #if PY_VERSION_HEX >= 0x03000000 3827 PyBytes_FromStringAndSize( 3828 reinterpret_cast<char *>(ptr), sizes[i]) 3829 #else 3830 PyString_FromStringAndSize( 3831 reinterpret_cast<char *>(ptr), sizes[i]) 3832 #endif 3833 )); 3834 py_result.append(binary_pyobj); 3835 ptr += sizes[i]; 3836 } 3837 return py_result; 3838 } 3839 // }}} 3840 #if PYOPENCL_CL_VERSION >= 0x1020 3841 case CL_PROGRAM_NUM_KERNELS: 3842 PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, 3843 size_t); 3844 case CL_PROGRAM_KERNEL_NAMES: 3845 PYOPENCL_GET_STR_INFO(Program, m_program, param_name); 3846 #endif 3847 3848 default: 3849 throw error("Program.get_info", CL_INVALID_VALUE); 3850 } 3851 } 3852 get_build_info(device const & dev,cl_program_build_info param_name) const3853 py::object get_build_info( 3854 device const &dev, 3855 cl_program_build_info param_name) const 3856 { 3857 switch (param_name) 3858 { 3859 #define PYOPENCL_FIRST_ARG m_program, dev.data() // hackety hack 3860 case CL_PROGRAM_BUILD_STATUS: 3861 PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, 3862 PYOPENCL_FIRST_ARG, param_name, 3863 cl_build_status); 3864 case CL_PROGRAM_BUILD_OPTIONS: 3865 case CL_PROGRAM_BUILD_LOG: 3866 PYOPENCL_GET_STR_INFO(ProgramBuild, 3867 PYOPENCL_FIRST_ARG, param_name); 3868 #if PYOPENCL_CL_VERSION >= 0x1020 3869 case CL_PROGRAM_BINARY_TYPE: 3870 PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, 3871 PYOPENCL_FIRST_ARG, param_name, 3872 cl_program_binary_type); 3873 #endif 3874 #if PYOPENCL_CL_VERSION >= 0x2000 3875 case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: 3876 PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, 3877 PYOPENCL_FIRST_ARG, param_name, 3878 size_t); 3879 #endif 3880 #undef PYOPENCL_FIRST_ARG 3881 3882 default: 3883 throw error("Program.get_build_info", CL_INVALID_VALUE); 3884 } 3885 } 3886 build(std::string options,py::object py_devices)3887 void build(std::string options, py::object py_devices) 3888 { 3889 PYOPENCL_PARSE_PY_DEVICES; 3890 3891 PYOPENCL_CALL_GUARDED_THREADED(clBuildProgram, 3892 (m_program, num_devices, devices, 3893 options.c_str(), 0 ,0)); 3894 } 3895 3896 #if PYOPENCL_CL_VERSION >= 0x1020 compile(std::string options,py::object py_devices,py::object py_headers)3897 void compile(std::string options, py::object py_devices, 3898 py::object py_headers) 3899 { 3900 PYOPENCL_PARSE_PY_DEVICES; 3901 3902 // {{{ pick apart py_headers 3903 // py_headers is a list of tuples *(name, program)* 3904 3905 std::vector<std::string> header_names; 3906 std::vector<cl_program> programs; 3907 for (py::handle name_hdr_tup_py: py_headers) 3908 { 3909 py::tuple name_hdr_tup = py::reinterpret_borrow<py::tuple>(name_hdr_tup_py); 3910 if (py::len(name_hdr_tup) != 2) 3911 throw error("Program.compile", CL_INVALID_VALUE, 3912 "epxected (name, header) tuple in headers list"); 3913 std::string name = (name_hdr_tup[0]).cast<std::string>(); 3914 program &prg = (name_hdr_tup[1]).cast<program &>(); 3915 3916 header_names.push_back(name); 3917 programs.push_back(prg.data()); 3918 } 3919 3920 std::vector<const char *> header_name_ptrs; 3921 for (std::string const &name: header_names) 3922 header_name_ptrs.push_back(name.c_str()); 3923 3924 // }}} 3925 3926 PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram, 3927 (m_program, num_devices, devices, 3928 options.c_str(), header_names.size(), 3929 programs.empty() ? nullptr : &programs.front(), 3930 header_name_ptrs.empty() ? nullptr : &header_name_ptrs.front(), 3931 0, 0)); 3932 } 3933 #endif 3934 }; 3935 3936 3937 3938 3939 inline create_program_with_source(context & ctx,std::string const & src)3940 program *create_program_with_source( 3941 context &ctx, 3942 std::string const &src) 3943 { 3944 const char *string = src.c_str(); 3945 size_t length = src.size(); 3946 3947 cl_int status_code; 3948 PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithSource"); 3949 cl_program result = clCreateProgramWithSource( 3950 ctx.data(), 1, &string, &length, &status_code); 3951 if (status_code != CL_SUCCESS) 3952 throw pyopencl::error("clCreateProgramWithSource", status_code); 3953 3954 try 3955 { 3956 return new program(result, false, program::KND_SOURCE); 3957 } 3958 catch (...) 3959 { 3960 clReleaseProgram(result); 3961 throw; 3962 } 3963 } 3964 3965 3966 3967 3968 3969 inline create_program_with_binary(context & ctx,py::sequence py_devices,py::sequence py_binaries)3970 program *create_program_with_binary( 3971 context &ctx, 3972 py::sequence py_devices, 3973 py::sequence py_binaries) 3974 { 3975 std::vector<cl_device_id> devices; 3976 std::vector<const unsigned char *> binaries; 3977 std::vector<size_t> sizes; 3978 3979 size_t num_devices = len(py_devices); 3980 if (len(py_binaries) != num_devices) 3981 throw error("create_program_with_binary", CL_INVALID_VALUE, 3982 "device and binary counts don't match"); 3983 3984 for (size_t i = 0; i < num_devices; ++i) 3985 { 3986 devices.push_back( 3987 (py_devices[i]).cast<device const &>().data()); 3988 const void *buf; 3989 PYOPENCL_BUFFER_SIZE_T len; 3990 3991 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 3992 py_buffer_wrapper buf_wrapper; 3993 3994 buf_wrapper.get(py::object(py_binaries[i]).ptr(), PyBUF_ANY_CONTIGUOUS); 3995 3996 buf = buf_wrapper.m_buf.buf; 3997 len = buf_wrapper.m_buf.len; 3998 #else 3999 if (PyObject_AsReadBuffer( 4000 py::object(py_binaries[i]).ptr(), &buf, &len)) 4001 throw py::error_already_set(); 4002 #endif 4003 4004 binaries.push_back(reinterpret_cast<const unsigned char *>(buf)); 4005 sizes.push_back(len); 4006 } 4007 4008 cl_int binary_statuses[num_devices]; 4009 4010 cl_int status_code; 4011 PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary"); 4012 cl_program result = clCreateProgramWithBinary( 4013 ctx.data(), num_devices, 4014 devices.empty( ) ? nullptr : &devices.front(), 4015 sizes.empty( ) ? nullptr : &sizes.front(), 4016 binaries.empty( ) ? nullptr : &binaries.front(), 4017 binary_statuses, 4018 &status_code); 4019 if (status_code != CL_SUCCESS) 4020 throw pyopencl::error("clCreateProgramWithBinary", status_code); 4021 4022 /* 4023 for (int i = 0; i < num_devices; ++i) 4024 printf("%d:%d\n", i, binary_statuses[i]); 4025 */ 4026 4027 try 4028 { 4029 return new program(result, false, program::KND_BINARY); 4030 } 4031 catch (...) 4032 { 4033 clReleaseProgram(result); 4034 throw; 4035 } 4036 } 4037 4038 4039 4040 #if (PYOPENCL_CL_VERSION >= 0x1020) && \ 4041 ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__)) 4042 inline create_program_with_built_in_kernels(context & ctx,py::object py_devices,std::string const & kernel_names)4043 program *create_program_with_built_in_kernels( 4044 context &ctx, 4045 py::object py_devices, 4046 std::string const &kernel_names) 4047 { 4048 PYOPENCL_PARSE_PY_DEVICES; 4049 4050 cl_int status_code; 4051 PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBuiltInKernels"); 4052 cl_program result = clCreateProgramWithBuiltInKernels( 4053 ctx.data(), num_devices, devices, 4054 kernel_names.c_str(), &status_code); 4055 if (status_code != CL_SUCCESS) 4056 throw pyopencl::error("clCreateProgramWithBuiltInKernels", status_code); 4057 4058 try 4059 { 4060 return new program(result, false); 4061 } 4062 catch (...) 4063 { 4064 clReleaseProgram(result); 4065 throw; 4066 } 4067 } 4068 #endif 4069 4070 4071 4072 #if PYOPENCL_CL_VERSION >= 0x1020 4073 inline link_program(context & ctx,py::object py_programs,std::string const & options,py::object py_devices)4074 program *link_program( 4075 context &ctx, 4076 py::object py_programs, 4077 std::string const &options, 4078 py::object py_devices 4079 ) 4080 { 4081 PYOPENCL_PARSE_PY_DEVICES; 4082 4083 std::vector<cl_program> programs; 4084 for (py::handle py_prg: py_programs) 4085 { 4086 program &prg = (py_prg).cast<program &>(); 4087 programs.push_back(prg.data()); 4088 } 4089 4090 cl_int status_code; 4091 PYOPENCL_PRINT_CALL_TRACE("clLinkProgram"); 4092 cl_program result = clLinkProgram( 4093 ctx.data(), num_devices, devices, 4094 options.c_str(), 4095 programs.size(), 4096 programs.empty() ? nullptr : &programs.front(), 4097 0, 0, 4098 &status_code); 4099 4100 if (status_code != CL_SUCCESS) 4101 throw pyopencl::error("clLinkProgram", result, status_code); 4102 4103 try 4104 { 4105 return new program(result, false); 4106 } 4107 catch (...) 4108 { 4109 clReleaseProgram(result); 4110 throw; 4111 } 4112 } 4113 4114 #endif 4115 4116 4117 #if PYOPENCL_CL_VERSION >= 0x1020 4118 inline unload_platform_compiler(platform & plat)4119 void unload_platform_compiler(platform &plat) 4120 { 4121 PYOPENCL_CALL_GUARDED(clUnloadPlatformCompiler, (plat.data())); 4122 } 4123 #endif 4124 4125 // }}} 4126 4127 4128 // {{{ kernel 4129 class local_memory 4130 { 4131 private: 4132 size_t m_size; 4133 4134 public: local_memory(size_t size)4135 local_memory(size_t size) 4136 : m_size(size) 4137 { } 4138 size() const4139 size_t size() const 4140 { return m_size; } 4141 }; 4142 4143 4144 4145 4146 class kernel : noncopyable 4147 { 4148 private: 4149 cl_kernel m_kernel; 4150 4151 public: kernel(cl_kernel knl,bool retain)4152 kernel(cl_kernel knl, bool retain) 4153 : m_kernel(knl) 4154 { 4155 if (retain) 4156 PYOPENCL_CALL_GUARDED(clRetainKernel, (knl)); 4157 } 4158 kernel(program const & prg,std::string const & kernel_name)4159 kernel(program const &prg, std::string const &kernel_name) 4160 { 4161 cl_int status_code; 4162 4163 PYOPENCL_PRINT_CALL_TRACE("clCreateKernel"); 4164 m_kernel = clCreateKernel(prg.data(), kernel_name.c_str(), 4165 &status_code); 4166 if (status_code != CL_SUCCESS) 4167 throw pyopencl::error("clCreateKernel", status_code); 4168 } 4169 ~kernel()4170 ~kernel() 4171 { 4172 PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (m_kernel)); 4173 } 4174 data() const4175 cl_kernel data() const 4176 { 4177 return m_kernel; 4178 } 4179 4180 PYOPENCL_EQUALITY_TESTS(kernel); 4181 set_arg_null(cl_uint arg_index)4182 void set_arg_null(cl_uint arg_index) 4183 { 4184 cl_mem m = 0; 4185 PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index, 4186 sizeof(cl_mem), &m)); 4187 } 4188 set_arg_mem(cl_uint arg_index,memory_object_holder & moh)4189 void set_arg_mem(cl_uint arg_index, memory_object_holder &moh) 4190 { 4191 cl_mem m = moh.data(); 4192 PYOPENCL_CALL_GUARDED(clSetKernelArg, 4193 (m_kernel, arg_index, sizeof(cl_mem), &m)); 4194 } 4195 set_arg_local(cl_uint arg_index,local_memory const & loc)4196 void set_arg_local(cl_uint arg_index, local_memory const &loc) 4197 { 4198 PYOPENCL_CALL_GUARDED(clSetKernelArg, 4199 (m_kernel, arg_index, loc.size(), 0)); 4200 } 4201 set_arg_sampler(cl_uint arg_index,sampler const & smp)4202 void set_arg_sampler(cl_uint arg_index, sampler const &smp) 4203 { 4204 cl_sampler s = smp.data(); 4205 PYOPENCL_CALL_GUARDED(clSetKernelArg, 4206 (m_kernel, arg_index, sizeof(cl_sampler), &s)); 4207 } 4208 set_arg_buf(cl_uint arg_index,py::object py_buffer)4209 void set_arg_buf(cl_uint arg_index, py::object py_buffer) 4210 { 4211 const void *buf; 4212 PYOPENCL_BUFFER_SIZE_T len; 4213 4214 #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE 4215 py_buffer_wrapper buf_wrapper; 4216 4217 try 4218 { 4219 buf_wrapper.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS); 4220 } 4221 catch (py::error_already_set &) 4222 { 4223 PyErr_Clear(); 4224 throw error("Kernel.set_arg", CL_INVALID_VALUE, 4225 "invalid kernel argument"); 4226 } 4227 4228 buf = buf_wrapper.m_buf.buf; 4229 len = buf_wrapper.m_buf.len; 4230 #else 4231 if (PyObject_AsReadBuffer(py_buffer.ptr(), &buf, &len)) 4232 { 4233 PyErr_Clear(); 4234 throw error("Kernel.set_arg", CL_INVALID_VALUE, 4235 "invalid kernel argument"); 4236 } 4237 #endif 4238 4239 PYOPENCL_CALL_GUARDED(clSetKernelArg, 4240 (m_kernel, arg_index, len, buf)); 4241 } 4242 4243 #if PYOPENCL_CL_VERSION >= 0x2000 set_arg_svm(cl_uint arg_index,svm_arg_wrapper const & wrp)4244 void set_arg_svm(cl_uint arg_index, svm_arg_wrapper const &wrp) 4245 { 4246 PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer, 4247 (m_kernel, arg_index, wrp.ptr())); 4248 } 4249 #endif 4250 set_arg(cl_uint arg_index,py::object arg)4251 void set_arg(cl_uint arg_index, py::object arg) 4252 { 4253 if (arg.ptr() == Py_None) 4254 { 4255 set_arg_null(arg_index); 4256 return; 4257 } 4258 4259 try 4260 { 4261 set_arg_mem(arg_index, arg.cast<memory_object_holder &>()); 4262 return; 4263 } 4264 catch (py::cast_error &) { } 4265 4266 #if PYOPENCL_CL_VERSION >= 0x2000 4267 try 4268 { 4269 set_arg_svm(arg_index, arg.cast<svm_arg_wrapper const &>()); 4270 return; 4271 } 4272 catch (py::cast_error &) { } 4273 #endif 4274 4275 try 4276 { 4277 set_arg_local(arg_index, arg.cast<local_memory>()); 4278 return; 4279 } 4280 catch (py::cast_error &) { } 4281 4282 try 4283 { 4284 set_arg_sampler(arg_index, arg.cast<const sampler &>()); 4285 return; 4286 } 4287 catch (py::cast_error &) { } 4288 4289 set_arg_buf(arg_index, arg); 4290 } 4291 get_info(cl_kernel_info param_name) const4292 py::object get_info(cl_kernel_info param_name) const 4293 { 4294 switch (param_name) 4295 { 4296 case CL_KERNEL_FUNCTION_NAME: 4297 PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name); 4298 case CL_KERNEL_NUM_ARGS: 4299 case CL_KERNEL_REFERENCE_COUNT: 4300 PYOPENCL_GET_INTEGRAL_INFO(Kernel, m_kernel, param_name, 4301 cl_uint); 4302 case CL_KERNEL_CONTEXT: 4303 PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name, 4304 cl_context, context); 4305 case CL_KERNEL_PROGRAM: 4306 PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name, 4307 cl_program, program); 4308 #if PYOPENCL_CL_VERSION >= 0x1020 4309 case CL_KERNEL_ATTRIBUTES: 4310 PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name); 4311 #endif 4312 default: 4313 throw error("Kernel.get_info", CL_INVALID_VALUE); 4314 } 4315 } 4316 get_work_group_info(cl_kernel_work_group_info param_name,device const & dev) const4317 py::object get_work_group_info( 4318 cl_kernel_work_group_info param_name, 4319 device const &dev 4320 ) const 4321 { 4322 switch (param_name) 4323 { 4324 #define PYOPENCL_FIRST_ARG m_kernel, dev.data() // hackety hack 4325 case CL_KERNEL_WORK_GROUP_SIZE: 4326 PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, 4327 PYOPENCL_FIRST_ARG, param_name, 4328 size_t); 4329 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: 4330 { 4331 std::vector<size_t> result; 4332 PYOPENCL_GET_VEC_INFO(KernelWorkGroup, 4333 PYOPENCL_FIRST_ARG, param_name, result); 4334 4335 PYOPENCL_RETURN_VECTOR(size_t, result); 4336 } 4337 case CL_KERNEL_LOCAL_MEM_SIZE: 4338 #if PYOPENCL_CL_VERSION >= 0x1010 4339 case CL_KERNEL_PRIVATE_MEM_SIZE: 4340 #endif 4341 PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, 4342 PYOPENCL_FIRST_ARG, param_name, 4343 cl_ulong); 4344 4345 #if PYOPENCL_CL_VERSION >= 0x1010 4346 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: 4347 PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, 4348 PYOPENCL_FIRST_ARG, param_name, 4349 size_t); 4350 #endif 4351 default: 4352 throw error("Kernel.get_work_group_info", CL_INVALID_VALUE); 4353 #undef PYOPENCL_FIRST_ARG 4354 } 4355 } 4356 4357 #if PYOPENCL_CL_VERSION >= 0x1020 get_arg_info(cl_uint arg_index,cl_kernel_arg_info param_name) const4358 py::object get_arg_info( 4359 cl_uint arg_index, 4360 cl_kernel_arg_info param_name 4361 ) const 4362 { 4363 switch (param_name) 4364 { 4365 #define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack 4366 case CL_KERNEL_ARG_ADDRESS_QUALIFIER: 4367 PYOPENCL_GET_INTEGRAL_INFO(KernelArg, 4368 PYOPENCL_FIRST_ARG, param_name, 4369 cl_kernel_arg_address_qualifier); 4370 4371 case CL_KERNEL_ARG_ACCESS_QUALIFIER: 4372 PYOPENCL_GET_INTEGRAL_INFO(KernelArg, 4373 PYOPENCL_FIRST_ARG, param_name, 4374 cl_kernel_arg_access_qualifier); 4375 4376 case CL_KERNEL_ARG_TYPE_NAME: 4377 case CL_KERNEL_ARG_NAME: 4378 PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name); 4379 #undef PYOPENCL_FIRST_ARG 4380 default: 4381 throw error("Kernel.get_arg_info", CL_INVALID_VALUE); 4382 } 4383 } 4384 #endif 4385 }; 4386 4387 4388 inline create_kernels_in_program(program & pgm)4389 py::list create_kernels_in_program(program &pgm) 4390 { 4391 cl_uint num_kernels; 4392 PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, ( 4393 pgm.data(), 0, 0, &num_kernels)); 4394 4395 std::vector<cl_kernel> kernels(num_kernels); 4396 PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, ( 4397 pgm.data(), num_kernels, 4398 kernels.empty( ) ? nullptr : &kernels.front(), &num_kernels)); 4399 4400 py::list result; 4401 for (cl_kernel knl: kernels) 4402 result.append(handle_from_new_ptr(new kernel(knl, true))); 4403 4404 return result; 4405 } 4406 4407 4408 4409 inline enqueue_nd_range_kernel(command_queue & cq,kernel & knl,py::object py_global_work_size,py::object py_local_work_size,py::object py_global_work_offset,py::object py_wait_for,bool g_times_l)4410 event *enqueue_nd_range_kernel( 4411 command_queue &cq, 4412 kernel &knl, 4413 py::object py_global_work_size, 4414 py::object py_local_work_size, 4415 py::object py_global_work_offset, 4416 py::object py_wait_for, 4417 bool g_times_l) 4418 { 4419 PYOPENCL_PARSE_WAIT_FOR; 4420 4421 cl_uint work_dim = len(py_global_work_size); 4422 4423 std::vector<size_t> global_work_size; 4424 COPY_PY_LIST(size_t, global_work_size); 4425 4426 size_t *local_work_size_ptr = 0; 4427 std::vector<size_t> local_work_size; 4428 if (py_local_work_size.ptr() != Py_None) 4429 { 4430 if (g_times_l) 4431 work_dim = std::max(work_dim, unsigned(len(py_local_work_size))); 4432 else 4433 if (work_dim != unsigned(len(py_local_work_size))) 4434 throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE, 4435 "global/local work sizes have differing dimensions"); 4436 4437 COPY_PY_LIST(size_t, local_work_size); 4438 4439 while (local_work_size.size() < work_dim) 4440 local_work_size.push_back(1); 4441 while (global_work_size.size() < work_dim) 4442 global_work_size.push_back(1); 4443 4444 local_work_size_ptr = local_work_size.empty( ) ? nullptr : &local_work_size.front(); 4445 } 4446 4447 if (g_times_l && local_work_size_ptr) 4448 { 4449 for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis) 4450 global_work_size[work_axis] *= local_work_size[work_axis]; 4451 } 4452 4453 size_t *global_work_offset_ptr = 0; 4454 std::vector<size_t> global_work_offset; 4455 if (py_global_work_offset.ptr() != Py_None) 4456 { 4457 if (work_dim != unsigned(len(py_global_work_offset))) 4458 throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE, 4459 "global work size and offset have differing dimensions"); 4460 4461 COPY_PY_LIST(size_t, global_work_offset); 4462 4463 if (g_times_l && local_work_size_ptr) 4464 { 4465 for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis) 4466 global_work_offset[work_axis] *= local_work_size[work_axis]; 4467 } 4468 4469 global_work_offset_ptr = global_work_offset.empty( ) ? nullptr : &global_work_offset.front(); 4470 } 4471 4472 PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( { 4473 cl_event evt; 4474 PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, ( 4475 cq.data(), 4476 knl.data(), 4477 work_dim, 4478 global_work_offset_ptr, 4479 global_work_size.empty( ) ? nullptr : &global_work_size.front(), 4480 local_work_size_ptr, 4481 PYOPENCL_WAITLIST_ARGS, &evt 4482 )); 4483 PYOPENCL_RETURN_NEW_EVENT(evt); 4484 } ); 4485 } 4486 4487 // }}} 4488 4489 4490 // {{{ gl interop 4491 inline have_gl()4492 bool have_gl() 4493 { 4494 #ifdef HAVE_GL 4495 return true; 4496 #else 4497 return false; 4498 #endif 4499 } 4500 4501 4502 4503 4504 #ifdef HAVE_GL 4505 4506 #ifdef __APPLE__ 4507 inline get_apple_cgl_share_group()4508 cl_context_properties get_apple_cgl_share_group() 4509 { 4510 CGLContextObj kCGLContext = CGLGetCurrentContext(); 4511 CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext); 4512 4513 return (cl_context_properties) kCGLShareGroup; 4514 } 4515 #endif /* __APPLE__ */ 4516 4517 4518 4519 4520 class gl_buffer : public memory_object 4521 { 4522 public: gl_buffer(cl_mem mem,bool retain,hostbuf_t hostbuf=hostbuf_t ())4523 gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) 4524 : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) 4525 { } 4526 }; 4527 4528 4529 4530 4531 class gl_renderbuffer : public memory_object 4532 { 4533 public: gl_renderbuffer(cl_mem mem,bool retain,hostbuf_t hostbuf=hostbuf_t ())4534 gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) 4535 : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) 4536 { } 4537 }; 4538 4539 4540 4541 4542 class gl_texture : public image 4543 { 4544 public: gl_texture(cl_mem mem,bool retain,hostbuf_t hostbuf=hostbuf_t ())4545 gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) 4546 : image(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) 4547 { } 4548 get_gl_texture_info(cl_gl_texture_info param_name)4549 py::object get_gl_texture_info(cl_gl_texture_info param_name) 4550 { 4551 switch (param_name) 4552 { 4553 case CL_GL_TEXTURE_TARGET: 4554 PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLenum); 4555 case CL_GL_MIPMAP_LEVEL: 4556 PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLint); 4557 4558 default: 4559 throw error("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE); 4560 } 4561 } 4562 }; 4563 4564 4565 4566 4567 #define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, CL_ARGS) \ 4568 inline \ 4569 TYPE *NAME ARGS \ 4570 { \ 4571 cl_int status_code; \ 4572 PYOPENCL_PRINT_CALL_TRACE(#CL_NAME); \ 4573 cl_mem mem = CL_NAME CL_ARGS; \ 4574 \ 4575 if (status_code != CL_SUCCESS) \ 4576 throw pyopencl::error(#CL_NAME, status_code); \ 4577 \ 4578 try \ 4579 { \ 4580 return new TYPE(mem, false); \ 4581 } \ 4582 catch (...) \ 4583 { \ 4584 PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); \ 4585 throw; \ 4586 } \ 4587 } 4588 4589 4590 4591 4592 PYOPENCL_WRAP_BUFFER_CREATOR(gl_buffer, 4593 create_from_gl_buffer, clCreateFromGLBuffer, 4594 (context &ctx, cl_mem_flags flags, GLuint bufobj), 4595 (ctx.data(), flags, bufobj, &status_code)); 4596 PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture, 4597 create_from_gl_texture_2d, clCreateFromGLTexture2D, 4598 (context &ctx, cl_mem_flags flags, 4599 GLenum texture_target, GLint miplevel, GLuint texture), 4600 (ctx.data(), flags, texture_target, miplevel, texture, &status_code)); 4601 PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture, 4602 create_from_gl_texture_3d, clCreateFromGLTexture3D, 4603 (context &ctx, cl_mem_flags flags, 4604 GLenum texture_target, GLint miplevel, GLuint texture), 4605 (ctx.data(), flags, texture_target, miplevel, texture, &status_code)); 4606 PYOPENCL_WRAP_BUFFER_CREATOR(gl_renderbuffer, 4607 create_from_gl_renderbuffer, clCreateFromGLRenderbuffer, 4608 (context &ctx, cl_mem_flags flags, GLuint renderbuffer), 4609 (ctx.data(), flags, renderbuffer, &status_code)); 4610 4611 inline create_from_gl_texture(context & ctx,cl_mem_flags flags,GLenum texture_target,GLint miplevel,GLuint texture,unsigned dims)4612 gl_texture *create_from_gl_texture( 4613 context &ctx, cl_mem_flags flags, 4614 GLenum texture_target, GLint miplevel, 4615 GLuint texture, unsigned dims) 4616 { 4617 if (dims == 2) 4618 return create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture); 4619 else if (dims == 3) 4620 return create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture); 4621 else 4622 throw pyopencl::error("Image", CL_INVALID_VALUE, 4623 "invalid dimension"); 4624 } 4625 4626 4627 4628 4629 4630 inline get_gl_object_info(memory_object_holder const & mem)4631 py::tuple get_gl_object_info(memory_object_holder const &mem) 4632 { 4633 cl_gl_object_type otype; 4634 GLuint gl_name; 4635 PYOPENCL_CALL_GUARDED(clGetGLObjectInfo, (mem.data(), &otype, &gl_name)); 4636 return py::make_tuple(otype, gl_name); 4637 } 4638 4639 #define WRAP_GL_ENQUEUE(what, What) \ 4640 inline \ 4641 event *enqueue_##what##_gl_objects( \ 4642 command_queue &cq, \ 4643 py::object py_mem_objects, \ 4644 py::object py_wait_for) \ 4645 { \ 4646 PYOPENCL_PARSE_WAIT_FOR; \ 4647 \ 4648 std::vector<cl_mem> mem_objects; \ 4649 for (py::handle mo: py_mem_objects) \ 4650 mem_objects.push_back((mo).cast<memory_object_holder &>().data()); \ 4651 \ 4652 cl_event evt; \ 4653 PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \ 4654 cq.data(), \ 4655 mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), \ 4656 PYOPENCL_WAITLIST_ARGS, &evt \ 4657 )); \ 4658 \ 4659 PYOPENCL_RETURN_NEW_EVENT(evt); \ 4660 } 4661 4662 WRAP_GL_ENQUEUE(acquire, Acquire); 4663 WRAP_GL_ENQUEUE(release, Release); 4664 #endif 4665 4666 4667 4668 4669 #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) 4670 inline get_gl_context_info_khr(py::object py_properties,cl_gl_context_info param_name,py::object py_platform)4671 py::object get_gl_context_info_khr( 4672 py::object py_properties, 4673 cl_gl_context_info param_name, 4674 py::object py_platform 4675 ) 4676 { 4677 std::vector<cl_context_properties> props 4678 = parse_context_properties(py_properties); 4679 4680 typedef CL_API_ENTRY cl_int (CL_API_CALL 4681 *func_ptr_type)(const cl_context_properties * /* properties */, 4682 cl_gl_context_info /* param_name */, 4683 size_t /* param_value_size */, 4684 void * /* param_value */, 4685 size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; 4686 4687 func_ptr_type func_ptr; 4688 4689 #if PYOPENCL_CL_VERSION >= 0x1020 4690 if (py_platform.ptr() != Py_None) 4691 { 4692 platform &plat = (py_platform).cast<platform &>(); 4693 4694 func_ptr = (func_ptr_type) clGetExtensionFunctionAddressForPlatform( 4695 plat.data(), "clGetGLContextInfoKHR"); 4696 } 4697 else 4698 { 4699 PYOPENCL_DEPRECATED("get_gl_context_info_khr with platform=None", "2013.1", ); 4700 4701 func_ptr = (func_ptr_type) clGetExtensionFunctionAddress( 4702 "clGetGLContextInfoKHR"); 4703 } 4704 #else 4705 func_ptr = (func_ptr_type) clGetExtensionFunctionAddress( 4706 "clGetGLContextInfoKHR"); 4707 #endif 4708 4709 4710 if (!func_ptr) 4711 throw error("Context.get_info", CL_INVALID_PLATFORM, 4712 "clGetGLContextInfoKHR extension function not present"); 4713 4714 cl_context_properties *props_ptr 4715 = props.empty( ) ? nullptr : &props.front(); 4716 4717 switch (param_name) 4718 { 4719 case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR: 4720 { 4721 cl_device_id param_value; 4722 PYOPENCL_CALL_GUARDED(func_ptr, 4723 (props_ptr, param_name, sizeof(param_value), ¶m_value, 0)); 4724 return py::object(handle_from_new_ptr( \ 4725 new device(param_value, /*retain*/ true))); 4726 } 4727 4728 case CL_DEVICES_FOR_GL_CONTEXT_KHR: 4729 { 4730 size_t size; 4731 PYOPENCL_CALL_GUARDED(func_ptr, 4732 (props_ptr, param_name, 0, 0, &size)); 4733 4734 std::vector<cl_device_id> devices; 4735 4736 devices.resize(size / sizeof(devices.front())); 4737 4738 PYOPENCL_CALL_GUARDED(func_ptr, 4739 (props_ptr, param_name, size, 4740 devices.empty( ) ? nullptr : &devices.front(), &size)); 4741 4742 py::list result; 4743 for (cl_device_id did: devices) 4744 result.append(handle_from_new_ptr( 4745 new device(did))); 4746 4747 return result; 4748 } 4749 4750 default: 4751 throw error("get_gl_context_info_khr", CL_INVALID_VALUE); 4752 } 4753 } 4754 4755 #endif 4756 4757 // }}} 4758 4759 4760 // {{{ deferred implementation bits 4761 get_program() const4762 inline program *error::get_program() const 4763 { 4764 return new program(m_program, /* retain */ true); 4765 } 4766 create_mem_object_wrapper(cl_mem mem,bool retain=true)4767 inline py::object create_mem_object_wrapper(cl_mem mem, bool retain=true) 4768 { 4769 cl_mem_object_type mem_obj_type; 4770 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ 4771 (mem, CL_MEM_TYPE, sizeof(mem_obj_type), &mem_obj_type, 0)); 4772 4773 switch (mem_obj_type) 4774 { 4775 case CL_MEM_OBJECT_BUFFER: 4776 return py::object(handle_from_new_ptr( 4777 new buffer(mem, retain))); 4778 case CL_MEM_OBJECT_IMAGE2D: 4779 case CL_MEM_OBJECT_IMAGE3D: 4780 #if PYOPENCL_CL_VERSION >= 0x1020 4781 case CL_MEM_OBJECT_IMAGE2D_ARRAY: 4782 case CL_MEM_OBJECT_IMAGE1D: 4783 case CL_MEM_OBJECT_IMAGE1D_ARRAY: 4784 case CL_MEM_OBJECT_IMAGE1D_BUFFER: 4785 #endif 4786 return py::object(handle_from_new_ptr( 4787 new image(mem, retain))); 4788 default: 4789 return py::object(handle_from_new_ptr( 4790 new memory_object(mem, retain))); 4791 } 4792 } 4793 4794 inline memory_object_from_int(intptr_t cl_mem_as_int,bool retain)4795 py::object memory_object_from_int(intptr_t cl_mem_as_int, bool retain) 4796 { 4797 return create_mem_object_wrapper((cl_mem) cl_mem_as_int, retain); 4798 } 4799 4800 4801 inline get_info(cl_mem_info param_name) const4802 py::object memory_object_holder::get_info(cl_mem_info param_name) const 4803 { 4804 switch (param_name) 4805 { 4806 case CL_MEM_TYPE: 4807 PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, 4808 cl_mem_object_type); 4809 case CL_MEM_FLAGS: 4810 PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, 4811 cl_mem_flags); 4812 case CL_MEM_SIZE: 4813 PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, 4814 size_t); 4815 case CL_MEM_HOST_PTR: 4816 throw pyopencl::error("MemoryObject.get_info", CL_INVALID_VALUE, 4817 "Use MemoryObject.get_host_array to get host pointer."); 4818 case CL_MEM_MAP_COUNT: 4819 PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, 4820 cl_uint); 4821 case CL_MEM_REFERENCE_COUNT: 4822 PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, 4823 cl_uint); 4824 case CL_MEM_CONTEXT: 4825 PYOPENCL_GET_OPAQUE_INFO(MemObject, data(), param_name, 4826 cl_context, context); 4827 4828 #if PYOPENCL_CL_VERSION >= 0x1010 4829 case CL_MEM_ASSOCIATED_MEMOBJECT: 4830 { 4831 cl_mem param_value; 4832 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ 4833 (data(), param_name, sizeof(param_value), ¶m_value, 0)); 4834 if (param_value == 0) 4835 { 4836 // no associated memory object? no problem. 4837 return py::none(); 4838 } 4839 4840 return create_mem_object_wrapper(param_value); 4841 } 4842 case CL_MEM_OFFSET: 4843 PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, 4844 size_t); 4845 #endif 4846 4847 default: 4848 throw error("MemoryObjectHolder.get_info", CL_INVALID_VALUE); 4849 } 4850 } 4851 4852 // FIXME: Reenable in pypy 4853 #ifndef PYPY_VERSION 4854 inline get_mem_obj_host_array(py::object mem_obj_py,py::object shape,py::object dtype,py::object order_py)4855 py::object get_mem_obj_host_array( 4856 py::object mem_obj_py, 4857 py::object shape, py::object dtype, 4858 py::object order_py) 4859 { 4860 memory_object_holder const &mem_obj = 4861 (mem_obj_py).cast<memory_object_holder const &>(); 4862 PyArray_Descr *tp_descr; 4863 if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED) 4864 throw py::error_already_set(); 4865 cl_mem_flags mem_flags; 4866 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, 4867 (mem_obj.data(), CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, 0)); 4868 if (!(mem_flags & CL_MEM_USE_HOST_PTR)) 4869 throw pyopencl::error("MemoryObject.get_host_array", CL_INVALID_VALUE, 4870 "Only MemoryObject with USE_HOST_PTR " 4871 "is supported."); 4872 4873 std::vector<npy_intp> dims; 4874 try 4875 { 4876 dims.push_back(py::cast<npy_intp>(shape)); 4877 } 4878 catch (py::cast_error &) 4879 { 4880 for (auto it: shape) 4881 dims.push_back(it.cast<npy_intp>()); 4882 } 4883 4884 NPY_ORDER order = PyArray_CORDER; 4885 PyArray_OrderConverter(order_py.ptr(), &order); 4886 4887 int ary_flags = 0; 4888 if (order == PyArray_FORTRANORDER) 4889 ary_flags |= NPY_FARRAY; 4890 else if (order == PyArray_CORDER) 4891 ary_flags |= NPY_CARRAY; 4892 else 4893 throw std::runtime_error("unrecognized order specifier"); 4894 4895 void *host_ptr; 4896 size_t mem_obj_size; 4897 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, 4898 (mem_obj.data(), CL_MEM_HOST_PTR, sizeof(host_ptr), 4899 &host_ptr, 0)); 4900 PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, 4901 (mem_obj.data(), CL_MEM_SIZE, sizeof(mem_obj_size), 4902 &mem_obj_size, 0)); 4903 4904 py::object result = py::reinterpret_steal<py::object>(PyArray_NewFromDescr( 4905 &PyArray_Type, tp_descr, 4906 dims.size(), &dims.front(), /*strides*/ nullptr, 4907 host_ptr, ary_flags, /*obj*/nullptr)); 4908 4909 if ((size_t) PyArray_NBYTES(result.ptr()) > mem_obj_size) 4910 throw pyopencl::error("MemoryObject.get_host_array", 4911 CL_INVALID_VALUE, 4912 "Resulting array is larger than memory object."); 4913 4914 PyArray_BASE(result.ptr()) = mem_obj_py.ptr(); 4915 Py_INCREF(mem_obj_py.ptr()); 4916 4917 return result; 4918 } 4919 #endif 4920 4921 // }}} 4922 } 4923 4924 #endif 4925 4926 // vim: foldmethod=marker 4927