#ifndef STAN_MATH_OPENCL_MATRIX_CL_HPP
#define STAN_MATH_OPENCL_MATRIX_CL_HPP
#ifdef STAN_OPENCL

#include <stan/math/opencl/prim/size.hpp>
#include <stan/math/opencl/err/check_opencl.hpp>
#include <stan/math/prim/err/check_size_match.hpp>
#include <stan/math/opencl/opencl_context.hpp>
#include <stan/math/opencl/ref_type_for_opencl.hpp>
#include <stan/math/opencl/matrix_cl_view.hpp>
#include <stan/math/prim/meta.hpp>
#include <stan/math/prim/fun/Eigen.hpp>
#include <stan/math/prim/fun/vec_concat.hpp>
#include <CL/opencl.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <type_traits>
#include <vector>

/** \ingroup opencl
 * \defgroup matrix_cl_group Matrix
 * The matrix_cl class - allocates memory space on the OpenCL device. Operations
 * on `matrix_cl` types are executed lazily via the kernel generator
 * and async routines.
 */
namespace stan {
namespace math {

/** \addtogroup matrix_cl_group
 *  @{
 */

// forward declare
template <typename T>
class arena_matrix_cl;

template <typename>
class matrix_cl;

/**
 * Represents an arithmetic matrix on the OpenCL device.
 * @tparam T an arithmetic type for the type stored in the OpenCL buffer.
 */
template <typename T>
class matrix_cl : public matrix_cl_base {
 private:
  cl::Buffer buffer_cl_;  // Holds the allocated memory on the device
  int rows_{0};           // Number of rows.
  int cols_{0};           // Number of columns.
  // Holds info on if matrix is a special type
  matrix_cl_view view_{matrix_cl_view::Entire};
  mutable std::vector<cl::Event> write_events_;  // Tracks write jobs
  mutable std::vector<cl::Event> read_events_;   // Tracks reads

 public:
  using Scalar = T;  // Underlying type of the matrix
  using type = T;    // Underlying type of the matrix
  // Forward declare the methods that work in place on the matrix
  template <matrix_cl_view matrix_view = matrix_cl_view::Entire>
  inline void zeros_strict_tri();

  /** @return number of rows */
  int rows() const { return rows_; }

  /** @return number of columns */
  int cols() const { return cols_; }

  /** @return number of elements (`rows() * cols()`) */
  int size() const { return rows_ * cols_; }

  /** @return the view (which part of the matrix is used) */
  const matrix_cl_view& view() const { return view_; }

  /**
   * Sets the view of this matrix.
   * @param view new view
   */
  void view(const matrix_cl_view& view) { view_ = view; }

  /**
   * Clear the write events from the event stacks.
   */
  inline void clear_write_events() const { write_events_.clear(); }

  /**
   * Clear the read events from the event stacks.
   */
  inline void clear_read_events() const { read_events_.clear(); }

  /**
   * Clear both the read and the write events from the event stacks.
   */
  inline void clear_read_write_events() const {
    read_events_.clear();
    write_events_.clear();
  }

  /**
   * Get the events from the event stacks.
   * @return The write event stack.
   */
  inline const std::vector<cl::Event>& write_events() const {
    return write_events_;
  }

  /**
   * Get the events from the event stacks.
   * @return The read event stack.
   */
  inline const std::vector<cl::Event>& read_events() const {
    return read_events_;
  }

  /**
   * Get the events from the event stacks.
   * @return The read events followed by the write events, as a new vector.
   */
  inline const std::vector<cl::Event> read_write_events() const {
    return vec_concat(this->read_events(), this->write_events());
  }

  /**
   * Add an event to the read event stack.
   * @param new_event The event to be pushed on the event stack.
   */
  inline void add_read_event(cl::Event new_event) const {
    this->read_events_.push_back(std::move(new_event));
  }

  /**
   * Add an event to the write event stack.
   * @param new_event The event to be pushed on the event stack.
   */
  inline void add_write_event(cl::Event new_event) const {
    this->write_events_.push_back(std::move(new_event));
  }

  /**
   * Add an event to both the read and the write event stack.
   * @param new_event The event to be pushed on the event stacks.
   */
  inline void add_read_write_event(cl::Event new_event) const {
    this->read_events_.push_back(new_event);
    this->write_events_.push_back(std::move(new_event));
  }

  /**
   * Waits for the write events and clears the write event stack.
   */
  inline void wait_for_write_events() const {
    // iterate by reference: copying a cl::Event only to wait on it is wasted
    for (const cl::Event& e : write_events_) {
      e.wait();
    }
    write_events_.clear();
  }

  /**
   * Waits for the read events and clears the read event stack.
   */
  inline void wait_for_read_events() const {
    for (const cl::Event& e : read_events_) {
      e.wait();
    }
    read_events_.clear();
  }

  /**
   * Waits for read and write events to finish and clears the read and write
   * event stacks.
   */
  inline void wait_for_read_write_events() const {
    wait_for_read_events();
    wait_for_write_events();
  }

  /** @return the underlying OpenCL buffer */
  const cl::Buffer& buffer() const { return buffer_cl_; }
  /** @return the underlying OpenCL buffer */
  cl::Buffer& buffer() { return buffer_cl_; }

  /**
   * Default constructor. Creates a 0 x 0 matrix; no device buffer is
   * allocated.
   */
  matrix_cl() {}

  /**
   * Construct a matrix_cl<T> from an existing cl::Buffer object. The matrix
   * directly uses given buffer - no copying is done.
   *
   * @param A the cl::Buffer object to construct the matrix from
   * @param R number of rows
   * @param C number of columns
   * @param partial_view view of the matrix
   */
  matrix_cl(const cl::Buffer& A, const int R, const int C,
            matrix_cl_view partial_view = matrix_cl_view::Entire)
      : buffer_cl_(A), rows_(R), cols_(C), view_(partial_view) {}

  /**
   * Copy constructor. Allocates a new device buffer and enqueues a
   * device-to-device copy of the contents of `A`. Nothing is allocated if `A`
   * is empty.
   * @param A matrix_cl to copy
   */
  matrix_cl(const matrix_cl<T>& A)
      : rows_(A.rows()), cols_(A.cols()), view_(A.view()) {
    if (A.size() == 0) {
      return;
    }
    buffer_cl_ = cl::Buffer(opencl_context.context(), CL_MEM_READ_WRITE,
                            sizeof(T) * this->size());
    initialize_buffer_cl(A);
  }

  /**
   * Move constructor. Takes over the buffer and the event stacks of `A`.
   * @param A matrix_cl to move
   */
  matrix_cl(matrix_cl<T>&& A)
      : buffer_cl_(std::move(A.buffer_cl_)),
        rows_(A.rows_),
        cols_(A.cols_),
        view_(A.view_),
        write_events_(std::move(A.write_events_)),
        read_events_(std::move(A.read_events_)) {}

  /**
   * Constructor from `arena_matrix_cl`.
   * @param A arena_matrix_cl to construct from
   */
  // defined in rev/arena_matrix_cl.hpp
  matrix_cl(const arena_matrix_cl<T>& A);  // NOLINT(runtime/explicit)

  /**
   * Constructor for the matrix_cl that creates a copy of a std::vector of Eigen
   * matrices on the OpenCL device. Each matrix is flattened into one column
   * of the resulting matrix_cl. If a lvalue is passed to this constructor the
   * caller must make sure that the vector does not go out of scope before
   * copying is complete.
   *
   * That means `.wait()` must be called on the event associated on copying or
   * any other event that requires completion of this event. This can be done by
   * calling `.wait_for_write_events()` or `.wait_for_read_write_events()` on
   * this matrix or any matrix that is calculated from this one.
   *
   * @param A the vector of Eigen matrices
   *
   * @throw <code>std::invalid_argument</code> if the
   * matrices do not have matching dimensions
   * @throw <code>std::system_error</code> if the memory on the device could not
   * be allocated
   */
  template <typename Vec, require_std_vector_vt<is_eigen, Vec>* = nullptr,
            require_st_same<Vec, T>* = nullptr>
  explicit matrix_cl(Vec&& A) try : rows_(A.empty() ? 0 : A[0].size()),
                                    cols_(A.size()) {
    if (this->size() == 0) {
      return;
    }
    cl::Context& ctx = opencl_context.context();
    cl::CommandQueue& queue = opencl_context.queue();
    buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE, sizeof(T) * size());
    // one write per input matrix; column i starts at element i * rows_
    for (int i = 0, offset_size = 0; i < cols_; i++, offset_size += rows_) {
      check_size_match("matrix constructor", "input rows", A[i].size(),
                       "matrix_cl rows", rows_);
      cl::Event write_event;
      // the write is blocking when the input is an rvalue (or the queue is
      // in-order), since a temporary may not outlive this constructor
      queue.enqueueWriteBuffer(
          buffer_cl_,
          opencl_context.in_order() || std::is_rvalue_reference<Vec&&>::value,
          sizeof(T) * offset_size, sizeof(T) * rows_, A[i].data(), nullptr,
          &write_event);
      this->add_write_event(write_event);
    }
  } catch (const cl::Error& e) {
    check_opencl_error("matrix constructor", e);
  }

  /**
   * Constructor for the matrix_cl that
   * only allocates the buffer on the OpenCL device.
   * Regardless of `partial_view`, whole matrix is stored.
   *
   * @param rows number of matrix rows, must be greater or equal to 0
   * @param cols number of matrix columns, must be greater or equal to 0
   * @param partial_view which part of the matrix is used
   *
   * @throw <code>std::system_error</code> if the memory on the device could not
   * be allocated
   *
   */
  matrix_cl(const int rows, const int cols,
            matrix_cl_view partial_view = matrix_cl_view::Entire)
      : rows_(rows), cols_(cols), view_(partial_view) {
    if (size() == 0) {
      return;
    }
    cl::Context& ctx = opencl_context.context();
    try {
      cl_mem_flags flags = CL_MEM_READ_WRITE;
      if (opencl_context.device()[0].getInfo<CL_DEVICE_HOST_UNIFIED_MEMORY>()) {
        flags |= CL_MEM_ALLOC_HOST_PTR;
      }
      buffer_cl_ = cl::Buffer(ctx, flags, sizeof(T) * rows_ * cols_);
    } catch (const cl::Error& e) {
      check_opencl_error("matrix constructor", e);
    }
  }

  /**
   * Constructor for the matrix_cl that creates a copy of the Eigen matrix or
   * Eigen expression on the OpenCL device. Regardless of `partial_view`, whole
   * matrix is stored.
   *
   * If a lvalue matrix or a map is passed to this constructor, it might be
   * directly used by the device. The caller must make sure that the matrix (map
   * data) does not go out of scope as long as this `matrix_cl` is in use
   * (`std::move`-ing it or using raw `buffer()` also counts as in use).
   *
   * @tparam Mat type of \c Eigen \c Matrix or expression
   * @param A the \c Eigen \c Matrix or expression
   * @param partial_view which part of the matrix is used
   *
   * @throw <code>std::system_error</code> if the memory on the device could not
   * be allocated
   */
  template <typename Mat, require_eigen_t<Mat>* = nullptr,
            require_vt_same<Mat, T>* = nullptr>
  explicit matrix_cl(Mat&& A,
                     matrix_cl_view partial_view = matrix_cl_view::Entire)
      : rows_(A.rows()), cols_(A.cols()), view_(partial_view) {
    using Mat_type = std::decay_t<ref_type_for_opencl_t<Mat>>;
    if (size() == 0) {
      return;
    }
    // the input is used in place (no heap copy) only if it already has the
    // type required for OpenCL and is either an lvalue or a contiguous map
    initialize_buffer_no_heap_if<
        std::is_same<std::decay_t<Mat>, Mat_type>::value
        && (std::is_lvalue_reference<Mat>::value
            || is_eigen_contiguous_map<Mat>::value)>(A);
  }

  /**
   * Constructor for the matrix_cl that creates a copy of a scalar on the OpenCL
   * device. Regardless of `partial_view`, whole matrix is stored.
   *
   * If a lvalue is passed to this constructor, it might be directly used by the
   * device. The caller must make sure that it does not go out of scope as long
   * as this `matrix_cl` is in use
   * (`std::move`-ing it or using raw `buffer()` also counts as in use).
   *
   * @param A the scalar
   * @param partial_view which part of the matrix is used
   *
   * @throw <code>std::system_error</code> if the memory on the device could not
   * be allocated
   */
  template <typename Scal,
            typename = require_same_t<T, std::remove_reference_t<Scal>>>
  explicit matrix_cl(Scal&& A,
                     matrix_cl_view partial_view = matrix_cl_view::Diagonal)
      : rows_(1), cols_(1), view_(partial_view) {
    // the const_cast selects the `const T*` overload of initialize_buffer
    initialize_buffer<std::is_rvalue_reference<Scal&&>::value>(
        const_cast<const std::decay_t<Scal>*>(&A));
  }

  /**
   * Construct a matrix_cl of size Nx1 from \c std::vector.
   *
   * If a lvalue is passed to this constructor, it might be directly used by the
   * device. The caller must make sure that it does not go out of scope as long
   * as this `matrix_cl` is in use
   * (`std::move`-ing it or using raw `buffer()` also counts as in use).
   *
   * @param A Standard vector
   * @param partial_view which part of the matrix is used
   *
   * @throw <code>std::system_error</code> if the memory on the device could not
   * be allocated
   */
  template <typename Vec, require_std_vector_t<Vec>* = nullptr,
            require_vt_same<Vec, T>* = nullptr>
  explicit matrix_cl(Vec&& A,
                     matrix_cl_view partial_view = matrix_cl_view::Entire)
      // partial_view must be forwarded, otherwise the delegated constructor
      // silently falls back to its default view
      : matrix_cl(std::forward<Vec>(A), A.size(), 1, partial_view) {}

  /**
   * Construct from \c std::vector with given rows and columns.
   *
   * If a lvalue is passed to this constructor, it might be directly used by the
   * device. The caller must make sure that it does not go out of scope as long
   * as this `matrix_cl` is in use (`std::move`-ing it or using raw `buffer()`
   * also counts as in use).
   *
   * @param A Standard vector
   * @param R Number of rows the matrix should have.
   * @param C Number of columns the matrix should have.
   * @param partial_view which part of the matrix is used
   *
   * @throw <code>std::system_error</code> if the memory on the device could not
   * be allocated
   */
  template <typename Vec, require_std_vector_t<Vec>* = nullptr,
            require_vt_same<Vec, T>* = nullptr>
  explicit matrix_cl(Vec&& A, const int& R, const int& C,
                     matrix_cl_view partial_view = matrix_cl_view::Entire)
      : rows_(R), cols_(C), view_(partial_view) {
    initialize_buffer_no_heap_if<std::is_lvalue_reference<Vec>::value>(A);
  }

  /**
   * Construct from \c array with given rows and columns.
   *
   * The memory might be directly used by the device. The caller must make sure
   * that it does not go out of scope as long as this `matrix_cl` is in use
   * (`std::move`-ing it or using raw `buffer()` also counts as in use).
   *
   * @param A pointer to an array of `T`
   * @param R Number of rows the matrix should have.
   * @param C Number of columns the matrix should have.
   * @param partial_view which part of the matrix is used
   *
   * @throw <code>std::system_error</code> if the memory on the device could not
   * be allocated
   */
  template <typename U, require_same_t<T, U>* = nullptr>
  explicit matrix_cl(const U* A, const int& R, const int& C,
                     matrix_cl_view partial_view = matrix_cl_view::Entire)
      : rows_(R), cols_(C), view_(partial_view) {
    initialize_buffer(A);
  }

  /**
   * Construct from a kernel generator expression. It evaluates the expression
   * into \c this.
   * @tparam Expr type of the expression
   * @param expression expression
   */
  // defined in kernel_generator/matrix_cl_conversion.hpp
  template <typename Expr,
            require_all_kernel_expressions_and_none_scalar_t<Expr>* = nullptr,
            require_not_matrix_cl_t<Expr>* = nullptr>
  matrix_cl(const Expr& expression);  // NOLINT(runtime/explicit)

  /**
   * Move assignment operator. Waits for all events pending on this matrix,
   * then takes over the buffer and the event stacks of `a`.
   * @param a matrix_cl to move from
   * @return `*this`
   */
  matrix_cl<T>& operator=(matrix_cl<T>&& a) {
    if (this == &a) {
      return *this;
    }
    // wait first, so that a failing wait leaves this matrix untouched
    this->wait_for_read_write_events();
    view_ = a.view();
    rows_ = a.rows();
    cols_ = a.cols();
    buffer_cl_ = std::move(a.buffer_cl_);
    write_events_ = std::move(a.write_events_);
    read_events_ = std::move(a.read_events_);
    return *this;
  }

  /**
   * Copy assignment operator. The existing buffer is reused if the number of
   * elements matches, otherwise a new buffer is allocated.
   * @param a matrix_cl to copy
   * @return `*this`
   */
  matrix_cl<T>& operator=(const matrix_cl<T>& a) {
    // Self-assignment would enqueue a copy of the buffer onto itself, which
    // OpenCL rejects as an overlapping copy (CL_MEM_COPY_OVERLAP).
    if (this == &a) {
      return *this;
    }
    this->view_ = a.view();
    if (a.size() == 0) {
      this->rows_ = a.rows();
      this->cols_ = a.cols();
      return *this;
    }
    this->wait_for_read_write_events();
    if (size() != a.size()) {
      buffer_cl_ = cl::Buffer(opencl_context.context(), CL_MEM_READ_WRITE,
                              sizeof(T) * a.size());
    }
    this->rows_ = a.rows();
    this->cols_ = a.cols();
    initialize_buffer_cl(a);
    return *this;
  }

  /**
   * Assignment of a kernel generator expression evaluates the expression into
   * \c this.
   * @tparam Expr type of the expression
   * @param expression expression
   */
  // defined in kernel_generator/matrix_cl_conversion.hpp
  template <typename Expr,
            require_all_kernel_expressions_and_none_scalar_t<Expr>* = nullptr,
            require_not_matrix_cl_t<Expr>* = nullptr>
  matrix_cl<T>& operator=(const Expr& expression);

  /**
   * Assignment of `arena_matrix_cl<T>`.
   * @param other arena_matrix_cl to assign
   */
  // defined in rev/arena_matrix_cl.hpp
  matrix_cl<T>& operator=(const arena_matrix_cl<T>& other);

  /**
   * Evaluates `this`. This is a no-op.
   * @return `*this` (moved out when called on an rvalue)
   */
  const matrix_cl<T>& eval() const& { return *this; }
  matrix_cl<T> eval() && { return std::move(*this); }

  /**
   * Destructor waits for both read and write events, so that no pending
   * operation still uses the buffer when it is released.
   */
  ~matrix_cl() { wait_for_read_write_events(); }

 private:
  /**
   * Initializes the OpenCL buffer of this matrix by copying the data from given
   * buffer. Assumes that size of \c this is already set and matches the
   * buffer size.
   *
   * The caller must make sure that data is not deleted as long as
   * this `matrix_cl` is in use (`std::move`-ing it or using raw `buffer()` also
   * counts as in use).
   *
   * @tparam in_order whether copying must be done in order (blocking write)
   * @param A pointer to buffer
   * @return event for the copy
   */
  template <bool in_order = false>
  cl::Event initialize_buffer(const T* A) {
    cl::Event transfer_event;
    if (size() == 0) {
      return transfer_event;
    }
    cl::Context& ctx = opencl_context.context();
    cl::CommandQueue& queue = opencl_context.queue();
    try {
      buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE, sizeof(T) * size());
      queue.enqueueWriteBuffer(buffer_cl_,
                               opencl_context.in_order() || in_order, 0,
                               sizeof(T) * size(), A, nullptr, &transfer_event);
      this->add_write_event(transfer_event);
    } catch (const cl::Error& e) {
      check_opencl_error("initialize_buffer", e);
    }
    return transfer_event;
  }

  /**
   * Overload for mutable host memory. If the device reports host unified
   * memory, the buffer is created on top of the host memory
   * (`CL_MEM_USE_HOST_PTR`) and no transfer is enqueued. Otherwise the data
   * is copied as in the `const T*` overload.
   *
   * @tparam in_order whether copying must be done in order (blocking write)
   * @param A pointer to buffer
   * @return event for the copy; default constructed if no copy was enqueued
   */
  template <bool in_order = false>
  cl::Event initialize_buffer(T* A) {
    cl::Event transfer_event;
    if (size() == 0) {
      return transfer_event;
    }
    cl::Context& ctx = opencl_context.context();
    cl::CommandQueue& queue = opencl_context.queue();
    try {
      if (opencl_context.device()[0].getInfo<CL_DEVICE_HOST_UNIFIED_MEMORY>()) {
        buffer_cl_
            = cl::Buffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
                         sizeof(T) * size(), A);  // this is always synchronous
      } else {
        buffer_cl_ = cl::Buffer(ctx, CL_MEM_READ_WRITE, sizeof(T) * size());
        queue.enqueueWriteBuffer(
            buffer_cl_, opencl_context.in_order() || in_order, 0,
            sizeof(T) * size(), A, nullptr, &transfer_event);
        this->add_write_event(transfer_event);
      }
    } catch (const cl::Error& e) {
      check_opencl_error("initialize_buffer", e);
    }
    return transfer_event;
  }

  /**
   * Initializes the OpenCL buffer of this matrix by copying the data from given
   * object. Assumes that size of \c this is already set and matches the
   * buffer size. If No_heap is false the object is first moved to heap
   * and callback is set to delete it after copying to OpenCL device is
   * complete. Otherwise the caller must make sure that input object is not
   * deleted as long as this `matrix_cl` is in use (`std::move`-ing it or using
   * raw `buffer()` also counts).
   *
   * @tparam No_heap whether to use the object in place instead of moving it
   * to heap first
   * @tparam U type of object
   * @param obj object
   */
  template <bool No_heap, typename U, std::enable_if_t<No_heap>* = nullptr>
  void initialize_buffer_no_heap_if(U&& obj) {
    if (size() == 0) {
      return;
    }
    initialize_buffer(obj.data());
  }

  // we need separate overloads as obj.data() might not be available when second
  // overload is called.
  template <bool No_heap, typename U, std::enable_if_t<!No_heap>* = nullptr>
  void initialize_buffer_no_heap_if(U&& obj) {
    using U_val = std::decay_t<ref_type_for_opencl_t<U>>;
    if (size() == 0) {
      return;
    }
    // forward (not move): obj is a forwarding reference, so an lvalue argument
    // must not be moved from
    auto* obj_heap = new U_val(std::forward<U>(obj));
    try {
      cl::Event e = initialize_buffer(obj_heap->data());
      // hand ownership of the heap object to an OpenCL callback
      if (opencl_context.device()[0].getInfo<CL_DEVICE_HOST_UNIFIED_MEMORY>()) {
        buffer_cl_.setDestructorCallback(&delete_it_destructor<U_val>,
                                         obj_heap);
      } else {
        e.setCallback(CL_COMPLETE, &delete_it_event<U_val>, obj_heap);
      }
    } catch (...) {
      delete obj_heap;
      throw;
    }
  }

  /**
   * Initializes the OpenCL buffer of this matrix by copying the data from given
   * matrix_cl. Assumes that size of \c this is already set and matches the
   * size of given matrix.
   * @param A matrix_cl
   */
  void initialize_buffer_cl(const matrix_cl<T>& A) {
    try {
      cl::Event cstr_event;
      // the copy waits on the pending writes to A
      opencl_context.queue().enqueueCopyBuffer(A.buffer(), this->buffer(), 0, 0,
                                               A.size() * sizeof(T),
                                               &A.write_events(), &cstr_event);
      // the copy writes to this matrix and reads from A
      this->add_write_event(cstr_event);
      A.add_read_event(cstr_event);
    } catch (const cl::Error& e) {
      check_opencl_error("copy (OpenCL)->(OpenCL)", e);
    }
  }

  /**
   * Deletes the container. Used as a callback for OpenCL event.
   * @tparam U type of container
   * @param e cl_event handle
   * @param status status of event
   * @param container container to delete
   */
  template <typename U>
  static void delete_it_event(cl_event e, cl_int status, void* container) {
    delete static_cast<U*>(container);
  }

  /**
   * Deletes the container. Used as a callback for destruction of `cl::Buffer`.
   * @tparam U type of container
   * @param buff buffer that is being destructed
   * @param container container to delete
   */
  template <typename U>
  static void delete_it_destructor(cl_mem buff, void* container) {
    delete static_cast<U*>(container);
  }
};
/** @}*/

}  // namespace math
}  // namespace stan

#endif
#endif