1 #ifndef VIENNACL_BACKEND_CPU_RAM_HPP_
2 #define VIENNACL_BACKEND_CPU_RAM_HPP_
3 
4 /* =========================================================================
5    Copyright (c) 2010-2016, Institute for Microelectronics,
6                             Institute for Analysis and Scientific Computing,
7                             TU Wien.
8    Portions of this software are copyright by UChicago Argonne, LLC.
9 
10                             -----------------
11                   ViennaCL - The Vienna Computing Library
12                             -----------------
13 
14    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
15 
16    (A list of authors and contributors can be found in the manual)
17 
18    License:         MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 /** @file viennacl/backend/cpu_ram.hpp
    @brief Implementations for the CPU RAM (main memory) backend functionality
23 */
24 
#include <cassert>
#include <cstddef>
#include <new>
#include <vector>
#ifdef VIENNACL_WITH_AVX2
#include <stdlib.h>
#endif

#include "viennacl/forwards.h"
#include "viennacl/tools/shared_ptr.hpp"
33 
34 namespace viennacl
35 {
36 namespace backend
37 {
38 namespace cpu_ram
39 {
/** @brief Memory handle used by the CPU RAM backend: a viennacl::tools::shared_ptr managing a buffer of chars (raw bytes). */
typedef viennacl::tools::shared_ptr<char>  handle_type;
41 // Requirements for backend:
42 
43 // * memory_create(size, host_ptr)
44 // * memory_copy(src, dest, offset_src, offset_dest, size)
45 // * memory_write_from_main_memory(src, offset, size,
46 //                                 dest, offset, size)
47 // * memory_read_to_main_memory(src, offset, size
48 //                              dest, offset, size)
49 // *
50 //
51 
52 namespace detail
53 {
54   /** @brief Helper struct for deleting an pointer to an array */
55   template<class U>
56   struct array_deleter
57   {
58 #ifdef VIENNACL_WITH_AVX2
operator ()viennacl::backend::cpu_ram::detail::array_deleter59     void operator()(U* p) const { free(p); }
60 #else
61     void operator()(U* p) const { delete[] p; }
62 #endif
63   };
64 
65 }
66 
67 /** @brief Creates an array of the specified size in main RAM. If the second argument is provided, the buffer is initialized with data from that pointer.
68  *
69  * @param size_in_bytes   Number of bytes to allocate
70  * @param host_ptr        Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
71  *
72  */
memory_create(vcl_size_t size_in_bytes,const void * host_ptr=NULL)73 inline handle_type  memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
74 {
75 #ifdef VIENNACL_WITH_AVX2
76   // Note: aligned_alloc not available on all compilers. Consider platform-specific alternatives such as posix_memalign()
77   if (!host_ptr)
78     return handle_type(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
79 
80   handle_type new_handle(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
81 #else
82   if (!host_ptr)
83     return handle_type(new char[size_in_bytes], detail::array_deleter<char>());
84 
85   handle_type new_handle(new char[size_in_bytes], detail::array_deleter<char>());
86 #endif
87 
88   // copy data:
89   char * raw_ptr = new_handle.get();
90   const char * data_ptr = static_cast<const char *>(host_ptr);
91 #ifdef VIENNACL_WITH_OPENMP
92     #pragma omp parallel for
93 #endif
94   for (long i=0; i<long(size_in_bytes); ++i)
95     raw_ptr[i] = data_ptr[i];
96 
97   return new_handle;
98 }
99 
100 /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'.
101  *
102  *  @param src_buffer     A smart pointer to the begin of an allocated buffer
103  *  @param dst_buffer     A smart pointer to the end of an allocated buffer
104  *  @param src_offset     Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
105  *  @param dst_offset     Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
106  *  @param bytes_to_copy  Number of bytes to be copied
107  */
memory_copy(handle_type const & src_buffer,handle_type & dst_buffer,vcl_size_t src_offset,vcl_size_t dst_offset,vcl_size_t bytes_to_copy)108 inline void memory_copy(handle_type const & src_buffer,
109                         handle_type & dst_buffer,
110                         vcl_size_t src_offset,
111                         vcl_size_t dst_offset,
112                         vcl_size_t bytes_to_copy)
113 {
114   assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
115   assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
116 
117 #ifdef VIENNACL_WITH_OPENMP
118   #pragma omp parallel for
119 #endif
120   for (long i=0; i<long(bytes_to_copy); ++i)
121     dst_buffer.get()[vcl_size_t(i)+dst_offset] = src_buffer.get()[vcl_size_t(i) + src_offset];
122 }
123 
124 /** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'
125  *
126  * @param dst_buffer    A smart pointer to the beginning of an allocated buffer
127  * @param dst_offset    Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
128  * @param bytes_to_copy Number of bytes to be copied
129  * @param ptr           Pointer to the first byte to be written
130  */
memory_write(handle_type & dst_buffer,vcl_size_t dst_offset,vcl_size_t bytes_to_copy,const void * ptr,bool)131 inline void memory_write(handle_type & dst_buffer,
132                          vcl_size_t dst_offset,
133                          vcl_size_t bytes_to_copy,
134                          const void * ptr,
135                          bool /*async*/)
136 {
137   assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
138 
139 #ifdef VIENNACL_WITH_OPENMP
140   #pragma omp parallel for
141 #endif
142   for (long i=0; i<long(bytes_to_copy); ++i)
143     dst_buffer.get()[vcl_size_t(i)+dst_offset] = static_cast<const char *>(ptr)[i];
144 }
145 
146 /** @brief Reads data from a buffer back to main RAM.
147  *
148  * @param src_buffer         A smart pointer to the beginning of an allocated source buffer
149  * @param src_offset         Offset of the first byte to be read from the beginning of src_buffer (in bytes_
150  * @param bytes_to_copy      Number of bytes to be read
151  * @param ptr                Location in main RAM where to read data should be written to
152  */
memory_read(handle_type const & src_buffer,vcl_size_t src_offset,vcl_size_t bytes_to_copy,void * ptr,bool)153 inline void memory_read(handle_type const & src_buffer,
154                         vcl_size_t src_offset,
155                         vcl_size_t bytes_to_copy,
156                         void * ptr,
157                         bool /*async*/)
158 {
159   assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
160 
161 #ifdef VIENNACL_WITH_OPENMP
162   #pragma omp parallel for
163 #endif
164   for (long i=0; i<long(bytes_to_copy); ++i)
165     static_cast<char *>(ptr)[i] = src_buffer.get()[vcl_size_t(i)+src_offset];
166 }
167 
168 }
169 } //backend
170 } //viennacl
171 #endif
172