1 #ifndef VIENNACL_BACKEND_CPU_RAM_HPP_
2 #define VIENNACL_BACKEND_CPU_RAM_HPP_
3
4 /* =========================================================================
5 Copyright (c) 2010-2016, Institute for Microelectronics,
6 Institute for Analysis and Scientific Computing,
7 TU Wien.
8 Portions of this software are copyright by UChicago Argonne, LLC.
9
10 -----------------
11 ViennaCL - The Vienna Computing Library
12 -----------------
13
14 Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15
16 (A list of authors and contributors can be found in the manual)
17
18 License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20
/** @file viennacl/backend/cpu_ram.hpp
    @brief Implementations for the main memory (CPU RAM) backend functionality
*/
24
#include <cassert>
#include <cstddef>
#include <new>
#include <vector>
#ifdef VIENNACL_WITH_AVX2
#include <stdlib.h>
#endif

#include "viennacl/forwards.h"
#include "viennacl/tools/shared_ptr.hpp"
33
34 namespace viennacl
35 {
36 namespace backend
37 {
38 namespace cpu_ram
39 {
/** @brief Buffer handle used by every function of this backend: a smart pointer to a raw char array. */
40 typedef viennacl::tools::shared_ptr<char> handle_type;
// Requirements for backend (signatures as provided in this file):
//
//  * memory_create(size_in_bytes, host_ptr)
//  * memory_copy(src_buffer, dst_buffer, src_offset, dst_offset, bytes_to_copy)
//  * memory_write(dst_buffer, dst_offset, bytes_to_copy, ptr, async)
//  * memory_read(src_buffer, src_offset, bytes_to_copy, ptr, async)
//
51
52 namespace detail
53 {
/** @brief Deleter functor that releases a raw array of U held by a smart pointer.
*
* The release call is the counterpart of the allocation used in memory_create():
* free() when VIENNACL_WITH_AVX2 is defined (buffer obtained from aligned_alloc()),
* delete[] otherwise (buffer obtained from new[]).
*/
template<class U>
struct array_deleter
{
  /** @brief Releases the array starting at 'array_ptr'. */
  void operator()(U* array_ptr) const
  {
#ifdef VIENNACL_WITH_AVX2
    free(array_ptr);
#else
    delete[] array_ptr;
#endif
  }
};
64
65 }
66
67 /** @brief Creates an array of the specified size in main RAM. If the second argument is provided, the buffer is initialized with data from that pointer.
68 *
69 * @param size_in_bytes Number of bytes to allocate
70 * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
71 *
72 */
memory_create(vcl_size_t size_in_bytes,const void * host_ptr=NULL)73 inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
74 {
75 #ifdef VIENNACL_WITH_AVX2
76 // Note: aligned_alloc not available on all compilers. Consider platform-specific alternatives such as posix_memalign()
77 if (!host_ptr)
78 return handle_type(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
79
80 handle_type new_handle(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>());
81 #else
82 if (!host_ptr)
83 return handle_type(new char[size_in_bytes], detail::array_deleter<char>());
84
85 handle_type new_handle(new char[size_in_bytes], detail::array_deleter<char>());
86 #endif
87
88 // copy data:
89 char * raw_ptr = new_handle.get();
90 const char * data_ptr = static_cast<const char *>(host_ptr);
91 #ifdef VIENNACL_WITH_OPENMP
92 #pragma omp parallel for
93 #endif
94 for (long i=0; i<long(size_in_bytes); ++i)
95 raw_ptr[i] = data_ptr[i];
96
97 return new_handle;
98 }
99
100 /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'.
101 *
102 * @param src_buffer A smart pointer to the begin of an allocated buffer
103 * @param dst_buffer A smart pointer to the end of an allocated buffer
104 * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes)
105 * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
106 * @param bytes_to_copy Number of bytes to be copied
107 */
memory_copy(handle_type const & src_buffer,handle_type & dst_buffer,vcl_size_t src_offset,vcl_size_t dst_offset,vcl_size_t bytes_to_copy)108 inline void memory_copy(handle_type const & src_buffer,
109 handle_type & dst_buffer,
110 vcl_size_t src_offset,
111 vcl_size_t dst_offset,
112 vcl_size_t bytes_to_copy)
113 {
114 assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
115 assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
116
117 #ifdef VIENNACL_WITH_OPENMP
118 #pragma omp parallel for
119 #endif
120 for (long i=0; i<long(bytes_to_copy); ++i)
121 dst_buffer.get()[vcl_size_t(i)+dst_offset] = src_buffer.get()[vcl_size_t(i) + src_offset];
122 }
123
124 /** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer'
125 *
126 * @param dst_buffer A smart pointer to the beginning of an allocated buffer
127 * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes)
128 * @param bytes_to_copy Number of bytes to be copied
129 * @param ptr Pointer to the first byte to be written
130 */
memory_write(handle_type & dst_buffer,vcl_size_t dst_offset,vcl_size_t bytes_to_copy,const void * ptr,bool)131 inline void memory_write(handle_type & dst_buffer,
132 vcl_size_t dst_offset,
133 vcl_size_t bytes_to_copy,
134 const void * ptr,
135 bool /*async*/)
136 {
137 assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
138
139 #ifdef VIENNACL_WITH_OPENMP
140 #pragma omp parallel for
141 #endif
142 for (long i=0; i<long(bytes_to_copy); ++i)
143 dst_buffer.get()[vcl_size_t(i)+dst_offset] = static_cast<const char *>(ptr)[i];
144 }
145
146 /** @brief Reads data from a buffer back to main RAM.
147 *
148 * @param src_buffer A smart pointer to the beginning of an allocated source buffer
149 * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_
150 * @param bytes_to_copy Number of bytes to be read
151 * @param ptr Location in main RAM where to read data should be written to
152 */
memory_read(handle_type const & src_buffer,vcl_size_t src_offset,vcl_size_t bytes_to_copy,void * ptr,bool)153 inline void memory_read(handle_type const & src_buffer,
154 vcl_size_t src_offset,
155 vcl_size_t bytes_to_copy,
156 void * ptr,
157 bool /*async*/)
158 {
159 assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
160
161 #ifdef VIENNACL_WITH_OPENMP
162 #pragma omp parallel for
163 #endif
164 for (long i=0; i<long(bytes_to_copy); ++i)
165 static_cast<char *>(ptr)[i] = src_buffer.get()[vcl_size_t(i)+src_offset];
166 }
167
168 }
169 } //backend
170 } //viennacl
171 #endif
172