1 // SPDX-License-Identifier: Apache-2.0
2 //
3 // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
4 // Copyright 2008-2016 National ICT Australia (NICTA)
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 // ------------------------------------------------------------------------
17
18
19 //! \addtogroup memory
20 //! @{
21
22
23 class memory
24 {
25 public:
26
27 template<typename eT> inline arma_malloc static eT* acquire(const uword n_elem);
28
29 template<typename eT> arma_inline static void release(eT* mem);
30
31 template<typename eT> arma_inline static bool is_aligned(const eT* mem);
32 template<typename eT> arma_inline static void mark_as_aligned( eT*& mem);
33 template<typename eT> arma_inline static void mark_as_aligned(const eT*& mem);
34 };
35
36
37
38 template<typename eT>
39 inline
40 arma_malloc
41 eT*
acquire(const uword n_elem)42 memory::acquire(const uword n_elem)
43 {
44 if(n_elem == 0) { return nullptr; }
45
46 arma_debug_check
47 (
48 ( size_t(n_elem) > (std::numeric_limits<size_t>::max() / sizeof(eT)) ),
49 "arma::memory::acquire(): requested size is too large"
50 );
51
52 eT* out_memptr;
53
54 #if defined(ARMA_ALIEN_MEM_ALLOC_FUNCTION)
55 {
56 out_memptr = (eT *) ARMA_ALIEN_MEM_ALLOC_FUNCTION(sizeof(eT)*n_elem);
57 }
58 #elif defined(ARMA_USE_TBB_ALLOC)
59 {
60 out_memptr = (eT *) scalable_malloc(sizeof(eT)*n_elem);
61 }
62 #elif defined(ARMA_USE_MKL_ALLOC)
63 {
64 out_memptr = (eT *) mkl_malloc( sizeof(eT)*n_elem, 32 );
65 }
66 #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
67 {
68 eT* memptr = nullptr;
69
70 const size_t n_bytes = sizeof(eT)*size_t(n_elem);
71 const size_t alignment = (n_bytes >= size_t(1024)) ? size_t(32) : size_t(16);
72
73 // TODO: investigate apparent memory leak when using alignment >= 64 (as shown on Fedora 28, glibc 2.27)
74 int status = posix_memalign((void **)&memptr, ( (alignment >= sizeof(void*)) ? alignment : sizeof(void*) ), n_bytes);
75
76 out_memptr = (status == 0) ? memptr : nullptr;
77 }
78 #elif defined(_MSC_VER)
79 {
80 //out_memptr = (eT *) malloc(sizeof(eT)*n_elem);
81 //out_memptr = (eT *) _aligned_malloc( sizeof(eT)*n_elem, 16 ); // lives in malloc.h
82
83 const size_t n_bytes = sizeof(eT)*size_t(n_elem);
84 const size_t alignment = (n_bytes >= size_t(1024)) ? size_t(32) : size_t(16);
85
86 out_memptr = (eT *) _aligned_malloc( n_bytes, alignment );
87 }
88 #else
89 {
90 //return ( new(std::nothrow) eT[n_elem] );
91 out_memptr = (eT *) malloc(sizeof(eT)*n_elem);
92 }
93 #endif
94
95 // TODO: for mingw, use __mingw_aligned_malloc
96
97 arma_check_bad_alloc( (out_memptr == nullptr), "arma::memory::acquire(): out of memory" );
98
99 return out_memptr;
100 }
101
102
103
104 template<typename eT>
105 arma_inline
106 void
release(eT * mem)107 memory::release(eT* mem)
108 {
109 if(mem == nullptr) { return; }
110
111 #if defined(ARMA_ALIEN_MEM_FREE_FUNCTION)
112 {
113 ARMA_ALIEN_MEM_FREE_FUNCTION( (void *)(mem) );
114 }
115 #elif defined(ARMA_USE_TBB_ALLOC)
116 {
117 scalable_free( (void *)(mem) );
118 }
119 #elif defined(ARMA_USE_MKL_ALLOC)
120 {
121 mkl_free( (void *)(mem) );
122 }
123 #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
124 {
125 free( (void *)(mem) );
126 }
127 #elif defined(_MSC_VER)
128 {
129 //free( (void *)(mem) );
130 _aligned_free( (void *)(mem) );
131 }
132 #else
133 {
134 //delete [] mem;
135 free( (void *)(mem) );
136 }
137 #endif
138
139 // TODO: for mingw, use __mingw_aligned_free
140 }
141
142
143
144 template<typename eT>
145 arma_inline
146 bool
is_aligned(const eT * mem)147 memory::is_aligned(const eT* mem)
148 {
149 #if (defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) || defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)) && !defined(ARMA_DONT_CHECK_ALIGNMENT)
150 {
151 return (sizeof(std::size_t) >= sizeof(eT*)) ? ((std::size_t(mem) & 0x0F) == 0) : false;
152 }
153 #else
154 {
155 arma_ignore(mem);
156
157 return false;
158 }
159 #endif
160 }
161
162
163
164 template<typename eT>
165 arma_inline
166 void
mark_as_aligned(eT * & mem)167 memory::mark_as_aligned(eT*& mem)
168 {
169 #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
170 {
171 __assume_aligned(mem, 16);
172 }
173 #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
174 {
175 mem = (eT*)__builtin_assume_aligned(mem, 16);
176 }
177 #else
178 {
179 arma_ignore(mem);
180 }
181 #endif
182
183 // TODO: MSVC? __assume( (mem & 0x0F) == 0 );
184 //
185 // http://comments.gmane.org/gmane.comp.gcc.patches/239430
186 // GCC __builtin_assume_aligned is similar to ICC's __assume_aligned,
187 // so for lvalue first argument ICC's __assume_aligned can be emulated using
188 // #define __assume_aligned(lvalueptr, align) lvalueptr = __builtin_assume_aligned (lvalueptr, align)
189 //
190 // http://www.inf.ethz.ch/personal/markusp/teaching/263-2300-ETH-spring11/slides/class19.pdf
191 // http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/cpp/lin/index.htm
192 // http://d3f8ykwhia686p.cloudfront.net/1live/intel/CompilerAutovectorizationGuide.pdf
193 }
194
195
196
197 template<typename eT>
198 arma_inline
199 void
mark_as_aligned(const eT * & mem)200 memory::mark_as_aligned(const eT*& mem)
201 {
202 #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
203 {
204 __assume_aligned(mem, 16);
205 }
206 #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
207 {
208 mem = (const eT*)__builtin_assume_aligned(mem, 16);
209 }
210 #else
211 {
212 arma_ignore(mem);
213 }
214 #endif
215 }
216
217
218
219 //! @}
220