1 // SPDX-License-Identifier: Apache-2.0
2 //
3 // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
4 // Copyright 2008-2016 National ICT Australia (NICTA)
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 // ------------------------------------------------------------------------
17 
18 
19 //! \addtogroup memory
20 //! @{
21 
22 
//! Collection of static helpers for obtaining and freeing element buffers,
//! and for testing / hinting pointer alignment.
//! All members are static templates parameterised on the element type eT;
//! their definitions follow the class.
class memory
  {
  public:

  //! allocate room for n_elem elements of type eT; the definition returns nullptr when n_elem is zero
  template<typename eT> inline arma_malloc static eT* acquire(const uword n_elem);

  //! free memory previously obtained via acquire(); a null pointer is ignored by the definition
  template<typename eT> arma_inline static void release(eT* mem);

  //! is_aligned(): check whether the address in mem is a multiple of 16 (only when alignment checking is compiled in)
  //! mark_as_aligned(): pass a 16-byte alignment hint for mem to the compiler, where the compiler supports it
  template<typename eT> arma_inline static bool      is_aligned(const eT*  mem);
  template<typename eT> arma_inline static void mark_as_aligned(      eT*& mem);
  template<typename eT> arma_inline static void mark_as_aligned(const eT*& mem);
  };
35 
36 
37 
38 template<typename eT>
39 inline
40 arma_malloc
41 eT*
acquire(const uword n_elem)42 memory::acquire(const uword n_elem)
43   {
44   if(n_elem == 0)  { return nullptr; }
45 
46   arma_debug_check
47     (
48     ( size_t(n_elem) > (std::numeric_limits<size_t>::max() / sizeof(eT)) ),
49     "arma::memory::acquire(): requested size is too large"
50     );
51 
52   eT* out_memptr;
53 
54   #if   defined(ARMA_ALIEN_MEM_ALLOC_FUNCTION)
55     {
56     out_memptr = (eT *) ARMA_ALIEN_MEM_ALLOC_FUNCTION(sizeof(eT)*n_elem);
57     }
58   #elif defined(ARMA_USE_TBB_ALLOC)
59     {
60     out_memptr = (eT *) scalable_malloc(sizeof(eT)*n_elem);
61     }
62   #elif defined(ARMA_USE_MKL_ALLOC)
63     {
64     out_memptr = (eT *) mkl_malloc( sizeof(eT)*n_elem, 32 );
65     }
66   #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
67     {
68     eT* memptr = nullptr;
69 
70     const size_t n_bytes   = sizeof(eT)*size_t(n_elem);
71     const size_t alignment = (n_bytes >= size_t(1024)) ? size_t(32) : size_t(16);
72 
73     // TODO: investigate apparent memory leak when using alignment >= 64 (as shown on Fedora 28, glibc 2.27)
74     int status = posix_memalign((void **)&memptr, ( (alignment >= sizeof(void*)) ? alignment : sizeof(void*) ), n_bytes);
75 
76     out_memptr = (status == 0) ? memptr : nullptr;
77     }
78   #elif defined(_MSC_VER)
79     {
80     //out_memptr = (eT *) malloc(sizeof(eT)*n_elem);
81     //out_memptr = (eT *) _aligned_malloc( sizeof(eT)*n_elem, 16 );  // lives in malloc.h
82 
83     const size_t n_bytes   = sizeof(eT)*size_t(n_elem);
84     const size_t alignment = (n_bytes >= size_t(1024)) ? size_t(32) : size_t(16);
85 
86     out_memptr = (eT *) _aligned_malloc( n_bytes, alignment );
87     }
88   #else
89     {
90     //return ( new(std::nothrow) eT[n_elem] );
91     out_memptr = (eT *) malloc(sizeof(eT)*n_elem);
92     }
93   #endif
94 
95   // TODO: for mingw, use __mingw_aligned_malloc
96 
97   arma_check_bad_alloc( (out_memptr == nullptr), "arma::memory::acquire(): out of memory" );
98 
99   return out_memptr;
100   }
101 
102 
103 
104 template<typename eT>
105 arma_inline
106 void
release(eT * mem)107 memory::release(eT* mem)
108   {
109   if(mem == nullptr)  { return; }
110 
111   #if   defined(ARMA_ALIEN_MEM_FREE_FUNCTION)
112     {
113     ARMA_ALIEN_MEM_FREE_FUNCTION( (void *)(mem) );
114     }
115   #elif defined(ARMA_USE_TBB_ALLOC)
116     {
117     scalable_free( (void *)(mem) );
118     }
119   #elif defined(ARMA_USE_MKL_ALLOC)
120     {
121     mkl_free( (void *)(mem) );
122     }
123   #elif defined(ARMA_HAVE_POSIX_MEMALIGN)
124     {
125     free( (void *)(mem) );
126     }
127   #elif defined(_MSC_VER)
128     {
129     //free( (void *)(mem) );
130     _aligned_free( (void *)(mem) );
131     }
132   #else
133     {
134     //delete [] mem;
135     free( (void *)(mem) );
136     }
137   #endif
138 
139   // TODO: for mingw, use __mingw_aligned_free
140   }
141 
142 
143 
144 template<typename eT>
145 arma_inline
146 bool
is_aligned(const eT * mem)147 memory::is_aligned(const eT* mem)
148   {
149   #if (defined(ARMA_HAVE_ICC_ASSUME_ALIGNED) || defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)) && !defined(ARMA_DONT_CHECK_ALIGNMENT)
150     {
151     return (sizeof(std::size_t) >= sizeof(eT*)) ? ((std::size_t(mem) & 0x0F) == 0) : false;
152     }
153   #else
154     {
155     arma_ignore(mem);
156 
157     return false;
158     }
159   #endif
160   }
161 
162 
163 
164 template<typename eT>
165 arma_inline
166 void
mark_as_aligned(eT * & mem)167 memory::mark_as_aligned(eT*& mem)
168   {
169   #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
170     {
171     __assume_aligned(mem, 16);
172     }
173   #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
174     {
175     mem = (eT*)__builtin_assume_aligned(mem, 16);
176     }
177   #else
178     {
179     arma_ignore(mem);
180     }
181   #endif
182 
183   // TODO: MSVC?  __assume( (mem & 0x0F) == 0 );
184   //
185   // http://comments.gmane.org/gmane.comp.gcc.patches/239430
186   // GCC __builtin_assume_aligned is similar to ICC's __assume_aligned,
187   // so for lvalue first argument ICC's __assume_aligned can be emulated using
188   // #define __assume_aligned(lvalueptr, align) lvalueptr = __builtin_assume_aligned (lvalueptr, align)
189   //
190   // http://www.inf.ethz.ch/personal/markusp/teaching/263-2300-ETH-spring11/slides/class19.pdf
191   // http://software.intel.com/sites/products/documentation/hpc/composerxe/en-us/cpp/lin/index.htm
192   // http://d3f8ykwhia686p.cloudfront.net/1live/intel/CompilerAutovectorizationGuide.pdf
193   }
194 
195 
196 
197 template<typename eT>
198 arma_inline
199 void
mark_as_aligned(const eT * & mem)200 memory::mark_as_aligned(const eT*& mem)
201   {
202   #if defined(ARMA_HAVE_ICC_ASSUME_ALIGNED)
203     {
204     __assume_aligned(mem, 16);
205     }
206   #elif defined(ARMA_HAVE_GCC_ASSUME_ALIGNED)
207     {
208     mem = (const eT*)__builtin_assume_aligned(mem, 16);
209     }
210   #else
211     {
212     arma_ignore(mem);
213     }
214   #endif
215   }
216 
217 
218 
219 //! @}
220