1 /***************************************************************************
2 gdlarray.hpp - basic typedefs
3 -------------------
4 begin : July 22 2002
5 copyright : (C) 2002 by Marc Schellens
6 email : m_schellens@users.sf.net
7 ***************************************************************************/
8
9 /***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
18 #ifndef GDLARRAY_HPP_
19 #define GDLARRAY_HPP_
20
21 // #define GDLARRAY_CACHE
22 #undef GDLARRAY_CACHE
23
24 //#define GDLARRAY_DEBUG
25 #undef GDLARRAY_DEBUG
26
27 // for complex (of POD)
28 const bool TreatPODComplexAsPOD = true;
29
30 template <typename T, bool IsPOD>
31 class GDLArray
32 {
33 private:
34 enum GDLArrayConstants
35 {
36 smallArraySize = 27,
37 maxCache = 1000 * 1000 // ComplexDbl is 16 bytes
38 };
39
40 typedef T Ty;
41
42 #ifdef USE_EIGEN
43 EIGEN_ALIGN16 char scalarBuf[ smallArraySize * sizeof(Ty)];
44 #else
45 char scalarBuf[ smallArraySize * sizeof(Ty)];
46 #endif
47
InitScalar()48 Ty* InitScalar()
49 {
50 assert( sz <= smallArraySize);
51 if( IsPOD)
52 {
53 return reinterpret_cast<Ty*>(scalarBuf);
54 }
55 else
56 {
57 Ty* b = reinterpret_cast<Ty*>(scalarBuf);
58 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
59 for( int i = 0; i<sz; ++i) new (&(b[ i])) Ty();
60 return b;
61 }
62 }
63
64 #ifdef GDLARRAY_CACHE
65 #error "GDLARRAY_CACHE defined"
66 static SizeT cacheSize;
67 static Ty* cache;
68 static Ty* Cached( SizeT newSize);
69 #endif
70
71 Ty* buf;
72 SizeT sz;
73
New(SizeT s)74 Ty* New( SizeT s)
75 {
76 // We should align all our arrays on the boundary that will be beneficial for the acceleration of the machine GDL is built,
77 // as sse and other avx need 32,64..512 alignment. Not necessary on the EIGEN_ALIGN_16, and not only if we use Eigen:: as some code (median filter, random)
78 // uses hardware acceleration independently of whatever speedup Eigen:: may propose. Note that according to http://eigen.tuxfamily.org/dox/group__CoeffwiseMathFunctions.html
79 // Eigen:: may eventually use sse2 or avx on reals but not doubles, etc.
80 // As Eigen::internal::aligned_new is SLOW FOR NON-PODS and sdt::complex is a NON-POD for the compiler (but not for us), we use gdlAlignedMalloc for all PODS.
81 // Normally, Everything should be allocated using gdlAlignedMalloc with the 'good' alignment, not only in the USE_EIGEN case.
82 // Unfortunately gdlAlignedMalloc uses Eigen::internal::alogned_malloc at the moment. Todo Next.
83 #ifdef USE_EIGEN
84 if (IsPOD) return (Ty*) gdlAlignedMalloc(s*sizeof(Ty)); else return Eigen::internal::aligned_new<Ty>( s);
85 #else
86 return new Ty[ s];
87 #endif
88 }
89
90 public:
GDLArray()91 GDLArray() throw() : buf( NULL), sz( 0) {}
92
93 #ifndef GDLARRAY_CACHE
94
~GDLArray()95 ~GDLArray() throw()
96 {
97 if( IsPOD)
98 {
99 #ifdef USE_EIGEN
100 if ( buf != reinterpret_cast<Ty*>(scalarBuf)) gdlAlignedFree(buf);
101 // Eigen::internal::aligned_delete( buf, sz);
102 #else
103 if( buf != reinterpret_cast<Ty*>(scalarBuf))
104 delete[] buf; // buf == NULL also possible
105 #endif
106 // no cleanup of "buf" here
107 }
108 else
109 {
110 #ifdef USE_EIGEN
111 if( buf != reinterpret_cast<Ty*>(scalarBuf)) Eigen::internal::aligned_delete( buf, sz);
112 else
113 for( int i = 0; i<sz; ++i) buf[i].~Ty();
114 #else
115 if( buf != reinterpret_cast<Ty*>(scalarBuf)) delete[] buf; // buf == NULL also possible
116 else
117 for( int i = 0; i<sz; ++i) buf[i].~Ty();
118 #endif
119 }
120 }
121
GDLArray(const GDLArray & cp)122 GDLArray( const GDLArray& cp) : sz( cp.size())
123 {
124 try {
125 buf = (cp.size() > smallArraySize) ? New(cp.size()) /*new Ty[ cp.size()]*/ : InitScalar();
126 } catch (std::bad_alloc&) { ThrowGDLException("Array requires more memory than available"); }
127 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
128 for( SizeT i=0; i<sz; ++i) buf[ i] = cp.buf[ i];
129 }
130
GDLArray(SizeT s,bool dummy)131 GDLArray( SizeT s, bool dummy) : sz( s)
132 {
133 try {
134 buf = (s > smallArraySize) ? New(s) /*T[ s]*/ : InitScalar();
135 } catch (std::bad_alloc&) { ThrowGDLException("Array requires more memory than available"); }
136 }
137
GDLArray(T val,SizeT s)138 GDLArray( T val, SizeT s) : sz( s)
139 {
140 try {
141 buf = (s > smallArraySize) ? New(s) /*T[ s]*/ : InitScalar();
142 } catch (std::bad_alloc&) { ThrowGDLException("Array requires more memory than available"); }
143 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
144 for( SizeT i=0; i<sz; ++i) buf[ i] = val;
145 }
146
GDLArray(const T * arr,SizeT s)147 GDLArray( const T* arr, SizeT s) : sz( s)
148 {
149 try {
150 buf = (s > smallArraySize) ? New(s) /*new Ty[ s]*/: InitScalar();
151 } catch (std::bad_alloc&) { ThrowGDLException("Array requires more memory than available"); }
152 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
153 for( SizeT i=0; i<sz; ++i) buf[ i] = arr[ i];
154 }
155
156 #else // GDLARRAY_CACHE
157
158 // use definition in datatypes.cpp
159 GDLArray( const GDLArray& cp) ;
160 GDLArray( SizeT s, bool b) ;
161 GDLArray( T val, SizeT s) ;
162 GDLArray( const T* arr, SizeT s) ;
163 ~GDLArray() throw();
164
165 #endif // GDLARRAY_CACHE
166
167 // scalar
GDLArray(const T & s)168 explicit GDLArray( const T& s) throw() : sz( 1)
169 {
170 if( IsPOD)
171 {
172 buf = reinterpret_cast<Ty*>(scalarBuf);
173 buf[0] = s;
174 }
175 else
176 {
177 Ty* b = reinterpret_cast<Ty*>(scalarBuf);
178 new (&(b[ 0])) Ty( s);
179 buf = b;
180 }
181 }
182
operator [](SizeT ix)183 T& operator[]( SizeT ix) throw()
184 {
185 assert( ix < sz);
186 return buf[ ix];
187 }
operator [](SizeT ix) const188 const T& operator[]( SizeT ix) const throw()
189 {
190 assert( ix < sz);
191 return buf[ ix];
192 }
193
194 // private: // disable
195 // only used (indirect) by DStructGDL::DStructGDL(const DStructGDL& d_)
InitFrom(const GDLArray & right)196 void InitFrom( const GDLArray& right )
197 {
198 assert( &right != this);
199 assert ( sz == right.size() );
200 if( IsPOD)
201 {
202 std::memcpy(buf,right.buf,sz*sizeof(Ty));
203 }
204 else
205 {
206 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
207 for ( SizeT i=0; i<sz; ++i )
208 buf[ i] = right.buf[ i];
209 }
210 }
211
operator =(const GDLArray & right)212 GDLArray& operator= ( const GDLArray& right )
213 {
214 assert( this != &right);
215 assert( sz == right.size());
216 if( IsPOD)
217 {
218 std::memcpy(buf,right.buf,sz*sizeof(Ty));
219 }
220 else
221 {
222 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
223 for ( SizeT i=0; i<sz; ++i )
224 buf[ i] = right.buf[ i];
225 }
226 return *this;
227 }
228
operator +=(const GDLArray & right)229 GDLArray& operator+=( const GDLArray& right) throw()
230 {
231 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
232 for( SizeT i=0; i<sz; ++i)
233 buf[ i] += right.buf[ i];
234 return *this;
235 }
operator -=(const GDLArray & right)236 GDLArray& operator-=( const GDLArray& right) throw()
237 {
238 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
239 for( SizeT i=0; i<sz; ++i)
240 buf[ i] -= right.buf[ i];
241 return *this;
242 }
243
operator +=(const T & right)244 GDLArray& operator+=( const T& right) throw()
245 {
246 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
247 for( SizeT i=0; i<sz; ++i)
248 buf[ i] += right;
249 return *this;
250 }
operator -=(const T & right)251 GDLArray& operator-=( const T& right) throw()
252 {
253 #pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
254 for( SizeT i=0; i<sz; ++i)
255 buf[ i] -= right;
256 return *this;
257 }
258
SetBuffer(T * b)259 void SetBuffer( T* b) throw()
260 {
261 buf = b;
262 }
GetBuffer()263 T* GetBuffer() throw()
264 {
265 return buf;
266 }
SetBufferSize(SizeT s)267 void SetBufferSize( SizeT s) throw()
268 {
269 sz = s;
270 }
271
size() const272 SizeT size() const throw()
273 {
274 return sz;
275 }
276
SetSize(SizeT newSz)277 void SetSize( SizeT newSz ) // only used in DStructGDL::DStructGDL( const string& name_) (dstructgdl.cpp)
278 {
279 assert ( sz == 0);
280 sz = newSz;
281 if ( sz > smallArraySize )
282 {
283 try
284 {
285 buf = New(sz) /*new T[ newSz]*/;
286 }
287 catch ( std::bad_alloc& )
288 {
289 ThrowGDLException ( "Array requires more memory than available" );
290 }
291 }
292 else
293 {
294 // default constructed instances have buf == NULL and size == 0
295 // make sure buf is set corectly if such instances are resized
296 buf = InitScalar();
297 }
298 }
299
300 // protected:
301 // void assert(ix<sz arg1);
302 // protected:
303 // void assert(ix<sz arg1);
304 }; // GDLArray
305
306 #endif
307