/***************************************************************************
                          gdlarray.hpp  -  basic typedefs
                             -------------------
    begin                : July 22 2002
    copyright            : (C) 2002 by Marc Schellens
    email                : m_schellens@users.sf.net
***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifndef GDLARRAY_HPP_
#define GDLARRAY_HPP_

// #define GDLARRAY_CACHE
#undef GDLARRAY_CACHE

//#define GDLARRAY_DEBUG
#undef GDLARRAY_DEBUG

// for complex (of POD): treat std::complex of a POD element type as POD
const bool TreatPODComplexAsPOD = true;

template <typename T, bool IsPOD>
class GDLArray
{
private:
	enum GDLArrayConstants
	{
		smallArraySize = 27,   // arrays up to this many elements live in the in-object scalarBuf
		maxCache = 1000 * 1000 // cache limit in elements (only relevant with GDLARRAY_CACHE); ComplexDbl is 16 bytes, so ~16 MB
	};

	typedef T Ty;

#ifdef USE_EIGEN
  EIGEN_ALIGN16 char scalarBuf[ smallArraySize * sizeof(Ty)];
#else
  char scalarBuf[ smallArraySize * sizeof(Ty)];
#endif

  // return the in-object scalar buffer; non-POD elements are default-constructed in place
  Ty* InitScalar()
  {
    assert( sz <= smallArraySize);
    if( IsPOD)
    {
      return reinterpret_cast<Ty*>(scalarBuf);
    }
    else
    {
      Ty* b = reinterpret_cast<Ty*>(scalarBuf);
#pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
      for( int i = 0; i<sz; ++i) new (&(b[ i])) Ty();
      return b;
    }
  }

#ifdef GDLARRAY_CACHE
#error "GDLARRAY_CACHE defined"
  static SizeT cacheSize;
  static Ty* cache;
  static Ty* Cached( SizeT newSize);
#endif

  Ty*   buf;
  SizeT sz;

  Ty* New( SizeT s)
  {
// Ideally all arrays would be aligned on whatever boundary benefits the machine GDL is built for,
// since SSE and the various AVX extensions need 16-, 32- or 64-byte alignment. Restricting this to
// EIGEN_ALIGN16, or to the USE_EIGEN case only, is not enough: some code (median filter, random) uses
// hardware acceleration independently of whatever speedup Eigen:: may provide. Note that, according to
// http://eigen.tuxfamily.org/dox/group__CoeffwiseMathFunctions.html, Eigen:: may eventually use SSE2 or AVX
// on single-precision reals but not on doubles, etc.
// Because Eigen::internal::aligned_new is SLOW FOR NON-PODs, and std::complex is a non-POD for the compiler
// (but not for us), we use gdlAlignedMalloc for all PODs.
// Normally everything should be allocated through gdlAlignedMalloc with the 'good' alignment, not only in the
// USE_EIGEN case. Unfortunately gdlAlignedMalloc still relies on Eigen::internal::aligned_malloc at the
// moment. Todo next (see the sketch after this function).
#ifdef USE_EIGEN
    if (IsPOD) return (Ty*) gdlAlignedMalloc(s*sizeof(Ty)); else return Eigen::internal::aligned_new<Ty>( s);
#else
    return new Ty[ s];
#endif
  }
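
// A possible Eigen-independent replacement for gdlAlignedMalloc/gdlAlignedFree, kept here only as an
// illustrative, commented-out sketch. It assumes C++17 aligned operator new; the names are hypothetical
// and the real gdlAlignedMalloc/gdlAlignedFree are defined elsewhere in GDL:
//
//   #include <cstddef>
//   #include <new>
//
//   inline void* gdlAlignedMallocSketch( std::size_t nBytes)
//   {
//     constexpr std::size_t boundary = 64; // generous enough for SSE, AVX and AVX-512 loads
//     return ::operator new( nBytes, std::align_val_t( boundary));
//   }
//
//   inline void gdlAlignedFreeSketch( void* p)
//   {
//     ::operator delete( p, std::align_val_t( 64));
//   }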

public:
  GDLArray() throw() : buf( NULL), sz( 0) {}

#ifndef GDLARRAY_CACHE

  ~GDLArray() throw()
  {
  if( IsPOD)
    {
#ifdef USE_EIGEN
    if ( buf != reinterpret_cast<Ty*>(scalarBuf)) gdlAlignedFree(buf);
//	Eigen::internal::aligned_delete( buf, sz);
#else
    if( buf != reinterpret_cast<Ty*>(scalarBuf))
	delete[] buf; // buf == NULL also possible
#endif
    // no per-element cleanup of "buf" needed for PODs
    }
  else
    {
#ifdef USE_EIGEN
    if( buf != reinterpret_cast<Ty*>(scalarBuf)) Eigen::internal::aligned_delete( buf, sz);
    else
      for( int i = 0; i<sz; ++i) buf[i].~Ty();
#else
    if( buf != reinterpret_cast<Ty*>(scalarBuf)) delete[] buf; // buf == NULL also possible
    else
      for( int i = 0; i<sz; ++i) buf[i].~Ty();
#endif
    }
  }

  GDLArray( const GDLArray& cp) : sz( cp.size())
  {
      try {
	buf = (cp.size() > smallArraySize) ? New(cp.size()) /*new Ty[ cp.size()]*/ : InitScalar();
      } catch (std::bad_alloc&) { ThrowGDLException("Array requires more memory than available"); }
#pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
      for( SizeT i=0; i<sz; ++i)	buf[ i] = cp.buf[ i];
  }

  // the unused bool parameter only selects this overload; no values are assigned to the elements
  GDLArray( SizeT s, bool dummy) : sz( s)
  {
    try {
      buf = (s > smallArraySize) ? New(s) /*T[ s]*/ : InitScalar();
    } catch (std::bad_alloc&) { ThrowGDLException("Array requires more memory than available"); }
  }

  GDLArray( T val, SizeT s) : sz( s)
  {
    try {
	    buf = (s > smallArraySize) ? New(s) /*T[ s]*/ : InitScalar();
    } catch (std::bad_alloc&) { ThrowGDLException("Array requires more memory than available"); }
#pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
    for( SizeT i=0; i<sz; ++i) buf[ i] = val;
  }

  GDLArray( const T* arr, SizeT s) : sz( s)
  {
      try {
	buf = (s > smallArraySize) ? New(s) /*new Ty[ s]*/: InitScalar();
      } catch (std::bad_alloc&) { ThrowGDLException("Array requires more memory than available"); }
#pragma omp parallel for if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
      for( SizeT i=0; i<sz; ++i)	buf[ i] = arr[ i];
  }

#else // GDLARRAY_CACHE

  // use definitions in datatypes.cpp
  GDLArray( const GDLArray& cp);
  GDLArray( SizeT s, bool b);
  GDLArray( T val, SizeT s);
  GDLArray( const T* arr, SizeT s);
  ~GDLArray() throw();

#endif // GDLARRAY_CACHE

  // scalar
  explicit GDLArray( const T& s) throw() : sz( 1)
  {
    if( IsPOD)
    {
      buf = reinterpret_cast<Ty*>(scalarBuf);
      buf[0] = s;
    }
    else
    {
      Ty* b = reinterpret_cast<Ty*>(scalarBuf);
      new (&(b[ 0])) Ty( s);
      buf = b;
    }
  }

  T& operator[]( SizeT ix) throw()
  {
    assert( ix < sz);
    return buf[ ix];
  }
  const T& operator[]( SizeT ix) const throw()
  {
    assert( ix < sz);
    return buf[ ix];
  }

// private: // disable
// only used (indirectly) by DStructGDL::DStructGDL(const DStructGDL& d_)
void InitFrom( const GDLArray& right )
{
  assert( &right != this);
  assert ( sz == right.size() );
  if( IsPOD)
  {
    std::memcpy(buf,right.buf,sz*sizeof(Ty));
  }
  else
  {
#pragma omp parallel for   if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
    for ( SizeT i=0; i<sz; ++i )
	buf[ i] = right.buf[ i];
  }
}

GDLArray& operator= ( const GDLArray& right )
{
  assert( this != &right);
  assert( sz == right.size());
  if( IsPOD)
  {
    std::memcpy(buf,right.buf,sz*sizeof(Ty));
  }
  else
  {
#pragma omp parallel for   if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
    for ( SizeT i=0; i<sz; ++i )
      buf[ i] = right.buf[ i];
  }
  return *this;
}

  GDLArray& operator+=( const GDLArray& right) throw()
  {
#pragma omp parallel for   if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
    for( SizeT i=0; i<sz; ++i)
      buf[ i] += right.buf[ i];
    return *this;
  }
  GDLArray& operator-=( const GDLArray& right) throw()
  {
#pragma omp parallel for   if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
    for( SizeT i=0; i<sz; ++i)
      buf[ i] -= right.buf[ i];
    return *this;
  }

  GDLArray& operator+=( const T& right) throw()
  {
#pragma omp parallel for   if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
    for( SizeT i=0; i<sz; ++i)
      buf[ i] += right;
    return *this;
  }
  GDLArray& operator-=( const T& right) throw()
  {
#pragma omp parallel for   if (sz >= CpuTPOOL_MIN_ELTS && (CpuTPOOL_MAX_ELTS == 0 || CpuTPOOL_MAX_ELTS <= sz))
    for( SizeT i=0; i<sz; ++i)
      buf[ i] -= right;
    return *this;
  }

  // raw buffer access; these perform no allocation, deallocation or ownership checks
  void SetBuffer( T* b) throw()
  {
    buf = b;
  }
  T* GetBuffer() throw()
  {
    return buf;
  }
  void SetBufferSize( SizeT s) throw()
  {
    sz = s;
  }

  SizeT size() const throw()
  {
    return sz;
  }

  void SetSize( SizeT newSz ) // only used in DStructGDL::DStructGDL( const string& name_) (dstructgdl.cpp)
  {
    assert ( sz == 0);
    sz = newSz;
    if ( sz > smallArraySize )
    {
      try
      {
	buf = New(sz) /*new T[ newSz]*/;
      }
      catch ( std::bad_alloc& )
      {
	ThrowGDLException ( "Array requires more memory than available" );
      }
    }
    else
    {
      // default constructed instances have buf == NULL and size == 0
      // make sure buf is set correctly if such instances are resized
      buf = InitScalar();
    }
  }
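
  // Illustrative sketch of the intended two-step pattern, inferred from the comments above
  // (the actual caller is DStructGDL; the concrete element type here is only an example):
  //
  //   GDLArray<char, true> a;  // default constructed: buf == NULL, sz == 0
  //   a.SetSize( n);           // allocate (or point at scalarBuf) once the size n is known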

}; // GDLArray
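
// Minimal usage sketch (illustrative only; DFloat is assumed to be the usual GDL typedef for float,
// and the second template argument states whether the element type is treated as POD):
//
//   GDLArray<DFloat, true> small( DFloat( 1), 8);     // 8 <= smallArraySize: lives in scalarBuf
//   GDLArray<DFloat, true> large( DFloat( 0), 1000);  // > smallArraySize: heap-allocated via New()
//   large[ 0] = small[ 0] + DFloat( 2);               // element access through operator[]
//   large += small[ 0];                               // scalar compound assignment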

#endif