1 // -*- C++ -*-
2 /***************************************************************************
3  * blitz/array/reduce.h   Reductions of an array (or array expression) in a
4  *                        single rank: sum, mean, min, minIndex, max, maxIndex,
5  *                        product, count, any, all
6  *
7  * $Id$
8  *
9  * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
10  *
11  * This file is a part of Blitz.
12  *
13  * Blitz is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU Lesser General Public License
15  * as published by the Free Software Foundation, either version 3
16  * of the License, or (at your option) any later version.
17  *
18  * Blitz is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with Blitz.  If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Suggestions:          blitz-devel@lists.sourceforge.net
27  * Bugs:                 blitz-support@lists.sourceforge.net
28  *
29  * For more information, please see the Blitz++ Home Page:
30  *    https://sourceforge.net/projects/blitz/
31  *
32  ****************************************************************************/
33 #ifndef BZ_ARRAYREDUCE_H
34 #define BZ_ARRAYREDUCE_H
35 
36 #include <blitz/reduce.h>
37 #include <blitz/meta/vecassign.h>
38 
39 namespace blitz {
40 
41 template<bool needIndex,bool needInit> struct _bz_ReduceReset;
42 
43 template<>
44 struct _bz_ReduceReset<true,true> {
45     template<typename T_reduction,typename T_index,typename T_expr>
46     void operator()(T_reduction& reduce,const T_index& index,const T_expr& expr) {
47         reduce.reset(index,expr.first_value());
48     }
49 };
50 
51 template<>
52 struct _bz_ReduceReset<false,true> {
53     template<typename T_reduction,typename T_index,typename T_expr>
54     void operator()(T_reduction& reduce,const T_index&,const T_expr& expr) {
55         reduce.reset(expr.first_value());
56     }
57 };
58 
59 template<>
60 struct _bz_ReduceReset<true,false> {
61     template<typename T_reduction,typename T_index,typename T_expr>
62     void operator()(T_reduction& reduce,const T_index& index,const T_expr&) {
63         reduce.reset(index);
64     }
65 };
66 
67 template<>
68 struct _bz_ReduceReset<false,false> {
69     template<typename T_reduction,typename T_index,typename T_expr>
70     void operator()(T_reduction& reduce,const T_index&,const T_expr&) {
71         reduce.reset();
72     }
73 };
74 
75 
76 /** Expression template class for reductions. \todo We should be able
77     to do vectorization, at least for complete reduction. */
78 template<typename T_expr, int N_index, typename T_reduction>
79 class _bz_ArrayExprReduce {
80 
81 public:
82     typedef _bz_typename T_reduction::T_numtype T_numtype;
83 
84   // select return type
85   typedef typename unwrapET<typename T_expr::T_result>::T_unwrapped test;
86   typedef typename selectET<typename T_expr::T_typeprop,
87 			    T_numtype,
88 			    _bz_ArrayExprReduce<test, N_index, T_reduction> >::T_selected T_typeprop;
89   typedef typename unwrapET<T_typeprop>::T_unwrapped T_result;
90   typedef T_numtype T_optype;
91 
92     typedef T_expr      T_ctorArg1;
93     typedef T_reduction T_ctorArg2;
94   typedef int  T_range_result; // dummy
95 
96     static const int
97         numArrayOperands = T_expr::numArrayOperands,
98         numTVOperands = T_expr::numTVOperands,
99         numTMOperands = T_expr::numTMOperands,
100         numIndexPlaceholders = T_expr::numIndexPlaceholders + 1,
101       minWidth = simdTypes<T_numtype>::vecWidth,
102       maxWidth = simdTypes<T_numtype>::vecWidth,
103         rank_ = T_expr::rank_ - 1;
104 
105   /** Vectorization doesn't work for index expressions, so we can use
106       a dummy here. */
107   template<int N> struct tvresult {
108     typedef FastTV2Iterator<T_numtype, N> Type;
109   };
110 
111     _bz_ArrayExprReduce(const _bz_ArrayExprReduce& reduce)
112         : reduce_(reduce.reduce_), iter_(reduce.iter_), ordering_(reduce.ordering_) { }
113 
114     _bz_ArrayExprReduce(T_expr expr)
115         : iter_(expr)
116     { computeOrdering(); }
117 
118 #if 0
119     _bz_ArrayExprReduce(T_expr expr, T_reduction reduce)
120         : iter_(expr), reduce_(reduce)
121     { computeOrdering(); }
122 #endif
123 
124     int ascending(const int r) const { return iter_.ascending(r); }
125     int ordering(const int r)  const { return ordering_[r];       }
126     int lbound(const int r)    const { return iter_.lbound(r);    }
127     int ubound(const int r)    const { return iter_.ubound(r);    }
128     RectDomain<rank_> domain() const { return iter_.domain(); }
129 
130     template<int N_destRank>
131     T_numtype operator()(const TinyVector<int, N_destRank>& destIndex) const
132     {
133         BZPRECHECK(N_destRank == N_index,
134             "Array reduction performed over rank " << N_index
135             << " to produce a rank " << N_destRank << " expression." << endl
136             << "You must reduce over rank " << N_destRank << " instead.");
137 
138         TinyVector<int, N_destRank + 1> index;
139 
140         // This metaprogram copies elements 0..N-1 of destIndex into index
141         _bz_meta_vecAssign<N_index, 0>::assign(index, destIndex,
142             _bz_update<int,int>());
143 
144         int lbound = iter_.lbound(N_index);
145         int ubound = iter_.ubound(N_index);
146 
147         BZPRECHECK((lbound != tiny(int())) && (ubound != huge(int())),
148            "Array reduction performed over rank " << N_index
149            << " is unbounded." << endl
150            << "There must be an array object in the expression being reduced"
151            << endl << "which provides a bound in rank " << N_index << ".");
152 
153         // If we are doing minIndex/maxIndex, initialize with lower bound
154 
155         _bz_ReduceReset<T_reduction::needIndex,T_reduction::needInit> reset;
156         reset(reduce_,lbound,iter_);
157 
158         for (index[N_index]=lbound; index[N_index]<=ubound; ++index[N_index]) {
159             if (!reduce_(iter_(index), index[N_index]))
160                 break;
161         }
162 
163         return reduce_.result(ubound-lbound+1);
164     }
165 
166     // If you have a precondition failure on these routines, it means
167     // you are trying to use stack iteration mode on an expression
168     // which contains an index placeholder.  You must use index
169     // iteration mode instead.
170 
171     int operator*()        const {
172       BZPRECHECK(0,"Can't use stack iteration on a reduction."); return 0; }
173     int suggestStride(int) const {
174       BZPRECHECK(0,"Can't use stack iteration on a reduction."); return 0; }
175 
176     void push(int)           const {
177       BZPRECHECK(0,"Can't use stack iteration on a reduction."); }
178     void pop(int)            const {
179       BZPRECHECK(0,"Can't use stack iteration on a reduction."); }
180     void advance()           const {
181       BZPRECHECK(0,"Can't use stack iteration on a reduction."); }
182     void advance(int)        const {
183       BZPRECHECK(0,"Can't use stack iteration on a reduction."); }
184     void loadStride(int)     const {
185       BZPRECHECK(0,"Can't use stack iteration on a reduction."); }
186     void advanceUnitStride() const {
187       BZPRECHECK(0,"Can't use stack iteration on a reduction."); }
188 
189     template<int N_rank>
190     void moveTo(const TinyVector<int,N_rank>&) const {
191       BZPRECHECK(0,"Stencils of reductions are not implemented"); }
192 
193     bool isUnitStride(int)    const {
194       BZPRECHECK(0,"Can't use stack iteration on a reduction."); return false; }
195     bool isUnitStride()       const {
196       BZPRECHECK(0,"Can't use stack iteration on a reduction."); return false; }
197     bool canCollapse(int,int) const {
198       BZPRECHECK(0,"Can't use stack iteration on a reduction."); return false; }
199     bool isStride(int,int)    const {
200       BZPRECHECK(0,"Can't use stack iteration on a reduction."); return true;  }
201 
202     T_numtype operator[](int) const {
203       BZPRECHECK(0,"Can't use stack iteration on a reduction."); return T_numtype(); }
204     T_numtype fastRead(int)   const {
205       BZPRECHECK(0,"Can't use stack iteration on a reduction."); return T_numtype(); }
206 
207   template<int N>
208   typename tvresult<N>::Type fastRead_tv(int) const {
209     BZPRECHECK(0,"Can't use stack iteration on an index mapping.");
210     return TinyVector<T_numtype, N>();
211     }
212 
213     /** Determining whether the resulting expression is aligned is
214 	difficult, so to be safe we say no. It shouldn't be attempted
215 	anyway, though. */
216     bool isVectorAligned(diffType offset) const {
217       return false; }
218 
219     // don't know how to define these, so stencil expressions won't work
220     T_result shift(int offset, int dim) const
221   { BZPRECHECK(0,"Stencils of reductions are not implemented"); return T_numtype(); }
222     T_result shift(int offset1, int dim1,int offset2, int dim2) const
223   { BZPRECHECK(0,"Stencils of reductions are not implemented"); return T_numtype(); }
224     void _bz_offsetData(sizeType i) { BZPRECONDITION(0); }
225 
226   // Unclear how to define this, and stencils don't work anyway
227   T_range_result operator()(RectDomain<rank_> d) const
228   { BZPRECHECK(0,"Stencils of reductions are not implemented");
229     return T_range_result();  }
230 
231     void prettyPrint(std::string &str, prettyPrintFormat& format) const
232     {
233         // NEEDS_WORK-- do real formatting for reductions
234         str += "reduce[NEEDS_WORK](";
235         iter_.prettyPrint(str,format);
236         str += ")";
237     }
238 
239   /** \todo do a real shape check (tricky) */
240     template<typename T_shape>
241     bool shapeCheck(const T_shape&) const
242     {
243         return true;
244     }
245 
246   // sliceinfo for expressions
247   template<typename T1, typename T2 = nilArraySection,
248 	   class T3 = nilArraySection, typename T4 = nilArraySection,
249 	   class T5 = nilArraySection, typename T6 = nilArraySection,
250 	   class T7 = nilArraySection, typename T8 = nilArraySection,
251 	   class T9 = nilArraySection, typename T10 = nilArraySection,
252 	   class T11 = nilArraySection>
253   class SliceInfo {
254   public:
255     typedef typename T_expr::template SliceInfo<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11>::T_slice T_slice1;
256     typedef _bz_ArrayExprReduce<T_slice1, N_index, T_reduction> T_slice;
257 };
258 
259     template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6,
260         typename T7, typename T8, typename T9, typename T10, typename T11>
261     typename SliceInfo<T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11>::T_slice
262     operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const
263     {
264       // for slicing reduction results, we would need to set the
265       // dimension reduced over to Range::all(). That's not easy to do
266       // because it requires us to change the type of one of the rn's.
267       BZPRECONDITION(0);
268     }
269 
270 private:
271     _bz_ArrayExprReduce() { }
272 
273   /** Method for properly initializing the ordering values. \todo If
274       the expression being reduced consist of arrays with different
275       orderings, the call to iter_.ordering() will fail with a
276       "different orderings" error. But just like it can happen that
277       ordering values are missing from the expression, it seems
278       equally valid that ordering is indefinite in cases where the
279       expression has differing values. This doesn't prevent us from
280       assigning the expression to an array, and it shouldn't prevent
281       the expression from being used in a reduction either. (This is
282       bug 2058441.) */
283     void computeOrdering()
284     {
285         TinyVector<bool,rank_> in_ordering;
286         in_ordering = false;
287 
288         int j = 0;
289         for (int i=0; i<rank_; ++i) {
290             const int orderingj = iter_.ordering(i);
291             if (orderingj != tiny(int()) && orderingj < rank_ && !in_ordering(orderingj)) {
292                 // unique value in ordering array
293                 in_ordering(orderingj) = true;
294                 ordering_(j++) = orderingj;
295             }
296         }
297 
298         // It is possible that ordering is not a permutation of 0,...,rank-1.
299         // In that case j will be less than rank. We fill in ordering with
300         // the unused values in decreasing order.
301         for (int i = rank_; j < rank_; ++j) {
302             while (in_ordering(--i)); // find an unused index
303             ordering_(j) = i;
304         }
305     }
306 
307     T_reduction reduce_;
308     T_expr iter_;
309     TinyVector<int,rank_> ordering_;
310 };
311 
312 #define BZ_DECL_ARRAY_PARTIAL_REDUCE(fn,reduction)                      \
313 template<typename T_expr, int N_index>                                  \
314 inline                                                                  \
315  _bz_ArrayExpr<_bz_ArrayExprReduce<_bz_typename blitz::asExpr<T_expr>::T_expr, \
316 				   N_index,				\
317 				   reduction<_bz_typename T_expr::T_numtype> > > \
318  fn(const blitz::ETBase<T_expr>& expr,				\
319     const IndexPlaceholder<N_index>&)					\
320 {                                                                       \
321   return _bz_ArrayExprReduce<_bz_typename blitz::asExpr<T_expr>::T_expr, \
322 			     N_index,					\
323 			     reduction<_bz_typename T_expr::T_numtype> > \
324     (blitz::asExpr<T_expr>::getExpr(expr.unwrap()));		\
325 }
326 
327 BZ_DECL_ARRAY_PARTIAL_REDUCE(sum,      ReduceSum)
328 BZ_DECL_ARRAY_PARTIAL_REDUCE(mean,     ReduceMean)
329 BZ_DECL_ARRAY_PARTIAL_REDUCE((min),    ReduceMin)
330 BZ_DECL_ARRAY_PARTIAL_REDUCE(minIndex, ReduceMinIndex)
331 BZ_DECL_ARRAY_PARTIAL_REDUCE((max),    ReduceMax)
332 BZ_DECL_ARRAY_PARTIAL_REDUCE(maxIndex, ReduceMaxIndex)
333 BZ_DECL_ARRAY_PARTIAL_REDUCE(product,  ReduceProduct)
334 BZ_DECL_ARRAY_PARTIAL_REDUCE(count,    ReduceCount)
335 BZ_DECL_ARRAY_PARTIAL_REDUCE(any,      ReduceAny)
336 BZ_DECL_ARRAY_PARTIAL_REDUCE(all,      ReduceAll)
337 BZ_DECL_ARRAY_PARTIAL_REDUCE(first,    ReduceFirst)
338 BZ_DECL_ARRAY_PARTIAL_REDUCE(last,     ReduceLast)
339 
340 /*
341  * Complete reductions
342  */
343 
344 // Prototype of reduction functions
345 template<typename T_expr, typename T_reduction>
346 _bz_typename T_reduction::T_resulttype
347 _bz_ArrayExprFullReduce(T_expr expr, T_reduction reduction);
348 
349 template<typename T_expr, typename T_reduction>
350 _bz_typename T_reduction::T_resulttype
351 _bz_reduceWithIndexTraversal(T_expr expr, T_reduction reduction);
352 
353 template<typename T_expr, typename T_reduction>
354 _bz_typename T_reduction::T_resulttype
355 _bz_reduceWithIndexVectorTraversal(T_expr expr, T_reduction reduction);
356 
357 #define BZ_DECL_ARRAY_FULL_REDUCE(fn,reduction)				\
358 template<typename T_expr>                                               \
359  _bz_inline_et								\
360 _bz_typename reduction<_bz_typename T_expr::T_numtype>::T_resulttype    \
361  fn(const blitz::ETBase<T_expr>& expr)				\
362 {                                                                       \
363   return _bz_ArrayExprFullReduce					\
364     (blitz::asExpr<T_expr>::getExpr(expr.unwrap()),		\
365      reduction<_bz_typename T_expr::T_numtype>());			\
366 }                                                                       \
367 
368 BZ_DECL_ARRAY_FULL_REDUCE(sum,      ReduceSum)
369 BZ_DECL_ARRAY_FULL_REDUCE(mean,     ReduceMean)
370 BZ_DECL_ARRAY_FULL_REDUCE((min),    ReduceMin)
371 BZ_DECL_ARRAY_FULL_REDUCE((max),    ReduceMax)
372 BZ_DECL_ARRAY_FULL_REDUCE((minmax), ReduceMinMax)
373 BZ_DECL_ARRAY_FULL_REDUCE(product,  ReduceProduct)
374 BZ_DECL_ARRAY_FULL_REDUCE(count,    ReduceCount)
375 BZ_DECL_ARRAY_FULL_REDUCE(any,      ReduceAny)
376 BZ_DECL_ARRAY_FULL_REDUCE(all,      ReduceAll)
377 BZ_DECL_ARRAY_FULL_REDUCE(first,    ReduceFirst)
378 BZ_DECL_ARRAY_FULL_REDUCE(last,     ReduceLast)
379 
380 // Special versions of complete reductions: minIndex and
381 // maxIndex
382 
383 #define BZ_DECL_ARRAY_FULL_REDUCE_INDEXVECTOR(fn,reduction)             \
384 template<typename T_expr>                                               \
385  _bz_inline_et								\
386  _bz_typename reduction<_bz_typename T_expr::T_numtype,			\
387 			T_expr::rank_>::T_resulttype			\
388  fn(const blitz::ETBase<T_expr>& expr)				\
389 {                                                                       \
390   return _bz_reduceWithIndexVectorTraversal				\
391     (blitz::asExpr<T_expr>::getExpr(expr.unwrap()),		\
392      reduction<_bz_typename T_expr::T_numtype, T_expr::rank_>());	\
393 }
394 
395 BZ_DECL_ARRAY_FULL_REDUCE_INDEXVECTOR(minIndex, ReduceMinIndexVector)
396 BZ_DECL_ARRAY_FULL_REDUCE_INDEXVECTOR(maxIndex, ReduceMaxIndexVector)
397 
398 }
399 
400 #include <blitz/array/reduce.cc>
401 
402 #endif // BZ_ARRAYREDUCE_H
403