1 //
2 // reduce.cc
3 //
4 // Copyright (C) 1996 Limit Point Systems, Inc.
5 //
6 // Author: Curtis Janssen <cljanss@limitpt.com>
7 // Maintainer: LPS
8 //
9 // This file is part of the SC Toolkit.
10 //
11 // The SC Toolkit is free software; you can redistribute it and/or modify
12 // it under the terms of the GNU Library General Public License as published by
13 // the Free Software Foundation; either version 2, or (at your option)
14 // any later version.
15 //
16 // The SC Toolkit is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU Library General Public License for more details.
20 //
21 // You should have received a copy of the GNU Library General Public License
22 // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
23 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24 //
25 // The U.S. Government is granted a limited license as per AL 91-7.
26 //
27 
28 #ifdef HAVE_CONFIG_H
29 #include <scconfig.h>
30 #endif
31 #include <util/group/message.h>
32 
33 using namespace sc;
34 
35 /////////////////////////////////////////////////////////////////////////
36 // instantiate templates
37 
#ifdef EXPLICIT_TEMPLATE_INSTANTIATION
// Explicit instantiations of the reduction class templates, emitted only
// when the build defines EXPLICIT_TEMPLATE_INSTANTIATION.

// Instantiate cls<T> for every integral element type used in this file.
// The last line deliberately has no semicolon; the invocation supplies it.
#define INSTANTIATE_FOR_INTEGRAL_TYPES(cls) \
    template class cls<unsigned int>; \
    template class cls<int>; \
    template class cls<long>; \
    template class cls<short>; \
    template class cls<char>; \
    template class cls<unsigned char>; \
    template class cls<signed char>

// Instantiate cls<T> for the floating point types and all integral types.
#define INSTANTIATE_FOR_ALL_TYPES(cls) \
    template class cls<double>; \
    template class cls<float>; \
    INSTANTIATE_FOR_INTEGRAL_TYPES(cls)

INSTANTIATE_FOR_ALL_TYPES(GrpReduce);
INSTANTIATE_FOR_ALL_TYPES(GrpFunctionReduce);
INSTANTIATE_FOR_ALL_TYPES(GrpMinReduce);
INSTANTIATE_FOR_ALL_TYPES(GrpMaxReduce);
INSTANTIATE_FOR_ALL_TYPES(GrpSumReduce);
INSTANTIATE_FOR_ALL_TYPES(GrpProductReduce);

// The bitwise reductions are only instantiated for integral types.
INSTANTIATE_FOR_INTEGRAL_TYPES(GrpArithmeticOrReduce);
INSTANTIATE_FOR_INTEGRAL_TYPES(GrpArithmeticAndReduce);
INSTANTIATE_FOR_INTEGRAL_TYPES(GrpArithmeticXOrReduce);

#undef INSTANTIATE_FOR_ALL_TYPES
#undef INSTANTIATE_FOR_INTEGRAL_TYPES
#endif
123 
124 /////////////////////////////////////////////////////////////////////////
125 // sum reduction members
126 
127 template <class T>
128 void
do_sum(MessageGrp * grp,T * data,int n,T * tmp,int target)129 do_sum(MessageGrp* grp, T* data, int n, T* tmp, int target)
130 {
131   GrpSumReduce<T> gred;
132   grp->reduce(data, n, gred, tmp, target);
133 }
134 
135 void
sum(double * data,int n,double * tmp,int target)136 MessageGrp::sum(double* data, int n, double* tmp, int target)
137 {
138   do_sum(this, data, n, tmp, target);
139 }
140 
141 void
sum(unsigned int * data,int n,unsigned int * tmp,int target)142 MessageGrp::sum(unsigned int* data, int n, unsigned int* tmp, int target)
143 {
144   do_sum(this, data, n, tmp, target);
145 }
146 
147 void
sum(int * data,int n,int * tmp,int target)148 MessageGrp::sum(int* data, int n, int* tmp, int target)
149 {
150   do_sum(this, data, n, tmp, target);
151 }
152 
153 void
sum(char * data,int n,char * tmp,int target)154 MessageGrp::sum(char* data, int n, char* tmp, int target)
155 {
156   do_sum(this, data, n, tmp, target);
157 }
158 
159 void
sum(unsigned char * data,int n,unsigned char * tmp,int target)160 MessageGrp::sum(unsigned char* data, int n, unsigned char* tmp, int target)
161 {
162   do_sum(this, data, n, tmp, target);
163 }
164 
165 void
sum(signed char * data,int n,signed char * tmp,int target)166 MessageGrp::sum(signed char* data, int n, signed char* tmp, int target)
167 {
168   do_sum(this, data, n, tmp, target);
169 }
170 
171 /////////////////////////////////////////////////////////////////////////
// max reduction members
173 
174 template <class T>
175 void
do_max(MessageGrp * grp,T * data,int n,T * tmp,int target)176 do_max(MessageGrp* grp, T* data, int n, T* tmp, int target)
177 {
178   GrpMaxReduce<T> gred;
179   grp->reduce(data, n, gred, tmp, target);
180 }
181 
182 void
max(double * data,int n,double * tmp,int target)183 MessageGrp::max(double* data, int n, double* tmp, int target)
184 {
185   do_max(this, data, n, tmp, target);
186 }
187 
188 void
max(unsigned int * data,int n,unsigned int * tmp,int target)189 MessageGrp::max(unsigned int* data, int n, unsigned int* tmp, int target)
190 {
191   do_max(this, data, n, tmp, target);
192 }
193 
194 void
max(int * data,int n,int * tmp,int target)195 MessageGrp::max(int* data, int n, int* tmp, int target)
196 {
197   do_max(this, data, n, tmp, target);
198 }
199 
200 void
max(char * data,int n,char * tmp,int target)201 MessageGrp::max(char* data, int n, char* tmp, int target)
202 {
203   do_max(this, data, n, tmp, target);
204 }
205 
206 void
max(unsigned char * data,int n,unsigned char * tmp,int target)207 MessageGrp::max(unsigned char* data, int n, unsigned char* tmp, int target)
208 {
209   do_max(this, data, n, tmp, target);
210 }
211 
212 void
max(signed char * data,int n,signed char * tmp,int target)213 MessageGrp::max(signed char* data, int n, signed char* tmp, int target)
214 {
215   do_max(this, data, n, tmp, target);
216 }
217 
218 /////////////////////////////////////////////////////////////////////////
// min reduction members
220 
221 template <class T>
222 void
do_min(MessageGrp * grp,T * data,int n,T * tmp,int target)223 do_min(MessageGrp* grp, T* data, int n, T* tmp, int target)
224 {
225   GrpMinReduce<T> gred;
226   grp->reduce(data, n, gred, tmp, target);
227 }
228 
229 void
min(double * data,int n,double * tmp,int target)230 MessageGrp::min(double* data, int n, double* tmp, int target)
231 {
232   do_min(this, data, n, tmp, target);
233 }
234 
235 void
min(unsigned int * data,int n,unsigned int * tmp,int target)236 MessageGrp::min(unsigned int* data, int n, unsigned int* tmp, int target)
237 {
238   do_min(this, data, n, tmp, target);
239 }
240 
241 void
min(int * data,int n,int * tmp,int target)242 MessageGrp::min(int* data, int n, int* tmp, int target)
243 {
244   do_min(this, data, n, tmp, target);
245 }
246 
247 void
min(char * data,int n,char * tmp,int target)248 MessageGrp::min(char* data, int n, char* tmp, int target)
249 {
250   do_min(this, data, n, tmp, target);
251 }
252 
253 void
min(unsigned char * data,int n,unsigned char * tmp,int target)254 MessageGrp::min(unsigned char* data, int n, unsigned char* tmp, int target)
255 {
256   do_min(this, data, n, tmp, target);
257 }
258 
259 void
min(signed char * data,int n,signed char * tmp,int target)260 MessageGrp::min(signed char* data, int n, signed char* tmp, int target)
261 {
262   do_min(this, data, n, tmp, target);
263 }
264 
265 /////////////////////////////////////////////////////////////////////////
266 // generic reduction
267 
268 void
reduce(double * data,int n,GrpReduce<double> & red,double * scratch,int target)269 MessageGrp::reduce(double* data, int n, GrpReduce<double>& red,
270                    double* scratch, int target)
271 {
272   int tgop_max = gop_max_/sizeof(double);
273   if (tgop_max == 0) tgop_max = gop_max_?1:n;
274 
275   int passed_scratch;
276   if (!scratch) {
277       scratch = new double[n>tgop_max?tgop_max:n];
278       passed_scratch = 0;
279     }
280   else passed_scratch = 1;
281 
282   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
283                                                     (target== -1?0:target)));
284   for (i->backwards(); !i->done(); i->next()) {
285       for (int idat=0; idat<n; idat+=tgop_max) {
286           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
287           if (i->send()) {
288               send(i->sendto(), &data[idat], ndat);
289             }
290           if (i->recv()) {
291               recv(i->recvfrom(), scratch, ndat);
292               red.reduce(&data[idat], scratch, ndat);
293             }
294         }
295       if (n > tgop_max) sync();
296     }
297 
298   if (target == -1) {
299       bcast(data, n, 0);
300     }
301 
302   if (!passed_scratch) delete[] scratch;
303 }
304 
305 void
reduce(unsigned int * data,int n,GrpReduce<unsigned int> & red,unsigned int * scratch,int target)306 MessageGrp::reduce(unsigned int* data, int n, GrpReduce<unsigned int>& red,
307                    unsigned int* scratch, int target)
308 {
309   int tgop_max = gop_max_/sizeof(unsigned int);
310   if (tgop_max == 0) tgop_max = gop_max_?1:n;
311 
312   int passed_scratch;
313   if (!scratch) {
314       scratch = new unsigned int[n>tgop_max?tgop_max:n];
315       passed_scratch = 0;
316     }
317   else passed_scratch = 1;
318 
319   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
320                                                     (target== -1?0:target)));
321   for (i->backwards(); !i->done(); i->next()) {
322       for (int idat=0; idat<n; idat+=tgop_max) {
323           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
324           if (i->send()) {
325               send(i->sendto(), &data[idat], ndat);
326             }
327           if (i->recv()) {
328               recv(i->recvfrom(), scratch, ndat);
329               red.reduce(&data[idat], scratch, ndat);
330             }
331         }
332       if (n > tgop_max) sync();
333     }
334 
335   if (target == -1) {
336       bcast(data, n, 0);
337     }
338 
339   if (!passed_scratch) delete[] scratch;
340 }
341 
342 void
reduce(int * data,int n,GrpReduce<int> & red,int * scratch,int target)343 MessageGrp::reduce(int* data, int n, GrpReduce<int>& red,
344                    int* scratch, int target)
345 {
346   int tgop_max = gop_max_/sizeof(int);
347   if (tgop_max == 0) tgop_max = gop_max_?1:n;
348 
349   int passed_scratch;
350   if (!scratch) {
351       scratch = new int[n>tgop_max?tgop_max:n];
352       passed_scratch = 0;
353     }
354   else passed_scratch = 1;
355 
356   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
357                                                     (target== -1?0:target)));
358   for (i->backwards(); !i->done(); i->next()) {
359       for (int idat=0; idat<n; idat+=tgop_max) {
360           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
361           if (i->send()) {
362               send(i->sendto(), &data[idat], ndat);
363             }
364           if (i->recv()) {
365               recv(i->recvfrom(), scratch, ndat);
366               red.reduce(&data[idat], scratch, ndat);
367             }
368         }
369       if (n > tgop_max) sync();
370     }
371 
372   if (target == -1) {
373       bcast(data, n, 0);
374     }
375 
376   if (!passed_scratch) delete[] scratch;
377 }
378 
379 void
reduce(char * data,int n,GrpReduce<char> & red,char * scratch,int target)380 MessageGrp::reduce(char* data, int n, GrpReduce<char>& red,
381                    char* scratch, int target)
382 {
383   int tgop_max = gop_max_/sizeof(char);
384   if (tgop_max == 0) tgop_max = gop_max_?1:n;
385 
386   int passed_scratch;
387   if (!scratch) {
388       scratch = new char[n>tgop_max?tgop_max:n];
389       passed_scratch = 0;
390     }
391   else passed_scratch = 1;
392 
393   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
394                                                     (target== -1?0:target)));
395   for (i->backwards(); !i->done(); i->next()) {
396       for (int idat=0; idat<n; idat+=tgop_max) {
397           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
398           if (i->send()) {
399               send(i->sendto(), &data[idat], ndat);
400             }
401           if (i->recv()) {
402               recv(i->recvfrom(), scratch, ndat);
403               red.reduce(&data[idat], scratch, ndat);
404             }
405         }
406       if (n > tgop_max) sync();
407     }
408 
409   if (target == -1) {
410       bcast(data, n, 0);
411     }
412 
413   if (!passed_scratch) delete[] scratch;
414 }
415 
416 void
reduce(unsigned char * data,int n,GrpReduce<unsigned char> & red,unsigned char * scratch,int target)417 MessageGrp::reduce(unsigned char* data, int n, GrpReduce<unsigned char>& red,
418                    unsigned char* scratch, int target)
419 {
420   int tgop_max = gop_max_/sizeof(unsigned char);
421   if (tgop_max == 0) tgop_max = gop_max_?1:n;
422 
423   int passed_scratch;
424   if (!scratch) {
425       scratch = new unsigned char[n>tgop_max?tgop_max:n];
426       passed_scratch = 0;
427     }
428   else passed_scratch = 1;
429 
430   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
431                                                     (target== -1?0:target)));
432   for (i->backwards(); !i->done(); i->next()) {
433       for (int idat=0; idat<n; idat+=tgop_max) {
434           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
435           if (i->send()) {
436               send(i->sendto(), &data[idat], ndat);
437             }
438           if (i->recv()) {
439               recv(i->recvfrom(), scratch, ndat);
440               red.reduce(&data[idat], scratch, ndat);
441             }
442         }
443       if (n > tgop_max) sync();
444     }
445 
446   if (target == -1) {
447       bcast(data, n, 0);
448     }
449 
450   if (!passed_scratch) delete[] scratch;
451 }
452 
453 void
reduce(signed char * data,int n,GrpReduce<signed char> & red,signed char * scratch,int target)454 MessageGrp::reduce(signed char* data, int n, GrpReduce<signed char>& red,
455                    signed char* scratch, int target)
456 {
457   int tgop_max = gop_max_/sizeof(signed char);
458   if (tgop_max == 0) tgop_max = gop_max_?1:n;
459 
460   int passed_scratch;
461   if (!scratch) {
462       scratch = new signed char[n>tgop_max?tgop_max:n];
463       passed_scratch = 0;
464     }
465   else passed_scratch = 1;
466 
467   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
468                                                     (target== -1?0:target)));
469   for (i->backwards(); !i->done(); i->next()) {
470       for (int idat=0; idat<n; idat+=tgop_max) {
471           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
472           if (i->send()) {
473               send(i->sendto(), &data[idat], ndat);
474             }
475           if (i->recv()) {
476               recv(i->recvfrom(), scratch, ndat);
477               red.reduce(&data[idat], scratch, ndat);
478             }
479         }
480       if (n > tgop_max) sync();
481     }
482 
483   if (target == -1) {
484       bcast(data, n, 0);
485     }
486 
487   if (!passed_scratch) delete[] scratch;
488 }
489 
490 void
reduce(short * data,int n,GrpReduce<short> & red,short * scratch,int target)491 MessageGrp::reduce(short* data, int n, GrpReduce<short>& red,
492                    short* scratch, int target)
493 {
494   int tgop_max = gop_max_/sizeof(short);
495   if (tgop_max == 0) tgop_max = gop_max_?1:n;
496 
497   int passed_scratch;
498   if (!scratch) {
499       scratch = new short[n>tgop_max?tgop_max:n];
500       passed_scratch = 0;
501     }
502   else passed_scratch = 1;
503 
504   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
505                                                     (target== -1?0:target)));
506   for (i->backwards(); !i->done(); i->next()) {
507       for (int idat=0; idat<n; idat+=tgop_max) {
508           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
509           if (i->send()) {
510               send(i->sendto(), &data[idat], ndat);
511             }
512           if (i->recv()) {
513               recv(i->recvfrom(), scratch, ndat);
514               red.reduce(&data[idat], scratch, ndat);
515             }
516         }
517       if (n > tgop_max) sync();
518     }
519 
520   if (target == -1) {
521       bcast(data, n, 0);
522     }
523 
524   if (!passed_scratch) delete[] scratch;
525 }
526 
527 void
reduce(float * data,int n,GrpReduce<float> & red,float * scratch,int target)528 MessageGrp::reduce(float* data, int n, GrpReduce<float>& red,
529                    float* scratch, int target)
530 {
531   int tgop_max = gop_max_/sizeof(float);
532   if (tgop_max == 0) tgop_max = gop_max_?1:n;
533 
534   int passed_scratch;
535   if (!scratch) {
536       scratch = new float[n>tgop_max?tgop_max:n];
537       passed_scratch = 0;
538     }
539   else passed_scratch = 1;
540 
541   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
542                                                     (target== -1?0:target)));
543   for (i->backwards(); !i->done(); i->next()) {
544       for (int idat=0; idat<n; idat+=tgop_max) {
545           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
546           if (i->send()) {
547               send(i->sendto(), &data[idat], ndat);
548             }
549           if (i->recv()) {
550               recv(i->recvfrom(), scratch, ndat);
551               red.reduce(&data[idat], scratch, ndat);
552             }
553         }
554       if (n > tgop_max) sync();
555     }
556 
557   if (target == -1) {
558       bcast(data, n, 0);
559     }
560 
561   if (!passed_scratch) delete[] scratch;
562 }
563 
564 void
reduce(long * data,int n,GrpReduce<long> & red,long * scratch,int target)565 MessageGrp::reduce(long* data, int n, GrpReduce<long>& red,
566                    long* scratch, int target)
567 {
568   int tgop_max = gop_max_/sizeof(long);
569   if (tgop_max == 0) tgop_max = gop_max_?1:n;
570 
571   int passed_scratch;
572   if (!scratch) {
573       scratch = new long[n>tgop_max?tgop_max:n];
574       passed_scratch = 0;
575     }
576   else passed_scratch = 1;
577 
578   Ref<GlobalMsgIter> i(topology_->global_msg_iter(this,
579                                                     (target== -1?0:target)));
580   for (i->backwards(); !i->done(); i->next()) {
581       for (int idat=0; idat<n; idat+=tgop_max) {
582           int ndat = (idat+tgop_max>n)?(n-idat):tgop_max;
583           if (i->send()) {
584               send(i->sendto(), &data[idat], ndat);
585             }
586           if (i->recv()) {
587               recv(i->recvfrom(), scratch, ndat);
588               red.reduce(&data[idat], scratch, ndat);
589             }
590         }
591       if (n > tgop_max) sync();
592     }
593 
594   if (target == -1) {
595       bcast(data, n, 0);
596     }
597 
598   if (!passed_scratch) delete[] scratch;
599 }
600 
#ifdef EXPLICIT_TEMPLATE_INSTANTIATION
// Explicitly instantiate one of the do_sum/do_max/do_min helper templates
// for a single element type.
#define INSTANTIATE_DO_X(func,type) \
    template void func(MessageGrp*, type *, int, type *, int)

// Instantiate a helper for each element type that the MessageGrp
// sum/max/min overloads in this file pass to it.  The last line has no
// semicolon; the invocation supplies it.
#define INSTANTIATE_DO_X_ALL_TYPES(func) \
    INSTANTIATE_DO_X(func,unsigned int); \
    INSTANTIATE_DO_X(func,int); \
    INSTANTIATE_DO_X(func,double); \
    INSTANTIATE_DO_X(func,char); \
    INSTANTIATE_DO_X(func,unsigned char); \
    INSTANTIATE_DO_X(func,signed char)

INSTANTIATE_DO_X_ALL_TYPES(do_sum);
INSTANTIATE_DO_X_ALL_TYPES(do_max);
INSTANTIATE_DO_X_ALL_TYPES(do_min);

#endif
627 
628 /////////////////////////////////////////////////////////////////////////////
629 
630 // Local Variables:
631 // mode: c++
632 // c-file-style: "CLJ"
633 // End:
634