1 //------------------------------------------------------------------------------
2 // GB_reduce_panel: s=reduce(A), reduce a matrix to a scalar
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Reduce a matrix to a scalar using a panel-based method for built-in
11 // operators.  No typecasting is performed.  A must be sparse, hypersparse,
12 // or full (it cannot be bitmap).  A cannot have any zombies.  If A has zombies
13 // or is bitmap, GB_reduce_to_scalar_template is used instead.
14 
15 {
16 
17     //--------------------------------------------------------------------------
18     // get A
19     //--------------------------------------------------------------------------
20 
21     const GB_ATYPE *restrict Ax = (GB_ATYPE *) A->x ;
22     int64_t anz = GB_NNZ (A) ;
23     ASSERT (anz > 0) ;
24     ASSERT (!GB_IS_BITMAP (A)) ;
25     ASSERT (A->nzombies == 0) ;
26 
27     #if GB_IS_ANY_MONOID
28     // the ANY monoid can take any entry, and terminate immediately
29     s = Ax [anz-1] ;
30     #else
31 
32     //--------------------------------------------------------------------------
33     // reduce A to a scalar
34     //--------------------------------------------------------------------------
35 
36     if (nthreads == 1)
37     {
38 
39         //----------------------------------------------------------------------
40         // load the Panel with the first entries
41         //----------------------------------------------------------------------
42 
43         GB_ATYPE Panel [GB_PANEL] ;
44         int64_t first_panel_size = GB_IMIN (GB_PANEL, anz) ;
45         for (int64_t k = 0 ; k < first_panel_size ; k++)
46         {
47             Panel [k] = Ax [k] ;
48         }
49 
50         #if GB_HAS_TERMINAL
51         int panel_count = 0 ;
52         #endif
53 
54         //----------------------------------------------------------------------
55         // reduce all entries to the Panel
56         //----------------------------------------------------------------------
57 
58         for (int64_t p = GB_PANEL ; p < anz ; p += GB_PANEL)
59         {
60             if (p + GB_PANEL > anz)
61             {
62                 // last partial panel
63                 for (int64_t k = 0 ; k < anz-p ; k++)
64                 {
65                     // Panel [k] = op (Panel [k], Ax [p+k]) ;
66                     GB_ADD_ARRAY_TO_ARRAY (Panel, k, Ax, p+k) ;
67                 }
68             }
69             else
70             {
71                 // whole panel
72                 for (int64_t k = 0 ; k < GB_PANEL ; k++)
73                 {
74                     // Panel [k] = op (Panel [k], Ax [p+k]) ;
75                     GB_ADD_ARRAY_TO_ARRAY (Panel, k, Ax, p+k) ;
76                 }
77                 #if GB_HAS_TERMINAL
78                 panel_count-- ;
79                 if (panel_count <= 0)
80                 {
81                     // check for early exit only every 256 panels
82                     panel_count = 256 ;
83                     int count = 0 ;
84                     for (int64_t k = 0 ; k < GB_PANEL ; k++)
85                     {
86                         count += (Panel [k] == GB_TERMINAL_VALUE) ;
87                     }
88                     if (count > 0)
89                     {
90                         break ;
91                     }
92                 }
93                 #endif
94             }
95         }
96 
97         //----------------------------------------------------------------------
98         // s = reduce (Panel)
99         //----------------------------------------------------------------------
100 
101         s = Panel [0] ;
102         for (int64_t k = 1 ; k < first_panel_size ; k++)
103         {
104             // s = op (s, Panel [k]) ;
105             GB_ADD_ARRAY_TO_SCALAR (s, Panel, k) ;
106         }
107 
108     }
109     else
110     {
111 
112         //----------------------------------------------------------------------
113         // all tasks share a single early_exit flag
114         //----------------------------------------------------------------------
115 
116         // If this flag gets set, all tasks can terminate early
117 
118         #if GB_HAS_TERMINAL
119         bool early_exit = false ;
120         #endif
121 
122         //----------------------------------------------------------------------
123         // each thread reduces its own slice in parallel
124         //----------------------------------------------------------------------
125 
126         int tid ;
127         #pragma omp parallel for num_threads(nthreads) schedule(static)
128         for (tid = 0 ; tid < ntasks ; tid++)
129         {
130 
131             //------------------------------------------------------------------
132             // determine the work for this task
133             //------------------------------------------------------------------
134 
135             // Task tid reduces Ax [pstart:pend-1] to the scalar W [tid]
136 
137             int64_t pstart, pend ;
138             GB_PARTITION (pstart, pend, anz, tid, ntasks) ;
139             GB_ATYPE t = Ax [pstart] ;
140 
141             //------------------------------------------------------------------
142             // skip this task if the terminal value has already been reached
143             //------------------------------------------------------------------
144 
145             #if GB_HAS_TERMINAL
146             // check if another task has called for an early exit
147             bool my_exit ;
148 
149             GB_ATOMIC_READ
150             my_exit = early_exit ;
151 
152             if (!my_exit)
153             #endif
154 
155             //------------------------------------------------------------------
156             // do the reductions for this task
157             //------------------------------------------------------------------
158 
159             {
160 
161                 //--------------------------------------------------------------
162                 // load the Panel with the first entries
163                 //--------------------------------------------------------------
164 
165                 GB_ATYPE Panel [GB_PANEL] ;
166                 int64_t my_anz = pend - pstart ;
167                 int64_t first_panel_size = GB_IMIN (GB_PANEL, my_anz) ;
168                 for (int64_t k = 0 ; k < first_panel_size ; k++)
169                 {
170                     Panel [k] = Ax [pstart + k] ;
171                 }
172 
173                 #if GB_HAS_TERMINAL
174                 int panel_count = 0 ;
175                 #endif
176 
177                 //--------------------------------------------------------------
178                 // reduce all entries to the Panel
179                 //--------------------------------------------------------------
180 
181                 for (int64_t p = pstart + GB_PANEL ; p < pend ; p += GB_PANEL)
182                 {
183                     if (p + GB_PANEL > pend)
184                     {
185                         // last partial panel
186                         for (int64_t k = 0 ; k < pend-p ; k++)
187                         {
188                             // Panel [k] = op (Panel [k], Ax [p+k]) ;
189                             GB_ADD_ARRAY_TO_ARRAY (Panel, k, Ax, p+k) ;
190                         }
191                     }
192                     else
193                     {
194                         // whole panel
195                         for (int64_t k = 0 ; k < GB_PANEL ; k++)
196                         {
197                             // Panel [k] = op (Panel [k], Ax [p+k]) ;
198                             GB_ADD_ARRAY_TO_ARRAY (Panel, k, Ax, p+k) ;
199                         }
200                         #if GB_HAS_TERMINAL
201                         panel_count-- ;
202                         if (panel_count <= 0)
203                         {
204                             // check for early exit only every 256 panels
205                             panel_count = 256 ;
206                             int count = 0 ;
207                             for (int64_t k = 0 ; k < GB_PANEL ; k++)
208                             {
209                                 count += (Panel [k] == GB_TERMINAL_VALUE) ;
210                             }
211                             if (count > 0)
212                             {
213                                 break ;
214                             }
215                         }
216                         #endif
217                     }
218                 }
219 
220                 //--------------------------------------------------------------
221                 // t = reduce (Panel)
222                 //--------------------------------------------------------------
223 
224                 t = Panel [0] ;
225                 for (int64_t k = 1 ; k < first_panel_size ; k++)
226                 {
227                     // t = op (t, Panel [k]) ;
228                     GB_ADD_ARRAY_TO_SCALAR (t, Panel, k) ;
229                 }
230 
231                 #if GB_HAS_TERMINAL
232                 if (t == GB_TERMINAL_VALUE)
233                 {
234                     // tell all other tasks to exit early
235                     GB_ATOMIC_WRITE
236                     early_exit = true ;
237                 }
238                 #endif
239             }
240 
241             //------------------------------------------------------------------
242             // save the results of this task
243             //------------------------------------------------------------------
244 
245             W [tid] = t ;
246         }
247 
248         //----------------------------------------------------------------------
249         // sum up the results of each slice using a single thread
250         //----------------------------------------------------------------------
251 
252         s = W [0] ;
253         for (int tid = 1 ; tid < ntasks ; tid++)
254         {
255             // s = op (s, W [tid]), no typecast
256             GB_ADD_ARRAY_TO_SCALAR (s, W, tid) ;
257         }
258     }
259     #endif
260 }
261 
262