1 //------------------------------------------------------------------------------ 2 // GB_reduce_panel: s=reduce(A), reduce a matrix to a scalar 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // Reduce a matrix to a scalar using a panel-based method for built-in 11 // operators. No typecasting is performed. A must be sparse, hypersparse, 12 // or full (it cannot be bitmap). A cannot have any zombies. If A has zombies 13 // or is bitmap, GB_reduce_to_scalar_template is used instead. 14 15 { 16 17 //-------------------------------------------------------------------------- 18 // get A 19 //-------------------------------------------------------------------------- 20 21 const GB_ATYPE *restrict Ax = (GB_ATYPE *) A->x ; 22 int64_t anz = GB_NNZ (A) ; 23 ASSERT (anz > 0) ; 24 ASSERT (!GB_IS_BITMAP (A)) ; 25 ASSERT (A->nzombies == 0) ; 26 27 #if GB_IS_ANY_MONOID 28 // the ANY monoid can take any entry, and terminate immediately 29 s = Ax [anz-1] ; 30 #else 31 32 //-------------------------------------------------------------------------- 33 // reduce A to a scalar 34 //-------------------------------------------------------------------------- 35 36 if (nthreads == 1) 37 { 38 39 //---------------------------------------------------------------------- 40 // load the Panel with the first entries 41 //---------------------------------------------------------------------- 42 43 GB_ATYPE Panel [GB_PANEL] ; 44 int64_t first_panel_size = GB_IMIN (GB_PANEL, anz) ; 45 for (int64_t k = 0 ; k < first_panel_size ; k++) 46 { 47 Panel [k] = Ax [k] ; 48 } 49 50 #if GB_HAS_TERMINAL 51 int panel_count = 0 ; 52 #endif 53 54 //---------------------------------------------------------------------- 55 // reduce all entries to the Panel 56 //---------------------------------------------------------------------- 57 58 for (int64_t p = GB_PANEL ; p < anz ; p += GB_PANEL) 59 { 60 if (p + GB_PANEL > anz) 61 { 62 // last partial panel 63 for (int64_t k = 0 ; k < anz-p ; k++) 64 { 65 // Panel [k] = op (Panel [k], Ax [p+k]) ; 66 GB_ADD_ARRAY_TO_ARRAY (Panel, k, Ax, p+k) ; 67 } 68 } 69 else 70 { 71 // whole panel 72 for (int64_t k = 0 ; k < GB_PANEL ; k++) 73 { 74 // Panel [k] = op (Panel [k], Ax [p+k]) ; 75 GB_ADD_ARRAY_TO_ARRAY (Panel, k, Ax, p+k) ; 76 } 77 #if GB_HAS_TERMINAL 78 panel_count-- ; 79 if (panel_count <= 0) 80 { 81 // check for early exit only every 256 panels 82 panel_count = 256 ; 83 int count = 0 ; 84 for (int64_t k = 0 ; k < GB_PANEL ; k++) 85 { 86 count += (Panel [k] == GB_TERMINAL_VALUE) ; 87 } 88 if (count > 0) 89 { 90 break ; 91 } 92 } 93 #endif 94 } 95 } 96 97 //---------------------------------------------------------------------- 98 // s = reduce (Panel) 99 //---------------------------------------------------------------------- 100 101 s = Panel [0] ; 102 for (int64_t k = 1 ; k < first_panel_size ; k++) 103 { 104 // s = op (s, Panel [k]) ; 105 GB_ADD_ARRAY_TO_SCALAR (s, Panel, k) ; 106 } 107 108 } 109 else 110 { 111 112 //---------------------------------------------------------------------- 113 // all tasks share a single early_exit flag 114 //---------------------------------------------------------------------- 115 116 // If this flag gets set, all tasks can terminate early 117 118 #if GB_HAS_TERMINAL 119 bool early_exit = false ; 120 #endif 121 122 //---------------------------------------------------------------------- 123 // each thread reduces its own slice in parallel 124 //---------------------------------------------------------------------- 125 126 int tid ; 127 #pragma omp parallel for num_threads(nthreads) schedule(static) 128 for (tid = 0 ; tid < ntasks ; tid++) 129 { 130 131 //------------------------------------------------------------------ 132 // determine the work for this task 133 //------------------------------------------------------------------ 134 135 // Task tid reduces Ax [pstart:pend-1] to the scalar W [tid] 136 137 int64_t pstart, pend ; 138 GB_PARTITION (pstart, pend, anz, tid, ntasks) ; 139 GB_ATYPE t = Ax [pstart] ; 140 141 //------------------------------------------------------------------ 142 // skip this task if the terminal value has already been reached 143 //------------------------------------------------------------------ 144 145 #if GB_HAS_TERMINAL 146 // check if another task has called for an early exit 147 bool my_exit ; 148 149 GB_ATOMIC_READ 150 my_exit = early_exit ; 151 152 if (!my_exit) 153 #endif 154 155 //------------------------------------------------------------------ 156 // do the reductions for this task 157 //------------------------------------------------------------------ 158 159 { 160 161 //-------------------------------------------------------------- 162 // load the Panel with the first entries 163 //-------------------------------------------------------------- 164 165 GB_ATYPE Panel [GB_PANEL] ; 166 int64_t my_anz = pend - pstart ; 167 int64_t first_panel_size = GB_IMIN (GB_PANEL, my_anz) ; 168 for (int64_t k = 0 ; k < first_panel_size ; k++) 169 { 170 Panel [k] = Ax [pstart + k] ; 171 } 172 173 #if GB_HAS_TERMINAL 174 int panel_count = 0 ; 175 #endif 176 177 //-------------------------------------------------------------- 178 // reduce all entries to the Panel 179 //-------------------------------------------------------------- 180 181 for (int64_t p = pstart + GB_PANEL ; p < pend ; p += GB_PANEL) 182 { 183 if (p + GB_PANEL > pend) 184 { 185 // last partial panel 186 for (int64_t k = 0 ; k < pend-p ; k++) 187 { 188 // Panel [k] = op (Panel [k], Ax [p+k]) ; 189 GB_ADD_ARRAY_TO_ARRAY (Panel, k, Ax, p+k) ; 190 } 191 } 192 else 193 { 194 // whole panel 195 for (int64_t k = 0 ; k < GB_PANEL ; k++) 196 { 197 // Panel [k] = op (Panel [k], Ax [p+k]) ; 198 GB_ADD_ARRAY_TO_ARRAY (Panel, k, Ax, p+k) ; 199 } 200 #if GB_HAS_TERMINAL 201 panel_count-- ; 202 if (panel_count <= 0) 203 { 204 // check for early exit only every 256 panels 205 panel_count = 256 ; 206 int count = 0 ; 207 for (int64_t k = 0 ; k < GB_PANEL ; k++) 208 { 209 count += (Panel [k] == GB_TERMINAL_VALUE) ; 210 } 211 if (count > 0) 212 { 213 break ; 214 } 215 } 216 #endif 217 } 218 } 219 220 //-------------------------------------------------------------- 221 // t = reduce (Panel) 222 //-------------------------------------------------------------- 223 224 t = Panel [0] ; 225 for (int64_t k = 1 ; k < first_panel_size ; k++) 226 { 227 // t = op (t, Panel [k]) ; 228 GB_ADD_ARRAY_TO_SCALAR (t, Panel, k) ; 229 } 230 231 #if GB_HAS_TERMINAL 232 if (t == GB_TERMINAL_VALUE) 233 { 234 // tell all other tasks to exit early 235 GB_ATOMIC_WRITE 236 early_exit = true ; 237 } 238 #endif 239 } 240 241 //------------------------------------------------------------------ 242 // save the results of this task 243 //------------------------------------------------------------------ 244 245 W [tid] = t ; 246 } 247 248 //---------------------------------------------------------------------- 249 // sum up the results of each slice using a single thread 250 //---------------------------------------------------------------------- 251 252 s = W [0] ; 253 for (int tid = 1 ; tid < ntasks ; tid++) 254 { 255 // s = op (s, W [tid]), no typecast 256 GB_ADD_ARRAY_TO_SCALAR (s, W, tid) ; 257 } 258 } 259 #endif 260 } 261 262