1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy3_symbolic_template: symbolic analysis for GB_AxB_saxpy3
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Symbolic analysis for C=A*B, C<M>=A*B or C<!M>=A*B, via GB_AxB_saxpy3.
11 // Coarse tasks compute nnz (C (:,j)) for each of their vectors j. Fine tasks
12 // just scatter the mask M into the hash table. This phase does not depend on
13 // the semiring, nor does it depend on the type of C, A, or B. It does access
14 // the values of M, if the mask matrix M is present and not structural.
15
16 // If B is hypersparse, C must also be hypersparse.
17 // Otherwise, C must be sparse.
18
19 // The sparsity of A and B are #defined' constants for this method,
20 // as is the 3 cases of the mask (no M, M, or !M).
21
22 #include "GB_AxB_saxpy3.h"
23 #include "GB_AxB_saxpy3_template.h"
24 #include "GB_atomics.h"
25 #include "GB_bracket.h"
26 #include "GB_unused.h"
27
28 #define GB_META16
29 #include "GB_meta16_definitions.h"
30
// Phase1 (symbolic) kernel for GB_AxB_saxpy3.  The function name is generated
// by GB_EVAL2 from GB_MASK_A_B_SUFFIX, so one copy of this template exists per
// (mask case, A sparsity, B sparsity) combination selected by the #defined
// constants GB_NO_MASK, GB_MASK_COMP, GB_A_IS_*, and GB_B_IS_*.
void GB_EVAL2 (GB (AxB_saxpy3_sym), GB_MASK_A_B_SUFFIX)
(
    GrB_Matrix C,               // Cp is computed for coarse tasks
    #if ( !GB_NO_MASK )
    const GrB_Matrix M,         // mask matrix M
    const bool Mask_struct,     // M structural, or not
    const bool M_packed_in_place,   // if true, M is dense and used in-place,
                                    // not scattered into the hash tables
    #endif
    const GrB_Matrix A,         // A matrix; only the pattern is accessed
    const GrB_Matrix B,         // B matrix; only the pattern is accessed
    GB_saxpy3task_struct *SaxpyTasks,   // list of tasks, and workspace
    const int ntasks,           // total number of tasks
    const int nfine,            // number of fine tasks
    const int nthreads          // number of threads
)
{

    //--------------------------------------------------------------------------
    // get M, A, B, and C
    //--------------------------------------------------------------------------

    int64_t *restrict Cp = C->p ;
    const int64_t cvlen = C->vlen ;

    const int64_t *restrict Bp = B->p ;
    const int64_t *restrict Bh = B->h ;
    const int8_t  *restrict Bb = B->b ;
    const int64_t *restrict Bi = B->i ;
    const int64_t bvlen = B->vlen ;
    const bool B_jumbled = B->jumbled ;

    // the caller must select the template instance matching B's sparsity
    ASSERT (GB_B_IS_SPARSE == GB_IS_SPARSE (B)) ;
    ASSERT (GB_B_IS_HYPER  == GB_IS_HYPERSPARSE (B)) ;
    ASSERT (GB_B_IS_BITMAP == GB_IS_BITMAP (B)) ;
    ASSERT (GB_B_IS_FULL   == GB_IS_FULL (B)) ;

    const int64_t *restrict Ap = A->p ;
    const int64_t *restrict Ah = A->h ;
    const int8_t  *restrict Ab = A->b ;
    const int64_t *restrict Ai = A->i ;
    const int64_t anvec = A->nvec ;
    const int64_t avlen = A->vlen ;
    const bool A_jumbled = A->jumbled ;

    // likewise for A's sparsity
    ASSERT (GB_A_IS_SPARSE == GB_IS_SPARSE (A)) ;
    ASSERT (GB_A_IS_HYPER  == GB_IS_HYPERSPARSE (A)) ;
    ASSERT (GB_A_IS_BITMAP == GB_IS_BITMAP (A)) ;
    ASSERT (GB_A_IS_FULL   == GB_IS_FULL (A)) ;

    #if ( !GB_NO_MASK )
    const int64_t *restrict Mp = M->p ;
    const int64_t *restrict Mh = M->h ;
    const int8_t  *restrict Mb = M->b ;
    const int64_t *restrict Mi = M->i ;
    // Mx is NULL for a structural mask; then only M's pattern is used
    const GB_void *restrict Mx = (GB_void *) (Mask_struct ? NULL : (M->x)) ;
    size_t msize = M->type->size ;
    int64_t mnvec = M->nvec ;
    int64_t mvlen = M->vlen ;
    const bool M_is_hyper  = GB_IS_HYPERSPARSE (M) ;
    const bool M_is_bitmap = GB_IS_BITMAP (M) ;
    const bool M_jumbled   = GB_JUMBLED (M) ;
    #endif

    //==========================================================================
    // phase1: count nnz(C(:,j)) for coarse tasks, scatter M for fine tasks
    //==========================================================================

    // At this point, all of Hf [...] is zero, for all tasks.
    // Hi and Hx are not initialized.

    int taskid ;
    // schedule(static,1): task sizes were balanced when the task list was
    // built, so tasks are handed out round-robin, one per thread at a time
    #pragma omp parallel for num_threads(nthreads) schedule(static,1)
    for (taskid = 0 ; taskid < ntasks ; taskid++)
    {

        //----------------------------------------------------------------------
        // get the task descriptor
        //----------------------------------------------------------------------

        int64_t hash_size = SaxpyTasks [taskid].hsize ;
        // a hash table as large as C's vector length means the task uses
        // Gustavson's method (direct scatter/gather) instead of hashing
        bool use_Gustavson = (hash_size == cvlen) ;

        if (taskid < nfine)
        {

            //------------------------------------------------------------------
            // no work for fine tasks in phase1 if M is not present
            //------------------------------------------------------------------

            #if ( !GB_NO_MASK )
            {

                //--------------------------------------------------------------
                // get the task descriptor
                //--------------------------------------------------------------

                // a fine task works on a slice of a single vector B(:,j)
                int64_t kk = SaxpyTasks [taskid].vector ;
                int64_t bjnz = (Bp == NULL) ? bvlen : (Bp [kk+1] - Bp [kk]) ;
                // no work to do if B(:,j) is empty
                if (bjnz == 0) continue ;

                // partition M(:,j)
                GB_GET_M_j ;            // get M(:,j)

                // all fine tasks in one team share the same vector j; each
                // team member handles its own contiguous slice of M(:,j)
                int team_size = SaxpyTasks [taskid].team_size ;
                int leader    = SaxpyTasks [taskid].leader ;
                int my_teamid = taskid - leader ;
                int64_t mystart, myend ;
                GB_PARTITION (mystart, myend, mjnz, my_teamid, team_size) ;
                mystart += pM_start ;
                myend   += pM_start ;

                if (use_Gustavson)
                {

                    //----------------------------------------------------------
                    // phase1: fine Gustavson task, C<M>=A*B or C<!M>=A*B
                    //----------------------------------------------------------

                    // Scatter the values of M(:,j) into Hf.  No atomics needed
                    // since all indices i in M(;,j) are unique.  Do not
                    // scatter the mask if M(:,j) is a dense vector, since in
                    // that case the numeric phase accesses M(:,j) directly,
                    // not via Hf.

                    if (mjnz > 0)
                    {
                        int8_t *restrict
                            Hf = (int8_t *restrict) SaxpyTasks [taskid].Hf ;
                        GB_SCATTER_M_j (mystart, myend, 1) ;
                    }

                }
                else if (!M_packed_in_place)
                {

                    //----------------------------------------------------------
                    // phase1: fine hash task, C<M>=A*B or C<!M>=A*B
                    //----------------------------------------------------------

                    // If M_packed_in_place is true, this is skipped.  The mask
                    // M is dense, and is used in-place.

                    // The least significant 2 bits of Hf [hash] is the flag f,
                    // and the upper bits contain h, as (h,f).  After this
                    // phase1, if M(i,j)=1 then the hash table contains
                    // ((i+1),1) in Hf [hash] at some location.

                    // Later, the flag values of f = 2 and 3 are also used.
                    // Only f=1 is set in this phase.

                    // h == 0,   f == 0: unoccupied and unlocked
                    // h == i+1, f == 1: occupied with M(i,j)=1

                    int64_t *restrict
                        Hf = (int64_t *restrict) SaxpyTasks [taskid].Hf ;
                    // hash_size is a power of 2, so (hash & hash_bits)
                    // implements (hash mod hash_size)
                    int64_t hash_bits = (hash_size-1) ;
                    // scan my M(:,j)
                    for (int64_t pM = mystart ; pM < myend ; pM++)
                    {
                        GB_GET_M_ij (pM) ;      // get M(i,j)
                        if (!mij) continue ;    // skip if M(i,j)=0
                        int64_t i = GBI (Mi, pM, mvlen) ;
                        int64_t i_mine = ((i+1) << 2) + 1 ;  // ((i+1),1)
                        for (GB_HASH (i))
                        {
                            int64_t hf ;
                            // swap my hash entry into the hash table;
                            // does the following using an atomic capture:
                            // { hf = Hf [hash] ; Hf [hash] = i_mine ; }
                            GB_ATOMIC_CAPTURE_INT64 (hf, Hf [hash], i_mine) ;
                            if (hf == 0) break ;        // success
                            // i_mine has been inserted, but a prior entry was
                            // already there.  It needs to be replaced, so take
                            // ownership of this displaced entry, and keep
                            // looking until a new empty slot is found for it.
                            // This is a lock-free cuckoo-style displacement:
                            // entries are unique, so the chain terminates.
                            i_mine = hf ;
                        }
                    }
                }
            }
            #endif

        }
        else
        {

            //------------------------------------------------------------------
            // coarse tasks: compute nnz in each vector of A*B(:,kfirst:klast)
            //------------------------------------------------------------------

            // a coarse task owns whole vectors kfirst:klast of B, and its
            // own private hash table Hf (no atomics needed)
            int64_t *restrict
                Hf = (int64_t *restrict) SaxpyTasks [taskid].Hf ;
            int64_t kfirst = SaxpyTasks [taskid].start ;
            int64_t klast  = SaxpyTasks [taskid].end ;
            // mark is incremented per vector inside the included templates so
            // Hf need not be cleared between vectors
            int64_t mark = 0 ;

            if (use_Gustavson)
            {

                //--------------------------------------------------------------
                // phase1: coarse Gustavson task
                //--------------------------------------------------------------

                // the mask case is fixed at compile time; exactly one of the
                // three templates below is included in each instance
                #if ( GB_NO_MASK )
                {
                    // phase1: coarse Gustavson task, C=A*B
                    #include "GB_AxB_saxpy3_coarseGus_noM_phase1.c"
                }
                #elif ( !GB_MASK_COMP )
                {
                    // phase1: coarse Gustavson task, C<M>=A*B
                    #include "GB_AxB_saxpy3_coarseGus_M_phase1.c"
                }
                #else
                {
                    // phase1: coarse Gustavson task, C<!M>=A*B
                    #include "GB_AxB_saxpy3_coarseGus_notM_phase1.c"
                }
                #endif

            }
            else
            {

                //--------------------------------------------------------------
                // phase1: coarse hash task
                //--------------------------------------------------------------

                int64_t *restrict Hi = SaxpyTasks [taskid].Hi ;
                // hash_size is a power of 2 (see fine hash task above)
                int64_t hash_bits = (hash_size-1) ;

                #if ( GB_NO_MASK )
                {

                    //----------------------------------------------------------
                    // phase1: coarse hash task, C=A*B
                    //----------------------------------------------------------

                    // no mask: the template is included with GB_CHECK_MASK_ij
                    // undefined, so every entry of A*B is counted
                    #undef GB_CHECK_MASK_ij
                    #include "GB_AxB_saxpy3_coarseHash_phase1.c"

                }
                #elif ( !GB_MASK_COMP )
                {

                    //----------------------------------------------------------
                    // phase1: coarse hash task, C<M>=A*B
                    //----------------------------------------------------------

                    if (M_packed_in_place)
                    {

                        //------------------------------------------------------
                        // M(:,j) is dense.  M is not scattered into Hf.
                        //------------------------------------------------------

                        // skip C(i,j) when M(i,j) is zero (non-complemented
                        // mask: "if (!mij) continue")
                        #undef  GB_CHECK_MASK_ij
                        #define GB_CHECK_MASK_ij                            \
                            bool mij =                                      \
                                (M_is_bitmap ? Mjb [i] : 1) &&              \
                                (Mask_struct ? 1 : (Mjx [i] != 0)) ;        \
                            if (!mij) continue ;

                        // specialize the template on the mask's type size so
                        // Mjx can be read as a plain integer of that width
                        switch (msize)
                        {
                            default:
                            case 1 :    // M is bool, int8, uint8, or
                                        // structural
                                #undef  M_TYPE
                                #define M_TYPE uint8_t
                                #undef  M_SIZE
                                #define M_SIZE 1
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                            case 2 :    // M is int16 or uint16
                                #undef  M_TYPE
                                #define M_TYPE uint16_t
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                            case 4 :    // M is 32-bit
                                #undef  M_TYPE
                                #define M_TYPE uint32_t
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                            case 8 :    // M is 64-bit
                                #undef  M_TYPE
                                #define M_TYPE uint64_t
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                            case 16 :   // M is 128-bit: view each entry as
                                        // two 64-bit words; nonzero if either
                                        // word is nonzero
                                #undef  M_TYPE
                                #define M_TYPE uint64_t
                                #undef  M_SIZE
                                #define M_SIZE 2
                                #undef  GB_CHECK_MASK_ij
                                #define GB_CHECK_MASK_ij                    \
                                    bool mij =                              \
                                        (M_is_bitmap ? Mjb [i] : 1) &&      \
                                        (Mask_struct ? 1 :                  \
                                            (Mjx [2*i] != 0) ||             \
                                            (Mjx [2*i+1] != 0)) ;           \
                                    if (!mij) continue ;
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                        }

                    }
                    else
                    {

                        //------------------------------------------------------
                        // M is sparse and scattered into Hf
                        //------------------------------------------------------

                        #include "GB_AxB_saxpy3_coarseHash_M_phase1.c"
                    }

                }
                #else
                {

                    //----------------------------------------------------------
                    // phase1: coarse hash task, C<!M>=A*B
                    //----------------------------------------------------------

                    if (M_packed_in_place)
                    {

                        //------------------------------------------------------
                        // M(:,j) is dense.  M is not scattered into Hf.
                        //------------------------------------------------------

                        // complemented mask: skip C(i,j) when M(i,j) is
                        // nonzero ("if (mij) continue" — note the inverted
                        // test versus the C<M>=A*B case above)
                        #undef  GB_CHECK_MASK_ij
                        #define GB_CHECK_MASK_ij                            \
                            bool mij =                                      \
                                (M_is_bitmap ? Mjb [i] : 1) &&              \
                                (Mask_struct ? 1 : (Mjx [i] != 0)) ;        \
                            if (mij) continue ;

                        // same type-size specialization as the C<M>=A*B case
                        switch (msize)
                        {
                            default:
                            case 1 :
                                #undef  M_TYPE
                                #define M_TYPE uint8_t
                                #undef  M_SIZE
                                #define M_SIZE 1
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                            case 2 :
                                #undef  M_TYPE
                                #define M_TYPE uint16_t
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                            case 4 :
                                #undef  M_TYPE
                                #define M_TYPE uint32_t
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                            case 8 :
                                #undef  M_TYPE
                                #define M_TYPE uint64_t
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                            case 16 :
                                #undef  M_TYPE
                                #define M_TYPE uint64_t
                                #undef  M_SIZE
                                #define M_SIZE 2
                                #undef  GB_CHECK_MASK_ij
                                #define GB_CHECK_MASK_ij                    \
                                    bool mij =                              \
                                        (M_is_bitmap ? Mjb [i] : 1) &&      \
                                        (Mask_struct ? 1 :                  \
                                            (Mjx [2*i] != 0) ||             \
                                            (Mjx [2*i+1] != 0)) ;           \
                                    if (mij) continue ;
                                #include "GB_AxB_saxpy3_coarseHash_phase1.c"
                                break ;
                        }

                    }
                    else
                    {

                        //------------------------------------------------------
                        // M is sparse and scattered into Hf
                        //------------------------------------------------------

                        #include "GB_AxB_saxpy3_coarseHash_notM_phase1.c"
                    }
                }
                #endif
            }
        }
    }

    //--------------------------------------------------------------------------
    // check result for phase1 for fine tasks
    //--------------------------------------------------------------------------

    // debug-only validation: after the parallel region, the team leader of
    // each fine task verifies that exactly the entries of M(:,j) (and nothing
    // else) were scattered into that team's shared Hf
    #ifdef GB_DEBUG
    #if ( !GB_NO_MASK )
    {
        for (taskid = 0 ; taskid < nfine ; taskid++)
        {
            int64_t kk = SaxpyTasks [taskid].vector ;
            ASSERT (kk >= 0 && kk < B->nvec) ;
            int64_t bjnz = (Bp == NULL) ? bvlen : (Bp [kk+1] - Bp [kk]) ;
            // no work to do if B(:,j) is empty
            if (bjnz == 0) continue ;
            int64_t hash_size = SaxpyTasks [taskid].hsize ;
            bool use_Gustavson = (hash_size == cvlen) ;
            int leader = SaxpyTasks [taskid].leader ;
            // check each team's Hf once, via its leader only
            if (leader != taskid) continue ;
            GB_GET_M_j ;        // get M(:,j)
            if (mjnz == 0) continue ;
            // mjcount: # of true entries in M(:,j); mjcount2: # found in Hf
            int64_t mjcount2 = 0 ;
            int64_t mjcount = 0 ;
            for (int64_t pM = pM_start ; pM < pM_end ; pM++)
            {
                GB_GET_M_ij (pM) ;          // get M(i,j)
                if (mij) mjcount++ ;
            }
            if (use_Gustavson)
            {
                // phase1: fine Gustavson task, C<M>=A*B or C<!M>=A*B
                int8_t *restrict
                    Hf = (int8_t *restrict) SaxpyTasks [taskid].Hf ;
                for (int64_t pM = pM_start ; pM < pM_end ; pM++)
                {
                    GB_GET_M_ij (pM) ;      // get M(i,j)
                    int64_t i = GBI (Mi, pM, mvlen) ;
                    ASSERT (Hf [i] == mij) ;
                }
                for (int64_t i = 0 ; i < cvlen ; i++)
                {
                    ASSERT (Hf [i] == 0 || Hf [i] == 1) ;
                    if (Hf [i] == 1) mjcount2++ ;
                }
                ASSERT (mjcount == mjcount2) ;
            }
            else if (!M_packed_in_place)
            {
                // phase1: fine hash task, C<M>=A*B or C<!M>=A*B
                // h == 0,   f == 0: unoccupied and unlocked
                // h == i+1, f == 1: occupied with M(i,j)=1
                int64_t *restrict
                    Hf = (int64_t *restrict) SaxpyTasks [taskid].Hf ;
                int64_t hash_bits = (hash_size-1) ;
                // every M(i,j)=1 entry must be findable by probing from its
                // home hash bucket with no empty slot along the way
                for (int64_t pM = pM_start ; pM < pM_end ; pM++)
                {
                    GB_GET_M_ij (pM) ;      // get M(i,j)
                    if (!mij) continue ;    // skip if M(i,j)=0
                    int64_t i = GBI (Mi, pM, mvlen) ;
                    int64_t i_mine = ((i+1) << 2) + 1 ;  // ((i+1),1)
                    int64_t probe = 0 ;
                    for (GB_HASH (i))
                    {
                        int64_t hf = Hf [hash] ;
                        if (hf == i_mine)
                        {
                            mjcount2++ ;
                            break ;
                        }
                        ASSERT (hf != 0) ;
                        probe++ ;
                        ASSERT (probe < cvlen) ;
                    }
                }
                ASSERT (mjcount == mjcount2) ;
                // and the table must contain exactly mjcount occupied slots,
                // all with flag f == 1
                mjcount2 = 0 ;
                for (int64_t hash = 0 ; hash < hash_size ; hash++)
                {
                    int64_t hf = Hf [hash] ;
                    int64_t h = (hf >> 2) ;     // empty (0), or a 1-based
                                                // index i+1
                    int64_t f = (hf & 3) ;      // 0 if empty or 1 if occupied
                    if (f == 1) ASSERT (h >= 1 && h <= cvlen) ;
                    ASSERT (hf == 0 || f == 1) ;
                    if (f == 1) mjcount2++ ;
                }
                ASSERT (mjcount == mjcount2) ;
            }
        }
    }
    #endif
    #endif
}
519
520