1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy3_symbolic_template: symbolic analysis for GB_AxB_saxpy3
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Symbolic analysis for C=A*B, C<M>=A*B or C<!M>=A*B, via GB_AxB_saxpy3.
11 // Coarse tasks compute nnz (C (:,j)) for each of their vectors j.  Fine tasks
12 // just scatter the mask M into the hash table.  This phase does not depend on
13 // the semiring, nor does it depend on the type of C, A, or B.  It does access
14 // the values of M, if the mask matrix M is present and not structural.
15 
16 // If B is hypersparse, C must also be hypersparse.
17 // Otherwise, C must be sparse.
18 
19 // The sparsity of A and B are #defined' constants for this method,
20 // as is the 3 cases of the mask (no M, M, or !M).
21 
22 #include "GB_AxB_saxpy3.h"
23 #include "GB_AxB_saxpy3_template.h"
24 #include "GB_atomics.h"
25 #include "GB_bracket.h"
26 #include "GB_unused.h"
27 
28 #define GB_META16
29 #include "GB_meta16_definitions.h"
30 
GB_EVAL2(GB (AxB_saxpy3_sym),GB_MASK_A_B_SUFFIX)31 void GB_EVAL2 (GB (AxB_saxpy3_sym), GB_MASK_A_B_SUFFIX)
32 (
33     GrB_Matrix C,               // Cp is computed for coarse tasks
34     #if ( !GB_NO_MASK )
35     const GrB_Matrix M,         // mask matrix M
36     const bool Mask_struct,     // M structural, or not
37     const bool M_packed_in_place,
38     #endif
39     const GrB_Matrix A,         // A matrix; only the pattern is accessed
40     const GrB_Matrix B,         // B matrix; only the pattern is accessed
41     GB_saxpy3task_struct *SaxpyTasks,     // list of tasks, and workspace
42     const int ntasks,           // total number of tasks
43     const int nfine,            // number of fine tasks
44     const int nthreads          // number of threads
45 )
46 {
47 
48     //--------------------------------------------------------------------------
49     // get M, A, B, and C
50     //--------------------------------------------------------------------------
51 
52     int64_t *restrict Cp = C->p ;
53     const int64_t cvlen = C->vlen ;
54 
55     const int64_t *restrict Bp = B->p ;
56     const int64_t *restrict Bh = B->h ;
57     const int8_t  *restrict Bb = B->b ;
58     const int64_t *restrict Bi = B->i ;
59     const int64_t bvlen = B->vlen ;
60     const bool B_jumbled = B->jumbled ;
61 
62     ASSERT (GB_B_IS_SPARSE == GB_IS_SPARSE (B)) ;
63     ASSERT (GB_B_IS_HYPER  == GB_IS_HYPERSPARSE (B)) ;
64     ASSERT (GB_B_IS_BITMAP == GB_IS_BITMAP (B)) ;
65     ASSERT (GB_B_IS_FULL   == GB_IS_FULL   (B)) ;
66 
67     const int64_t *restrict Ap = A->p ;
68     const int64_t *restrict Ah = A->h ;
69     const int8_t  *restrict Ab = A->b ;
70     const int64_t *restrict Ai = A->i ;
71     const int64_t anvec = A->nvec ;
72     const int64_t avlen = A->vlen ;
73     const bool A_jumbled = A->jumbled ;
74 
75     ASSERT (GB_A_IS_SPARSE == GB_IS_SPARSE (A)) ;
76     ASSERT (GB_A_IS_HYPER  == GB_IS_HYPERSPARSE (A)) ;
77     ASSERT (GB_A_IS_BITMAP == GB_IS_BITMAP (A)) ;
78     ASSERT (GB_A_IS_FULL   == GB_IS_FULL   (A)) ;
79 
80     #if ( !GB_NO_MASK )
81     const int64_t *restrict Mp = M->p ;
82     const int64_t *restrict Mh = M->h ;
83     const int8_t  *restrict Mb = M->b ;
84     const int64_t *restrict Mi = M->i ;
85     const GB_void *restrict Mx = (GB_void *) (Mask_struct ? NULL : (M->x)) ;
86     size_t  msize = M->type->size ;
87     int64_t mnvec = M->nvec ;
88     int64_t mvlen = M->vlen ;
89     const bool M_is_hyper = GB_IS_HYPERSPARSE (M) ;
90     const bool M_is_bitmap = GB_IS_BITMAP (M) ;
91     const bool M_jumbled = GB_JUMBLED (M) ;
92     #endif
93 
94     //==========================================================================
95     // phase1: count nnz(C(:,j)) for coarse tasks, scatter M for fine tasks
96     //==========================================================================
97 
98     // At this point, all of Hf [...] is zero, for all tasks.
99     // Hi and Hx are not initialized.
100 
101     int taskid ;
102     #pragma omp parallel for num_threads(nthreads) schedule(static,1)
103     for (taskid = 0 ; taskid < ntasks ; taskid++)
104     {
105 
106         //----------------------------------------------------------------------
107         // get the task descriptor
108         //----------------------------------------------------------------------
109 
110         int64_t hash_size = SaxpyTasks [taskid].hsize ;
111         bool use_Gustavson = (hash_size == cvlen) ;
112 
113         if (taskid < nfine)
114         {
115 
116             //------------------------------------------------------------------
117             // no work for fine tasks in phase1 if M is not present
118             //------------------------------------------------------------------
119 
120             #if ( !GB_NO_MASK )
121             {
122 
123                 //--------------------------------------------------------------
124                 // get the task descriptor
125                 //--------------------------------------------------------------
126 
127                 int64_t kk = SaxpyTasks [taskid].vector ;
128                 int64_t bjnz = (Bp == NULL) ? bvlen : (Bp [kk+1] - Bp [kk]) ;
129                 // no work to do if B(:,j) is empty
130                 if (bjnz == 0) continue ;
131 
132                 // partition M(:,j)
133                 GB_GET_M_j ;        // get M(:,j)
134 
135                 int team_size = SaxpyTasks [taskid].team_size ;
136                 int leader    = SaxpyTasks [taskid].leader ;
137                 int my_teamid = taskid - leader ;
138                 int64_t mystart, myend ;
139                 GB_PARTITION (mystart, myend, mjnz, my_teamid, team_size) ;
140                 mystart += pM_start ;
141                 myend   += pM_start ;
142 
143                 if (use_Gustavson)
144                 {
145 
146                     //----------------------------------------------------------
147                     // phase1: fine Gustavson task, C<M>=A*B or C<!M>=A*B
148                     //----------------------------------------------------------
149 
150                     // Scatter the values of M(:,j) into Hf.  No atomics needed
151                     // since all indices i in M(;,j) are unique.  Do not
152                     // scatter the mask if M(:,j) is a dense vector, since in
153                     // that case the numeric phase accesses M(:,j) directly,
154                     // not via Hf.
155 
156                     if (mjnz > 0)
157                     {
158                         int8_t *restrict
159                             Hf = (int8_t *restrict) SaxpyTasks [taskid].Hf ;
160                         GB_SCATTER_M_j (mystart, myend, 1) ;
161                     }
162 
163                 }
164                 else if (!M_packed_in_place)
165                 {
166 
167                     //----------------------------------------------------------
168                     // phase1: fine hash task, C<M>=A*B or C<!M>=A*B
169                     //----------------------------------------------------------
170 
171                     // If M_packed_in_place is true, this is skipped.  The mask
172                     // M is dense, and is used in-place.
173 
174                     // The least significant 2 bits of Hf [hash] is the flag f,
175                     // and the upper bits contain h, as (h,f).  After this
176                     // phase1, if M(i,j)=1 then the hash table contains
177                     // ((i+1),1) in Hf [hash] at some location.
178 
179                     // Later, the flag values of f = 2 and 3 are also used.
180                     // Only f=1 is set in this phase.
181 
182                     // h == 0,   f == 0: unoccupied and unlocked
183                     // h == i+1, f == 1: occupied with M(i,j)=1
184 
185                     int64_t *restrict
186                         Hf = (int64_t *restrict) SaxpyTasks [taskid].Hf ;
187                     int64_t hash_bits = (hash_size-1) ;
188                     // scan my M(:,j)
189                     for (int64_t pM = mystart ; pM < myend ; pM++)
190                     {
191                         GB_GET_M_ij (pM) ;              // get M(i,j)
192                         if (!mij) continue ;            // skip if M(i,j)=0
193                         int64_t i = GBI (Mi, pM, mvlen) ;
194                         int64_t i_mine = ((i+1) << 2) + 1 ;  // ((i+1),1)
195                         for (GB_HASH (i))
196                         {
197                             int64_t hf ;
198                             // swap my hash entry into the hash table;
199                             // does the following using an atomic capture:
200                             // { hf = Hf [hash] ; Hf [hash] = i_mine ; }
201                             GB_ATOMIC_CAPTURE_INT64 (hf, Hf [hash], i_mine) ;
202                             if (hf == 0) break ;        // success
203                             // i_mine has been inserted, but a prior entry was
204                             // already there.  It needs to be replaced, so take
205                             // ownership of this displaced entry, and keep
206                             // looking until a new empty slot is found for it.
207                             i_mine = hf ;
208                         }
209                     }
210                 }
211             }
212             #endif
213 
214         }
215         else
216         {
217 
218             //------------------------------------------------------------------
219             // coarse tasks: compute nnz in each vector of A*B(:,kfirst:klast)
220             //------------------------------------------------------------------
221 
222             int64_t *restrict
223                 Hf = (int64_t *restrict) SaxpyTasks [taskid].Hf ;
224             int64_t kfirst = SaxpyTasks [taskid].start ;
225             int64_t klast  = SaxpyTasks [taskid].end ;
226             int64_t mark = 0 ;
227 
228             if (use_Gustavson)
229             {
230 
231                 //--------------------------------------------------------------
232                 // phase1: coarse Gustavson task
233                 //--------------------------------------------------------------
234 
235                 #if ( GB_NO_MASK )
236                 {
237                     // phase1: coarse Gustavson task, C=A*B
238                     #include "GB_AxB_saxpy3_coarseGus_noM_phase1.c"
239                 }
240                 #elif ( !GB_MASK_COMP )
241                 {
242                     // phase1: coarse Gustavson task, C<M>=A*B
243                     #include "GB_AxB_saxpy3_coarseGus_M_phase1.c"
244                 }
245                 #else
246                 {
247                     // phase1: coarse Gustavson task, C<!M>=A*B
248                     #include "GB_AxB_saxpy3_coarseGus_notM_phase1.c"
249                 }
250                 #endif
251 
252             }
253             else
254             {
255 
256                 //--------------------------------------------------------------
257                 // phase1: coarse hash task
258                 //--------------------------------------------------------------
259 
260                 int64_t *restrict Hi = SaxpyTasks [taskid].Hi ;
261                 int64_t hash_bits = (hash_size-1) ;
262 
263                 #if ( GB_NO_MASK )
264                 {
265 
266                     //----------------------------------------------------------
267                     // phase1: coarse hash task, C=A*B
268                     //----------------------------------------------------------
269 
270                     #undef GB_CHECK_MASK_ij
271                     #include "GB_AxB_saxpy3_coarseHash_phase1.c"
272 
273                 }
274                 #elif ( !GB_MASK_COMP )
275                 {
276 
277                     //----------------------------------------------------------
278                     // phase1: coarse hash task, C<M>=A*B
279                     //----------------------------------------------------------
280 
281                     if (M_packed_in_place)
282                     {
283 
284                         //------------------------------------------------------
285                         // M(:,j) is dense.  M is not scattered into Hf.
286                         //------------------------------------------------------
287 
288                         #undef  GB_CHECK_MASK_ij
289                         #define GB_CHECK_MASK_ij                        \
290                             bool mij =                                  \
291                                 (M_is_bitmap ? Mjb [i] : 1) &&          \
292                                 (Mask_struct ? 1 : (Mjx [i] != 0)) ;    \
293                             if (!mij) continue ;
294 
295                         switch (msize)
296                         {
297                             default:
298                             case 1 :
299                                 #undef  M_TYPE
300                                 #define M_TYPE uint8_t
301                                 #undef  M_SIZE
302                                 #define M_SIZE 1
303                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
304                                 break ;
305                             case 2 :
306                                 #undef  M_TYPE
307                                 #define M_TYPE uint16_t
308                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
309                                 break ;
310                             case 4 :
311                                 #undef  M_TYPE
312                                 #define M_TYPE uint32_t
313                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
314                                 break ;
315                             case 8 :
316                                 #undef  M_TYPE
317                                 #define M_TYPE uint64_t
318                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
319                                 break ;
320                             case 16 :
321                                 #undef  M_TYPE
322                                 #define M_TYPE uint64_t
323                                 #undef  M_SIZE
324                                 #define M_SIZE 2
325                                 #undef  GB_CHECK_MASK_ij
326                                 #define GB_CHECK_MASK_ij                    \
327                                     bool mij =                              \
328                                         (M_is_bitmap ? Mjb [i] : 1) &&      \
329                                         (Mask_struct ? 1 :                  \
330                                             (Mjx [2*i] != 0) ||             \
331                                             (Mjx [2*i+1] != 0)) ;           \
332                                     if (!mij) continue ;
333                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
334                                 break ;
335                         }
336 
337                     }
338                     else
339                     {
340 
341                         //------------------------------------------------------
342                         // M is sparse and scattered into Hf
343                         //------------------------------------------------------
344 
345                         #include "GB_AxB_saxpy3_coarseHash_M_phase1.c"
346                     }
347 
348                 }
349                 #else
350                 {
351 
352                     //----------------------------------------------------------
353                     // phase1: coarse hash task, C<!M>=A*B
354                     //----------------------------------------------------------
355 
356                     if (M_packed_in_place)
357                     {
358 
359                         //------------------------------------------------------
360                         // M(:,j) is dense.  M is not scattered into Hf.
361                         //------------------------------------------------------
362 
363                         #undef  GB_CHECK_MASK_ij
364                         #define GB_CHECK_MASK_ij                        \
365                             bool mij =                                  \
366                                 (M_is_bitmap ? Mjb [i] : 1) &&          \
367                                 (Mask_struct ? 1 : (Mjx [i] != 0)) ;    \
368                             if (mij) continue ;
369 
370                         switch (msize)
371                         {
372                             default:
373                             case 1 :
374                                 #undef  M_TYPE
375                                 #define M_TYPE uint8_t
376                                 #undef  M_SIZE
377                                 #define M_SIZE 1
378                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
379                                 break ;
380                             case 2 :
381                                 #undef  M_TYPE
382                                 #define M_TYPE uint16_t
383                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
384                                 break ;
385                             case 4 :
386                                 #undef  M_TYPE
387                                 #define M_TYPE uint32_t
388                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
389                                 break ;
390                             case 8 :
391                                 #undef  M_TYPE
392                                 #define M_TYPE uint64_t
393                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
394                                 break ;
395                             case 16 :
396                                 #undef  M_TYPE
397                                 #define M_TYPE uint64_t
398                                 #undef  M_SIZE
399                                 #define M_SIZE 2
400                                 #undef  GB_CHECK_MASK_ij
401                                 #define GB_CHECK_MASK_ij                    \
402                                     bool mij =                              \
403                                         (M_is_bitmap ? Mjb [i] : 1) &&      \
404                                         (Mask_struct ? 1 :                  \
405                                             (Mjx [2*i] != 0) ||             \
406                                             (Mjx [2*i+1] != 0)) ;           \
407                                     if (mij) continue ;
408                                 #include "GB_AxB_saxpy3_coarseHash_phase1.c"
409                                 break ;
410                         }
411 
412                     }
413                     else
414                     {
415 
416                         //------------------------------------------------------
417                         // M is sparse and scattered into Hf
418                         //------------------------------------------------------
419 
420                         #include "GB_AxB_saxpy3_coarseHash_notM_phase1.c"
421                     }
422                 }
423                 #endif
424             }
425         }
426     }
427 
428     //--------------------------------------------------------------------------
429     // check result for phase1 for fine tasks
430     //--------------------------------------------------------------------------
431 
432     #ifdef GB_DEBUG
433     #if ( !GB_NO_MASK )
434     {
435         for (taskid = 0 ; taskid < nfine ; taskid++)
436         {
437             int64_t kk = SaxpyTasks [taskid].vector ;
438             ASSERT (kk >= 0 && kk < B->nvec) ;
439             int64_t bjnz = (Bp == NULL) ? bvlen : (Bp [kk+1] - Bp [kk]) ;
440             // no work to do if B(:,j) is empty
441             if (bjnz == 0) continue ;
442             int64_t hash_size = SaxpyTasks [taskid].hsize ;
443             bool use_Gustavson = (hash_size == cvlen) ;
444             int leader = SaxpyTasks [taskid].leader ;
445             if (leader != taskid) continue ;
446             GB_GET_M_j ;        // get M(:,j)
447             if (mjnz == 0) continue ;
448             int64_t mjcount2 = 0 ;
449             int64_t mjcount = 0 ;
450             for (int64_t pM = pM_start ; pM < pM_end ; pM++)
451             {
452                 GB_GET_M_ij (pM) ;                  // get M(i,j)
453                 if (mij) mjcount++ ;
454             }
455             if (use_Gustavson)
456             {
457                 // phase1: fine Gustavson task, C<M>=A*B or C<!M>=A*B
458                 int8_t *restrict
459                     Hf = (int8_t *restrict) SaxpyTasks [taskid].Hf ;
460                 for (int64_t pM = pM_start ; pM < pM_end ; pM++)
461                 {
462                     GB_GET_M_ij (pM) ;               // get M(i,j)
463                     int64_t i = GBI (Mi, pM, mvlen) ;
464                     ASSERT (Hf [i] == mij) ;
465                 }
466                 for (int64_t i = 0 ; i < cvlen ; i++)
467                 {
468                     ASSERT (Hf [i] == 0 || Hf [i] == 1) ;
469                     if (Hf [i] == 1) mjcount2++ ;
470                 }
471                 ASSERT (mjcount == mjcount2) ;
472             }
473             else if (!M_packed_in_place)
474             {
475                 // phase1: fine hash task, C<M>=A*B or C<!M>=A*B
476                 // h == 0,   f == 0: unoccupied and unlocked
477                 // h == i+1, f == 1: occupied with M(i,j)=1
478                 int64_t *restrict
479                     Hf = (int64_t *restrict) SaxpyTasks [taskid].Hf ;
480                 int64_t hash_bits = (hash_size-1) ;
481                 for (int64_t pM = pM_start ; pM < pM_end ; pM++)
482                 {
483                     GB_GET_M_ij (pM) ;              // get M(i,j)
484                     if (!mij) continue ;            // skip if M(i,j)=0
485                     int64_t i = GBI (Mi, pM, mvlen) ;
486                     int64_t i_mine = ((i+1) << 2) + 1 ;  // ((i+1),1)
487                     int64_t probe = 0 ;
488                     for (GB_HASH (i))
489                     {
490                         int64_t hf = Hf [hash] ;
491                         if (hf == i_mine)
492                         {
493                             mjcount2++ ;
494                             break ;
495                         }
496                         ASSERT (hf != 0) ;
497                         probe++ ;
498                         ASSERT (probe < cvlen) ;
499                     }
500                 }
501                 ASSERT (mjcount == mjcount2) ;
502                 mjcount2 = 0 ;
503                 for (int64_t hash = 0 ; hash < hash_size ; hash++)
504                 {
505                     int64_t hf = Hf [hash] ;
506                     int64_t h = (hf >> 2) ;     // empty (0), or a 1-based
507                     int64_t f = (hf & 3) ;      // 0 if empty or 1 if occupied
508                     if (f == 1) ASSERT (h >= 1 && h <= cvlen) ;
509                     ASSERT (hf == 0 || f == 1) ;
510                     if (f == 1) mjcount2++ ;
511                 }
512                 ASSERT (mjcount == mjcount2) ;
513             }
514         }
515     }
516     #endif
517     #endif
518 }
519 
520