1 //------------------------------------------------------------------------------
2 // GB_subassign_methods.h: definitions for GB_subassign methods
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // macros for the construction of the GB_subassign methods
11 
12 #ifndef GB_SUBASSIGN_METHODS_H
13 #define GB_SUBASSIGN_METHODS_H
14 #include "GB_add.h"
15 #include "GB_ij.h"
16 #include "GB_Pending.h"
17 #include "GB_unused.h"
18 #include "GB_subassign_IxJ_slice.h"
19 
20 //------------------------------------------------------------------------------
21 // free workspace
22 //------------------------------------------------------------------------------
23 
24 #ifndef GB_FREE_WORK
25 #define GB_FREE_WORK ;
26 #endif
27 
28 #undef  GB_FREE_ALL
29 #define GB_FREE_ALL                             \
30 {                                               \
31     GB_FREE_WORK ;                              \
32     GB_WERK_POP (Npending, int64_t) ;           \
33     GB_FREE_WERK (&TaskList, TaskList_size) ;   \
34     GB_FREE (&Zh, Zh_size) ;                    \
35     GB_FREE_WERK (&Z_to_X, Z_to_X_size) ;       \
36     GB_FREE_WERK (&Z_to_S, Z_to_S_size) ;       \
37     GB_FREE_WERK (&Z_to_A, Z_to_A_size) ;       \
38     GB_FREE_WERK (&Z_to_M, Z_to_M_size) ;       \
39     GB_phbix_free (S);                        \
40 }
41 
42 //------------------------------------------------------------------------------
43 // GB_EMPTY_TASKLIST: declare an empty TaskList
44 //------------------------------------------------------------------------------
45 
46 #define GB_EMPTY_TASKLIST                                                   \
47     GrB_Info info ;                                                         \
48     int taskid, ntasks = 0, nthreads ;                                      \
49     GB_task_struct *TaskList = NULL ; size_t TaskList_size = 0 ;            \
50     GB_WERK_DECLARE (Npending, int64_t) ;                                   \
51     int64_t *restrict Zh     = NULL ; size_t Zh_size = 0 ;               \
52     int64_t *restrict Z_to_X = NULL ; size_t Z_to_X_size = 0 ;           \
53     int64_t *restrict Z_to_S = NULL ; size_t Z_to_S_size = 0 ;           \
54     int64_t *restrict Z_to_A = NULL ; size_t Z_to_A_size = 0 ;           \
55     int64_t *restrict Z_to_M = NULL ; size_t Z_to_M_size = 0 ;           \
56     struct GB_Matrix_opaque S_header ;                                      \
57     GrB_Matrix S = GB_clear_static_header (&S_header) ;
58 
59 //------------------------------------------------------------------------------
60 // GB_GET_C: get the C matrix (cannot be bitmap)
61 //------------------------------------------------------------------------------
62 
63 // C cannot be aliased with M or A.
64 
65 #define GB_GET_C                                                            \
66     ASSERT_MATRIX_OK (C, "C for subassign kernel", GB0) ;                   \
67     ASSERT (!GB_IS_BITMAP (C)) ;                                            \
68     int64_t *restrict Ci = C->i ;                                        \
69     GB_void *restrict Cx = (GB_void *) C->x ;                            \
70     const size_t csize = C->type->size ;                                    \
71     const GB_Type_code ccode = C->type->code ;                              \
72     const int64_t cvdim = C->vdim ;                                         \
73     const int64_t Cvlen = C->vlen ;                                         \
74     int64_t nzombies = C->nzombies ;                                        \
75     const bool is_matrix = (cvdim > 1) ;
76 
77 //------------------------------------------------------------------------------
78 // GB_GET_MASK: get the mask matrix M
79 //------------------------------------------------------------------------------
80 
81 // M and A can be aliased, but both are const.
82 
83 #define GB_GET_MASK                                                         \
84     ASSERT_MATRIX_OK (M, "M for assign", GB0) ;                             \
85     const int64_t *Mp = M->p ;                                              \
86     const int64_t *Mh = M->h ;                                              \
87     const int8_t  *Mb = M->b ;                                              \
88     const int64_t *Mi = M->i ;                                              \
89     const GB_void *Mx = (GB_void *) (Mask_struct ? NULL : (M->x)) ;         \
90     const size_t msize = M->type->size ;                                    \
91     const size_t Mvlen = M->vlen ;                                          \
92     const int64_t Mnvec = M->nvec ;                                         \
93     const bool M_is_hyper = GB_IS_HYPERSPARSE (M) ;                         \
94     const bool M_is_bitmap = GB_IS_BITMAP (M) ;
95 
96 //------------------------------------------------------------------------------
97 // GB_GET_ACCUM: get the accumulator op and its related typecasting functions
98 //------------------------------------------------------------------------------
99 
100 #define GB_GET_ACCUM                                                        \
101     ASSERT_BINARYOP_OK (accum, "accum for assign", GB0) ;                   \
102     ASSERT (!GB_OP_IS_POSITIONAL (accum)) ;                                 \
103     const GxB_binary_function faccum = accum->function ;                    \
104     const GB_cast_function                                                  \
105         cast_A_to_Y = GB_cast_factory (accum->ytype->code, acode),          \
106         cast_C_to_X = GB_cast_factory (accum->xtype->code, ccode),          \
107         cast_Z_to_C = GB_cast_factory (ccode, accum->ztype->code) ;         \
108     const size_t xsize = accum->xtype->size ;                               \
109     const size_t ysize = accum->ytype->size ;                               \
110     const size_t zsize = accum->ztype->size ;
111 
112 //------------------------------------------------------------------------------
113 // GB_GET_A: get the A matrix
114 //------------------------------------------------------------------------------
115 
116 #define GB_GET_A                                                            \
117     ASSERT_MATRIX_OK (A, "A for assign", GB0) ;                             \
118     const GrB_Type atype = A->type ;                                        \
119     const size_t asize = atype->size ;                                      \
120     const GB_Type_code acode = atype->code ;                                \
121     const int64_t *Ap = A->p ;                                              \
122     const int8_t  *Ab = A->b ;                                              \
123     const int64_t *Ai = A->i ;                                              \
124     const GB_void *Ax = (GB_void *) A->x ;                                  \
125     const GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ;   \
126     const int64_t Avlen = A->vlen ;                                         \
127     const bool A_is_bitmap = GB_IS_BITMAP (A) ;
128 
129 //------------------------------------------------------------------------------
130 // GB_GET_SCALAR: get the scalar
131 //------------------------------------------------------------------------------
132 
133 #define GB_GET_SCALAR                                                       \
134     ASSERT_TYPE_OK (atype, "atype for assign", GB0) ;                       \
135     const size_t asize = atype->size ;                                      \
136     const GB_Type_code acode = atype->code ;                                \
137     const GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ;   \
138     GB_void cwork [GB_VLA(csize)] ;                                         \
139     cast_A_to_C (cwork, scalar, asize) ;                                    \
140 
141 //------------------------------------------------------------------------------
142 // GB_GET_ACCUM_SCALAR: get the scalar and the accumulator
143 //------------------------------------------------------------------------------
144 
145 #define GB_GET_ACCUM_SCALAR                                                 \
146     GB_GET_SCALAR ;                                                         \
147     GB_GET_ACCUM ;                                                          \
148     GB_void ywork [GB_VLA(ysize)] ;                                         \
149     cast_A_to_Y (ywork, scalar, asize) ;
150 
151 //------------------------------------------------------------------------------
152 // GB_GET_S: get the S matrix
153 //------------------------------------------------------------------------------
154 
155 // S is never aliased with any other matrix.
156 // FUTURE: S->p could be C->p and S->x NULL if I and J are (:,:)
157 
158 #define GB_GET_S                                                            \
159     ASSERT_MATRIX_OK (S, "S extraction", GB0) ;                             \
160     const int64_t *restrict Sp = S->p ;                                  \
161     const int64_t *restrict Sh = S->h ;                                  \
162     const int64_t *restrict Si = S->i ;                                  \
163     const int64_t *restrict Sx = (int64_t *) S->x ;                      \
164     const int64_t Svlen = S->vlen ;                                         \
165     const int64_t Snvec = S->nvec ;                                         \
166     const bool S_is_hyper = GB_IS_HYPERSPARSE (S) ;
167 
168 //------------------------------------------------------------------------------
169 // basic actions
170 //------------------------------------------------------------------------------
171 
172     //--------------------------------------------------------------------------
173     // S_Extraction: finding C(iC,jC) via lookup through S=C(I,J)
174     //--------------------------------------------------------------------------
175 
176     // S is the symbolic pattern of the submatrix S = C(I,J).  The "numerical"
177     // value (held in S->x) of an entry S(i,j) is not a value, but a pointer
178     // back into C where the corresponding entry C(iC,jC) can be found, where
179     // iC = I [i] and jC = J [j].
180 
181     // The following macro performs the lookup.  Given a pointer pS into a
182     // column S(:,j), it finds the entry C(iC,jC), and also determines if the
183     // C(iC,jC) entry is a zombie.  The column indices j and jC are implicit.
184 
185     // Used for Methods 00 to 04, 06s, and 09 to 20, all of which use S.
186 
187     #define GB_C_S_LOOKUP                                                   \
188         int64_t pC = Sx [pS] ;                                              \
189         int64_t iC = GBI (Ci, pC, Cvlen) ;                                  \
190         bool is_zombie = GB_IS_ZOMBIE (iC) ;                                \
191         if (is_zombie) iC = GB_FLIP (iC) ;
192 
193     //--------------------------------------------------------------------------
194     // GB_VECTOR_LOOKUP
195     //--------------------------------------------------------------------------
196 
197     // Find pX_start and pX_end for the vector X (:,j)
198 
199     #define GB_VECTOR_LOOKUP(pX_start,pX_end,X,j)                           \
200     {                                                                       \
201         int64_t pleft = 0, pright = X ## nvec-1 ;                           \
202         GB_lookup (X ## _is_hyper, X ## h, X ## p, X ## vlen, &pleft,       \
203             pright, j, &pX_start, &pX_end) ;                                \
204     }
205 
206     //--------------------------------------------------------------------------
207     // get the C(:,jC) vector where jC = J [j]
208     //--------------------------------------------------------------------------
209 
210     // C may be standard sparse, or hypersparse
211     // time: O(1) if standard, O(log(Cnvec)) if hyper
212 
213     // This used for GB_subassign_one_slice and GB_subassign_emult_slice,
214     // which compute the parallel schedule for Methods 05, 06n, 07, and 08n.
215 
216     #define GB_LOOKUP_jC                                                    \
217         /* lookup jC in C */                                                \
218         /* jC = J [j] ; or J is ":" or jbegin:jend or jbegin:jinc:jend */   \
219         jC = GB_ijlist (J, j, Jkind, Jcolon) ;                              \
220         int64_t pC_start, pC_end ;                                          \
221         GB_VECTOR_LOOKUP (pC_start, pC_end, C, jC) ;
222 
223     //--------------------------------------------------------------------------
224     // C(:,jC) is dense: iC = I [iA], and then look up C(iC,jC)
225     //--------------------------------------------------------------------------
226 
227     // C(:,jC) is dense, and thus can be accessed with a O(1)-time lookup
228     // with the index iC, where the index iC comes from I [iA] or via a
229     // colon notation for I.
230 
231     // This used for Methods 05, 06n, 07, and 08n, which do not use S.
232 
233     #define GB_iC_DENSE_LOOKUP                                              \
234         int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;                     \
235         int64_t pC = pC_start + iC ;                                        \
236         bool is_zombie = (Ci != NULL) && GB_IS_ZOMBIE (Ci [pC]) ;           \
237         ASSERT (GB_IMPLIES (Ci != NULL, GB_UNFLIP (Ci [pC]) == iC)) ;
238 
239     //--------------------------------------------------------------------------
240     // get C(iC,jC) via binary search of C(:,jC)
241     //--------------------------------------------------------------------------
242 
243     // This used for Methods 05, 06n, 07, and 08n, which do not use S.
244 
245     // New zombies may be introduced into C during the parallel computation.
246     // No coarse task shares the same C(:,jC) vector, so no race condition can
247     // occur.  Fine tasks do share the same C(:,jC) vector, but each fine task
248     // is given a unique range of pC_start:pC_end-1 to search.  Thus, no binary
249     // search of any fine tasks conflict with each other.
250 
251     #define GB_iC_BINARY_SEARCH                                             \
252         int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;                     \
253         int64_t pC = pC_start ;                                             \
254         int64_t pright = pC_end - 1 ;                                       \
255         bool cij_found, is_zombie ;                                         \
256         GB_BINARY_SEARCH_ZOMBIE (iC, Ci, pC, pright, cij_found, zorig,      \
257             is_zombie) ;
258 
259     //--------------------------------------------------------------------------
260     // for a 2-way or 3-way merge
261     //--------------------------------------------------------------------------
262 
263     // An entry S(i,j), A(i,j), or M(i,j) has been processed;
264     // move to the next one.
265     #define GB_NEXT(X) (p ## X)++ ;
266 
267     //--------------------------------------------------------------------------
268     // basic operations
269     //--------------------------------------------------------------------------
270 
271     #define GB_COPY_scalar_to_C                                             \
272     {                                                                       \
273         /* C(iC,jC) = scalar, already typecasted into cwork      */         \
274         memcpy (Cx +(pC*csize), cwork, csize) ;                             \
275     }
276 
277     #define GB_COPY_aij_to_C                                                \
278     {                                                                       \
279         /* C(iC,jC) = A(i,j), with typecasting                   */         \
280         cast_A_to_C (Cx +(pC*csize), Ax +(pA*asize), csize) ;               \
281     }
282 
283     #define GB_COPY_aij_to_ywork                                            \
284     {                                                                       \
285         /* ywork = A(i,j), with typecasting                      */         \
286         cast_A_to_Y (ywork, Ax +(pA*asize), asize) ;                        \
287     }
288 
289     #define GB_ACCUMULATE                                                   \
290     {                                                                       \
291         /* C(iC,jC) = accum (C(iC,jC), ywork)                    */         \
292         GB_void xwork [GB_VLA(xsize)] ;                                     \
293         cast_C_to_X (xwork, Cx +(pC*csize), csize) ;                        \
294         GB_void zwork [GB_VLA(zsize)] ;                                     \
295         faccum (zwork, xwork, ywork) ;                                      \
296         cast_Z_to_C (Cx +(pC*csize), zwork, csize) ;                        \
297     }                                                                       \
298 
299     #define GB_DELETE                                                       \
300     {                                                                       \
301         /* turn C(iC,jC) into a zombie */                                   \
302         ASSERT (!GB_IS_FULL (C)) ;                                          \
303         task_nzombies++ ;                                                   \
304         Ci [pC] = GB_FLIP (iC) ;                                            \
305     }
306 
307     #define GB_UNDELETE                                                     \
308     {                                                                       \
309         /* bring a zombie C(iC,jC) back to life;                 */         \
310         /* the value of C(iC,jC) must also be assigned.          */         \
311         ASSERT (!GB_IS_FULL (C)) ;                                          \
312         Ci [pC] = iC ;                                                      \
313         task_nzombies-- ;                                                   \
314     }
315 
316     //--------------------------------------------------------------------------
317     // C(I,J)<M> = accum (C(I,J),A): consider all cases
318     //--------------------------------------------------------------------------
319 
320         // The matrix C may have pending tuples and zombies:
321 
322         // (1) pending tuples:  this is a list of pending updates held as a set
323         // of (i,j,x) tuples.  They had been added to the list via a prior
324         // GrB_setElement or GxB_subassign.  No operator needs to be applied to
325         // them; the implied operator is SECOND, for both GrB_setElement and
326         // GxB_subassign, regardless of whether or not an accum operator is
327         // present.  Pending tuples are inserted if and only if the
328         // corresponding entry C(i,j) does not exist, and in that case no accum
329         // operator is applied.
330 
331         //      The GrB_setElement method (C(i,j) = x) is same as GxB_subassign
332         //      with: accum is SECOND, C not replaced, no mask M, mask not
333         //      complemented.  If GrB_setElement needs to insert its update as
334         //      a pending tuple, then it will always be compatible with all
335         //      pending tuples inserted here, by GxB_subassign.
336 
337         // (2) zombie entries.  These are entries that are still present in the
338         // pattern but marked for deletion (via GB_FLIP(i) for the row index).
339 
340         // For the current GxB_subassign, there are 16 cases to handle,
341         // all combinations of the following options:
342 
343         //      accum is NULL, accum is not NULL
344         //      C is not replaced, C is replaced
345         //      no mask, mask is present
346         //      mask is not complemented, mask is complemented
347 
348         // Complementing an empty mask:  This does not require the matrix A
349         // at all so it is handled as a special case.  It corresponds to
350         // the GB_RETURN_IF_QUICK_MASK option in other GraphBLAS operations.
351         // Thus only 12 cases are considered in the tables below:
352 
353         //      These 4 cases are listed in Four Tables below:
354         //      2 cases: accum is NULL, accum is not NULL
355         //      2 cases: C is not replaced, C is replaced
356 
357         //      3 cases: no mask, M is present and not complemented,
358         //               and M is present and complemented.  If there is no
359         //               mask, then M(i,j)=1 for all (i,j).  These 3 cases
360         //               are the columns of each of the Four Tables.
361 
362         // Each of these 12 cases can encounter up to 12 combinations of
363         // entries in C, A, and M (6 if no mask M is present).  The left
364         // column of the Four Tables below consider all 12 combinations for all
365         // (i,j) in the cross product IxJ:
366 
367         //      C(I(i),J(j)) present, zombie, or not there: C, X, or '.'
368         //      A(i,j) present or not, labeled 'A' or '.' below
369         //      M(i,j) = 1 or 0 (but only if M is present)
370 
371         //      These 12 cases become the left columns as listed below.
372         //      The zombie cases are handled a sub-case for "C present:
373         //      regular entry or zombie".  The acronyms below use "D" for
374         //      "dot", meaning the entry (C or A) is not present.
375 
376         //      [ C A 1 ]   C_A_1: both C and A present, M=1
377         //      [ X A 1 ]   C_A_1: both C and A present, M=1, C is a zombie
378         //      [ . A 1 ]   D_A_1: C not present, A present, M=1
379 
380         //      [ C . 1 ]   C_D_1: C present, A not present, M=1
381         //      [ X . 1 ]   C_D_1: C present, A not present, M=1, C a zombie
382         //      [ . . 1 ]          only M=1 present, but nothing to do
383 
384         //      [ C A 0 ]   C_A_0: both C and A present, M=0
385         //      [ X A 0 ]   C_A_0: both C and A present, M=0, C is a zombie
386         //      [ . A 0 ]          C not present, A present, M=0,
387         //                              nothing to do
388 
389         //      [ C . 0 ]   C_D_0: C present, A not present, M=1
390         //      [ X . 0 ]   C_D_0: C present, A not present, M=1, C a zombie
391         //      [ . . 0 ]          only M=0 present, but nothing to do
392 
393         // Legend for action taken in the right half of the table:
394 
395         //      delete   live entry C(I(i),J(j)) marked for deletion (zombie)
396         //      =A       live entry C(I(i),J(j)) is overwritten with new value
397         //      =C+A     live entry C(I(i),J(j)) is modified with accum(c,a)
398         //      C        live entry C(I(i),J(j)) is unchanged
399 
400         //      undelete entry C(I(i),J(j)) a zombie, bring back with A(i,j)
401         //      X        entry C(I(i),J(j)) a zombie, no change, still zombie
402 
403         //      insert   entry C(I(i),J(j)) not present, add pending tuple
404         //      .        entry C(I(i),J(j)) not present, no change
405 
406         //      blank    the table is left blank where the the event cannot
407         //               occur:  GxB_subassign with no M cannot have
408         //               M(i,j)=0, and GrB_setElement does not have the M
409         //               column
410 
411         //----------------------------------------------------------------------
412         // GrB_setElement and the Four Tables for GxB_subassign:
413         //----------------------------------------------------------------------
414 
415             //------------------------------------------------------------
416             // GrB_setElement:  no mask
417             //------------------------------------------------------------
418 
419             // C A 1        =A                               |
420             // X A 1        undelete                         |
421             // . A 1        insert                           |
422 
423             //          GrB_setElement acts exactly like GxB_subassign with the
424             //          implicit GrB_SECOND_Ctype operator, I=i, J=j, and a
425             //          1-by-1 matrix A containing a single entry (not an
426             //          implicit entry; there is no "." for A).  That is,
427             //          nnz(A)==1.  No mask, and the descriptor is the default;
428             //          C_replace effectively false, mask not complemented, A
429             //          not transposed.  As a result, GrB_setElement can be
430             //          freely mixed with calls to GxB_subassign with C_replace
431             //          effectively false and with the identical
432             //          GrB_SECOND_Ctype operator.  These calls to
433             //          GxB_subassign can use the mask, either complemented or
434             //          not, and they can transpose A if desired, and there is
435             //          no restriction on I and J.  The matrix A can be any
436             //          type and the type of A can change from call to call.
437 
438             //------------------------------------------------------------
439             // NO accum  |  no mask     mask        mask
440             // NO repl   |              not compl   compl
441             //------------------------------------------------------------
442 
443             // C A 1        =A          =A          C        |
444             // X A 1        undelete    undelete    X        |
445             // . A 1        insert      insert      .        |
446 
447             // C . 1        delete      delete      C        |
448             // X . 1        X           X           X        |
449             // . . 1        .           .           .        |
450 
451             // C A 0                    C           =A       |
452             // X A 0                    X           undelete |
453             // . A 0                    .           insert   |
454 
455             // C . 0                    C           delete   |
456             // X . 0                    X           X        |
457             // . . 0                    .           .        |
458 
459             //          S_Extraction method works well: first extract pattern
460             //          of S=C(I,J). Then examine all of A, M, S, and update
461             //          C(I,J).  The method needs to examine all entries in
462             //          in C(I,J) to delete them if A is not present, so
463             //          S=C(I,J) is not costly.
464 
465             //------------------------------------------------------------
466             // NO accum  |  no mask     mask        mask
467             // WITH repl |              not compl   compl
468             //------------------------------------------------------------
469 
470             // C A 1        =A          =A          delete   |
471             // X A 1        undelete    undelete    X        |
472             // . A 1        insert      insert      .        |
473 
474             // C . 1        delete      delete      delete   |
475             // X . 1        X           X           X        |
476             // . . 1        .           .           .        |
477 
478             // C A 0                    delete      =A       |
479             // X A 0                    X           undelete |
480             // . A 0                    .           insert   |
481 
482             // C . 0                    delete      delete   |
483             // X . 0                    X           X        |
484             // . . 0                    .           .        |
485 
486             //          S_Extraction method works well, since all of C(I,J)
487             //          needs to be traversed, S=C(I,J) is reasonable to
488             //          compute.
489 
490             //          With no accum: If there is no M and M is not
491             //          complemented, then C_replace is irrelevant,  Whether
492             //          true or false, the results in the two tables
493             //          above are the same.
494 
495             //------------------------------------------------------------
496             // ACCUM     |  no mask     mask        mask
497             // NO repl   |              not compl   compl
498             //------------------------------------------------------------
499 
500             // C A 1        =C+A        =C+A        C        |
501             // X A 1        undelete    undelete    X        |
502             // . A 1        insert      insert      .        |
503 
504             // C . 1        C           C           C        |
505             // X . 1        X           X           X        |
506             // . . 1        .           .           .        |
507 
508             // C A 0                    C           =C+A     |
509             // X A 0                    X           undelete |
510             // . A 0                    .           insert   |
511 
512             // C . 0                    C           C        |
513             // X . 0                    X           X        |
514             // . . 0                    .           .        |
515 
516             //          With ACCUM but NO C_replace: This method only needs to
517             //          examine entries in A.  It does not need to examine all
518             //          entries in C(I,J), nor all entries in M.  Entries in
519             //          C but in not A remain unchanged.  This is like an
520             //          extended GrB_setElement.  No entries in C can be
521             //          deleted.  All other methods must examine all of C(I,J).
522 
523             //          Without S_Extraction: C(:,J) or M have many entries
524             //          compared with A, do not extract S=C(I,J); use
525             //          binary search instead.  Otherwise, use the same
526             //          S_Extraction method as the other 3 cases.
527 
528             //          S_Extraction method: if nnz(C(:,j)) + nnz(M) is
529             //          similar to nnz(A) then the S_Extraction method would
530             //          work well.
531 
532             //------------------------------------------------------------
533             // ACCUM     |  no mask     mask        mask
534             // WITH repl |              not compl   compl
535             //------------------------------------------------------------
536 
537             // C A 1        =C+A        =C+A        delete   |
538             // X A 1        undelete    undelete    X        |
539             // . A 1        insert      insert      .        |
540 
541             // C . 1        C           C           delete   |
542             // X . 1        X           X           X        |
543             // . . 1        .           .           .        |
544 
545             // C A 0                    delete      =C+A     |
546             // X A 0                    X           undelete |
547             // . A 0                    .           insert   |
548 
549             // C . 0                    delete      C        |
550             // X . 0                    X           X        |
551             // . . 0                    .           .        |
552 
553             //          S_Extraction method works well since all entries
554             //          in C(I,J) must be examined.
555 
556             //          With accum: If there is no M and M is not
557             //          complemented, then C_replace is irrelavant,  Whether
558             //          true or false, the results in the two tables
559             //          above are the same.
560 
561             //          This condition on C_replace holds with our without
562             //          accum.  Thus, if there is no M, and M is
563             //          not complemented, the C_replace can be set to false.
564 
565             //------------------------------------------------------------
566 
567             // ^^^^^ legend for left columns above:
568             // C        prior entry C(I(i),J(j)) exists
569             // X        prior entry C(I(i),J(j)) exists but is a zombie
570             // .        no prior entry C(I(i),J(j))
571             //   A      A(i,j) exists
572             //   .      A(i,j) does not exist
573             //     1    M(i,j)=1, assuming M exists (or if implicit)
574             //     0    M(i,j)=0, only if M exists
575 
576         //----------------------------------------------------------------------
577         // Actions in the Four Tables above
578         //----------------------------------------------------------------------
579 
580             // Each entry in the Four Tables above are now explained in more
581             // detail, describing what must be done in each case.  Zombies and
582             // pending tuples are disjoint; they do not mix.  Zombies are IN
583             // the pattern but pending tuples are updates that are NOT in the
584             // pattern.  That is why a separate list of pending tuples must be
585             // kept; there is no place for them in the pattern.  Zombies, on
586             // the other hand, are entries IN the pattern that have been
587             // marked for deletion.
588 
589             //--------------------------------
590             // For entries NOT in the pattern:
591             //--------------------------------
592 
593             // They can have pending tuples, and can acquire more.  No zombies.
594 
595             //      ( insert ):
596 
597             //          An entry C(I(i),J(j)) is NOT in the pattern, but its
598             //          value must be modified.  This is an insertion, like
599             //          GrB_setElement, and the insertion is added as a pending
600             //          tuple for C(I(i),J(j)).  There can be many insertions
601             //          to the same element, each in the list of pending
602             //          tuples, in order of their insertion.  Eventually these
603             //          pending tuples must be assembled into C(I(i),J(j)) in
604             //          the right order using the implied SECOND operator.
605 
606             //      ( . ):
607 
608             //          no change.  C(I(i),J(j)) not in the pattern, and not
609             //          modified.  This C(I(i),J(j)) position could have
610             //          pending tuples, in the list of pending tuples, but none
611             //          of them are changed.  If C_replace is true then those
612             //          pending tuples would have to be discarded, but that
613             //          condition will not occur because C_replace=true forces
614             //          all prior tuples to the matrix to be assembled.
615 
616             //--------------------------------
617             // For entries IN the pattern:
618             //--------------------------------
619 
620             // They have no pending tuples, and acquire none.  It can be
621             // zombie, can become a zombie, or a zombie can come back to life.
622 
623             //      ( delete ):
624 
625             //          C(I(i),J(j)) becomes a zombie, by flipping its row
626             //          index via the GB_FLIP function.
627 
628             //      ( undelete ):
629 
630             //          C(I(i),J(j)) = A(i,j) was a zombie and is no longer a
631             //          zombie.  Its row index is restored with GB_FLIP.
632 
633             //      ( X ):
634 
635             //          C(I(i),J(j)) was a zombie, and still is a zombie.
636             //          row index is < 0, and actual index is GB_FLIP(I(i))
637 
638             //      ( C ):
639 
640             //          no change; C(I(i),J(j)) already an entry, and its value
641             //          doesn't change.
642 
643             //      ( =A ):
644 
645             //          C(I(i),J(j)) = A(i,j), value gets overwritten.
646 
647             //      ( =C+A ):
648 
649             //          C(I(i),J(j)) = accum (C(I(i),J(j)), A(i,j))
650             //          The existing balue is modified via the accumulator.
651 
652 
653     //--------------------------------------------------------------------------
654     // handling each action
655     //--------------------------------------------------------------------------
656 
657         // Each of the 12 cases are handled by the following actions,
658         // implemented as macros.  The Four Tables are re-sorted below,
659         // and folded together according to their left column.
660 
661         // Once the M(i,j) entry is extracted, all GB_subassign_* functions
662         // explicitly complement the scalar value if Mask_comp is true, before
663         // using these action functions.  For the [no mask] case, M(i,j)=1.
664         // Thus, only the middle column needs to be considered by each action;
665         // the action will handle all three columns at the same time.  All
666         // three columns remain in the re-sorted tables below for reference.
667 
668         //----------------------------------------------------------------------
669         // ----[C A 1] or [X A 1]: C and A present, M=1
670         //----------------------------------------------------------------------
671 
672             //------------------------------------------------
673             //           |  no mask     mask        mask
674             //           |              not compl   compl
675             //------------------------------------------------
676             // C A 1        =A          =A          C        | no accum,no Crepl
677             // C A 1        =A          =A          delete   | no accum,Crepl
678             // C A 1        =C+A        =C+A        C        | accum, no Crepl
679             // C A 1        =C+A        =C+A        delete   | accum, Crepl
680 
681             // X A 1        undelete    undelete    X        | no accum,no Crepl
682             // X A 1        undelete    undelete    X        | no accum,Crepl
683             // X A 1        undelete    undelete    X        | accum, no Crepl
684             // X A 1        undelete    undelete    X        | accum, Crepl
685 
686             // Both C(I(i),J(j)) == S(i,j) and A(i,j) are present, and mij = 1.
687             // C(I(i),J(i)) is updated with the entry A(i,j).
688             // C_replace has no impact on this action.
689 
690             // [X A 1] matrix case
691             #define GB_X_A_1_matrix                                         \
692             {                                                               \
693                 /* ----[X A 1]                                           */ \
694                 /* action: ( undelete ): bring a zombie back to life     */ \
695                 GB_UNDELETE ;                                               \
696                 GB_COPY_aij_to_C ;                                          \
697             }
698 
699             // [X A 1] scalar case
700             #define GB_X_A_1_scalar                                         \
701             {                                                               \
702                 /* ----[X A 1]                                           */ \
703                 /* action: ( undelete ): bring a zombie back to life     */ \
704                 GB_UNDELETE ;                                               \
705                 GB_COPY_scalar_to_C ;                                       \
706             }
707 
708             // [C A 1] matrix case when accum is present
709             #define GB_withaccum_C_A_1_matrix                               \
710             {                                                               \
711                 if (is_zombie)                                              \
712                 {                                                           \
713                     /* ----[X A 1]                                       */ \
714                     /* action: ( undelete ): bring a zombie back to life */ \
715                     GB_X_A_1_matrix ;                                       \
716                 }                                                           \
717                 else                                                        \
718                 {                                                           \
719                     /* ----[C A 1] with accum                            */ \
720                     /* action: ( =C+A ): apply the accumulator           */ \
721                     GB_void ywork [GB_VLA(ysize)] ;                         \
722                     GB_COPY_aij_to_ywork ;                                  \
723                     GB_ACCUMULATE ;                                         \
724                 }                                                           \
725             }
726 
727             // [C A 1] scalar case when accum is present
728             #define GB_withaccum_C_A_1_scalar                               \
729             {                                                               \
730                 if (is_zombie)                                              \
731                 {                                                           \
732                     /* ----[X A 1]                                       */ \
733                     /* action: ( undelete ): bring a zombie back to life */ \
734                     GB_X_A_1_scalar ;                                       \
735                 }                                                           \
736                 else                                                        \
737                 {                                                           \
738                     /* ----[C A 1] with accum, scalar expansion          */ \
739                     /* action: ( =C+A ): apply the accumulator           */ \
740                     GB_ACCUMULATE ;                                         \
741                 }                                                           \
742             }
743 
744             // [C A 1] matrix case when no accum is present
745             #define GB_noaccum_C_A_1_matrix                                 \
746             {                                                               \
747                 if (is_zombie)                                              \
748                 {                                                           \
749                     /* ----[X A 1]                                       */ \
750                     /* action: ( undelete ): bring a zombie back to life */ \
751                     GB_X_A_1_matrix ;                                       \
752                 }                                                           \
753                 else                                                        \
754                 {                                                           \
755                     /* ----[C A 1] no accum, scalar expansion            */ \
756                     /* action: ( =A ): copy A into C                     */ \
757                     GB_COPY_aij_to_C ;                                      \
758                 }                                                           \
759             }
760 
761             // [C A 1] scalar case when no accum is present
762             #define GB_noaccum_C_A_1_scalar                                 \
763             {                                                               \
764                 if (is_zombie)                                              \
765                 {                                                           \
766                     /* ----[X A 1]                                       */ \
767                     /* action: ( undelete ): bring a zombie back to life */ \
768                     GB_X_A_1_scalar ;                                       \
769                 }                                                           \
770                 else                                                        \
771                 {                                                           \
772                     /* ----[C A 1] no accum, scalar expansion            */ \
773                     /* action: ( =A ): copy A into C                     */ \
774                     GB_COPY_scalar_to_C ;                                   \
775                 }                                                           \
776             }
777 
778         //----------------------------------------------------------------------
779         // ----[. A 1]: C not present, A present, M=1
780         //----------------------------------------------------------------------
781 
782             //------------------------------------------------
783             //           |  no mask     mask        mask
784             //           |              not compl   compl
785             //------------------------------------------------
786             // . A 1        insert      insert      .        | no accum,no Crepl
787             // . A 1        insert      insert      .        | no accum,Crepl
788             // . A 1        insert      insert      .        | accum, no Crepl
789             // . A 1        insert      insert      .        | accum, Crepl
790 
791             // C(I(i),J(j)) == S (i,j) is not present, A (i,j) is present, and
792             // mij = 1. The mask M allows C to be written, but no entry present
793             // in C (neither a live entry nor a zombie).  This entry must be
794             // added to C but it doesn't fit in the pattern.  It is added as a
795             // pending tuple.  Zombies and pending tuples do not intersect.
796 
797             // If adding the pending tuple fails, C is cleared entirely.
798             // Otherwise the matrix C would be left in an incoherent partial
799             // state of computation.  It's cleaner to just free it all.
800 
801             // The action is done by GB_PENDING_INSERT in GB_Pending.h.
802 
803             #if 0
804             #define GB_D_A_1_scalar                                         \
805             {                                                               \
806                 /* ----[. A 1]                                           */ \
807                 /* action: ( insert )                                    */ \
808                 GB_PENDING_INSERT (scalar) ;                                \
809             }
810 
811             #define GB_D_A_1_matrix                                         \
812             {                                                               \
813                 /* ----[. A 1]                                           */ \
814                 /* action: ( insert )                                    */ \
815                 GB_PENDING_INSERT (Ax +(pA*asize)) ;                        \
816             }
817             #endif
818 
819         //----------------------------------------------------------------------
820         // ----[C . 1] or [X . 1]: C present, A not present, M=1
821         //----------------------------------------------------------------------
822 
823             //------------------------------------------------
824             //           |  no mask     mask        mask
825             //           |              not compl   compl
826             //------------------------------------------------
827             // C . 1        delete      delete      C        | no accum,no Crepl
828             // C . 1        delete      delete      delete   | no accum,Crepl
829             // C . 1        C           C           C        | accum, no Crepl
830             // C . 1        C           C           delete   | accum, Crepl
831 
832             // X . 1        X           X           X        | no accum,no Crepl
833             // X . 1        X           X           X        | no accum,Crepl
834             // X . 1        X           X           X        | accum, no Crepl
835             // X . 1        X           X           X        | accum, Crepl
836 
837             // C(I(i),J(j)) == S (i,j) is present, A (i,j) not is present, and
838             // mij = 1. The mask M allows C to be written, but no entry present
839             // in A.  If no accum operator is present, C becomes a zombie.
840 
841             // This condition cannot occur if A is a dense matrix,
842             // nor for scalar expansion
843 
844             // [C . 1] matrix case when no accum is present
845 
846             #if 0
847             #define GB_noaccum_C_D_1_matrix                                 \
848             {                                                               \
849                 if (is_zombie)                                              \
850                 {                                                           \
851                     /* ----[X . 1]                                       */ \
852                     /* action: ( X ): still a zombie                     */ \
853                 }                                                           \
854                 else                                                        \
855                 {                                                           \
856                     /* ----[C . 1] no accum                              */ \
857                     /* action: ( delete ): becomes a zombie              */ \
858                     GB_DELETE ;                                             \
859                 }                                                           \
860             }
861             #endif
862 
863             // The above action is done via GB_DELETE_ENTRY.
864 
865         //----------------------------------------------------------------------
866         // ----[C A 0] or [X A 0]: both C and A present but M=0
867         //----------------------------------------------------------------------
868 
869             //------------------------------------------------
870             //           |  no mask     mask        mask
871             //           |              not compl   compl
872             //------------------------------------------------
873             // C A 0                    C           =A       | no accum,no Crepl
874             // C A 0                    delete      =A       | no accum,Crepl
875             // C A 0                    C           =C+A     | accum, no Crepl
876             // C A 0                    delete      =C+A     | accum, Crepl
877 
878             // X A 0                    X           undelete | no accum,no Crepl
879             // X A 0                    X           undelete | no accum,Crepl
880             // X A 0                    X           undelete | accum, no Crepl
881             // X A 0                    X           undelete | accum, Crepl
882 
883             // Both C(I(i),J(j)) == S(i,j) and A(i,j) are present, and mij = 0.
884             // The mask prevents A being written to C, so A has no effect on
885             // the result.  If C_replace is true, however, the entry is
886             // deleted, becoming a zombie.  This case does not occur if
887             // the mask M is not present.  This action also handles the
888             // [C . 0] and [X . 0] cases; see the next section below.
889 
890             // This condition can still occur if A is dense, so if a mask M is
891             // present, entries can still be deleted from C.  As a result, the
892             // fact that A is dense cannot be exploited when the mask M is
893             // present.
894 
895             #if 0
896             #define GB_C_A_0                                                \
897             {                                                               \
898                 if (is_zombie)                                              \
899                 {                                                           \
900                     /* ----[X A 0]                                       */ \
901                     /* ----[X . 0]                                       */ \
902                     /* action: ( X ): still a zombie                     */ \
903                 }                                                           \
904                 else if (C_replace)                                         \
905                 {                                                           \
906                     /* ----[C A 0] replace                               */ \
907                     /* ----[C . 0] replace                               */ \
908                     /* action: ( delete ): becomes a zombie              */ \
909                     GB_DELETE ;                                             \
910                 }                                                           \
911                 else                                                        \
912                 {                                                           \
913                     /* ----[C A 0] no replace                            */ \
914                     /* ----[C . 0] no replace                            */ \
915                     /* action: ( C ): no change                          */ \
916                 }                                                           \
917             }
918             #endif
919 
920             // The above action is done via GB_DELETE_ENTRY.
921 
922             // The above action is very similar to C_D_1.  The only difference
923             // is how the entry C becomes a zombie.  With C_D_1, there is no
924             // entry in A, so C becomes a zombie if no accum function is used
925             // because the implicit value A(i,j) gets copied into C, causing it
926             // to become an implicit value also (deleting the entry in C).
927             // With C_A_0, the entry C is protected from any modification from
928             // A (regardless of accum or not).  However, if C_replace is true,
929             // the entry is cleared.  The mask M does not protect C from the
930             // C_replace action.
931 
932             // If C_replace is false, then the [C A 0] action does nothing.
933             // If C_replace is true, then the action becomes the following:
934 
935             #define GB_DELETE_ENTRY                                         \
936             {                                                               \
937                 if (!is_zombie)                                             \
938                 {                                                           \
939                     GB_DELETE ;                                             \
940                 }                                                           \
941             }
942 
943         //----------------------------------------------------------------------
944         // ----[C . 0] or [X . 0]: C present, A not present, M=0
945         //----------------------------------------------------------------------
946 
947             //------------------------------------------------
948             //           |  no mask     mask        mask
949             //           |              not compl   compl
950             //------------------------------------------------
951 
952             // C . 0                    C           delete   | no accum,no Crepl
953             // C . 0                    delete      delete   | no accum,Crepl
954             // C . 0                    C           C        | accum, no Crepl
955             // C . 0                    delete      C        | accum, Crepl
956 
957             // X . 0                    X           X        | no accum,no Crepl
958             // X . 0                    X           X        | no accum,Crepl
959             // X . 0                    X           X        | accum, no Crepl
960             // X . 0                    X           X        | accum, Crepl
961 
962             // C(I(i),J(j)) == S(i,j) is present, but A(i,j) is not present,
963             // and mij = 0.  Since A(i,j) has no effect on the result,
964             // this is the same as the C_A_0 action above.
965 
966             // This condition cannot occur if A is a dense matrix, nor for
967             // scalar expansion, but the existance of the entry A is not
968             // relevant.
969 
970             // If C_replace is false, then the [C D 0] action does nothing.
971             // If C_replace is true, then the action becomes GB_DELETE_ENTRY.
972 
973             #if 0
974             #define GB_C_D_0 GB_C_A_0
975             #endif
976 
977         //----------------------------------------------------------------------
978         // ----[. A 0]: C not present, A present, M=0
979         //----------------------------------------------------------------------
980 
981             // . A 0                    .           insert   | no accum,no Crepl
982             // . A 0                    .           insert   | no accum,no Crepl
983             // . A 0                    .           insert   | accum, no Crepl
984             // . A 0                    .           insert   | accum, Crepl
985 
986             // C(I(i),J(j)) == S(i,j) is not present, A(i,j) is present,
987             // but mij = 0.  The mask M prevents A from modifying C, so the
988             // A(i,j) entry is ignored.  C_replace has no effect since the
989             // entry is already cleared.  There is nothing to do.
990 
991         //----------------------------------------------------------------------
992         // ----[. . 1] and [. . 0]: no entries in C and A, M = 0 or 1
993         //----------------------------------------------------------------------
994 
995             //------------------------------------------------
996             //           |  no mask     mask        mask
997             //           |              not compl   compl
998             //------------------------------------------------
999 
1000             // . . 1        .           .           .        | no accum,no Crepl
1001             // . . 1        .           .           .        | no accum,Crepl
1002             // . . 1        .           .           .        | accum, no Crepl
1003             // . . 1        .           .           .        | accum, Crepl
1004 
1005             // . . 0        .           .           .        | no accum,no Crepl
1006             // . . 0        .           .           .        | no accum,Crepl
1007             // . . 0        .           .           .        | accum, no Crepl
1008             // . . 0        .           .           .        | accum, Crepl
1009 
1010             // Neither C(I(i),J(j)) == S(i,j) nor A(i,j) are not present,
1011             // Nothing happens.  The M(i,j) entry is present, otherwise
1012             // this (i,j) position would not be considered at all.
1013             // The M(i,j) entry has no effect.  There is nothing to do.
1014 
1015 //------------------------------------------------------------------------------
1016 // GB_subassign_symbolic: S = C(I,J)
1017 //------------------------------------------------------------------------------
1018 
1019 GrB_Info GB_subassign_symbolic  // S = C(I,J), extracting the pattern not values
1020 (
1021     // output
1022     GrB_Matrix S,               // output matrix, static header
1023     // inputs, not modified:
1024     const GrB_Matrix C,         // matrix to extract the pattern of
1025     const GrB_Index *I,         // index list for S = C(I,J), or GrB_ALL, etc.
1026     const int64_t ni,           // length of I, or special
1027     const GrB_Index *J,         // index list for S = C(I,J), or GrB_ALL, etc.
1028     const int64_t nj,           // length of J, or special
1029     const bool S_must_not_be_jumbled,   // if true, S cannot be jumbled
1030     GB_Context Context
1031 ) ;
1032 
1033 //------------------------------------------------------------------------------
1034 // GB_subassign_zombie: C(I,J) = empty ; using S
1035 //------------------------------------------------------------------------------
1036 
1037 GrB_Info GB_subassign_zombie
1038 (
1039     GrB_Matrix C,
1040     // input:
1041     const GrB_Index *I,
1042     const int64_t ni,
1043     const int64_t nI,
1044     const int Ikind,
1045     const int64_t Icolon [3],
1046     const GrB_Index *J,
1047     const int64_t nj,
1048     const int64_t nJ,
1049     const int Jkind,
1050     const int64_t Jcolon [3],
1051     GB_Context Context
1052 ) ;
1053 
1054 //------------------------------------------------------------------------------
1055 // GB_subassign_01: C(I,J) = scalar ; using S
1056 //------------------------------------------------------------------------------
1057 
1058 GrB_Info GB_subassign_01
1059 (
1060     GrB_Matrix C,
1061     // input:
1062     const GrB_Index *I,
1063     const int64_t ni,
1064     const int64_t nI,
1065     const int Ikind,
1066     const int64_t Icolon [3],
1067     const GrB_Index *J,
1068     const int64_t nj,
1069     const int64_t nJ,
1070     const int Jkind,
1071     const int64_t Jcolon [3],
1072     const void *scalar,
1073     const GrB_Type atype,
1074     GB_Context Context
1075 ) ;
1076 
1077 //------------------------------------------------------------------------------
1078 // GB_subassign_02: C(I,J) = A ; using S
1079 //------------------------------------------------------------------------------
1080 
1081 GrB_Info GB_subassign_02
1082 (
1083     GrB_Matrix C,
1084     // input:
1085     const GrB_Index *I,
1086     const int64_t ni,
1087     const int64_t nI,
1088     const int Ikind,
1089     const int64_t Icolon [3],
1090     const GrB_Index *J,
1091     const int64_t nj,
1092     const int64_t nJ,
1093     const int Jkind,
1094     const int64_t Jcolon [3],
1095     const GrB_Matrix A,
1096     GB_Context Context
1097 ) ;
1098 
1099 //------------------------------------------------------------------------------
1100 // GB_subassign_03: C(I,J) += scalar ; using S
1101 //------------------------------------------------------------------------------
1102 
1103 GrB_Info GB_subassign_03
1104 (
1105     GrB_Matrix C,
1106     // input:
1107     const GrB_Index *I,
1108     const int64_t ni,
1109     const int64_t nI,
1110     const int Ikind,
1111     const int64_t Icolon [3],
1112     const GrB_Index *J,
1113     const int64_t nj,
1114     const int64_t nJ,
1115     const int Jkind,
1116     const int64_t Jcolon [3],
1117     const GrB_BinaryOp accum,
1118     const void *scalar,
1119     const GrB_Type atype,
1120     GB_Context Context
1121 ) ;
1122 
1123 //------------------------------------------------------------------------------
1124 // GB_subassign_04: C(I,J) += A ; using S
1125 //------------------------------------------------------------------------------
1126 
1127 GrB_Info GB_subassign_04
1128 (
1129     GrB_Matrix C,
1130     // input:
1131     const GrB_Index *I,
1132     const int64_t ni,
1133     const int64_t nI,
1134     const int Ikind,
1135     const int64_t Icolon [3],
1136     const GrB_Index *J,
1137     const int64_t nj,
1138     const int64_t nJ,
1139     const int Jkind,
1140     const int64_t Jcolon [3],
1141     const GrB_BinaryOp accum,
1142     const GrB_Matrix A,
1143     GB_Context Context
1144 ) ;
1145 
1146 //------------------------------------------------------------------------------
1147 // GB_subassign_05: C(I,J)<M> = scalar ; no S
1148 //------------------------------------------------------------------------------
1149 
1150 GrB_Info GB_subassign_05
1151 (
1152     GrB_Matrix C,
1153     // input:
1154     const GrB_Index *I,
1155     const int64_t nI,
1156     const int Ikind,
1157     const int64_t Icolon [3],
1158     const GrB_Index *J,
1159     const int64_t nJ,
1160     const int Jkind,
1161     const int64_t Jcolon [3],
1162     const GrB_Matrix M,
1163     const bool Mask_struct,
1164     const void *scalar,
1165     const GrB_Type atype,
1166     GB_Context Context
1167 ) ;
1168 
1169 //------------------------------------------------------------------------------
1170 // GB_subassign_05e: C(:,:)<M,struct> = scalar ; no S, C empty
1171 //------------------------------------------------------------------------------
1172 
1173 GrB_Info GB_subassign_05e
1174 (
1175     GrB_Matrix C,
1176     // input:
1177     const GrB_Matrix M,
1178     const void *scalar,
1179     const GrB_Type atype,
1180     GB_Context Context
1181 ) ;
1182 
1183 //------------------------------------------------------------------------------
1184 // GB_subassign_05f: C(:,:)<C,struct> = scalar ; no S, C anything
1185 //------------------------------------------------------------------------------
1186 
1187 GrB_Info GB_subassign_05f
1188 (
1189     GrB_Matrix C,
1190     // input:
1191     const void *scalar,
1192     const GrB_Type atype,
1193     GB_Context Context
1194 ) ;
1195 
1196 //------------------------------------------------------------------------------
1197 // GB_subassign_06n: C(I,J)<M> = A ; no S
1198 //------------------------------------------------------------------------------
1199 
1200 GrB_Info GB_subassign_06n
1201 (
1202     GrB_Matrix C,
1203     // input:
1204     const GrB_Index *I,
1205     const int64_t nI,
1206     const int Ikind,
1207     const int64_t Icolon [3],
1208     const GrB_Index *J,
1209     const int64_t nJ,
1210     const int Jkind,
1211     const int64_t Jcolon [3],
1212     const GrB_Matrix M,
1213     const bool Mask_struct,
1214     const GrB_Matrix A,
1215     GB_Context Context
1216 ) ;
1217 
1218 //------------------------------------------------------------------------------
1219 // GB_subassign_06s_and_14: C(I,J)<M or !M> = A ; using S
1220 //------------------------------------------------------------------------------
1221 
1222 GrB_Info GB_subassign_06s_and_14
1223 (
1224     GrB_Matrix C,
1225     // input:
1226     const GrB_Index *I,
1227     const int64_t ni,
1228     const int64_t nI,
1229     const int Ikind,
1230     const int64_t Icolon [3],
1231     const GrB_Index *J,
1232     const int64_t nj,
1233     const int64_t nJ,
1234     const int Jkind,
1235     const int64_t Jcolon [3],
1236     const GrB_Matrix M,
1237     const bool Mask_struct,         // if true, use the only structure of M
1238     const bool Mask_comp,           // if true, !M, else use M
1239     const GrB_Matrix A,
1240     GB_Context Context
1241 ) ;
1242 
1243 //------------------------------------------------------------------------------
1244 // GB_subassign_07: C(I,J)<M> += scalar ; no S
1245 //------------------------------------------------------------------------------
1246 
1247 GrB_Info GB_subassign_07
1248 (
1249     GrB_Matrix C,
1250     // input:
1251     const GrB_Index *I,
1252     const int64_t nI,
1253     const int Ikind,
1254     const int64_t Icolon [3],
1255     const GrB_Index *J,
1256     const int64_t nJ,
1257     const int Jkind,
1258     const int64_t Jcolon [3],
1259     const GrB_Matrix M,
1260     const bool Mask_struct,
1261     const GrB_BinaryOp accum,
1262     const void *scalar,
1263     const GrB_Type atype,
1264     GB_Context Context
1265 ) ;
1266 
1267 //------------------------------------------------------------------------------
1268 // GB_subassign_08n: C(I,J)<M> += A ; no S
1269 //------------------------------------------------------------------------------
1270 
1271 GrB_Info GB_subassign_08n
1272 (
1273     GrB_Matrix C,
1274     // input:
1275     const GrB_Index *I,
1276     const int64_t nI,
1277     const int Ikind,
1278     const int64_t Icolon [3],
1279     const GrB_Index *J,
1280     const int64_t nJ,
1281     const int Jkind,
1282     const int64_t Jcolon [3],
1283     const GrB_Matrix M,
1284     const bool Mask_struct,
1285     const GrB_BinaryOp accum,
1286     const GrB_Matrix A,
1287     GB_Context Context
1288 ) ;
1289 
1290 //------------------------------------------------------------------------------
1291 // GB_subassign_09: C(I,J)<M,repl> = scalar ; using S
1292 //------------------------------------------------------------------------------
1293 
1294 GrB_Info GB_subassign_09
1295 (
1296     GrB_Matrix C,
1297     // input:
1298     const GrB_Index *I,
1299     const int64_t ni,
1300     const int64_t nI,
1301     const int Ikind,
1302     const int64_t Icolon [3],
1303     const GrB_Index *J,
1304     const int64_t nj,
1305     const int64_t nJ,
1306     const int Jkind,
1307     const int64_t Jcolon [3],
1308     const GrB_Matrix M,
1309     const bool Mask_struct,
1310     const void *scalar,
1311     const GrB_Type atype,
1312     GB_Context Context
1313 ) ;
1314 
1315 //------------------------------------------------------------------------------
1316 // GB_subassign_10_and_18: C(I,J)<M or !M,repl> = A ; using S
1317 //------------------------------------------------------------------------------
1318 
1319 GrB_Info GB_subassign_10_and_18
1320 (
1321     GrB_Matrix C,
1322     // input:
1323     const GrB_Index *I,
1324     const int64_t ni,
1325     const int64_t nI,
1326     const int Ikind,
1327     const int64_t Icolon [3],
1328     const GrB_Index *J,
1329     const int64_t nj,
1330     const int64_t nJ,
1331     const int Jkind,
1332     const int64_t Jcolon [3],
1333     const GrB_Matrix M,
1334     const bool Mask_struct,         // if true, use the only structure of M
1335     const bool Mask_comp,           // if true, !M, else use M
1336     const GrB_Matrix A,
1337     GB_Context Context
1338 ) ;
1339 
1340 //------------------------------------------------------------------------------
1341 // GB_subassign_11: C(I,J)<M,repl> += scalar ; using S
1342 //------------------------------------------------------------------------------
1343 
1344 GrB_Info GB_subassign_11
1345 (
1346     GrB_Matrix C,
1347     // input:
1348     const GrB_Index *I,
1349     const int64_t ni,
1350     const int64_t nI,
1351     const int Ikind,
1352     const int64_t Icolon [3],
1353     const GrB_Index *J,
1354     const int64_t nj,
1355     const int64_t nJ,
1356     const int Jkind,
1357     const int64_t Jcolon [3],
1358     const GrB_Matrix M,
1359     const bool Mask_struct,
1360     const GrB_BinaryOp accum,
1361     const void *scalar,
1362     const GrB_Type atype,
1363     GB_Context Context
1364 ) ;
1365 
1366 //------------------------------------------------------------------------------
1367 // GB_subassign_12_and_20: C(I,J)<M or !M,repl> += A ; using S
1368 //------------------------------------------------------------------------------
1369 
1370 GrB_Info GB_subassign_12_and_20
1371 (
1372     GrB_Matrix C,
1373     // input:
1374     const GrB_Index *I,
1375     const int64_t ni,
1376     const int64_t nI,
1377     const int Ikind,
1378     const int64_t Icolon [3],
1379     const GrB_Index *J,
1380     const int64_t nj,
1381     const int64_t nJ,
1382     const int Jkind,
1383     const int64_t Jcolon [3],
1384     const GrB_Matrix M,
1385     const bool Mask_struct,         // if true, use the only structure of M
1386     const bool Mask_comp,           // if true, !M, else use M
1387     const GrB_BinaryOp accum,
1388     const GrB_Matrix A,
1389     GB_Context Context
1390 ) ;
1391 
1392 //------------------------------------------------------------------------------
1393 // GB_subassign_13: C(I,J)<!M> = scalar ; using S
1394 //------------------------------------------------------------------------------
1395 
1396 GrB_Info GB_subassign_13
1397 (
1398     GrB_Matrix C,
1399     // input:
1400     const GrB_Index *I,
1401     const int64_t ni,
1402     const int64_t nI,
1403     const int Ikind,
1404     const int64_t Icolon [3],
1405     const GrB_Index *J,
1406     const int64_t nj,
1407     const int64_t nJ,
1408     const int Jkind,
1409     const int64_t Jcolon [3],
1410     const GrB_Matrix M,
1411     const bool Mask_struct,
1412     const void *scalar,
1413     const GrB_Type atype,
1414     GB_Context Context
1415 ) ;
1416 
1417 //------------------------------------------------------------------------------
1418 // GB_subassign_15: C(I,J)<!M> += scalar ; using S
1419 //------------------------------------------------------------------------------
1420 
1421 GrB_Info GB_subassign_15
1422 (
1423     GrB_Matrix C,
1424     // input:
1425     const GrB_Index *I,
1426     const int64_t ni,
1427     const int64_t nI,
1428     const int Ikind,
1429     const int64_t Icolon [3],
1430     const GrB_Index *J,
1431     const int64_t nj,
1432     const int64_t nJ,
1433     const int Jkind,
1434     const int64_t Jcolon [3],
1435     const GrB_Matrix M,
1436     const bool Mask_struct,
1437     const GrB_BinaryOp accum,
1438     const void *scalar,
1439     const GrB_Type atype,
1440     GB_Context Context
1441 ) ;
1442 
1443 //------------------------------------------------------------------------------
1444 // GB_subassign_16:  C(I,J)<!M> += A ; using S
1445 // GB_subassign_08s: C(I,J)<M> += A ; using S.  Compare with method 08n
1446 //------------------------------------------------------------------------------
1447 
1448 GrB_Info GB_subassign_08s_and_16
1449 (
1450     GrB_Matrix C,
1451     // input:
1452     const GrB_Index *I,
1453     const int64_t ni,
1454     const int64_t nI,
1455     const int Ikind,
1456     const int64_t Icolon [3],
1457     const GrB_Index *J,
1458     const int64_t nj,
1459     const int64_t nJ,
1460     const int Jkind,
1461     const int64_t Jcolon [3],
1462     const GrB_Matrix M,
1463     const bool Mask_struct,         // if true, use the only structure of M
1464     const bool Mask_comp,           // if true, !M, else use M
1465     const GrB_BinaryOp accum,
1466     const GrB_Matrix A,
1467     GB_Context Context
1468 ) ;
1469 
1470 //------------------------------------------------------------------------------
1471 // GB_subassign_17: C(I,J)<!M,repl> = scalar ; using S
1472 //------------------------------------------------------------------------------
1473 
1474 GrB_Info GB_subassign_17
1475 (
1476     GrB_Matrix C,
1477     // input:
1478     const GrB_Index *I,
1479     const int64_t ni,
1480     const int64_t nI,
1481     const int Ikind,
1482     const int64_t Icolon [3],
1483     const GrB_Index *J,
1484     const int64_t nj,
1485     const int64_t nJ,
1486     const int Jkind,
1487     const int64_t Jcolon [3],
1488     const GrB_Matrix M,
1489     const bool Mask_struct,
1490     const void *scalar,
1491     const GrB_Type atype,
1492     GB_Context Context
1493 ) ;
1494 
1495 //------------------------------------------------------------------------------
1496 // GB_subassign_19: C(I,J)<!M,repl> += scalar ; using S
1497 //------------------------------------------------------------------------------
1498 
1499 GrB_Info GB_subassign_19
1500 (
1501     GrB_Matrix C,
1502     // input:
1503     const GrB_Index *I,
1504     const int64_t ni,
1505     const int64_t nI,
1506     const int Ikind,
1507     const int64_t Icolon [3],
1508     const GrB_Index *J,
1509     const int64_t nj,
1510     const int64_t nJ,
1511     const int Jkind,
1512     const int64_t Jcolon [3],
1513     const GrB_Matrix M,
1514     const bool Mask_struct,
1515     const GrB_BinaryOp accum,
1516     const void *scalar,
1517     const GrB_Type atype,
1518     GB_Context Context
1519 ) ;
1520 
1521 //------------------------------------------------------------------------------
1522 // GB_ALLOCATE_NPENDING_WERK: allocate Npending workspace
1523 //------------------------------------------------------------------------------
1524 
1525 #define GB_ALLOCATE_NPENDING_WERK                                           \
1526     GB_WERK_PUSH (Npending, ntasks+1, int64_t) ;                            \
1527     if (Npending == NULL)                                                   \
1528     {                                                                       \
1529         GB_FREE_ALL ;                                                       \
1530         return (GrB_OUT_OF_MEMORY) ;                                        \
1531     }
1532 
1533 //------------------------------------------------------------------------------
1534 // GB_SUBASSIGN_ONE_SLICE: slice one matrix (M)
1535 //------------------------------------------------------------------------------
1536 
1537 // Methods: 05, 06n, 07.  If C is dense, it is sliced for a fine task, so that
1538 // it can do a binary search via GB_iC_BINARY_SEARCH.  But if C(:,jC) is dense,
1539 // C(:,jC) is not sliced, so the fine task must do a direct lookup via
1540 // GB_iC_DENSE_LOOKUP.  Otherwise a race condition will occur.
1541 
1542 #define GB_SUBASSIGN_ONE_SLICE(M)                                           \
1543     GB_OK (GB_subassign_one_slice (                                         \
1544         &TaskList, &TaskList_size, &ntasks, &nthreads,                      \
1545         C, I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,                      \
1546         M, Context)) ;                                                      \
1547     GB_ALLOCATE_NPENDING_WERK ;
1548 
1549 //------------------------------------------------------------------------------
1550 // GB_SUBASSIGN_TWO_SLICE: slice two matrices
1551 //------------------------------------------------------------------------------
1552 
1553 // Methods: 02, 04, 06s, 09, 10, 11, 12, 14, 16, 18, 20.
1554 
1555 // Create tasks for Z = X+S, and the mapping of Z to X and S.  The matrix X is
1556 // either A or M.  No need to examine C, since it will be accessed via S, not
1557 // via binary search.
1558 
1559 // If X is bitmap, this method is not used.  Instead, GB_SUBASSIGN_IXJ_SLICE is
1560 // used to iterate over the matrix X.
1561 
1562 #define GB_SUBASSIGN_TWO_SLICE(X,S)                                         \
1563     int Z_sparsity = GxB_SPARSE ;                                           \
1564     int64_t Znvec ;                                                         \
1565     GB_OK (GB_add_phase0 (                                                  \
1566         &Znvec, &Zh, &Zh_size, NULL, NULL, &Z_to_X, &Z_to_X_size,           \
1567         &Z_to_S, &Z_to_S_size, NULL, &Z_sparsity,                           \
1568         NULL, X, S, Context)) ;                                             \
1569     GB_OK (GB_ewise_slice (                                                 \
1570         &TaskList, &TaskList_size, &ntasks, &nthreads,                      \
1571         Znvec, Zh, NULL, Z_to_X, Z_to_S, false,                             \
1572         NULL, X, S, Context)) ;                                             \
1573     GB_ALLOCATE_NPENDING_WERK ;
1574 
1575 //------------------------------------------------------------------------------
1576 // GB_SUBASSIGN_IXJ_SLICE: slice IxJ for a scalar assignement method
1577 //------------------------------------------------------------------------------
1578 
1579 // Methods: 01, 03, 13, 15, 17, 19.  All of these methods access the C matrix
1580 // via S, not via binary search.
1581 
1582 #define GB_SUBASSIGN_IXJ_SLICE                                              \
1583     GB_OK (GB_subassign_IxJ_slice (                                         \
1584         &TaskList, &TaskList_size, &ntasks, &nthreads,                      \
1585         /* I, */ nI, /* Ikind, Icolon, J, */ nJ, /* Jkind, Jcolon, */       \
1586         Context)) ;                                                         \
1587     GB_ALLOCATE_NPENDING_WERK ;
1588 
1589 //------------------------------------------------------------------------------
1590 // GB_subassign_one_slice
1591 //------------------------------------------------------------------------------
1592 
1593 // Slice A or M into fine/coarse tasks, for GB_subassign_05, 06n, and 07
1594 
1595 GrB_Info GB_subassign_one_slice
1596 (
1597     // output:
1598     GB_task_struct **p_TaskList,    // array of structs
1599     size_t *p_TaskList_size,        // size of TaskList
1600     int *p_ntasks,                  // # of tasks constructed
1601     int *p_nthreads,                // # of threads to use
1602     // input:
1603     const GrB_Matrix C,             // output matrix C
1604     const GrB_Index *I,
1605     const int64_t nI,
1606     const int Ikind,
1607     const int64_t Icolon [3],
1608     const GrB_Index *J,
1609     const int64_t nJ,
1610     const int Jkind,
1611     const int64_t Jcolon [3],
1612     const GrB_Matrix A,             // matrix to slice (M or A)
1613     GB_Context Context
1614 ) ;
1615 
1616 //------------------------------------------------------------------------------
1617 // GB_subassign_emult_slice: slice the entries and vectors for GB_subassign_08n
1618 //------------------------------------------------------------------------------
1619 
1620 GrB_Info GB_subassign_emult_slice
1621 (
1622     // output:
1623     GB_task_struct **p_TaskList,    // array of structs, of size max_ntasks
1624     size_t *p_TaskList_size,        // size of TaskList
1625     int *p_ntasks,                  // # of tasks constructed
1626     int *p_nthreads,                // # of threads to use
1627     int64_t *p_Znvec,               // # of vectors to compute in Z
1628     const int64_t *restrict *Zh_handle,  // Zh_shallow is A->h, M->h, or NULL
1629     int64_t *restrict *Z_to_A_handle,    // Z_to_A: size Znvec, or NULL
1630     size_t *Z_to_A_size_handle,
1631     int64_t *restrict *Z_to_M_handle,    // Z_to_M: size Znvec, or NULL
1632     size_t *Z_to_M_size_handle,
1633     // input:
1634     const GrB_Matrix C,             // output matrix C
1635     const GrB_Index *I,
1636     const int64_t nI,
1637     const int Ikind,
1638     const int64_t Icolon [3],
1639     const GrB_Index *J,
1640     const int64_t nJ,
1641     const int Jkind,
1642     const int64_t Jcolon [3],
1643     const GrB_Matrix A,             // matrix to slice
1644     const GrB_Matrix M,             // matrix to slice
1645     GB_Context Context
1646 ) ;
1647 
1648 //------------------------------------------------------------------------------
1649 // GB_GET_TASK_DESCRIPTOR: get coarse/fine task descriptor
1650 //------------------------------------------------------------------------------
1651 
1652 #define GB_GET_TASK_DESCRIPTOR                                              \
1653     int64_t kfirst = TaskList [taskid].kfirst ;                             \
1654     int64_t klast  = TaskList [taskid].klast ;                              \
1655     bool fine_task = (klast == -1) ;                                        \
1656     if (fine_task)                                                          \
1657     {                                                                       \
1658         /* a fine task operates on a slice of a single vector */            \
1659         klast = kfirst ;                                                    \
1660     }                                                                       \
1661 
1662 #define GB_GET_TASK_DESCRIPTOR_PHASE1                                       \
1663     GB_GET_TASK_DESCRIPTOR ;                                                \
1664     int64_t task_nzombies = 0 ;                                             \
1665     int64_t task_pending = 0 ;
1666 
1667 //------------------------------------------------------------------------------
1668 // GB_GET_MAPPED: get the content of a vector for a coarse/fine task
1669 //------------------------------------------------------------------------------
1670 
1671 #define GB_GET_MAPPED(pX_start, pX_fini, pX, pX_end, Xp, j, k, Z_to_X, Xvlen) \
1672     int64_t pX_start = -1, pX_fini = -1 ;                                   \
1673     if (fine_task)                                                          \
1674     {                                                                       \
1675         /* A fine task operates on a slice of X(:,k) */                     \
1676         pX_start = TaskList [taskid].pX ;                                   \
1677         pX_fini  = TaskList [taskid].pX_end ;                               \
1678     }                                                                       \
1679     else                                                                    \
1680     {                                                                       \
1681         /* vectors are never sliced for a coarse task */                    \
1682         int64_t kX = (Z_to_X == NULL) ? j : Z_to_X [k] ;                    \
1683         if (kX >= 0)                                                        \
1684         {                                                                   \
1685             pX_start = GBP (Xp, kX, Xvlen) ;                                \
1686             pX_fini  = GBP (Xp, kX+1, Xvlen) ;                              \
1687         }                                                                   \
1688     }
1689 
1690 //------------------------------------------------------------------------------
1691 // GB_GET_EVEC: get the content of a vector for Method08n
1692 //------------------------------------------------------------------------------
1693 
1694 #define GB_GET_EVEC(pX_start, pX_fini, pX, pX_end, Xp, Xh, j,k,Z_to_X,Xvlen)\
1695     int64_t pX_start = -1, pX_fini = -1 ;                                   \
1696     if (fine_task)                                                          \
1697     {                                                                       \
1698         /* A fine task operates on a slice of X(:,k) */                     \
1699         pX_start = TaskList [taskid].pX ;                                   \
1700         pX_fini  = TaskList [taskid].pX_end ;                               \
1701     }                                                                       \
1702     else                                                                    \
1703     {                                                                       \
1704         /* vectors are never sliced for a coarse task */                    \
1705         int64_t kX = (Zh_shallow == Xh) ? k :                               \
1706             ((Z_to_X == NULL) ? j : Z_to_X [k]) ;                           \
1707         if (kX >= 0)                                                        \
1708         {                                                                   \
1709             pX_start = GBP (Xp, kX, Xvlen) ;                                \
1710             pX_fini  = GBP (Xp, kX+1, Xvlen) ;                              \
1711         }                                                                   \
1712     }
1713 
1714 //------------------------------------------------------------------------------
1715 // GB_GET_jC: get the vector C(:,jC)
1716 //------------------------------------------------------------------------------
1717 
1718 #define GB_GET_jC                                                           \
1719     int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;                          \
1720     int64_t pC_start, pC_end ;                                              \
1721     if (fine_task)                                                          \
1722     {                                                                       \
1723         pC_start = TaskList [taskid].pC ;                                   \
1724         pC_end   = TaskList [taskid].pC_end ;                               \
1725     }                                                                       \
1726     else                                                                    \
1727     {                                                                       \
1728         GB_VECTOR_LOOKUP (pC_start, pC_end, C, jC) ;                        \
1729     }
1730 
1731 //------------------------------------------------------------------------------
1732 // GB_GET_IXJ_TASK_DESCRIPTOR*: get the task descriptor for IxJ
1733 //------------------------------------------------------------------------------
1734 
1735 // Q denotes the Cartesian product IXJ
1736 
1737 #define GB_GET_IXJ_TASK_DESCRIPTOR(iQ_start,iQ_end)                         \
1738     GB_GET_TASK_DESCRIPTOR ;                                                \
1739     int64_t iQ_start = 0, iQ_end = nI ;                                     \
1740     if (fine_task)                                                          \
1741     {                                                                       \
1742         iQ_start = TaskList [taskid].pA ;                                   \
1743         iQ_end   = TaskList [taskid].pA_end ;                               \
1744     }
1745 
1746 #define GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1(iQ_start,iQ_end)                  \
1747     GB_GET_IXJ_TASK_DESCRIPTOR (iQ_start, iQ_end)                           \
1748     int64_t task_nzombies = 0 ;                                             \
1749     int64_t task_pending = 0 ;
1750 
1751 #define GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2(iQ_start,iQ_end)                  \
1752     GB_GET_IXJ_TASK_DESCRIPTOR (iQ_start, iQ_end)                           \
1753     GB_START_PENDING_INSERTION ;
1754 
1755 //------------------------------------------------------------------------------
1756 // GB_GET_VECTOR_FOR_IXJ: get the start of a vector for scalar assignment
1757 //------------------------------------------------------------------------------
1758 
1759 // Find pX and pX_end for the vector X (iQ_start:iQ_end, j), for a scalar
1760 // assignment method, or a method iterating over all IxJ for a bitmap M or A.
1761 
1762 #define GB_GET_VECTOR_FOR_IXJ(X,iQ_start)                                   \
1763     int64_t p ## X, p ## X ## _end ;                                        \
1764     GB_VECTOR_LOOKUP (p ## X, p ## X ## _end, X, j) ;                       \
1765     if (iQ_start != 0)                                                      \
1766     {                                                                       \
1767         if (X ## i == NULL)                                                 \
1768         {                                                                   \
1769             /* X is full or bitmap */                                       \
1770             p ## X += iQ_start ;                                            \
1771         }                                                                   \
1772         else                                                                \
1773         {                                                                   \
1774             /* X is sparse or hypersparse */                                \
1775             int64_t pright = p ## X ## _end - 1 ;                           \
1776             bool found ;                                                    \
1777             GB_SPLIT_BINARY_SEARCH (iQ_start, X ## i, p ## X, pright, found) ;\
1778         }                                                                   \
1779     }
1780 
1781 //------------------------------------------------------------------------------
1782 // GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP
1783 //------------------------------------------------------------------------------
1784 
1785 // mij = M(i,j)
1786 
1787 #define GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP(i)                             \
1788     bool mij ;                                                              \
1789     if (M_is_bitmap)                                                        \
1790     {                                                                       \
1791         /* M(:,j) is bitmap, no need for binary search */                   \
1792         int64_t pM = pM_start + i ;                                         \
1793         mij = Mb [pM] && GB_mcast (Mx, pM, msize) ;                         \
1794     }                                                                       \
1795     else if (mjdense)                                                       \
1796     {                                                                       \
1797         /* M(:,j) is dense, no need for binary search */                    \
1798         int64_t pM = pM_start + i ;                                         \
1799         mij = GB_mcast (Mx, pM, msize) ;                                    \
1800     }                                                                       \
1801     else                                                                    \
1802     {                                                                       \
1803         /* M(:,j) is sparse, binary search for M(i,j) */                    \
1804         int64_t pM     = pM_start ;                                         \
1805         int64_t pright = pM_end - 1 ;                                       \
1806         bool found ;                                                        \
1807         GB_BINARY_SEARCH (i, Mi, pM, pright, found) ;                       \
1808         if (found)                                                          \
1809         {                                                                   \
1810             mij = GB_mcast (Mx, pM, msize) ;                                \
1811         }                                                                   \
1812         else                                                                \
1813         {                                                                   \
1814             mij = false ;                                                   \
1815         }                                                                   \
1816     }                                                                       \
1817 
1818 //------------------------------------------------------------------------------
1819 // GB_PHASE1_TASK_WRAPUP: wrapup for a task in phase 1
1820 //------------------------------------------------------------------------------
1821 
1822 // sum up the zombie count, and record the # of pending tuples for this task
1823 
1824 #define GB_PHASE1_TASK_WRAPUP                                               \
1825     nzombies += task_nzombies ;                                             \
1826     Npending [taskid] = task_pending ;
1827 
1828 //------------------------------------------------------------------------------
1829 // GB_PENDING_CUMSUM: finalize zombies, count # pending tuples for all tasks
1830 //------------------------------------------------------------------------------
1831 
1832 #define GB_PENDING_CUMSUM                                                   \
1833     C->nzombies = nzombies ;                                                \
1834     GB_cumsum (Npending, ntasks, NULL, 1, NULL) ;                           \
1835     int64_t nnew = Npending [ntasks] ;                                      \
1836     if (nnew == 0)                                                          \
1837     {                                                                       \
1838         /* no pending tuples, so skip phase 2 */                            \
1839         GB_FREE_ALL ;                                                       \
1840         ASSERT_MATRIX_OK (C, "C, no pending tuples " __FILE__, \
1841             GB_FLIP (GB0)) ;      \
1842         return (GrB_SUCCESS) ;                                              \
1843     }                                                                       \
1844     /* ensure that C->Pending is large enough to handle nnew more tuples */ \
1845     if (!GB_Pending_ensure (&(C->Pending), atype, accum, is_matrix, nnew,   \
1846         Context))                                                           \
1847     {                                                                       \
1848         GB_FREE_ALL ;                                                       \
1849         return (GrB_OUT_OF_MEMORY) ;                                        \
1850     }                                                                       \
1851     GB_Pending Pending = C->Pending ;                                       \
1852     int64_t *restrict Pending_i = Pending->i ;                           \
1853     int64_t *restrict Pending_j = Pending->j ;                           \
1854     GB_void *restrict Pending_x = Pending->x ;                           \
1855     int64_t npending_orig = Pending->n ;                                    \
1856     bool pending_sorted = Pending->sorted ;
1857 
1858 //------------------------------------------------------------------------------
1859 // GB_START_PENDING_INSERTION: start insertion of pending tuples (phase 2)
1860 //------------------------------------------------------------------------------
1861 
1862 #define GB_START_PENDING_INSERTION                                          \
1863     bool task_sorted = true ;                                               \
1864     int64_t ilast = -1 ;                                                    \
1865     int64_t jlast = -1 ;                                                    \
1866     int64_t n = Npending [taskid] ;                                         \
1867     int64_t task_pending = Npending [taskid+1] - n ;                        \
1868     if (task_pending == 0) continue ;                                       \
1869     n += npending_orig ;
1870 
1871 #define GB_GET_TASK_DESCRIPTOR_PHASE2                                       \
1872     GB_GET_TASK_DESCRIPTOR ;                                                \
1873     GB_START_PENDING_INSERTION ;
1874 
1875 //------------------------------------------------------------------------------
1876 // GB_PHASE2_TASK_WRAPUP: wrapup for a task in phase 2
1877 //------------------------------------------------------------------------------
1878 
1879 #define GB_PHASE2_TASK_WRAPUP                                               \
1880     pending_sorted = pending_sorted && task_sorted ;                        \
1881     ASSERT (n == npending_orig + Npending [taskid+1]) ;
1882 
1883 //------------------------------------------------------------------------------
1884 // GB_SUBASSIGN_WRAPUP: finalize the subassign method after phase 2
1885 //------------------------------------------------------------------------------
1886 
1887 // If pending_sorted is true, then the original pending tuples (if any) were
1888 // sorted, and each task found that its tuples were also sorted.  The
1889 // boundaries between each task must now be checked.
1890 
1891 #define GB_SUBASSIGN_WRAPUP                                                 \
1892     if (pending_sorted)                                                     \
1893     {                                                                       \
1894         for (int taskid = 0 ; pending_sorted && taskid < ntasks ; taskid++) \
1895         {                                                                   \
1896             int64_t n = Npending [taskid] ;                                 \
1897             int64_t task_pending = Npending [taskid+1] - n ;                \
1898             n += npending_orig ;                                            \
1899             if (task_pending > 0 && n > 0)                                  \
1900             {                                                               \
1901                 /* (i,j) is the first pending tuple for this task; check */ \
1902                 /* with the pending tuple just before it                 */ \
1903                 ASSERT (n < npending_orig + nnew) ;                         \
1904                 int64_t i = Pending_i [n] ;                                 \
1905                 int64_t j = (Pending_j != NULL) ? Pending_j [n] : 0 ;       \
1906                 int64_t ilast = Pending_i [n-1] ;                           \
1907                 int64_t jlast = (Pending_j != NULL) ? Pending_j [n-1] : 0 ; \
1908                 pending_sorted = pending_sorted &&                          \
1909                     ((jlast < j) || (jlast == j && ilast <= i)) ;           \
1910             }                                                               \
1911         }                                                                   \
1912     }                                                                       \
1913     Pending->n += nnew ;                                                    \
1914     Pending->sorted = pending_sorted ;                                      \
1915     GB_FREE_ALL ;                                                           \
1916     ASSERT_MATRIX_OK (C, "C with pending tuples :" __FILE__, GB_FLIP (GB0)) ;\
1917     return (GrB_SUCCESS) ;
1918 
1919 #endif
1920 
1921