1 //------------------------------------------------------------------------------ 2 // GB_subassign_methods.h: definitions for GB_subassign methods 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // macros for the construction of the GB_subassign methods 11 12 #ifndef GB_SUBASSIGN_METHODS_H 13 #define GB_SUBASSIGN_METHODS_H 14 #include "GB_add.h" 15 #include "GB_ij.h" 16 #include "GB_Pending.h" 17 #include "GB_unused.h" 18 #include "GB_subassign_IxJ_slice.h" 19 20 //------------------------------------------------------------------------------ 21 // free workspace 22 //------------------------------------------------------------------------------ 23 24 #ifndef GB_FREE_WORK 25 #define GB_FREE_WORK ; 26 #endif 27 28 #undef GB_FREE_ALL 29 #define GB_FREE_ALL \ 30 { \ 31 GB_FREE_WORK ; \ 32 GB_WERK_POP (Npending, int64_t) ; \ 33 GB_FREE_WERK (&TaskList, TaskList_size) ; \ 34 GB_FREE (&Zh, Zh_size) ; \ 35 GB_FREE_WERK (&Z_to_X, Z_to_X_size) ; \ 36 GB_FREE_WERK (&Z_to_S, Z_to_S_size) ; \ 37 GB_FREE_WERK (&Z_to_A, Z_to_A_size) ; \ 38 GB_FREE_WERK (&Z_to_M, Z_to_M_size) ; \ 39 GB_phbix_free (S); \ 40 } 41 42 //------------------------------------------------------------------------------ 43 // GB_EMPTY_TASKLIST: declare an empty TaskList 44 //------------------------------------------------------------------------------ 45 46 #define GB_EMPTY_TASKLIST \ 47 GrB_Info info ; \ 48 int taskid, ntasks = 0, nthreads ; \ 49 GB_task_struct *TaskList = NULL ; size_t TaskList_size = 0 ; \ 50 GB_WERK_DECLARE (Npending, int64_t) ; \ 51 int64_t *restrict Zh = NULL ; size_t Zh_size = 0 ; \ 52 int64_t *restrict Z_to_X = NULL ; size_t Z_to_X_size = 0 ; \ 53 int64_t *restrict Z_to_S = NULL ; size_t Z_to_S_size = 0 ; \ 54 int64_t *restrict Z_to_A = NULL ; size_t Z_to_A_size = 0 ; \ 55 int64_t *restrict Z_to_M = NULL ; size_t Z_to_M_size = 0 ; \ 56 struct GB_Matrix_opaque S_header ; \ 57 GrB_Matrix S = GB_clear_static_header (&S_header) ; 58 59 //------------------------------------------------------------------------------ 60 // GB_GET_C: get the C matrix (cannot be bitmap) 61 //------------------------------------------------------------------------------ 62 63 // C cannot be aliased with M or A. 64 65 #define GB_GET_C \ 66 ASSERT_MATRIX_OK (C, "C for subassign kernel", GB0) ; \ 67 ASSERT (!GB_IS_BITMAP (C)) ; \ 68 int64_t *restrict Ci = C->i ; \ 69 GB_void *restrict Cx = (GB_void *) C->x ; \ 70 const size_t csize = C->type->size ; \ 71 const GB_Type_code ccode = C->type->code ; \ 72 const int64_t cvdim = C->vdim ; \ 73 const int64_t Cvlen = C->vlen ; \ 74 int64_t nzombies = C->nzombies ; \ 75 const bool is_matrix = (cvdim > 1) ; 76 77 //------------------------------------------------------------------------------ 78 // GB_GET_MASK: get the mask matrix M 79 //------------------------------------------------------------------------------ 80 81 // M and A can be aliased, but both are const. 82 83 #define GB_GET_MASK \ 84 ASSERT_MATRIX_OK (M, "M for assign", GB0) ; \ 85 const int64_t *Mp = M->p ; \ 86 const int64_t *Mh = M->h ; \ 87 const int8_t *Mb = M->b ; \ 88 const int64_t *Mi = M->i ; \ 89 const GB_void *Mx = (GB_void *) (Mask_struct ? NULL : (M->x)) ; \ 90 const size_t msize = M->type->size ; \ 91 const size_t Mvlen = M->vlen ; \ 92 const int64_t Mnvec = M->nvec ; \ 93 const bool M_is_hyper = GB_IS_HYPERSPARSE (M) ; \ 94 const bool M_is_bitmap = GB_IS_BITMAP (M) ; 95 96 //------------------------------------------------------------------------------ 97 // GB_GET_ACCUM: get the accumulator op and its related typecasting functions 98 //------------------------------------------------------------------------------ 99 100 #define GB_GET_ACCUM \ 101 ASSERT_BINARYOP_OK (accum, "accum for assign", GB0) ; \ 102 ASSERT (!GB_OP_IS_POSITIONAL (accum)) ; \ 103 const GxB_binary_function faccum = accum->function ; \ 104 const GB_cast_function \ 105 cast_A_to_Y = GB_cast_factory (accum->ytype->code, acode), \ 106 cast_C_to_X = GB_cast_factory (accum->xtype->code, ccode), \ 107 cast_Z_to_C = GB_cast_factory (ccode, accum->ztype->code) ; \ 108 const size_t xsize = accum->xtype->size ; \ 109 const size_t ysize = accum->ytype->size ; \ 110 const size_t zsize = accum->ztype->size ; 111 112 //------------------------------------------------------------------------------ 113 // GB_GET_A: get the A matrix 114 //------------------------------------------------------------------------------ 115 116 #define GB_GET_A \ 117 ASSERT_MATRIX_OK (A, "A for assign", GB0) ; \ 118 const GrB_Type atype = A->type ; \ 119 const size_t asize = atype->size ; \ 120 const GB_Type_code acode = atype->code ; \ 121 const int64_t *Ap = A->p ; \ 122 const int8_t *Ab = A->b ; \ 123 const int64_t *Ai = A->i ; \ 124 const GB_void *Ax = (GB_void *) A->x ; \ 125 const GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ; \ 126 const int64_t Avlen = A->vlen ; \ 127 const bool A_is_bitmap = GB_IS_BITMAP (A) ; 128 129 //------------------------------------------------------------------------------ 130 // GB_GET_SCALAR: get the scalar 131 //------------------------------------------------------------------------------ 132 133 #define GB_GET_SCALAR \ 134 ASSERT_TYPE_OK (atype, "atype for assign", GB0) ; \ 135 const size_t asize = atype->size ; \ 136 const GB_Type_code acode = atype->code ; \ 137 const GB_cast_function cast_A_to_C = GB_cast_factory (ccode, acode) ; \ 138 GB_void cwork [GB_VLA(csize)] ; \ 139 cast_A_to_C (cwork, scalar, asize) ; \ 140 141 //------------------------------------------------------------------------------ 142 // GB_GET_ACCUM_SCALAR: get the scalar and the accumulator 143 //------------------------------------------------------------------------------ 144 145 #define GB_GET_ACCUM_SCALAR \ 146 GB_GET_SCALAR ; \ 147 GB_GET_ACCUM ; \ 148 GB_void ywork [GB_VLA(ysize)] ; \ 149 cast_A_to_Y (ywork, scalar, asize) ; 150 151 //------------------------------------------------------------------------------ 152 // GB_GET_S: get the S matrix 153 //------------------------------------------------------------------------------ 154 155 // S is never aliased with any other matrix. 156 // FUTURE: S->p could be C->p and S->x NULL if I and J are (:,:) 157 158 #define GB_GET_S \ 159 ASSERT_MATRIX_OK (S, "S extraction", GB0) ; \ 160 const int64_t *restrict Sp = S->p ; \ 161 const int64_t *restrict Sh = S->h ; \ 162 const int64_t *restrict Si = S->i ; \ 163 const int64_t *restrict Sx = (int64_t *) S->x ; \ 164 const int64_t Svlen = S->vlen ; \ 165 const int64_t Snvec = S->nvec ; \ 166 const bool S_is_hyper = GB_IS_HYPERSPARSE (S) ; 167 168 //------------------------------------------------------------------------------ 169 // basic actions 170 //------------------------------------------------------------------------------ 171 172 //-------------------------------------------------------------------------- 173 // S_Extraction: finding C(iC,jC) via lookup through S=C(I,J) 174 //-------------------------------------------------------------------------- 175 176 // S is the symbolic pattern of the submatrix S = C(I,J). The "numerical" 177 // value (held in S->x) of an entry S(i,j) is not a value, but a pointer 178 // back into C where the corresponding entry C(iC,jC) can be found, where 179 // iC = I [i] and jC = J [j]. 180 181 // The following macro performs the lookup. Given a pointer pS into a 182 // column S(:,j), it finds the entry C(iC,jC), and also determines if the 183 // C(iC,jC) entry is a zombie. The column indices j and jC are implicit. 184 185 // Used for Methods 00 to 04, 06s, and 09 to 20, all of which use S. 186 187 #define GB_C_S_LOOKUP \ 188 int64_t pC = Sx [pS] ; \ 189 int64_t iC = GBI (Ci, pC, Cvlen) ; \ 190 bool is_zombie = GB_IS_ZOMBIE (iC) ; \ 191 if (is_zombie) iC = GB_FLIP (iC) ; 192 193 //-------------------------------------------------------------------------- 194 // GB_VECTOR_LOOKUP 195 //-------------------------------------------------------------------------- 196 197 // Find pX_start and pX_end for the vector X (:,j) 198 199 #define GB_VECTOR_LOOKUP(pX_start,pX_end,X,j) \ 200 { \ 201 int64_t pleft = 0, pright = X ## nvec-1 ; \ 202 GB_lookup (X ## _is_hyper, X ## h, X ## p, X ## vlen, &pleft, \ 203 pright, j, &pX_start, &pX_end) ; \ 204 } 205 206 //-------------------------------------------------------------------------- 207 // get the C(:,jC) vector where jC = J [j] 208 //-------------------------------------------------------------------------- 209 210 // C may be standard sparse, or hypersparse 211 // time: O(1) if standard, O(log(Cnvec)) if hyper 212 213 // This used for GB_subassign_one_slice and GB_subassign_emult_slice, 214 // which compute the parallel schedule for Methods 05, 06n, 07, and 08n. 215 216 #define GB_LOOKUP_jC \ 217 /* lookup jC in C */ \ 218 /* jC = J [j] ; or J is ":" or jbegin:jend or jbegin:jinc:jend */ \ 219 jC = GB_ijlist (J, j, Jkind, Jcolon) ; \ 220 int64_t pC_start, pC_end ; \ 221 GB_VECTOR_LOOKUP (pC_start, pC_end, C, jC) ; 222 223 //-------------------------------------------------------------------------- 224 // C(:,jC) is dense: iC = I [iA], and then look up C(iC,jC) 225 //-------------------------------------------------------------------------- 226 227 // C(:,jC) is dense, and thus can be accessed with a O(1)-time lookup 228 // with the index iC, where the index iC comes from I [iA] or via a 229 // colon notation for I. 230 231 // This used for Methods 05, 06n, 07, and 08n, which do not use S. 232 233 #define GB_iC_DENSE_LOOKUP \ 234 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ; \ 235 int64_t pC = pC_start + iC ; \ 236 bool is_zombie = (Ci != NULL) && GB_IS_ZOMBIE (Ci [pC]) ; \ 237 ASSERT (GB_IMPLIES (Ci != NULL, GB_UNFLIP (Ci [pC]) == iC)) ; 238 239 //-------------------------------------------------------------------------- 240 // get C(iC,jC) via binary search of C(:,jC) 241 //-------------------------------------------------------------------------- 242 243 // This used for Methods 05, 06n, 07, and 08n, which do not use S. 244 245 // New zombies may be introduced into C during the parallel computation. 246 // No coarse task shares the same C(:,jC) vector, so no race condition can 247 // occur. Fine tasks do share the same C(:,jC) vector, but each fine task 248 // is given a unique range of pC_start:pC_end-1 to search. Thus, no binary 249 // search of any fine tasks conflict with each other. 250 251 #define GB_iC_BINARY_SEARCH \ 252 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ; \ 253 int64_t pC = pC_start ; \ 254 int64_t pright = pC_end - 1 ; \ 255 bool cij_found, is_zombie ; \ 256 GB_BINARY_SEARCH_ZOMBIE (iC, Ci, pC, pright, cij_found, zorig, \ 257 is_zombie) ; 258 259 //-------------------------------------------------------------------------- 260 // for a 2-way or 3-way merge 261 //-------------------------------------------------------------------------- 262 263 // An entry S(i,j), A(i,j), or M(i,j) has been processed; 264 // move to the next one. 265 #define GB_NEXT(X) (p ## X)++ ; 266 267 //-------------------------------------------------------------------------- 268 // basic operations 269 //-------------------------------------------------------------------------- 270 271 #define GB_COPY_scalar_to_C \ 272 { \ 273 /* C(iC,jC) = scalar, already typecasted into cwork */ \ 274 memcpy (Cx +(pC*csize), cwork, csize) ; \ 275 } 276 277 #define GB_COPY_aij_to_C \ 278 { \ 279 /* C(iC,jC) = A(i,j), with typecasting */ \ 280 cast_A_to_C (Cx +(pC*csize), Ax +(pA*asize), csize) ; \ 281 } 282 283 #define GB_COPY_aij_to_ywork \ 284 { \ 285 /* ywork = A(i,j), with typecasting */ \ 286 cast_A_to_Y (ywork, Ax +(pA*asize), asize) ; \ 287 } 288 289 #define GB_ACCUMULATE \ 290 { \ 291 /* C(iC,jC) = accum (C(iC,jC), ywork) */ \ 292 GB_void xwork [GB_VLA(xsize)] ; \ 293 cast_C_to_X (xwork, Cx +(pC*csize), csize) ; \ 294 GB_void zwork [GB_VLA(zsize)] ; \ 295 faccum (zwork, xwork, ywork) ; \ 296 cast_Z_to_C (Cx +(pC*csize), zwork, csize) ; \ 297 } \ 298 299 #define GB_DELETE \ 300 { \ 301 /* turn C(iC,jC) into a zombie */ \ 302 ASSERT (!GB_IS_FULL (C)) ; \ 303 task_nzombies++ ; \ 304 Ci [pC] = GB_FLIP (iC) ; \ 305 } 306 307 #define GB_UNDELETE \ 308 { \ 309 /* bring a zombie C(iC,jC) back to life; */ \ 310 /* the value of C(iC,jC) must also be assigned. */ \ 311 ASSERT (!GB_IS_FULL (C)) ; \ 312 Ci [pC] = iC ; \ 313 task_nzombies-- ; \ 314 } 315 316 //-------------------------------------------------------------------------- 317 // C(I,J)<M> = accum (C(I,J),A): consider all cases 318 //-------------------------------------------------------------------------- 319 320 // The matrix C may have pending tuples and zombies: 321 322 // (1) pending tuples: this is a list of pending updates held as a set 323 // of (i,j,x) tuples. They had been added to the list via a prior 324 // GrB_setElement or GxB_subassign. No operator needs to be applied to 325 // them; the implied operator is SECOND, for both GrB_setElement and 326 // GxB_subassign, regardless of whether or not an accum operator is 327 // present. Pending tuples are inserted if and only if the 328 // corresponding entry C(i,j) does not exist, and in that case no accum 329 // operator is applied. 330 331 // The GrB_setElement method (C(i,j) = x) is same as GxB_subassign 332 // with: accum is SECOND, C not replaced, no mask M, mask not 333 // complemented. If GrB_setElement needs to insert its update as 334 // a pending tuple, then it will always be compatible with all 335 // pending tuples inserted here, by GxB_subassign. 336 337 // (2) zombie entries. These are entries that are still present in the 338 // pattern but marked for deletion (via GB_FLIP(i) for the row index). 339 340 // For the current GxB_subassign, there are 16 cases to handle, 341 // all combinations of the following options: 342 343 // accum is NULL, accum is not NULL 344 // C is not replaced, C is replaced 345 // no mask, mask is present 346 // mask is not complemented, mask is complemented 347 348 // Complementing an empty mask: This does not require the matrix A 349 // at all so it is handled as a special case. It corresponds to 350 // the GB_RETURN_IF_QUICK_MASK option in other GraphBLAS operations. 351 // Thus only 12 cases are considered in the tables below: 352 353 // These 4 cases are listed in Four Tables below: 354 // 2 cases: accum is NULL, accum is not NULL 355 // 2 cases: C is not replaced, C is replaced 356 357 // 3 cases: no mask, M is present and not complemented, 358 // and M is present and complemented. If there is no 359 // mask, then M(i,j)=1 for all (i,j). These 3 cases 360 // are the columns of each of the Four Tables. 361 362 // Each of these 12 cases can encounter up to 12 combinations of 363 // entries in C, A, and M (6 if no mask M is present). The left 364 // column of the Four Tables below consider all 12 combinations for all 365 // (i,j) in the cross product IxJ: 366 367 // C(I(i),J(j)) present, zombie, or not there: C, X, or '.' 368 // A(i,j) present or not, labeled 'A' or '.' below 369 // M(i,j) = 1 or 0 (but only if M is present) 370 371 // These 12 cases become the left columns as listed below. 372 // The zombie cases are handled a sub-case for "C present: 373 // regular entry or zombie". The acronyms below use "D" for 374 // "dot", meaning the entry (C or A) is not present. 375 376 // [ C A 1 ] C_A_1: both C and A present, M=1 377 // [ X A 1 ] C_A_1: both C and A present, M=1, C is a zombie 378 // [ . A 1 ] D_A_1: C not present, A present, M=1 379 380 // [ C . 1 ] C_D_1: C present, A not present, M=1 381 // [ X . 1 ] C_D_1: C present, A not present, M=1, C a zombie 382 // [ . . 1 ] only M=1 present, but nothing to do 383 384 // [ C A 0 ] C_A_0: both C and A present, M=0 385 // [ X A 0 ] C_A_0: both C and A present, M=0, C is a zombie 386 // [ . A 0 ] C not present, A present, M=0, 387 // nothing to do 388 389 // [ C . 0 ] C_D_0: C present, A not present, M=1 390 // [ X . 0 ] C_D_0: C present, A not present, M=1, C a zombie 391 // [ . . 0 ] only M=0 present, but nothing to do 392 393 // Legend for action taken in the right half of the table: 394 395 // delete live entry C(I(i),J(j)) marked for deletion (zombie) 396 // =A live entry C(I(i),J(j)) is overwritten with new value 397 // =C+A live entry C(I(i),J(j)) is modified with accum(c,a) 398 // C live entry C(I(i),J(j)) is unchanged 399 400 // undelete entry C(I(i),J(j)) a zombie, bring back with A(i,j) 401 // X entry C(I(i),J(j)) a zombie, no change, still zombie 402 403 // insert entry C(I(i),J(j)) not present, add pending tuple 404 // . entry C(I(i),J(j)) not present, no change 405 406 // blank the table is left blank where the the event cannot 407 // occur: GxB_subassign with no M cannot have 408 // M(i,j)=0, and GrB_setElement does not have the M 409 // column 410 411 //---------------------------------------------------------------------- 412 // GrB_setElement and the Four Tables for GxB_subassign: 413 //---------------------------------------------------------------------- 414 415 //------------------------------------------------------------ 416 // GrB_setElement: no mask 417 //------------------------------------------------------------ 418 419 // C A 1 =A | 420 // X A 1 undelete | 421 // . A 1 insert | 422 423 // GrB_setElement acts exactly like GxB_subassign with the 424 // implicit GrB_SECOND_Ctype operator, I=i, J=j, and a 425 // 1-by-1 matrix A containing a single entry (not an 426 // implicit entry; there is no "." for A). That is, 427 // nnz(A)==1. No mask, and the descriptor is the default; 428 // C_replace effectively false, mask not complemented, A 429 // not transposed. As a result, GrB_setElement can be 430 // freely mixed with calls to GxB_subassign with C_replace 431 // effectively false and with the identical 432 // GrB_SECOND_Ctype operator. These calls to 433 // GxB_subassign can use the mask, either complemented or 434 // not, and they can transpose A if desired, and there is 435 // no restriction on I and J. The matrix A can be any 436 // type and the type of A can change from call to call. 437 438 //------------------------------------------------------------ 439 // NO accum | no mask mask mask 440 // NO repl | not compl compl 441 //------------------------------------------------------------ 442 443 // C A 1 =A =A C | 444 // X A 1 undelete undelete X | 445 // . A 1 insert insert . | 446 447 // C . 1 delete delete C | 448 // X . 1 X X X | 449 // . . 1 . . . | 450 451 // C A 0 C =A | 452 // X A 0 X undelete | 453 // . A 0 . insert | 454 455 // C . 0 C delete | 456 // X . 0 X X | 457 // . . 0 . . | 458 459 // S_Extraction method works well: first extract pattern 460 // of S=C(I,J). Then examine all of A, M, S, and update 461 // C(I,J). The method needs to examine all entries in 462 // in C(I,J) to delete them if A is not present, so 463 // S=C(I,J) is not costly. 464 465 //------------------------------------------------------------ 466 // NO accum | no mask mask mask 467 // WITH repl | not compl compl 468 //------------------------------------------------------------ 469 470 // C A 1 =A =A delete | 471 // X A 1 undelete undelete X | 472 // . A 1 insert insert . | 473 474 // C . 1 delete delete delete | 475 // X . 1 X X X | 476 // . . 1 . . . | 477 478 // C A 0 delete =A | 479 // X A 0 X undelete | 480 // . A 0 . insert | 481 482 // C . 0 delete delete | 483 // X . 0 X X | 484 // . . 0 . . | 485 486 // S_Extraction method works well, since all of C(I,J) 487 // needs to be traversed, S=C(I,J) is reasonable to 488 // compute. 489 490 // With no accum: If there is no M and M is not 491 // complemented, then C_replace is irrelevant, Whether 492 // true or false, the results in the two tables 493 // above are the same. 494 495 //------------------------------------------------------------ 496 // ACCUM | no mask mask mask 497 // NO repl | not compl compl 498 //------------------------------------------------------------ 499 500 // C A 1 =C+A =C+A C | 501 // X A 1 undelete undelete X | 502 // . A 1 insert insert . | 503 504 // C . 1 C C C | 505 // X . 1 X X X | 506 // . . 1 . . . | 507 508 // C A 0 C =C+A | 509 // X A 0 X undelete | 510 // . A 0 . insert | 511 512 // C . 0 C C | 513 // X . 0 X X | 514 // . . 0 . . | 515 516 // With ACCUM but NO C_replace: This method only needs to 517 // examine entries in A. It does not need to examine all 518 // entries in C(I,J), nor all entries in M. Entries in 519 // C but in not A remain unchanged. This is like an 520 // extended GrB_setElement. No entries in C can be 521 // deleted. All other methods must examine all of C(I,J). 522 523 // Without S_Extraction: C(:,J) or M have many entries 524 // compared with A, do not extract S=C(I,J); use 525 // binary search instead. Otherwise, use the same 526 // S_Extraction method as the other 3 cases. 527 528 // S_Extraction method: if nnz(C(:,j)) + nnz(M) is 529 // similar to nnz(A) then the S_Extraction method would 530 // work well. 531 532 //------------------------------------------------------------ 533 // ACCUM | no mask mask mask 534 // WITH repl | not compl compl 535 //------------------------------------------------------------ 536 537 // C A 1 =C+A =C+A delete | 538 // X A 1 undelete undelete X | 539 // . A 1 insert insert . | 540 541 // C . 1 C C delete | 542 // X . 1 X X X | 543 // . . 1 . . . | 544 545 // C A 0 delete =C+A | 546 // X A 0 X undelete | 547 // . A 0 . insert | 548 549 // C . 0 delete C | 550 // X . 0 X X | 551 // . . 0 . . | 552 553 // S_Extraction method works well since all entries 554 // in C(I,J) must be examined. 555 556 // With accum: If there is no M and M is not 557 // complemented, then C_replace is irrelavant, Whether 558 // true or false, the results in the two tables 559 // above are the same. 560 561 // This condition on C_replace holds with our without 562 // accum. Thus, if there is no M, and M is 563 // not complemented, the C_replace can be set to false. 564 565 //------------------------------------------------------------ 566 567 // ^^^^^ legend for left columns above: 568 // C prior entry C(I(i),J(j)) exists 569 // X prior entry C(I(i),J(j)) exists but is a zombie 570 // . no prior entry C(I(i),J(j)) 571 // A A(i,j) exists 572 // . A(i,j) does not exist 573 // 1 M(i,j)=1, assuming M exists (or if implicit) 574 // 0 M(i,j)=0, only if M exists 575 576 //---------------------------------------------------------------------- 577 // Actions in the Four Tables above 578 //---------------------------------------------------------------------- 579 580 // Each entry in the Four Tables above are now explained in more 581 // detail, describing what must be done in each case. Zombies and 582 // pending tuples are disjoint; they do not mix. Zombies are IN 583 // the pattern but pending tuples are updates that are NOT in the 584 // pattern. That is why a separate list of pending tuples must be 585 // kept; there is no place for them in the pattern. Zombies, on 586 // the other hand, are entries IN the pattern that have been 587 // marked for deletion. 588 589 //-------------------------------- 590 // For entries NOT in the pattern: 591 //-------------------------------- 592 593 // They can have pending tuples, and can acquire more. No zombies. 594 595 // ( insert ): 596 597 // An entry C(I(i),J(j)) is NOT in the pattern, but its 598 // value must be modified. This is an insertion, like 599 // GrB_setElement, and the insertion is added as a pending 600 // tuple for C(I(i),J(j)). There can be many insertions 601 // to the same element, each in the list of pending 602 // tuples, in order of their insertion. Eventually these 603 // pending tuples must be assembled into C(I(i),J(j)) in 604 // the right order using the implied SECOND operator. 605 606 // ( . ): 607 608 // no change. C(I(i),J(j)) not in the pattern, and not 609 // modified. This C(I(i),J(j)) position could have 610 // pending tuples, in the list of pending tuples, but none 611 // of them are changed. If C_replace is true then those 612 // pending tuples would have to be discarded, but that 613 // condition will not occur because C_replace=true forces 614 // all prior tuples to the matrix to be assembled. 615 616 //-------------------------------- 617 // For entries IN the pattern: 618 //-------------------------------- 619 620 // They have no pending tuples, and acquire none. It can be 621 // zombie, can become a zombie, or a zombie can come back to life. 622 623 // ( delete ): 624 625 // C(I(i),J(j)) becomes a zombie, by flipping its row 626 // index via the GB_FLIP function. 627 628 // ( undelete ): 629 630 // C(I(i),J(j)) = A(i,j) was a zombie and is no longer a 631 // zombie. Its row index is restored with GB_FLIP. 632 633 // ( X ): 634 635 // C(I(i),J(j)) was a zombie, and still is a zombie. 636 // row index is < 0, and actual index is GB_FLIP(I(i)) 637 638 // ( C ): 639 640 // no change; C(I(i),J(j)) already an entry, and its value 641 // doesn't change. 642 643 // ( =A ): 644 645 // C(I(i),J(j)) = A(i,j), value gets overwritten. 646 647 // ( =C+A ): 648 649 // C(I(i),J(j)) = accum (C(I(i),J(j)), A(i,j)) 650 // The existing balue is modified via the accumulator. 651 652 653 //-------------------------------------------------------------------------- 654 // handling each action 655 //-------------------------------------------------------------------------- 656 657 // Each of the 12 cases are handled by the following actions, 658 // implemented as macros. The Four Tables are re-sorted below, 659 // and folded together according to their left column. 660 661 // Once the M(i,j) entry is extracted, all GB_subassign_* functions 662 // explicitly complement the scalar value if Mask_comp is true, before 663 // using these action functions. For the [no mask] case, M(i,j)=1. 664 // Thus, only the middle column needs to be considered by each action; 665 // the action will handle all three columns at the same time. All 666 // three columns remain in the re-sorted tables below for reference. 667 668 //---------------------------------------------------------------------- 669 // ----[C A 1] or [X A 1]: C and A present, M=1 670 //---------------------------------------------------------------------- 671 672 //------------------------------------------------ 673 // | no mask mask mask 674 // | not compl compl 675 //------------------------------------------------ 676 // C A 1 =A =A C | no accum,no Crepl 677 // C A 1 =A =A delete | no accum,Crepl 678 // C A 1 =C+A =C+A C | accum, no Crepl 679 // C A 1 =C+A =C+A delete | accum, Crepl 680 681 // X A 1 undelete undelete X | no accum,no Crepl 682 // X A 1 undelete undelete X | no accum,Crepl 683 // X A 1 undelete undelete X | accum, no Crepl 684 // X A 1 undelete undelete X | accum, Crepl 685 686 // Both C(I(i),J(j)) == S(i,j) and A(i,j) are present, and mij = 1. 687 // C(I(i),J(i)) is updated with the entry A(i,j). 688 // C_replace has no impact on this action. 689 690 // [X A 1] matrix case 691 #define GB_X_A_1_matrix \ 692 { \ 693 /* ----[X A 1] */ \ 694 /* action: ( undelete ): bring a zombie back to life */ \ 695 GB_UNDELETE ; \ 696 GB_COPY_aij_to_C ; \ 697 } 698 699 // [X A 1] scalar case 700 #define GB_X_A_1_scalar \ 701 { \ 702 /* ----[X A 1] */ \ 703 /* action: ( undelete ): bring a zombie back to life */ \ 704 GB_UNDELETE ; \ 705 GB_COPY_scalar_to_C ; \ 706 } 707 708 // [C A 1] matrix case when accum is present 709 #define GB_withaccum_C_A_1_matrix \ 710 { \ 711 if (is_zombie) \ 712 { \ 713 /* ----[X A 1] */ \ 714 /* action: ( undelete ): bring a zombie back to life */ \ 715 GB_X_A_1_matrix ; \ 716 } \ 717 else \ 718 { \ 719 /* ----[C A 1] with accum */ \ 720 /* action: ( =C+A ): apply the accumulator */ \ 721 GB_void ywork [GB_VLA(ysize)] ; \ 722 GB_COPY_aij_to_ywork ; \ 723 GB_ACCUMULATE ; \ 724 } \ 725 } 726 727 // [C A 1] scalar case when accum is present 728 #define GB_withaccum_C_A_1_scalar \ 729 { \ 730 if (is_zombie) \ 731 { \ 732 /* ----[X A 1] */ \ 733 /* action: ( undelete ): bring a zombie back to life */ \ 734 GB_X_A_1_scalar ; \ 735 } \ 736 else \ 737 { \ 738 /* ----[C A 1] with accum, scalar expansion */ \ 739 /* action: ( =C+A ): apply the accumulator */ \ 740 GB_ACCUMULATE ; \ 741 } \ 742 } 743 744 // [C A 1] matrix case when no accum is present 745 #define GB_noaccum_C_A_1_matrix \ 746 { \ 747 if (is_zombie) \ 748 { \ 749 /* ----[X A 1] */ \ 750 /* action: ( undelete ): bring a zombie back to life */ \ 751 GB_X_A_1_matrix ; \ 752 } \ 753 else \ 754 { \ 755 /* ----[C A 1] no accum, scalar expansion */ \ 756 /* action: ( =A ): copy A into C */ \ 757 GB_COPY_aij_to_C ; \ 758 } \ 759 } 760 761 // [C A 1] scalar case when no accum is present 762 #define GB_noaccum_C_A_1_scalar \ 763 { \ 764 if (is_zombie) \ 765 { \ 766 /* ----[X A 1] */ \ 767 /* action: ( undelete ): bring a zombie back to life */ \ 768 GB_X_A_1_scalar ; \ 769 } \ 770 else \ 771 { \ 772 /* ----[C A 1] no accum, scalar expansion */ \ 773 /* action: ( =A ): copy A into C */ \ 774 GB_COPY_scalar_to_C ; \ 775 } \ 776 } 777 778 //---------------------------------------------------------------------- 779 // ----[. A 1]: C not present, A present, M=1 780 //---------------------------------------------------------------------- 781 782 //------------------------------------------------ 783 // | no mask mask mask 784 // | not compl compl 785 //------------------------------------------------ 786 // . A 1 insert insert . | no accum,no Crepl 787 // . A 1 insert insert . | no accum,Crepl 788 // . A 1 insert insert . | accum, no Crepl 789 // . A 1 insert insert . | accum, Crepl 790 791 // C(I(i),J(j)) == S (i,j) is not present, A (i,j) is present, and 792 // mij = 1. The mask M allows C to be written, but no entry present 793 // in C (neither a live entry nor a zombie). This entry must be 794 // added to C but it doesn't fit in the pattern. It is added as a 795 // pending tuple. Zombies and pending tuples do not intersect. 796 797 // If adding the pending tuple fails, C is cleared entirely. 798 // Otherwise the matrix C would be left in an incoherent partial 799 // state of computation. It's cleaner to just free it all. 800 801 // The action is done by GB_PENDING_INSERT in GB_Pending.h. 802 803 #if 0 804 #define GB_D_A_1_scalar \ 805 { \ 806 /* ----[. A 1] */ \ 807 /* action: ( insert ) */ \ 808 GB_PENDING_INSERT (scalar) ; \ 809 } 810 811 #define GB_D_A_1_matrix \ 812 { \ 813 /* ----[. A 1] */ \ 814 /* action: ( insert ) */ \ 815 GB_PENDING_INSERT (Ax +(pA*asize)) ; \ 816 } 817 #endif 818 819 //---------------------------------------------------------------------- 820 // ----[C . 1] or [X . 1]: C present, A not present, M=1 821 //---------------------------------------------------------------------- 822 823 //------------------------------------------------ 824 // | no mask mask mask 825 // | not compl compl 826 //------------------------------------------------ 827 // C . 1 delete delete C | no accum,no Crepl 828 // C . 1 delete delete delete | no accum,Crepl 829 // C . 1 C C C | accum, no Crepl 830 // C . 1 C C delete | accum, Crepl 831 832 // X . 1 X X X | no accum,no Crepl 833 // X . 1 X X X | no accum,Crepl 834 // X . 1 X X X | accum, no Crepl 835 // X . 1 X X X | accum, Crepl 836 837 // C(I(i),J(j)) == S (i,j) is present, A (i,j) not is present, and 838 // mij = 1. The mask M allows C to be written, but no entry present 839 // in A. If no accum operator is present, C becomes a zombie. 840 841 // This condition cannot occur if A is a dense matrix, 842 // nor for scalar expansion 843 844 // [C . 1] matrix case when no accum is present 845 846 #if 0 847 #define GB_noaccum_C_D_1_matrix \ 848 { \ 849 if (is_zombie) \ 850 { \ 851 /* ----[X . 1] */ \ 852 /* action: ( X ): still a zombie */ \ 853 } \ 854 else \ 855 { \ 856 /* ----[C . 1] no accum */ \ 857 /* action: ( delete ): becomes a zombie */ \ 858 GB_DELETE ; \ 859 } \ 860 } 861 #endif 862 863 // The above action is done via GB_DELETE_ENTRY. 864 865 //---------------------------------------------------------------------- 866 // ----[C A 0] or [X A 0]: both C and A present but M=0 867 //---------------------------------------------------------------------- 868 869 //------------------------------------------------ 870 // | no mask mask mask 871 // | not compl compl 872 //------------------------------------------------ 873 // C A 0 C =A | no accum,no Crepl 874 // C A 0 delete =A | no accum,Crepl 875 // C A 0 C =C+A | accum, no Crepl 876 // C A 0 delete =C+A | accum, Crepl 877 878 // X A 0 X undelete | no accum,no Crepl 879 // X A 0 X undelete | no accum,Crepl 880 // X A 0 X undelete | accum, no Crepl 881 // X A 0 X undelete | accum, Crepl 882 883 // Both C(I(i),J(j)) == S(i,j) and A(i,j) are present, and mij = 0. 884 // The mask prevents A being written to C, so A has no effect on 885 // the result. If C_replace is true, however, the entry is 886 // deleted, becoming a zombie. This case does not occur if 887 // the mask M is not present. This action also handles the 888 // [C . 0] and [X . 0] cases; see the next section below. 889 890 // This condition can still occur if A is dense, so if a mask M is 891 // present, entries can still be deleted from C. As a result, the 892 // fact that A is dense cannot be exploited when the mask M is 893 // present. 894 895 #if 0 896 #define GB_C_A_0 \ 897 { \ 898 if (is_zombie) \ 899 { \ 900 /* ----[X A 0] */ \ 901 /* ----[X . 0] */ \ 902 /* action: ( X ): still a zombie */ \ 903 } \ 904 else if (C_replace) \ 905 { \ 906 /* ----[C A 0] replace */ \ 907 /* ----[C . 0] replace */ \ 908 /* action: ( delete ): becomes a zombie */ \ 909 GB_DELETE ; \ 910 } \ 911 else \ 912 { \ 913 /* ----[C A 0] no replace */ \ 914 /* ----[C . 0] no replace */ \ 915 /* action: ( C ): no change */ \ 916 } \ 917 } 918 #endif 919 920 // The above action is done via GB_DELETE_ENTRY. 921 922 // The above action is very similar to C_D_1. The only difference 923 // is how the entry C becomes a zombie. With C_D_1, there is no 924 // entry in A, so C becomes a zombie if no accum function is used 925 // because the implicit value A(i,j) gets copied into C, causing it 926 // to become an implicit value also (deleting the entry in C). 927 // With C_A_0, the entry C is protected from any modification from 928 // A (regardless of accum or not). However, if C_replace is true, 929 // the entry is cleared. The mask M does not protect C from the 930 // C_replace action. 931 932 // If C_replace is false, then the [C A 0] action does nothing. 933 // If C_replace is true, then the action becomes the following: 934 935 #define GB_DELETE_ENTRY \ 936 { \ 937 if (!is_zombie) \ 938 { \ 939 GB_DELETE ; \ 940 } \ 941 } 942 943 //---------------------------------------------------------------------- 944 // ----[C . 0] or [X . 0]: C present, A not present, M=0 945 //---------------------------------------------------------------------- 946 947 //------------------------------------------------ 948 // | no mask mask mask 949 // | not compl compl 950 //------------------------------------------------ 951 952 // C . 0 C delete | no accum,no Crepl 953 // C . 0 delete delete | no accum,Crepl 954 // C . 0 C C | accum, no Crepl 955 // C . 0 delete C | accum, Crepl 956 957 // X . 0 X X | no accum,no Crepl 958 // X . 0 X X | no accum,Crepl 959 // X . 0 X X | accum, no Crepl 960 // X . 0 X X | accum, Crepl 961 962 // C(I(i),J(j)) == S(i,j) is present, but A(i,j) is not present, 963 // and mij = 0. Since A(i,j) has no effect on the result, 964 // this is the same as the C_A_0 action above. 965 966 // This condition cannot occur if A is a dense matrix, nor for 967 // scalar expansion, but the existance of the entry A is not 968 // relevant. 969 970 // If C_replace is false, then the [C D 0] action does nothing. 971 // If C_replace is true, then the action becomes GB_DELETE_ENTRY. 972 973 #if 0 974 #define GB_C_D_0 GB_C_A_0 975 #endif 976 977 //---------------------------------------------------------------------- 978 // ----[. A 0]: C not present, A present, M=0 979 //---------------------------------------------------------------------- 980 981 // . A 0 . insert | no accum,no Crepl 982 // . A 0 . insert | no accum,no Crepl 983 // . A 0 . insert | accum, no Crepl 984 // . A 0 . insert | accum, Crepl 985 986 // C(I(i),J(j)) == S(i,j) is not present, A(i,j) is present, 987 // but mij = 0. The mask M prevents A from modifying C, so the 988 // A(i,j) entry is ignored. C_replace has no effect since the 989 // entry is already cleared. There is nothing to do. 990 991 //---------------------------------------------------------------------- 992 // ----[. . 1] and [. . 0]: no entries in C and A, M = 0 or 1 993 //---------------------------------------------------------------------- 994 995 //------------------------------------------------ 996 // | no mask mask mask 997 // | not compl compl 998 //------------------------------------------------ 999 1000 // . . 1 . . . | no accum,no Crepl 1001 // . . 1 . . . | no accum,Crepl 1002 // . . 1 . . . | accum, no Crepl 1003 // . . 1 . . . | accum, Crepl 1004 1005 // . . 0 . . . | no accum,no Crepl 1006 // . . 0 . . . | no accum,Crepl 1007 // . . 0 . . . | accum, no Crepl 1008 // . . 0 . . . | accum, Crepl 1009 1010 // Neither C(I(i),J(j)) == S(i,j) nor A(i,j) are not present, 1011 // Nothing happens. The M(i,j) entry is present, otherwise 1012 // this (i,j) position would not be considered at all. 1013 // The M(i,j) entry has no effect. There is nothing to do. 1014 1015 //------------------------------------------------------------------------------ 1016 // GB_subassign_symbolic: S = C(I,J) 1017 //------------------------------------------------------------------------------ 1018 1019 GrB_Info GB_subassign_symbolic // S = C(I,J), extracting the pattern not values 1020 ( 1021 // output 1022 GrB_Matrix S, // output matrix, static header 1023 // inputs, not modified: 1024 const GrB_Matrix C, // matrix to extract the pattern of 1025 const GrB_Index *I, // index list for S = C(I,J), or GrB_ALL, etc. 1026 const int64_t ni, // length of I, or special 1027 const GrB_Index *J, // index list for S = C(I,J), or GrB_ALL, etc. 1028 const int64_t nj, // length of J, or special 1029 const bool S_must_not_be_jumbled, // if true, S cannot be jumbled 1030 GB_Context Context 1031 ) ; 1032 1033 //------------------------------------------------------------------------------ 1034 // GB_subassign_zombie: C(I,J) = empty ; using S 1035 //------------------------------------------------------------------------------ 1036 1037 GrB_Info GB_subassign_zombie 1038 ( 1039 GrB_Matrix C, 1040 // input: 1041 const GrB_Index *I, 1042 const int64_t ni, 1043 const int64_t nI, 1044 const int Ikind, 1045 const int64_t Icolon [3], 1046 const GrB_Index *J, 1047 const int64_t nj, 1048 const int64_t nJ, 1049 const int Jkind, 1050 const int64_t Jcolon [3], 1051 GB_Context Context 1052 ) ; 1053 1054 //------------------------------------------------------------------------------ 1055 // GB_subassign_01: C(I,J) = scalar ; using S 1056 //------------------------------------------------------------------------------ 1057 1058 GrB_Info GB_subassign_01 1059 ( 1060 GrB_Matrix C, 1061 // input: 1062 const GrB_Index *I, 1063 const int64_t ni, 1064 const int64_t nI, 1065 const int Ikind, 1066 const int64_t Icolon [3], 1067 const GrB_Index *J, 1068 const int64_t nj, 1069 const int64_t nJ, 1070 const int Jkind, 1071 const int64_t Jcolon [3], 1072 const void *scalar, 1073 const GrB_Type atype, 1074 GB_Context Context 1075 ) ; 1076 1077 //------------------------------------------------------------------------------ 1078 // GB_subassign_02: C(I,J) = A ; using S 1079 //------------------------------------------------------------------------------ 1080 1081 GrB_Info GB_subassign_02 1082 ( 1083 GrB_Matrix C, 1084 // input: 1085 const GrB_Index *I, 1086 const int64_t ni, 1087 const int64_t nI, 1088 const int Ikind, 1089 const int64_t Icolon [3], 1090 const GrB_Index *J, 1091 const int64_t nj, 1092 const int64_t nJ, 1093 const int Jkind, 1094 const int64_t Jcolon [3], 1095 const GrB_Matrix A, 1096 GB_Context Context 1097 ) ; 1098 1099 //------------------------------------------------------------------------------ 1100 // GB_subassign_03: C(I,J) += scalar ; using S 1101 //------------------------------------------------------------------------------ 1102 1103 GrB_Info GB_subassign_03 1104 ( 1105 GrB_Matrix C, 1106 // input: 1107 const GrB_Index *I, 1108 const int64_t ni, 1109 const int64_t nI, 1110 const int Ikind, 1111 const int64_t Icolon [3], 1112 const GrB_Index *J, 1113 const int64_t nj, 1114 const int64_t nJ, 1115 const int Jkind, 1116 const int64_t Jcolon [3], 1117 const GrB_BinaryOp accum, 1118 const void *scalar, 1119 const GrB_Type atype, 1120 GB_Context Context 1121 ) ; 1122 1123 //------------------------------------------------------------------------------ 1124 // GB_subassign_04: C(I,J) += A ; using S 1125 //------------------------------------------------------------------------------ 1126 1127 GrB_Info GB_subassign_04 1128 ( 1129 GrB_Matrix C, 1130 // input: 1131 const GrB_Index *I, 1132 const int64_t ni, 1133 const int64_t nI, 1134 const int Ikind, 1135 const int64_t Icolon [3], 1136 const GrB_Index *J, 1137 const int64_t nj, 1138 const int64_t nJ, 1139 const int Jkind, 1140 const int64_t Jcolon [3], 1141 const GrB_BinaryOp accum, 1142 const GrB_Matrix A, 1143 GB_Context Context 1144 ) ; 1145 1146 //------------------------------------------------------------------------------ 1147 // GB_subassign_05: C(I,J)<M> = scalar ; no S 1148 //------------------------------------------------------------------------------ 1149 1150 GrB_Info GB_subassign_05 1151 ( 1152 GrB_Matrix C, 1153 // input: 1154 const GrB_Index *I, 1155 const int64_t nI, 1156 const int Ikind, 1157 const int64_t Icolon [3], 1158 const GrB_Index *J, 1159 const int64_t nJ, 1160 const int Jkind, 1161 const int64_t Jcolon [3], 1162 const GrB_Matrix M, 1163 const bool Mask_struct, 1164 const void *scalar, 1165 const GrB_Type atype, 1166 GB_Context Context 1167 ) ; 1168 1169 //------------------------------------------------------------------------------ 1170 // GB_subassign_05e: C(:,:)<M,struct> = scalar ; no S, C empty 1171 //------------------------------------------------------------------------------ 1172 1173 GrB_Info GB_subassign_05e 1174 ( 1175 GrB_Matrix C, 1176 // input: 1177 const GrB_Matrix M, 1178 const void *scalar, 1179 const GrB_Type atype, 1180 GB_Context Context 1181 ) ; 1182 1183 //------------------------------------------------------------------------------ 1184 // GB_subassign_05f: C(:,:)<C,struct> = scalar ; no S, C anything 1185 //------------------------------------------------------------------------------ 1186 1187 GrB_Info GB_subassign_05f 1188 ( 1189 GrB_Matrix C, 1190 // input: 1191 const void *scalar, 1192 const GrB_Type atype, 1193 GB_Context Context 1194 ) ; 1195 1196 //------------------------------------------------------------------------------ 1197 // GB_subassign_06n: C(I,J)<M> = A ; no S 1198 //------------------------------------------------------------------------------ 1199 1200 GrB_Info GB_subassign_06n 1201 ( 1202 GrB_Matrix C, 1203 // input: 1204 const GrB_Index *I, 1205 const int64_t nI, 1206 const int Ikind, 1207 const int64_t Icolon [3], 1208 const GrB_Index *J, 1209 const int64_t nJ, 1210 const int Jkind, 1211 const int64_t Jcolon [3], 1212 const GrB_Matrix M, 1213 const bool Mask_struct, 1214 const GrB_Matrix A, 1215 GB_Context Context 1216 ) ; 1217 1218 //------------------------------------------------------------------------------ 1219 // GB_subassign_06s_and_14: C(I,J)<M or !M> = A ; using S 1220 //------------------------------------------------------------------------------ 1221 1222 GrB_Info GB_subassign_06s_and_14 1223 ( 1224 GrB_Matrix C, 1225 // input: 1226 const GrB_Index *I, 1227 const int64_t ni, 1228 const int64_t nI, 1229 const int Ikind, 1230 const int64_t Icolon [3], 1231 const GrB_Index *J, 1232 const int64_t nj, 1233 const int64_t nJ, 1234 const int Jkind, 1235 const int64_t Jcolon [3], 1236 const GrB_Matrix M, 1237 const bool Mask_struct, // if true, use the only structure of M 1238 const bool Mask_comp, // if true, !M, else use M 1239 const GrB_Matrix A, 1240 GB_Context Context 1241 ) ; 1242 1243 //------------------------------------------------------------------------------ 1244 // GB_subassign_07: C(I,J)<M> += scalar ; no S 1245 //------------------------------------------------------------------------------ 1246 1247 GrB_Info GB_subassign_07 1248 ( 1249 GrB_Matrix C, 1250 // input: 1251 const GrB_Index *I, 1252 const int64_t nI, 1253 const int Ikind, 1254 const int64_t Icolon [3], 1255 const GrB_Index *J, 1256 const int64_t nJ, 1257 const int Jkind, 1258 const int64_t Jcolon [3], 1259 const GrB_Matrix M, 1260 const bool Mask_struct, 1261 const GrB_BinaryOp accum, 1262 const void *scalar, 1263 const GrB_Type atype, 1264 GB_Context Context 1265 ) ; 1266 1267 //------------------------------------------------------------------------------ 1268 // GB_subassign_08n: C(I,J)<M> += A ; no S 1269 //------------------------------------------------------------------------------ 1270 1271 GrB_Info GB_subassign_08n 1272 ( 1273 GrB_Matrix C, 1274 // input: 1275 const GrB_Index *I, 1276 const int64_t nI, 1277 const int Ikind, 1278 const int64_t Icolon [3], 1279 const GrB_Index *J, 1280 const int64_t nJ, 1281 const int Jkind, 1282 const int64_t Jcolon [3], 1283 const GrB_Matrix M, 1284 const bool Mask_struct, 1285 const GrB_BinaryOp accum, 1286 const GrB_Matrix A, 1287 GB_Context Context 1288 ) ; 1289 1290 //------------------------------------------------------------------------------ 1291 // GB_subassign_09: C(I,J)<M,repl> = scalar ; using S 1292 //------------------------------------------------------------------------------ 1293 1294 GrB_Info GB_subassign_09 1295 ( 1296 GrB_Matrix C, 1297 // input: 1298 const GrB_Index *I, 1299 const int64_t ni, 1300 const int64_t nI, 1301 const int Ikind, 1302 const int64_t Icolon [3], 1303 const GrB_Index *J, 1304 const int64_t nj, 1305 const int64_t nJ, 1306 const int Jkind, 1307 const int64_t Jcolon [3], 1308 const GrB_Matrix M, 1309 const bool Mask_struct, 1310 const void *scalar, 1311 const GrB_Type atype, 1312 GB_Context Context 1313 ) ; 1314 1315 //------------------------------------------------------------------------------ 1316 // GB_subassign_10_and_18: C(I,J)<M or !M,repl> = A ; using S 1317 //------------------------------------------------------------------------------ 1318 1319 GrB_Info GB_subassign_10_and_18 1320 ( 1321 GrB_Matrix C, 1322 // input: 1323 const GrB_Index *I, 1324 const int64_t ni, 1325 const int64_t nI, 1326 const int Ikind, 1327 const int64_t Icolon [3], 1328 const GrB_Index *J, 1329 const int64_t nj, 1330 const int64_t nJ, 1331 const int Jkind, 1332 const int64_t Jcolon [3], 1333 const GrB_Matrix M, 1334 const bool Mask_struct, // if true, use the only structure of M 1335 const bool Mask_comp, // if true, !M, else use M 1336 const GrB_Matrix A, 1337 GB_Context Context 1338 ) ; 1339 1340 //------------------------------------------------------------------------------ 1341 // GB_subassign_11: C(I,J)<M,repl> += scalar ; using S 1342 //------------------------------------------------------------------------------ 1343 1344 GrB_Info GB_subassign_11 1345 ( 1346 GrB_Matrix C, 1347 // input: 1348 const GrB_Index *I, 1349 const int64_t ni, 1350 const int64_t nI, 1351 const int Ikind, 1352 const int64_t Icolon [3], 1353 const GrB_Index *J, 1354 const int64_t nj, 1355 const int64_t nJ, 1356 const int Jkind, 1357 const int64_t Jcolon [3], 1358 const GrB_Matrix M, 1359 const bool Mask_struct, 1360 const GrB_BinaryOp accum, 1361 const void *scalar, 1362 const GrB_Type atype, 1363 GB_Context Context 1364 ) ; 1365 1366 //------------------------------------------------------------------------------ 1367 // GB_subassign_12_and_20: C(I,J)<M or !M,repl> += A ; using S 1368 //------------------------------------------------------------------------------ 1369 1370 GrB_Info GB_subassign_12_and_20 1371 ( 1372 GrB_Matrix C, 1373 // input: 1374 const GrB_Index *I, 1375 const int64_t ni, 1376 const int64_t nI, 1377 const int Ikind, 1378 const int64_t Icolon [3], 1379 const GrB_Index *J, 1380 const int64_t nj, 1381 const int64_t nJ, 1382 const int Jkind, 1383 const int64_t Jcolon [3], 1384 const GrB_Matrix M, 1385 const bool Mask_struct, // if true, use the only structure of M 1386 const bool Mask_comp, // if true, !M, else use M 1387 const GrB_BinaryOp accum, 1388 const GrB_Matrix A, 1389 GB_Context Context 1390 ) ; 1391 1392 //------------------------------------------------------------------------------ 1393 // GB_subassign_13: C(I,J)<!M> = scalar ; using S 1394 //------------------------------------------------------------------------------ 1395 1396 GrB_Info GB_subassign_13 1397 ( 1398 GrB_Matrix C, 1399 // input: 1400 const GrB_Index *I, 1401 const int64_t ni, 1402 const int64_t nI, 1403 const int Ikind, 1404 const int64_t Icolon [3], 1405 const GrB_Index *J, 1406 const int64_t nj, 1407 const int64_t nJ, 1408 const int Jkind, 1409 const int64_t Jcolon [3], 1410 const GrB_Matrix M, 1411 const bool Mask_struct, 1412 const void *scalar, 1413 const GrB_Type atype, 1414 GB_Context Context 1415 ) ; 1416 1417 //------------------------------------------------------------------------------ 1418 // GB_subassign_15: C(I,J)<!M> += scalar ; using S 1419 //------------------------------------------------------------------------------ 1420 1421 GrB_Info GB_subassign_15 1422 ( 1423 GrB_Matrix C, 1424 // input: 1425 const GrB_Index *I, 1426 const int64_t ni, 1427 const int64_t nI, 1428 const int Ikind, 1429 const int64_t Icolon [3], 1430 const GrB_Index *J, 1431 const int64_t nj, 1432 const int64_t nJ, 1433 const int Jkind, 1434 const int64_t Jcolon [3], 1435 const GrB_Matrix M, 1436 const bool Mask_struct, 1437 const GrB_BinaryOp accum, 1438 const void *scalar, 1439 const GrB_Type atype, 1440 GB_Context Context 1441 ) ; 1442 1443 //------------------------------------------------------------------------------ 1444 // GB_subassign_16: C(I,J)<!M> += A ; using S 1445 // GB_subassign_08s: C(I,J)<M> += A ; using S. Compare with method 08n 1446 //------------------------------------------------------------------------------ 1447 1448 GrB_Info GB_subassign_08s_and_16 1449 ( 1450 GrB_Matrix C, 1451 // input: 1452 const GrB_Index *I, 1453 const int64_t ni, 1454 const int64_t nI, 1455 const int Ikind, 1456 const int64_t Icolon [3], 1457 const GrB_Index *J, 1458 const int64_t nj, 1459 const int64_t nJ, 1460 const int Jkind, 1461 const int64_t Jcolon [3], 1462 const GrB_Matrix M, 1463 const bool Mask_struct, // if true, use the only structure of M 1464 const bool Mask_comp, // if true, !M, else use M 1465 const GrB_BinaryOp accum, 1466 const GrB_Matrix A, 1467 GB_Context Context 1468 ) ; 1469 1470 //------------------------------------------------------------------------------ 1471 // GB_subassign_17: C(I,J)<!M,repl> = scalar ; using S 1472 //------------------------------------------------------------------------------ 1473 1474 GrB_Info GB_subassign_17 1475 ( 1476 GrB_Matrix C, 1477 // input: 1478 const GrB_Index *I, 1479 const int64_t ni, 1480 const int64_t nI, 1481 const int Ikind, 1482 const int64_t Icolon [3], 1483 const GrB_Index *J, 1484 const int64_t nj, 1485 const int64_t nJ, 1486 const int Jkind, 1487 const int64_t Jcolon [3], 1488 const GrB_Matrix M, 1489 const bool Mask_struct, 1490 const void *scalar, 1491 const GrB_Type atype, 1492 GB_Context Context 1493 ) ; 1494 1495 //------------------------------------------------------------------------------ 1496 // GB_subassign_19: C(I,J)<!M,repl> += scalar ; using S 1497 //------------------------------------------------------------------------------ 1498 1499 GrB_Info GB_subassign_19 1500 ( 1501 GrB_Matrix C, 1502 // input: 1503 const GrB_Index *I, 1504 const int64_t ni, 1505 const int64_t nI, 1506 const int Ikind, 1507 const int64_t Icolon [3], 1508 const GrB_Index *J, 1509 const int64_t nj, 1510 const int64_t nJ, 1511 const int Jkind, 1512 const int64_t Jcolon [3], 1513 const GrB_Matrix M, 1514 const bool Mask_struct, 1515 const GrB_BinaryOp accum, 1516 const void *scalar, 1517 const GrB_Type atype, 1518 GB_Context Context 1519 ) ; 1520 1521 //------------------------------------------------------------------------------ 1522 // GB_ALLOCATE_NPENDING_WERK: allocate Npending workspace 1523 //------------------------------------------------------------------------------ 1524 1525 #define GB_ALLOCATE_NPENDING_WERK \ 1526 GB_WERK_PUSH (Npending, ntasks+1, int64_t) ; \ 1527 if (Npending == NULL) \ 1528 { \ 1529 GB_FREE_ALL ; \ 1530 return (GrB_OUT_OF_MEMORY) ; \ 1531 } 1532 1533 //------------------------------------------------------------------------------ 1534 // GB_SUBASSIGN_ONE_SLICE: slice one matrix (M) 1535 //------------------------------------------------------------------------------ 1536 1537 // Methods: 05, 06n, 07. If C is dense, it is sliced for a fine task, so that 1538 // it can do a binary search via GB_iC_BINARY_SEARCH. But if C(:,jC) is dense, 1539 // C(:,jC) is not sliced, so the fine task must do a direct lookup via 1540 // GB_iC_DENSE_LOOKUP. Otherwise a race condition will occur. 1541 1542 #define GB_SUBASSIGN_ONE_SLICE(M) \ 1543 GB_OK (GB_subassign_one_slice ( \ 1544 &TaskList, &TaskList_size, &ntasks, &nthreads, \ 1545 C, I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon, \ 1546 M, Context)) ; \ 1547 GB_ALLOCATE_NPENDING_WERK ; 1548 1549 //------------------------------------------------------------------------------ 1550 // GB_SUBASSIGN_TWO_SLICE: slice two matrices 1551 //------------------------------------------------------------------------------ 1552 1553 // Methods: 02, 04, 06s, 09, 10, 11, 12, 14, 16, 18, 20. 1554 1555 // Create tasks for Z = X+S, and the mapping of Z to X and S. The matrix X is 1556 // either A or M. No need to examine C, since it will be accessed via S, not 1557 // via binary search. 1558 1559 // If X is bitmap, this method is not used. Instead, GB_SUBASSIGN_IXJ_SLICE is 1560 // used to iterate over the matrix X. 1561 1562 #define GB_SUBASSIGN_TWO_SLICE(X,S) \ 1563 int Z_sparsity = GxB_SPARSE ; \ 1564 int64_t Znvec ; \ 1565 GB_OK (GB_add_phase0 ( \ 1566 &Znvec, &Zh, &Zh_size, NULL, NULL, &Z_to_X, &Z_to_X_size, \ 1567 &Z_to_S, &Z_to_S_size, NULL, &Z_sparsity, \ 1568 NULL, X, S, Context)) ; \ 1569 GB_OK (GB_ewise_slice ( \ 1570 &TaskList, &TaskList_size, &ntasks, &nthreads, \ 1571 Znvec, Zh, NULL, Z_to_X, Z_to_S, false, \ 1572 NULL, X, S, Context)) ; \ 1573 GB_ALLOCATE_NPENDING_WERK ; 1574 1575 //------------------------------------------------------------------------------ 1576 // GB_SUBASSIGN_IXJ_SLICE: slice IxJ for a scalar assignement method 1577 //------------------------------------------------------------------------------ 1578 1579 // Methods: 01, 03, 13, 15, 17, 19. All of these methods access the C matrix 1580 // via S, not via binary search. 1581 1582 #define GB_SUBASSIGN_IXJ_SLICE \ 1583 GB_OK (GB_subassign_IxJ_slice ( \ 1584 &TaskList, &TaskList_size, &ntasks, &nthreads, \ 1585 /* I, */ nI, /* Ikind, Icolon, J, */ nJ, /* Jkind, Jcolon, */ \ 1586 Context)) ; \ 1587 GB_ALLOCATE_NPENDING_WERK ; 1588 1589 //------------------------------------------------------------------------------ 1590 // GB_subassign_one_slice 1591 //------------------------------------------------------------------------------ 1592 1593 // Slice A or M into fine/coarse tasks, for GB_subassign_05, 06n, and 07 1594 1595 GrB_Info GB_subassign_one_slice 1596 ( 1597 // output: 1598 GB_task_struct **p_TaskList, // array of structs 1599 size_t *p_TaskList_size, // size of TaskList 1600 int *p_ntasks, // # of tasks constructed 1601 int *p_nthreads, // # of threads to use 1602 // input: 1603 const GrB_Matrix C, // output matrix C 1604 const GrB_Index *I, 1605 const int64_t nI, 1606 const int Ikind, 1607 const int64_t Icolon [3], 1608 const GrB_Index *J, 1609 const int64_t nJ, 1610 const int Jkind, 1611 const int64_t Jcolon [3], 1612 const GrB_Matrix A, // matrix to slice (M or A) 1613 GB_Context Context 1614 ) ; 1615 1616 //------------------------------------------------------------------------------ 1617 // GB_subassign_emult_slice: slice the entries and vectors for GB_subassign_08n 1618 //------------------------------------------------------------------------------ 1619 1620 GrB_Info GB_subassign_emult_slice 1621 ( 1622 // output: 1623 GB_task_struct **p_TaskList, // array of structs, of size max_ntasks 1624 size_t *p_TaskList_size, // size of TaskList 1625 int *p_ntasks, // # of tasks constructed 1626 int *p_nthreads, // # of threads to use 1627 int64_t *p_Znvec, // # of vectors to compute in Z 1628 const int64_t *restrict *Zh_handle, // Zh_shallow is A->h, M->h, or NULL 1629 int64_t *restrict *Z_to_A_handle, // Z_to_A: size Znvec, or NULL 1630 size_t *Z_to_A_size_handle, 1631 int64_t *restrict *Z_to_M_handle, // Z_to_M: size Znvec, or NULL 1632 size_t *Z_to_M_size_handle, 1633 // input: 1634 const GrB_Matrix C, // output matrix C 1635 const GrB_Index *I, 1636 const int64_t nI, 1637 const int Ikind, 1638 const int64_t Icolon [3], 1639 const GrB_Index *J, 1640 const int64_t nJ, 1641 const int Jkind, 1642 const int64_t Jcolon [3], 1643 const GrB_Matrix A, // matrix to slice 1644 const GrB_Matrix M, // matrix to slice 1645 GB_Context Context 1646 ) ; 1647 1648 //------------------------------------------------------------------------------ 1649 // GB_GET_TASK_DESCRIPTOR: get coarse/fine task descriptor 1650 //------------------------------------------------------------------------------ 1651 1652 #define GB_GET_TASK_DESCRIPTOR \ 1653 int64_t kfirst = TaskList [taskid].kfirst ; \ 1654 int64_t klast = TaskList [taskid].klast ; \ 1655 bool fine_task = (klast == -1) ; \ 1656 if (fine_task) \ 1657 { \ 1658 /* a fine task operates on a slice of a single vector */ \ 1659 klast = kfirst ; \ 1660 } \ 1661 1662 #define GB_GET_TASK_DESCRIPTOR_PHASE1 \ 1663 GB_GET_TASK_DESCRIPTOR ; \ 1664 int64_t task_nzombies = 0 ; \ 1665 int64_t task_pending = 0 ; 1666 1667 //------------------------------------------------------------------------------ 1668 // GB_GET_MAPPED: get the content of a vector for a coarse/fine task 1669 //------------------------------------------------------------------------------ 1670 1671 #define GB_GET_MAPPED(pX_start, pX_fini, pX, pX_end, Xp, j, k, Z_to_X, Xvlen) \ 1672 int64_t pX_start = -1, pX_fini = -1 ; \ 1673 if (fine_task) \ 1674 { \ 1675 /* A fine task operates on a slice of X(:,k) */ \ 1676 pX_start = TaskList [taskid].pX ; \ 1677 pX_fini = TaskList [taskid].pX_end ; \ 1678 } \ 1679 else \ 1680 { \ 1681 /* vectors are never sliced for a coarse task */ \ 1682 int64_t kX = (Z_to_X == NULL) ? j : Z_to_X [k] ; \ 1683 if (kX >= 0) \ 1684 { \ 1685 pX_start = GBP (Xp, kX, Xvlen) ; \ 1686 pX_fini = GBP (Xp, kX+1, Xvlen) ; \ 1687 } \ 1688 } 1689 1690 //------------------------------------------------------------------------------ 1691 // GB_GET_EVEC: get the content of a vector for Method08n 1692 //------------------------------------------------------------------------------ 1693 1694 #define GB_GET_EVEC(pX_start, pX_fini, pX, pX_end, Xp, Xh, j,k,Z_to_X,Xvlen)\ 1695 int64_t pX_start = -1, pX_fini = -1 ; \ 1696 if (fine_task) \ 1697 { \ 1698 /* A fine task operates on a slice of X(:,k) */ \ 1699 pX_start = TaskList [taskid].pX ; \ 1700 pX_fini = TaskList [taskid].pX_end ; \ 1701 } \ 1702 else \ 1703 { \ 1704 /* vectors are never sliced for a coarse task */ \ 1705 int64_t kX = (Zh_shallow == Xh) ? k : \ 1706 ((Z_to_X == NULL) ? j : Z_to_X [k]) ; \ 1707 if (kX >= 0) \ 1708 { \ 1709 pX_start = GBP (Xp, kX, Xvlen) ; \ 1710 pX_fini = GBP (Xp, kX+1, Xvlen) ; \ 1711 } \ 1712 } 1713 1714 //------------------------------------------------------------------------------ 1715 // GB_GET_jC: get the vector C(:,jC) 1716 //------------------------------------------------------------------------------ 1717 1718 #define GB_GET_jC \ 1719 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ; \ 1720 int64_t pC_start, pC_end ; \ 1721 if (fine_task) \ 1722 { \ 1723 pC_start = TaskList [taskid].pC ; \ 1724 pC_end = TaskList [taskid].pC_end ; \ 1725 } \ 1726 else \ 1727 { \ 1728 GB_VECTOR_LOOKUP (pC_start, pC_end, C, jC) ; \ 1729 } 1730 1731 //------------------------------------------------------------------------------ 1732 // GB_GET_IXJ_TASK_DESCRIPTOR*: get the task descriptor for IxJ 1733 //------------------------------------------------------------------------------ 1734 1735 // Q denotes the Cartesian product IXJ 1736 1737 #define GB_GET_IXJ_TASK_DESCRIPTOR(iQ_start,iQ_end) \ 1738 GB_GET_TASK_DESCRIPTOR ; \ 1739 int64_t iQ_start = 0, iQ_end = nI ; \ 1740 if (fine_task) \ 1741 { \ 1742 iQ_start = TaskList [taskid].pA ; \ 1743 iQ_end = TaskList [taskid].pA_end ; \ 1744 } 1745 1746 #define GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1(iQ_start,iQ_end) \ 1747 GB_GET_IXJ_TASK_DESCRIPTOR (iQ_start, iQ_end) \ 1748 int64_t task_nzombies = 0 ; \ 1749 int64_t task_pending = 0 ; 1750 1751 #define GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2(iQ_start,iQ_end) \ 1752 GB_GET_IXJ_TASK_DESCRIPTOR (iQ_start, iQ_end) \ 1753 GB_START_PENDING_INSERTION ; 1754 1755 //------------------------------------------------------------------------------ 1756 // GB_GET_VECTOR_FOR_IXJ: get the start of a vector for scalar assignment 1757 //------------------------------------------------------------------------------ 1758 1759 // Find pX and pX_end for the vector X (iQ_start:iQ_end, j), for a scalar 1760 // assignment method, or a method iterating over all IxJ for a bitmap M or A. 1761 1762 #define GB_GET_VECTOR_FOR_IXJ(X,iQ_start) \ 1763 int64_t p ## X, p ## X ## _end ; \ 1764 GB_VECTOR_LOOKUP (p ## X, p ## X ## _end, X, j) ; \ 1765 if (iQ_start != 0) \ 1766 { \ 1767 if (X ## i == NULL) \ 1768 { \ 1769 /* X is full or bitmap */ \ 1770 p ## X += iQ_start ; \ 1771 } \ 1772 else \ 1773 { \ 1774 /* X is sparse or hypersparse */ \ 1775 int64_t pright = p ## X ## _end - 1 ; \ 1776 bool found ; \ 1777 GB_SPLIT_BINARY_SEARCH (iQ_start, X ## i, p ## X, pright, found) ;\ 1778 } \ 1779 } 1780 1781 //------------------------------------------------------------------------------ 1782 // GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP 1783 //------------------------------------------------------------------------------ 1784 1785 // mij = M(i,j) 1786 1787 #define GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP(i) \ 1788 bool mij ; \ 1789 if (M_is_bitmap) \ 1790 { \ 1791 /* M(:,j) is bitmap, no need for binary search */ \ 1792 int64_t pM = pM_start + i ; \ 1793 mij = Mb [pM] && GB_mcast (Mx, pM, msize) ; \ 1794 } \ 1795 else if (mjdense) \ 1796 { \ 1797 /* M(:,j) is dense, no need for binary search */ \ 1798 int64_t pM = pM_start + i ; \ 1799 mij = GB_mcast (Mx, pM, msize) ; \ 1800 } \ 1801 else \ 1802 { \ 1803 /* M(:,j) is sparse, binary search for M(i,j) */ \ 1804 int64_t pM = pM_start ; \ 1805 int64_t pright = pM_end - 1 ; \ 1806 bool found ; \ 1807 GB_BINARY_SEARCH (i, Mi, pM, pright, found) ; \ 1808 if (found) \ 1809 { \ 1810 mij = GB_mcast (Mx, pM, msize) ; \ 1811 } \ 1812 else \ 1813 { \ 1814 mij = false ; \ 1815 } \ 1816 } \ 1817 1818 //------------------------------------------------------------------------------ 1819 // GB_PHASE1_TASK_WRAPUP: wrapup for a task in phase 1 1820 //------------------------------------------------------------------------------ 1821 1822 // sum up the zombie count, and record the # of pending tuples for this task 1823 1824 #define GB_PHASE1_TASK_WRAPUP \ 1825 nzombies += task_nzombies ; \ 1826 Npending [taskid] = task_pending ; 1827 1828 //------------------------------------------------------------------------------ 1829 // GB_PENDING_CUMSUM: finalize zombies, count # pending tuples for all tasks 1830 //------------------------------------------------------------------------------ 1831 1832 #define GB_PENDING_CUMSUM \ 1833 C->nzombies = nzombies ; \ 1834 GB_cumsum (Npending, ntasks, NULL, 1, NULL) ; \ 1835 int64_t nnew = Npending [ntasks] ; \ 1836 if (nnew == 0) \ 1837 { \ 1838 /* no pending tuples, so skip phase 2 */ \ 1839 GB_FREE_ALL ; \ 1840 ASSERT_MATRIX_OK (C, "C, no pending tuples " __FILE__, \ 1841 GB_FLIP (GB0)) ; \ 1842 return (GrB_SUCCESS) ; \ 1843 } \ 1844 /* ensure that C->Pending is large enough to handle nnew more tuples */ \ 1845 if (!GB_Pending_ensure (&(C->Pending), atype, accum, is_matrix, nnew, \ 1846 Context)) \ 1847 { \ 1848 GB_FREE_ALL ; \ 1849 return (GrB_OUT_OF_MEMORY) ; \ 1850 } \ 1851 GB_Pending Pending = C->Pending ; \ 1852 int64_t *restrict Pending_i = Pending->i ; \ 1853 int64_t *restrict Pending_j = Pending->j ; \ 1854 GB_void *restrict Pending_x = Pending->x ; \ 1855 int64_t npending_orig = Pending->n ; \ 1856 bool pending_sorted = Pending->sorted ; 1857 1858 //------------------------------------------------------------------------------ 1859 // GB_START_PENDING_INSERTION: start insertion of pending tuples (phase 2) 1860 //------------------------------------------------------------------------------ 1861 1862 #define GB_START_PENDING_INSERTION \ 1863 bool task_sorted = true ; \ 1864 int64_t ilast = -1 ; \ 1865 int64_t jlast = -1 ; \ 1866 int64_t n = Npending [taskid] ; \ 1867 int64_t task_pending = Npending [taskid+1] - n ; \ 1868 if (task_pending == 0) continue ; \ 1869 n += npending_orig ; 1870 1871 #define GB_GET_TASK_DESCRIPTOR_PHASE2 \ 1872 GB_GET_TASK_DESCRIPTOR ; \ 1873 GB_START_PENDING_INSERTION ; 1874 1875 //------------------------------------------------------------------------------ 1876 // GB_PHASE2_TASK_WRAPUP: wrapup for a task in phase 2 1877 //------------------------------------------------------------------------------ 1878 1879 #define GB_PHASE2_TASK_WRAPUP \ 1880 pending_sorted = pending_sorted && task_sorted ; \ 1881 ASSERT (n == npending_orig + Npending [taskid+1]) ; 1882 1883 //------------------------------------------------------------------------------ 1884 // GB_SUBASSIGN_WRAPUP: finalize the subassign method after phase 2 1885 //------------------------------------------------------------------------------ 1886 1887 // If pending_sorted is true, then the original pending tuples (if any) were 1888 // sorted, and each task found that its tuples were also sorted. The 1889 // boundaries between each task must now be checked. 1890 1891 #define GB_SUBASSIGN_WRAPUP \ 1892 if (pending_sorted) \ 1893 { \ 1894 for (int taskid = 0 ; pending_sorted && taskid < ntasks ; taskid++) \ 1895 { \ 1896 int64_t n = Npending [taskid] ; \ 1897 int64_t task_pending = Npending [taskid+1] - n ; \ 1898 n += npending_orig ; \ 1899 if (task_pending > 0 && n > 0) \ 1900 { \ 1901 /* (i,j) is the first pending tuple for this task; check */ \ 1902 /* with the pending tuple just before it */ \ 1903 ASSERT (n < npending_orig + nnew) ; \ 1904 int64_t i = Pending_i [n] ; \ 1905 int64_t j = (Pending_j != NULL) ? Pending_j [n] : 0 ; \ 1906 int64_t ilast = Pending_i [n-1] ; \ 1907 int64_t jlast = (Pending_j != NULL) ? Pending_j [n-1] : 0 ; \ 1908 pending_sorted = pending_sorted && \ 1909 ((jlast < j) || (jlast == j && ilast <= i)) ; \ 1910 } \ 1911 } \ 1912 } \ 1913 Pending->n += nnew ; \ 1914 Pending->sorted = pending_sorted ; \ 1915 GB_FREE_ALL ; \ 1916 ASSERT_MATRIX_OK (C, "C with pending tuples :" __FILE__, GB_FLIP (GB0)) ;\ 1917 return (GrB_SUCCESS) ; 1918 1919 #endif 1920 1921