//------------------------------------------------------------------------------
// GB_subref_template: C = A(I,J), or C = pattern (A(I,J))
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

#if defined ( GB_SYMBOLIC )
// symbolic method must tolerate zombies
#define GB_Ai(p) GBI_UNFLIP (Ai, p, avlen)
#else
// numeric method will not see any zombies
#define GB_Ai(p) GBI (Ai, p, avlen)
#endif

// to iterate across all entries in a bucket:
#define GB_for_each_index_in_bucket(inew,i)                             \
    for (int64_t inew = Mark [i] - 1 ; inew >= 0 ; inew = Inext [inew])
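
// The inverse buckets Mark and Inext are built by the caller, not here; the
// following is a hedged illustration of the encoding implied by the macro
// above, with hypothetical values.  Bucket i holds all indices inew with
// i == I [inew]: if I = [4, 7, 4], then bucket 4 holds inew = 0 and inew = 2,
// and bucket 7 holds inew = 1.  Mark [i] - 1 is the first inew in bucket i
// (so Mark [i] <= 0 gives an empty bucket as far as this loop is concerned),
// and Inext [inew] chains to the next inew in the same bucket, with a negative
// value ending the list.  The order of indices within a bucket is determined
// by whoever builds Mark and Inext, not by this template.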

// copy values from A(:,kA) to C(:,kC): Cx [pC:pC+len-1] = ... (pA:pA+len-1).
#if defined ( GB_SYMBOLIC )
// symbolic copy: Cx is int64_t; Ax is ignored
#define GB_COPY_RANGE(pC,pA,len)                                        \
    for (int64_t k = 0 ; k < (len) ; k++)                               \
    {                                                                   \
        Cx [(pC) + k] = (pA) + k ;                                      \
    }
#else
// numeric copy: Cx and Ax are both (GB_void *), and point to the same type
#define GB_COPY_RANGE(pC,pA,len)                                        \
    memcpy (Cx + (pC)*asize, Ax + (pA)*asize, (len) * asize) ;
#endif

// copy a single value from A(:,kA) to C(:,kC): Cx [pC] = ... (pA)
#if defined ( GB_SYMBOLIC )
// symbolic copy: Cx is int64_t; Ax is ignored
#define GB_COPY_ENTRY(pC,pA)                                            \
    Cx [pC] = (pA) ;
#else
// numeric copy: Cx and Ax are both (GB_void *), and point to the same type
#define GB_COPY_ENTRY(pC,pA)                                            \
    /* Cx [pC] = Ax [pA] */                                             \
    memcpy (Cx + (pC)*asize, Ax + (pA)*asize, asize) ;
#endif

// the type of Cx
#if defined ( GB_SYMBOLIC )
// C is an int64_t array; the type of A is ignored
#define GB_CTYPE int64_t
#define GB_CSIZE1 1
#define GB_CSIZE2 (sizeof (int64_t))
#else
// C and A have the same type
#define GB_CTYPE GB_void
#define GB_CSIZE1 asize
#define GB_CSIZE2 asize
#endif

{

    //--------------------------------------------------------------------------
    // get A
    //--------------------------------------------------------------------------

    const int64_t *restrict Ai = A->i ;
    const int64_t avlen = A->vlen ;

    #if defined ( GB_SYMBOLIC )
    const int64_t nzombies = A->nzombies ;
    #endif

    #if defined ( GB_PHASE_2_OF_2 ) && defined ( GB_NUMERIC )
    ASSERT (C->type == A->type) ;
    const GB_void *restrict Ax = (GB_void *) A->x ;
    const int64_t asize = A->type->size ;
    #endif

    //--------------------------------------------------------------------------
    // get C
    //--------------------------------------------------------------------------

    #if defined ( GB_PHASE_2_OF_2 )
    int64_t *restrict Ci = C->i ;
    GB_CTYPE *restrict Cx = (GB_CTYPE *) C->x ;
    #endif

    //--------------------------------------------------------------------------
    // get I
    //--------------------------------------------------------------------------

    // these values are ignored if Ikind == GB_LIST
    int64_t ibegin = Icolon [GxB_BEGIN] ;
    int64_t iinc   = Icolon [GxB_INC  ] ;
    int64_t inc    = (iinc < 0) ? (-iinc) : iinc ;
    #ifdef GB_DEBUG
    int64_t iend   = Icolon [GxB_END  ] ;
    #endif
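
    // A note on how I is encoded (a hedged summary drawn from the cases below,
    // with a hypothetical example): Ikind is one of GB_ALL (I is ":"),
    // GB_RANGE (I = ibegin:iend), GB_STRIDE (I = ibegin:iinc:iend), or GB_LIST
    // (I is an explicit list of indices, and Icolon is ignored).  For example,
    // I = 2:3:11 would be GB_STRIDE with Icolon = { 2, 3, 11 }, so that nI = 4
    // and the selected row indices are 2, 5, 8, and 11.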

    //--------------------------------------------------------------------------
    // phase1: count entries in each C(:,kC); phase2: compute C
    //--------------------------------------------------------------------------

    int taskid ;
    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
    for (taskid = 0 ; taskid < ntasks ; taskid++)
    {

        //----------------------------------------------------------------------
        // get the task descriptor
        //----------------------------------------------------------------------

        int64_t kfirst = TaskList [taskid].kfirst ;
        int64_t klast  = TaskList [taskid].klast ;
        bool fine_task = (klast < 0) ;
        if (fine_task)
        {
            // a fine task operates on a slice of a single vector
            klast = kfirst ;
        }

        // a coarse task accesses all of I for all its vectors
        int64_t pI = 0 ;
        int64_t pI_end = nI ;
        int64_t ilen = nI ;

        ASSERT (0 <= kfirst && kfirst <= klast && klast < Cnvec) ;

        //----------------------------------------------------------------------
        // compute all vectors C(:,kfirst:klast) for this task
        //----------------------------------------------------------------------

        for (int64_t kC = kfirst ; kC <= klast ; kC++)
        {

            //------------------------------------------------------------------
            // get C(:,kC)
            //------------------------------------------------------------------

            #if defined ( GB_PHASE_1_OF_2 )
            // phase1 simply counts the # of entries in C(:,kC).
            int64_t clen = 0 ;
            #else
            // This task computes all or part of C(:,kC), which are the entries
            // in Ci,Cx [pC:pC_end-1].
            int64_t pC, pC_end ;
            if (fine_task)
            {
                // A fine task computes a slice of C(:,kC)
                pC     = TaskList [taskid  ].pC ;
                pC_end = TaskList [taskid+1].pC ;
                ASSERT (Cp [kC] <= pC && pC <= pC_end && pC_end <= Cp [kC+1]) ;
            }
            else
            {
                // The vectors of C are never sliced for a coarse task, so this
                // task computes all of C(:,kC).
                pC     = Cp [kC] ;
                pC_end = Cp [kC+1] ;
            }
            int64_t clen = pC_end - pC ;
            if (clen == 0) continue ;
            #endif

            //------------------------------------------------------------------
            // get A(:,kA)
            //------------------------------------------------------------------

            int64_t pA, pA_end ;

            if (fine_task)
            {
                // a fine task computes a slice of a single vector C(:,kC).
                // The task accesses Ai,Ax [pA:pA_end-1], which holds either
                // the entire vector A(imin:imax,kA) for method 6, the entire
                // dense A(:,kA) for methods 1 and 2, or a slice of the
                // A(imin:imax,kA) vector for all other methods.
                pA     = TaskList [taskid].pA ;
                pA_end = TaskList [taskid].pA_end ;
            }
            else
            {
                // a coarse task computes the entire vector C(:,kC).  The task
                // accesses all of A(imin:imax,kA) for most methods, or all of
                // A(:,kA) for methods 1 and 2.  The vector A(*,kA) appears in
                // Ai,Ax [pA:pA_end-1].
                pA     = Ap_start [kC] ;
                pA_end = Ap_end   [kC] ;
            }

            int64_t alen = pA_end - pA ;
            if (alen == 0) continue ;

            //------------------------------------------------------------------
            // get I
            //------------------------------------------------------------------

            if (fine_task)
            {
                // A fine task accesses I [pI:pI_end-1].  For methods 2 and 6,
                // pI:pI_end is a subset of the entire 0:nI-1 list.  For all
                // other methods, pI = 0 and pI_end = nI, and the task can
                // access all of I.
                pI     = TaskList [taskid].pB ;
                pI_end = TaskList [taskid].pB_end ;
                ilen   = pI_end - pI ;
            }

            //------------------------------------------------------------------
            // determine the method to use
            //------------------------------------------------------------------

            int method ;
            if (fine_task)
            {
                // The method that the fine task uses for its slice of A(*,kA)
                // and C(*,kC) has already been determined by GB_subref_slice.
                method = (int) (-TaskList [taskid].klast) ;
            }
            else
            {
                // determine the method based on A(*,kA) and I
                method = GB_subref_method (NULL, NULL, alen, avlen, Ikind, nI,
                    (Mark != NULL), need_qsort, iinc, nduplicates) ;
            }
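
            // Summary of the cases below (condensed from their own comments):
            // cases 1 and 2 handle a dense A(:,kA) (case 1 with I = ":",
            // case 2 with any I); case 3 handles nI == 1; case 4 handles
            // I = ":"; case 5 handles GB_RANGE; case 6 uses binary search when
            // I is short relative to nnz (A (:,kA)); cases 7, 8, and 9 handle
            // GB_STRIDE with iinc > 1, iinc < -1, and iinc == -1, respectively;
            // and cases 10, 11, and 12 handle a GB_LIST I via the inverse
            // buckets (case 10 needs a sort, case 11 has duplicates but needs
            // no sort, and case 12 has no duplicates and needs no sort).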

            //------------------------------------------------------------------
            // extract C (:,kC) = A (I,kA): consider all cases
            //------------------------------------------------------------------

            switch (method)
            {

                //--------------------------------------------------------------
                case 1 : // C(:,kC) = A(:,kA) where A(:,kA) is dense
                //--------------------------------------------------------------

                    // A (:,kA) has not been sliced
                    ASSERT (Ikind == GB_ALL) ;
                    ASSERT (pA == Ap_start [kC]) ;
                    ASSERT (pA_end == Ap_end [kC]) ;
                    // copy the entire vector and construct indices
                    #if defined ( GB_PHASE_1_OF_2 )
                    clen = ilen ;
                    #else
                    for (int64_t k = 0 ; k < ilen ; k++)
                    {
                        int64_t inew = k + pI ;
                        ASSERT (inew == GB_ijlist (I, inew, Ikind, Icolon)) ;
                        ASSERT (inew == GB_Ai (pA + inew)) ;
                        Ci [pC + k] = inew ;
                    }
                    GB_COPY_RANGE (pC, pA + pI, ilen) ;
                    #endif
                    break ;

                //--------------------------------------------------------------
                case 2 : // C(:,kC) = A(I,kA) where A(I,kA) is dense
                //--------------------------------------------------------------

                    // This method handles any kind of list I, but A(:,kA)
                    // must be dense.  A(:,kA) has not been sliced.
                    ASSERT (pA == Ap_start [kC]) ;
                    ASSERT (pA_end == Ap_end [kC]) ;
                    // scan I and get the entry in A(:,kA) via direct lookup
                    #if defined ( GB_PHASE_1_OF_2 )
                    clen = ilen ;
                    #else
                    for (int64_t k = 0 ; k < ilen ; k++)
                    {
                        // C(inew,kC) = A(i,kA), and it always exists.
                        int64_t inew = k + pI ;
                        int64_t i = GB_ijlist (I, inew, Ikind, Icolon) ;
                        ASSERT (i == GB_Ai (pA + i)) ;
                        Ci [pC + k] = inew ;
                        GB_COPY_ENTRY (pC + k, pA + i) ;
                    }
                    #endif
                    break ;

                //--------------------------------------------------------------
                case 3 : // the list I has a single index, ibegin
                //--------------------------------------------------------------

                    // binary search in GB_subref_phase0 has already found it.
                    // This can be any Ikind with nI=1: GB_ALL with A->vlen=1,
                    // GB_RANGE with ibegin==iend, GB_STRIDE such as 0:-1:0
                    // (with length 1), or a GB_LIST with ni=1.

                    // Time: 50x faster than MATLAB

                    ASSERT (!fine_task) ;
                    ASSERT (alen == 1) ;
                    ASSERT (nI == 1) ;
                    ASSERT (GB_Ai (pA) == GB_ijlist (I, 0, Ikind, Icolon)) ;
                    #if defined ( GB_PHASE_1_OF_2 )
                    clen = 1 ;
                    #else
                    Ci [pC] = 0 ;
                    GB_COPY_ENTRY (pC, pA) ;
                    #endif
                    break ;

                //--------------------------------------------------------------
                case 4 : // Ikind is ":", thus C(:,kC) = A (:,kA)
                //--------------------------------------------------------------

                    // Time: 1x MATLAB but low speedup on the Mac.  Why?
                    // Probably memory bound since it is just memcpy's.

                    ASSERT (Ikind == GB_ALL && ibegin == 0) ;
                    #if defined ( GB_PHASE_1_OF_2 )
                    clen = alen ;
                    #else
                    #if defined ( GB_SYMBOLIC )
                    if (nzombies == 0)
                    {
                        memcpy (Ci + pC, Ai + pA, alen * sizeof (int64_t)) ;
                    }
                    else
                    {
                        // with zombies
                        for (int64_t k = 0 ; k < alen ; k++)
                        {
                            // symbolic C(:,kC) = A(:,kA) where A has zombies
                            int64_t i = GB_Ai (pA + k) ;
                            ASSERT (i == GB_ijlist (I, i, Ikind, Icolon)) ;
                            Ci [pC + k] = i ;
                        }
                    }
                    #else
                    memcpy (Ci + pC, Ai + pA, alen * sizeof (int64_t)) ;
                    #endif
                    GB_COPY_RANGE (pC, pA, alen) ;
                    #endif
                    break ;

                //--------------------------------------------------------------
                case 5 : // Ikind is GB_RANGE = ibegin:iend
                //--------------------------------------------------------------

                    // Time: much faster than MATLAB.  Good speedup too.

                    ASSERT (Ikind == GB_RANGE) ;
                    #if defined ( GB_PHASE_1_OF_2 )
                    clen = alen ;
                    #else
                    for (int64_t k = 0 ; k < alen ; k++)
                    {
                        int64_t i = GB_Ai (pA + k) ;
                        int64_t inew = i - ibegin ;
                        ASSERT (i == GB_ijlist (I, inew, Ikind, Icolon)) ;
                        Ci [pC + k] = inew ;
                    }
                    GB_COPY_RANGE (pC, pA, alen) ;
                    #endif
                    break ;
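
                // A hedged note on case 6 below: each of the ilen indices in
                // I [pI:pI_end-1] triggers one binary search over the alen
                // entries of A(:,kA), so the work is roughly
                // O (ilen * log2 (alen)), compared with the O (alen) scan of
                // A(:,kA) used by the bucket-based cases.  That is why case 6
                // pays off only when I is much shorter than nnz (A (:,kA)).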

                //--------------------------------------------------------------
                case 6 : // I is short vs nnz (A (:,kA)), use binary search
                //--------------------------------------------------------------

                    // Time: very slow unless I is very short and A(:,kA) is
                    // very long.

                    // This case can handle any kind of I, and an A(:,kA) with
                    // any properties.  For a fine task, A(:,kA) has not been
                    // sliced; I has been sliced instead.

                    // If the I bucket inverse has not been created, this
                    // method is the only option.  Alternatively, if nI =
                    // length (I) is << nnz (A (:,kA)), then scanning I and
                    // doing a binary search of A (:,kA) is faster than doing a
                    // linear-time search of A(:,kA) and a lookup into the I
                    // bucket inverse.

                    // The vector of C is constructed in sorted order, so no
                    // sort is needed.

                    // A(:,kA) has not been sliced.
                    ASSERT (pA == Ap_start [kC]) ;
                    ASSERT (pA_end == Ap_end [kC]) ;

                    // scan I, in order, and search for the entry in A(:,kA)
                    for (int64_t k = 0 ; k < ilen ; k++)
                    {
                        // C(inew,kC) = A (i,kA), if it exists.
                        // i = I [inew] ; or from a colon expression
                        int64_t inew = k + pI ;
                        int64_t i = GB_ijlist (I, inew, Ikind, Icolon) ;
                        bool found ;
                        int64_t pleft = pA ;
                        int64_t pright = pA_end - 1 ;
                        #if defined ( GB_SYMBOLIC )
                        bool is_zombie ;
                        GB_BINARY_SEARCH_ZOMBIE (i, Ai, pleft, pright, found,
                            nzombies, is_zombie) ;
                        #else
                        GB_BINARY_SEARCH (i, Ai, pleft, pright, found) ;
                        #endif
                        if (found)
                        {
                            ASSERT (i == GB_Ai (pleft)) ;
                            #if defined ( GB_PHASE_1_OF_2 )
                            clen++ ;
                            #else
                            ASSERT (pC < pC_end) ;
                            Ci [pC] = inew ;
                            GB_COPY_ENTRY (pC, pleft) ;
                            pC++ ;
                            #endif
                        }
                    }
                    #if defined ( GB_PHASE_2_OF_2 )
                    ASSERT (pC == pC_end) ;
                    #endif
                    break ;
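
                // A hedged note on cases 7, 8, and 9 below: they scan the
                // entries of A(:,kA) and keep A(i,kA) only if i lies on the
                // stride ibegin:iinc:iend, computing its new index inew by
                // integer division.  For example (hypothetical values), with
                // I = 1:3:13 (ibegin = 1, iinc = 3), the entry with i = 7 is
                // kept since (7-1) % 3 == 0 and lands at inew = (7-1)/3 = 2,
                // while an entry with i = 8 is dropped since (8-1) % 3 != 0.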

                //--------------------------------------------------------------
                case 7 : // I is ibegin:iinc:iend with iinc > 1
                //--------------------------------------------------------------

                    // Time: 1 thread: C=A(1:2:n,:) is 3x slower than MATLAB
                    // but has good speedup.  About as fast as MATLAB with
                    // enough threads.

                    ASSERT (Ikind == GB_STRIDE && iinc > 1) ;
                    for (int64_t k = 0 ; k < alen ; k++)
                    {
                        // A(i,kA) present; see if it is in ibegin:iinc:iend
                        int64_t i = GB_Ai (pA + k) ;
                        ASSERT (ibegin <= i && i <= iend) ;
                        i = i - ibegin ;
                        if (i % iinc == 0)
                        {
                            // i is in the sequence ibegin:iinc:iend
                            #if defined ( GB_PHASE_1_OF_2 )
                            clen++ ;
                            #else
                            int64_t inew = i / iinc ;
                            ASSERT (pC < pC_end) ;
                            Ci [pC] = inew ;
                            GB_COPY_ENTRY (pC, pA + k) ;
                            pC++ ;
                            #endif
                        }
                    }
                    #if defined ( GB_PHASE_2_OF_2 )
                    ASSERT (pC == pC_end) ;
                    #endif
                    break ;

                //--------------------------------------------------------------
                case 8 : // I is ibegin:iinc:iend with iinc < -1
                //--------------------------------------------------------------

                    // Time: 2x slower than MATLAB for iinc = -2 or -8.
                    // Good speedup though.  Faster than MATLAB for
                    // large values (iinc = -128).

                    ASSERT (Ikind == GB_STRIDE && iinc < -1) ;
                    for (int64_t k = alen - 1 ; k >= 0 ; k--)
                    {
                        // A(i,kA) present; see if it is in ibegin:iinc:iend
                        int64_t i = GB_Ai (pA + k) ;
                        ASSERT (iend <= i && i <= ibegin) ;
                        i = ibegin - i ;
                        if (i % inc == 0)
                        {
                            // i is in the sequence ibegin:iinc:iend
                            #if defined ( GB_PHASE_1_OF_2 )
                            clen++ ;
                            #else
                            int64_t inew = i / inc ;
                            ASSERT (pC < pC_end) ;
                            Ci [pC] = inew ;
                            GB_COPY_ENTRY (pC, pA + k) ;
                            pC++ ;
                            #endif
                        }
                    }
                    #if defined ( GB_PHASE_2_OF_2 )
                    ASSERT (pC == pC_end) ;
                    #endif
                    break ;

                //--------------------------------------------------------------
                case 9 : // I = ibegin:(-1):iend
                //--------------------------------------------------------------

                    // Time: much faster than MATLAB.  Good speedup.

                    ASSERT (Ikind == GB_STRIDE && iinc == -1) ;
                    #if defined ( GB_PHASE_1_OF_2 )
                    clen = alen ;
                    #else
                    for (int64_t k = alen - 1 ; k >= 0 ; k--)
                    {
                        // A(i,kA) is present
                        int64_t i = GB_Ai (pA + k) ;
                        int64_t inew = (ibegin - i) ;
                        ASSERT (i == GB_ijlist (I, inew, Ikind, Icolon)) ;
                        Ci [pC] = inew ;
                        GB_COPY_ENTRY (pC, pA + k) ;
                        pC++ ;
                    }
                    #endif
                    break ;
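
                // A hedged note on cases 10, 11, and 12 below: all three scan
                // the entries of A(:,kA) and map each row index i to its new
                // index (or indices) inew through the inverse buckets Mark and
                // Inext described near the top of this file.  They differ only
                // in whether I can contain duplicates and whether the
                // resulting C(:,kC) still has to be sorted afterwards.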

                //--------------------------------------------------------------
                case 10 : // I unsorted, and C needs qsort, duplicates OK
                //--------------------------------------------------------------

                    // Time: with one thread: 2x slower than MATLAB, probably
                    // because of the qsort.  Good speedup however.  This is
                    // used if qsort is needed but ndupl == 0.  Try a method
                    // that needs qsort, but no duplicates?

                    // Case 10 works well when I has many entries and A(:,kA)
                    // has few entries.  C(:,kC) must be sorted after this pass.

                    ASSERT (Ikind == GB_LIST) ;
                    for (int64_t k = 0 ; k < alen ; k++)
                    {
                        // A(i,kA) present, look it up in the I inverse buckets
                        int64_t i = GB_Ai (pA + k) ;
                        // traverse bucket i for all indices inew where
                        // i == I [inew] or where i is from a colon expression
                        GB_for_each_index_in_bucket (inew, i)
                        {
                            ASSERT (inew >= 0 && inew < nI) ;
                            ASSERT (i == GB_ijlist (I, inew, Ikind, Icolon)) ;
                            #if defined ( GB_PHASE_1_OF_2 )
                            clen++ ;
                            #else
                            Ci [pC] = inew ;
                            GB_COPY_ENTRY (pC, pA + k) ;
                            pC++ ;
                            #endif
                        }
                    }

                    // TODO: skip the sort if C is allowed to be jumbled on
                    // output.  Flag C as jumbled instead.

                    #if defined ( GB_PHASE_2_OF_2 )
                    ASSERT (pC == pC_end) ;
                    if (!fine_task)
                    {
                        // a coarse task owns this entire C(:,kC) vector, so
                        // the sort can be done now.  The sort for vectors
                        // handled by multiple fine tasks must wait until all
                        // tasks are completed, below in the post sort.
                        pC = Cp [kC] ;
                        GB_qsort_1b (Ci + pC, (GB_void *) (Cx + pC*GB_CSIZE1),
                            GB_CSIZE2, clen) ;
                    }
                    #endif
                    break ;

                //--------------------------------------------------------------
                case 11 : // I not contiguous, with duplicates.  No qsort needed
                //--------------------------------------------------------------

                    // Case 11 works well when I has many entries and A(:,kA)
                    // has few entries.  It requires that I be sorted on input,
                    // so that no sort is required for C(:,kC).  It is
                    // otherwise identical to Case 10.

                    ASSERT (Ikind == GB_LIST) ;
                    for (int64_t k = 0 ; k < alen ; k++)
                    {
                        // A(i,kA) present, look it up in the I inverse buckets
                        int64_t i = GB_Ai (pA + k) ;
                        // traverse bucket i for all indices inew where
                        // i == I [inew] or where i is from a colon expression
                        GB_for_each_index_in_bucket (inew, i)
                        {
                            ASSERT (inew >= 0 && inew < nI) ;
                            ASSERT (i == GB_ijlist (I, inew, Ikind, Icolon)) ;
                            #if defined ( GB_PHASE_1_OF_2 )
                            clen++ ;
                            #else
                            Ci [pC] = inew ;
                            GB_COPY_ENTRY (pC, pA + k) ;
                            pC++ ;
                            #endif
                        }
                    }

                    #if defined ( GB_PHASE_2_OF_2 )
                    ASSERT (pC == pC_end) ;
                    #endif
                    break ;

                //--------------------------------------------------------------
                case 12 : // I not contiguous, no duplicates.  No qsort needed.
                //--------------------------------------------------------------

                    // Identical to Case 11, except GB_for_each_index_in_bucket
                    // just needs to iterate 0 or 1 times.  Works well when I
                    // has many entries and A(:,kA) has few entries.

                    ASSERT (Ikind == GB_LIST && nduplicates == 0) ;
                    for (int64_t k = 0 ; k < alen ; k++)
                    {
                        // A(i,kA) present, look it up in the I inverse buckets
                        int64_t i = GB_Ai (pA + k) ;
                        // bucket i has at most one index inew such that
                        // i == I [inew]
                        int64_t inew = Mark [i] - 1 ;
                        if (inew >= 0)
                        {
                            ASSERT (inew >= 0 && inew < nI) ;
                            ASSERT (i == GB_ijlist (I, inew, Ikind, Icolon)) ;
                            #if defined ( GB_PHASE_1_OF_2 )
                            clen++ ;
                            #else
                            Ci [pC] = inew ;
                            GB_COPY_ENTRY (pC, pA + k) ;
                            pC++ ;
                            #endif
                        }
                    }

                    #if defined ( GB_PHASE_2_OF_2 )
                    ASSERT (pC == pC_end) ;
                    #endif
                    break ;

                //--------------------------------------------------------------
                default: ;
                //--------------------------------------------------------------
            }

            //------------------------------------------------------------------
            // final count of nnz (C (:,j))
            //------------------------------------------------------------------

            #if defined ( GB_PHASE_1_OF_2 )
            if (fine_task)
            {
                TaskList [taskid].pC = clen ;
            }
            else
            {
                Cp [kC] = clen ;
            }
            #endif
        }
    }

    //--------------------------------------------------------------------------
    // phase2: post sort for any vectors handled by fine tasks with method 10
    //--------------------------------------------------------------------------

    // TODO: skip the sort if C is allowed to be jumbled on output.
    // Flag C as jumbled instead.

    #if defined ( GB_PHASE_2_OF_2 )
    if (post_sort)
    {
        int taskid ;
        #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
        for (taskid = 0 ; taskid < ntasks ; taskid++)
        {
            int64_t kC = TaskList [taskid].kfirst ;
            bool do_post_sort = (TaskList [taskid].len != 0) ;
            if (do_post_sort)
            {
                // This is the first fine task with method 10 for C(:,kC).  The
                // vector C(:,kC) must be sorted, since method 10 left it with
                // unsorted indices.
                int64_t pC = Cp [kC] ;
                int64_t clen = Cp [kC+1] - pC ;
                GB_qsort_1b (Ci + pC, (GB_void *) (Cx + pC*GB_CSIZE1),
                    GB_CSIZE2, clen) ;
            }
        }
    }

    #endif
}

#undef GB_Ai
#undef GB_for_each_index_in_bucket
#undef GB_COPY_RANGE
#undef GB_COPY_ENTRY
#undef GB_CTYPE
#undef GB_CSIZE1
#undef GB_CSIZE2
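
// A hedged note on how this template is used (the exact includers are not
// shown here): it is #include'd by enclosing source files that first define
// either GB_SYMBOLIC or GB_NUMERIC, and either GB_PHASE_1_OF_2 or
// GB_PHASE_2_OF_2, along with the variables referenced above (A, C, I, Icolon,
// Ikind, nI, TaskList, ntasks, nthreads, Mark, Inext, need_qsort, nduplicates,
// post_sort, Ap_start, Ap_end, Cp, Cnvec).  The #undef's above keep the helper
// macros defined at the top of this file from leaking into the next inclusion.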