1 //------------------------------------------------------------------------------ 2 // GB_dense_subassign_23_template: C += B where C is dense; B is sparse or dense 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // All entries in C+=B are computed entirely in parallel, using the same kind of 11 // parallelism as Template/GB_AxB_colscale.c. 12 13 #include "GB_unused.h" 14 15 { 16 17 //-------------------------------------------------------------------------- 18 // get C and B 19 //-------------------------------------------------------------------------- 20 21 const GB_BTYPE *restrict Bx = (GB_BTYPE *) B->x ; 22 GB_CTYPE *restrict Cx = (GB_CTYPE *) C->x ; 23 ASSERT (GB_is_dense (C)) ; 24 const int64_t cnz = GB_NNZ_HELD (C) ; 25 26 if (GB_IS_BITMAP (B)) 27 { 28 29 //---------------------------------------------------------------------- 30 // C += B when C is dense and B is bitmap 31 //---------------------------------------------------------------------- 32 33 const int8_t *restrict Bb = B->b ; 34 int64_t p ; 35 #pragma omp parallel for num_threads(B_nthreads) schedule(static) 36 for (p = 0 ; p < cnz ; p++) 37 { 38 if (!Bb [p]) continue ; 39 GB_GETB (bij, Bx, p) ; // bij = B(i,j) 40 GB_BINOP (GB_CX (p), GB_CX (p), bij, 0, 0) ; // C(i,j) += bij 41 } 42 43 } 44 else if (B_ek_slicing == NULL) 45 { 46 47 //---------------------------------------------------------------------- 48 // C += B when both C and B are dense 49 //---------------------------------------------------------------------- 50 51 ASSERT (GB_is_dense (B)) ; 52 int64_t p ; 53 #pragma omp parallel for num_threads(B_nthreads) schedule(static) 54 for (p = 0 ; p < cnz ; p++) 55 { 56 GB_GETB (bij, Bx, p) ; // bij = B(i,j) 57 GB_BINOP (GB_CX (p), GB_CX (p), bij, 0, 0) ; // C(i,j) += bij 58 } 59 60 } 61 else 62 { 63 64 //---------------------------------------------------------------------- 65 // C += B when C is dense and B is sparse 66 //---------------------------------------------------------------------- 67 68 ASSERT (GB_JUMBLED_OK (B)) ; 69 70 const int64_t *restrict Bp = B->p ; 71 const int64_t *restrict Bh = B->h ; 72 const int64_t *restrict Bi = B->i ; 73 const int64_t bvlen = B->vlen ; 74 const int64_t cvlen = C->vlen ; 75 bool B_jumbled = B->jumbled ; 76 77 const int64_t *restrict kfirst_Bslice = B_ek_slicing ; 78 const int64_t *restrict klast_Bslice = kfirst_Bslice + B_ntasks ; 79 const int64_t *restrict pstart_Bslice = klast_Bslice + B_ntasks ; 80 81 int taskid ; 82 #pragma omp parallel for num_threads(B_nthreads) schedule(dynamic,1) 83 for (taskid = 0 ; taskid < B_ntasks ; taskid++) 84 { 85 86 // if kfirst > klast then taskid does no work at all 87 int64_t kfirst = kfirst_Bslice [taskid] ; 88 int64_t klast = klast_Bslice [taskid] ; 89 90 //------------------------------------------------------------------ 91 // C(:,kfirst:klast) += B(:,kfirst:klast) 92 //------------------------------------------------------------------ 93 94 for (int64_t k = kfirst ; k <= klast ; k++) 95 { 96 97 //-------------------------------------------------------------- 98 // find the part of B(:,k) and C(:,k) for this task 99 //-------------------------------------------------------------- 100 101 int64_t j = GBH (Bh, k) ; 102 int64_t my_pB_start, my_pB_end ; 103 GB_get_pA (&my_pB_start, &my_pB_end, taskid, k, 104 kfirst, klast, pstart_Bslice, Bp, bvlen) ; 105 106 int64_t pB_start = GBP (Bp, k, bvlen) ; 107 int64_t pB_end = GBP (Bp, k+1, bvlen) ; 108 bool bjdense = ((pB_end - pB_start) == cvlen) ; 109 110 // pC points to the start of C(:,j) if C is dense 111 int64_t pC = j * cvlen ; 112 113 //-------------------------------------------------------------- 114 // C(:,j) += B(:,j) 115 //-------------------------------------------------------------- 116 117 if (bjdense && !B_jumbled) 118 { 119 120 //---------------------------------------------------------- 121 // both C(:,j) and B(:,j) are dense 122 //---------------------------------------------------------- 123 124 GB_PRAGMA_SIMD_VECTORIZE 125 for (int64_t pB = my_pB_start ; pB < my_pB_end ; pB++) 126 { 127 int64_t i = pB - pB_start ; 128 int64_t p = pC + i ; 129 // bij = B(i,j) 130 GB_GETB (bij, Bx, pB) ; 131 // C(i,j) += bij 132 GB_BINOP (GB_CX (p), GB_CX (p), bij, 0, 0) ; 133 } 134 135 } 136 else 137 { 138 139 //---------------------------------------------------------- 140 // C(:,j) is dense; B(:,j) is sparse 141 //---------------------------------------------------------- 142 143 GB_PRAGMA_SIMD_VECTORIZE 144 for (int64_t pB = my_pB_start ; pB < my_pB_end ; pB++) 145 { 146 int64_t i = Bi [pB] ; 147 int64_t p = pC + i ; 148 GB_GETB (bij, Bx, pB) ; // bij = B(i,j) 149 // C(i,j) += bij 150 GB_BINOP (GB_CX (p), GB_CX (p), bij, 0, 0) ; 151 } 152 } 153 } 154 } 155 } 156 } 157 158