1 //------------------------------------------------------------------------------ 2 // GB_full_add_template: phase2 for C=A+B, C<M>=A+B, C<!M>=A+B, C is full 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // C is full. The mask M is not present (otherwise, C would be sparse, 11 // hypersparse, or bitmap). All of these methods are asymptotically optimal. 12 13 // ------------------------------------------ 14 // C = A + B 15 // ------------------------------------------ 16 // full . sparse full 17 // full . bitmap full 18 // full . full sparse 19 // full . full bitmap 20 // full . full full 21 22 { 23 24 int64_t p ; 25 ASSERT (M == NULL) ; 26 ASSERT (A_is_full || B_is_full) ; 27 ASSERT (C_sparsity == GxB_FULL) ; 28 29 if (A_is_full && B_is_full) 30 { 31 32 //---------------------------------------------------------------------- 33 // Method30: C, A, B are all full 34 //---------------------------------------------------------------------- 35 36 #pragma omp parallel for num_threads(C_nthreads) schedule(static) 37 for (p = 0 ; p < cnz ; p++) 38 { 39 // C (i,j) = A (i,j) + B (i,j) 40 GB_GETA (aij, Ax, p) ; 41 GB_GETB (bij, Bx, p) ; 42 GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ; 43 } 44 45 } 46 else if (A_is_full) 47 { 48 49 //---------------------------------------------------------------------- 50 // C and A are full; B is hypersparse, sparse, or bitmap 51 //---------------------------------------------------------------------- 52 53 if (B_is_bitmap) 54 { 55 56 //------------------------------------------------------------------ 57 // Method31: C and A are full; B is bitmap 58 //------------------------------------------------------------------ 59 60 #pragma omp parallel for num_threads(C_nthreads) schedule(static) 61 for (p = 0 ; p < cnz ; p++) 62 { 63 if (Bb [p]) 64 { 65 // C (i,j) = A (i,j) + B (i,j) 66 GB_GETA (aij, Ax, p) ; 67 GB_GETB (bij, Bx, p) ; 68 GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ; 69 } 70 else 71 { 72 // C (i,j) = A (i,j) 73 GB_COPY_A_TO_C (GB_CX (p), Ax, p) ; 74 } 75 } 76 77 } 78 else 79 { 80 81 //------------------------------------------------------------------ 82 // Method32: C and A full; B is sparse or hypersparse 83 //------------------------------------------------------------------ 84 85 #pragma omp parallel for num_threads(C_nthreads) schedule(static) 86 for (p = 0 ; p < cnz ; p++) 87 { 88 // C (i,j) = A (i,j) 89 GB_COPY_A_TO_C (GB_CX (p), Ax, p) ; 90 } 91 92 GB_SLICE_MATRIX (B, 8, chunk) ; 93 94 #pragma omp parallel for num_threads(B_nthreads) schedule(dynamic,1) 95 for (taskid = 0 ; taskid < B_ntasks ; taskid++) 96 { 97 int64_t kfirst = kfirst_Bslice [taskid] ; 98 int64_t klast = klast_Bslice [taskid] ; 99 for (int64_t k = kfirst ; k <= klast ; k++) 100 { 101 // find the part of B(:,k) for this task 102 int64_t j = GBH (Bh, k) ; 103 int64_t pB_start, pB_end ; 104 GB_get_pA (&pB_start, &pB_end, taskid, k, kfirst, 105 klast, pstart_Bslice, Bp, vlen) ; 106 int64_t pC_start = j * vlen ; 107 // traverse over B(:,j), the kth vector of B 108 for (int64_t pB = pB_start ; pB < pB_end ; pB++) 109 { 110 // C (i,j) = A (i,j) + B (i,j) 111 int64_t i = Bi [pB] ; 112 int64_t p = pC_start + i ; 113 GB_GETA (aij, Ax, p) ; 114 GB_GETB (bij, Bx, pB) ; 115 GB_BINOP (GB_CX (p), aij, bij, i, j) ; 116 } 117 } 118 } 119 } 120 121 } 122 else 123 { 124 125 //---------------------------------------------------------------------- 126 // C and B are full; A is hypersparse, sparse, or bitmap 127 //---------------------------------------------------------------------- 128 129 if (A_is_bitmap) 130 { 131 132 //------------------------------------------------------------------ 133 // Method33: C and B are full; A is bitmap 134 //------------------------------------------------------------------ 135 136 #pragma omp parallel for num_threads(C_nthreads) schedule(static) 137 for (p = 0 ; p < cnz ; p++) 138 { 139 if (Ab [p]) 140 { 141 // C (i,j) = A (i,j) + B (i,j) 142 GB_GETA (aij, Ax, p) ; 143 GB_GETB (bij, Bx, p) ; 144 GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ; 145 } 146 else 147 { 148 // C (i,j) = B (i,j) 149 GB_COPY_B_TO_C (GB_CX (p), Bx, p) ; 150 } 151 } 152 153 } 154 else 155 { 156 157 //------------------------------------------------------------------ 158 // Method34: C and B are full; A is hypersparse or sparse 159 //------------------------------------------------------------------ 160 161 #pragma omp parallel for num_threads(C_nthreads) schedule(static) 162 for (p = 0 ; p < cnz ; p++) 163 { 164 // C (i,j) = B (i,j) 165 GB_COPY_B_TO_C (GB_CX (p), Bx, p) ; 166 } 167 168 GB_SLICE_MATRIX (A, 8, chunk) ; 169 170 #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1) 171 for (taskid = 0 ; taskid < A_ntasks ; taskid++) 172 { 173 int64_t kfirst = kfirst_Aslice [taskid] ; 174 int64_t klast = klast_Aslice [taskid] ; 175 for (int64_t k = kfirst ; k <= klast ; k++) 176 { 177 // find the part of A(:,k) for this task 178 int64_t j = GBH (Ah, k) ; 179 int64_t pA_start, pA_end ; 180 GB_get_pA (&pA_start, &pA_end, taskid, k, kfirst, 181 klast, pstart_Aslice, Ap, vlen) ; 182 int64_t pC_start = j * vlen ; 183 // traverse over A(:,j), the kth vector of A 184 for (int64_t pA = pA_start ; pA < pA_end ; pA++) 185 { 186 // C (i,j) = A (i,j) + B (i,j) 187 int64_t i = Ai [pA] ; 188 int64_t p = pC_start + i ; 189 GB_GETA (aij, Ax, pA) ; 190 GB_GETB (bij, Bx, p) ; 191 GB_BINOP (GB_CX (p), aij, bij, i, j) ; 192 } 193 } 194 } 195 } 196 } 197 } 198 199