1 //------------------------------------------------------------------------------ 2 // GB_bitmap_masker_template: phase2 for R = masker (C, M, Z), R is bitmap 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // Computes C<M>=Z or C<!M>=Z, returning the result in R, which is bitmap. 11 // The input matrix C is not modified. Effectively, this computes R=C and then 12 // R<M>=Z or R<!M>=Z. If the C_replace descriptor is enabled, then C has 13 // already been cleared, and is an empty (but non-NULL) matrix. 14 15 // phase2: computes R in a single pass 16 17 // C is sparse or hypersparse. Z is bitmap or full. R is bitmap. 18 // M has any sparsity structure. 19 20 // ------------------------------------------ 21 // C <!M> = Z R 22 // ------------------------------------------ 23 24 // sparse sparse bitmap bitmap 25 // sparse sparse full bitmap 26 27 // sparse bitmap bitmap bitmap 28 // sparse bitmap full bitmap 29 30 // sparse full bitmap bitmap 31 // sparse full full bitmap 32 33 // ------------------------------------------ 34 // C <M> = Z R 35 // ------------------------------------------ 36 37 // sparse bitmap bitmap bitmap 38 // sparse bitmap full bitmap 39 40 // sparse full bitmap bitmap 41 // sparse full full bitmap 42 43 // FUTURE:: add special cases for C==Z, C==M, and Z==M aliases 44 45 { 46 47 int64_t p, rnvals = 0 ; 48 49 ASSERT (R_sparsity == GxB_BITMAP) ; 50 ASSERT (C_is_sparse || C_is_hyper) ; 51 ASSERT (Z_is_bitmap || Z_is_full) ; 52 53 //-------------------------------------------------------------------------- 54 // scatter C into the R bitmap 55 //-------------------------------------------------------------------------- 56 57 ASSERT_MATRIX_OK (C, "C input to R_bitmap_masker", GB0) ; 58 GB_SLICE_MATRIX (C, 8, chunk) ; 59 60 #pragma omp parallel for num_threads(C_nthreads) schedule(dynamic,1) \ 61 reduction(+:rnvals) 62 for (taskid = 0 ; taskid < C_ntasks ; taskid++) 63 { 64 int64_t kfirst = kfirst_Cslice [taskid] ; 65 int64_t klast = klast_Cslice [taskid] ; 66 for (int64_t k = kfirst ; k <= klast ; k++) 67 { 68 // find the part of C(:,k) for this task 69 int64_t j = GBH (Ch, k) ; 70 int64_t pC_start, pC_end ; 71 GB_get_pA (&pC_start, &pC_end, taskid, k, kfirst, 72 klast, pstart_Cslice, Cp, vlen) ; 73 int64_t pR_start = j * vlen ; 74 // traverse over C(:,j), the kth vector of C 75 for (int64_t pC = pC_start ; pC < pC_end ; pC++) 76 { 77 // R(i,j) = C(i,j) 78 int64_t i = Ci [pC] ; 79 int64_t pR = pR_start + i ; 80 Rb [pR] = 1 ; 81 rnvals++ ; 82 memcpy (Rx + (pR)*rsize, Cx +(pC)*rsize, rsize) ; 83 } 84 } 85 } 86 87 R->nvals = rnvals ; 88 ASSERT_MATRIX_OK (R, "R with C scattered", GB0) ; 89 90 //-------------------------------------------------------------------------- 91 // R<M>=Z or R<!M>=Z 92 //-------------------------------------------------------------------------- 93 94 if (M_is_sparse || M_is_hyper) 95 { 96 97 //---------------------------------------------------------------------- 98 // Method05: M is sparse or hypersparse, Z bitmap/full, R bitmap 99 //---------------------------------------------------------------------- 100 101 // ------------------------------------------ 102 // C <!M> = Z R 103 // ------------------------------------------ 104 105 // sparse sparse bitmap bitmap 106 // sparse sparse full bitmap 107 108 ASSERT (Mask_comp) ; 109 110 //---------------------------------------------------------------------- 111 // scatter M into the R bitmap 112 //---------------------------------------------------------------------- 113 114 GB_SLICE_MATRIX (M, 8, chunk) ; 115 116 #pragma omp parallel for num_threads(M_nthreads) schedule(dynamic,1) 117 for (taskid = 0 ; taskid < M_ntasks ; taskid++) 118 { 119 int64_t kfirst = kfirst_Mslice [taskid] ; 120 int64_t klast = klast_Mslice [taskid] ; 121 for (int64_t k = kfirst ; k <= klast ; k++) 122 { 123 // find the part of M(:,k) for this task 124 int64_t j = GBH (Mh, k) ; 125 int64_t pM_start, pM_end ; 126 GB_get_pA (&pM_start, &pM_end, taskid, k, kfirst, 127 klast, pstart_Mslice, Mp, vlen) ; 128 int64_t pR_start = j * vlen ; 129 // traverse over M(:,j), the kth vector of M 130 for (int64_t pM = pM_start ; pM < pM_end ; pM++) 131 { 132 // mark R(i,j) if M(i,j) is true 133 bool mij = GB_mcast (Mx, pM, msize) ; 134 if (mij) 135 { 136 int64_t i = Mi [pM] ; 137 int64_t p = pR_start + i ; 138 Rb [p] += 2 ; 139 } 140 } 141 } 142 } 143 144 //---------------------------------------------------------------------- 145 // R<!M>=Z, using M scattered into R 146 //---------------------------------------------------------------------- 147 148 // Rb is marked as follows: 149 // 0: R(i,j) is not present, and M(i,j) is false 150 // 1: R(i,j) is present, and M(i,j) is false 151 // 2: R(i,j) is not present, and M(i,j) is true 152 // 3: R(i,j) is present, and M(i,j) is true 153 154 // M is complemented, but shown uncomplemented in the table below since 155 // that is how it is scattered into R. 156 157 // Rb R(i,j) M(i,j) Z(i,j) modification to R(i,j) 158 // 0 - 0 zij R(i,j) = Z(i,j), new value, rnvals++ 159 // 0 - 0 - do nothing 160 // 1 rij 0 zij R(i,j) = Z(i,j), overwrite 161 // 1 rij 0 - delete R(i,j), rnvals-- 162 // 2 - 1 zij do nothing, set Rb to 0 163 // 2 - 1 - do nothing, set Rb to 0 164 // 3 rij 1 zij keep R(i,j), set Rb to 1 165 // 3 rij 1 - keep R(i,j), set Rb to 1 166 167 #pragma omp parallel for num_threads(R_nthreads) schedule(static) \ 168 reduction(+:rnvals) 169 for (p = 0 ; p < rnz ; p++) 170 { 171 int8_t r = Rb [p] ; 172 int8_t z = GBB (Zb, p) ; 173 switch (r) 174 { 175 case 0 : // R(i,j) not present, M(i,j) false 176 if (z) 177 { 178 // R(i,j) = Z(i,j), insert new value 179 memcpy (Rx +(p)*rsize, Zx +(p)*rsize, rsize) ; 180 Rb [p] = 1 ; 181 rnvals++ ; 182 } 183 break ; 184 185 case 1 : // R(i,j) present, M(i,j) false 186 if (z) 187 { 188 // R(i,j) = Z(i,j), update prior value 189 memcpy (Rx +(p)*rsize, Zx +(p)*rsize, rsize) ; 190 } 191 else 192 { 193 // delete R(i,j) 194 Rb [p] = 0 ; 195 rnvals-- ; 196 } 197 break ; 198 199 case 2 : // R(i,j) not present, M(i,j) true 200 Rb [p] = 0 ; 201 break ; 202 203 case 3 : // R(i,j) present, M(i,j) true 204 Rb [p] = 1 ; 205 break ; 206 207 default: ; 208 } 209 } 210 211 } 212 else 213 { 214 215 //---------------------------------------------------------------------- 216 // Method06: M and Z are bitmap or full, R is bitmap 217 //---------------------------------------------------------------------- 218 219 // ------------------------------------------ 220 // C <!M> = Z R 221 // ------------------------------------------ 222 223 // sparse bitmap bitmap bitmap 224 // sparse bitmap full bitmap 225 226 // sparse full bitmap bitmap 227 // sparse full full bitmap 228 229 // ------------------------------------------ 230 // C <M> = Z R 231 // ------------------------------------------ 232 233 // sparse bitmap bitmap bitmap 234 // sparse bitmap full bitmap 235 236 // sparse full bitmap bitmap 237 // sparse full full bitmap 238 239 // Rb R(i,j) M(i,j) Z(i,j) modification to R(i,j) 240 241 // 0 - 0 zij do nothing 242 // 0 - 0 - do nothing 243 // 1 rij 0 zij do nothing 244 // 1 rij 0 - do nothing 245 246 // 0 - 1 zij R(i,j) = Z(i,j), rnvals++ 247 // 0 - 1 - do nothing 248 // 1 rij 1 zij R(i,j) = Z(i,j), no change to rnvals 249 // 1 rij 1 - delete, rnvals-- 250 251 #pragma omp parallel for num_threads(R_nthreads) schedule(static) \ 252 reduction(+:rnvals) 253 for (p = 0 ; p < rnz ; p++) 254 { 255 bool mij = GBB (Mb, p) && GB_mcast (Mx, p, msize) ; 256 if (Mask_comp) mij = !mij ; 257 if (mij) 258 { 259 int8_t z = GBB (Zb, p) ; 260 int8_t r = Rb [p] ; 261 if (r) 262 { 263 if (z) 264 { 265 // R(i,j) = Z(i,j), update, no change to rnvals 266 memcpy (Rx +(p)*rsize, Zx +(p)*rsize, rsize) ; 267 } 268 else 269 { 270 // delete R(i,j) 271 Rb [p] = 0 ; 272 rnvals-- ; 273 } 274 } 275 else if (z) 276 { 277 // R(i,j) = Z(i,j), new entry 278 memcpy (Rx +(p)*rsize, Zx +(p)*rsize, rsize) ; 279 Rb [p] = 1 ; 280 rnvals++ ; 281 } 282 } 283 } 284 } 285 286 R->nvals = rnvals ; 287 } 288 289