1 //------------------------------------------------------------------------------ 2 // GB_AxB_dot_cij: compute C(i,j) = A(:,i)'*B(:,j) 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // computes C(i,j) = A (:,i)'*B(:,j) via sparse dot product. This template is 11 // used for all three cases: C=A'*B, C<M>=A'*B, and C<!M>=A'*B in dot2 when C 12 // is bitmap, and for C<M>=A'*B when C and M are sparse or hyper in dot3. 13 14 // When used as the multiplicative operator, the PAIR operator provides some 15 // useful special cases. Its output is always one, for any matching pair of 16 // entries A(k,i)'*B(k,j) for some k. If the monoid is ANY, then C(i,j)=1 if 17 // the intersection for the dot product is non-empty. This intersection has to 18 // be found, in general. However, suppose B(:,j) is dense. Then every entry 19 // in the pattern of A(:,i)' will produce a 1 from the PAIR operator. If the 20 // monoid is ANY, then C(i,j)=1 if A(:,i)' is nonempty. If the monoid is PLUS, 21 // then C(i,j) is simply nnz(A(:,i)), assuming no overflow. The XOR monoid 22 // acts like a 1-bit summation, so the result of the XOR_PAIR_BOOL semiring 23 // will be C(i,j) = mod (nnz(A(:,i)'*B(:,j)),2). 24 25 // If both A(:,i) and B(:,j) are sparse, then the intersection must still be 26 // found, so these optimizations can be used only if A(:,i) and/or B(:,j) are 27 // entirely populated. 28 29 // For built-in, pre-generated semirings, the PAIR operator is only coupled 30 // with either the ANY, PLUS, EQ, or XOR monoids, since the other monoids are 31 // equivalent to the ANY monoid. 32 33 //------------------------------------------------------------------------------ 34 // C(i,j) = A(:,i)'*B(:,j): a single dot product 35 //------------------------------------------------------------------------------ 36 37 { 38 int64_t pB = pB_start ; 39 40 #if ( GB_A_IS_FULL && GB_B_IS_FULL ) 41 { 42 43 //---------------------------------------------------------------------- 44 // both A and B are full 45 //---------------------------------------------------------------------- 46 47 #if GB_IS_PAIR_MULTIPLIER 48 { 49 #if GB_IS_ANY_MONOID 50 // ANY monoid: take the first entry found; this sets cij = 1 51 GB_MULT (cij, ignore, ignore, 0, 0, 0) ; 52 #elif GB_IS_EQ_MONOID 53 // EQ_PAIR semiring: all entries are equal to 1 54 cij = 1 ; 55 #elif (GB_CTYPE_BITS > 0) 56 // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)), 57 // for bool, 8-bit, 16-bit, or 32-bit integer 58 cij = (GB_CTYPE) (((uint64_t) vlen) & GB_CTYPE_BITS) ; 59 #else 60 // PLUS monoid for float, double, or 64-bit integers 61 cij = GB_CTYPE_CAST (vlen, 0) ; 62 #endif 63 } 64 #else 65 { 66 // cij = A(0,i) * B(0,j) 67 GB_GETA (aki, Ax, pA) ; // aki = A(0,i) 68 GB_GETB (bkj, Bx, pB) ; // bkj = B(0,j) 69 GB_MULT (cij, aki, bkj, i, 0, j) ; // cij = aki * bkj 70 GB_PRAGMA_SIMD_DOT (cij) 71 for (int64_t k = 1 ; k < vlen ; k++) 72 { 73 GB_DOT_TERMINAL (cij) ; // break if cij terminal 74 // cij += A(k,i) * B(k,j) 75 GB_GETA (aki, Ax, pA+k) ; // aki = A(k,i) 76 GB_GETB (bkj, Bx, pB+k) ; // bkj = B(k,j) 77 GB_MULTADD (cij, aki, bkj, i, k, j) ; // cij += aki * bkj 78 } 79 } 80 #endif 81 GB_DOT_ALWAYS_SAVE_CIJ ; 82 83 } 84 #elif ( GB_A_IS_FULL && GB_B_IS_BITMAP ) 85 { 86 87 //---------------------------------------------------------------------- 88 // A is full and B is bitmap 89 //---------------------------------------------------------------------- 90 91 for (int64_t k = 0 ; k < vlen ; k++) 92 { 93 if (Bb [pB+k]) 94 { 95 GB_DOT (k, pA+k, pB+k) ; 96 } 97 } 98 GB_DOT_SAVE_CIJ ; 99 100 } 101 #elif ( GB_A_IS_FULL && ( GB_B_IS_SPARSE || GB_B_IS_HYPER ) ) 102 { 103 104 //---------------------------------------------------------------------- 105 // A is full and B is sparse/hyper 106 //---------------------------------------------------------------------- 107 108 #if GB_IS_PAIR_MULTIPLIER 109 { 110 #if GB_IS_ANY_MONOID 111 // ANY monoid: take the first entry found; this sets cij = 1 112 GB_MULT (cij, ignore, ignore, 0, 0, 0) ; 113 #elif GB_IS_EQ_MONOID 114 // EQ_PAIR semiring: all entries are equal to 1 115 cij = 1 ; 116 #elif (GB_CTYPE_BITS > 0) 117 // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)), 118 // for bool, 8-bit, 16-bit, or 32-bit integer 119 cij = (GB_CTYPE) (((uint64_t) bjnz) & GB_CTYPE_BITS) ; 120 #else 121 // PLUS monoid for float, double, or 64-bit integers 122 cij = GB_CTYPE_CAST (bjnz, 0) ; 123 #endif 124 } 125 #else 126 { 127 int64_t k = Bi [pB] ; // first row index of B(:,j) 128 // cij = A(k,i) * B(k,j) 129 GB_GETA (aki, Ax, pA+k) ; // aki = A(k,i) 130 GB_GETB (bkj, Bx, pB ) ; // bkj = B(k,j) 131 GB_MULT (cij, aki, bkj, i, k, j) ; // cij = aki * bkj 132 GB_PRAGMA_SIMD_DOT (cij) 133 for (int64_t p = pB+1 ; p < pB_end ; p++) 134 { 135 GB_DOT_TERMINAL (cij) ; // break if cij terminal 136 int64_t k = Bi [p] ; 137 // cij += A(k,i) * B(k,j) 138 GB_GETA (aki, Ax, pA+k) ; // aki = A(k,i) 139 GB_GETB (bkj, Bx, p ) ; // bkj = B(k,j) 140 GB_MULTADD (cij, aki, bkj, i, k, j) ; // cij += aki * bkj 141 } 142 } 143 #endif 144 GB_DOT_ALWAYS_SAVE_CIJ ; 145 146 } 147 #elif ( GB_A_IS_BITMAP && GB_B_IS_FULL ) 148 { 149 150 //---------------------------------------------------------------------- 151 // A is bitmap and B is full 152 //---------------------------------------------------------------------- 153 154 for (int64_t k = 0 ; k < vlen ; k++) 155 { 156 if (Ab [pA+k]) 157 { 158 GB_DOT (k, pA+k, pB+k) ; 159 } 160 } 161 GB_DOT_SAVE_CIJ ; 162 163 } 164 #elif ( GB_A_IS_BITMAP && GB_B_IS_BITMAP ) 165 { 166 167 //---------------------------------------------------------------------- 168 // both A and B are bitmap 169 //---------------------------------------------------------------------- 170 171 for (int64_t k = 0 ; k < vlen ; k++) 172 { 173 if (Ab [pA+k] && Bb [pB+k]) 174 { 175 GB_DOT (k, pA+k, pB+k) ; 176 } 177 } 178 GB_DOT_SAVE_CIJ ; 179 180 } 181 #elif ( GB_A_IS_BITMAP && ( GB_B_IS_SPARSE || GB_B_IS_HYPER ) ) 182 { 183 184 //---------------------------------------------------------------------- 185 // A is bitmap and B is sparse/hyper 186 //---------------------------------------------------------------------- 187 188 for (int64_t p = pB ; p < pB_end ; p++) 189 { 190 int64_t k = Bi [p] ; 191 if (Ab [pA+k]) 192 { 193 GB_DOT (k, pA+k, p) ; 194 } 195 } 196 GB_DOT_SAVE_CIJ ; 197 198 } 199 #elif ( (GB_A_IS_SPARSE || GB_A_IS_HYPER) && GB_B_IS_FULL ) 200 { 201 202 //---------------------------------------------------------------------- 203 // A is sparse/hyper and B is full 204 //---------------------------------------------------------------------- 205 206 #if GB_IS_PAIR_MULTIPLIER 207 { 208 #if GB_IS_ANY_MONOID 209 // ANY monoid: take the first entry found; this sets cij = 1 210 GB_MULT (cij, ignore, ignore, 0, 0, 0) ; 211 #elif GB_IS_EQ_MONOID 212 // EQ_PAIR semiring: all entries are equal to 1 213 cij = 1 ; 214 #elif (GB_CTYPE_BITS > 0) 215 // PLUS, XOR monoids: A(:,i)'*B(:,j) is nnz(A(:,i)), 216 // for bool, 8-bit, 16-bit, or 32-bit integer 217 cij = (GB_CTYPE) (((uint64_t) ainz) & GB_CTYPE_BITS) ; 218 #else 219 // PLUS monoid for float, double, or 64-bit integers 220 cij = GB_CTYPE_CAST (ainz, 0) ; 221 #endif 222 } 223 #else 224 { 225 int64_t k = Ai [pA] ; // first row index of A(:,i) 226 // cij = A(k,i) * B(k,j) 227 GB_GETA (aki, Ax, pA ) ; // aki = A(k,i) 228 GB_GETB (bkj, Bx, pB+k) ; // bkj = B(k,j) 229 GB_MULT (cij, aki, bkj, i, k, j) ; // cij = aki * bkj 230 GB_PRAGMA_SIMD_DOT (cij) 231 for (int64_t p = pA+1 ; p < pA_end ; p++) 232 { 233 GB_DOT_TERMINAL (cij) ; // break if cij terminal 234 int64_t k = Ai [p] ; 235 // cij += A(k,i) * B(k,j) 236 GB_GETA (aki, Ax, p ) ; // aki = A(k,i) 237 GB_GETB (bkj, Bx, pB+k) ; // bkj = B(k,j) 238 GB_MULTADD (cij, aki, bkj, i, k, j) ; // cij += aki * bkj 239 } 240 } 241 #endif 242 GB_DOT_ALWAYS_SAVE_CIJ ; 243 244 } 245 #elif ( (GB_A_IS_SPARSE || GB_A_IS_HYPER) && GB_B_IS_BITMAP ) 246 { 247 248 //---------------------------------------------------------------------- 249 // A is sparse/hyper and B is bitmap 250 //---------------------------------------------------------------------- 251 252 for (int64_t p = pA ; p < pA_end ; p++) 253 { 254 int64_t k = Ai [p] ; 255 if (Bb [pB+k]) 256 { 257 GB_DOT (k, p, pB+k) ; 258 } 259 } 260 GB_DOT_SAVE_CIJ ; 261 262 } 263 #else 264 { 265 266 //---------------------------------------------------------------------- 267 // both A and B are sparse/hyper 268 //---------------------------------------------------------------------- 269 270 271 if (Ai [pA_end-1] < ib_first || ib_last < Ai [pA]) 272 { 273 274 //------------------------------------------------------------------ 275 // pattern of A(:,i) and B(:,j) don't overlap; C(i,j) doesn't exist 276 //------------------------------------------------------------------ 277 278 ASSERT (!GB_CIJ_EXISTS) ; 279 280 } 281 else if (ainz > 8 * bjnz) 282 { 283 284 //------------------------------------------------------------------ 285 // B(:,j) is very sparse compared to A(:,i) 286 //------------------------------------------------------------------ 287 288 while (pA < pA_end && pB < pB_end) 289 { 290 int64_t ia = Ai [pA] ; 291 int64_t ib = Bi [pB] ; 292 if (ia < ib) 293 { 294 // A(ia,i) appears before B(ib,j) 295 // discard all entries A(ia:ib-1,i) 296 int64_t pleft = pA + 1 ; 297 int64_t pright = pA_end - 1 ; 298 GB_TRIM_BINARY_SEARCH (ib, Ai, pleft, pright) ; 299 ASSERT (pleft > pA) ; 300 pA = pleft ; 301 } 302 else if (ib < ia) 303 { 304 // B(ib,j) appears before A(ia,i) 305 pB++ ; 306 } 307 else // ia == ib == k 308 { 309 // A(k,i) and B(k,j) are the next entries to merge 310 GB_DOT (ia, pA, pB) ; 311 pA++ ; 312 pB++ ; 313 } 314 } 315 GB_DOT_SAVE_CIJ ; 316 317 } 318 else if (bjnz > 8 * ainz) 319 { 320 321 //------------------------------------------------------------------ 322 // A(:,i) is very sparse compared to B(:,j) 323 //------------------------------------------------------------------ 324 325 while (pA < pA_end && pB < pB_end) 326 { 327 int64_t ia = Ai [pA] ; 328 int64_t ib = Bi [pB] ; 329 if (ia < ib) 330 { 331 // A(ia,i) appears before B(ib,j) 332 pA++ ; 333 } 334 else if (ib < ia) 335 { 336 // B(ib,j) appears before A(ia,i) 337 // discard all entries B(ib:ia-1,j) 338 int64_t pleft = pB + 1 ; 339 int64_t pright = pB_end - 1 ; 340 GB_TRIM_BINARY_SEARCH (ia, Bi, pleft, pright) ; 341 ASSERT (pleft > pB) ; 342 pB = pleft ; 343 } 344 else // ia == ib == k 345 { 346 // A(k,i) and B(k,j) are the next entries to merge 347 GB_DOT (ia, pA, pB) ; 348 pA++ ; 349 pB++ ; 350 } 351 } 352 GB_DOT_SAVE_CIJ ; 353 354 } 355 else 356 { 357 358 //------------------------------------------------------------------ 359 // A(:,i) and B(:,j) have about the same sparsity 360 //------------------------------------------------------------------ 361 362 while (pA < pA_end && pB < pB_end) 363 { 364 int64_t ia = Ai [pA] ; 365 int64_t ib = Bi [pB] ; 366 if (ia < ib) 367 { 368 // A(ia,i) appears before B(ib,j) 369 pA++ ; 370 } 371 else if (ib < ia) 372 { 373 // B(ib,j) appears before A(ia,i) 374 pB++ ; 375 } 376 else // ia == ib == k 377 { 378 // A(k,i) and B(k,j) are the next entries to merge 379 GB_DOT (ia, pA, pB) ; 380 pA++ ; 381 pB++ ; 382 } 383 } 384 GB_DOT_SAVE_CIJ ; 385 } 386 } 387 #endif 388 } 389 390