1 //------------------------------------------------------------------------------
2 // GB_AxB_dot4_meta:  C+=A'*B via dot products, where C is full
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // C+=A'*B where C is a dense matrix and is computed in-place.  The monoid of
11 // the semiring matches the accum operator, and the type of C matches the
12 // ztype of accum.  That is, no typecasting can be done with C.
13 
14 #define GB_DOT4     // flag for this dot4 template; #undef'd at the end of this file
15 
16 // GB_DOT: cij += A(k,i) * B(k,j); cij is first loaded from Cx [pC] if cij_updated is false
17 #undef  GB_DOT
18 #define GB_DOT(k,pA,pB)                                             \
19 {                                                                   \
20     if (!cij_updated)       /* cij not yet loaded from C */         \
21     {                                                               \
22         cij_updated = true ;        /* tested by GB_DOT_SAVE_CIJ */ \
23         GB_GETC (cij, pC) ;                 /* cij = Cx [pC] */     \
24     }                                                               \
25     GB_GETA (aki, Ax, pA) ;                 /* aki = A(k,i) */      \
26     GB_GETB (bkj, Bx, pB) ;                 /* bkj = B(k,j) */      \
27     GB_MULTADD (cij, aki, bkj, i, k, j) ;   /* cij += aki * bkj */  \
28     GB_DOT_TERMINAL (cij) ;         /* break if cij == terminal */  \
29 }
30 
31 // C(i,j) = cij, stored unconditionally: cij_updated is not tested here
32 #undef  GB_DOT_ALWAYS_SAVE_CIJ
33 #define GB_DOT_ALWAYS_SAVE_CIJ  \
34 {                               \
35     GB_PUTC (cij, pC) ;         \
36 }
37 
38 // save C(i,j) only if cij_updated is true (GB_DOT sets it when it loads cij); else write nothing
39 #undef  GB_DOT_SAVE_CIJ
40 #define GB_DOT_SAVE_CIJ         \
41 {                               \
42     if (cij_updated)            \
43     {                           \
44         GB_PUTC (cij, pC) ;     \
45     }                           \
46 }
47 
48 {
49 
50     //--------------------------------------------------------------------------
51     // get A, B, and C (these names are not used below; presumably the included factory uses them)
52     //--------------------------------------------------------------------------
53 
54     GB_CTYPE *restrict Cx = (GB_CTYPE *) C->x ;     // not const: C is computed in-place
55     const int64_t cvlen = C->vlen ;
56 
57     const int64_t  *restrict Bp = B->p ;
58     const int8_t   *restrict Bb = B->b ;
59     const int64_t  *restrict Bh = B->h ;
60     const int64_t  *restrict Bi = B->i ;
61     const GB_BTYPE *restrict Bx = (GB_BTYPE *) (B_is_pattern ? NULL : B->x) ;   // NULL when B_is_pattern
62     const int64_t vlen = B->vlen ;      // same as A->vlen; see the ASSERT below
63     const bool B_is_hyper = GB_IS_HYPERSPARSE (B) ;
64     const bool B_is_bitmap = GB_IS_BITMAP (B) ;
65     const bool B_is_sparse = GB_IS_SPARSE (B) ;
66 
67     const int64_t  *restrict Ap = A->p ;
68     const int8_t   *restrict Ab = A->b ;
69     const int64_t  *restrict Ah = A->h ;
70     const int64_t  *restrict Ai = A->i ;
71     const GB_ATYPE *restrict Ax = (GB_ATYPE *) (A_is_pattern ? NULL : A->x) ;   // NULL when A_is_pattern
72     ASSERT (A->vlen == B->vlen) ;       // vlen above was taken from B only
73     const bool A_is_hyper = GB_IS_HYPERSPARSE (A) ;
74     const bool A_is_bitmap = GB_IS_BITMAP (A) ;
75     const bool A_is_sparse = GB_IS_SPARSE (A) ;
76 
77     int ntasks = naslice * nbslice ;    // presumably one task per (A slice, B slice) pair -- confirm in the factory
78 
79     //--------------------------------------------------------------------------
80     // C += A'*B (no computation appears in this file; it comes from the included factory)
81     //--------------------------------------------------------------------------
82 
83     #include "GB_meta16_factory.c"
84 }
85 
86 #undef GB_DOT_ALWAYS_SAVE_CIJ   // defined earlier in this file
87 #undef GB_DOT_SAVE_CIJ          // defined earlier in this file
88 // note: GB_DOT is not undefined here; this file #undef's it just before defining it
89 #undef GB_DOT4                  // defined near the top of this file
90 
91