1 //------------------------------------------------------------------------------
2 // GB_dense_subassign_23_template: C += B where C is dense; B is sparse or dense
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // All entries in C+=B are computed entirely in parallel, using the same kind of
11 // parallelism as Template/GB_AxB_colscale.c.
12 
13 #include "GB_unused.h"
14 
15 {
16 
17     //--------------------------------------------------------------------------
18     // get C and B
19     //--------------------------------------------------------------------------
20 
21     const GB_BTYPE *restrict Bx = (GB_BTYPE *) B->x ;
22     GB_CTYPE *restrict Cx = (GB_CTYPE *) C->x ;
23     ASSERT (GB_is_dense (C)) ;
24     const int64_t cnz = GB_NNZ_HELD (C) ;
25 
26     if (GB_IS_BITMAP (B))
27     {
28 
29         //----------------------------------------------------------------------
30         // C += B when C is dense and B is bitmap
31         //----------------------------------------------------------------------
32 
33         const int8_t *restrict Bb = B->b ;
34         int64_t p ;
35         #pragma omp parallel for num_threads(B_nthreads) schedule(static)
36         for (p = 0 ; p < cnz ; p++)
37         {
38             if (!Bb [p]) continue ;
39             GB_GETB (bij, Bx, p) ;                  // bij = B(i,j)
40             GB_BINOP (GB_CX (p), GB_CX (p), bij, 0, 0) ;  // C(i,j) += bij
41         }
42 
43     }
44     else if (B_ek_slicing == NULL)
45     {
46 
47         //----------------------------------------------------------------------
48         // C += B when both C and B are dense
49         //----------------------------------------------------------------------
50 
51         ASSERT (GB_is_dense (B)) ;
52         int64_t p ;
53         #pragma omp parallel for num_threads(B_nthreads) schedule(static)
54         for (p = 0 ; p < cnz ; p++)
55         {
56             GB_GETB (bij, Bx, p) ;                  // bij = B(i,j)
57             GB_BINOP (GB_CX (p), GB_CX (p), bij, 0, 0) ;  // C(i,j) += bij
58         }
59 
60     }
61     else
62     {
63 
64         //----------------------------------------------------------------------
65         // C += B when C is dense and B is sparse
66         //----------------------------------------------------------------------
67 
68         ASSERT (GB_JUMBLED_OK (B)) ;
69 
70         const int64_t *restrict Bp = B->p ;
71         const int64_t *restrict Bh = B->h ;
72         const int64_t *restrict Bi = B->i ;
73         const int64_t bvlen = B->vlen ;
74         const int64_t cvlen = C->vlen ;
75         bool B_jumbled = B->jumbled ;
76 
77         const int64_t *restrict kfirst_Bslice = B_ek_slicing ;
78         const int64_t *restrict klast_Bslice  = kfirst_Bslice + B_ntasks ;
79         const int64_t *restrict pstart_Bslice = klast_Bslice + B_ntasks ;
80 
81         int taskid ;
82         #pragma omp parallel for num_threads(B_nthreads) schedule(dynamic,1)
83         for (taskid = 0 ; taskid < B_ntasks ; taskid++)
84         {
85 
86             // if kfirst > klast then taskid does no work at all
87             int64_t kfirst = kfirst_Bslice [taskid] ;
88             int64_t klast  = klast_Bslice  [taskid] ;
89 
90             //------------------------------------------------------------------
91             // C(:,kfirst:klast) += B(:,kfirst:klast)
92             //------------------------------------------------------------------
93 
94             for (int64_t k = kfirst ; k <= klast ; k++)
95             {
96 
97                 //--------------------------------------------------------------
98                 // find the part of B(:,k) and C(:,k) for this task
99                 //--------------------------------------------------------------
100 
101                 int64_t j = GBH (Bh, k) ;
102                 int64_t my_pB_start, my_pB_end ;
103                 GB_get_pA (&my_pB_start, &my_pB_end, taskid, k,
104                     kfirst, klast, pstart_Bslice, Bp, bvlen) ;
105 
106                 int64_t pB_start = GBP (Bp, k, bvlen) ;
107                 int64_t pB_end   = GBP (Bp, k+1, bvlen) ;
108                 bool bjdense = ((pB_end - pB_start) == cvlen) ;
109 
110                 // pC points to the start of C(:,j) if C is dense
111                 int64_t pC = j * cvlen ;
112 
113                 //--------------------------------------------------------------
114                 // C(:,j) += B(:,j)
115                 //--------------------------------------------------------------
116 
117                 if (bjdense && !B_jumbled)
118                 {
119 
120                     //----------------------------------------------------------
121                     // both C(:,j) and B(:,j) are dense
122                     //----------------------------------------------------------
123 
124                     GB_PRAGMA_SIMD_VECTORIZE
125                     for (int64_t pB = my_pB_start ; pB < my_pB_end ; pB++)
126                     {
127                         int64_t i = pB - pB_start ;
128                         int64_t p = pC + i ;
129                         // bij = B(i,j)
130                         GB_GETB (bij, Bx, pB) ;
131                         // C(i,j) += bij
132                         GB_BINOP (GB_CX (p), GB_CX (p), bij, 0, 0) ;
133                     }
134 
135                 }
136                 else
137                 {
138 
139                     //----------------------------------------------------------
140                     // C(:,j) is dense; B(:,j) is sparse
141                     //----------------------------------------------------------
142 
143                     GB_PRAGMA_SIMD_VECTORIZE
144                     for (int64_t pB = my_pB_start ; pB < my_pB_end ; pB++)
145                     {
146                         int64_t i = Bi [pB] ;
147                         int64_t p = pC + i ;
148                         GB_GETB (bij, Bx, pB) ;                 // bij = B(i,j)
149                         // C(i,j) += bij
150                         GB_BINOP (GB_CX (p), GB_CX (p), bij, 0, 0) ;
151                     }
152                 }
153             }
154         }
155     }
156 }
157 
158