1 //------------------------------------------------------------------------------
2 // GB_full_add_template:  phase2 for C=A+B, C<M>=A+B, C<!M>=A+B, C is full
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // C is full.  The mask M is not present (otherwise, C would be sparse,
11 // hypersparse, or bitmap).  All of these methods are asymptotically optimal.
12 
13     //      ------------------------------------------
14     //      C       =           A       +       B
15     //      ------------------------------------------
16     //      full    .           sparse          full
17     //      full    .           bitmap          full
18     //      full    .           full            sparse
19     //      full    .           full            bitmap
20     //      full    .           full            full
21 
22 {
23 
24     int64_t p ;
25     ASSERT (M == NULL) ;
26     ASSERT (A_is_full || B_is_full) ;
27     ASSERT (C_sparsity == GxB_FULL) ;
28 
29     if (A_is_full && B_is_full)
30     {
31 
32         //----------------------------------------------------------------------
33         // Method30: C, A, B are all full
34         //----------------------------------------------------------------------
35 
36         #pragma omp parallel for num_threads(C_nthreads) schedule(static)
37         for (p = 0 ; p < cnz ; p++)
38         {
39             // C (i,j) = A (i,j) + B (i,j)
40             GB_GETA (aij, Ax, p) ;
41             GB_GETB (bij, Bx, p) ;
42             GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ;
43         }
44 
45     }
46     else if (A_is_full)
47     {
48 
49         //----------------------------------------------------------------------
50         // C and A are full; B is hypersparse, sparse, or bitmap
51         //----------------------------------------------------------------------
52 
53         if (B_is_bitmap)
54         {
55 
56             //------------------------------------------------------------------
57             // Method31: C and A are full; B is bitmap
58             //------------------------------------------------------------------
59 
60             #pragma omp parallel for num_threads(C_nthreads) schedule(static)
61             for (p = 0 ; p < cnz ; p++)
62             {
63                 if (Bb [p])
64                 {
65                     // C (i,j) = A (i,j) + B (i,j)
66                     GB_GETA (aij, Ax, p) ;
67                     GB_GETB (bij, Bx, p) ;
68                     GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ;
69                 }
70                 else
71                 {
72                     // C (i,j) = A (i,j)
73                     GB_COPY_A_TO_C (GB_CX (p), Ax, p) ;
74                 }
75             }
76 
77         }
78         else
79         {
80 
81             //------------------------------------------------------------------
82             // Method32: C and A full; B is sparse or hypersparse
83             //------------------------------------------------------------------
84 
85             #pragma omp parallel for num_threads(C_nthreads) schedule(static)
86             for (p = 0 ; p < cnz ; p++)
87             {
88                 // C (i,j) = A (i,j)
89                 GB_COPY_A_TO_C (GB_CX (p), Ax, p) ;
90             }
91 
92             GB_SLICE_MATRIX (B, 8, chunk) ;
93 
94             #pragma omp parallel for num_threads(B_nthreads) schedule(dynamic,1)
95             for (taskid = 0 ; taskid < B_ntasks ; taskid++)
96             {
97                 int64_t kfirst = kfirst_Bslice [taskid] ;
98                 int64_t klast  = klast_Bslice  [taskid] ;
99                 for (int64_t k = kfirst ; k <= klast ; k++)
100                 {
101                     // find the part of B(:,k) for this task
102                     int64_t j = GBH (Bh, k) ;
103                     int64_t pB_start, pB_end ;
104                     GB_get_pA (&pB_start, &pB_end, taskid, k, kfirst,
105                         klast, pstart_Bslice, Bp, vlen) ;
106                     int64_t pC_start = j * vlen ;
107                     // traverse over B(:,j), the kth vector of B
108                     for (int64_t pB = pB_start ; pB < pB_end ; pB++)
109                     {
110                         // C (i,j) = A (i,j) + B (i,j)
111                         int64_t i = Bi [pB] ;
112                         int64_t p = pC_start + i ;
113                         GB_GETA (aij, Ax, p) ;
114                         GB_GETB (bij, Bx, pB) ;
115                         GB_BINOP (GB_CX (p), aij, bij, i, j) ;
116                     }
117                 }
118             }
119         }
120 
121     }
122     else
123     {
124 
125         //----------------------------------------------------------------------
126         // C and B are full; A is hypersparse, sparse, or bitmap
127         //----------------------------------------------------------------------
128 
129         if (A_is_bitmap)
130         {
131 
132             //------------------------------------------------------------------
133             // Method33: C and B are full; A is bitmap
134             //------------------------------------------------------------------
135 
136             #pragma omp parallel for num_threads(C_nthreads) schedule(static)
137             for (p = 0 ; p < cnz ; p++)
138             {
139                 if (Ab [p])
140                 {
141                     // C (i,j) = A (i,j) + B (i,j)
142                     GB_GETA (aij, Ax, p) ;
143                     GB_GETB (bij, Bx, p) ;
144                     GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ;
145                 }
146                 else
147                 {
148                     // C (i,j) = B (i,j)
149                     GB_COPY_B_TO_C (GB_CX (p), Bx, p) ;
150                 }
151             }
152 
153         }
154         else
155         {
156 
157             //------------------------------------------------------------------
158             // Method34: C and B are full; A is hypersparse or sparse
159             //------------------------------------------------------------------
160 
161             #pragma omp parallel for num_threads(C_nthreads) schedule(static)
162             for (p = 0 ; p < cnz ; p++)
163             {
164                 // C (i,j) = B (i,j)
165                 GB_COPY_B_TO_C (GB_CX (p), Bx, p) ;
166             }
167 
168             GB_SLICE_MATRIX (A, 8, chunk) ;
169 
170             #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1)
171             for (taskid = 0 ; taskid < A_ntasks ; taskid++)
172             {
173                 int64_t kfirst = kfirst_Aslice [taskid] ;
174                 int64_t klast  = klast_Aslice  [taskid] ;
175                 for (int64_t k = kfirst ; k <= klast ; k++)
176                 {
177                     // find the part of A(:,k) for this task
178                     int64_t j = GBH (Ah, k) ;
179                     int64_t pA_start, pA_end ;
180                     GB_get_pA (&pA_start, &pA_end, taskid, k, kfirst,
181                         klast, pstart_Aslice, Ap, vlen) ;
182                     int64_t pC_start = j * vlen ;
183                     // traverse over A(:,j), the kth vector of A
184                     for (int64_t pA = pA_start ; pA < pA_end ; pA++)
185                     {
186                         // C (i,j) = A (i,j) + B (i,j)
187                         int64_t i = Ai [pA] ;
188                         int64_t p = pC_start + i ;
189                         GB_GETA (aij, Ax, pA) ;
190                         GB_GETB (bij, Bx, p) ;
191                         GB_BINOP (GB_CX (p), aij, bij, i, j) ;
192                     }
193                 }
194             }
195         }
196     }
197 }
198 
199