1 //------------------------------------------------------------------------------
2 // GB_AxB_colscale_meta: C=A*D where D is a square diagonal matrix
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // All entries in C=A*D are computed entirely in parallel.
11 
12 // A and C can be jumbled.  D cannot, but it is a diagonal matrix so it is
13 // never jumbled.
14 
15 {
16 
17     //--------------------------------------------------------------------------
18     // check inputs
19     //--------------------------------------------------------------------------
20 
21     // Dx, j, and Ah are unused if the operator is FIRST or PAIR
22     #include "GB_unused.h"
23 
24     ASSERT (GB_JUMBLED_OK (C)) ;
25     ASSERT (GB_JUMBLED_OK (A)) ;
26     ASSERT (!GB_JUMBLED (D)) ;
27 
28     //--------------------------------------------------------------------------
29     // get C, A, and D
30     //--------------------------------------------------------------------------
31 
32     const int64_t  *restrict Ap = A->p ;
33     const int64_t  *restrict Ah = A->h ;
34     const GB_ATYPE *restrict Ax = (GB_ATYPE *) (A_is_pattern ? NULL : A->x) ;
35     const GB_BTYPE *restrict Dx = (GB_BTYPE *) (D_is_pattern ? NULL : D->x) ;
36     const int64_t avlen = A->vlen ;
37 
38     const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
39     const int64_t *restrict klast_Aslice  = A_ek_slicing + A_ntasks ;
40     const int64_t *restrict pstart_Aslice = A_ek_slicing + A_ntasks * 2 ;
41 
42     //--------------------------------------------------------------------------
43     // C=A*D
44     //--------------------------------------------------------------------------
45 
46     int tid ;
47     #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1)
48     for (tid = 0 ; tid < A_ntasks ; tid++)
49     {
50 
51         // if kfirst > klast then task tid does no work at all
52         int64_t kfirst = kfirst_Aslice [tid] ;
53         int64_t klast  = klast_Aslice  [tid] ;
54 
55         //----------------------------------------------------------------------
56         // C(:,kfirst:klast) = A(:,kfirst:klast)*D(kfirst:klast,kfirst:klast)
57         //----------------------------------------------------------------------
58 
59         for (int64_t k = kfirst ; k <= klast ; k++)
60         {
61 
62             //------------------------------------------------------------------
63             // find the part of A(:,k) and C(:,k) to be operated on by this task
64             //------------------------------------------------------------------
65 
66             int64_t j = GBH (Ah, k) ;
67             int64_t pA_start, pA_end ;
68             GB_get_pA (&pA_start, &pA_end, tid, k,
69                 kfirst, klast, pstart_Aslice, Ap, avlen) ;
70 
71             //------------------------------------------------------------------
72             // C(:,j) = A(:,j)*D(j,j)
73             //------------------------------------------------------------------
74 
75             GB_GETB (djj, Dx, j) ;                  // djj = D (j,j)
76             GB_PRAGMA_SIMD_VECTORIZE
77             for (int64_t p = pA_start ; p < pA_end ; p++)
78             {
79                 GB_GETA (aij, Ax, p) ;                  // aij = A(i,j)
80                 GB_BINOP (GB_CX (p), aij, djj, 0, 0) ;  // C(i,j) = aij * djj
81             }
82         }
83     }
84 }
85 
86