1 //------------------------------------------------------------------------------ 2 // GB_AxB_colscale_meta: C=A*D where D is a square diagonal matrix 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // All entries in C=A*D are computed entirely in parallel. 11 12 // A and C can be jumbled. D cannot, but it is a diagonal matrix so it is 13 // never jumbled. 14 15 { 16 17 //-------------------------------------------------------------------------- 18 // check inputs 19 //-------------------------------------------------------------------------- 20 21 // Dx, j, and Ah are unused if the operator is FIRST or PAIR 22 #include "GB_unused.h" 23 24 ASSERT (GB_JUMBLED_OK (C)) ; 25 ASSERT (GB_JUMBLED_OK (A)) ; 26 ASSERT (!GB_JUMBLED (D)) ; 27 28 //-------------------------------------------------------------------------- 29 // get C, A, and D 30 //-------------------------------------------------------------------------- 31 32 const int64_t *restrict Ap = A->p ; 33 const int64_t *restrict Ah = A->h ; 34 const GB_ATYPE *restrict Ax = (GB_ATYPE *) (A_is_pattern ? NULL : A->x) ; 35 const GB_BTYPE *restrict Dx = (GB_BTYPE *) (D_is_pattern ? NULL : D->x) ; 36 const int64_t avlen = A->vlen ; 37 38 const int64_t *restrict kfirst_Aslice = A_ek_slicing ; 39 const int64_t *restrict klast_Aslice = A_ek_slicing + A_ntasks ; 40 const int64_t *restrict pstart_Aslice = A_ek_slicing + A_ntasks * 2 ; 41 42 //-------------------------------------------------------------------------- 43 // C=A*D 44 //-------------------------------------------------------------------------- 45 46 int tid ; 47 #pragma omp parallel for num_threads(A_nthreads) schedule(dynamic,1) 48 for (tid = 0 ; tid < A_ntasks ; tid++) 49 { 50 51 // if kfirst > klast then task tid does no work at all 52 int64_t kfirst = kfirst_Aslice [tid] ; 53 int64_t klast = klast_Aslice [tid] ; 54 55 //---------------------------------------------------------------------- 56 // C(:,kfirst:klast) = A(:,kfirst:klast)*D(kfirst:klast,kfirst:klast) 57 //---------------------------------------------------------------------- 58 59 for (int64_t k = kfirst ; k <= klast ; k++) 60 { 61 62 //------------------------------------------------------------------ 63 // find the part of A(:,k) and C(:,k) to be operated on by this task 64 //------------------------------------------------------------------ 65 66 int64_t j = GBH (Ah, k) ; 67 int64_t pA_start, pA_end ; 68 GB_get_pA (&pA_start, &pA_end, tid, k, 69 kfirst, klast, pstart_Aslice, Ap, avlen) ; 70 71 //------------------------------------------------------------------ 72 // C(:,j) = A(:,j)*D(j,j) 73 //------------------------------------------------------------------ 74 75 GB_GETB (djj, Dx, j) ; // djj = D (j,j) 76 GB_PRAGMA_SIMD_VECTORIZE 77 for (int64_t p = pA_start ; p < pA_end ; p++) 78 { 79 GB_GETA (aij, Ax, p) ; // aij = A(i,j) 80 GB_BINOP (GB_CX (p), aij, djj, 0, 0) ; // C(i,j) = aij * djj 81 } 82 } 83 } 84 } 85 86