//------------------------------------------------------------------------------
// GB_AxB_dot3_phase1_template: analysis phase for dot3 (C<M> = A'*B)
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// This file is a template: it is #include'd with the macros GB_A_IS_HYPER,
// GB_A_IS_SPARSE, GB_B_IS_HYPER, GB_B_IS_SPARSE, and (optionally)
// GB_MASK_SPARSE_AND_STRUCTURAL defined by the includer, which select the
// sparsity formats of A, B, and the mask M at compile time.  For each entry
// M(i,j) present in the mask pattern, Cwork [pM] is filled with an estimate
// of the work needed to compute the dot product C(i,j) = A(:,i)'*B(:,j).
// NOTE(review): Cwork is presumably prefix-summed by the caller to schedule
// the numeric phase; that step is outside this template -- confirm.

{
    int taskid ;
    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
    for (taskid = 0 ; taskid < ntasks ; taskid++)
    {

        //----------------------------------------------------------------------
        // get the task descriptor
        //----------------------------------------------------------------------

        // TaskList [taskid] holds the range of vectors [kfirst..klast] owned
        // by this task; klast == -1 flags a fine task, which handles only a
        // slice of the single vector kfirst.
        int64_t kfirst = TaskList [taskid].kfirst ;
        int64_t klast = TaskList [taskid].klast ;
        bool fine_task = (klast == -1) ;
        if (fine_task)
        {
            // a fine task operates on a slice of a single vector
            klast = kfirst ;
        }
        int64_t bpleft = 0 ;    // Ch is not jumbled, so the lookup position in
                                // B can only advance as k increases

        //----------------------------------------------------------------------
        // compute all vectors in this task
        //----------------------------------------------------------------------

        for (int64_t k = kfirst ; k <= klast ; k++)
        {

            //------------------------------------------------------------------
            // get j, the kth vector of C and M
            //------------------------------------------------------------------

            #if defined ( GB_MASK_SPARSE_AND_STRUCTURAL )
            // M and C are sparse: the kth vector is vector k itself
            const int64_t j = k ;
            #else
            // M and C are either both sparse or both hypersparse; GBH (defined
            // elsewhere) presumably gives Ch [k] when C is hypersparse and k
            // when C is sparse -- confirm against GB.h
            const int64_t j = GBH (Ch, k) ;
            #endif

            // get the pattern of M(:,j) into [pM, pM_end); for a fine task,
            // presumably just this task's slice of that vector
            GB_GET_VECTOR (pM, pM_end, pM, pM_end, Mp, k, mvlen) ;

            //------------------------------------------------------------------
            // get B(:,j)
            //------------------------------------------------------------------

            #if GB_B_IS_HYPER
            // B is hyper: GB_lookup (defined elsewhere) locates B(:,j) via the
            // hyperlist Bh; bpleft persists across k since Ch is sorted
            int64_t pB_start, pB_end ;
            GB_lookup (true, Bh, Bp, vlen, &bpleft, bnvec-1, j,
                &pB_start, &pB_end) ;
            #elif GB_B_IS_SPARSE
            // B is sparse: B(:,j) is at Bp [j] ... Bp [j+1]-1
            const int64_t pB_start = Bp [j] ;
            const int64_t pB_end = Bp [j+1] ;
            #else
            // B is bitmap or full: B(:,j) is the jth dense column of length vlen
            const int64_t pB_start = j * vlen ;
            const int64_t pB_end = (j+1) * vlen ;
            #endif
            const int64_t bjnz = pB_end - pB_start ;    // # of entries in B(:,j)

            //------------------------------------------------------------------
            // estimate the work to compute each entry of C(:,j)
            //------------------------------------------------------------------

            // A decent estimate of the work to compute the dot product C(i,j)
            // = A(:,i)'*B(:,j) is min (|A(:,i)|, |B(:,j)|) + 1.  This is a
            // lower bound.  The actual work could require a binary search of
            // either A(:,i) or B(:,j), or a merge of the two vectors.  Or it
            // could require no work at all if all entries in A(:,i) appear
            // before all entries in B(:,j), or visa versa.  No work is done if
            // M(i,j)=0.

            if (bjnz == 0)
            {
                // B(:,j) is empty, so C(:,j) is empty as well.  No work is to
                // be done, but it still takes unit work to flag each C(:,j) as
                // a zombie
                for ( ; pM < pM_end ; pM++)
                {
                    Cwork [pM] = 1 ;
                }
            }
            else
            {
                for ( ; pM < pM_end ; pM++)
                {
                    // unit work even if M(i,j)=0 or the entry becomes a zombie
                    int64_t work = 1 ;
                    #if !defined ( GB_MASK_SPARSE_AND_STRUCTURAL )
                    // if M is structural, no need to check its values
                    if (GB_mcast (Mx, pM, msize))
                    #endif
                    {
                        const int64_t i = Mi [pM] ;
                        #if GB_A_IS_HYPER
                        // A is hyper: locate A(:,i) via the hyperlist Ah.  The
                        // search restarts at apleft = 0 for every entry, since
                        // the row indices Mi need not be sorted
                        int64_t pA, pA_end ;
                        int64_t apleft = 0 ; // M might be jumbled
                        GB_lookup (true, Ah, Ap, vlen, &apleft, anvec-1, i,
                            &pA, &pA_end) ;
                        const int64_t ainz = pA_end - pA ;
                        work += GB_IMIN (ainz, bjnz) ;
                        #elif GB_A_IS_SPARSE
                        // A is sparse: A(:,i) is at Ap [i] ... Ap [i+1]-1
                        const int64_t pA = Ap [i] ;
                        const int64_t pA_end = Ap [i+1] ;
                        const int64_t ainz = pA_end - pA ;
                        work += GB_IMIN (ainz, bjnz) ;
                        #else
                        // A is bitmap or full: A(:,i) is dense, so the dot
                        // product cost is bounded by |B(:,j)|
                        work += bjnz ;
                        #endif
                    }
                    Cwork [pM] = work ;
                }
            }
        }
    }
}