//------------------------------------------------------------------------------
// GB_AxB_dot3_phase1_template: analysis phase for dot3 (C<M> = A'*B)
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

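// This phase scans the entries of the mask M.  For each entry M(i,j), it
// estimates the work required to compute the dot product C(i,j) =
// A(:,i)'*B(:,j), and saves the estimate in Cwork.  These per-entry counts
// can then be used to balance the numerical phase across tasks.
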
{
    int taskid ;
    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
    for (taskid = 0 ; taskid < ntasks ; taskid++)
    {

        //----------------------------------------------------------------------
        // get the task descriptor
        //----------------------------------------------------------------------

        int64_t kfirst = TaskList [taskid].kfirst ;
        int64_t klast  = TaskList [taskid].klast ;
        bool fine_task = (klast == -1) ;
        if (fine_task)
        {
            // a fine task operates on a slice of a single vector
            klast = kfirst ;
        }
        // bpleft is the lookup hint for Bh, used below if B is hypersparse.
        // Ch is not jumbled, so j increases with k and the hint can be reused.
        int64_t bpleft = 0 ;

        //----------------------------------------------------------------------
        // compute all vectors in this task
        //----------------------------------------------------------------------

        for (int64_t k = kfirst ; k <= klast ; k++)
        {

            //------------------------------------------------------------------
            // get j, the kth vector of C and M
            //------------------------------------------------------------------

            #if defined ( GB_MASK_SPARSE_AND_STRUCTURAL )
            // M and C are sparse
            const int64_t j = k ;
            #else
            // M and C are either both sparse or both hypersparse
            const int64_t j = GBH (Ch, k) ;
            #endif

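            // get the part of M(:,k) assigned to this task: all of the vector
            // for a coarse task, or a single slice of it for a fine task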
            GB_GET_VECTOR (pM, pM_end, pM, pM_end, Mp, k, mvlen) ;

            //------------------------------------------------------------------
            // get B(:,j)
            //------------------------------------------------------------------

            #if GB_B_IS_HYPER
                // B is hyper
                int64_t pB_start, pB_end ;
                GB_lookup (true, Bh, Bp, vlen, &bpleft, bnvec-1, j,
                    &pB_start, &pB_end) ;
            #elif GB_B_IS_SPARSE
                // B is sparse
                const int64_t pB_start = Bp [j] ;
                const int64_t pB_end = Bp [j+1] ;
            #else
                // B is bitmap or full
                const int64_t pB_start = j * vlen ;
                const int64_t pB_end = (j+1) * vlen ;
            #endif
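            // bjnz is the size of B(:,j): the number of entries present when B
            // is sparse or hypersparse (zero if B is hypersparse and j does not
            // appear in Bh), or simply vlen when B is bitmap or full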
            const int64_t bjnz = pB_end - pB_start ;

            //------------------------------------------------------------------
            // estimate the work to compute each entry of C(:,j)
            //------------------------------------------------------------------

            // A decent estimate of the work to compute the dot product C(i,j)
            // = A(:,i)'*B(:,j) is min (|A(:,i)|, |B(:,j)|) + 1.  This is a
            // lower bound.  The actual work could require a binary search of
            // either A(:,i) or B(:,j), or a merge of the two vectors.  Or it
            // could require no work at all if all entries in A(:,i) appear
            // before all entries in B(:,j), or vice versa.  No work is done if
            // M(i,j)=0.
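
            // For example, if A(:,i) has 4 entries and B(:,j) has 1000, the
            // estimated work is min (4, 1000) + 1 = 5, regardless of which
            // method the numerical phase later uses for the dot product.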

            if (bjnz == 0)
            {
                // B(:,j) is empty, so C(:,j) is empty as well.  There is no
                // numerical work to do, but it still takes unit work to flag
                // each entry in C(:,j) as a zombie.
                for ( ; pM < pM_end ; pM++)
                {
                    Cwork [pM] = 1 ;
                }
            }
            else
            {
                for ( ; pM < pM_end ; pM++)
                {
                    int64_t work = 1 ;
                    #if !defined ( GB_MASK_SPARSE_AND_STRUCTURAL )
                    // if M is structural, no need to check its values
                    if (GB_mcast (Mx, pM, msize))
                    #endif
                    {
                        const int64_t i = Mi [pM] ;
                        #if GB_A_IS_HYPER
                        // A is hyper
                        int64_t pA, pA_end ;
                        // M might be jumbled, so the search of Ah restarts
                        // at zero for each entry of M(:,j)
                        int64_t apleft = 0 ;
                        GB_lookup (true, Ah, Ap, vlen, &apleft, anvec-1, i,
                            &pA, &pA_end) ;
                        const int64_t ainz = pA_end - pA ;
                        work += GB_IMIN (ainz, bjnz) ;
                        #elif GB_A_IS_SPARSE
                        // A is sparse
                        const int64_t pA = Ap [i] ;
                        const int64_t pA_end = Ap [i+1] ;
                        const int64_t ainz = pA_end - pA ;
                        work += GB_IMIN (ainz, bjnz) ;
                        #else
                        // A is bitmap or full
                        work += bjnz ;
                        #endif
                    }
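                    // if M(i,j) was tested above and found false, work remains
                    // 1: C(i,j) becomes a zombie, which takes unit work to flag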
                    Cwork [pM] = work ;
                }
            }
        }
    }
}
