1 //------------------------------------------------------------------------------
2 // GB_ek_slice_merge2: merge final results for matrix C
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Prior to calling this function, a method using GB_ek_slice to slice an input
11 // matrix A has computed the vector counts Cp, where Cp [k] is the number of
12 // entries in the kth vector of C on input to this function.
13 
14 // The input matrix and the matrix C is sliced by kfirst_Aslice and
15 // klast_Aslice, where kfirst = kfirst_Aslice [tid] is the first vector in A
16 // and C computed by task tid, and klast = klast_Aslice [tid] is the last
17 // vector computed by task tid.  Tasks tid and tid+1 may cooperate on a single
18 // vector, however, where klast_Aslice [tid] may be the same as kfirst_Aslice
19 // [tid].  The method has also computed Wfirst [tid] and Wlast [tid] for each
20 // task id, tid.  Wfirst [tid] is the number of entries task tid contributes to
21 // C(:,kfirst), and Wlast [tid] is the number of entries task tid contributes
22 // to C(:,klast).
23 
24 // On output, Cp [0:cnvec] is overwritten with its cumulative sum.
25 // C_nvec_nonempty is the count of how many vectors in C are empty.
26 // Cp_kfirst [tid] is the position in C where task tid owns entries in
27 // C(:,kfirst), which is a cumulative sum of the entries computed in C(:,k) for
28 // all tasks that cooperate to compute that vector, starting at Cp [kfirst].
29 // There is no need to compute this for C(:,klast):  if kfirst < klast, then
30 // either task tid fully owns C(:,klast), or it is always the first task to
31 // operate on C(:,klast).  In both cases, task tid starts its computations at
32 // the top of C(:,klast), which can be found at Cp [klast].
33 
34 #include "GB_ek_slice.h"
35 
GB_ek_slice_merge2(int64_t * C_nvec_nonempty,int64_t * restrict Cp_kfirst,int64_t * restrict Cp,const int64_t cnvec,const int64_t * restrict Wfirst,const int64_t * restrict Wlast,const int64_t * A_ek_slicing,const int ntasks,const int nthreads,GB_Context Context)36 void GB_ek_slice_merge2     // merge final results for matrix C
37 (
38     // output
39     int64_t *C_nvec_nonempty,           // # of non-empty vectors in C
40     int64_t *restrict Cp_kfirst,     // size ntasks
41     // input/output
42     int64_t *restrict Cp,            // size cnvec+1
43     // input
44     const int64_t cnvec,
45     const int64_t *restrict Wfirst,          // size ntasks
46     const int64_t *restrict Wlast,           // size ntasks
47     const int64_t *A_ek_slicing,        // size 3*ntasks+1
48     const int ntasks,                   // # of tasks used to construct C
49     const int nthreads,                 // # of threads to use
50     GB_Context Context
51 )
52 {
53 
54     //--------------------------------------------------------------------------
55     // Cp = cumsum (Cp)
56     //--------------------------------------------------------------------------
57 
58     GB_cumsum (Cp, cnvec, C_nvec_nonempty, nthreads, Context) ;
59 
60     //--------------------------------------------------------------------------
61     // determine the slice boundaries in the new C matrix
62     //--------------------------------------------------------------------------
63 
64     const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
65     const int64_t *restrict klast_Aslice  = A_ek_slicing + ntasks ;
66 //  const int64_t *restrict pstart_Aslice = A_ek_slicing + ntasks * 2 ;
67 
68     int64_t kprior = -1 ;
69     int64_t pC = 0 ;
70 
71     for (int tid = 0 ; tid < ntasks ; tid++)
72     {
73         int64_t kfirst = kfirst_Aslice [tid] ;
74 
75         if (kprior < kfirst)
76         {
77             // Task tid is the first one to do work on C(:,kfirst), so it
78             // starts at Cp [kfirst], and it contributes Wfirst [tid] entries
79             // to C(:,kfirst).
80             pC = Cp [kfirst] ;
81             kprior = kfirst ;
82         }
83 
84         // Task tid contributes Wfirst [tid] entries to C(:,kfirst)
85         Cp_kfirst [tid] = pC ;
86         pC += Wfirst [tid] ;
87 
88         int64_t klast = klast_Aslice [tid] ;
89         if (kfirst < klast)
90         {
91             // Task tid is the last to contribute to C(:,kfirst).
92             ASSERT (pC == Cp [kfirst+1]) ;
93             // Task tid contributes the first Wlast [tid] entries to
94             // C(:,klast), so the next task tid+1 starts at location Cp [klast]
95             // + Wlast [tid], if its first vector is klast of this task.
96             pC = Cp [klast] + Wlast [tid] ;
97             kprior = klast ;
98         }
99     }
100 }
101 
102