1 //------------------------------------------------------------------------------
2 // GB_ek_slice_merge2: merge final results for matrix C
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Prior to calling this function, a method using GB_ek_slice to slice an input
11 // matrix A has computed the vector counts Cp, where Cp [k] is the number of
12 // entries in the kth vector of C on input to this function.
13
14 // The input matrix and the matrix C is sliced by kfirst_Aslice and
15 // klast_Aslice, where kfirst = kfirst_Aslice [tid] is the first vector in A
16 // and C computed by task tid, and klast = klast_Aslice [tid] is the last
17 // vector computed by task tid. Tasks tid and tid+1 may cooperate on a single
18 // vector, however, where klast_Aslice [tid] may be the same as kfirst_Aslice
19 // [tid]. The method has also computed Wfirst [tid] and Wlast [tid] for each
20 // task id, tid. Wfirst [tid] is the number of entries task tid contributes to
21 // C(:,kfirst), and Wlast [tid] is the number of entries task tid contributes
22 // to C(:,klast).
23
24 // On output, Cp [0:cnvec] is overwritten with its cumulative sum.
25 // C_nvec_nonempty is the count of how many vectors in C are empty.
26 // Cp_kfirst [tid] is the position in C where task tid owns entries in
27 // C(:,kfirst), which is a cumulative sum of the entries computed in C(:,k) for
28 // all tasks that cooperate to compute that vector, starting at Cp [kfirst].
29 // There is no need to compute this for C(:,klast): if kfirst < klast, then
30 // either task tid fully owns C(:,klast), or it is always the first task to
31 // operate on C(:,klast). In both cases, task tid starts its computations at
32 // the top of C(:,klast), which can be found at Cp [klast].
33
34 #include "GB_ek_slice.h"
35
GB_ek_slice_merge2(int64_t * C_nvec_nonempty,int64_t * restrict Cp_kfirst,int64_t * restrict Cp,const int64_t cnvec,const int64_t * restrict Wfirst,const int64_t * restrict Wlast,const int64_t * A_ek_slicing,const int ntasks,const int nthreads,GB_Context Context)36 void GB_ek_slice_merge2 // merge final results for matrix C
37 (
38 // output
39 int64_t *C_nvec_nonempty, // # of non-empty vectors in C
40 int64_t *restrict Cp_kfirst, // size ntasks
41 // input/output
42 int64_t *restrict Cp, // size cnvec+1
43 // input
44 const int64_t cnvec,
45 const int64_t *restrict Wfirst, // size ntasks
46 const int64_t *restrict Wlast, // size ntasks
47 const int64_t *A_ek_slicing, // size 3*ntasks+1
48 const int ntasks, // # of tasks used to construct C
49 const int nthreads, // # of threads to use
50 GB_Context Context
51 )
52 {
53
54 //--------------------------------------------------------------------------
55 // Cp = cumsum (Cp)
56 //--------------------------------------------------------------------------
57
58 GB_cumsum (Cp, cnvec, C_nvec_nonempty, nthreads, Context) ;
59
60 //--------------------------------------------------------------------------
61 // determine the slice boundaries in the new C matrix
62 //--------------------------------------------------------------------------
63
64 const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
65 const int64_t *restrict klast_Aslice = A_ek_slicing + ntasks ;
66 // const int64_t *restrict pstart_Aslice = A_ek_slicing + ntasks * 2 ;
67
68 int64_t kprior = -1 ;
69 int64_t pC = 0 ;
70
71 for (int tid = 0 ; tid < ntasks ; tid++)
72 {
73 int64_t kfirst = kfirst_Aslice [tid] ;
74
75 if (kprior < kfirst)
76 {
77 // Task tid is the first one to do work on C(:,kfirst), so it
78 // starts at Cp [kfirst], and it contributes Wfirst [tid] entries
79 // to C(:,kfirst).
80 pC = Cp [kfirst] ;
81 kprior = kfirst ;
82 }
83
84 // Task tid contributes Wfirst [tid] entries to C(:,kfirst)
85 Cp_kfirst [tid] = pC ;
86 pC += Wfirst [tid] ;
87
88 int64_t klast = klast_Aslice [tid] ;
89 if (kfirst < klast)
90 {
91 // Task tid is the last to contribute to C(:,kfirst).
92 ASSERT (pC == Cp [kfirst+1]) ;
93 // Task tid contributes the first Wlast [tid] entries to
94 // C(:,klast), so the next task tid+1 starts at location Cp [klast]
95 // + Wlast [tid], if its first vector is klast of this task.
96 pC = Cp [klast] + Wlast [tid] ;
97 kprior = klast ;
98 }
99 }
100 }
101
102