1 //------------------------------------------------------------------------------
2 // GB_ek_slice_merge1: merge column counts for a matrix
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // The input matrix A has been sliced via GB_ek_slice, and scanned to compute
11 // the counts of entries in each vector of C in Cp, Wfirst, and Wlast. This
12 // phase finalizes the column counts, Cp, merging the results of each task.
13
14 // On input, Cp [k] has been partially computed. Task tid operators on vector
15 // kfirst = kfirst_Aslice [tid] to klast = klast_Aslice [tid]. If kfirst < k <
16 // klast, then Cp [k] is the total count of entries in C(:,k). Otherwise, the
17 // counts are held in Wfirst and Wlast, and Cp [k] is zero (or uninititalized).
18 // Wfirst [tid] is the number of entries in C(:,kfirst) constructed by task
19 // tid, and Wlast [tid] is the number of entries in C(:,klast) constructed by
20 // task tid.
21
22 // This function sums up the entries computed for C(:,k) by all tasks, so that
23 // on output, Cp [k] is the total count of entries in C(:,k).
24
25 #include "GB_ek_slice.h"
26
GB_ek_slice_merge1(int64_t * restrict Cp,const int64_t * restrict Wfirst,const int64_t * restrict Wlast,const int64_t * A_ek_slicing,const int A_ntasks)27 void GB_ek_slice_merge1 // merge column counts for the matrix C
28 (
29 // input/output:
30 int64_t *restrict Cp, // column counts
31 // input:
32 const int64_t *restrict Wfirst, // size A_ntasks
33 const int64_t *restrict Wlast, // size A_ntasks
34 const int64_t *A_ek_slicing, // size 3*A_ntasks+1
35 const int A_ntasks // # of tasks
36 )
37 {
38
39 const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
40 const int64_t *restrict klast_Aslice = A_ek_slicing + A_ntasks ;
41
42 int64_t kprior = -1 ;
43
44 for (int tid = 0 ; tid < A_ntasks ; tid++)
45 {
46
47 //----------------------------------------------------------------------
48 // sum up the partial result that thread tid computed for kfirst
49 //----------------------------------------------------------------------
50
51 int64_t kfirst = kfirst_Aslice [tid] ;
52 int64_t klast = klast_Aslice [tid] ;
53
54 if (kfirst <= klast)
55 {
56 if (kprior < kfirst)
57 {
58 // This thread is the first one that did work on
59 // A(:,kfirst), so use it to start the reduction.
60 Cp [kfirst] = Wfirst [tid] ;
61 }
62 else
63 {
64 Cp [kfirst] += Wfirst [tid] ;
65 }
66 kprior = kfirst ;
67 }
68
69 //----------------------------------------------------------------------
70 // sum up the partial result that thread tid computed for klast
71 //----------------------------------------------------------------------
72
73 if (kfirst < klast)
74 {
75 ASSERT (kprior < klast) ;
76 // This thread is the first one that did work on
77 // A(:,klast), so use it to start the reduction.
78 Cp [klast] = Wlast [tid] ;
79 kprior = klast ;
80 }
81 }
82 }
83
84