1 //------------------------------------------------------------------------------
2 // GB_ek_slice_merge1: merge column counts for a matrix
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // The input matrix A has been sliced via GB_ek_slice, and scanned to compute
11 // the counts of entries in each vector of C in Cp, Wfirst, and Wlast.  This
12 // phase finalizes the column counts, Cp, merging the results of each task.
13 
14 // On input, Cp [k] has been partially computed.  Task tid operators on vector
15 // kfirst = kfirst_Aslice [tid] to klast = klast_Aslice [tid].  If kfirst < k <
16 // klast, then Cp [k] is the total count of entries in C(:,k).  Otherwise, the
17 // counts are held in Wfirst and Wlast, and Cp [k] is zero (or uninititalized).
18 // Wfirst [tid] is the number of entries in C(:,kfirst) constructed by task
19 // tid, and Wlast [tid] is the number of entries in C(:,klast) constructed by
20 // task tid.
21 
22 // This function sums up the entries computed for C(:,k) by all tasks, so that
23 // on output, Cp [k] is the total count of entries in C(:,k).
24 
25 #include "GB_ek_slice.h"
26 
GB_ek_slice_merge1(int64_t * restrict Cp,const int64_t * restrict Wfirst,const int64_t * restrict Wlast,const int64_t * A_ek_slicing,const int A_ntasks)27 void GB_ek_slice_merge1     // merge column counts for the matrix C
28 (
29     // input/output:
30     int64_t *restrict Cp,                    // column counts
31     // input:
32     const int64_t *restrict Wfirst,          // size A_ntasks
33     const int64_t *restrict Wlast,           // size A_ntasks
34     const int64_t *A_ek_slicing,                // size 3*A_ntasks+1
35     const int A_ntasks                          // # of tasks
36 )
37 {
38 
39     const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
40     const int64_t *restrict klast_Aslice  = A_ek_slicing + A_ntasks ;
41 
42     int64_t kprior = -1 ;
43 
44     for (int tid = 0 ; tid < A_ntasks ; tid++)
45     {
46 
47         //----------------------------------------------------------------------
48         // sum up the partial result that thread tid computed for kfirst
49         //----------------------------------------------------------------------
50 
51         int64_t kfirst = kfirst_Aslice [tid] ;
52         int64_t klast  = klast_Aslice  [tid] ;
53 
54         if (kfirst <= klast)
55         {
56             if (kprior < kfirst)
57             {
58                 // This thread is the first one that did work on
59                 // A(:,kfirst), so use it to start the reduction.
60                 Cp [kfirst] = Wfirst [tid] ;
61             }
62             else
63             {
64                 Cp [kfirst] += Wfirst [tid] ;
65             }
66             kprior = kfirst ;
67         }
68 
69         //----------------------------------------------------------------------
70         // sum up the partial result that thread tid computed for klast
71         //----------------------------------------------------------------------
72 
73         if (kfirst < klast)
74         {
75             ASSERT (kprior < klast) ;
76             // This thread is the first one that did work on
77             // A(:,klast), so use it to start the reduction.
78             Cp [klast] = Wlast [tid] ;
79             kprior = klast ;
80         }
81     }
82 }
83 
84