1 //------------------------------------------------------------------------------
2 // GB_ijsort: sort an index array I and remove duplicates
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Sort an index array and remove duplicates. In MATLAB notation:
11
12 /*
13 [I1 I1k] = sort (I) ;
14 Iduplicate = [(I1 (1:end-1) == I1 (2:end)), false] ;
15 I2 = I1 (~Iduplicate) ;
16 I2k = I1k (~Iduplicate) ;
17 */
18
19 #include "GB_ij.h"
20 #include "GB_sort.h"
21
22 #define GB_FREE_WORK \
23 { \
24 GB_FREE_WERK (&Work, Work_size) ; \
25 }
26
GB_ijsort(const GrB_Index * restrict I,int64_t * restrict p_ni,GrB_Index * restrict * p_I2,size_t * I2_size_handle,GrB_Index * restrict * p_I2k,size_t * I2k_size_handle,GB_Context Context)27 GrB_Info GB_ijsort
28 (
29 const GrB_Index *restrict I, // size ni, where ni > 1 always holds
30 int64_t *restrict p_ni, // : size of I, output: # of indices in I2
31 GrB_Index *restrict *p_I2, // size ni2, where I2 [0..ni2-1]
32 // contains the sorted indices with duplicates removed.
33 size_t *I2_size_handle,
34 GrB_Index *restrict *p_I2k, // output array of size ni2
35 size_t *I2k_size_handle,
36 GB_Context Context
37 )
38 {
39
40 //--------------------------------------------------------------------------
41 // check inputs
42 //--------------------------------------------------------------------------
43
44 GrB_Info info ;
45 ASSERT (I != NULL) ;
46 ASSERT (p_ni != NULL) ;
47 ASSERT (p_I2 != NULL) ;
48 ASSERT (p_I2k != NULL) ;
49
50 //--------------------------------------------------------------------------
51 // get inputs
52 //--------------------------------------------------------------------------
53
54 GrB_Index *Work = NULL ; size_t Work_size = 0 ;
55 GrB_Index *restrict I2 = NULL ; size_t I2_size = 0 ;
56 GrB_Index *restrict I2k = NULL ; size_t I2k_size = 0 ;
57 int64_t ni = *p_ni ;
58 ASSERT (ni > 1) ;
59 int ntasks = 0 ;
60
61 //--------------------------------------------------------------------------
62 // determine the number of threads to use
63 //--------------------------------------------------------------------------
64
65 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
66 int nthreads = GB_nthreads (ni, chunk, nthreads_max) ;
67
68 //--------------------------------------------------------------------------
69 // determine number of tasks to create
70 //--------------------------------------------------------------------------
71
72 ntasks = (nthreads == 1) ? 1 : (32 * nthreads) ;
73 ntasks = GB_IMIN (ntasks, ni) ;
74 ntasks = GB_IMAX (ntasks, 1) ;
75
76 //--------------------------------------------------------------------------
77 // allocate workspace
78 //--------------------------------------------------------------------------
79
80 Work = GB_MALLOC_WERK (2*ni + ntasks + 1, GrB_Index, &Work_size) ;
81 if (Work == NULL)
82 {
83 // out of memory
84 return (GrB_OUT_OF_MEMORY) ;
85 }
86
87 GrB_Index *restrict I1 = Work ; // size ni
88 GrB_Index *restrict I1k = Work + ni ; // size ni
89 int64_t *restrict Count = (int64_t *) (Work + 2*ni) ; // size ntasks+1
90
91 //--------------------------------------------------------------------------
92 // copy I into I1 and construct I1k
93 //--------------------------------------------------------------------------
94
95 GB_memcpy (I1, I, ni * sizeof (GrB_Index), nthreads) ;
96
97 int64_t k ;
98 #pragma omp parallel for num_threads(nthreads) schedule(static)
99 for (k = 0 ; k < ni ; k++)
100 {
101 // the key is selected so that the last duplicate entry comes first in
102 // the sorted result. It must be adjusted later, so that the kth entry
103 // has a key equal to k.
104 I1k [k] = (ni-k) ;
105 }
106
107 //--------------------------------------------------------------------------
108 // sort [I1 I1k]
109 //--------------------------------------------------------------------------
110
111 info = GB_msort_2b ((int64_t *) I1, (int64_t *) I1k, ni, nthreads) ;
112 if (info != GrB_SUCCESS)
113 {
114 // out of memory
115 GB_FREE_WORK ;
116 return (GrB_OUT_OF_MEMORY) ;
117 }
118
119 //--------------------------------------------------------------------------
120 // count unique entries in I1
121 //--------------------------------------------------------------------------
122
123 int tid ;
124 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
125 for (tid = 0 ; tid < ntasks ; tid++)
126 {
127 int64_t kfirst, klast, my_count = (tid == 0) ? 1 : 0 ;
128 GB_PARTITION (kfirst, klast, ni, tid, ntasks) ;
129 for (int64_t k = GB_IMAX (kfirst,1) ; k < klast ; k++)
130 {
131 if (I1 [k-1] != I1 [k])
132 {
133 my_count++ ;
134 }
135 }
136 Count [tid] = my_count ;
137 }
138
139 GB_cumsum (Count, ntasks, NULL, 1, NULL) ;
140 int64_t ni2 = Count [ntasks] ;
141
142 //--------------------------------------------------------------------------
143 // allocate the result I2
144 //--------------------------------------------------------------------------
145
146 I2 = GB_MALLOC_WERK (ni2, GrB_Index, &I2_size) ;
147 I2k = GB_MALLOC_WERK (ni2, GrB_Index, &I2k_size) ;
148 if (I2 == NULL || I2k == NULL)
149 {
150 // out of memory
151 GB_FREE_WORK ;
152 GB_FREE_WERK (&I2, I2_size) ;
153 GB_FREE_WERK (&I2k, I2k_size) ;
154 return (GrB_OUT_OF_MEMORY) ;
155 }
156
157 //--------------------------------------------------------------------------
158 // construct the new list I2 from I1, removing duplicates
159 //--------------------------------------------------------------------------
160
161 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
162 for (tid = 0 ; tid < ntasks ; tid++)
163 {
164 int64_t kfirst, klast, k2 = Count [tid] ;
165 GB_PARTITION (kfirst, klast, ni, tid, ntasks) ;
166 if (tid == 0)
167 {
168 // the first entry in I1 is never a duplicate
169 I2 [k2] = I1 [0] ;
170 I2k [k2] = (ni - I1k [0]) ;
171 k2++ ;
172 }
173 for (int64_t k = GB_IMAX (kfirst,1) ; k < klast ; k++)
174 {
175 if (I1 [k-1] != I1 [k])
176 {
177 I2 [k2] = I1 [k] ;
178 I2k [k2] = ni - I1k [k] ;
179 k2++ ;
180 }
181 }
182 }
183
184 //--------------------------------------------------------------------------
185 // check result: compare with single-pass, single-threaded algorithm
186 //--------------------------------------------------------------------------
187
188 #ifdef GB_DEBUG
189 {
190 int64_t ni1 = 1 ;
191 I1k [0] = ni - I1k [0] ;
192 for (int64_t k = 1 ; k < ni ; k++)
193 {
194 if (I1 [ni1-1] != I1 [k])
195 {
196 I1 [ni1] = I1 [k] ;
197 I1k [ni1] = ni - I1k [k] ;
198 ni1++ ;
199 }
200 }
201 ASSERT (ni1 == ni2) ;
202 for (int64_t k = 0 ; k < ni1 ; k++)
203 {
204 ASSERT (I1 [k] == I2 [k]) ;
205 ASSERT (I1k [k] == I2k [k]) ;
206 }
207 }
208 #endif
209
210 //--------------------------------------------------------------------------
211 // free workspace and return the new sorted list
212 //--------------------------------------------------------------------------
213
214 GB_FREE_WORK ;
215 *(p_I2 ) = (GrB_Index *) I2 ; (*I2_size_handle ) = I2_size ;
216 *(p_I2k) = (GrB_Index *) I2k ; (*I2k_size_handle) = I2k_size ;
217 *(p_ni ) = (int64_t ) ni2 ;
218 return (GrB_SUCCESS) ;
219 }
220
221