1 //------------------------------------------------------------------------------
2 // GB_ijsort:  sort an index array I and remove duplicates
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Sort an index array and remove duplicates.  In MATLAB notation:
11 
12 /*
13     [I1 I1k] = sort (I) ;
14     Iduplicate = [(I1 (1:end-1) == I1 (2:end)), false] ;
15     I2  = I1  (~Iduplicate) ;
16     I2k = I1k (~Iduplicate) ;
17 */
18 
19 #include "GB_ij.h"
20 #include "GB_sort.h"
21 
22 #define GB_FREE_WORK                    \
23 {                                       \
24     GB_FREE_WERK (&Work, Work_size) ;   \
25 }
26 
GB_ijsort(const GrB_Index * restrict I,int64_t * restrict p_ni,GrB_Index * restrict * p_I2,size_t * I2_size_handle,GrB_Index * restrict * p_I2k,size_t * I2k_size_handle,GB_Context Context)27 GrB_Info GB_ijsort
28 (
29     const GrB_Index *restrict I, // size ni, where ni > 1 always holds
30     int64_t *restrict p_ni,      // : size of I, output: # of indices in I2
31     GrB_Index *restrict *p_I2,   // size ni2, where I2 [0..ni2-1]
32                         // contains the sorted indices with duplicates removed.
33     size_t *I2_size_handle,
34     GrB_Index *restrict *p_I2k,  // output array of size ni2
35     size_t *I2k_size_handle,
36     GB_Context Context
37 )
38 {
39 
40     //--------------------------------------------------------------------------
41     // check inputs
42     //--------------------------------------------------------------------------
43 
44     GrB_Info info ;
45     ASSERT (I != NULL) ;
46     ASSERT (p_ni != NULL) ;
47     ASSERT (p_I2 != NULL) ;
48     ASSERT (p_I2k != NULL) ;
49 
50     //--------------------------------------------------------------------------
51     // get inputs
52     //--------------------------------------------------------------------------
53 
54     GrB_Index *Work = NULL ; size_t Work_size = 0 ;
55     GrB_Index *restrict I2  = NULL ; size_t I2_size = 0 ;
56     GrB_Index *restrict I2k = NULL ; size_t I2k_size = 0 ;
57     int64_t ni = *p_ni ;
58     ASSERT (ni > 1) ;
59     int ntasks = 0 ;
60 
61     //--------------------------------------------------------------------------
62     // determine the number of threads to use
63     //--------------------------------------------------------------------------
64 
65     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
66     int nthreads = GB_nthreads (ni, chunk, nthreads_max) ;
67 
68     //--------------------------------------------------------------------------
69     // determine number of tasks to create
70     //--------------------------------------------------------------------------
71 
72     ntasks = (nthreads == 1) ? 1 : (32 * nthreads) ;
73     ntasks = GB_IMIN (ntasks, ni) ;
74     ntasks = GB_IMAX (ntasks, 1) ;
75 
76     //--------------------------------------------------------------------------
77     // allocate workspace
78     //--------------------------------------------------------------------------
79 
80     Work = GB_MALLOC_WERK (2*ni + ntasks + 1, GrB_Index, &Work_size) ;
81     if (Work == NULL)
82     {
83         // out of memory
84         return (GrB_OUT_OF_MEMORY) ;
85     }
86 
87     GrB_Index *restrict I1  = Work ;                         // size ni
88     GrB_Index *restrict I1k = Work + ni ;                    // size ni
89     int64_t *restrict Count = (int64_t *) (Work + 2*ni) ;    // size ntasks+1
90 
91     //--------------------------------------------------------------------------
92     // copy I into I1 and construct I1k
93     //--------------------------------------------------------------------------
94 
95     GB_memcpy (I1, I, ni * sizeof (GrB_Index), nthreads) ;
96 
97     int64_t k ;
98     #pragma omp parallel for num_threads(nthreads) schedule(static)
99     for (k = 0 ; k < ni ; k++)
100     {
101         // the key is selected so that the last duplicate entry comes first in
102         // the sorted result.  It must be adjusted later, so that the kth entry
103         // has a key equal to k.
104         I1k [k] = (ni-k) ;
105     }
106 
107     //--------------------------------------------------------------------------
108     // sort [I1 I1k]
109     //--------------------------------------------------------------------------
110 
111     info = GB_msort_2b ((int64_t *) I1, (int64_t *) I1k, ni, nthreads) ;
112     if (info != GrB_SUCCESS)
113     {
114         // out of memory
115         GB_FREE_WORK ;
116         return (GrB_OUT_OF_MEMORY) ;
117     }
118 
119     //--------------------------------------------------------------------------
120     // count unique entries in I1
121     //--------------------------------------------------------------------------
122 
123     int tid ;
124     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
125     for (tid = 0 ; tid < ntasks ; tid++)
126     {
127         int64_t kfirst, klast, my_count = (tid == 0) ? 1 : 0 ;
128         GB_PARTITION (kfirst, klast, ni, tid, ntasks) ;
129         for (int64_t k = GB_IMAX (kfirst,1) ; k < klast ; k++)
130         {
131             if (I1 [k-1] != I1 [k])
132             {
133                 my_count++ ;
134             }
135         }
136         Count [tid] = my_count ;
137     }
138 
139     GB_cumsum (Count, ntasks, NULL, 1, NULL) ;
140     int64_t ni2 = Count [ntasks] ;
141 
142     //--------------------------------------------------------------------------
143     // allocate the result I2
144     //--------------------------------------------------------------------------
145 
146     I2  = GB_MALLOC_WERK (ni2, GrB_Index, &I2_size) ;
147     I2k = GB_MALLOC_WERK (ni2, GrB_Index, &I2k_size) ;
148     if (I2 == NULL || I2k == NULL)
149     {
150         // out of memory
151         GB_FREE_WORK ;
152         GB_FREE_WERK (&I2, I2_size) ;
153         GB_FREE_WERK (&I2k, I2k_size) ;
154         return (GrB_OUT_OF_MEMORY) ;
155     }
156 
157     //--------------------------------------------------------------------------
158     // construct the new list I2 from I1, removing duplicates
159     //--------------------------------------------------------------------------
160 
161     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
162     for (tid = 0 ; tid < ntasks ; tid++)
163     {
164         int64_t kfirst, klast, k2 = Count [tid] ;
165         GB_PARTITION (kfirst, klast, ni, tid, ntasks) ;
166         if (tid == 0)
167         {
168             // the first entry in I1 is never a duplicate
169             I2  [k2] = I1  [0] ;
170             I2k [k2] = (ni - I1k [0]) ;
171             k2++ ;
172         }
173         for (int64_t k = GB_IMAX (kfirst,1) ; k < klast ; k++)
174         {
175             if (I1 [k-1] != I1 [k])
176             {
177                 I2  [k2] = I1  [k] ;
178                 I2k [k2] = ni - I1k [k] ;
179                 k2++ ;
180             }
181         }
182     }
183 
184     //--------------------------------------------------------------------------
185     // check result: compare with single-pass, single-threaded algorithm
186     //--------------------------------------------------------------------------
187 
188     #ifdef GB_DEBUG
189     {
190         int64_t ni1 = 1 ;
191         I1k [0] = ni - I1k [0] ;
192         for (int64_t k = 1 ; k < ni ; k++)
193         {
194             if (I1 [ni1-1] != I1 [k])
195             {
196                 I1  [ni1] = I1  [k] ;
197                 I1k [ni1] = ni - I1k [k] ;
198                 ni1++ ;
199             }
200         }
201         ASSERT (ni1 == ni2) ;
202         for (int64_t k = 0 ; k < ni1 ; k++)
203         {
204             ASSERT (I1  [k] == I2 [k]) ;
205             ASSERT (I1k [k] == I2k [k]) ;
206         }
207     }
208     #endif
209 
210     //--------------------------------------------------------------------------
211     // free workspace and return the new sorted list
212     //--------------------------------------------------------------------------
213 
214     GB_FREE_WORK ;
215     *(p_I2 ) = (GrB_Index *) I2  ; (*I2_size_handle ) = I2_size ;
216     *(p_I2k) = (GrB_Index *) I2k ; (*I2k_size_handle) = I2k_size ;
217     *(p_ni ) = (int64_t    ) ni2 ;
218     return (GrB_SUCCESS) ;
219 }
220 
221