1 #include <petscsys.h> /*I "petscsys.h" I*/
2 #include <petsc/private/petscimpl.h>
3
Compare_PetscMPIInt_Private(const void * left,const void * right,PETSC_UNUSED void * ctx)4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5 {
6 PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7 return (l < r) ? -1 : (l > r);
8 }
9
Compare_PetscInt_Private(const void * left,const void * right,PETSC_UNUSED void * ctx)10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11 {
12 PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13 return (l < r) ? -1 : (l > r);
14 }
15
Compare_PetscReal_Private(const void * left,const void * right,PETSC_UNUSED void * ctx)16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17 {
18 PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19 return (l < r) ? -1 : (l > r);
20 }
21
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
COPYSWAPPY(char * a,char * b,char * t,size_t size)28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30 __builtin_memcpy(t, b, size);
31 __builtin_memmove(b, a, size);
32 __builtin_memcpy(a, t, size);
33 return;
34 }
35
COPYSWAPPY2(char * aa,char * ab,size_t asize,char * ba,char * bb,size_t bsize,char * t)36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *aa, char *ab, size_t asize, char *ba, char *bb, size_t bsize, char *t)
37 {
38 __builtin_memcpy(t, ab, asize);
39 __builtin_memmove(ab, aa, asize);
40 __builtin_memcpy(aa, t, asize);
41 __builtin_memcpy(t, bb, bsize);
42 __builtin_memmove(bb, ba, bsize);
43 __builtin_memcpy(ba, t, bsize);
44 return;
45 }
46
Petsc_memcpy(char * dest,const char * src,size_t size)47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49 __builtin_memcpy(dest, src, size);
50 return;
51 }
52
Petsc_memcpy2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55 __builtin_memcpy(adest, asrc, asize);
56 __builtin_memcpy(bdest, asrc, bsize);
57 return;
58 }
59
Petsc_memmove(char * dest,const char * src,size_t size)60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62 __builtin_memmove(dest, src, size);
63 return;
64 }
65
Petsc_memmove2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68 __builtin_memmove(adest, asrc, asize);
69 __builtin_memmove(bdest, bsrc, bsize);
70 return;
71 }
72 # else
COPYSWAPPY(char * a,char * b,char * t,size_t size)73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75 PetscErrorCode ierr;
76 PetscFunctionBegin;
77 ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78 ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79 ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80 PetscFunctionReturnVoid();
81 }
82
COPYSWAPPY2(char * aa,char * ab,size_t asize,char * ba,char * bb,size_t bsize,char * t)83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *aa, char *ab, size_t asize, char *ba, char *bb, size_t bsize, char *t)
84 {
85 PetscErrorCode ierr;
86 PetscFunctionBegin;
87 ierr = PetscMemcpy(t, ab, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88 ierr = PetscMemmove(ab, aa, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89 ierr = PetscMemcpy(aa, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90 ierr = PetscMemcpy(t, bb, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91 ierr = PetscMemmove(bb, ba, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92 ierr = PetscMemcpy(ba, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93 PetscFunctionReturnVoid();
94 }
95
Petsc_memcpy(char * dest,const char * src,size_t size)96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98 PetscErrorCode ierr;
99 PetscFunctionBegin;
100 ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101 PetscFunctionReturnVoid();
102 }
103
Petsc_memcpy2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106 PetscErrorCode ierr;
107 PetscFunctionBegin;
108 ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109 ierr = PetscMemcpy(bdest, asrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110 PetscFunctionReturnVoid();
111 }
112
Petsc_memmove(char * dest,const char * src,size_t size)113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115 PetscErrorCode ierr;
116 PetscFunctionBegin;
117 ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118 PetscFunctionReturnVoid();
119 }
120
Petsc_memmove2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123 PetscErrorCode ierr;
124 PetscFunctionBegin;
125 ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126 ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127 PetscFunctionReturnVoid();
128 }
129 #endif
130
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132 x. Output also inclusive.
133
134 NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135
136 {0,1,2,3,4,5}
137
138 when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139 */
PetscGallopSearchLeft_Private(const char * arr,size_t size,CompFunc cmp,void * ctx,PetscInt l,PetscInt r,const char * x,PetscInt * m)140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142 PetscInt last = l, k = 1, mid, cur = l+1;
143
144 PetscFunctionBegin;
145 *m = l;
146 if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147 if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);}
148 if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149 while (PETSC_TRUE) {
150 if (PetscUnlikely(cur > r)) {cur = r; break;}
151 if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152 last = cur;
153 cur += (k <<= 1) + 1;
154 ++k;
155 }
156 /* standard binary search but take last 0 mid 0 cur 1 into account*/
157 while (cur > last + 1) {
158 mid = last + ((cur - last) >> 1);
159 if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160 cur = mid;
161 } else {
162 last = mid;
163 }
164 }
165 *m = cur;
166 PetscFunctionReturn(0);
167 }
168
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170 < x. Output also inclusive */
PetscGallopSearchRight_Private(const char * arr,size_t size,CompFunc cmp,void * ctx,PetscInt l,PetscInt r,const char * x,PetscInt * m)171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173 PetscInt last = r, k = 1, mid, cur = r-1;
174
175 PetscFunctionBegin;
176 *m = r;
177 if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178 if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);}
179 if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180 while (PETSC_TRUE) {
181 if (PetscUnlikely(cur < l)) {cur = l; break;}
182 if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183 last = cur;
184 cur -= (k <<= 1) + 1;
185 ++k;
186 }
187 /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188 while (last > cur + 1) {
189 mid = last - ((last - cur) >> 1);
190 if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191 cur = mid;
192 } else {
193 last = mid;
194 }
195 }
196 *m = cur;
197 PetscFunctionReturn(0);
198 }
199
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201 complete array, left is first index of left array, mid is first index of right array, right is last index of right
202 array */
PetscTimSortMergeLo_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt left,PetscInt mid,PetscInt right)203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204 {
205 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206 const PetscInt llen = mid-left;
207 PetscErrorCode ierr;
208
209 PetscFunctionBegin;
210 Petsc_memcpy(tarr, arr+(left*size), llen*size);
211 while ((i < llen) && (j <= right)) {
212 if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213 Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214 ++k;
215 gallopright = 0;
216 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217 PetscInt l1, l2, diff1, diff2;
218 do {
219 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220 /* search temp for right[j], can move up to that of temp into arr immediately */
221 ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222 diff1 = l1-i;
223 Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224 k += diff1;
225 i = l1;
226 /* search right for temp[i], can move up to that many of right into arr */
227 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228 diff2 = l2-j;
229 Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230 k += diff2;
231 j = l2;
232 if (i >= llen || j > right) break;
233 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234 ++MIN_GALLOP_GLOBAL;
235 }
236 } else {
237 Petsc_memmove(arr+(k*size), arr+(j*size), size);
238 ++k;
239 gallopleft = 0;
240 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241 PetscInt l1, l2, diff1, diff2;
242 do {
243 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244 /* search right for temp[i], can move up to that many of right into arr */
245 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246 diff2 = l2-j;
247 Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248 k += diff2;
249 j = l2;
250 /* search temp for right[j], can copy up to that of temp into arr immediately */
251 ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252 diff1 = l1-i;
253 Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254 k += diff1;
255 i = l1;
256 if (i >= llen || j > right) break;
257 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258 ++MIN_GALLOP_GLOBAL;
259 }
260 }
261 }
262 if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263 PetscFunctionReturn(0);
264 }
265
PetscTimSortMergeLoWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt left,PetscInt mid,PetscInt right)266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267 {
268 PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269 const PetscInt llen = mid-left;
270 PetscErrorCode ierr;
271
272 PetscFunctionBegin;
273 Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274 while ((i < llen) && (j <= right)) {
275 if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277 ++k;
278 gallopright = 0;
279 if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280 PetscInt l1, l2, diff1, diff2;
281 do {
282 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283 /* search temp for right[j], can move up to that of temp into arr immediately */
284 ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285 diff1 = l1-i;
286 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287 k += diff1;
288 i = l1;
289 /* search right for temp[i], can move up to that many of right into arr */
290 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291 diff2 = l2-j;
292 Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293 k += diff2;
294 j = l2;
295 if (i >= llen || j > right) break;
296 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297 ++MIN_GALLOP_GLOBAL;
298 }
299 } else {
300 Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301 ++k;
302 gallopleft = 0;
303 if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304 PetscInt l1, l2, diff1, diff2;
305 do {
306 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307 /* search right for temp[i], can move up to that many of right into arr */
308 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309 diff2 = l2-j;
310 Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311 k += diff2;
312 j = l2;
313 /* search temp for right[j], can copy up to that of temp into arr immediately */
314 ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315 diff1 = l1-i;
316 Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317 k += diff1;
318 i = l1;
319 if (i >= llen || j > right) break;
320 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321 ++MIN_GALLOP_GLOBAL;
322 }
323 }
324 }
325 if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326 PetscFunctionReturn(0);
327 }
328
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330 complete array, left is first index of left array, mid is first index of right array, right is last index of right
331 array */
PetscTimSortMergeHi_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt left,PetscInt mid,PetscInt right)332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333 {
334 PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335 const PetscInt rlen = right-mid+1;
336 PetscErrorCode ierr;
337
338 PetscFunctionBegin;
339 Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340 while ((i >= 0) && (j >= left)) {
341 if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342 Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343 --k;
344 gallopleft = 0;
345 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346 PetscInt l1, l2, diff1, diff2;
347 do {
348 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349 /* search temp for left[j], can copy up to that many of temp into arr */
350 ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351 diff1 = i-l1;
352 Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353 k -= diff1;
354 i = l1;
355 /* search left for temp[i], can move up to that many of left up arr */
356 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357 diff2 = j-l2;
358 Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359 k -= diff2;
360 j = l2;
361 if (i < 0 || j < left) break;
362 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363 ++MIN_GALLOP_GLOBAL;
364 }
365 } else {
366 Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367 --k;
368 gallopright = 0;
369 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370 PetscInt l1, l2, diff1, diff2;
371 do {
372 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373 /* search left for temp[i], can move up to that many of left up arr */
374 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375 diff2 = j-l2;
376 Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377 k -= diff2;
378 j = l2;
379 /* search temp for left[j], can copy up to that many of temp into arr */
380 ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381 diff1 = i-l1;
382 Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383 k -= diff1;
384 i = l1;
385 if (i < 0 || j < left) break;
386 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387 ++MIN_GALLOP_GLOBAL;
388 }
389 }
390 }
391 if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392 PetscFunctionReturn(0);
393 }
394
PetscTimSortMergeHiWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt left,PetscInt mid,PetscInt right)395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396 {
397 PetscInt i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398 const PetscInt rlen = right-mid+1;
399 PetscErrorCode ierr;
400
401 PetscFunctionBegin;
402 Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403 while ((i >= 0) && (j >= left)) {
404 if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405 Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406 --k;
407 gallopleft = 0;
408 if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409 PetscInt l1, l2, diff1, diff2;
410 do {
411 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412 /* search temp for left[j], can copy up to that many of temp into arr */
413 ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414 diff1 = i-l1;
415 Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416 k -= diff1;
417 i = l1;
418 /* search left for temp[i], can move up to that many of left up arr */
419 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420 diff2 = j-l2;
421 Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422 k -= diff2;
423 j = l2;
424 if (i < 0 || j < left) break;
425 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426 ++MIN_GALLOP_GLOBAL;
427 }
428 } else {
429 Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430 --k;
431 gallopright = 0;
432 if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433 PetscInt l1, l2, diff1, diff2;
434 do {
435 if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436 /* search left for temp[i], can move up to that many of left up arr */
437 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438 diff2 = j-l2;
439 Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440 k -= diff2;
441 j = l2;
442 /* search temp for left[j], can copy up to that many of temp into arr */
443 ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444 diff1 = i-l1;
445 Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446 k -= diff1;
447 i = l1;
448 if (i < 0 || j < left) break;
449 } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450 ++MIN_GALLOP_GLOBAL;
451 }
452 }
453 }
454 if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455 PetscFunctionReturn(0);
456 }
457
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459 bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
PetscInsertionSort_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt left,PetscInt start,PetscInt right)460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461 {
462 PetscInt i = start == left ? start+1 : start;
463
464 PetscFunctionBegin;
465 for (; i <= right; ++i) {
466 PetscInt j = i-1;
467 Petsc_memcpy(tarr, arr+(i*size), size);
468 while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469 COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470 --j;
471 }
472 Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473 }
474 PetscFunctionReturn(0);
475 }
476
PetscInsertionSortWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt left,PetscInt start,PetscInt right)477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478 {
479 PetscInt i = start == left ? start+1 : start;
480
481 PetscFunctionBegin;
482 for (; i <= right; ++i) {
483 PetscInt j = i-1;
484 Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485 while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486 COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487 --j;
488 }
489 Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490 }
491 PetscFunctionReturn(0);
492 }
493
494 /* See PetscInsertionSort_Private */
PetscBinaryInsertionSort_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt left,PetscInt start,PetscInt right)495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496 {
497 PetscInt i = start == left ? start+1 : start;
498
499 PetscFunctionBegin;
500 for (; i <= right; ++i) {
501 PetscInt l = left, r = i;
502 Petsc_memcpy(tarr, arr+(i*size), size);
503 do {
504 const PetscInt m = l + ((r - l) >> 1);
505 if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506 r = m;
507 } else {
508 l = m + 1;
509 }
510 } while (l < r);
511 Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512 Petsc_memcpy(arr+(l*size), tarr, size);
513 }
514 PetscFunctionReturn(0);
515 }
516
PetscBinaryInsertionSortWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt left,PetscInt start,PetscInt right)517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518 {
519 PetscInt i = start == left ? start+1 : start;
520
521 PetscFunctionBegin;
522 for (; i <= right; ++i) {
523 PetscInt l = left, r = i;
524 Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525 do {
526 const PetscInt m = l + ((r - l) >> 1);
527 if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528 r = m;
529 } else {
530 l = m + 1;
531 }
532 } while (l < r);
533 Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534 Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535 }
536 PetscFunctionReturn(0);
537 }
538
539 typedef struct {
540 PetscInt size;
541 PetscInt start;
542 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
543
544 typedef struct {
545 char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546 size_t size;
547 size_t maxsize;
548 } PetscTimSortBuffer;
549
PetscTimSortResizeBuffer_Private(PetscTimSortBuffer * buff,size_t newSize)550 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
551 {
552 PetscFunctionBegin;
553 if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
554 {
555 /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
556 PetscErrorCode ierr;
557 size_t newMax = PetscMin(newSize*newSize, buff->maxsize);
558 ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
559 ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
560 buff->size = newMax;
561 }
562 PetscFunctionReturn(0);
563 }
564
PetscTimSortForceCollapse_Private(char * arr,size_t size,CompFunc cmp,void * ctx,PetscTimSortBuffer * buff,PetscTimSortStack * stack,PetscInt stacksize)565 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
566 {
567 PetscFunctionBegin;
568 for (;stacksize; --stacksize) {
569 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570 if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
571 PetscInt l, m = stack[stacksize].start, r;
572 PetscErrorCode ierr;
573 /* Search A for B[0] insertion */
574 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
575 /* Search B for A[-1] insertion */
576 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
577 if (m-l <= r-m) {
578 ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
579 ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
580 } else {
581 ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
582 ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
583 }
584 }
585 /* Update A with merge */
586 stack[stacksize-1].size += stack[stacksize].size;
587 }
588 PetscFunctionReturn(0);
589 }
590
PetscTimSortForceCollapseWithArray_Private(char * arr,size_t asize,char * barr,size_t bsize,CompFunc cmp,void * ctx,PetscTimSortBuffer * abuff,PetscTimSortBuffer * bbuff,PetscTimSortStack * stack,PetscInt stacksize)591 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
592 {
593 PetscFunctionBegin;
594 for (;stacksize; --stacksize) {
595 /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596 if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
597 PetscInt l, m = stack[stacksize].start, r;
598 PetscErrorCode ierr;
599 /* Search A for B[0] insertion */
600 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
601 /* Search B for A[-1] insertion */
602 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
603 if (m-l <= r-m) {
604 ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
605 ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
606 ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
607 } else {
608 ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
609 ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
610 ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
611 }
612 }
613 /* Update A with merge */
614 stack[stacksize-1].size += stack[stacksize].size;
615 }
616 PetscFunctionReturn(0);
617 }
618
PetscTimSortMergeCollapse_Private(char * arr,size_t size,CompFunc cmp,void * ctx,PetscTimSortBuffer * buff,PetscTimSortStack * stack,PetscInt * stacksize)619 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
620 {
621 PetscInt i = *stacksize;
622 PetscErrorCode ierr;
623
624 PetscFunctionBegin;
625 while (i) {
626 PetscInt l, m, r, itemp = i;
627
628 if (i == 1) {
629 /* A = stack[i-1], B = stack[i] */
630 if (stack[i-1].size < stack[i].size) {
631 /* if A[-1] <= B[0] then sorted */
632 if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
633 m = stack[i].start;
634 /* Search A for B[0] insertion */
635 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
636 /* Search B for A[-1] insertion */
637 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
638 if (m-l <= r-m) {
639 ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
640 ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
641 } else {
642 ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
643 ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
644 }
645 }
646 /* Update A with merge */
647 stack[i-1].size += stack[i].size;
648 --i;
649 }
650 } else {
651 /* i > 2, i.e. C exists
652 A = stack[i-2], B = stack[i-1], C = stack[i]; */
653 if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654 if (stack[i-2].size < stack[i].size) {
655 /* merge B into A, but if A[-1] <= B[0] then already sorted */
656 if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
657 m = stack[i-1].start;
658 /* Search A for B[0] insertion */
659 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
660 /* Search B for A[-1] insertion */
661 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
662 if (m-l <= r-m) {
663 ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
664 ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
665 } else {
666 ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
667 ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
668 }
669 }
670 /* Update A with merge */
671 stack[i-2].size += stack[i-1].size;
672 /* Push C up the stack */
673 stack[i-1].start = stack[i].start;
674 stack[i-1].size = stack[i].size;
675 } else {
676 /* merge C into B */
677 mergeBC:
678 /* If B[-1] <= C[0] then... you know the drill */
679 if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
680 m = stack[i].start;
681 /* Search B for C[0] insertion */
682 ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
683 /* Search C for B[-1] insertion */
684 ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
685 if (m-l <= r-m) {
686 ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
687 ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
688 } else {
689 ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
690 ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
691 }
692 }
693 /* Update B with merge */
694 stack[i-1].size += stack[i].size;
695 }
696 --i;
697 } else if (stack[i-1].size <= stack[i].size) {
698 /* merge C into B */
699 goto mergeBC;
700 }
701 }
702 if (itemp == i) break;
703 }
704 *stacksize = i;
705 PetscFunctionReturn(0);
706 }
707
PetscTimSortMergeCollapseWithArray_Private(char * arr,size_t asize,char * barr,size_t bsize,CompFunc cmp,void * ctx,PetscTimSortBuffer * abuff,PetscTimSortBuffer * bbuff,PetscTimSortStack * stack,PetscInt * stacksize)708 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
709 {
710 PetscInt i = *stacksize;
711 PetscErrorCode ierr;
712
713 PetscFunctionBegin;
714 while (i) {
715 PetscInt l, m, r, itemp = i;
716
717 if (i == 1) {
718 /* A = stack[i-1], B = stack[i] */
719 if (stack[i-1].size < stack[i].size) {
720 /* if A[-1] <= B[0] then sorted */
721 if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
722 m = stack[i].start;
723 /* Search A for B[0] insertion */
724 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
725 /* Search B for A[-1] insertion */
726 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
727 if (m-l <= r-m) {
728 ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
729 ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
730 ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
731 } else {
732 ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
733 ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
734 ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
735 }
736 }
737 /* Update A with merge */
738 stack[i-1].size += stack[i].size;
739 --i;
740 }
741 } else {
742 /* i > 2, i.e. C exists
743 A = stack[i-2], B = stack[i-1], C = stack[i]; */
744 if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745 if (stack[i-2].size < stack[i].size) {
746 /* merge B into A, but if A[-1] <= B[0] then already sorted */
747 if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
748 m = stack[i-1].start;
749 /* Search A for B[0] insertion */
750 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
751 /* Search B for A[-1] insertion */
752 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
753 if (m-l <= r-m) {
754 ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
755 ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
756 ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
757 } else {
758 ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
759 ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
760 ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
761 }
762 }
763 /* Update A with merge */
764 stack[i-2].size += stack[i-1].size;
765 /* Push C up the stack */
766 stack[i-1].start = stack[i].start;
767 stack[i-1].size = stack[i].size;
768 } else {
769 /* merge C into B */
770 mergeBC:
771 /* If B[-1] <= C[0] then... you know the drill */
772 if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
773 m = stack[i].start;
774 /* Search B for C[0] insertion */
775 ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
776 /* Search C for B[-1] insertion */
777 ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
778 if (m-l <= r-m) {
779 ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
780 ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
781 ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
782 } else {
783 ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
784 ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
785 ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
786 }
787 }
788 /* Update B with merge */
789 stack[i-1].size += stack[i].size;
790 }
791 --i;
792 } else if (stack[i-1].size <= stack[i].size) {
793 /* merge C into B */
794 goto mergeBC;
795 }
796 }
797 if (itemp == i) break;
798 }
799 *stacksize = i;
800 PetscFunctionReturn(0);
801 }
802
803 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804 elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805 binary insertion sort or regulat insertion sort */
PetscTimSortBuildRun_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt n,PetscInt minrun,PetscInt runstart,PetscInt * runend)806 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
807 {
808 const PetscInt re = PetscMin(runstart+minrun, n-1);
809 PetscInt ri = runstart;
810
811 PetscFunctionBegin;
812 if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
813 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
815 ++ri;
816 while (ri < n-1) {
817 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
818 ++ri;
819 }
820 {
821 PetscInt lo = runstart, hi = ri;
822 for (; lo < hi; ++lo, --hi) {
823 COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824 }
825 }
826 } else {
827 ++ri;
828 while (ri < n-1) {
829 if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
830 ++ri;
831 }
832 }
833 #if defined(PETSC_USE_DEBUG)
834 {
835 PetscErrorCode ierr;
836 ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
837 }
838 #endif
839 if (ri < re) {
840 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841 binary search */
842 if (ri-runstart <= minrun >> 1) {
843 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844 PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845 } else {
846 PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
847 }
848 *runend = re;
849 } else *runend = ri;
850 PetscFunctionReturn(0);
851 }
852
PetscTimSortBuildRunWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt n,PetscInt minrun,PetscInt runstart,PetscInt * runend)853 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
854 {
855 const PetscInt re = PetscMin(runstart+minrun, n-1);
856 PetscInt ri = runstart;
857
858 PetscFunctionBegin;
859 if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
860 /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861 if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
862 ++ri;
863 while (ri < n-1) {
864 if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
865 ++ri;
866 }
867 {
868 PetscInt lo = runstart, hi = ri;
869 for (; lo < hi; ++lo, --hi) {
870 COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871 }
872 }
873 } else {
874 ++ri;
875 while (ri < n-1) {
876 if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
877 ++ri;
878 }
879 }
880 #if defined(PETSC_USE_DEBUG)
881 {
882 PetscErrorCode ierr;
883 ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
884 }
885 #endif
886 if (ri < re) {
887 /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888 binary search */
889 if (ri-runstart <= minrun >> 1) {
890 ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891 PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
892 } else {
893 PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
894 }
895 *runend = re;
896 } else *runend = ri;
897 PetscFunctionReturn(0);
898 }
899
900 /*@C
901 PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
902
903 Not Collective
904
905 Input Parameters:
906 + n - number of values
907 . arr - array to be sorted
908 . size - size in bytes of the datatype held in arr
909 . cmp - function pointer to comparison function
910 - ctx - optional context to be passed to comparison function, NULL if not needed
911
912 Output Parameters:
913 . arr - sorted array
914
915 Notes:
916 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
917 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
918 select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
919 do so it repeatedly triggers attempts throughout to merge adjacent runs.
920
921 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
922 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
923 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
924 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
925 likely all contain similar data.
926
927 Sample usage:
928 The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
929 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
930 may also
931 change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
932 the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
933 order
934
935 .vb
936 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
937 my_type l = *(my_type *) left, r = *(my_type *) right;
938 return (l < r) ? -1 : (l > r);
939 }
940 .ve
941 Note the context is unused here but you may use it to pass and subsequently access whatever information required
942 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
943 Then pass the function
944 .vb
945 PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
946 .ve
947
948 Fortran Notes:
949 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
950 returns result. For example
951 .vb
952 subroutine CompareIntegers(left,right,ctx,result)
953 implicit none
954
955 PetscInt,intent(in) :: left, right
956 type(UserCtx) :: ctx
957 integer,intent(out) :: result
958
959 if (left < right) then
960 result = -1
961 else if (left == right) then
962 result = 0
963 else
964 result = 1
965 end if
966 return
967 end subroutine CompareIntegers
968 .ve
969
970 References:
971 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
972
973 Level: developer
974
975 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
976 @*/
PetscTimSort(PetscInt n,void * arr,size_t size,int (* cmp)(const void *,const void *,void *),void * ctx)977 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
978 {
979 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
980 PetscTimSortStack runstack[128];
981 PetscTimSortBuffer buff;
982 PetscErrorCode ierr;
983 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
984 It is so unlikely that this limit is reached that this is __never__ checked for */
985
986 PetscFunctionBegin;
987 /* Compute minrun. Minrun should be (32, 65) such that N/minrun
988 is a power of 2 or one plus a power of 2 */
989 {
990 PetscInt t = n, r = 0;
991 /* r becomes 1 if the least significant bits contain at least one off bit */
992 while (t >= 64) {
993 r |= t & 1;
994 t >>= 1;
995 }
996 minrun = t + r;
997 }
998 if (PetscDefined(USE_DEBUG)) {
999 ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1000 if (n < 64) {
1001 ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1002 } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1003 }
1004 ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
1005 buff.size = (size_t) minrun*size;
1006 buff.maxsize = (size_t) n*size;
1007 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1008 while (runstart < n) {
1009 /* Check if additional entries are at least partially ordered and build natural run */
1010 ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1011 runstack[stacksize].start = runstart;
1012 runstack[stacksize].size = runend-runstart+1;
1013 ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr);
1014 ++stacksize;
1015 runstart = runend+1;
1016 }
1017 /* Have been inside while, so discard last stacksize++ */
1018 --stacksize;
1019 ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr);
1020 ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
1021 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1022 PetscFunctionReturn(0);
1023 }
1024
1025 /*@C
1026 PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1027 reorders a second array to match the first. The arrays need not be the same type.
1028
1029 Not Collective
1030
1031 Input Parameters:
1032 + n - number of values
1033 . arr - array to be sorted
1034 . asize - size in bytes of the datatype held in arr
1035 . barr - array to be reordered
1036 . asize - size in bytes of the datatype held in barr
1037 . cmp - function pointer to comparison function
1038 - ctx - optional context to be passed to comparison function, NULL if not needed
1039
1040 Output Parameters:
1041 + arr - sorted array
1042 - barr - reordered array
1043
1044 Notes:
1045 The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1046 overlap.
1047
1048 Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1049 sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1050 to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1051 arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1052
1053 Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1054 the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1055 bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1056 searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1057 likely all contain similar data.
1058
1059 Sample usage:
1060 The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1061 between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1062 may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1063 guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1064 increasing order
1065
1066 .vb
1067 int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1068 my_type l = *(my_type *) left, r = *(my_type *) right;
1069 return (l < r) ? -1 : (l > r);
1070 }
1071 .ve
1072 Note the context is unused here but you may use it to pass and subsequently access whatever information required
1073 inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1074 Then pass the function
1075 .vb
1076 PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1077 .ve
1078
1079
1080 Fortran Notes:
1081 To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1082 returns result. For example
1083 .vb
1084 subroutine CompareIntegers(left,right,ctx,result)
1085 implicit none
1086
1087 PetscInt,intent(in) :: left, right
1088 type(UserCtx) :: ctx
1089 integer,intent(out) :: result
1090
1091 if (left < right) then
1092 result = -1
1093 else if (left == right) then
1094 result = 0
1095 else
1096 result = 1
1097 end if
1098 return
1099 end subroutine CompareIntegers
1100 .ve
1101
1102 References:
1103 1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1104
1105 Level: developer
1106
1107 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1108 @*/
PetscTimSortWithArray(PetscInt n,void * arr,size_t asize,void * barr,size_t bsize,int (* cmp)(const void *,const void *,void *),void * ctx)1109 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1110 {
1111 PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
1112 PetscTimSortStack runstack[128];
1113 PetscTimSortBuffer abuff, bbuff;
1114 PetscErrorCode ierr;
1115 /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1116 It is so unlikely that this limit is reached that this is __never__ checked for */
1117
1118 PetscFunctionBegin;
1119 /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1120 is a power of 2 or one plus a power of 2 */
1121 {
1122 PetscInt t = n, r = 0;
1123 /* r becomes 1 if the least significant bits contain at least one off bit */
1124 while (t >= 64) {
1125 r |= t & 1;
1126 t >>= 1;
1127 }
1128 minrun = t + r;
1129 }
1130 if (PetscDefined(USE_DEBUG)) {
1131 ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1132 if (n < 64) {
1133 ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1134 } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1135 }
1136 ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1137 abuff.size = (size_t) minrun*asize;
1138 abuff.maxsize = (size_t) n*asize;
1139 ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1140 bbuff.size = (size_t) minrun*bsize;
1141 bbuff.maxsize = (size_t) n*bsize;
1142 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1143 while (runstart < n) {
1144 /* Check if additional entries are at least partially ordered and build natural run */
1145 ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1146 runstack[stacksize].start = runstart;
1147 runstack[stacksize].size = runend-runstart+1;
1148 ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1149 ++stacksize;
1150 runstart = runend+1;
1151 }
1152 /* Have been inside while, so discard last stacksize++ */
1153 --stacksize;
1154 ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1155 ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1156 ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1157 MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1158 PetscFunctionReturn(0);
1159 }
1160
1161 /*@
1162 PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1163
1164 Not Collective
1165
1166 Input Parameters:
1167 + n - number of values
1168 - arr - array of integers
1169
1170 Output Parameters:
1171 . arr - sorted array of integers
1172
1173 Notes:
1174 If the array is less than 64 entries long PetscSortInt() is automatically used.
1175
1176 This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1177 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1178 recomended that the user benchmark their code to see which routine is fastest.
1179
1180 Level: intermediate
1181
1182 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1183 @*/
PetscIntSortSemiOrdered(PetscInt n,PetscInt arr[])1184 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1185 {
1186 PetscErrorCode ierr;
1187 PetscFunctionBegin;
1188 if (n <= 1) PetscFunctionReturn(0);
1189 PetscValidIntPointer(arr,2);
1190 if (n < 64) {
1191 ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1192 } else {
1193 ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1194 }
1195 PetscFunctionReturn(0);
1196 }
1197
1198 /*@
1199 PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1200 array to match the first.
1201
1202 Not Collective
1203
1204 Input Parameters:
1205 + n - number of values
1206 . arr1 - array of integers to be sorted
1207 - arr2 - array of integers to be reordered
1208
1209 Output Parameters:
1210 + arr1 - sorted array of integers
1211 - arr2 - reordered array of integers
1212
1213 Notes:
1214 The arrays CANNOT overlap.
1215
1216 If the array to be sorted is less than 64 entries long PetscSortIntWithArray() is automatically used.
1217
1218 This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1219 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1220 recomended that the user benchmark their code to see which routine is fastest.
1221
1222 Level: intermediate
1223
1224 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1225 @*/
PetscIntSortSemiOrderedWithArray(PetscInt n,PetscInt arr1[],PetscInt arr2[])1226 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1227 {
1228 PetscErrorCode ierr;
1229 PetscFunctionBegin;
1230 PetscValidIntPointer(arr1,2);
1231 PetscValidIntPointer(arr2,3);
1232 if (n == 1) PetscFunctionReturn(0);
1233 if (n < 64) {
1234 ierr = PetscSortIntWithArray(n, arr1, arr2);CHKERRQ(ierr);
1235 } else {
1236 ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1237 }
1238 PetscFunctionReturn(0);
1239 }
1240
1241 /*@
1242 PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1243
1244 Not Collective
1245
1246 Input Parameters:
1247 + n - number of values
1248 - arr - array of PetscMPIInts
1249
1250 Output Parameters:
1251 . arr - sorted array of integers
1252
1253 Notes:
1254 If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1255
1256 This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1257 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1258 recomended that the user benchmark their code to see which routine is fastest.
1259
1260 Level: intermediate
1261
1262 .seealso: PetscTimSort(), PetscSortMPIInt()
1263 @*/
PetscMPIIntSortSemiOrdered(PetscInt n,PetscMPIInt arr[])1264 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1265 {
1266 PetscErrorCode ierr;
1267
1268 PetscFunctionBegin;
1269 PetscValidIntPointer(arr,2);
1270 if (n == 1) PetscFunctionReturn(0);
1271 if (n < 64) {
1272 ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1273 } else {
1274 ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1275 }
1276 PetscFunctionReturn(0);
1277 }
1278
1279 /*@
1280 PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1281 array to match the first.
1282
1283 Not Collective
1284
1285 Input Parameters:
1286 + n - number of values
1287 . arr1 - array of integers to be sorted
1288 - arr2 - array of integers to be reordered
1289
1290 Output Parameters:
1291 + arr1 - sorted array of integers
1292 - arr2 - reordered array of integers
1293
1294 Notes:
1295 The arrays CANNOT overlap.
1296
1297 If the array to be sorted is less than 64 entries long PetscSortMPIIntWithArray() is automatically used.
1298
1299 This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1300 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1301 recomended that the user benchmark their code to see which routine is fastest.
1302
1303 Level: intermediate
1304
1305 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1306 @*/
PetscMPIIntSortSemiOrderedWithArray(PetscInt n,PetscMPIInt arr1[],PetscMPIInt arr2[])1307 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1308 {
1309 PetscErrorCode ierr;
1310 PetscFunctionBegin;
1311 if (n <= 1) PetscFunctionReturn(0);
1312 PetscValidIntPointer(arr1,2);
1313 PetscValidIntPointer(arr2,3);
1314 if (n < 64) {
1315 ierr = PetscSortMPIIntWithArray(n, arr1, arr2);CHKERRQ(ierr);
1316 } else {
1317 ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1318 }
1319 PetscFunctionReturn(0);
1320 }
1321
1322 /*@
1323 PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1324
1325 Not Collective
1326
1327 Input Parameters:
1328 + n - number of values
1329 - arr - array of PetscReals
1330
1331 Output Parameters:
1332 . arr - sorted array of integers
1333
1334 Notes:
1335 If the array is less than 64 entries long PetscSortReal() is automatically used.
1336
1337 This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1338 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1339 recomended that the user benchmark their code to see which routine is fastest.
1340
1341 Level: intermediate
1342
1343 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1344 @*/
PetscRealSortSemiOrdered(PetscInt n,PetscReal arr[])1345 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1346 {
1347 PetscErrorCode ierr;
1348
1349 PetscFunctionBegin;
1350 if (n <= 1) PetscFunctionReturn(0);
1351 PetscValidRealPointer(arr,2);
1352 if (n < 64) {
1353 ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1354 } else {
1355 ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1356 }
1357 PetscFunctionReturn(0);
1358 }
1359
1360 /*@
1361 PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1362 array of PetscInts to match the first.
1363
1364 Not Collective
1365
1366 Input Parameters:
1367 + n - number of values
1368 . arr1 - array of PetscReals to be sorted
1369 - arr2 - array of PetscReals to be reordered
1370
1371 Output Parameters:
1372 + arr1 - sorted array of PetscReals
1373 - arr2 - reordered array of PetscInts
1374
1375 Notes:
1376 If the array to be sorted is less than 64 entries long PetscSortRealWithArrayInt() is automatically used.
1377
1378 This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1379 significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1380 recomended that the user benchmark their code to see which routine is fastest.
1381
1382 Level: intermediate
1383
1384 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1385 @*/
PetscRealSortSemiOrderedWithArrayInt(PetscInt n,PetscReal arr1[],PetscInt arr2[])1386 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1387 {
1388 PetscErrorCode ierr;
1389 PetscFunctionBegin;
1390 if (n <= 1) PetscFunctionReturn(0);
1391 PetscValidRealPointer(arr1,2);
1392 PetscValidIntPointer(arr2,3);
1393 if (n < 64) {
1394 ierr = PetscSortRealWithArrayInt(n, arr1, arr2);CHKERRQ(ierr);
1395 } else {
1396 ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1397 }
1398 PetscFunctionReturn(0);
1399 }
1400