1 #include <petscsys.h>                /*I  "petscsys.h"  I*/
2 #include <petsc/private/petscimpl.h>
3 
Compare_PetscMPIInt_Private(const void * left,const void * right,PETSC_UNUSED void * ctx)4 PETSC_STATIC_INLINE int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5 {
6   PetscMPIInt l = *(PetscMPIInt *) left, r = *(PetscMPIInt *) right;
7   return (l < r) ? -1 : (l > r);
8 }
9 
Compare_PetscInt_Private(const void * left,const void * right,PETSC_UNUSED void * ctx)10 PETSC_STATIC_INLINE int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11 {
12   PetscInt l = *(PetscInt *) left, r = *(PetscInt *) right;
13   return (l < r) ? -1 : (l > r);
14 }
15 
Compare_PetscReal_Private(const void * left,const void * right,PETSC_UNUSED void * ctx)16 PETSC_STATIC_INLINE int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17 {
18   PetscReal l = *(PetscReal *) left, r = *(PetscReal *) right;
19   return (l < r) ? -1 : (l > r);
20 }
21 
22 #define MIN_GALLOP_CONST_GLOBAL 8
23 static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24 typedef int (*CompFunc)(const void *, const void *, void *);
25 
26 /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27 #if !defined (PETSC_USE_DEBUG) && defined(__GNUC__)
COPYSWAPPY(char * a,char * b,char * t,size_t size)28 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29 {
30   __builtin_memcpy(t, b, size);
31   __builtin_memmove(b, a, size);
32   __builtin_memcpy(a, t, size);
33   return;
34 }
35 
COPYSWAPPY2(char * aa,char * ab,size_t asize,char * ba,char * bb,size_t bsize,char * t)36 PETSC_STATIC_INLINE void COPYSWAPPY2(char *aa, char *ab, size_t asize, char *ba, char *bb, size_t bsize, char *t)
37 {
38   __builtin_memcpy(t, ab, asize);
39   __builtin_memmove(ab, aa, asize);
40   __builtin_memcpy(aa, t, asize);
41   __builtin_memcpy(t, bb, bsize);
42   __builtin_memmove(bb, ba, bsize);
43   __builtin_memcpy(ba, t, bsize);
44   return;
45 }
46 
Petsc_memcpy(char * dest,const char * src,size_t size)47 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
48 {
49   __builtin_memcpy(dest, src, size);
50   return;
51 }
52 
Petsc_memcpy2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)53 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54 {
55   __builtin_memcpy(adest, asrc, asize);
56   __builtin_memcpy(bdest, asrc, bsize);
57   return;
58 }
59 
Petsc_memmove(char * dest,const char * src,size_t size)60 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
61 {
62   __builtin_memmove(dest, src, size);
63   return;
64 }
65 
Petsc_memmove2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)66 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67 {
68   __builtin_memmove(adest, asrc, asize);
69   __builtin_memmove(bdest, bsrc, bsize);
70   return;
71 }
72 # else
COPYSWAPPY(char * a,char * b,char * t,size_t size)73 PETSC_STATIC_INLINE void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74 {
75   PetscErrorCode ierr;
76   PetscFunctionBegin;
77   ierr = PetscMemcpy(t, b, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
78   ierr = PetscMemmove(b, a, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
79   ierr = PetscMemcpy(a, t, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
80   PetscFunctionReturnVoid();
81 }
82 
COPYSWAPPY2(char * aa,char * ab,size_t asize,char * ba,char * bb,size_t bsize,char * t)83 PETSC_STATIC_INLINE void COPYSWAPPY2(char *aa, char *ab, size_t asize, char *ba, char *bb, size_t bsize, char *t)
84 {
85   PetscErrorCode ierr;
86   PetscFunctionBegin;
87   ierr = PetscMemcpy(t, ab, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
88   ierr = PetscMemmove(ab, aa, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
89   ierr = PetscMemcpy(aa, t, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
90   ierr = PetscMemcpy(t, bb, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
91   ierr = PetscMemmove(bb, ba, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
92   ierr = PetscMemcpy(ba, t, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
93   PetscFunctionReturnVoid();
94 }
95 
Petsc_memcpy(char * dest,const char * src,size_t size)96 PETSC_STATIC_INLINE void Petsc_memcpy(char *dest, const char *src, size_t size)
97 {
98   PetscErrorCode ierr;
99   PetscFunctionBegin;
100   ierr = PetscMemcpy(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
101   PetscFunctionReturnVoid();
102 }
103 
Petsc_memcpy2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)104 PETSC_STATIC_INLINE void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
105 {
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PetscMemcpy(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
109   ierr = PetscMemcpy(bdest, asrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
110   PetscFunctionReturnVoid();
111 }
112 
Petsc_memmove(char * dest,const char * src,size_t size)113 PETSC_STATIC_INLINE void Petsc_memmove(char *dest, const char *src, size_t size)
114 {
115   PetscErrorCode ierr;
116   PetscFunctionBegin;
117   ierr = PetscMemmove(dest, src, size);CHKERRABORT(PETSC_COMM_SELF,ierr);
118   PetscFunctionReturnVoid();
119 }
120 
Petsc_memmove2(char * adest,const char * asrc,size_t asize,char * bdest,const char * bsrc,size_t bsize)121 PETSC_STATIC_INLINE void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
122 {
123   PetscErrorCode ierr;
124   PetscFunctionBegin;
125   ierr = PetscMemmove(adest, asrc, asize);CHKERRABORT(PETSC_COMM_SELF,ierr);
126   ierr = PetscMemmove(bdest, bsrc, bsize);CHKERRABORT(PETSC_COMM_SELF,ierr);
127   PetscFunctionReturnVoid();
128 }
129 #endif
130 
131 /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
132  x. Output also inclusive.
133 
134  NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
135 
136    {0,1,2,3,4,5}
137 
138    when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
139   */
PetscGallopSearchLeft_Private(const char * arr,size_t size,CompFunc cmp,void * ctx,PetscInt l,PetscInt r,const char * x,PetscInt * m)140 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
141 {
142   PetscInt last = l, k = 1, mid, cur = l+1;
143 
144   PetscFunctionBegin;
145   *m = l;
146   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchLeft",r,l);
147   if ((*cmp)(x, arr+r*size, ctx) >= 0) {*m = r; PetscFunctionReturn(0);}
148   if ((*cmp)(x, (arr)+l*size, ctx) < 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
149   while (PETSC_TRUE) {
150     if (PetscUnlikely(cur > r)) {cur = r; break;}
151     if ((*cmp)(x, (arr)+cur*size, ctx) < 0) break;
152     last = cur;
153     cur += (k <<= 1) + 1;
154     ++k;
155   }
156   /* standard binary search but take last 0 mid 0 cur 1 into account*/
157   while (cur > last + 1) {
158     mid = last + ((cur - last) >> 1);
159     if ((*cmp)(x, (arr)+mid*size, ctx) < 0) {
160       cur = mid;
161     } else {
162       last = mid;
163     }
164   }
165   *m = cur;
166   PetscFunctionReturn(0);
167 }
168 
169 /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170  < x. Output also inclusive */
PetscGallopSearchRight_Private(const char * arr,size_t size,CompFunc cmp,void * ctx,PetscInt l,PetscInt r,const char * x,PetscInt * m)171 PETSC_STATIC_INLINE PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172 {
173   PetscInt last = r, k = 1, mid, cur = r-1;
174 
175   PetscFunctionBegin;
176   *m = r;
177   if (PetscUnlikelyDebug(r < l)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"r %D < l %D in PetscGallopSearchRight",r,l);
178   if ((*cmp)(x, arr+l*size, ctx) <= 0) {*m = l; PetscFunctionReturn(0);}
179   if ((*cmp)(x, (arr)+r*size, ctx) > 0 || PetscUnlikely(!(r-l))) PetscFunctionReturn(0);
180   while (PETSC_TRUE) {
181     if (PetscUnlikely(cur < l)) {cur = l; break;}
182     if ((*cmp)(x, (arr)+cur*size, ctx) > 0) break;
183     last = cur;
184     cur -= (k <<= 1) + 1;
185     ++k;
186   }
187   /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
188   while (last > cur + 1) {
189     mid = last - ((last - cur) >> 1);
190     if ((*cmp)(x, (arr)+mid*size, ctx) > 0) {
191       cur = mid;
192     } else {
193       last = mid;
194     }
195   }
196   *m = cur;
197   PetscFunctionReturn(0);
198 }
199 
200 /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
201  complete array, left is first index of left array, mid is first index of right array, right is last index of right
202  array */
PetscTimSortMergeLo_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt left,PetscInt mid,PetscInt right)203 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
204 {
205   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
206   const PetscInt llen = mid-left;
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   Petsc_memcpy(tarr, arr+(left*size), llen*size);
211   while ((i < llen) && (j <= right)) {
212     if ((*cmp)(tarr+(i*size), arr+(j*size), ctx) < 0) {
213       Petsc_memcpy(arr+(k*size), tarr+(i*size), size);
214       ++k;
215       gallopright = 0;
216       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
217         PetscInt l1, l2, diff1, diff2;
218         do {
219           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
220           /* search temp for right[j], can move up to that of temp into arr immediately */
221           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
222           diff1 = l1-i;
223           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
224           k += diff1;
225           i = l1;
226           /* search right for temp[i], can move up to that many of right into arr */
227           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
228           diff2 = l2-j;
229           Petsc_memmove((arr)+k*size, (arr)+j*size, diff2*size);
230           k += diff2;
231           j = l2;
232           if (i >= llen || j > right) break;
233         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
234         ++MIN_GALLOP_GLOBAL;
235       }
236     } else {
237       Petsc_memmove(arr+(k*size), arr+(j*size), size);
238       ++k;
239       gallopleft = 0;
240       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
241         PetscInt l1, l2, diff1, diff2;
242         do {
243           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
244           /* search right for temp[i], can move up to that many of right into arr */
245           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr+(i*size), &l2);CHKERRQ(ierr);
246           diff2 = l2-j;
247           Petsc_memmove(arr+(k*size), arr+(j*size), diff2*size);
248           k += diff2;
249           j = l2;
250           /* search temp for right[j], can copy up to that of temp into arr immediately */
251           ierr = PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen-1, arr+(j*size), &l1);CHKERRQ(ierr);
252           diff1 = l1-i;
253           Petsc_memcpy(arr+(k*size), tarr+(i*size), diff1*size);
254           k += diff1;
255           i = l1;
256           if (i >= llen || j > right) break;
257         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
258         ++MIN_GALLOP_GLOBAL;
259       }
260     }
261   }
262   if (i<llen) {Petsc_memcpy(arr+(k*size), tarr+(i*size), (llen-i)*size);}
263   PetscFunctionReturn(0);
264 }
265 
PetscTimSortMergeLoWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt left,PetscInt mid,PetscInt right)266 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
267 {
268   PetscInt       i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
269   const PetscInt llen = mid-left;
270   PetscErrorCode ierr;
271 
272   PetscFunctionBegin;
273   Petsc_memcpy2(atarr, arr+(left*asize), llen*asize, btarr, barr+(left*bsize), llen*bsize);
274   while ((i < llen) && (j <= right)) {
275     if ((*cmp)(atarr+(i*asize), arr+(j*asize), ctx) < 0) {
276       Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), asize, barr+(k*bsize), btarr+(i*bsize), bsize);
277       ++k;
278       gallopright = 0;
279       if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
280         PetscInt l1, l2, diff1, diff2;
281         do {
282           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
283           /* search temp for right[j], can move up to that of temp into arr immediately */
284           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
285           diff1 = l1-i;
286           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
287           k += diff1;
288           i = l1;
289           /* search right for temp[i], can move up to that many of right into arr */
290           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
291           diff2 = l2-j;
292           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
293           k += diff2;
294           j = l2;
295           if (i >= llen || j > right) break;
296         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
297         ++MIN_GALLOP_GLOBAL;
298       }
299     } else {
300       Petsc_memmove2(arr+(k*asize), arr+(j*asize), asize, barr+(k*bsize), barr+(j*bsize), bsize);
301       ++k;
302       gallopleft = 0;
303       if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
304         PetscInt l1, l2, diff1, diff2;
305         do {
306           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
307           /* search right for temp[i], can move up to that many of right into arr */
308           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr+(i*asize), &l2);CHKERRQ(ierr);
309           diff2 = l2-j;
310           Petsc_memmove2(arr+(k*asize), arr+(j*asize), diff2*asize, barr+(k*bsize), barr+(j*bsize), diff2*bsize);
311           k += diff2;
312           j = l2;
313           /* search temp for right[j], can copy up to that of temp into arr immediately */
314           ierr = PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen-1, arr+(j*asize), &l1);CHKERRQ(ierr);
315           diff1 = l1-i;
316           Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), diff1*asize, barr+(k*bsize), btarr+(i*bsize), diff1*bsize);
317           k += diff1;
318           i = l1;
319           if (i >= llen || j > right) break;
320         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
321         ++MIN_GALLOP_GLOBAL;
322       }
323     }
324   }
325   if (i<llen) {Petsc_memcpy2(arr+(k*asize), atarr+(i*asize), (llen-i)*asize, barr+(k*bsize), btarr+(i*bsize), (llen-i)*bsize);}
326   PetscFunctionReturn(0);
327 }
328 
329 /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
330  complete array, left is first index of left array, mid is first index of right array, right is last index of right
331  array */
PetscTimSortMergeHi_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt left,PetscInt mid,PetscInt right)332 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
333 {
334   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
335   const PetscInt rlen = right-mid+1;
336   PetscErrorCode ierr;
337 
338   PetscFunctionBegin;
339   Petsc_memcpy(tarr, (arr)+mid*size, rlen*size);
340   while ((i >= 0) && (j >= left)) {
341     if ((*cmp)((tarr)+i*size, (arr)+j*size, ctx) > 0) {
342       Petsc_memcpy((arr)+k*size, (tarr)+i*size, size);
343       --k;
344       gallopleft = 0;
345       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
346         PetscInt l1, l2, diff1, diff2;
347         do {
348           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
349           /* search temp for left[j], can copy up to that many of temp into arr */
350           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
351           diff1 = i-l1;
352           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
353           k -= diff1;
354           i = l1;
355           /* search left for temp[i], can move up to that many of left up arr */
356           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
357           diff2 = j-l2;
358           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
359           k -= diff2;
360           j = l2;
361           if (i < 0 || j < left) break;
362         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
363         ++MIN_GALLOP_GLOBAL;
364       }
365     } else {
366       Petsc_memmove((arr)+k*size, (arr)+j*size, size);
367       --k;
368       gallopright = 0;
369       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
370         PetscInt l1, l2, diff1, diff2;
371         do {
372           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
373           /* search left for temp[i], can move up to that many of left up arr */
374           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr)+i*size, &l2);CHKERRQ(ierr);
375           diff2 = j-l2;
376           Petsc_memmove((arr)+(k-diff2+1)*size, (arr)+(l2+1)*size, diff2*size);
377           k -= diff2;
378           j = l2;
379           /* search temp for left[j], can copy up to that many of temp into arr */
380           ierr = PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr)+j*size, &l1);CHKERRQ(ierr);
381           diff1 = i-l1;
382           Petsc_memcpy((arr)+(k-diff1+1)*size, (tarr)+(l1+1)*size, diff1*size);
383           k -= diff1;
384           i = l1;
385           if (i < 0 || j < left) break;
386         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
387         ++MIN_GALLOP_GLOBAL;
388       }
389     }
390   }
391   if (i >= 0) {Petsc_memcpy((arr)+left*size, tarr, (i+1)*size);}
392   PetscFunctionReturn(0);
393 }
394 
PetscTimSortMergeHiWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt left,PetscInt mid,PetscInt right)395 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
396 {
397   PetscInt       i = right-mid, j = mid-1, k = right, gallopleft = 0, gallopright = 0;
398   const PetscInt rlen = right-mid+1;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   Petsc_memcpy2(atarr, (arr)+mid*asize, rlen*asize, btarr, (barr)+mid*bsize, rlen*bsize);
403   while ((i >= 0) && (j >= left)) {
404     if ((*cmp)((atarr)+i*asize, (arr)+j*asize, ctx) > 0) {
405       Petsc_memcpy2((arr)+k*asize, (atarr)+i*asize, asize, (barr)+k*bsize, (btarr)+i*bsize, bsize);
406       --k;
407       gallopleft = 0;
408       if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
409         PetscInt l1, l2, diff1, diff2;
410         do {
411           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
412           /* search temp for left[j], can copy up to that many of temp into arr */
413           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
414           diff1 = i-l1;
415           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
416           k -= diff1;
417           i = l1;
418           /* search left for temp[i], can move up to that many of left up arr */
419           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
420           diff2 = j-l2;
421           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
422           k -= diff2;
423           j = l2;
424           if (i < 0 || j < left) break;
425         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
426         ++MIN_GALLOP_GLOBAL;
427       }
428     } else {
429       Petsc_memmove2((arr)+k*asize, (arr)+j*asize, asize, (barr)+k*bsize, (barr)+j*bsize, bsize);
430       --k;
431       gallopright = 0;
432       if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
433         PetscInt l1, l2, diff1, diff2;
434         do {
435           if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
436           /* search left for temp[i], can move up to that many of left up arr */
437           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr)+i*asize, &l2);CHKERRQ(ierr);
438           diff2 = j-l2;
439           Petsc_memmove2((arr)+(k-diff2+1)*asize, (arr)+(l2+1)*asize, diff2*asize, (barr)+(k-diff2+1)*bsize, (barr)+(l2+1)*bsize, diff2*bsize);
440           k -= diff2;
441           j = l2;
442           /* search temp for left[j], can copy up to that many of temp into arr */
443           ierr = PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr)+j*asize, &l1);CHKERRQ(ierr);
444           diff1 = i-l1;
445           Petsc_memcpy2((arr)+(k-diff1+1)*asize, (atarr)+(l1+1)*asize, diff1*asize, (barr)+(k-diff1+1)*bsize, (btarr)+(l1+1)*bsize, diff1*bsize);
446           k -= diff1;
447           i = l1;
448           if (i < 0 || j < left) break;
449         } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
450         ++MIN_GALLOP_GLOBAL;
451       }
452     }
453   }
454   if (i >= 0) {Petsc_memcpy2((arr)+left*asize, atarr, (i+1)*asize, (barr)+left*bsize, btarr, (i+1)*bsize);}
455   PetscFunctionReturn(0);
456 }
457 
458 /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
459  bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
PetscInsertionSort_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt left,PetscInt start,PetscInt right)460 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
461 {
462   PetscInt i = start == left ? start+1 : start;
463 
464   PetscFunctionBegin;
465   for (; i <= right; ++i) {
466     PetscInt j = i-1;
467     Petsc_memcpy(tarr, arr+(i*size), size);
468     while ((j >= left) && ((*cmp)(tarr, (arr)+j*size, ctx) < 0)) {
469       COPYSWAPPY(arr+(j+1)*size, arr+j*size, tarr+size, size);
470       --j;
471     }
472     Petsc_memcpy((arr)+(j+1)*size, tarr, size);
473   }
474   PetscFunctionReturn(0);
475 }
476 
PetscInsertionSortWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt left,PetscInt start,PetscInt right)477 PETSC_STATIC_INLINE PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
478 {
479   PetscInt i = start == left ? start+1 : start;
480 
481   PetscFunctionBegin;
482   for (; i <= right; ++i) {
483     PetscInt j = i-1;
484     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
485     while ((j >= left) && ((*cmp)(atarr, arr+(j*asize), ctx) < 0)) {
486       COPYSWAPPY2(arr+(j+1)*asize, arr+j*asize, asize, barr+(j+1)*bsize, barr+j*bsize, bsize, atarr+asize);
487       --j;
488     }
489     Petsc_memcpy2(arr+(j+1)*asize, atarr, asize, barr+(j+1)*bsize, btarr, bsize);
490   }
491   PetscFunctionReturn(0);
492 }
493 
494 /* See PetscInsertionSort_Private */
PetscBinaryInsertionSort_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt left,PetscInt start,PetscInt right)495 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
496 {
497   PetscInt i = start == left ? start+1 : start;
498 
499   PetscFunctionBegin;
500   for (; i <= right; ++i) {
501     PetscInt l = left, r = i;
502     Petsc_memcpy(tarr, arr+(i*size), size);
503     do {
504       const PetscInt m = l + ((r - l) >> 1);
505       if ((*cmp)(tarr, arr+(m*size), ctx) < 0) {
506         r = m;
507       } else {
508         l = m + 1;
509       }
510     } while (l < r);
511     Petsc_memmove(arr+((l+1)*size), arr+(l*size), (i-l)*size);
512     Petsc_memcpy(arr+(l*size), tarr, size);
513   }
514   PetscFunctionReturn(0);
515 }
516 
PetscBinaryInsertionSortWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt left,PetscInt start,PetscInt right)517 PETSC_STATIC_INLINE PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
518 {
519   PetscInt i = start == left ? start+1 : start;
520 
521   PetscFunctionBegin;
522   for (; i <= right; ++i) {
523     PetscInt l = left, r = i;
524     Petsc_memcpy2(atarr, arr+(i*asize), asize, btarr, barr+(i*bsize), bsize);
525     do {
526       const PetscInt m = l + ((r - l) >> 1);
527       if ((*cmp)(atarr, arr+(m*asize), ctx) < 0) {
528         r = m;
529       } else {
530         l = m + 1;
531       }
532     } while (l < r);
533     Petsc_memmove2(arr+((l+1)*asize), arr+(l*asize), (i-l)*asize, barr+((l+1)*bsize), barr+(l*bsize), (i-l)*bsize);
534     Petsc_memcpy2(arr+(l*asize), atarr, asize, barr+(l*bsize), btarr, bsize);
535   }
536   PetscFunctionReturn(0);
537 }
538 
539 typedef struct {
540   PetscInt size;
541   PetscInt start;
542 } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2*sizeof(PetscInt));
543 
544 typedef struct {
545   char   *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
546   size_t size;
547   size_t maxsize;
548 } PetscTimSortBuffer;
549 
PetscTimSortResizeBuffer_Private(PetscTimSortBuffer * buff,size_t newSize)550 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
551 {
552   PetscFunctionBegin;
553   if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(0);
554   {
555     /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
556     PetscErrorCode ierr;
557     size_t         newMax = PetscMin(newSize*newSize, buff->maxsize);
558     ierr = PetscFree(buff->ptr);CHKERRQ(ierr);
559     ierr = PetscMalloc1(newMax, &buff->ptr);CHKERRQ(ierr);
560     buff->size = newMax;
561   }
562   PetscFunctionReturn(0);
563 }
564 
PetscTimSortForceCollapse_Private(char * arr,size_t size,CompFunc cmp,void * ctx,PetscTimSortBuffer * buff,PetscTimSortStack * stack,PetscInt stacksize)565 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
566 {
567   PetscFunctionBegin;
568   for (;stacksize; --stacksize) {
569     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
570     if ((*cmp)(arr+(stack[stacksize].start-1)*size, arr+(stack[stacksize].start)*size, ctx) > 0) {
571       PetscInt       l, m = stack[stacksize].start, r;
572       PetscErrorCode ierr;
573       /* Search A for B[0] insertion */
574       ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*size, &l);CHKERRQ(ierr);
575       /* Search B for A[-1] insertion */
576       ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*size, &r);CHKERRQ(ierr);
577       if (m-l <= r-m) {
578         ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
579         ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
580       } else {
581         ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
582         ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
583       }
584     }
585     /* Update A with merge */
586     stack[stacksize-1].size += stack[stacksize].size;
587   }
588   PetscFunctionReturn(0);
589 }
590 
PetscTimSortForceCollapseWithArray_Private(char * arr,size_t asize,char * barr,size_t bsize,CompFunc cmp,void * ctx,PetscTimSortBuffer * abuff,PetscTimSortBuffer * bbuff,PetscTimSortStack * stack,PetscInt stacksize)591 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
592 {
593   PetscFunctionBegin;
594   for (;stacksize; --stacksize) {
595     /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
596     if ((*cmp)(arr+(stack[stacksize].start-1)*asize, arr+(stack[stacksize].start)*asize, ctx) > 0) {
597       PetscInt       l, m = stack[stacksize].start, r;
598       PetscErrorCode ierr;
599       /* Search A for B[0] insertion */
600       ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize-1].start, stack[stacksize].start-1, (arr)+(stack[stacksize].start)*asize, &l);CHKERRQ(ierr);
601       /* Search B for A[-1] insertion */
602       ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start+stack[stacksize].size-1, (arr)+(stack[stacksize].start-1)*asize, &r);CHKERRQ(ierr);
603       if (m-l <= r-m) {
604         ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
605         ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
606         ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
607       } else {
608         ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
609         ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
610         ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
611       }
612     }
613     /* Update A with merge */
614     stack[stacksize-1].size += stack[stacksize].size;
615   }
616   PetscFunctionReturn(0);
617 }
618 
PetscTimSortMergeCollapse_Private(char * arr,size_t size,CompFunc cmp,void * ctx,PetscTimSortBuffer * buff,PetscTimSortStack * stack,PetscInt * stacksize)619 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
620 {
621   PetscInt       i = *stacksize;
622   PetscErrorCode ierr;
623 
624   PetscFunctionBegin;
625   while (i) {
626     PetscInt l, m, r, itemp = i;
627 
628     if (i == 1) {
629       /* A = stack[i-1], B = stack[i] */
630       if (stack[i-1].size < stack[i].size) {
631         /* if A[-1] <= B[0] then sorted */
632         if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
633           m = stack[i].start;
634           /* Search A for B[0] insertion */
635           ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*size, &l);CHKERRQ(ierr);
636           /* Search B for A[-1] insertion */
637           ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
638           if (m-l <= r-m) {
639             ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
640             ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
641           } else {
642             ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
643             ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
644           }
645         }
646         /* Update A with merge */
647         stack[i-1].size += stack[i].size;
648         --i;
649       }
650     } else {
651       /* i > 2, i.e. C exists
652        A = stack[i-2], B = stack[i-1], C = stack[i]; */
653       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
654         if (stack[i-2].size < stack[i].size) {
655           /* merge B into A, but if A[-1] <= B[0] then already sorted */
656           if ((*cmp)(arr+(stack[i-1].start-1)*size, arr+(stack[i-1].start)*size, ctx) > 0) {
657             m = stack[i-1].start;
658             /* Search A for B[0] insertion */
659             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*size, &l);CHKERRQ(ierr);
660             /* Search B for A[-1] insertion */
661             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*size, &r);CHKERRQ(ierr);
662             if (m-l <= r-m) {
663               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
664               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
665             } else {
666               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
667               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
668             }
669           }
670           /* Update A with merge */
671           stack[i-2].size += stack[i-1].size;
672           /* Push C up the stack */
673           stack[i-1].start = stack[i].start;
674           stack[i-1].size = stack[i].size;
675         } else {
676           /* merge C into B */
677           mergeBC:
678           /* If B[-1] <= C[0] then... you know the drill */
679           if ((*cmp)(arr+(stack[i].start-1)*size, arr+(stack[i].start)*size, ctx) > 0) {
680             m = stack[i].start;
681             /* Search B for C[0] insertion */
682             ierr = PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*size, &l);CHKERRQ(ierr);
683             /* Search C for B[-1] insertion */
684             ierr = PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*size, &r);CHKERRQ(ierr);
685             if (m-l <= r-m) {
686               ierr = PetscTimSortResizeBuffer_Private(buff, (m-l+1)*size);CHKERRQ(ierr);
687               ierr = PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
688             } else {
689               ierr = PetscTimSortResizeBuffer_Private(buff, (r-m+1)*size);CHKERRQ(ierr);
690               ierr = PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r);CHKERRQ(ierr);
691             }
692           }
693           /* Update B with merge */
694           stack[i-1].size += stack[i].size;
695         }
696         --i;
697       } else if (stack[i-1].size <= stack[i].size) {
698         /* merge C into B */
699         goto mergeBC;
700       }
701     }
702     if (itemp == i) break;
703   }
704   *stacksize = i;
705   PetscFunctionReturn(0);
706 }
707 
PetscTimSortMergeCollapseWithArray_Private(char * arr,size_t asize,char * barr,size_t bsize,CompFunc cmp,void * ctx,PetscTimSortBuffer * abuff,PetscTimSortBuffer * bbuff,PetscTimSortStack * stack,PetscInt * stacksize)708 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
709 {
710   PetscInt       i = *stacksize;
711   PetscErrorCode ierr;
712 
713   PetscFunctionBegin;
714   while (i) {
715     PetscInt l, m, r, itemp = i;
716 
717     if (i == 1) {
718       /* A = stack[i-1], B = stack[i] */
719       if (stack[i-1].size < stack[i].size) {
720         /* if A[-1] <= B[0] then sorted */
721         if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
722           m = stack[i].start;
723           /* Search A for B[0] insertion */
724           ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, (arr)+stack[i].start*asize, &l);CHKERRQ(ierr);
725           /* Search B for A[-1] insertion */
726           ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, arr+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
727           if (m-l <= r-m) {
728             ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
729             ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
730             ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
731           } else {
732             ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
733             ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
734             ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
735           }
736         }
737         /* Update A with merge */
738         stack[i-1].size += stack[i].size;
739         --i;
740       }
741     } else {
742       /* i > 2, i.e. C exists
743        A = stack[i-2], B = stack[i-1], C = stack[i]; */
744       if (stack[i-2].size <= stack[i-1].size+stack[i].size) {
745         if (stack[i-2].size < stack[i].size) {
746           /* merge B into A, but if A[-1] <= B[0] then already sorted */
747           if ((*cmp)(arr+(stack[i-1].start-1)*asize, arr+(stack[i-1].start)*asize, ctx) > 0) {
748             m = stack[i-1].start;
749             /* Search A for B[0] insertion */
750             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-2].start, stack[i-1].start-1, (arr)+(stack[i-1].start)*asize, &l);CHKERRQ(ierr);
751             /* Search B for A[-1] insertion */
752             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i-1].start+stack[i-1].size-1, (arr)+(stack[i-1].start-1)*asize, &r);CHKERRQ(ierr);
753             if (m-l <= r-m) {
754               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
755               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
756               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
757             } else {
758               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
759               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
760               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
761             }
762           }
763           /* Update A with merge */
764           stack[i-2].size += stack[i-1].size;
765           /* Push C up the stack */
766           stack[i-1].start = stack[i].start;
767           stack[i-1].size = stack[i].size;
768         } else {
769           /* merge C into B */
770           mergeBC:
771           /* If B[-1] <= C[0] then... you know the drill */
772           if ((*cmp)(arr+(stack[i].start-1)*asize, arr+(stack[i].start)*asize, ctx) > 0) {
773             m = stack[i].start;
774             /* Search B for C[0] insertion */
775             ierr = PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i-1].start, stack[i].start-1, arr+stack[i].start*asize, &l);CHKERRQ(ierr);
776             /* Search C for B[-1] insertion */
777             ierr = PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start+stack[i].size-1, (arr)+(stack[i].start-1)*asize, &r);CHKERRQ(ierr);
778             if (m-l <= r-m) {
779               ierr = PetscTimSortResizeBuffer_Private(abuff, (m-l+1)*asize);CHKERRQ(ierr);
780               ierr = PetscTimSortResizeBuffer_Private(bbuff, (m-l+1)*bsize);CHKERRQ(ierr);
781               ierr = PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
782             } else {
783               ierr = PetscTimSortResizeBuffer_Private(abuff, (r-m+1)*asize);CHKERRQ(ierr);
784               ierr = PetscTimSortResizeBuffer_Private(bbuff, (r-m+1)*bsize);CHKERRQ(ierr);
785               ierr = PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r);CHKERRQ(ierr);
786             }
787           }
788           /* Update B with merge */
789           stack[i-1].size += stack[i].size;
790         }
791         --i;
792       } else if (stack[i-1].size <= stack[i].size) {
793         /* merge C into B */
794         goto mergeBC;
795       }
796     }
797     if (itemp == i) break;
798   }
799   *stacksize = i;
800   PetscFunctionReturn(0);
801 }
802 
803 /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
804  elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
805  binary insertion sort or regulat insertion sort */
PetscTimSortBuildRun_Private(char * arr,char * tarr,size_t size,CompFunc cmp,void * ctx,PetscInt n,PetscInt minrun,PetscInt runstart,PetscInt * runend)806 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
807 {
808   const PetscInt re = PetscMin(runstart+minrun, n-1);
809   PetscInt       ri = runstart;
810 
811   PetscFunctionBegin;
812   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
813   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
814   if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) {
815     ++ri;
816     while (ri < n-1) {
817       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) >= 0) break;
818       ++ri;
819     }
820     {
821       PetscInt lo = runstart, hi = ri;
822       for (; lo < hi; ++lo, --hi) {
823         COPYSWAPPY(arr+lo*size, arr+hi*size, tarr, size);
824       }
825     }
826   } else {
827     ++ri;
828     while (ri < n-1) {
829       if ((*cmp)((arr)+(ri+1)*size, (arr)+ri*size, ctx) < 0) break;
830       ++ri;
831     }
832   }
833 #if defined(PETSC_USE_DEBUG)
834   {
835     PetscErrorCode ierr;
836     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
837   }
838 #endif
839   if (ri < re) {
840     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
841      binary search */
842     if (ri-runstart <= minrun >> 1) {
843       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
844       PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
845     } else {
846       PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re);
847     }
848     *runend = re;
849   } else *runend = ri;
850   PetscFunctionReturn(0);
851 }
852 
PetscTimSortBuildRunWithArray_Private(char * arr,char * atarr,size_t asize,char * barr,char * btarr,size_t bsize,CompFunc cmp,void * ctx,PetscInt n,PetscInt minrun,PetscInt runstart,PetscInt * runend)853 PETSC_STATIC_INLINE PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
854 {
855   const PetscInt re = PetscMin(runstart+minrun, n-1);
856   PetscInt       ri = runstart;
857 
858   PetscFunctionBegin;
859   if (PetscUnlikely(runstart == n-1)) {*runend = runstart; PetscFunctionReturn(0);}
860   /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
861   if ((*cmp)((arr)+(ri+1)*asize, arr+(ri*asize), ctx) < 0) {
862     ++ri;
863     while (ri < n-1) {
864       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) >= 0) break;
865       ++ri;
866     }
867     {
868       PetscInt lo = runstart, hi = ri;
869       for (; lo < hi; ++lo, --hi) {
870         COPYSWAPPY2(arr+lo*asize, arr+hi*asize, asize, barr+lo*bsize, barr+hi*bsize, bsize, atarr);
871       }
872     }
873   } else {
874     ++ri;
875     while (ri < n-1) {
876       if ((*cmp)((arr)+(ri+1)*asize, (arr)+ri*asize, ctx) < 0) break;
877       ++ri;
878     }
879   }
880 #if defined(PETSC_USE_DEBUG)
881   {
882     PetscErrorCode ierr;
883     ierr = PetscInfo1(NULL, "natural run length = %D\n", ri-runstart+1);CHKERRQ(ierr);
884   }
885 #endif
886   if (ri < re) {
887     /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
888      binary search */
889     if (ri-runstart <= minrun >> 1) {
890       ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
891       PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
892     } else {
893       PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re);
894     }
895     *runend = re;
896   } else *runend = ri;
897   PetscFunctionReturn(0);
898 }
899 
900 /*@C
901   PetscTimSort - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm.
902 
903   Not Collective
904 
905   Input Parameters:
906 + n    - number of values
907 . arr  - array to be sorted
908 . size - size in bytes of the datatype held in arr
909 . cmp  - function pointer to comparison function
910 - ctx  - optional context to be passed to comparison function, NULL if not needed
911 
912   Output Parameters:
913 . arr  - sorted array
914 
915   Notes:
916   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
917  sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
918  select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
919  do so it repeatedly triggers attempts throughout to merge adjacent runs.
920 
921   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
922   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
923   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
924   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
925   likely all contain similar data.
926 
927   Sample usage:
928   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
929   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
930   may also
931  change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
932  the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
933   order
934 
935 .vb
936   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
937     my_type l = *(my_type *) left, r = *(my_type *) right;
938     return (l < r) ? -1 : (l > r);
939   }
940 .ve
941   Note the context is unused here but you may use it to pass and subsequently access whatever information required
942   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
943   Then pass the function
944 .vb
945   PetscTimSort(n, arr, sizeof(arr[0]), my_increasing_comparison_function, ctx)
946 .ve
947 
948   Fortran Notes:
949   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
950   returns result. For example
951 .vb
952  subroutine CompareIntegers(left,right,ctx,result)
953    implicit none
954 
955    PetscInt,intent(in) :: left, right
956    type(UserCtx)       :: ctx
957    integer,intent(out) :: result
958 
959    if (left < right) then
960      result = -1
961    else if (left == right) then
962      result = 0
963    else
964      result = 1
965    end if
966    return
967  end subroutine CompareIntegers
968 .ve
969 
970   References:
971   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
972 
973   Level: developer
974 
975 .seealso: PetscTimSortWithArray(), PetscIntSortSemiOrdered(), PetscRealSortSemiOrdered(), PetscMPIIntSortSemiOrdered()
976 @*/
PetscTimSort(PetscInt n,void * arr,size_t size,int (* cmp)(const void *,const void *,void *),void * ctx)977 PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
978 {
979   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
980   PetscTimSortStack  runstack[128];
981   PetscTimSortBuffer buff;
982   PetscErrorCode     ierr;
983   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
984    It is so unlikely that this limit is reached that this is __never__ checked for */
985 
986   PetscFunctionBegin;
987   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
988    is a power of 2 or one plus a power of 2 */
989   {
990     PetscInt t = n, r = 0;
991     /* r becomes 1 if the least significant bits contain at least one off bit */
992     while (t >= 64) {
993       r |= t & 1;
994       t >>= 1;
995     }
996     minrun = t + r;
997   }
998   if (PetscDefined(USE_DEBUG)) {
999     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1000     if (n < 64) {
1001       ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1002     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1003   }
1004   ierr = PetscMalloc1((size_t) minrun*size, &buff.ptr);CHKERRQ(ierr);
1005   buff.size = (size_t) minrun*size;
1006   buff.maxsize = (size_t) n*size;
1007   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1008   while (runstart < n) {
1009     /* Check if additional entries are at least partially ordered and build natural run */
1010     ierr = PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1011     runstack[stacksize].start = runstart;
1012     runstack[stacksize].size = runend-runstart+1;
1013     ierr = PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize);CHKERRQ(ierr);
1014     ++stacksize;
1015     runstart = runend+1;
1016   }
1017   /* Have been inside while, so discard last stacksize++ */
1018   --stacksize;
1019   ierr = PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize);CHKERRQ(ierr);
1020   ierr = PetscFree(buff.ptr);CHKERRQ(ierr);
1021   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 /*@C
1026   PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters adaptive sorting algorithm and
1027   reorders a second array to match the first. The arrays need not be the same type.
1028 
1029   Not Collective
1030 
1031   Input Parameters:
1032 + n     - number of values
1033 . arr   - array to be sorted
1034 . asize - size in bytes of the datatype held in arr
1035 . barr  - array to be reordered
1036 . asize - size in bytes of the datatype held in barr
1037 . cmp   - function pointer to comparison function
1038 - ctx   - optional context to be passed to comparison function, NULL if not needed
1039 
1040   Output Parameters:
1041 + arr  - sorted array
1042 - barr - reordered array
1043 
1044   Notes:
1045   The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1046   overlap.
1047 
1048   Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1049   sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1050  to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1051  arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1052 
1053   Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1054   the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1055   bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1056   searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1057   likely all contain similar data.
1058 
1059   Sample usage:
1060   The comparison function must follow the qsort() comparison function paradigm, returning the sign of the difference
1061   between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
1062   may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1063   guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1064   increasing order
1065 
1066 .vb
1067   int my_increasing_comparison_function(const void *left, const void *right, void *ctx) {
1068     my_type l = *(my_type *) left, r = *(my_type *) right;
1069     return (l < r) ? -1 : (l > r);
1070   }
1071 .ve
1072   Note the context is unused here but you may use it to pass and subsequently access whatever information required
1073   inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1074   Then pass the function
1075 .vb
1076   PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), my_increasing_comparison_function, ctx)
1077 .ve
1078 
1079 
1080   Fortran Notes:
1081   To use this from fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1082   returns result. For example
1083 .vb
1084  subroutine CompareIntegers(left,right,ctx,result)
1085    implicit none
1086 
1087    PetscInt,intent(in) :: left, right
1088    type(UserCtx)       :: ctx
1089    integer,intent(out) :: result
1090 
1091    if (left < right) then
1092      result = -1
1093    else if (left == right) then
1094      result = 0
1095    else
1096      result = 1
1097    end if
1098    return
1099  end subroutine CompareIntegers
1100 .ve
1101 
1102   References:
1103   1. - Tim Peters. https://bugs.python.org/file4451/timsort.txt
1104 
1105   Level: developer
1106 
1107 .seealso: PetscTimSort(), PetscIntSortSemiOrderedWithArray(), PetscRealSortSemiOrderedWithArrayInt(), PetscMPIIntSortSemiOrderedWithArray()
1108 @*/
PetscTimSortWithArray(PetscInt n,void * arr,size_t asize,void * barr,size_t bsize,int (* cmp)(const void *,const void *,void *),void * ctx)1109 PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1110 {
1111   PetscInt           stacksize = 0, minrun, runstart = 0, runend = 0;
1112   PetscTimSortStack  runstack[128];
1113   PetscTimSortBuffer abuff, bbuff;
1114   PetscErrorCode     ierr;
1115   /* stacksize  = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1116    It is so unlikely that this limit is reached that this is __never__ checked for */
1117 
1118   PetscFunctionBegin;
1119   /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1120    is a power of 2 or one plus a power of 2 */
1121   {
1122     PetscInt t = n, r = 0;
1123     /* r becomes 1 if the least significant bits contain at least one off bit */
1124     while (t >= 64) {
1125       r |= t & 1;
1126       t >>= 1;
1127     }
1128     minrun = t + r;
1129   }
1130   if (PetscDefined(USE_DEBUG)) {
1131     ierr = PetscInfo1(NULL, "minrun = %D\n", minrun);CHKERRQ(ierr);
1132     if (n < 64) {
1133       ierr = PetscInfo1(NULL, "n %D < 64, consider using PetscSortInt() instead\n", n);CHKERRQ(ierr);
1134     } else if ((minrun < 32) || (minrun > 65)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Calculated minrun %D not in range (32,65)",minrun);
1135   }
1136   ierr = PetscMalloc1((size_t) minrun*asize, &abuff.ptr);CHKERRQ(ierr);
1137   abuff.size = (size_t) minrun*asize;
1138   abuff.maxsize = (size_t) n*asize;
1139   ierr = PetscMalloc1((size_t) minrun*bsize, &bbuff.ptr);CHKERRQ(ierr);
1140   bbuff.size = (size_t) minrun*bsize;
1141   bbuff.maxsize = (size_t) n*bsize;
1142   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1143   while (runstart < n) {
1144     /* Check if additional entries are at least partially ordered and build natural run */
1145     ierr = PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend);CHKERRQ(ierr);
1146     runstack[stacksize].start = runstart;
1147     runstack[stacksize].size = runend-runstart+1;
1148     ierr = PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize);CHKERRQ(ierr);
1149     ++stacksize;
1150     runstart = runend+1;
1151   }
1152   /* Have been inside while, so discard last stacksize++ */
1153   --stacksize;
1154   ierr = PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize);CHKERRQ(ierr);
1155   ierr = PetscFree(abuff.ptr);CHKERRQ(ierr);
1156   ierr = PetscFree(bbuff.ptr);CHKERRQ(ierr);
1157   MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 /*@
1162    PetscIntSortSemiOrdered - Sorts an array of integers in place in increasing order.
1163 
1164    Not Collective
1165 
1166    Input Parameters:
1167 +  n   - number of values
1168 -  arr - array of integers
1169 
1170    Output Parameters:
1171 .  arr - sorted array of integers
1172 
1173    Notes:
1174    If the array is less than 64 entries long PetscSortInt() is automatically used.
1175 
1176    This function serves as an alternative to PetscSortInt(). While this function works for any array of integers it is
1177    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1178    recomended that the user benchmark their code to see which routine is fastest.
1179 
1180    Level: intermediate
1181 
1182 .seealso: PetscTimSort(), PetscSortInt(), PetscSortIntWithPermutation()
1183 @*/
PetscIntSortSemiOrdered(PetscInt n,PetscInt arr[])1184 PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1185 {
1186   PetscErrorCode ierr;
1187   PetscFunctionBegin;
1188   if (n <= 1) PetscFunctionReturn(0);
1189   PetscValidIntPointer(arr,2);
1190   if (n < 64) {
1191     ierr = PetscSortInt(n, arr);CHKERRQ(ierr);
1192   } else {
1193     ierr = PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1194   }
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 /*@
1199    PetscIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1200    array to match the first.
1201 
1202    Not Collective
1203 
1204    Input Parameters:
1205 +  n   - number of values
1206 .  arr1 - array of integers to be sorted
1207 -  arr2 - array of integers to be reordered
1208 
1209    Output Parameters:
1210 +  arr1 - sorted array of integers
1211 -  arr2 - reordered array of integers
1212 
1213    Notes:
1214    The arrays CANNOT overlap.
1215 
1216    If the array to be sorted is less than 64 entries long PetscSortIntWithArray() is automatically used.
1217 
1218    This function serves as an alternative to PetscSortIntWithArray(). While this function works for any array of integers it is
1219    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1220    recomended that the user benchmark their code to see which routine is fastest.
1221 
1222    Level: intermediate
1223 
1224 .seealso: PetscTimSortWithArray(), PetscSortIntWithArray(), PetscSortIntWithPermutation()
1225 @*/
PetscIntSortSemiOrderedWithArray(PetscInt n,PetscInt arr1[],PetscInt arr2[])1226 PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1227 {
1228   PetscErrorCode ierr;
1229   PetscFunctionBegin;
1230   PetscValidIntPointer(arr1,2);
1231   PetscValidIntPointer(arr2,3);
1232   if (n == 1) PetscFunctionReturn(0);
1233   if (n < 64) {
1234     ierr = PetscSortIntWithArray(n, arr1, arr2);CHKERRQ(ierr);
1235   } else {
1236     ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL);CHKERRQ(ierr);
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 /*@
1242    PetscMPIIntSortSemiOrdered - Sorts an array of PetscMPIInts in place in increasing order.
1243 
1244    Not Collective
1245 
1246    Input Parameters:
1247 +  n   - number of values
1248 -  arr - array of PetscMPIInts
1249 
1250    Output Parameters:
1251 .  arr - sorted array of integers
1252 
1253    Notes:
1254    If the array is less than 64 entries long PetscSortMPIInt() is automatically used.
1255 
1256    This function serves as an alternative to PetscSortMPIInt(). While this function works for any array of PetscMPIInts it is
1257    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1258    recomended that the user benchmark their code to see which routine is fastest.
1259 
1260    Level: intermediate
1261 
1262 .seealso: PetscTimSort(), PetscSortMPIInt()
1263 @*/
PetscMPIIntSortSemiOrdered(PetscInt n,PetscMPIInt arr[])1264 PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1265 {
1266   PetscErrorCode  ierr;
1267 
1268   PetscFunctionBegin;
1269   PetscValidIntPointer(arr,2);
1270   if (n == 1) PetscFunctionReturn(0);
1271   if (n < 64) {
1272     ierr = PetscSortMPIInt(n, arr);CHKERRQ(ierr);
1273   } else {
1274     ierr = PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 /*@
1280    PetscMPIIntSortSemiOrderedWithArray - Sorts an array of integers in place in increasing order and reorders a second
1281    array to match the first.
1282 
1283    Not Collective
1284 
1285    Input Parameters:
1286 +  n   - number of values
1287 .  arr1 - array of integers to be sorted
1288 -  arr2 - array of integers to be reordered
1289 
1290    Output Parameters:
1291 +  arr1 - sorted array of integers
1292 -  arr2 - reordered array of integers
1293 
1294    Notes:
1295    The arrays CANNOT overlap.
1296 
1297    If the array to be sorted is less than 64 entries long PetscSortMPIIntWithArray() is automatically used.
1298 
1299    This function serves as an alternative to PetscSortMPIIntWithArray(). While this function works for any array of integers it is
1300    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1301    recomended that the user benchmark their code to see which routine is fastest.
1302 
1303    Level: intermediate
1304 
1305 .seealso: PetscTimSortWithArray(), PetscSortMPIIntWithArray(), PetscSortMPIIntWithPermutation()
1306 @*/
PetscMPIIntSortSemiOrderedWithArray(PetscInt n,PetscMPIInt arr1[],PetscMPIInt arr2[])1307 PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1308 {
1309   PetscErrorCode ierr;
1310   PetscFunctionBegin;
1311   if (n <= 1) PetscFunctionReturn(0);
1312   PetscValidIntPointer(arr1,2);
1313   PetscValidIntPointer(arr2,3);
1314   if (n < 64) {
1315     ierr = PetscSortMPIIntWithArray(n, arr1, arr2);CHKERRQ(ierr);
1316   } else {
1317     ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL);CHKERRQ(ierr);
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 /*@
1323    PetscRealSortSemiOrdered - Sorts an array of PetscReals in place in increasing order.
1324 
1325    Not Collective
1326 
1327    Input Parameters:
1328 +  n   - number of values
1329 -  arr - array of PetscReals
1330 
1331    Output Parameters:
1332 .  arr - sorted array of integers
1333 
1334    Notes:
1335    If the array is less than 64 entries long PetscSortReal() is automatically used.
1336 
1337    This function serves as an alternative to PetscSortReal(). While this function works for any array of PetscReals it is
1338    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1339    recomended that the user benchmark their code to see which routine is fastest.
1340 
1341    Level: intermediate
1342 
1343 .seealso: PetscTimSort(), PetscSortReal(), PetscSortRealWithPermutation()
1344 @*/
PetscRealSortSemiOrdered(PetscInt n,PetscReal arr[])1345 PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1346 {
1347   PetscErrorCode  ierr;
1348 
1349   PetscFunctionBegin;
1350   if (n <= 1) PetscFunctionReturn(0);
1351   PetscValidRealPointer(arr,2);
1352   if (n < 64) {
1353     ierr = PetscSortReal(n, arr);CHKERRQ(ierr);
1354   } else {
1355     ierr = PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 /*@
1361    PetscRealSortSemiOrderedWithArrayInt - Sorts an array of PetscReals in place in increasing order and reorders a second
1362    array of PetscInts to match the first.
1363 
1364    Not Collective
1365 
1366    Input Parameters:
1367 +  n   - number of values
1368 .  arr1 - array of PetscReals to be sorted
1369 -  arr2 - array of PetscReals to be reordered
1370 
1371    Output Parameters:
1372 +  arr1 - sorted array of PetscReals
1373 -  arr2 - reordered array of PetscInts
1374 
1375    Notes:
1376    If the array to be sorted is less than 64 entries long PetscSortRealWithArrayInt() is automatically used.
1377 
1378    This function serves as an alternative to PetscSortRealWithArray(). While this function works for any array of PetscReals it is
1379    significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1380    recomended that the user benchmark their code to see which routine is fastest.
1381 
1382    Level: intermediate
1383 
1384 .seealso: PetscTimSortWithArray(), PetscSortRealWithArrayInt(), PetscSortRealWithPermutation()
1385 @*/
PetscRealSortSemiOrderedWithArrayInt(PetscInt n,PetscReal arr1[],PetscInt arr2[])1386 PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1387 {
1388   PetscErrorCode ierr;
1389   PetscFunctionBegin;
1390   if (n <= 1) PetscFunctionReturn(0);
1391   PetscValidRealPointer(arr1,2);
1392   PetscValidIntPointer(arr2,3);
1393   if (n < 64) {
1394     ierr = PetscSortRealWithArrayInt(n, arr1, arr2);CHKERRQ(ierr);
1395   } else {
1396     ierr = PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL);CHKERRQ(ierr);
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400