1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 #ifdef __cplusplus
9 #define REGISTER
10 #else
11 #define REGISTER register
12 #endif
13 
14 /*
15  * distributed_qsort.c:
16  * Our own version of the system qsort routine which is faster by an average
17  * of 25%, with lows and highs of 10% and 50%.
18  * The THRESHold below is the insertion sort threshold, and has been adjusted
19  * for records of size 48 bytes.
20  * The MTHREShold is where we stop finding a better median.
21  */
22 
23 #include <stdlib.h>			/* only for type declarations */
24 #include "_hypre_utilities.h"
25 
26 #define		THRESH		4	/* threshold for insertion */
27 #define		MTHRESH		6	/* threshold for median */
28 
29 static HYPRE_Int (*qcmp) (char*,char*);		/* the comparison routine */
30 static HYPRE_Int qsz;			/* size of each record */
31 static void qst(char *, char *);
32 
33 static HYPRE_Int thresh;		/* THRESHold in chars */
34 static HYPRE_Int mthresh;		/* MTHRESHold in chars */
35 
36 
37 
38 /*
39  * hypre_tex_qsort:
40  * First, set up some global parameters for qst to share.  Then, quicksort
41  * with qst(), and then a cleanup insertion sort ourselves.  Sound simple?
42  * It's not...
43  */
44 
45 void
hypre_tex_qsort(char * base,HYPRE_Int n,HYPRE_Int size,HYPRE_Int (* compar)(char *,char *))46 hypre_tex_qsort(char* base,HYPRE_Int n,HYPRE_Int size, HYPRE_Int (*compar) (char*,char*))
47 {
48     REGISTER char *i;
49     REGISTER char *j;
50     REGISTER char *lo;
51     REGISTER char *hi;
52     REGISTER char *min;
53     REGISTER char c;
54     char *max;
55 
56     if (n <= 1)
57 	return;
58     qsz = size;
59     qcmp = compar;
60     thresh = qsz * THRESH;
61     mthresh = qsz * MTHRESH;
62     max = base + n * qsz;
63     if (n >= THRESH)
64     {
65 	qst(base, max);
66 	hi = base + thresh;
67     }
68     else
69     {
70 	hi = max;
71     }
72     /* First put smallest element, which must be in the first THRESH, in the
73        first position as a sentinel.  This is done just by searching the
74        first THRESH elements (or the first n if n < THRESH), finding the min,
75        and swapping it into the first position. */
76     for (j = lo = base; (lo += qsz) < hi;)
77     {
78 	if ((*qcmp) (j, lo) > 0)
79 	    j = lo;
80     }
81     if (j != base)
82     {				/* swap j into place */
83 	for (i = base, hi = base + qsz; i < hi;)
84 	{
85 	    c = *j;
86 	    *j++ = *i;
87 	    *i++ = c;
88 	}
89     }
90     /* With our sentinel in place, we now run the following hyper-fast
91        insertion sort.  For each remaining element, min, from [1] to [n-1],
92        set hi to the index of the element AFTER which this one goes. Then, do
93        the standard insertion sort shift on a character at a time basis for
94        each element in the frob. */
95     for (min = base; (hi = min += qsz) < max;)
96     {
97 	while ((*qcmp) (hi -= qsz, min) > 0);
98 	if ((hi += qsz) != min)
99 	{
100 	    for (lo = min + qsz; --lo >= min;)
101 	    {
102 		c = *lo;
103 		for (i = j = lo; (j -= qsz) >= hi; i = j)
104 		    *i = *j;
105 		*i = c;
106 	    }
107 	}
108     }
109 }
110 
111 
112 
113 /*
114  * qst:
115  * Do a quicksort
116  * First, find the median element, and put that one in the first place as the
117  * discriminator.  (This "median" is just the median of the first, last and
118  * middle elements).  (Using this median instead of the first element is a big
119  * win).  Then, the usual partitioning/swapping, followed by moving the
120  * discriminator into the right place.  Then, figure out the sizes of the two
121  * partions, do the smaller one recursively and the larger one via a repeat of
122  * this code.  Stopping when there are less than THRESH elements in a partition
123  * and cleaning up with an insertion sort (in our caller) is a huge win.
124  * All data swaps are done in-line, which is space-losing but time-saving.
125  * (And there are only three places where this is done).
126  */
127 
qst(char * base,char * max)128 static void qst(char *base, char *max)
129 {
130     REGISTER char *i;
131     REGISTER char *j;
132     REGISTER char *jj;
133     REGISTER char *mid;
134     REGISTER HYPRE_Int ii;
135     REGISTER char c;
136     char *tmp;
137     HYPRE_Int lo;
138     HYPRE_Int hi;
139 
140     lo = max - base;		/* number of elements as chars */
141     do
142     {
143 	/* At the top here, lo is the number of characters of elements in the
144 	   current partition.  (Which should be max - base). Find the median
145 	   of the first, last, and middle element and make that the middle
146 	   element.  Set j to largest of first and middle.  If max is larger
147 	   than that guy, then it's that guy, else compare max with loser of
148 	   first and take larger.  Things are set up to prefer the middle,
149 	   then the first in case of ties. */
150 	mid = i = base + qsz * ((unsigned) (lo / qsz) >> 1);
151 	if (lo >= mthresh)
152 	{
153 	    j = ((*qcmp) ((jj = base), i) > 0 ? jj : i);
154 	    if ((*qcmp) (j, (tmp = max - qsz)) > 0)
155 	    {
156 		j = (j == jj ? i : jj);	/* switch to first loser */
157 		if ((*qcmp) (j, tmp) < 0)
158 		    j = tmp;
159 	    }
160 	    if (j != i)
161 	    {
162 		ii = qsz;
163 		do
164 		{
165 		    c = *i;
166 		    *i++ = *j;
167 		    *j++ = c;
168 		} while (--ii);
169 	    }
170 	}
171 	/* Semi-standard quicksort partitioning/swapping */
172 	for (i = base, j = max - qsz;;)
173 	{
174 	    while (i < mid && (*qcmp) (i, mid) <= 0)
175 		i += qsz;
176 	    while (j > mid)
177 	    {
178 		if ((*qcmp) (mid, j) <= 0)
179 		{
180 		    j -= qsz;
181 		    continue;
182 		}
183 		tmp = i + qsz;	/* value of i after swap */
184 		if (i == mid)
185 		{		/* j <-> mid, new mid is j */
186 		    mid = jj = j;
187 		}
188 		else
189 		{		/* i <-> j */
190 		    jj = j;
191 		    j -= qsz;
192 		}
193 		goto swap;
194 	    }
195 	    if (i == mid)
196 	    {
197 		break;
198 	    }
199 	    else
200 	    {			/* i <-> mid, new mid is i */
201 		jj = mid;
202 		tmp = mid = i;	/* value of i after swap */
203 		j -= qsz;
204 	    }
205     swap:
206 	    ii = qsz;
207 	    do
208 	    {
209 		c = *i;
210 		*i++ = *jj;
211 		*jj++ = c;
212 	    } while (--ii);
213 	    i = tmp;
214 	}
215 	/* Look at sizes of the two partitions, do the smaller one first by
216 	   recursion, then do the larger one by making sure lo is its size,
217 	   base and max are update correctly, and branching back. But only
218 	   repeat (recursively or by branching) if the partition is of at
219 	   least size THRESH. */
220 	i = (j = mid) + qsz;
221 	if ((lo = j - base) <= (hi = max - i))
222 	{
223 	    if (lo >= thresh)
224 		qst(base, j);
225 	    base = i;
226 	    lo = hi;
227 	}
228 	else
229 	{
230 	    if (hi >= thresh)
231 		qst(i, max);
232 	    max = j;
233 	}
234     } while (lo >= thresh);
235 }
236