1 /*-
2  * Copyright (c) 1996, 2020 Oracle and/or its affiliates.  All rights reserved.
3  *
4  * See the file LICENSE for license information.
5  */
6 
7 #include "db_config.h"
8 
9 #include "db_int.h"
10 #include "dbinc/db_page.h"
11 #include "dbinc/btree.h"
12 
13 static int __db_quicksort __P((DB *, DBT *, DBT *, u_int32_t *, u_int32_t *,
14 		u_int32_t *, u_int32_t *, u_int32_t));
15 
16 /*
17  * __db_compare_both --
18  *	Use the comparison functions from db to compare akey and bkey, and if
19  *	DB_DUPSORT adata and bdata.
20  *
21  * PUBLIC: int __db_compare_both __P((DB *, const DBT *, const DBT *,
22  * PUBLIC:   const DBT *, const DBT *));
23  */
24 int
__db_compare_both(db,akey,adata,bkey,bdata)25 __db_compare_both(db, akey, adata, bkey, bdata)
26 	DB *db;
27 	const DBT *akey;
28 	const DBT *adata;
29 	const DBT *bkey;
30 	const DBT *bdata;
31 {
32 	BTREE *t;
33 	int cmp;
34 
35 	t = (BTREE *)db->bt_internal;
36 
37 	cmp = t->bt_compare(db, akey, bkey, NULL);
38 	if (cmp != 0) return cmp;
39 	if (!F_ISSET(db, DB_AM_DUPSORT))
40 	    return (0);
41 
42 	if (adata == 0) return bdata == 0 ? 0 : -1;
43 	if (bdata == 0) return 1;
44 
45 #ifdef HAVE_COMPRESSION
46 	if (DB_IS_COMPRESSED(db))
47 		return t->compress_dup_compare(db, adata, bdata, NULL);
48 #endif
49 	return db->dup_compare(db, adata, bdata, NULL);
50 }
51 
52 #define	DB_SORT_SWAP(a, ad, b, bd)					\
53 do {									\
54 	tmp = (a)[0]; (a)[0] = (b)[0]; (b)[0] = tmp;			\
55 	tmp = (a)[-1]; (a)[-1] = (b)[-1]; (b)[-1] = tmp;		\
56 	if (data != NULL) {						\
57 		tmp = (ad)[0]; (ad)[0] = (bd)[0]; (bd)[0] = tmp;	\
58 		tmp = (ad)[-1]; (ad)[-1] = (bd)[-1]; (bd)[-1] = tmp;	\
59 	}								\
60 } while (0)
61 
62 #define	DB_SORT_LOAD_DBT(a, ad, aptr, adptr)				\
63 do {									\
64 	(a).data = (u_int8_t*)key->data + (aptr)[0];			\
65 	(a).size = (aptr)[-1];						\
66 	if (data != NULL) {						\
67 		(ad).data = (u_int8_t*)data->data + (adptr)[0];		\
68 		(ad).size = (adptr)[-1];				\
69 	}								\
70 } while (0)
71 
72 #define	DB_SORT_COMPARE(a, ad, b, bd) (data != NULL ?			\
73 	__db_compare_both(db, &(a), &(ad), &(b), &(bd)) :		\
74 	__db_compare_both(db, &(a), 0, &(b), 0))
75 
76 #define	DB_SORT_STACKSIZE 32
77 
78 /*
79  * __db_quicksort --
80  *	The quicksort implementation for __db_sort_multiple() and
81  *	__db_sort_multiple_key().
82  */
83 static int
__db_quicksort(db,key,data,kstart,kend,dstart,dend,size)84 __db_quicksort(db, key, data, kstart, kend, dstart, dend, size)
85 	DB *db;
86 	DBT *key, *data;
87 	u_int32_t *kstart, *kend, *dstart, *dend;
88 	u_int32_t size;
89 {
90 	int ret, cmp;
91 	u_int32_t tmp, len;
92 	u_int32_t *kptr, *dptr, *kl, *dl, *kr, *dr;
93 	DBT a, ad, b, bd, m, md;
94 	ENV *env;
95 
96 	struct DB_SORT_quicksort_stack {
97 		u_int32_t *kstart;
98 		u_int32_t *kend;
99 		u_int32_t *dstart;
100 		u_int32_t *dend;
101 	} stackbuf[DB_SORT_STACKSIZE], *stack;
102 	u_int32_t soff, slen;
103 
104 	ret = 0;
105 	env = db->env;
106 
107 	memset(&a, 0, sizeof(DBT));
108 	memset(&ad, 0, sizeof(DBT));
109 	memset(&b, 0, sizeof(DBT));
110 	memset(&bd, 0, sizeof(DBT));
111 	memset(&m, 0, sizeof(DBT));
112 	memset(&md, 0, sizeof(DBT));
113 
114 	/* NB end is smaller than start */
115 
116 	stack = stackbuf;
117 	soff = 0;
118 	slen = DB_SORT_STACKSIZE;
119 
120  start:
121 	if (kend >= kstart) goto pop;
122 
123 	/* If there's only one value, it's already sorted */
124 	len = (u_int32_t)(kstart - kend) / size;
125 	if (len == 1) goto pop;
126 
127 	DB_SORT_LOAD_DBT(a, ad, kstart, dstart);
128 	DB_SORT_LOAD_DBT(b, bd, kend + size, dend + size);
129 
130 	if (len == 2) {
131 		/* Special case the sorting of two value sequences */
132 		if (DB_SORT_COMPARE(a, ad, b, bd) > 0) {
133 			DB_SORT_SWAP(kstart, dstart, kend + size,
134 				dend + size);
135 		}
136 		goto pop;
137 	}
138 
139 	kptr = kstart - (len / 2) * size;
140 	dptr = dstart - (len / 2) * size;
141 	DB_SORT_LOAD_DBT(m, md, kptr, dptr);
142 
143 	/* Find the median of three */
144 	if (DB_SORT_COMPARE(a, ad, b, bd) < 0) {
145 		if (DB_SORT_COMPARE(m, md, a, ad) < 0) {
146 			/* m < a < b */
147 			if (len == 3) {
148 				DB_SORT_SWAP(kstart, dstart, kptr, dptr);
149 				goto pop;
150 			}
151 			DB_SORT_SWAP(kstart, dstart, kend + size, dend + size);
152 		} else if (DB_SORT_COMPARE(m, md, b, bd) < 0) {
153 			/* a <= m < b */
154 			if (len == 3) {
155 				goto pop;
156 			}
157 			DB_SORT_SWAP(kptr, dptr, kend + size, dend + size);
158 		} else {
159 			/* a < b <= m */
160 			if (len == 3) {
161 				DB_SORT_SWAP(kptr, dptr, kend + size,
162 					dend + size);
163 				goto pop;
164 			}
165 			/* Do nothing */
166 		}
167 	} else {
168 		if (DB_SORT_COMPARE(a, ad, m, md) < 0) {
169 			/* b <= a < m */
170 			DB_SORT_SWAP(kstart, dstart, kend + size,
171 			    dend + size);
172 			if (len == 3) {
173 				DB_SORT_SWAP(kptr, dptr, kend + size,
174 				    dend + size);
175 				goto pop;
176 			}
177 		} else if (DB_SORT_COMPARE(b, bd, m, md) < 0) {
178 			/* b < m <= a */
179 			if (len == 3) {
180 				DB_SORT_SWAP(kstart, dstart, kend + size,
181 					dend + size);
182 				goto pop;
183 			}
184 			DB_SORT_SWAP(kptr, dptr, kend + size, dend + size);
185 		} else {
186 			/* m <= b <= a */
187 			if (len == 3) {
188 				DB_SORT_SWAP(kstart, dstart, kptr, dptr);
189 				DB_SORT_SWAP(kptr, dptr, kend + size,
190 					dend + size);
191 				goto pop;
192 			}
193 			/* Do nothing */
194 		}
195 	}
196 
197 	/* partition */
198 	DB_SORT_LOAD_DBT(b, bd, kend + size, dend + size);
199 	kl = kstart;
200 	dl = dstart;
201 	kr = kend + size;
202 	dr = dend + size;
203 	kptr = kstart;
204 	dptr = dstart;
205 	while (kptr >= kr) {
206 		DB_SORT_LOAD_DBT(a, ad, kptr, dptr);
207 		cmp = DB_SORT_COMPARE(a, ad, b, bd);
208 		if (cmp < 0) {
209 			DB_SORT_SWAP(kl, dl, kptr, dptr);
210 			kl -= size;
211 			dl -= size;
212 			kptr -= size;
213 			dptr -= size;
214 		} else if (cmp > 0) {
215 			DB_SORT_SWAP(kr, dr, kptr, dptr);
216 			kr += size;
217 			dr += size;
218 		} else {
219 			kptr -= size;
220 			dptr -= size;
221 		}
222 	}
223 
224 	if (soff == slen) {
225 		/* Grow the stack */
226 		slen = slen * 2;
227 		if (stack == stackbuf) {
228 			ret = __os_malloc(env, slen *
229 				sizeof(struct DB_SORT_quicksort_stack), &stack);
230 			if (ret != 0) goto error;
231 			memcpy(stack, stackbuf, soff *
232 				sizeof(struct DB_SORT_quicksort_stack));
233 		} else {
234 			ret = __os_realloc(env, slen *
235 				sizeof(struct DB_SORT_quicksort_stack), &stack);
236 			if (ret != 0) goto error;
237 		}
238 	}
239 
240 	/* divide and conquer */
241 	stack[soff].kstart = kr - size;
242 	stack[soff].kend = kend;
243 	stack[soff].dstart = dr - size;
244 	stack[soff].dend = dend;
245 	++soff;
246 
247 	kend = kl;
248 	dend = dl;
249 
250 	goto start;
251 
252  pop:
253 	if (soff != 0) {
254 		--soff;
255 		kstart = stack[soff].kstart;
256 		kend = stack[soff].kend;
257 		dstart = stack[soff].dstart;
258 		dend = stack[soff].dend;
259 		goto start;
260 	}
261 
262  error:
263 	if (stack != stackbuf)
264 		__os_free(env, stack);
265 
266 	return (ret);
267 }
268 
269 #undef DB_SORT_SWAP
270 #undef DB_SORT_LOAD_DBT
271 
272 /*
273  * __db_sort_multiple --
274  *	If flags == DB_MULTIPLE_KEY, sorts a DB_MULTIPLE_KEY format DBT using
275  *	the BTree comparison function and duplicate comparison function.
276  *
277  *	If flags == DB_MULTIPLE, sorts one or two DB_MULTIPLE format DBTs using
278  *	the BTree comparison function and duplicate comparison function. Will
279  *	assume key and data specifies pairs of key/data to sort together. If
280  *	data is NULL, will just sort key according to the btree comparison
281  *	function.
282  *
283  *	Uses an in-place quicksort algorithm, with median of three for the pivot
284  *	point.
285  *
286  * PUBLIC: int __db_sort_multiple __P((DB *, DBT *, DBT *, u_int32_t));
287  */
288 int
__db_sort_multiple(db,key,data,flags)289 __db_sort_multiple(db, key, data, flags)
290 	DB *db;
291 	DBT *key, *data;
292 	u_int32_t flags;
293 {
294 	u_int32_t *kstart, *kend, *dstart, *dend;
295 
296 	/* TODO: sanity checks on the DBTs */
297 	/* DB_ILLEGAL_METHOD(db, DB_OK_BTREE); */
298 
299 	kstart = (u_int32_t*)((u_int8_t *)key->data + key->ulen) - 1;
300 
301 	switch (flags) {
302 	case DB_MULTIPLE:
303 		if (data != NULL)
304 			dstart = (u_int32_t*)((u_int8_t *)data->data +
305 				data->ulen) - 1;
306 		else
307 			dstart = kstart;
308 
309 		/* Find the end */
310 		for (kend = kstart, dend = dstart;
311 		    *kend != (u_int32_t)-1 && *dend != (u_int32_t)-1;
312 		    kend -= 2, dend -= 2)
313 			;
314 
315 		return (__db_quicksort(db, key, data, kstart, kend, dstart,
316 			dend, 2));
317 	case DB_MULTIPLE_KEY:
318 		/* Find the end */
319 		for (kend = kstart; *kend != (u_int32_t)-1; kend -= 4)
320 			;
321 
322 		return (__db_quicksort(db, key, key, kstart, kend, kstart - 2,
323 			kend - 2, 4));
324 	default:
325 		return (__db_ferr(db->env, "DB->sort_multiple", 0));
326 	}
327 }
328