1#!/usr/bin/perl
2
3#
4# gen_qsort_tuple.pl
5#
6# This script generates specialized versions of the quicksort algorithm for
7# tuple sorting.  The quicksort code is derived from the NetBSD code.  The
8# code generated by this script runs significantly faster than vanilla qsort
9# when used to sort tuples.  This speedup comes from a number of places.
10# The major effects are (1) inlining simple tuple comparators is much faster
11# than jumping through a function pointer and (2) swap and vecswap operations
12# specialized to the particular data type of interest (in this case, SortTuple)
13# are faster than the generic routines.
14#
15#	Modifications from vanilla NetBSD source:
16#	  Add do ... while() macro fix
17#	  Remove __inline, _DIAGASSERTs, __P
18#	  Remove ill-considered "swap_cnt" switch to insertion sort,
19#	  in favor of a simple check for presorted input.
20#	  Take care to recurse on the smaller partition, to bound stack usage.
21#
22#     Instead of sorting arbitrary objects, we're always sorting SortTuples.
23#     Add CHECK_FOR_INTERRUPTS().
24#
25# CAUTION: if you change this file, see also qsort.c and qsort_arg.c
26#
27
28use strict;
29use warnings;
30
31my $SUFFIX;
32my $EXTRAARGS;
33my $EXTRAPARAMS;
34my $CMPPARAMS;
35
36emit_qsort_boilerplate();
37
38$SUFFIX      = 'tuple';
39$EXTRAARGS   = ', SortTupleComparator cmp_tuple, Tuplesortstate *state';
40$EXTRAPARAMS = ', cmp_tuple, state';
41$CMPPARAMS   = ', state';
42emit_qsort_implementation();
43
44$SUFFIX      = 'ssup';
45$EXTRAARGS   = ', SortSupport ssup';
46$EXTRAPARAMS = ', ssup';
47$CMPPARAMS   = ', ssup';
48print <<'EOM';
49
50#define cmp_ssup(a, b, ssup) \
51	ApplySortComparator((a)->datum1, (a)->isnull1, \
52						(b)->datum1, (b)->isnull1, ssup)
53
54EOM
55emit_qsort_implementation();
56
57sub emit_qsort_boilerplate
58{
59	print <<'EOM';
60/*
61 * autogenerated by src/backend/utils/sort/gen_qsort_tuple.pl, do not edit!
62 *
63 * This file is included by tuplesort.c, rather than compiled separately.
64 */
65
66/*	$NetBSD: qsort.c,v 1.13 2003/08/07 16:43:42 agc Exp $	*/
67
68/*-
69 * Copyright (c) 1992, 1993
70 *	The Regents of the University of California.  All rights reserved.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the above copyright
76 *	  notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 *	  notice, this list of conditions and the following disclaimer in the
79 *	  documentation and/or other materials provided with the distribution.
80 * 3. Neither the name of the University nor the names of its contributors
81 *	  may be used to endorse or promote products derived from this software
82 *	  without specific prior written permission.
83 *
84 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
85 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
86 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
87 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
88 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
89 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
90 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
91 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
92 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
93 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
94 * SUCH DAMAGE.
95 */
96
97/*
98 * Qsort routine based on J. L. Bentley and M. D. McIlroy,
99 * "Engineering a sort function",
100 * Software--Practice and Experience 23 (1993) 1249-1265.
101 *
102 * We have modified their original by adding a check for already-sorted input,
103 * which seems to be a win per discussions on pgsql-hackers around 2006-03-21.
104 *
105 * Also, we recurse on the smaller partition and iterate on the larger one,
106 * which ensures we cannot recurse more than log(N) levels (since the
107 * partition recursed to is surely no more than half of the input).  Bentley
108 * and McIlroy explicitly rejected doing this on the grounds that it's "not
109 * worth the effort", but we have seen crashes in the field due to stack
110 * overrun, so that judgment seems wrong.
111 */
112
113static void
114swapfunc(SortTuple *a, SortTuple *b, size_t n)
115{
116	do
117	{
118		SortTuple 	t = *a;
119		*a++ = *b;
120		*b++ = t;
121	} while (--n > 0);
122}
123
124#define swap(a, b)						\
125	do { 								\
126		SortTuple t = *(a);				\
127		*(a) = *(b);					\
128		*(b) = t;						\
129	} while (0)
130
131#define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n)
132
133EOM
134
135	return;
136}
137
138sub emit_qsort_implementation
139{
140	print <<EOM;
141static SortTuple *
142med3_$SUFFIX(SortTuple *a, SortTuple *b, SortTuple *c$EXTRAARGS)
143{
144	return cmp_$SUFFIX(a, b$CMPPARAMS) < 0 ?
145		(cmp_$SUFFIX(b, c$CMPPARAMS) < 0 ? b :
146			(cmp_$SUFFIX(a, c$CMPPARAMS) < 0 ? c : a))
147		: (cmp_$SUFFIX(b, c$CMPPARAMS) > 0 ? b :
148			(cmp_$SUFFIX(a, c$CMPPARAMS) < 0 ? a : c));
149}
150
151static void
152qsort_$SUFFIX(SortTuple *a, size_t n$EXTRAARGS)
153{
154	SortTuple  *pa,
155			   *pb,
156			   *pc,
157			   *pd,
158			   *pl,
159			   *pm,
160			   *pn;
161	size_t		d1,
162				d2;
163	int			r,
164				presorted;
165
166loop:
167	CHECK_FOR_INTERRUPTS();
168	if (n < 7)
169	{
170		for (pm = a + 1; pm < a + n; pm++)
171			for (pl = pm; pl > a && cmp_$SUFFIX(pl - 1, pl$CMPPARAMS) > 0; pl--)
172				swap(pl, pl - 1);
173		return;
174	}
175	presorted = 1;
176	for (pm = a + 1; pm < a + n; pm++)
177	{
178		CHECK_FOR_INTERRUPTS();
179		if (cmp_$SUFFIX(pm - 1, pm$CMPPARAMS) > 0)
180		{
181			presorted = 0;
182			break;
183		}
184	}
185	if (presorted)
186		return;
187	pm = a + (n / 2);
188	if (n > 7)
189	{
190		pl = a;
191		pn = a + (n - 1);
192		if (n > 40)
193		{
194			size_t		d = (n / 8);
195
196			pl = med3_$SUFFIX(pl, pl + d, pl + 2 * d$EXTRAPARAMS);
197			pm = med3_$SUFFIX(pm - d, pm, pm + d$EXTRAPARAMS);
198			pn = med3_$SUFFIX(pn - 2 * d, pn - d, pn$EXTRAPARAMS);
199		}
200		pm = med3_$SUFFIX(pl, pm, pn$EXTRAPARAMS);
201	}
202	swap(a, pm);
203	pa = pb = a + 1;
204	pc = pd = a + (n - 1);
205	for (;;)
206	{
207		while (pb <= pc && (r = cmp_$SUFFIX(pb, a$CMPPARAMS)) <= 0)
208		{
209			if (r == 0)
210			{
211				swap(pa, pb);
212				pa++;
213			}
214			pb++;
215			CHECK_FOR_INTERRUPTS();
216		}
217		while (pb <= pc && (r = cmp_$SUFFIX(pc, a$CMPPARAMS)) >= 0)
218		{
219			if (r == 0)
220			{
221				swap(pc, pd);
222				pd--;
223			}
224			pc--;
225			CHECK_FOR_INTERRUPTS();
226		}
227		if (pb > pc)
228			break;
229		swap(pb, pc);
230		pb++;
231		pc--;
232	}
233	pn = a + n;
234	d1 = Min(pa - a, pb - pa);
235	vecswap(a, pb - d1, d1);
236	d1 = Min(pd - pc, pn - pd - 1);
237	vecswap(pb, pn - d1, d1);
238	d1 = pb - pa;
239	d2 = pd - pc;
240	if (d1 <= d2)
241	{
242		/* Recurse on left partition, then iterate on right partition */
243		if (d1 > 1)
244			qsort_$SUFFIX(a, d1$EXTRAPARAMS);
245		if (d2 > 1)
246		{
247			/* Iterate rather than recurse to save stack space */
248			/* qsort_$SUFFIX(pn - d2, d2$EXTRAPARAMS); */
249			a = pn - d2;
250			n = d2;
251			goto loop;
252		}
253	}
254	else
255	{
256		/* Recurse on right partition, then iterate on left partition */
257		if (d2 > 1)
258			qsort_$SUFFIX(pn - d2, d2$EXTRAPARAMS);
259		if (d1 > 1)
260		{
261			/* Iterate rather than recurse to save stack space */
262			/* qsort_$SUFFIX(a, d1$EXTRAPARAMS); */
263			n = d1;
264			goto loop;
265		}
266	}
267}
268EOM
269
270	return;
271}
272