1#!/usr/bin/perl -w
2
3#
4# gen_qsort_tuple.pl
5#
6# This script generates specialized versions of the quicksort algorithm for
7# tuple sorting.  The quicksort code is derived from the NetBSD code.  The
8# code generated by this script runs significantly faster than vanilla qsort
9# when used to sort tuples.  This speedup comes from a number of places.
10# The major effects are (1) inlining simple tuple comparators is much faster
11# than jumping through a function pointer and (2) swap and vecswap operations
12# specialized to the particular data type of interest (in this case, SortTuple)
13# are faster than the generic routines.
14#
15#	Modifications from vanilla NetBSD source:
16#	  Add do ... while() macro fix
17#	  Remove __inline, _DIAGASSERTs, __P
18#	  Remove ill-considered "swap_cnt" switch to insertion sort,
19#	  in favor of a simple check for presorted input.
20#	  Take care to recurse on the smaller partition, to bound stack usage.
21#
22#     Instead of sorting arbitrary objects, we're always sorting SortTuples.
23#     Add CHECK_FOR_INTERRUPTS().
24#
25# CAUTION: if you change this file, see also qsort.c and qsort_arg.c
26#
27
28use strict;
29
30my $SUFFIX;
31my $EXTRAARGS;
32my $EXTRAPARAMS;
33my $CMPPARAMS;
34
35emit_qsort_boilerplate();
36
37$SUFFIX      = 'tuple';
38$EXTRAARGS   = ', SortTupleComparator cmp_tuple, Tuplesortstate *state';
39$EXTRAPARAMS = ', cmp_tuple, state';
40$CMPPARAMS   = ', state';
41emit_qsort_implementation();
42
43$SUFFIX      = 'ssup';
44$EXTRAARGS   = ', SortSupport ssup';
45$EXTRAPARAMS = ', ssup';
46$CMPPARAMS   = ', ssup';
47print <<'EOM';
48
49#define cmp_ssup(a, b, ssup) \
50	ApplySortComparator((a)->datum1, (a)->isnull1, \
51						(b)->datum1, (b)->isnull1, ssup)
52
53EOM
54emit_qsort_implementation();
55
56sub emit_qsort_boilerplate
57{
58	print <<'EOM';
59/*
60 * autogenerated by src/backend/utils/sort/gen_qsort_tuple.pl, do not edit!
61 *
62 * This file is included by tuplesort.c, rather than compiled separately.
63 */
64
65/*	$NetBSD: qsort.c,v 1.13 2003/08/07 16:43:42 agc Exp $	*/
66
67/*-
68 * Copyright (c) 1992, 1993
69 *	The Regents of the University of California.  All rights reserved.
70 *
71 * Redistribution and use in source and binary forms, with or without
72 * modification, are permitted provided that the following conditions
73 * are met:
74 * 1. Redistributions of source code must retain the above copyright
75 *	  notice, this list of conditions and the following disclaimer.
76 * 2. Redistributions in binary form must reproduce the above copyright
77 *	  notice, this list of conditions and the following disclaimer in the
78 *	  documentation and/or other materials provided with the distribution.
79 * 3. Neither the name of the University nor the names of its contributors
80 *	  may be used to endorse or promote products derived from this software
81 *	  without specific prior written permission.
82 *
83 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
84 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
87 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93 * SUCH DAMAGE.
94 */
95
96/*
97 * Qsort routine based on J. L. Bentley and M. D. McIlroy,
98 * "Engineering a sort function",
99 * Software--Practice and Experience 23 (1993) 1249-1265.
100 *
101 * We have modified their original by adding a check for already-sorted input,
102 * which seems to be a win per discussions on pgsql-hackers around 2006-03-21.
103 *
104 * Also, we recurse on the smaller partition and iterate on the larger one,
105 * which ensures we cannot recurse more than log(N) levels (since the
106 * partition recursed to is surely no more than half of the input).  Bentley
107 * and McIlroy explicitly rejected doing this on the grounds that it's "not
108 * worth the effort", but we have seen crashes in the field due to stack
109 * overrun, so that judgment seems wrong.
110 */
111
112static void
113swapfunc(SortTuple *a, SortTuple *b, size_t n)
114{
115	do
116	{
117		SortTuple 	t = *a;
118		*a++ = *b;
119		*b++ = t;
120	} while (--n > 0);
121}
122
123#define swap(a, b)						\
124	do { 								\
125		SortTuple t = *(a);				\
126		*(a) = *(b);					\
127		*(b) = t;						\
128	} while (0)
129
130#define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n)
131
132EOM
133}
134
135sub emit_qsort_implementation
136{
137	print <<EOM;
138static SortTuple *
139med3_$SUFFIX(SortTuple *a, SortTuple *b, SortTuple *c$EXTRAARGS)
140{
141	return cmp_$SUFFIX(a, b$CMPPARAMS) < 0 ?
142		(cmp_$SUFFIX(b, c$CMPPARAMS) < 0 ? b :
143			(cmp_$SUFFIX(a, c$CMPPARAMS) < 0 ? c : a))
144		: (cmp_$SUFFIX(b, c$CMPPARAMS) > 0 ? b :
145			(cmp_$SUFFIX(a, c$CMPPARAMS) < 0 ? a : c));
146}
147
148static void
149qsort_$SUFFIX(SortTuple *a, size_t n$EXTRAARGS)
150{
151	SortTuple  *pa,
152			   *pb,
153			   *pc,
154			   *pd,
155			   *pl,
156			   *pm,
157			   *pn;
158	size_t		d1,
159				d2;
160	int			r,
161				presorted;
162
163loop:
164	CHECK_FOR_INTERRUPTS();
165	if (n < 7)
166	{
167		for (pm = a + 1; pm < a + n; pm++)
168			for (pl = pm; pl > a && cmp_$SUFFIX(pl - 1, pl$CMPPARAMS) > 0; pl--)
169				swap(pl, pl - 1);
170		return;
171	}
172	presorted = 1;
173	for (pm = a + 1; pm < a + n; pm++)
174	{
175		CHECK_FOR_INTERRUPTS();
176		if (cmp_$SUFFIX(pm - 1, pm$CMPPARAMS) > 0)
177		{
178			presorted = 0;
179			break;
180		}
181	}
182	if (presorted)
183		return;
184	pm = a + (n / 2);
185	if (n > 7)
186	{
187		pl = a;
188		pn = a + (n - 1);
189		if (n > 40)
190		{
191			size_t		d = (n / 8);
192
193			pl = med3_$SUFFIX(pl, pl + d, pl + 2 * d$EXTRAPARAMS);
194			pm = med3_$SUFFIX(pm - d, pm, pm + d$EXTRAPARAMS);
195			pn = med3_$SUFFIX(pn - 2 * d, pn - d, pn$EXTRAPARAMS);
196		}
197		pm = med3_$SUFFIX(pl, pm, pn$EXTRAPARAMS);
198	}
199	swap(a, pm);
200	pa = pb = a + 1;
201	pc = pd = a + (n - 1);
202	for (;;)
203	{
204		while (pb <= pc && (r = cmp_$SUFFIX(pb, a$CMPPARAMS)) <= 0)
205		{
206			if (r == 0)
207			{
208				swap(pa, pb);
209				pa++;
210			}
211			pb++;
212			CHECK_FOR_INTERRUPTS();
213		}
214		while (pb <= pc && (r = cmp_$SUFFIX(pc, a$CMPPARAMS)) >= 0)
215		{
216			if (r == 0)
217			{
218				swap(pc, pd);
219				pd--;
220			}
221			pc--;
222			CHECK_FOR_INTERRUPTS();
223		}
224		if (pb > pc)
225			break;
226		swap(pb, pc);
227		pb++;
228		pc--;
229	}
230	pn = a + n;
231	d1 = Min(pa - a, pb - pa);
232	vecswap(a, pb - d1, d1);
233	d1 = Min(pd - pc, pn - pd - 1);
234	vecswap(pb, pn - d1, d1);
235	d1 = pb - pa;
236	d2 = pd - pc;
237	if (d1 <= d2)
238	{
239		/* Recurse on left partition, then iterate on right partition */
240		if (d1 > 1)
241			qsort_$SUFFIX(a, d1$EXTRAPARAMS);
242		if (d2 > 1)
243		{
244			/* Iterate rather than recurse to save stack space */
245			/* qsort_$SUFFIX(pn - d2, d2$EXTRAPARAMS); */
246			a = pn - d2;
247			n = d2;
248			goto loop;
249		}
250	}
251	else
252	{
253		/* Recurse on right partition, then iterate on left partition */
254		if (d2 > 1)
255			qsort_$SUFFIX(pn - d2, d2$EXTRAPARAMS);
256		if (d1 > 1)
257		{
258			/* Iterate rather than recurse to save stack space */
259			/* qsort_$SUFFIX(a, d1$EXTRAPARAMS); */
260			n = d1;
261			goto loop;
262		}
263	}
264}
265EOM
266}
267