xref: /openbsd/gnu/usr.bin/perl/lib/sort.pm (revision 7b36286a)
1package sort;
2
3our $VERSION = '1.02';
4
5# Currently the hints for pp_sort are stored in the global variable
6# $sort::hints. An improvement would be to store them in $^H{SORT} and have
7# this information available somewhere in the listop OP_SORT, to allow lexical
8# scoping of this pragma. -- rgs 2002-04-30
9
10our $hints	       = 0;
11
12$sort::quicksort_bit   = 0x00000001;
13$sort::mergesort_bit   = 0x00000002;
14$sort::sort_bits       = 0x000000FF; # allow 256 different ones
15$sort::stable_bit      = 0x00000100;
16
17use strict;
18
19sub import {
20    shift;
21    if (@_ == 0) {
22	require Carp;
23	Carp::croak("sort pragma requires arguments");
24    }
25    local $_;
26    no warnings 'uninitialized';	# bitops would warn
27    while ($_ = shift(@_)) {
28	if (/^_q(?:uick)?sort$/) {
29	    $hints &= ~$sort::sort_bits;
30	    $hints |=  $sort::quicksort_bit;
31	} elsif ($_ eq '_mergesort') {
32	    $hints &= ~$sort::sort_bits;
33	    $hints |=  $sort::mergesort_bit;
34	} elsif ($_ eq 'stable') {
35	    $hints |=  $sort::stable_bit;
36	} elsif ($_ eq 'defaults') {
37	    $hints =   0;
38	} else {
39	    require Carp;
40	    Carp::croak("sort: unknown subpragma '$_'");
41	}
42    }
43}
44
45sub unimport {
46    shift;
47    if (@_ == 0) {
48	require Carp;
49	Carp::croak("sort pragma requires arguments");
50    }
51    local $_;
52    no warnings 'uninitialized';	# bitops would warn
53    while ($_ = shift(@_)) {
54	if (/^_q(?:uick)?sort$/) {
55	    $hints &= ~$sort::sort_bits;
56	} elsif ($_ eq '_mergesort') {
57	    $hints &= ~$sort::sort_bits;
58	} elsif ($_ eq 'stable') {
59	    $hints &= ~$sort::stable_bit;
60	} else {
61	    require Carp;
62	    Carp::croak("sort: unknown subpragma '$_'");
63	}
64    }
65}
66
67sub current {
68    my @sort;
69    if ($hints) {
70	push @sort, 'quicksort' if $hints & $sort::quicksort_bit;
71	push @sort, 'mergesort' if $hints & $sort::mergesort_bit;
72	push @sort, 'stable'    if $hints & $sort::stable_bit;
73    }
74    push @sort, 'mergesort' unless @sort;
75    join(' ', @sort);
76}
77
781;
79__END__
80
81=head1 NAME
82
83sort - perl pragma to control sort() behaviour
84
85=head1 SYNOPSIS
86
87    use sort 'stable';		# guarantee stability
88    use sort '_quicksort';	# use a quicksort algorithm
89    use sort '_mergesort';	# use a mergesort algorithm
90    use sort 'defaults';	# revert to default behavior
91    no  sort 'stable';		# stability not important
92
93    use sort '_qsort';		# alias for quicksort
94
95    my $current = sort::current();	# identify prevailing algorithm
96
97=head1 DESCRIPTION
98
99With the C<sort> pragma you can control the behaviour of the builtin
100C<sort()> function.
101
102In Perl versions 5.6 and earlier the quicksort algorithm was used to
103implement C<sort()>, but in Perl 5.8 a mergesort algorithm was also made
104available, mainly to guarantee worst case O(N log N) behaviour:
105the worst case of quicksort is O(N**2).  In Perl 5.8 and later,
106quicksort defends against quadratic behaviour by shuffling large
107arrays before sorting.
108
109A stable sort means that for records that compare equal, the original
110input ordering is preserved.  Mergesort is stable, quicksort is not.
111Stability will matter only if elements that compare equal can be
112distinguished in some other way.  That means that simple numerical
113and lexical sorts do not profit from stability, since equal elements
114are indistinguishable.  However, with a comparison such as
115
116   { substr($a, 0, 3) cmp substr($b, 0, 3) }
117
118stability might matter because elements that compare equal on the
119first 3 characters may be distinguished based on subsequent characters.
120In Perl 5.8 and later, quicksort can be stabilized, but doing so will
121add overhead, so it should only be done if it matters.
122
123The best algorithm depends on many things.  On average, mergesort
124does fewer comparisons than quicksort, so it may be better when
125complicated comparison routines are used.  Mergesort also takes
126advantage of pre-existing order, so it would be favored for using
127C<sort()> to merge several sorted arrays.  On the other hand, quicksort
128is often faster for small arrays, and on arrays of a few distinct
129values, repeated many times.  You can force the
130choice of algorithm with this pragma, but this feels heavy-handed,
131so the subpragmas beginning with a C<_> may not persist beyond Perl 5.8.
132The default algorithm is mergesort, which will be stable even if
133you do not explicitly demand it.
134But the stability of the default sort is a side-effect that could
135change in later versions.  If stability is important, be sure to
136say so with a
137
138  use sort 'stable';
139
140The C<no sort> pragma doesn't
141I<forbid> what follows, it just leaves the choice open.  Thus, after
142
143  no sort qw(_mergesort stable);
144
145a mergesort, which happens to be stable, will be employed anyway.
146Note that
147
148  no sort "_quicksort";
149  no sort "_mergesort";
150
151have exactly the same effect, leaving the choice of sort algorithm open.
152
153=head1 CAVEATS
154
155This pragma is not lexically scoped: its effect is global to the program
156it appears in.  That means the following will probably not do what you
157expect, because I<both> pragmas take effect at compile time, before
158I<either> C<sort()> happens.
159
160  { use sort "_quicksort";
161    print sort::current . "\n";
162    @a = sort @b;
163  }
164  { use sort "stable";
165    print sort::current . "\n";
166    @c = sort @d;
167  }
168  # prints:
169  # quicksort stable
170  # quicksort stable
171
172You can achieve the effect you probably wanted by using C<eval()>
173to defer the pragmas until run time.  Use the quoted argument
174form of C<eval()>, I<not> the BLOCK form, as in
175
176  eval { use sort "_quicksort" }; # WRONG
177
178or the effect will still be at compile time.
179Reset to default options before selecting other subpragmas
180(in case somebody carelessly left them on) and after sorting,
181as a courtesy to others.
182
183  { eval 'use sort qw(defaults _quicksort)'; # force quicksort
184    eval 'no sort "stable"';      # stability not wanted
185    print sort::current . "\n";
186    @a = sort @b;
187    eval 'use sort "defaults"';   # clean up, for others
188  }
189  { eval 'use sort qw(defaults stable)';     # force stability
190    print sort::current . "\n";
191    @c = sort @d;
192    eval 'use sort "defaults"';   # clean up, for others
193  }
194  # prints:
195  # quicksort
196  # stable
197
198Scoping for this pragma may change in future versions.
199
200=cut
201
202