1package Sort::Key::Natural; 2 3our $VERSION = '0.04'; 4 5use strict; 6use warnings; 7 8require Exporter; 9 10our @ISA = qw( Exporter ); 11our @EXPORT_OK = qw( natkeysort 12 natkeysort_inplace 13 rnatkeysort 14 rnatkeysort_inplace 15 mkkey_natural 16 natsort 17 rnatsort 18 natsort_inplace 19 rnatsort_inplace 20 21 natwfkeysort 22 natwfkeysort_inplace 23 rnatwfkeysort 24 rnatwfkeysort_inplace 25 mkkey_natural_with_floats 26 natwfsort 27 rnatwfsort 28 natwfsort_inplace 29 rnatwfsort_inplace ); 30 31 32require locale; 33 34sub mkkey_natural { 35 my $nat = @_ ? shift : $_; 36 my @parts = do { 37 if ((caller 0)[8] & $locale::hint_bits) { 38 use locale; 39 $nat =~ /\d+|\p{IsAlpha}+/g; 40 } 41 else { 42 $nat =~ /\d+|\p{IsAlpha}+/g; 43 } 44 }; 45 for (@parts) { 46 if (/^\d/) { 47 s/^0+//; 48 my $len = length; 49 my $nines = int ($len / 9); 50 my $rest = $len - 9 * $nines; 51 $_ = ('9' x $nines) . $rest . $_; 52 } 53 } 54 return join("\0", @parts); 55} 56 57use Sort::Key::Register natural => \&mkkey_natural, 'string'; 58use Sort::Key::Register nat => \&mkkey_natural, 'string'; 59 60use Sort::Key::Maker natkeysort => 'nat'; 61use Sort::Key::Maker rnatkeysort => '-nat'; 62use Sort::Key::Maker natsort => \&mkkey_natural, 'str'; 63use Sort::Key::Maker rnatsort => \&mkkey_natural, '-str'; 64 65sub mkkey_natural_with_floats { 66 my $nat = @_ ? shift : $_; 67 my @parts = do { 68 if ((caller 0)[8] & $locale::hint_bits) { 69 use locale; 70 $nat =~ /[+\-]?\d+(?:\.\d*)?|\p{IsAlpha}+/g; 71 } 72 else { 73 $nat =~ /[+\-]?\d+(?:\.\d*)?|\p{IsAlpha}+/g; 74 } 75 }; 76 for (@parts) { 77 if (my ($sign, $number, $dec) = /^([+-]?)(\d+)(?:\.(\d*))?$/) { 78 $number =~ s/^0+//; 79 $dec = '' unless defined $dec; 80 $dec =~ s/0+$//; 81 my $len = length $number; 82 my $nines = int ($len / 9); 83 my $rest = $len - 9 * $nines; 84 $_ = ('9' x $nines) . $rest . $number . $dec; 85 if ($sign eq '-' and $_ ne '0') { 86 tr/0123456789/9876543210/; 87 $_ = "-$_"; 88 } 89 } 90 } 91 return join("\0", @parts); 92} 93 94use Sort::Key::Register natural_with_floats => \&mkkey_natural_with_floats, 'string'; 95use Sort::Key::Register natwf => \&mkkey_natural_with_floats, 'string'; 96 97use Sort::Key::Maker natwfkeysort => 'natwf'; 98use Sort::Key::Maker rnatwfkeysort => '-natwf'; 99use Sort::Key::Maker natwfsort => \&mkkey_natural_with_floats, 'str'; 100use Sort::Key::Maker rnatwfsort => \&mkkey_natural_with_floats, '-str'; 101 102 1031; 104 105=head1 NAME 106 107Sort::Key::Natural - fast natural sorting 108 109=head1 SYNOPSIS 110 111 use Sort::Key::Natural qw(natsort); 112 113 my @data = qw(foo1 foo23 foo6 bar12 bar1 114 foo bar2 bar-45 foomatic b-a-r-45); 115 116 my @sorted = natsort @data; 117 118 print "@sorted\n"; 119 # prints: 120 # b-a-r-45 bar1 bar2 bar12 bar-45 foo foo1 foo6 foo23 foomatic 121 122 use Sort::Key::Natural qw(natkeysort); 123 124 my @objects = (...); 125 my @sorted = natkeysort { $_->get_id } @objects; 126 127 128=head1 DESCRIPTION 129 130This module extends the L<Sort::Key> family of modules to support 131natural sorting. 132 133Under natural sorting, strings are split at word and number 134boundaries, and the resulting substrings are compared as follows: 135 136=over 4 137 138=item * 139 140numeric substrings are compared numerically 141 142=item * 143 144alphabetic substrings are compared lexically 145 146=item * 147 148numeric substrings come always before alphabetic substrings 149 150=back 151 152Spaces, symbols and non-printable characters are only considered for 153splitting the string into its parts but not for sorting. For instance 154C<foo-bar-42> is broken in three substrings C<foo>, C<bar> and C<42> 155and after that the dashes are ignored. 156 157Note, that the sorting is case sensitive. To do a case insensitive 158sort you have to convert the keys explicitly: 159 160 my @sorted = natkeysort { lc $_ } @data 161 162Also, once this module is loaded, the new type C<natural> (or C<nat>) will 163be available from L<Sort::Key::Maker>. For instance: 164 165 use Sort::Key::Natural; 166 use Sort::Key::Maker i_rnat_keysort => qw(integer -natural); 167 168creates a multi-key sorter C<i_rnat_keysort> accepting two keys, the 169first to be compared as an integer and the second in natural 170descending order. 171 172There is also an alternative set of natural sorting functions that 173recognize floating point numbers. They use the key type C<natwf> 174(abbreviation of C<natural_with_floats>). 175 176=head2 FUNCTIONS 177 178the functions that can be imported from this module are: 179 180=over 4 181 182=item natsort @data 183 184returns the elements of C<@data> sorted in natural order. 185 186=item rnatsort @data 187 188returns the elements of C<@data> sorted in natural descending order. 189 190=item natkeysort { CALC_KEY($_) } @data 191 192returns the elements on C<@array> naturally sorted by the keys 193resulting from applying them C<CALC_KEY>. 194 195=item rnatkeysort { CALC_KEY($_) } @data 196 197is similar to C<natkeysort> but sorts the elements in descending 198order. 199 200=item natsort_inplace @data 201 202=item rnatsort_inplace @data 203 204=item natkeysort_inplace { CALC_KEY($_) } @data 205 206=item rnatkeysort_inplace { CALC_KEY($_) } @data 207 208these functions are similar respectively to C<natsort>, C<rnatsort>, 209C<natsortkey> and C<rnatsortkey>, but they sort the array C<@data> in 210place. 211 212=item $key = mkkey_natural $string 213 214given C<$string>, returns a key that can be compared lexicographically 215to another key obtained in the same manner, results in the same order 216as comparing the former strings as in the natural order. 217 218If the argument C<$key> is not provided it defaults to C<$_>. 219 220=item natwfsort @data 221 222=item rnatwfsort @data 223 224=item natwfkeysort { CALC_KEY($_) } @data 225 226=item rnatwfkeysort { CALC_KEY($_) } @data 227 228=item natwfsort_inplace @data 229 230=item rnatwfsort_inplace @data 231 232=item natwfkeysort_inplace { CALC_KEY($_) } @data 233 234=item rnatwfkeysort_inplace { CALC_KEY($_) } @data 235 236=item mkkey_natural_with_floats $key 237 238this ugly named set of functions perform in the same way as its 239s/natwf/nat/ counterpart with the difference that they honor floating 240point numbers embedded inside the strings. 241 242In this context a floating point number is a string matching the 243regular expression C</[+\-]?\d+(\.\d*)?/>. Note that numbers with an 244exponent part (i.e. C<1.12E-12>) are not recognized as such. 245 246Note also that numbers without an integer part (i.e. C<.2> or C<-.12>) 247are not supported either. 248 249=back 250 251=head1 SEE ALSO 252 253L<Sort::Key>, L<Sort::Key::Maker>. 254 255Other module providing similar functionality is L<Sort::Naturally>. 256 257=head1 COPYRIGHT AND LICENSE 258 259Copyright (C) 2006, 2012, 2014 by Salvador FandiE<ntilde>o, 260E<lt>sfandino@yahoo.comE<gt>. 261 262This library is free software; you can redistribute it and/or modify 263it under the same terms as Perl itself, either Perl version 5.8.4 or, 264at your option, any later version of Perl 5 you may have available. 265 266=cut 267