1# You may distribute under the terms of either the GNU General Public License 2# or the Artistic License (the same terms as Perl itself) 3# 4# (C) Paul Evans, 2009-2018 -- leonerd@leonerd.org.uk 5 6package List::UtilsBy; 7 8use strict; 9use warnings; 10 11our $VERSION = '0.11'; 12 13use Exporter 'import'; 14 15our @EXPORT_OK = qw( 16 sort_by 17 nsort_by 18 rev_sort_by 19 rev_nsort_by 20 21 max_by nmax_by 22 min_by nmin_by 23 minmax_by nminmax_by 24 25 uniq_by 26 27 partition_by 28 count_by 29 30 zip_by 31 unzip_by 32 33 extract_by 34 extract_first_by 35 36 weighted_shuffle_by 37 38 bundle_by 39); 40 41=head1 NAME 42 43C<List::UtilsBy> - higher-order list utility functions 44 45=head1 SYNOPSIS 46 47 use List::UtilsBy qw( nsort_by min_by ); 48 49 use File::stat qw( stat ); 50 my @files_by_age = nsort_by { stat($_)->mtime } @files; 51 52 my $shortest_name = min_by { length } @names; 53 54=head1 DESCRIPTION 55 56This module provides a number of list utility functions, all of which take an 57initial code block to control their behaviour. They are variations on similar 58core perl or C<List::Util> functions of similar names, but which use the block 59to control their behaviour. For example, the core Perl function C<sort> takes 60a list of values and returns them, sorted into order by their string value. 61The L</sort_by> function sorts them according to the string value returned by 62the extra function, when given each value. 63 64 my @names_sorted = sort @names; 65 66 my @people_sorted = sort_by { $_->name } @people; 67 68=cut 69 70=head1 FUNCTIONS 71 72All functions added since version 0.04 unless otherwise stated, as the 73original names for earlier versions were renamed. 74 75=cut 76 77=head2 sort_by 78 79 @vals = sort_by { KEYFUNC } @vals 80 81Returns the list of values sorted according to the string values returned by 82the C<KEYFUNC> block or function. A typical use of this may be to sort objects 83according to the string value of some accessor, such as 84 85 sort_by { $_->name } @people 86 87The key function is called in scalar context, being passed each value in turn 88as both C<$_> and the only argument in the parameters, C<@_>. The values are 89then sorted according to string comparisons on the values returned. 90 91This is equivalent to 92 93 sort { $a->name cmp $b->name } @people 94 95except that it guarantees the C<name> accessor will be executed only once per 96value. 97 98One interesting use-case is to sort strings which may have numbers embedded in 99them "naturally", rather than lexically. 100 101 sort_by { s/(\d+)/sprintf "%09d", $1/eg; $_ } @strings 102 103This sorts strings by generating sort keys which zero-pad the embedded numbers 104to some level (9 digits in this case), helping to ensure the lexical sort puts 105them in the correct order. 106 107=cut 108 109sub sort_by(&@) 110{ 111 my $keygen = shift; 112 113 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_; 114 return @_[ sort { $keys[$a] cmp $keys[$b] } 0 .. $#_ ]; 115} 116 117=head2 nsort_by 118 119 @vals = nsort_by { KEYFUNC } @vals 120 121Similar to L</sort_by> but compares its key values numerically. 122 123=cut 124 125sub nsort_by(&@) 126{ 127 my $keygen = shift; 128 129 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_; 130 return @_[ sort { $keys[$a] <=> $keys[$b] } 0 .. $#_ ]; 131} 132 133=head2 rev_sort_by 134 135=head2 rev_nsort_by 136 137 @vals = rev_sort_by { KEYFUNC } @vals 138 139 @vals = rev_nsort_by { KEYFUNC } @vals 140 141I<Since version 0.06.> 142 143Similar to L</sort_by> and L</nsort_by> but returns the list in the reverse 144order. Equivalent to 145 146 @vals = reverse sort_by { KEYFUNC } @vals 147 148except that these functions are slightly more efficient because they avoid 149the final C<reverse> operation. 150 151=cut 152 153sub rev_sort_by(&@) 154{ 155 my $keygen = shift; 156 157 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_; 158 return @_[ sort { $keys[$b] cmp $keys[$a] } 0 .. $#_ ]; 159} 160 161sub rev_nsort_by(&@) 162{ 163 my $keygen = shift; 164 165 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_; 166 return @_[ sort { $keys[$b] <=> $keys[$a] } 0 .. $#_ ]; 167} 168 169=head2 max_by 170 171 $optimal = max_by { KEYFUNC } @vals 172 173 @optimal = max_by { KEYFUNC } @vals 174 175Returns the (first) value from C<@vals> that gives the numerically largest 176result from the key function. 177 178 my $tallest = max_by { $_->height } @people 179 180 use File::stat qw( stat ); 181 my $newest = max_by { stat($_)->mtime } @files; 182 183In scalar context, the first maximal value is returned. In list context, a 184list of all the maximal values is returned. This may be used to obtain 185positions other than the first, if order is significant. 186 187If called on an empty list, an empty list is returned. 188 189For symmetry with the L</nsort_by> function, this is also provided under the 190name C<nmax_by> since it behaves numerically. 191 192=cut 193 194sub max_by(&@) 195{ 196 my $code = shift; 197 198 return unless @_; 199 200 local $_; 201 202 my @maximal = $_ = shift @_; 203 my $max = $code->( $_ ); 204 205 foreach ( @_ ) { 206 my $this = $code->( $_ ); 207 if( $this > $max ) { 208 @maximal = $_; 209 $max = $this; 210 } 211 elsif( wantarray and $this == $max ) { 212 push @maximal, $_; 213 } 214 } 215 216 return wantarray ? @maximal : $maximal[0]; 217} 218 219*nmax_by = \&max_by; 220 221=head2 min_by 222 223 $optimal = min_by { KEYFUNC } @vals 224 225 @optimal = min_by { KEYFUNC } @vals 226 227Similar to L</max_by> but returns values which give the numerically smallest 228result from the key function. Also provided as C<nmin_by> 229 230=cut 231 232sub min_by(&@) 233{ 234 my $code = shift; 235 236 return unless @_; 237 238 local $_; 239 240 my @minimal = $_ = shift @_; 241 my $min = $code->( $_ ); 242 243 foreach ( @_ ) { 244 my $this = $code->( $_ ); 245 if( $this < $min ) { 246 @minimal = $_; 247 $min = $this; 248 } 249 elsif( wantarray and $this == $min ) { 250 push @minimal, $_; 251 } 252 } 253 254 return wantarray ? @minimal : $minimal[0]; 255} 256 257*nmin_by = \&min_by; 258 259=head2 minmax_by 260 261 ( $minimal, $maximal ) = minmax_by { KEYFUNC } @vals 262 263I<Since version 0.11.> 264 265Similar to calling both L</min_by> and L</max_by> with the same key function 266on the same list. This version is more efficient than calling the two other 267functions individually, as it has less work to perform overall. In the case of 268ties, only the first optimal element found in each case is returned. Also 269provided as C<nminmax_by>. 270 271=cut 272 273sub minmax_by(&@) 274{ 275 my $code = shift; 276 277 return unless @_; 278 279 my $minimal = $_ = shift @_; 280 my $min = $code->( $_ ); 281 282 return ( $minimal, $minimal ) unless @_; 283 284 my $maximal = $_ = shift @_; 285 my $max = $code->( $_ ); 286 287 if( $max < $min ) { 288 ( $maximal, $minimal ) = ( $minimal, $maximal ); 289 ( $max, $min ) = ( $min, $max ); 290 } 291 292 # Minmax algorithm is faster than naïve min + max individually because it 293 # takes pairs of values 294 while( @_ ) { 295 my $try_minimal = $_ = shift @_; 296 my $try_min = $code->( $_ ); 297 298 my $try_maximal = $try_minimal; 299 my $try_max = $try_min; 300 if( @_ ) { 301 $try_maximal = $_ = shift @_; 302 $try_max = $code->( $_ ); 303 304 if( $try_max < $try_min ) { 305 ( $try_minimal, $try_maximal ) = ( $try_maximal, $try_minimal ); 306 ( $try_min, $try_max ) = ( $try_max, $try_min ); 307 } 308 } 309 310 if( $try_min < $min ) { 311 $minimal = $try_minimal; 312 $min = $try_min; 313 } 314 if( $try_max > $max ) { 315 $maximal = $try_maximal; 316 $max = $try_max; 317 } 318 } 319 320 return ( $minimal, $maximal ); 321} 322 323*nminmax_by = \&minmax_by; 324 325=head2 uniq_by 326 327 @vals = uniq_by { KEYFUNC } @vals 328 329Returns a list of the subset of values for which the key function block 330returns unique values. The first value yielding a particular key is chosen, 331subsequent values are rejected. 332 333 my @some_fruit = uniq_by { $_->colour } @fruit; 334 335To select instead the last value per key, reverse the input list. If the order 336of the results is significant, don't forget to reverse the result as well: 337 338 my @some_fruit = reverse uniq_by { $_->colour } reverse @fruit; 339 340Because the values returned by the key function are used as hash keys, they 341ought to either be strings, or at least well-behaved as strings (such as 342numbers, or object references which overload stringification in a suitable 343manner). 344 345=cut 346 347sub uniq_by(&@) 348{ 349 my $code = shift; 350 351 my %present; 352 return grep { 353 my $key = $code->( local $_ = $_ ); 354 !$present{$key}++ 355 } @_; 356} 357 358=head2 partition_by 359 360 %parts = partition_by { KEYFUNC } @vals 361 362Returns a key/value list of ARRAY refs containing all the original values 363distributed according to the result of the key function block. Each value will 364be an ARRAY ref containing all the values which returned the string from the 365key function, in their original order. 366 367 my %balls_by_colour = partition_by { $_->colour } @balls; 368 369Because the values returned by the key function are used as hash keys, they 370ought to either be strings, or at least well-behaved as strings (such as 371numbers, or object references which overload stringification in a suitable 372manner). 373 374=cut 375 376sub partition_by(&@) 377{ 378 my $code = shift; 379 380 my %parts; 381 push @{ $parts{ $code->( local $_ = $_ ) } }, $_ for @_; 382 383 return %parts; 384} 385 386=head2 count_by 387 388 %counts = count_by { KEYFUNC } @vals 389 390I<Since version 0.07.> 391 392Returns a key/value list of integers, giving the number of times the key 393function block returned the key, for each value in the list. 394 395 my %count_of_balls = count_by { $_->colour } @balls; 396 397Because the values returned by the key function are used as hash keys, they 398ought to either be strings, or at least well-behaved as strings (such as 399numbers, or object references which overload stringification in a suitable 400manner). 401 402=cut 403 404sub count_by(&@) 405{ 406 my $code = shift; 407 408 my %counts; 409 $counts{ $code->( local $_ = $_ ) }++ for @_; 410 411 return %counts; 412} 413 414=head2 zip_by 415 416 @vals = zip_by { ITEMFUNC } \@arr0, \@arr1, \@arr2,... 417 418Returns a list of each of the values returned by the function block, when 419invoked with values from across each each of the given ARRAY references. Each 420value in the returned list will be the result of the function having been 421invoked with arguments at that position, from across each of the arrays given. 422 423 my @transposition = zip_by { [ @_ ] } @matrix; 424 425 my @names = zip_by { "$_[1], $_[0]" } \@firstnames, \@surnames; 426 427 print zip_by { "$_[0] => $_[1]\n" } [ keys %hash ], [ values %hash ]; 428 429If some of the arrays are shorter than others, the function will behave as if 430they had C<undef> in the trailing positions. The following two lines are 431equivalent: 432 433 zip_by { f(@_) } [ 1, 2, 3 ], [ "a", "b" ] 434 f( 1, "a" ), f( 2, "b" ), f( 3, undef ) 435 436The item function is called by C<map>, so if it returns a list, the entire 437list is included in the result. This can be useful for example, for generating 438a hash from two separate lists of keys and values 439 440 my %nums = zip_by { @_ } [qw( one two three )], [ 1, 2, 3 ]; 441 # %nums = ( one => 1, two => 2, three => 3 ) 442 443(A function having this behaviour is sometimes called C<zipWith>, e.g. in 444Haskell, but that name would not fit the naming scheme used by this module). 445 446=cut 447 448sub zip_by(&@) 449{ 450 my $code = shift; 451 452 @_ or return; 453 454 my $len = 0; 455 scalar @$_ > $len and $len = scalar @$_ for @_; 456 457 return map { 458 my $idx = $_; 459 $code->( map { $_[$_][$idx] } 0 .. $#_ ) 460 } 0 .. $len-1; 461} 462 463=head2 unzip_by 464 465 $arr0, $arr1, $arr2, ... = unzip_by { ITEMFUNC } @vals 466 467I<Since version 0.09.> 468 469Returns a list of ARRAY references containing the values returned by the 470function block, when invoked for each of the values given in the input list. 471Each of the returned ARRAY references will contain the values returned at that 472corresponding position by the function block. That is, the first returned 473ARRAY reference will contain all the values returned in the first position by 474the function block, the second will contain all the values from the second 475position, and so on. 476 477 my ( $firstnames, $lastnames ) = unzip_by { m/^(.*?) (.*)$/ } @names; 478 479If the function returns lists of differing lengths, the result will be padded 480with C<undef> in the missing elements. 481 482This function is an inverse of L</zip_by>, if given a corresponding inverse 483function. 484 485=cut 486 487sub unzip_by(&@) 488{ 489 my $code = shift; 490 491 my @ret; 492 foreach my $idx ( 0 .. $#_ ) { 493 my @slice = $code->( local $_ = $_[$idx] ); 494 $#slice = $#ret if @slice < @ret; 495 $ret[$_][$idx] = $slice[$_] for 0 .. $#slice; 496 } 497 498 return @ret; 499} 500 501=head2 extract_by 502 503 @vals = extract_by { SELECTFUNC } @arr 504 505I<Since version 0.05.> 506 507Removes elements from the referenced array on which the selection function 508returns true, and returns a list containing those elements. This function is 509similar to C<grep>, except that it modifies the referenced array to remove the 510selected values from it, leaving only the unselected ones. 511 512 my @red_balls = extract_by { $_->color eq "red" } @balls; 513 514 # Now there are no red balls in the @balls array 515 516This function modifies a real array, unlike most of the other functions in this 517module. Because of this, it requires a real array, not just a list. 518 519This function is implemented by invoking C<splice> on the array, not by 520constructing a new list and assigning it. One result of this is that weak 521references will not be disturbed. 522 523 extract_by { !defined $_ } @refs; 524 525will leave weak references weakened in the C<@refs> array, whereas 526 527 @refs = grep { defined $_ } @refs; 528 529will strengthen them all again. 530 531=cut 532 533sub extract_by(&\@) 534{ 535 my $code = shift; 536 my ( $arrref ) = @_; 537 538 my @ret; 539 for( my $idx = 0; ; $idx++ ) { 540 last if $idx > $#$arrref; 541 next unless $code->( local $_ = $arrref->[$idx] ); 542 543 push @ret, splice @$arrref, $idx, 1, (); 544 redo; 545 } 546 547 return @ret; 548} 549 550=head2 extract_first_by 551 552 $val = extract_first_by { SELECTFUNC } @arr 553 554I<Since version 0.10.> 555 556A hybrid between L</extract_by> and C<List::Util::first>. Removes the first 557element from the referenced array on which the selection function returns 558true, returning it. 559 560As with L</extract_by>, this function requires a real array and not just a 561list, and is also implemented using C<splice> so that weak references are 562not disturbed. 563 564If this function fails to find a matching element, it will return an empty 565list in list context. This allows a caller to distinguish the case between 566no matching element, and the first matching element being C<undef>. 567 568=cut 569 570sub extract_first_by(&\@) 571{ 572 my $code = shift; 573 my ( $arrref ) = @_; 574 575 foreach my $idx ( 0 .. $#$arrref ) { 576 next unless $code->( local $_ = $arrref->[$idx] ); 577 578 return splice @$arrref, $idx, 1, (); 579 } 580 581 return; 582} 583 584=head2 weighted_shuffle_by 585 586 @vals = weighted_shuffle_by { WEIGHTFUNC } @vals 587 588I<Since version 0.07.> 589 590Returns the list of values shuffled into a random order. The randomisation is 591not uniform, but weighted by the value returned by the C<WEIGHTFUNC>. The 592probabilty of each item being returned first will be distributed with the 593distribution of the weights, and so on recursively for the remaining items. 594 595=cut 596 597sub weighted_shuffle_by(&@) 598{ 599 my $code = shift; 600 my @vals = @_; 601 602 my @weights = map { $code->( local $_ = $_ ) } @vals; 603 604 my @ret; 605 while( @vals > 1 ) { 606 my $total = 0; $total += $_ for @weights; 607 my $select = int rand $total; 608 my $idx = 0; 609 while( $select >= $weights[$idx] ) { 610 $select -= $weights[$idx++]; 611 } 612 613 push @ret, splice @vals, $idx, 1, (); 614 splice @weights, $idx, 1, (); 615 } 616 617 push @ret, @vals if @vals; 618 619 return @ret; 620} 621 622=head2 bundle_by 623 624 @vals = bundle_by { BLOCKFUNC } $number, @vals 625 626I<Since version 0.07.> 627 628Similar to a regular C<map> functional, returns a list of the values returned 629by C<BLOCKFUNC>. Values from the input list are given to the block function in 630bundles of C<$number>. 631 632If given a list of values whose length does not evenly divide by C<$number>, 633the final call will be passed fewer elements than the others. 634 635=cut 636 637sub bundle_by(&@) 638{ 639 my $code = shift; 640 my $n = shift; 641 642 my @ret; 643 for( my ( $pos, $next ) = ( 0, $n ); $pos < @_; $pos = $next, $next += $n ) { 644 $next = @_ if $next > @_; 645 push @ret, $code->( @_[$pos .. $next-1] ); 646 } 647 return @ret; 648} 649 650=head1 TODO 651 652=over 4 653 654=item * XS implementations 655 656These functions are currently all written in pure perl. Some at least, may 657benefit from having XS implementations to speed up their logic. 658 659=item * Merge into L<List::Util> or L<List::MoreUtils> 660 661This module shouldn't really exist. The functions should instead be part of 662one of the existing modules that already contain many list utility functions. 663Having Yet Another List Utilty Module just worsens the problem. 664 665I have attempted to contact the authors of both of the above modules, to no 666avail; therefore I decided it best to write and release this code here anyway 667so that it is at least on CPAN. Once there, we can then see how best to merge 668it into an existing module. 669 670I<Updated 2015/07/16>: As I am now the maintainer of L<List::Util>, some 671amount of merging/copying should be possible. However, given the latter's key 672position in the core F<perl> distribution and head of the "CPAN River" I am 673keen not to do this wholesale, but a selected pick of what seems best, by a 674popular consensus. 675 676=item * C<head> and C<tail>-like functions 677 678Consider perhaps 679 680 head_before { COND } LIST # excludes terminating element 681 head_upto { COND } LIST # includes terminating element 682 683 tail_since { COND } LIST # includes initiating element 684 tail_after { COND } LIST # excludes initiating element 685 686(See also L<https://rt.cpan.org/Ticket/Display.html?id=105907>). 687 688=back 689 690=head1 AUTHOR 691 692Paul Evans <leonerd@leonerd.org.uk> 693 694=cut 695 6960x55AA; 697