1#  You may distribute under the terms of either the GNU General Public License
2#  or the Artistic License (the same terms as Perl itself)
3#
4#  (C) Paul Evans, 2009-2018 -- leonerd@leonerd.org.uk
5
6package List::UtilsBy;
7
8use strict;
9use warnings;
10
11our $VERSION = '0.11';
12
13use Exporter 'import';
14
15our @EXPORT_OK = qw(
16   sort_by
17   nsort_by
18   rev_sort_by
19   rev_nsort_by
20
21   max_by nmax_by
22   min_by nmin_by
23   minmax_by nminmax_by
24
25   uniq_by
26
27   partition_by
28   count_by
29
30   zip_by
31   unzip_by
32
33   extract_by
34   extract_first_by
35
36   weighted_shuffle_by
37
38   bundle_by
39);
40
41=head1 NAME
42
43C<List::UtilsBy> - higher-order list utility functions
44
45=head1 SYNOPSIS
46
47   use List::UtilsBy qw( nsort_by min_by );
48
49   use File::stat qw( stat );
50   my @files_by_age = nsort_by { stat($_)->mtime } @files;
51
52   my $shortest_name = min_by { length } @names;
53
54=head1 DESCRIPTION
55
56This module provides a number of list utility functions, all of which take an
57initial code block to control their behaviour. They are variations on similar
58core perl or C<List::Util> functions of similar names, but which use the block
59to control their behaviour. For example, the core Perl function C<sort> takes
60a list of values and returns them, sorted into order by their string value.
61The L</sort_by> function sorts them according to the string value returned by
62the extra function, when given each value.
63
64   my @names_sorted = sort @names;
65
66   my @people_sorted = sort_by { $_->name } @people;
67
68=cut
69
70=head1 FUNCTIONS
71
72All functions added since version 0.04 unless otherwise stated, as the
73original names for earlier versions were renamed.
74
75=cut
76
77=head2 sort_by
78
79   @vals = sort_by { KEYFUNC } @vals
80
81Returns the list of values sorted according to the string values returned by
82the C<KEYFUNC> block or function. A typical use of this may be to sort objects
83according to the string value of some accessor, such as
84
85   sort_by { $_->name } @people
86
87The key function is called in scalar context, being passed each value in turn
88as both C<$_> and the only argument in the parameters, C<@_>. The values are
89then sorted according to string comparisons on the values returned.
90
91This is equivalent to
92
93   sort { $a->name cmp $b->name } @people
94
95except that it guarantees the C<name> accessor will be executed only once per
96value.
97
98One interesting use-case is to sort strings which may have numbers embedded in
99them "naturally", rather than lexically.
100
101   sort_by { s/(\d+)/sprintf "%09d", $1/eg; $_ } @strings
102
103This sorts strings by generating sort keys which zero-pad the embedded numbers
104to some level (9 digits in this case), helping to ensure the lexical sort puts
105them in the correct order.
106
107=cut
108
109sub sort_by(&@)
110{
111   my $keygen = shift;
112
113   my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
114   return @_[ sort { $keys[$a] cmp $keys[$b] } 0 .. $#_ ];
115}
116
117=head2 nsort_by
118
119   @vals = nsort_by { KEYFUNC } @vals
120
121Similar to L</sort_by> but compares its key values numerically.
122
123=cut
124
125sub nsort_by(&@)
126{
127   my $keygen = shift;
128
129   my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
130   return @_[ sort { $keys[$a] <=> $keys[$b] } 0 .. $#_ ];
131}
132
133=head2 rev_sort_by
134
135=head2 rev_nsort_by
136
137   @vals = rev_sort_by { KEYFUNC } @vals
138
139   @vals = rev_nsort_by { KEYFUNC } @vals
140
141I<Since version 0.06.>
142
143Similar to L</sort_by> and L</nsort_by> but returns the list in the reverse
144order. Equivalent to
145
146   @vals = reverse sort_by { KEYFUNC } @vals
147
148except that these functions are slightly more efficient because they avoid
149the final C<reverse> operation.
150
151=cut
152
153sub rev_sort_by(&@)
154{
155   my $keygen = shift;
156
157   my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
158   return @_[ sort { $keys[$b] cmp $keys[$a] } 0 .. $#_ ];
159}
160
161sub rev_nsort_by(&@)
162{
163   my $keygen = shift;
164
165   my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
166   return @_[ sort { $keys[$b] <=> $keys[$a] } 0 .. $#_ ];
167}
168
169=head2 max_by
170
171   $optimal = max_by { KEYFUNC } @vals
172
173   @optimal = max_by { KEYFUNC } @vals
174
175Returns the (first) value from C<@vals> that gives the numerically largest
176result from the key function.
177
178   my $tallest = max_by { $_->height } @people
179
180   use File::stat qw( stat );
181   my $newest = max_by { stat($_)->mtime } @files;
182
183In scalar context, the first maximal value is returned. In list context, a
184list of all the maximal values is returned. This may be used to obtain
185positions other than the first, if order is significant.
186
187If called on an empty list, an empty list is returned.
188
189For symmetry with the L</nsort_by> function, this is also provided under the
190name C<nmax_by> since it behaves numerically.
191
192=cut
193
194sub max_by(&@)
195{
196   my $code = shift;
197
198   return unless @_;
199
200   local $_;
201
202   my @maximal = $_ = shift @_;
203   my $max     = $code->( $_ );
204
205   foreach ( @_ ) {
206      my $this = $code->( $_ );
207      if( $this > $max ) {
208         @maximal = $_;
209         $max     = $this;
210      }
211      elsif( wantarray and $this == $max ) {
212         push @maximal, $_;
213      }
214   }
215
216   return wantarray ? @maximal : $maximal[0];
217}
218
219*nmax_by = \&max_by;
220
221=head2 min_by
222
223   $optimal = min_by { KEYFUNC } @vals
224
225   @optimal = min_by { KEYFUNC } @vals
226
227Similar to L</max_by> but returns values which give the numerically smallest
228result from the key function. Also provided as C<nmin_by>
229
230=cut
231
232sub min_by(&@)
233{
234   my $code = shift;
235
236   return unless @_;
237
238   local $_;
239
240   my @minimal = $_ = shift @_;
241   my $min     = $code->( $_ );
242
243   foreach ( @_ ) {
244      my $this = $code->( $_ );
245      if( $this < $min ) {
246         @minimal = $_;
247         $min     = $this;
248      }
249      elsif( wantarray and $this == $min ) {
250         push @minimal, $_;
251      }
252   }
253
254   return wantarray ? @minimal : $minimal[0];
255}
256
257*nmin_by = \&min_by;
258
259=head2 minmax_by
260
261   ( $minimal, $maximal ) = minmax_by { KEYFUNC } @vals
262
263I<Since version 0.11.>
264
265Similar to calling both L</min_by> and L</max_by> with the same key function
266on the same list. This version is more efficient than calling the two other
267functions individually, as it has less work to perform overall. In the case of
268ties, only the first optimal element found in each case is returned. Also
269provided as C<nminmax_by>.
270
271=cut
272
273sub minmax_by(&@)
274{
275   my $code = shift;
276
277   return unless @_;
278
279   my $minimal = $_ = shift @_;
280   my $min     = $code->( $_ );
281
282   return ( $minimal, $minimal ) unless @_;
283
284   my $maximal = $_ = shift @_;
285   my $max     = $code->( $_ );
286
287   if( $max < $min ) {
288      ( $maximal, $minimal ) = ( $minimal, $maximal );
289      ( $max,     $min     ) = ( $min,     $max );
290   }
291
292   # Minmax algorithm is faster than naïve min + max individually because it
293   # takes pairs of values
294   while( @_ ) {
295      my $try_minimal = $_ = shift @_;
296      my $try_min     = $code->( $_ );
297
298      my $try_maximal = $try_minimal;
299      my $try_max     = $try_min;
300      if( @_ ) {
301         $try_maximal = $_ = shift @_;
302         $try_max     = $code->( $_ );
303
304         if( $try_max < $try_min ) {
305            ( $try_minimal, $try_maximal ) = ( $try_maximal, $try_minimal );
306            ( $try_min,     $try_max     ) = ( $try_max,     $try_min );
307         }
308      }
309
310      if( $try_min < $min ) {
311         $minimal = $try_minimal;
312         $min     = $try_min;
313      }
314      if( $try_max > $max ) {
315         $maximal = $try_maximal;
316         $max     = $try_max;
317      }
318   }
319
320   return ( $minimal, $maximal );
321}
322
323*nminmax_by = \&minmax_by;
324
325=head2 uniq_by
326
327   @vals = uniq_by { KEYFUNC } @vals
328
329Returns a list of the subset of values for which the key function block
330returns unique values. The first value yielding a particular key is chosen,
331subsequent values are rejected.
332
333   my @some_fruit = uniq_by { $_->colour } @fruit;
334
335To select instead the last value per key, reverse the input list. If the order
336of the results is significant, don't forget to reverse the result as well:
337
338   my @some_fruit = reverse uniq_by { $_->colour } reverse @fruit;
339
340Because the values returned by the key function are used as hash keys, they
341ought to either be strings, or at least well-behaved as strings (such as
342numbers, or object references which overload stringification in a suitable
343manner).
344
345=cut
346
347sub uniq_by(&@)
348{
349   my $code = shift;
350
351   my %present;
352   return grep {
353      my $key = $code->( local $_ = $_ );
354      !$present{$key}++
355   } @_;
356}
357
358=head2 partition_by
359
360   %parts = partition_by { KEYFUNC } @vals
361
362Returns a key/value list of ARRAY refs containing all the original values
363distributed according to the result of the key function block. Each value will
364be an ARRAY ref containing all the values which returned the string from the
365key function, in their original order.
366
367   my %balls_by_colour = partition_by { $_->colour } @balls;
368
369Because the values returned by the key function are used as hash keys, they
370ought to either be strings, or at least well-behaved as strings (such as
371numbers, or object references which overload stringification in a suitable
372manner).
373
374=cut
375
376sub partition_by(&@)
377{
378   my $code = shift;
379
380   my %parts;
381   push @{ $parts{ $code->( local $_ = $_ ) } }, $_ for @_;
382
383   return %parts;
384}
385
386=head2 count_by
387
388   %counts = count_by { KEYFUNC } @vals
389
390I<Since version 0.07.>
391
392Returns a key/value list of integers, giving the number of times the key
393function block returned the key, for each value in the list.
394
395   my %count_of_balls = count_by { $_->colour } @balls;
396
397Because the values returned by the key function are used as hash keys, they
398ought to either be strings, or at least well-behaved as strings (such as
399numbers, or object references which overload stringification in a suitable
400manner).
401
402=cut
403
404sub count_by(&@)
405{
406   my $code = shift;
407
408   my %counts;
409   $counts{ $code->( local $_ = $_ ) }++ for @_;
410
411   return %counts;
412}
413
414=head2 zip_by
415
416   @vals = zip_by { ITEMFUNC } \@arr0, \@arr1, \@arr2,...
417
418Returns a list of each of the values returned by the function block, when
419invoked with values from across each each of the given ARRAY references. Each
420value in the returned list will be the result of the function having been
421invoked with arguments at that position, from across each of the arrays given.
422
423   my @transposition = zip_by { [ @_ ] } @matrix;
424
425   my @names = zip_by { "$_[1], $_[0]" } \@firstnames, \@surnames;
426
427   print zip_by { "$_[0] => $_[1]\n" } [ keys %hash ], [ values %hash ];
428
429If some of the arrays are shorter than others, the function will behave as if
430they had C<undef> in the trailing positions. The following two lines are
431equivalent:
432
433   zip_by { f(@_) } [ 1, 2, 3 ], [ "a", "b" ]
434   f( 1, "a" ), f( 2, "b" ), f( 3, undef )
435
436The item function is called by C<map>, so if it returns a list, the entire
437list is included in the result. This can be useful for example, for generating
438a hash from two separate lists of keys and values
439
440   my %nums = zip_by { @_ } [qw( one two three )], [ 1, 2, 3 ];
441   # %nums = ( one => 1, two => 2, three => 3 )
442
443(A function having this behaviour is sometimes called C<zipWith>, e.g. in
444Haskell, but that name would not fit the naming scheme used by this module).
445
446=cut
447
448sub zip_by(&@)
449{
450   my $code = shift;
451
452   @_ or return;
453
454   my $len = 0;
455   scalar @$_ > $len and $len = scalar @$_ for @_;
456
457   return map {
458      my $idx = $_;
459      $code->( map { $_[$_][$idx] } 0 .. $#_ )
460   } 0 .. $len-1;
461}
462
463=head2 unzip_by
464
465   $arr0, $arr1, $arr2, ... = unzip_by { ITEMFUNC } @vals
466
467I<Since version 0.09.>
468
469Returns a list of ARRAY references containing the values returned by the
470function block, when invoked for each of the values given in the input list.
471Each of the returned ARRAY references will contain the values returned at that
472corresponding position by the function block. That is, the first returned
473ARRAY reference will contain all the values returned in the first position by
474the function block, the second will contain all the values from the second
475position, and so on.
476
477   my ( $firstnames, $lastnames ) = unzip_by { m/^(.*?) (.*)$/ } @names;
478
479If the function returns lists of differing lengths, the result will be padded
480with C<undef> in the missing elements.
481
482This function is an inverse of L</zip_by>, if given a corresponding inverse
483function.
484
485=cut
486
487sub unzip_by(&@)
488{
489   my $code = shift;
490
491   my @ret;
492   foreach my $idx ( 0 .. $#_ ) {
493      my @slice = $code->( local $_ = $_[$idx] );
494      $#slice = $#ret if @slice < @ret;
495      $ret[$_][$idx] = $slice[$_] for 0 .. $#slice;
496   }
497
498   return @ret;
499}
500
501=head2 extract_by
502
503   @vals = extract_by { SELECTFUNC } @arr
504
505I<Since version 0.05.>
506
507Removes elements from the referenced array on which the selection function
508returns true, and returns a list containing those elements. This function is
509similar to C<grep>, except that it modifies the referenced array to remove the
510selected values from it, leaving only the unselected ones.
511
512   my @red_balls = extract_by { $_->color eq "red" } @balls;
513
514   # Now there are no red balls in the @balls array
515
516This function modifies a real array, unlike most of the other functions in this
517module. Because of this, it requires a real array, not just a list.
518
519This function is implemented by invoking C<splice> on the array, not by
520constructing a new list and assigning it. One result of this is that weak
521references will not be disturbed.
522
523   extract_by { !defined $_ } @refs;
524
525will leave weak references weakened in the C<@refs> array, whereas
526
527   @refs = grep { defined $_ } @refs;
528
529will strengthen them all again.
530
531=cut
532
533sub extract_by(&\@)
534{
535   my $code = shift;
536   my ( $arrref ) = @_;
537
538   my @ret;
539   for( my $idx = 0; ; $idx++ ) {
540      last if $idx > $#$arrref;
541      next unless $code->( local $_ = $arrref->[$idx] );
542
543      push @ret, splice @$arrref, $idx, 1, ();
544      redo;
545   }
546
547   return @ret;
548}
549
550=head2 extract_first_by
551
552   $val = extract_first_by { SELECTFUNC } @arr
553
554I<Since version 0.10.>
555
556A hybrid between L</extract_by> and C<List::Util::first>. Removes the first
557element from the referenced array on which the selection function returns
558true, returning it.
559
560As with L</extract_by>, this function requires a real array and not just a
561list, and is also implemented using C<splice> so that weak references are
562not disturbed.
563
564If this function fails to find a matching element, it will return an empty
565list in list context. This allows a caller to distinguish the case between
566no matching element, and the first matching element being C<undef>.
567
568=cut
569
570sub extract_first_by(&\@)
571{
572   my $code = shift;
573   my ( $arrref ) = @_;
574
575   foreach my $idx ( 0 .. $#$arrref ) {
576      next unless $code->( local $_ = $arrref->[$idx] );
577
578      return splice @$arrref, $idx, 1, ();
579   }
580
581   return;
582}
583
584=head2 weighted_shuffle_by
585
586   @vals = weighted_shuffle_by { WEIGHTFUNC } @vals
587
588I<Since version 0.07.>
589
590Returns the list of values shuffled into a random order. The randomisation is
591not uniform, but weighted by the value returned by the C<WEIGHTFUNC>. The
592probabilty of each item being returned first will be distributed with the
593distribution of the weights, and so on recursively for the remaining items.
594
595=cut
596
597sub weighted_shuffle_by(&@)
598{
599   my $code = shift;
600   my @vals = @_;
601
602   my @weights = map { $code->( local $_ = $_ ) } @vals;
603
604   my @ret;
605   while( @vals > 1 ) {
606      my $total = 0; $total += $_ for @weights;
607      my $select = int rand $total;
608      my $idx = 0;
609      while( $select >= $weights[$idx] ) {
610         $select -= $weights[$idx++];
611      }
612
613      push @ret, splice @vals, $idx, 1, ();
614      splice @weights, $idx, 1, ();
615   }
616
617   push @ret, @vals if @vals;
618
619   return @ret;
620}
621
622=head2 bundle_by
623
624   @vals = bundle_by { BLOCKFUNC } $number, @vals
625
626I<Since version 0.07.>
627
628Similar to a regular C<map> functional, returns a list of the values returned
629by C<BLOCKFUNC>. Values from the input list are given to the block function in
630bundles of C<$number>.
631
632If given a list of values whose length does not evenly divide by C<$number>,
633the final call will be passed fewer elements than the others.
634
635=cut
636
637sub bundle_by(&@)
638{
639   my $code = shift;
640   my $n = shift;
641
642   my @ret;
643   for( my ( $pos, $next ) = ( 0, $n ); $pos < @_; $pos = $next, $next += $n ) {
644      $next = @_ if $next > @_;
645      push @ret, $code->( @_[$pos .. $next-1] );
646   }
647   return @ret;
648}
649
650=head1 TODO
651
652=over 4
653
654=item * XS implementations
655
656These functions are currently all written in pure perl. Some at least, may
657benefit from having XS implementations to speed up their logic.
658
659=item * Merge into L<List::Util> or L<List::MoreUtils>
660
661This module shouldn't really exist. The functions should instead be part of
662one of the existing modules that already contain many list utility functions.
663Having Yet Another List Utilty Module just worsens the problem.
664
665I have attempted to contact the authors of both of the above modules, to no
666avail; therefore I decided it best to write and release this code here anyway
667so that it is at least on CPAN. Once there, we can then see how best to merge
668it into an existing module.
669
670I<Updated 2015/07/16>: As I am now the maintainer of L<List::Util>, some
671amount of merging/copying should be possible. However, given the latter's key
672position in the core F<perl> distribution and head of the "CPAN River" I am
673keen not to do this wholesale, but a selected pick of what seems best, by a
674popular consensus.
675
676=item * C<head> and C<tail>-like functions
677
678Consider perhaps
679
680   head_before { COND } LIST  # excludes terminating element
681   head_upto   { COND } LIST  # includes terminating element
682
683   tail_since  { COND } LIST  # includes initiating element
684   tail_after  { COND } LIST  # excludes initiating element
685
686(See also L<https://rt.cpan.org/Ticket/Display.html?id=105907>).
687
688=back
689
690=head1 AUTHOR
691
692Paul Evans <leonerd@leonerd.org.uk>
693
694=cut
695
6960x55AA;
697