1package TAP::Parser::Scheduler;
2
3use strict;
4use warnings;
5
6use Carp;
7use TAP::Parser::Scheduler::Job;
8use TAP::Parser::Scheduler::Spinner;
9
10=head1 NAME
11
12TAP::Parser::Scheduler - Schedule tests during parallel testing
13
14=head1 VERSION
15
16Version 3.30
17
18=cut
19
20our $VERSION = '3.30_01';
21
22=head1 SYNOPSIS
23
24    use TAP::Parser::Scheduler;
25
26=head1 DESCRIPTION
27
28=head1 METHODS
29
30=head2 Class Methods
31
32=head3 C<new>
33
34    my $sched = TAP::Parser::Scheduler->new(tests => \@tests);
35    my $sched = TAP::Parser::Scheduler->new(
36        tests => [ ['t/test_name.t','Test Description'], ... ],
37        rules => \%rules,
38    );
39
40Given 'tests' and optional 'rules' as input, returns a new
41C<TAP::Parser::Scheduler> object.  Each member of C<@tests> should be either a
42a test file name, or a two element arrayref, where the first element is a test
43file name, and the second element is a test description. By default, we'll use
44the test name as the description.
45
46The optional C<rules> attribute provides direction on which tests should be run
47in parallel and which should be run sequentially. If no rule data structure is
48provided, a default data structure is used which makes every test eligible to
49be run in parallel:
50
51    { par => '**' },
52
53The rules data structure is documented more in the next section.
54
55=head2 Rules data structure
56
57The "C<rules>" data structure is the the heart of the scheduler. It allows you
58to express simple rules like "run all tests in sequence" or "run all tests in
59parallel except these five tests.". However, the rules structure also supports
60glob-style pattern matching and recursive definitions, so you can also express
61arbitarily complicated patterns.
62
63The rule must only have one top level key: either 'par' for "parallel" or 'seq'
64for "sequence".
65
66Values must be either strings with possible glob-style matching, or arrayrefs
67of strings or hashrefs which follow this pattern recursively.
68
69Every element in an arrayref directly below a 'par' key is eligible to be run
70in parallel, while vavalues directly below a 'seq' key must be run in sequence.
71
72=head3 Rules examples
73
74Here are some examples:
75
76    # All tests be run in parallel (the default rule)
77    { par => '**' },
78
79    # Run all tests in sequence, except those starting with "p"
80    { par => 't/p*.t' },
81
82    # Run all tests in parallel, except those starting with "p"
83    {
84        seq => [
85                  { seq => 't/p*.t' },
86                  { par => '**'     },
87               ],
88    }
89
90    # Run some  startup tests in sequence, then some parallel tests than some
91    # teardown tests in sequence.
92    {
93        seq => [
94            { seq => 't/startup/*.t' },
95            { par => ['t/a/*.t','t/b/*.t','t/c/*.t'], }
96            { seq => 't/shutdown/*.t' },
97        ],
98    },
99
100
101=head3 Rules resolution
102
103=over4
104
105=item * By default, all tests are eligible to be run in parallel. Specifying any of your own rules removes this one.
106
107=item * "First match wins". The first rule that matches a test will be the one that applies.
108
109=item * Any test which does not match a rule will be run in sequence at the end of the run.
110
111=item * The existence of a rule does not imply selecting a test. You must still specify the tests to run.
112
113=item * Specifying a rule to allow tests to run in parallel does not make the run in parallel. You still need specify the number of parallel C<jobs> in your Harness object.
114
115=back
116
117=head3 Glob-style pattern matching for rules
118
119We implement our own glob-style pattern matching. Here are the patterns it supports:
120
121    ** is any number of characters, including /, within a pathname
122    * is zero or more characters within a filename/directory name
123    ? is exactly one character within a filename/directory name
124    {foo,bar,baz} is any of foo, bar or baz.
125    \ is an escape character
126
127=cut
128
129sub new {
130    my $class = shift;
131
132    croak "Need a number of key, value pairs" if @_ % 2;
133
134    my %args  = @_;
135    my $tests = delete $args{tests} || croak "Need a 'tests' argument";
136    my $rules = delete $args{rules} || { par => '**' };
137
138    croak "Unknown arg(s): ", join ', ', sort keys %args
139      if keys %args;
140
141    # Turn any simple names into a name, description pair. TODO: Maybe
142    # construct jobs here?
143    my $self = bless {}, $class;
144
145    $self->_set_rules( $rules, $tests );
146
147    return $self;
148}
149
150# Build the scheduler data structure.
151#
152# SCHEDULER-DATA ::= JOB
153#                ||  ARRAY OF ARRAY OF SCHEDULER-DATA
154#
155# The nested arrays are the key to scheduling. The outer array contains
156# a list of things that may be executed in parallel. Whenever an
157# eligible job is sought any element of the outer array that is ready to
158# execute can be selected. The inner arrays represent sequential
159# execution. They can only proceed when the first job is ready to run.
160
161sub _set_rules {
162    my ( $self, $rules, $tests ) = @_;
163
164    # Convert all incoming tests to job objects.
165    # If no test description is provided use the file name as the description.
166    my @tests = map { TAP::Parser::Scheduler::Job->new(@$_) }
167      map { 'ARRAY' eq ref $_ ? $_ : [ $_, $_ ] } @$tests;
168    my $schedule = $self->_rule_clause( $rules, \@tests );
169
170    # If any tests are left add them as a sequential block at the end of
171    # the run.
172    $schedule = [ [ $schedule, @tests ] ] if @tests;
173
174    $self->{schedule} = $schedule;
175}
176
177sub _rule_clause {
178    my ( $self, $rule, $tests ) = @_;
179    croak 'Rule clause must be a hash'
180      unless 'HASH' eq ref $rule;
181
182    my @type = keys %$rule;
183    croak 'Rule clause must have exactly one key'
184      unless @type == 1;
185
186    my %handlers = (
187        par => sub {
188            [ map { [$_] } @_ ];
189        },
190        seq => sub { [ [@_] ] },
191    );
192
193    my $handler = $handlers{ $type[0] }
194      || croak 'Unknown scheduler type: ', $type[0];
195    my $val = $rule->{ $type[0] };
196
197    return $handler->(
198        map {
199            'HASH' eq ref $_
200              ? $self->_rule_clause( $_, $tests )
201              : $self->_expand( $_, $tests )
202          } 'ARRAY' eq ref $val ? @$val : $val
203    );
204}
205
206sub _glob_to_regexp {
207    my ( $self, $glob ) = @_;
208    my $nesting;
209    my $pattern;
210
211    while (1) {
212        if ( $glob =~ /\G\*\*/gc ) {
213
214            # ** is any number of characters, including /, within a pathname
215            $pattern .= '.*?';
216        }
217        elsif ( $glob =~ /\G\*/gc ) {
218
219            # * is zero or more characters within a filename/directory name
220            $pattern .= '[^/]*';
221        }
222        elsif ( $glob =~ /\G\?/gc ) {
223
224            # ? is exactly one character within a filename/directory name
225            $pattern .= '[^/]';
226        }
227        elsif ( $glob =~ /\G\{/gc ) {
228
229            # {foo,bar,baz} is any of foo, bar or baz.
230            $pattern .= '(?:';
231            ++$nesting;
232        }
233        elsif ( $nesting and $glob =~ /\G,/gc ) {
234
235            # , is only special inside {}
236            $pattern .= '|';
237        }
238        elsif ( $nesting and $glob =~ /\G\}/gc ) {
239
240            # } that matches { is special. But unbalanced } are not.
241            $pattern .= ')';
242            --$nesting;
243        }
244        elsif ( $glob =~ /\G(\\.)/gc ) {
245
246            # A quoted literal
247            $pattern .= $1;
248        }
249        elsif ( $glob =~ /\G([\},])/gc ) {
250
251            # Sometimes meta characters
252            $pattern .= '\\' . $1;
253        }
254        else {
255
256            # Eat everything that is not a meta character.
257            $glob =~ /\G([^{?*\\\},]*)/gc;
258            $pattern .= quotemeta $1;
259        }
260        return $pattern if pos $glob == length $glob;
261    }
262}
263
264sub _expand {
265    my ( $self, $name, $tests ) = @_;
266
267    my $pattern = $self->_glob_to_regexp($name);
268    $pattern = qr/^ $pattern $/x;
269    my @match = ();
270
271    for ( my $ti = 0; $ti < @$tests; $ti++ ) {
272        if ( $tests->[$ti]->filename =~ $pattern ) {
273            push @match, splice @$tests, $ti, 1;
274            $ti--;
275        }
276    }
277
278    return @match;
279}
280
281=head2 Instance Methods
282
283=head3 C<get_all>
284
285Get a list of all remaining tests.
286
287=cut
288
289sub get_all {
290    my $self = shift;
291    my @all  = $self->_gather( $self->{schedule} );
292    $self->{count} = @all;
293    @all;
294}
295
296sub _gather {
297    my ( $self, $rule ) = @_;
298    return unless defined $rule;
299    return $rule unless 'ARRAY' eq ref $rule;
300    return map { defined() ? $self->_gather($_) : () } map {@$_} @$rule;
301}
302
303=head3 C<get_job>
304
305Return the next available job as L<TAP::Parser::Scheduler::Job> object or
306C<undef> if none are available. Returns a L<TAP::Parser::Scheduler::Spinner> if
307the scheduler still has pending jobs but none are available to run right now.
308
309=cut
310
311sub get_job {
312    my $self = shift;
313    $self->{count} ||= $self->get_all;
314    my @jobs = $self->_find_next_job( $self->{schedule} );
315    if (@jobs) {
316        --$self->{count};
317        return $jobs[0];
318    }
319
320    return TAP::Parser::Scheduler::Spinner->new
321      if $self->{count};
322
323    return;
324}
325
326sub _not_empty {
327    my $ar = shift;
328    return 1 unless 'ARRAY' eq ref $ar;
329    for (@$ar) {
330        return 1 if _not_empty($_);
331    }
332    return;
333}
334
335sub _is_empty { !_not_empty(@_) }
336
337sub _find_next_job {
338    my ( $self, $rule ) = @_;
339
340    my @queue = ();
341    my $index = 0;
342    while ( $index < @$rule ) {
343        my $seq = $rule->[$index];
344
345        # Prune any exhausted items.
346        shift @$seq while @$seq && _is_empty( $seq->[0] );
347        if (@$seq) {
348            if ( defined $seq->[0] ) {
349                if ( 'ARRAY' eq ref $seq->[0] ) {
350                    push @queue, $seq;
351                }
352                else {
353                    my $job = splice @$seq, 0, 1, undef;
354                    $job->on_finish( sub { shift @$seq } );
355                    return $job;
356                }
357            }
358            ++$index;
359        }
360        else {
361
362            # Remove the empty sub-array from the array
363            splice @$rule, $index, 1;
364        }
365    }
366
367    for my $seq (@queue) {
368        if ( my @jobs = $self->_find_next_job( $seq->[0] ) ) {
369            return @jobs;
370        }
371    }
372
373    return;
374}
375
376=head3 C<as_string>
377
378Return a human readable representation of the scheduling tree.
379For example:
380
381    my @tests = (qw{
382        t/startup/foo.t
383        t/shutdown/foo.t
384
385        t/a/foo.t t/b/foo.t t/c/foo.t t/d/foo.t
386    });
387    my $sched = TAP::Parser::Scheduler->new(
388        tests => \@tests,
389        rules => {
390            seq => [
391                { seq => 't/startup/*.t' },
392                { par => ['t/a/*.t','t/b/*.t','t/c/*.t'] },
393                { seq => 't/shutdown/*.t' },
394            ],
395        },
396    );
397
398Produces:
399
400    par:
401      seq:
402        par:
403          seq:
404            par:
405              seq:
406                't/startup/foo.t'
407            par:
408              seq:
409                't/a/foo.t'
410              seq:
411                't/b/foo.t'
412              seq:
413                't/c/foo.t'
414            par:
415              seq:
416                't/shutdown/foo.t'
417        't/d/foo.t'
418
419
420=cut
421
422
423sub as_string {
424    my $self = shift;
425    return $self->_as_string( $self->{schedule} );
426}
427
428sub _as_string {
429    my ( $self, $rule, $depth ) = ( shift, shift, shift || 0 );
430    my $pad    = ' ' x 2;
431    my $indent = $pad x $depth;
432    if ( !defined $rule ) {
433        return "$indent(undef)\n";
434    }
435    elsif ( 'ARRAY' eq ref $rule ) {
436        return unless @$rule;
437        my $type = ( 'par', 'seq' )[ $depth % 2 ];
438        return join(
439            '', "$indent$type:\n",
440            map { $self->_as_string( $_, $depth + 1 ) } @$rule
441        );
442    }
443    else {
444        return "$indent'" . $rule->filename . "'\n";
445    }
446}
447
4481;
449