1package TAP::Parser::Grammar; 2 3use strict; 4use vars qw($VERSION @ISA); 5 6use TAP::Object (); 7use TAP::Parser::ResultFactory (); 8use TAP::Parser::YAMLish::Reader (); 9 10@ISA = qw(TAP::Object); 11 12=head1 NAME 13 14TAP::Parser::Grammar - A grammar for the Test Anything Protocol. 15 16=head1 VERSION 17 18Version 3.26 19 20=cut 21 22$VERSION = '3.26'; 23 24=head1 SYNOPSIS 25 26 use TAP::Parser::Grammar; 27 my $grammar = $self->make_grammar({ 28 iterator => $tap_parser_iterator, 29 parser => $tap_parser, 30 version => 12, 31 }); 32 33 my $result = $grammar->tokenize; 34 35=head1 DESCRIPTION 36 37C<TAP::Parser::Grammar> tokenizes lines from a L<TAP::Parser::Iterator> and 38constructs L<TAP::Parser::Result> subclasses to represent the tokens. 39 40Do not attempt to use this class directly. It won't make sense. It's mainly 41here to ensure that we will be able to have pluggable grammars when TAP is 42expanded at some future date (plus, this stuff was really cluttering the 43parser). 44 45=head1 METHODS 46 47=head2 Class Methods 48 49=head3 C<new> 50 51 my $grammar = TAP::Parser::Grammar->new({ 52 iterator => $iterator, 53 parser => $parser, 54 version => $version, 55 }); 56 57Returns L<TAP::Parser> grammar object that will parse the TAP stream from the 58specified iterator. Both C<iterator> and C<parser> are required arguments. 59If C<version> is not set it defaults to C<12> (see L</set_version> for more 60details). 61 62=cut 63 64# new() implementation supplied by TAP::Object 65sub _initialize { 66 my ( $self, $args ) = @_; 67 $self->{iterator} = $args->{iterator}; # TODO: accessor 68 $self->{iterator} ||= $args->{stream}; # deprecated 69 $self->{parser} = $args->{parser}; # TODO: accessor 70 $self->set_version( $args->{version} || 12 ); 71 return $self; 72} 73 74my %language_for; 75 76{ 77 78 # XXX the 'not' and 'ok' might be on separate lines in VMS ... 79 my $ok = qr/(?:not )?ok\b/; 80 my $num = qr/\d+/; 81 82 my %v12 = ( 83 version => { 84 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i, 85 handler => sub { 86 my ( $self, $line ) = @_; 87 my $version = $1; 88 return $self->_make_version_token( $line, $version, ); 89 }, 90 }, 91 plan => { 92 syntax => qr/^1\.\.(\d+)\s*(.*)\z/, 93 handler => sub { 94 my ( $self, $line ) = @_; 95 my ( $tests_planned, $tail ) = ( $1, $2 ); 96 my $explanation = undef; 97 my $skip = ''; 98 99 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) { 100 my @todo = split /\s+/, _trim($1); 101 return $self->_make_plan_token( 102 $line, $tests_planned, 'TODO', 103 '', \@todo 104 ); 105 } 106 elsif ( 0 == $tests_planned ) { 107 $skip = 'SKIP'; 108 109 # If we can't match # SKIP the directive should be undef. 110 ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i; 111 } 112 elsif ( $tail !~ /^\s*$/ ) { 113 return $self->_make_unknown_token($line); 114 } 115 116 $explanation = '' unless defined $explanation; 117 118 return $self->_make_plan_token( 119 $line, $tests_planned, $skip, 120 $explanation, [] 121 ); 122 123 }, 124 }, 125 126 # An optimization to handle the most common test lines without 127 # directives. 128 simple_test => { 129 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x, 130 handler => sub { 131 my ( $self, $line ) = @_; 132 my ( $ok, $num, $desc ) = ( $1, $2, $3 ); 133 134 return $self->_make_test_token( 135 $line, $ok, $num, 136 $desc 137 ); 138 }, 139 }, 140 test => { 141 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x, 142 handler => sub { 143 my ( $self, $line ) = @_; 144 my ( $ok, $num, $desc ) = ( $1, $2, $3 ); 145 my ( $dir, $explanation ) = ( '', '' ); 146 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* ) 147 \# \s* (SKIP|TODO) \b \s* (.*) $/ix 148 ) 149 { 150 ( $desc, $dir, $explanation ) = ( $1, $2, $3 ); 151 } 152 return $self->_make_test_token( 153 $line, $ok, $num, $desc, 154 $dir, $explanation 155 ); 156 }, 157 }, 158 comment => { 159 syntax => qr/^#(.*)/, 160 handler => sub { 161 my ( $self, $line ) = @_; 162 my $comment = $1; 163 return $self->_make_comment_token( $line, $comment ); 164 }, 165 }, 166 bailout => { 167 syntax => qr/^\s*Bail out!\s*(.*)/, 168 handler => sub { 169 my ( $self, $line ) = @_; 170 my $explanation = $1; 171 return $self->_make_bailout_token( 172 $line, 173 $explanation 174 ); 175 }, 176 }, 177 ); 178 179 my %v13 = ( 180 %v12, 181 plan => { 182 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i, 183 handler => sub { 184 my ( $self, $line ) = @_; 185 my ( $tests_planned, $explanation ) = ( $1, $2 ); 186 my $skip 187 = ( 0 == $tests_planned || defined $explanation ) 188 ? 'SKIP' 189 : ''; 190 $explanation = '' unless defined $explanation; 191 return $self->_make_plan_token( 192 $line, $tests_planned, $skip, 193 $explanation, [] 194 ); 195 }, 196 }, 197 yaml => { 198 syntax => qr/^ (\s+) (---.*) $/x, 199 handler => sub { 200 my ( $self, $line ) = @_; 201 my ( $pad, $marker ) = ( $1, $2 ); 202 return $self->_make_yaml_token( $pad, $marker ); 203 }, 204 }, 205 pragma => { 206 syntax => 207 qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x, 208 handler => sub { 209 my ( $self, $line ) = @_; 210 my $pragmas = $1; 211 return $self->_make_pragma_token( $line, $pragmas ); 212 }, 213 }, 214 ); 215 216 %language_for = ( 217 '12' => { 218 tokens => \%v12, 219 }, 220 '13' => { 221 tokens => \%v13, 222 setup => sub { 223 shift->{iterator}->handle_unicode; 224 }, 225 }, 226 ); 227} 228 229############################################################################## 230 231=head2 Instance Methods 232 233=head3 C<set_version> 234 235 $grammar->set_version(13); 236 237Tell the grammar which TAP syntax version to support. The lowest 238supported version is 12. Although 'TAP version' isn't valid version 12 239syntax it is accepted so that higher version numbers may be parsed. 240 241=cut 242 243sub set_version { 244 my $self = shift; 245 my $version = shift; 246 247 if ( my $language = $language_for{$version} ) { 248 $self->{version} = $version; 249 $self->{tokens} = $language->{tokens}; 250 251 if ( my $setup = $language->{setup} ) { 252 $self->$setup(); 253 } 254 255 $self->_order_tokens; 256 } 257 else { 258 require Carp; 259 Carp::croak("Unsupported syntax version: $version"); 260 } 261} 262 263# Optimization to put the most frequent tokens first. 264sub _order_tokens { 265 my $self = shift; 266 267 my %copy = %{ $self->{tokens} }; 268 my @ordered_tokens = grep {defined} 269 map { delete $copy{$_} } qw( simple_test test comment plan ); 270 push @ordered_tokens, values %copy; 271 272 $self->{ordered_tokens} = \@ordered_tokens; 273} 274 275############################################################################## 276 277=head3 C<tokenize> 278 279 my $token = $grammar->tokenize; 280 281This method will return a L<TAP::Parser::Result> object representing the 282current line of TAP. 283 284=cut 285 286sub tokenize { 287 my $self = shift; 288 289 my $line = $self->{iterator}->next; 290 unless ( defined $line ) { 291 delete $self->{parser}; # break circular ref 292 return; 293 } 294 295 my $token; 296 297 for my $token_data ( @{ $self->{ordered_tokens} } ) { 298 if ( $line =~ $token_data->{syntax} ) { 299 my $handler = $token_data->{handler}; 300 $token = $self->$handler($line); 301 last; 302 } 303 } 304 305 $token = $self->_make_unknown_token($line) unless $token; 306 307 return $self->{parser}->make_result($token); 308} 309 310############################################################################## 311 312=head3 C<token_types> 313 314 my @types = $grammar->token_types; 315 316Returns the different types of tokens which this grammar can parse. 317 318=cut 319 320sub token_types { 321 my $self = shift; 322 return keys %{ $self->{tokens} }; 323} 324 325############################################################################## 326 327=head3 C<syntax_for> 328 329 my $syntax = $grammar->syntax_for($token_type); 330 331Returns a pre-compiled regular expression which will match a chunk of TAP 332corresponding to the token type. For example (not that you should really pay 333attention to this, C<< $grammar->syntax_for('comment') >> will return 334C<< qr/^#(.*)/ >>. 335 336=cut 337 338sub syntax_for { 339 my ( $self, $type ) = @_; 340 return $self->{tokens}->{$type}->{syntax}; 341} 342 343############################################################################## 344 345=head3 C<handler_for> 346 347 my $handler = $grammar->handler_for($token_type); 348 349Returns a code reference which, when passed an appropriate line of TAP, 350returns the lexed token corresponding to that line. As a result, the basic 351TAP parsing loop looks similar to the following: 352 353 my @tokens; 354 my $grammar = TAP::Grammar->new; 355 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) { 356 for my $type ( $grammar->token_types ) { 357 my $syntax = $grammar->syntax_for($type); 358 if ( $line =~ $syntax ) { 359 my $handler = $grammar->handler_for($type); 360 push @tokens => $grammar->$handler($line); 361 next LINE; 362 } 363 } 364 push @tokens => $grammar->_make_unknown_token($line); 365 } 366 367=cut 368 369sub handler_for { 370 my ( $self, $type ) = @_; 371 return $self->{tokens}->{$type}->{handler}; 372} 373 374sub _make_version_token { 375 my ( $self, $line, $version ) = @_; 376 return { 377 type => 'version', 378 raw => $line, 379 version => $version, 380 }; 381} 382 383sub _make_plan_token { 384 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_; 385 386 if ( $directive eq 'SKIP' 387 && 0 != $tests_planned 388 && $self->{version} < 13 ) 389 { 390 warn 391 "Specified SKIP directive in plan but more than 0 tests ($line)\n"; 392 } 393 394 return { 395 type => 'plan', 396 raw => $line, 397 tests_planned => $tests_planned, 398 directive => $directive, 399 explanation => _trim($explanation), 400 todo_list => $todo, 401 }; 402} 403 404sub _make_test_token { 405 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_; 406 return { 407 ok => $ok, 408 test_num => $num, 409 description => _trim($desc), 410 directive => ( defined $dir ? uc $dir : '' ), 411 explanation => _trim($explanation), 412 raw => $line, 413 type => 'test', 414 }; 415} 416 417sub _make_unknown_token { 418 my ( $self, $line ) = @_; 419 return { 420 raw => $line, 421 type => 'unknown', 422 }; 423} 424 425sub _make_comment_token { 426 my ( $self, $line, $comment ) = @_; 427 return { 428 type => 'comment', 429 raw => $line, 430 comment => _trim($comment) 431 }; 432} 433 434sub _make_bailout_token { 435 my ( $self, $line, $explanation ) = @_; 436 return { 437 type => 'bailout', 438 raw => $line, 439 bailout => _trim($explanation) 440 }; 441} 442 443sub _make_yaml_token { 444 my ( $self, $pad, $marker ) = @_; 445 446 my $yaml = TAP::Parser::YAMLish::Reader->new; 447 448 my $iterator = $self->{iterator}; 449 450 # Construct a reader that reads from our input stripping leading 451 # spaces from each line. 452 my $leader = length($pad); 453 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x; 454 my @extra = ($marker); 455 my $reader = sub { 456 return shift @extra if @extra; 457 my $line = $iterator->next; 458 return $2 if $line =~ $strip; 459 return; 460 }; 461 462 my $data = $yaml->read($reader); 463 464 # Reconstitute input. This is convoluted. Maybe we should just 465 # record it on the way in... 466 chomp( my $raw = $yaml->get_raw ); 467 $raw =~ s/^/$pad/mg; 468 469 return { 470 type => 'yaml', 471 raw => $raw, 472 data => $data 473 }; 474} 475 476sub _make_pragma_token { 477 my ( $self, $line, $pragmas ) = @_; 478 return { 479 type => 'pragma', 480 raw => $line, 481 pragmas => [ split /\s*,\s*/, _trim($pragmas) ], 482 }; 483} 484 485sub _trim { 486 my $data = shift; 487 488 return '' unless defined $data; 489 490 $data =~ s/^\s+//; 491 $data =~ s/\s+$//; 492 return $data; 493} 494 4951; 496 497=head1 TAP GRAMMAR 498 499B<NOTE:> This grammar is slightly out of date. There's still some discussion 500about it and a new one will be provided when we have things better defined. 501 502The L<TAP::Parser> does not use a formal grammar because TAP is essentially a 503stream-based protocol. In fact, it's quite legal to have an infinite stream. 504For the same reason that we don't apply regexes to streams, we're not using a 505formal grammar here. Instead, we parse the TAP in lines. 506 507For purposes for forward compatibility, any result which does not match the 508following grammar is currently referred to as 509L<TAP::Parser::Result::Unknown>. It is I<not> a parse error. 510 511A formal grammar would look similar to the following: 512 513 (* 514 For the time being, I'm cheating on the EBNF by allowing 515 certain terms to be defined by POSIX character classes by 516 using the following syntax: 517 518 digit ::= [:digit:] 519 520 As far as I am aware, that's not valid EBNF. Sue me. I 521 didn't know how to write "char" otherwise (Unicode issues). 522 Suggestions welcome. 523 *) 524 525 tap ::= version? { comment | unknown } leading_plan lines 526 | 527 lines trailing_plan {comment} 528 529 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n" 530 531 leading_plan ::= plan skip_directive? "\n" 532 533 trailing_plan ::= plan "\n" 534 535 plan ::= '1..' nonNegativeInteger 536 537 lines ::= line {line} 538 539 line ::= (comment | test | unknown | bailout ) "\n" 540 541 test ::= status positiveInteger? description? directive? 542 543 status ::= 'not '? 'ok ' 544 545 description ::= (character - (digit | '#')) {character - '#'} 546 547 directive ::= todo_directive | skip_directive 548 549 todo_directive ::= hash_mark 'TODO' ' ' {character} 550 551 skip_directive ::= hash_mark 'SKIP' ' ' {character} 552 553 comment ::= hash_mark {character} 554 555 hash_mark ::= '#' {' '} 556 557 bailout ::= 'Bail out!' {character} 558 559 unknown ::= { (character - "\n") } 560 561 (* POSIX character classes and other terminals *) 562 563 digit ::= [:digit:] 564 character ::= ([:print:] - "\n") 565 positiveInteger ::= ( digit - '0' ) {digit} 566 nonNegativeInteger ::= digit {digit} 567 568=head1 SUBCLASSING 569 570Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview. 571 572If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to 573do is read through the code. There's no easy way of summarizing it here. 574 575=head1 SEE ALSO 576 577L<TAP::Object>, 578L<TAP::Parser>, 579L<TAP::Parser::Iterator>, 580L<TAP::Parser::Result>, 581 582=cut 583