1#!perl 2# This software is copyright (c) 2011 by Jeffrey Kegler 3# This is free software; you can redistribute it and/or modify it 4# under the same terms as the Perl 5 programming language system 5# itself. 6 7use 5.010; 8use strict; 9use warnings; 10use English qw( -no_match_vars ); 11 12# These tests are based closely on those in the HTML-Tree module, 13# the authors of which I gratefully acknowledge. 14 15use Test::More tests => 43; 16my $DEBUG = 2; 17 18Test::More::use_ok('HTML::Entities'); 19Test::More::use_ok('HTML::PullParser'); 20Test::More::use_ok('Marpa::HTML'); 21 22my $html_args = { 23 ':CRUFT' => sub { 24 my $literal = Marpa::HTML::literal(); 25 say STDERR 'Cruft: ', $literal 26 or Carp::croak("Cannot print: $ERRNO"); 27 return qq{<CRUFT literal="$literal">}; 28 }, 29 ':PCDATA' => sub { 30 my $literal = Marpa::HTML::literal(); 31 if ( defined &HTML::Entities::decode_entities ) { 32 $literal = 33 HTML::Entities::encode_entities( 34 HTML::Entities::decode_entities($literal) ); 35 } 36 return $literal; 37 }, 38 ':PROLOG' => sub { 39 my $literal = Marpa::HTML::literal(); 40 $literal =~ s/\A [\x{20}\t\f\x{200B}]+ //xms; 41 $literal =~ s/ [\x{20}\t\f\x{200B}]+ \z//xms; 42 return $literal; 43 }, 44 ':COMMENT' => sub { return q{} }, 45 q{*} => sub { 46 my $tagname = Marpa::HTML::tagname(); 47 48 # say STDERR "In handler for $tagname element"; 49 50 Carp::croak('Not in an element') if not $tagname; 51 my $attributes = Marpa::HTML::attributes(); 52 53 # Note this logic suffices to get through 54 # the test set but it does not handle 55 # the necessary escaping for a production 56 # version 57 my $start_tag = "<$tagname"; 58 for my $attribute ( sort keys %{$attributes} ) { 59 $start_tag 60 .= qq{ $attribute="} . $attributes->{$attribute} . q{"}; 61 } 62 $start_tag .= '>'; 63 my $end_tag = "</$tagname>"; 64 65 my $descendant_data = 66 67# Marpa::HTML::Display 68# name: dataspec example 69 70 Marpa::HTML::descendants('token_type,literal,element') 71 72# Marpa::HTML::Display::End 73 74 ; # semi to end $descendant_data definition 75 76 # For UL element, eliminate all but the LI element children 77 if ( $tagname eq 'ul' ) { 78 $descendant_data = 79 [ grep { defined $_->[2] and $_->[2] eq 'li' } 80 @{$descendant_data} ]; 81 } 82 83 my $contents = join q{}, map { $_->[1] } 84 grep { not defined $_->[0] or not $_->[0] ~~ [qw(S E)] } 85 @{$descendant_data}; 86 $contents =~ s/\A [\x{20}\t\f\x{200B}]+ //xms; 87 $contents =~ s/ [\x{20}\t\f\x{200B}]+ \z//xms; 88 return join q{}, $start_tag, $contents, $end_tag; 89 }, 90}; 91 92Test::More::ok 1; 93 94Test::More::ok same( 95 '<title>foo</title><p>I like pie', 96 '<html><head><title>foo</title></head><body><p>I like pie</p></body></html>' 97); 98 99Test::More::ok !same( 'x' => 'y', 1 ); 100Test::More::ok !same( '<p>' => 'y', 1 ); 101 102Test::More::ok same( q{} => q{} ); 103Test::More::ok same( q{} => q{ } ); 104Test::More::ok same( q{} => q{ } ); 105 106Test::More::ok same( q{} => '<!-- tra la la -->' ); 107Test::More::ok same( q{} => '<!-- tra la la --><!-- foo -->' ); 108 109Test::More::ok same( q{} => \'<head></head><body></body>' ); 110 111Test::More::ok same( '<head>' => q{} ); 112 113Test::More::ok same( '<head></head><body>' => \'<head></head><body></body>' ); 114 115Test::More::ok same( 116 '<img alt="456" src="123">' => '<img src="123" alt="456">' ); 117Test::More::ok same( 118 '<img alt="456" src="123">' => '<img src="123" alt="456">' ); 119Test::More::ok same( 120 '<img alt="456" src="123">' => '<img src="123" alt="456" >' ); 121 122Test::More::ok !same( 123 '<img alt="456" >' => '<img src="123" alt="456" >', 124 1 125); 126 127SKIP: { 128 ## no critic (ValuesAndExpressions::ProhibitMagicNumbers) 129 defined &HTML::Entities::decode_entities 130 or Test::More::skip 'HTML::Entities not installed', 6; 131 ## use critic 132 133 Test::More::ok same( 'abc xyz' => 'abc xyz' ); 134 Test::More::ok same( 'abc xyz' => 'abc xyz' ); 135 136 Test::More::ok same( 'abc+xyz' => 'abc+xyz' ); 137 Test::More::ok same( 'abc+xyz' => 'abc+xyz' ); 138 139 Test::More::ok same( 'abc+xyz' => 'abc+xyz' ); 140 Test::More::ok same( 'abc+xyz' => 'abc+xyz' ); 141 142} ## end SKIP: 143 144# Now some list tests. 145 146Test::More::ok same( '<ul><li>x</ul>after' => '<ul><li>x</li></ul>after' ); 147Test::More::ok same( 148 '<ul><li>x<li>y</ul>after' => '<ul><li>x</li><li>y</li></ul>after' ); 149 150Test::More::ok same( '<ul> <li>x</li> <li>y</li> </ul>after' => 151 '<ul><li>x</li><li>y</li></ul>after' ); 152 153Test::More::ok same( '<ul><li>x<li>y</ul>after' => \ 154 '<head></head><body><ul><li>x</li><li>y</li></ul>after</body>' ); 155 156# Now some table tests. 157 158Test::More::ok same( '<table>x<td>y<td>z' => 159 '<table><tr><td>x</td><td>y</td><td>z</td></table>' ); 160 161Test::More::ok same( '<table>x<td>y<tr>z' => 162 '<table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' ); 163 164Test::More::ok same( 165 '<table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' => 166 '<table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' ); 167Test::More::ok same( 168 '<table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' => \ 169 '<head></head><body><table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' 170); 171 172Test::More::ok same( '<table>x' => '<td>x' ); 173Test::More::ok same( '<table>x' => '<table><td>x' ); 174Test::More::ok same( '<table>x' => '<tr>x' ); 175Test::More::ok same( '<table>x' => '<tr><td>x' ); 176Test::More::ok same( '<table>x' => '<table><tr>x' ); 177Test::More::ok same( '<table>x' => '<table><tr><td>x' ); 178 179# Now some p tests. 180 181Test::More::ok same( '<p>x<p>y<p>z' => '<p>x</p><p>y</p><p>z' ); 182Test::More::ok same( '<p>x<p>y<p>z' => '<p>x</p><p>y<p>z</p>' ); 183Test::More::ok same( '<p>x<p>y<p>z' => '<p>x</p><p>y</p><p>z</p>' ); 184Test::More::ok same( 185 '<p>x<p>y<p>z' => \'<head></head><body><p>x</p><p>y</p><p>z</p>' ); 186 187sub same { 188 my ( $code1, $code2, $flip ) = @_; 189 190 if ( ref $code1 ) { $code1 = ${$code1} } 191 if ( ref $code2 ) { $code2 = ${$code2} } 192 193 my $value1; 194 if ( not eval { $value1 = Marpa::HTML::html( \$code1, $html_args ); 1 } ) 195 { 196 say "No parse for $code1" 197 or Carp::croak("Cannot print: $ERRNO"); 198 return $flip; 199 } ## end if ( not eval { $value1 = Marpa::HTML::html( \$code1...)}) 200 201 my $value2; 202 if ( not eval { $value2 = Marpa::HTML::html( \$code2, $html_args ); 1 } ) 203 { 204 say "No parse for $code2" 205 or Carp::croak("Cannot print: $ERRNO"); 206 return $flip; 207 } ## end if ( not eval { $value2 = Marpa::HTML::html( \$code2...)}) 208 209 my $out1 = ${$value1}; 210 my $out2 = ${$value2}; 211 212 my $rv = ( $out1 eq $out2 ); 213 214 if ( $flip ? ( !$rv ) : $rv ) { 215 if ( $DEBUG > 2 ) { 216 print 217 "In1 $code1\n", 218 "In2 $code2\n", "Out1 $out1\n", "Out2 $out2\n", "\n\n" 219 or Carp::croak("Cannot print: $ERRNO"); 220 } ## end if ( $DEBUG > 2 ) 221 } ## end if ( $flip ? ( !$rv ) : $rv ) 222 else { 223 print '# The following failure is at ' . join( ' : ', caller ), "\n", 224 '# Explanation of failure: ' 225 . ( $flip ? 'same' : 'different' ) 226 . ' parse trees!', "\n", 227 '# Input code 1:', $code1, "\n", 228 '# Input code 2:', $code2, "\n", 229 '# Output tree (as XML) 1:', $out1, "\n", 230 '# Output tree (as XML) 2:', $out2, "\n", 231 or Carp::croak("Cannot print: $ERRNO"); 232 } ## end else [ if ( $flip ? ( !$rv ) : $rv ) ] 233 234 return $rv; 235} ## end sub same 236 237