1#!perl
2# This software is copyright (c) 2011 by Jeffrey Kegler
3# This is free software; you can redistribute it and/or modify it
4# under the same terms as the Perl 5 programming language system
5# itself.
6
7use 5.010;
8use strict;
9use warnings;
10use English qw( -no_match_vars );
11
12# These tests are based closely on those in the HTML-Tree module,
13# the authors of which I gratefully acknowledge.
14
15use Test::More tests => 43;
16my $DEBUG = 2;
17
18Test::More::use_ok('HTML::Entities');
19Test::More::use_ok('HTML::PullParser');
20Test::More::use_ok('Marpa::HTML');
21
22my $html_args = {
23    ':CRUFT' => sub {
24        my $literal = Marpa::HTML::literal();
25        say STDERR 'Cruft: ', $literal
26            or Carp::croak("Cannot print: $ERRNO");
27        return qq{<CRUFT literal="$literal">};
28    },
29    ':PCDATA' => sub {
30        my $literal = Marpa::HTML::literal();
31        if ( defined &HTML::Entities::decode_entities ) {
32            $literal =
33                HTML::Entities::encode_entities(
34                HTML::Entities::decode_entities($literal) );
35        }
36        return $literal;
37    },
38    ':PROLOG' => sub {
39        my $literal = Marpa::HTML::literal();
40        $literal =~ s/\A [\x{20}\t\f\x{200B}]+ //xms;
41        $literal =~ s/ [\x{20}\t\f\x{200B}]+ \z//xms;
42        return $literal;
43    },
44    ':COMMENT' => sub { return q{} },
45    q{*}       => sub {
46        my $tagname = Marpa::HTML::tagname();
47
48        # say STDERR "In handler for $tagname element";
49
50        Carp::croak('Not in an element') if not $tagname;
51        my $attributes = Marpa::HTML::attributes();
52
53        # Note this logic suffices to get through
54        # the test set but it does not handle
55        # the necessary escaping for a production
56        # version
57        my $start_tag = "<$tagname";
58        for my $attribute ( sort keys %{$attributes} ) {
59            $start_tag
60                .= qq{ $attribute="} . $attributes->{$attribute} . q{"};
61        }
62        $start_tag .= '>';
63        my $end_tag = "</$tagname>";
64
65        my $descendant_data =
66
67# Marpa::HTML::Display
68# name: dataspec example
69
70            Marpa::HTML::descendants('token_type,literal,element')
71
72# Marpa::HTML::Display::End
73
74            ;    # semi to end $descendant_data definition
75
76        # For UL element, eliminate all but the LI element children
77        if ( $tagname eq 'ul' ) {
78            $descendant_data =
79                [ grep { defined $_->[2] and $_->[2] eq 'li' }
80                    @{$descendant_data} ];
81        }
82
83        my $contents = join q{}, map { $_->[1] }
84            grep { not defined $_->[0] or not $_->[0] ~~ [qw(S E)] }
85            @{$descendant_data};
86        $contents =~ s/\A [\x{20}\t\f\x{200B}]+ //xms;
87        $contents =~ s/ [\x{20}\t\f\x{200B}]+ \z//xms;
88        return join q{}, $start_tag, $contents, $end_tag;
89    },
90};
91
92Test::More::ok 1;
93
94Test::More::ok same(
95    '<title>foo</title><p>I like pie',
96    '<html><head><title>foo</title></head><body><p>I like pie</p></body></html>'
97);
98
99Test::More::ok !same( 'x'   => 'y', 1 );
100Test::More::ok !same( '<p>' => 'y', 1 );
101
102Test::More::ok same( q{} => q{} );
103Test::More::ok same( q{} => q{ } );
104Test::More::ok same( q{} => q{  } );
105
106Test::More::ok same( q{} => '<!-- tra la la -->' );
107Test::More::ok same( q{} => '<!-- tra la la --><!-- foo -->' );
108
109Test::More::ok same( q{} => \'<head></head><body></body>' );
110
111Test::More::ok same( '<head>' => q{} );
112
113Test::More::ok same( '<head></head><body>' => \'<head></head><body></body>' );
114
115Test::More::ok same(
116    '<img alt="456" src="123">' => '<img src="123" alt="456">' );
117Test::More::ok same(
118    '<img alt="456" src="123">' => '<img src="123"    alt="456">' );
119Test::More::ok same(
120    '<img alt="456" src="123">' => '<img src="123"    alt="456"   >' );
121
122Test::More::ok !same(
123    '<img alt="456" >' => '<img src="123"    alt="456"   >',
124    1
125);
126
127SKIP: {
128    ## no critic (ValuesAndExpressions::ProhibitMagicNumbers)
129    defined &HTML::Entities::decode_entities
130        or Test::More::skip 'HTML::Entities not installed', 6;
131    ## use critic
132
133    Test::More::ok same( 'abc&#32;xyz'  => 'abc xyz' );
134    Test::More::ok same( 'abc&#x20;xyz' => 'abc xyz' );
135
136    Test::More::ok same( 'abc&#43;xyz'  => 'abc+xyz' );
137    Test::More::ok same( 'abc&#x2b;xyz' => 'abc+xyz' );
138
139    Test::More::ok same( '&#97;bc+xyz'  => 'abc+xyz' );
140    Test::More::ok same( '&#x61;bc+xyz' => 'abc+xyz' );
141
142} ## end SKIP:
143
144# Now some list tests.
145
146Test::More::ok same( '<ul><li>x</ul>after' => '<ul><li>x</li></ul>after' );
147Test::More::ok same(
148    '<ul><li>x<li>y</ul>after' => '<ul><li>x</li><li>y</li></ul>after' );
149
150Test::More::ok same( '<ul> <li>x</li> <li>y</li> </ul>after' =>
151        '<ul><li>x</li><li>y</li></ul>after' );
152
153Test::More::ok same( '<ul><li>x<li>y</ul>after' => \
154        '<head></head><body><ul><li>x</li><li>y</li></ul>after</body>' );
155
156# Now some table tests.
157
158Test::More::ok same( '<table>x<td>y<td>z' =>
159        '<table><tr><td>x</td><td>y</td><td>z</td></table>' );
160
161Test::More::ok same( '<table>x<td>y<tr>z' =>
162        '<table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' );
163
164Test::More::ok same(
165    '<table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' =>
166        '<table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' );
167Test::More::ok same(
168    '<table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>' => \
169        '<head></head><body><table><tr><td>x</td><td>y</td></tr><tr><td>z</td></tr></table>'
170);
171
172Test::More::ok same( '<table>x' => '<td>x' );
173Test::More::ok same( '<table>x' => '<table><td>x' );
174Test::More::ok same( '<table>x' => '<tr>x' );
175Test::More::ok same( '<table>x' => '<tr><td>x' );
176Test::More::ok same( '<table>x' => '<table><tr>x' );
177Test::More::ok same( '<table>x' => '<table><tr><td>x' );
178
179# Now some p tests.
180
181Test::More::ok same( '<p>x<p>y<p>z' => '<p>x</p><p>y</p><p>z' );
182Test::More::ok same( '<p>x<p>y<p>z' => '<p>x</p><p>y<p>z</p>' );
183Test::More::ok same( '<p>x<p>y<p>z' => '<p>x</p><p>y</p><p>z</p>' );
184Test::More::ok same(
185    '<p>x<p>y<p>z' => \'<head></head><body><p>x</p><p>y</p><p>z</p>' );
186
187sub same {
188    my ( $code1, $code2, $flip ) = @_;
189
190    if ( ref $code1 ) { $code1 = ${$code1} }
191    if ( ref $code2 ) { $code2 = ${$code2} }
192
193    my $value1;
194    if ( not eval { $value1 = Marpa::HTML::html( \$code1, $html_args ); 1 } )
195    {
196        say "No parse for $code1"
197            or Carp::croak("Cannot print: $ERRNO");
198        return $flip;
199    } ## end if ( not eval { $value1 = Marpa::HTML::html( \$code1...)})
200
201    my $value2;
202    if ( not eval { $value2 = Marpa::HTML::html( \$code2, $html_args ); 1 } )
203    {
204        say "No parse for $code2"
205            or Carp::croak("Cannot print: $ERRNO");
206        return $flip;
207    } ## end if ( not eval { $value2 = Marpa::HTML::html( \$code2...)})
208
209    my $out1 = ${$value1};
210    my $out2 = ${$value2};
211
212    my $rv = ( $out1 eq $out2 );
213
214    if ( $flip ? ( !$rv ) : $rv ) {
215        if ( $DEBUG > 2 ) {
216            print
217                "In1 $code1\n",
218                "In2 $code2\n", "Out1 $out1\n", "Out2 $out2\n", "\n\n"
219                or Carp::croak("Cannot print: $ERRNO");
220        } ## end if ( $DEBUG > 2 )
221    } ## end if ( $flip ? ( !$rv ) : $rv )
222    else {
223        print '# The following failure is at ' . join( ' : ', caller ), "\n",
224              '# Explanation of failure: '
225            . ( $flip ? 'same' : 'different' )
226            . ' parse trees!', "\n",
227            '# Input code 1:',           $code1, "\n",
228            '# Input code 2:',           $code2, "\n",
229            '# Output tree (as XML) 1:', $out1,  "\n",
230            '# Output tree (as XML) 2:', $out2,  "\n",
231            or Carp::croak("Cannot print: $ERRNO");
232    } ## end else [ if ( $flip ? ( !$rv ) : $rv ) ]
233
234    return $rv;
235} ## end sub same
236
237