1#!/usr/bin/perl -T
2
3use warnings;
4use strict;
5use Test::More;
6my $DEBUG = 2;
7
8BEGIN {
9
10    # Make sure we've got Unicode support:
11    eval "use v5.8.0;  utf8::is_utf8('x');";
12    if ($@) {
13        plan skip_all => "Perl 5.8.0 or newer required for Unicode tests";
14        exit;
15    }
16
17    plan tests => 11;
18    binmode STDOUT, ":utf8";
19}    # end BEGIN
20
21use Encode;
22use HTML::TreeBuilder;
23
24print "#Using Encode version v", $Encode::VERSION || "?", "\n";
25print "#Using HTML::TreeBuilder version v$HTML::TreeBuilder::VERSION\n";
26print "#Using HTML::Element version v$HTML::Element::VERSION\n";
27print "#Using HTML::Parser version v", $HTML::Parser::VERSION || "?", "\n";
28print "#Using HTML::Entities version v", $HTML::Entities::VERSION || "?",
29    "\n";
30print "#Using HTML::Tagset version v", $HTML::Tagset::VERSION || "?", "\n";
31print "# Running under perl version $] for $^O",
32    ( chr(65) eq 'A' ) ? "\n" : " in a non-ASCII world\n";
33print "# Win32::BuildNumber ", &Win32::BuildNumber(), "\n"
34    if defined(&Win32::BuildNumber)
35        and defined &Win32::BuildNumber();
36print "# MacPerl verison $MacPerl::Version\n"
37    if defined $MacPerl::Version;
38printf
39    "# Current time local: %s\n# Current time GMT:   %s\n",
40    scalar( localtime($^T) ), scalar( gmtime($^T) );
41
42ok 1;
43
44ok same( '<p>&nbsp;</p>', decode( 'latin1', "<p>\xA0</p>" ) );
45
46ok !same( '<p></p>',  decode( 'latin1', "<p>\xA0</p>" ), 1 );
47ok !same( '<p> </p>', decode( 'latin1', "<p>\xA0</p>" ), 1 );
48
49ok same( '<p>&nbsp;&nbsp;&nbsp;</p>',
50    decode( 'latin1', "<p>\xA0\xA0\xA0</p>" ) );
51ok same( "<p>\xA0\xA0\xA0</p>", decode( 'latin1', "<p>\xA0\xA0\xA0</p>" ) );
52
53ok !same( '<p></p>',  decode( 'latin1', "<p>\xA0\xA0\xA0</p>" ), 1 );
54ok !same( '<p> </p>', decode( 'latin1', "<p>\xA0\xA0\xA0</p>" ), 1 );
55
56ok same(
57    '<p>&nbsp;&nbsp;&mdash;&nbsp;&nbsp;</p>',
58    "<p>\xA0\xA0\x{2014}\xA0\xA0</p>"
59);
60
61ok same(
62    '<p>&nbsp;&nbsp;XXmdashXX&nbsp;&nbsp;</p>',
63    "<p>\xA0\xA0\x{2014}\xA0\xA0</p>",
64    0, sub { $_[0] =~ s/XXmdashXX/\x{2014}/ }
65);
66
67ok same( '<p>&nbsp;<b>bold</b>&nbsp;&nbsp;</p>',
68    decode( 'latin1', "<p>\xA0<b>bold</b>\xA0\xA0</p>" ) );
69
70sub same {
71    my ( $code1, $code2, $flip, $fixup ) = @_;
72    my $t1 = HTML::TreeBuilder->new;
73    my $t2 = HTML::TreeBuilder->new;
74
75    if ( ref $code1 ) { $t1->implicit_tags(0); $code1 = $$code1 }
76    if ( ref $code2 ) { $t2->implicit_tags(0); $code2 = $$code2 }
77
78    $t1->parse($code1);
79    $t1->eof;
80    $t2->parse($code2);
81    $t2->eof;
82
83    my $out1 = $t1->as_XML;
84    my $out2 = $t2->as_XML;
85
86    $fixup->( $out1, $out2 ) if $fixup;
87
88    my $rv = ( $out1 eq $out2 );
89
90    #print $rv? "RV TRUE\n" : "RV FALSE\n";
91    #print $flip? "FLIP TRUE\n" : "FLIP FALSE\n";
92
93    if ( $flip ? ( !$rv ) : $rv ) {
94        if ( $DEBUG > 2 ) {
95            print
96                "In1 $code1\n",
97                "In2 $code2\n",
98                "Out1 $out1\n",
99                "Out2 $out2\n",
100                "\n\n";
101        }
102    }
103    else {
104        local $_;
105        foreach my $line (
106            '',
107            "The following failure is at " . join( ' : ', caller ),
108            "Explanation of failure: "
109            . ( $flip ? 'same' : 'different' )
110            . " parse trees!",
111            sprintf( "Input code 1 (utf8=%d):", utf8::is_utf8($code1) ),
112            $code1,
113            sprintf( "Input code 2 (utf8=%d):", utf8::is_utf8($code2) ),
114            $code2,
115            "Output tree (as XML) 1:",
116            $out1,
117            "Output tree (as XML) 2:",
118            $out2,
119            )
120        {
121            $_ = $line;
122            s/\n/\n# /g;
123            print "# $_\n";
124        }
125    }
126
127    $t1->delete;
128    $t2->delete;
129
130    return $rv;
131}    # end same
132