package XML::Liberal::Remedy::HTMLEntity;
use strict;
use warnings;

use Carp ();
use HTML::Entities ();

# Lookup table from HTML entity name to the equivalent hexadecimal numeric
# character reference (for example "eacute" => "&#xe9;").  It is built once,
# at load time, from %HTML::Entities::entity2char; any trailing ";" on a key
# of that table is stripped so lookups can use the bare entity name.
my %DECODE = map {
    (my $name = $_) =~ s{\;\z}{};
    $name => sprintf '&#x%x;', ord $HTML::Entities::entity2char{$_}
} keys %HTML::Entities::entity2char;

# Return the text that should replace the entity reference "&$name;".
#
#   $name  - entity name, without the surrounding "&" and ";"
#   $error - error object; only its summary() is used, for the warning
#
# Known names (exact match first, then lower-cased) yield a numeric character
# reference.  Unknown names trigger a warning and yield the original text
# with its ampersand escaped, so the document keeps its content and stays
# well-formed instead of having the warning call's return value spliced in.
sub _replacement_for {
    my ($name, $error) = @_;

    return $DECODE{$name} if exists $DECODE{$name};

    # Note that we can't tell whether "&EACUTE;" is meant to be "&eacute;"
    # or "&Eacute;", so we arbitrarily choose "&eacute;".  Fortunately, the
    # only HTML entities whose names aren't all-lower-case are the
    # upper-case equivalents of all-lower-case ones, so this doesn't
    # introduce any ambiguity that didn't exist in the source document.
    my $lower = lc $name;
    return $DECODE{$lower} if exists $DECODE{$lower};

    Carp::carp("Can't find named HTML entity $name, error was: ",
               $error->summary);
    return "&amp;$name;";
}

# Remedy for "Entity '...' not defined" parser errors: rewrite every named
# entity reference in the document as a numeric character reference.
# Optimized to fix all errors in one apply() call.
#
#   $class   - invocant (unused)
#   $driver  - passed by the caller (unused here)
#   $error   - error object providing message() and summary()
#   $xml_ref - reference to the XML text; modified in place
#
# Returns 0 when the error is not an undefined-entity error; otherwise the
# result of the global substitution in scalar context (the number of
# references rewritten, or a false value when none matched).
sub apply {
    my $class = shift;
    my($driver, $error, $xml_ref) = @_;

    return 0 if $error->message !~ /^parser error : Entity '.*' not defined/;

    return scalar $$xml_ref =~ s{&([a-zA-Z0-9]+);}{
        _replacement_for($1, $error)
    }ge;
}

1;