package HTML::Lint;

use warnings;
use strict;

use HTML::Lint::Error;
use HTML::Lint::Parser ();

use HTML::Entities ();

=head1 NAME

HTML::Lint - check for HTML errors in a string or file

=head1 VERSION

Version 2.32

=cut

our $VERSION = '2.32';

=head1 SYNOPSIS

    my $lint = HTML::Lint->new;
    $lint->only_types( HTML::Lint::Error::STRUCTURE );

    # Parse lines of data.
    $lint->newfile( $filename );
    while ( my $line = <> ) {
        $lint->parse( $line );
    }
    $lint->eof();

    # Or, parse an entire file at once.
    $lint->parse_file( $filename );

    # Fetch the errors that the linter found.
    my $error_count = $lint->errors;

    foreach my $error ( $lint->errors ) {
        print $error->as_string, "\n";
    }

HTML::Lint also comes with a wrapper program called F<weblint> that handles
linting from the command line:

    $ weblint http://www.cnn.com/
    http://www.cnn.com/ (395:83) <IMG SRC="spacer.gif"> tag has no HEIGHT and WIDTH attributes.
    http://www.cnn.com/ (395:83) <IMG SRC="goofus.gif"> does not have ALT text defined
    http://www.cnn.com/ (396:217) Unknown element <nobr>
    http://www.cnn.com/ (396:241) </nobr> with no opening <nobr>
    http://www.cnn.com/ (842:7) target attribute in <a> is repeated

And finally, you can also get L<Apache::HTML::Lint> that passes any
mod_perl-generated code through HTML::Lint and gets it dumped into your
Apache F<error_log>.

    [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:45) </p> with no opening <p>
    [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:49) Unknown element <gronk>
    [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:56) Unknown attribute "x" for tag <table>

=cut

=head1 METHODS

NOTE: Some of these methods mirror L<HTML::Parser>'s methods, but HTML::Lint
is not a subclass of HTML::Parser.

=head2 new()

Create an HTML::Lint object.
You may pass the types of errors you want to check for in the
C<only_types> parm.

    my $lint = HTML::Lint->new( only_types => HTML::Lint::Error::STRUCTURE );

If you want more than one, you must pass an arrayref:

    my $lint = HTML::Lint->new(
        only_types => [HTML::Lint::Error::STRUCTURE, HTML::Lint::Error::FLUFF] );

Any other argument is reported with an "Unknown argument" warning.

=cut

sub new {
    my ( $class, %args ) = @_;

    my $self = bless { _errors => [], _types => [] }, $class;

    # Only a true only_types value is consumed here; a false one is left in
    # %args and is therefore reported by the unknown-argument loop below.
    my $only = $args{only_types};
    if ( $only ) {
        my @types = ref($only) eq 'ARRAY' ? @{$only} : ( $only );
        $self->only_types( @types );
        delete $args{only_types};
    }

    for my $unknown ( keys %args ) {
        warn "Unknown argument $unknown\n";
    }

    return $self;
}

=head2 $lint->parser()

Returns the parser object that belongs to this linter.  The parser is
created on first use and then kept until it is discarded again.

=cut

sub parser {
    my ( $self ) = @_;

    # Reuse the parser we already have, if any.
    return $self->{_parser} if $self->{_parser};

    # The callback routes everything the parser reports back into gripe().
    my $parser = HTML::Lint::Parser->new( sub { $self->gripe( @_ ) } );
    $self->{_parser} = $parser;
    $parser->ignore_elements( qw(script style) );

    return $parser;
}

=head2 $lint->parse( $text )

=head2 $lint->parse( $code_ref )

Passes in a chunk of HTML to be linted, either as a piece of text,
or a code reference.
See L<HTML::Parser>'s C<parse> method for details.

Returns whatever the parser's C<parse> method returns.

=cut

sub parse {
    my $self = shift;

    my $parser = $self->parser;
    my $result = $parser->parse( @_ );

    # Remember that parsing happened, so errors() does not complain.
    $self->{_parse_called} = 1;

    return $result;
}

=head2 $lint->parse_file( $file )

Analyzes HTML directly from a file.  The C<$file> argument can be a filename,
an open file handle, or a reference to an open file handle.
See L<HTML::Parser>'s C<parse_file> method for details.

C<eof()> is called for you once the file has been parsed.  The return
value is whatever the parser's C<parse_file> method returns.

=cut

sub parse_file {
    my $self = shift;

    my $parser = $self->parser;
    my $result = $parser->parse_file( @_ );

    $self->{_parse_called} = 1;

    # A whole file is a complete unit of input, so finish it right away.
    $self->eof;

    return $result;
}

=head2 $lint->eof()

Signals the end of a block of text getting passed in.
This must be
called to make sure that all parsing is complete before looking at errors.

Any parameters (and there shouldn't be any) are passed through to
the underlying parser's C<eof()> method, whose return value is returned.

=cut

sub eof { ## no critic ( Subroutines::ProhibitBuiltinHomonyms )
    my $self = shift;

    my $parser = $self->parser;
    my $result;
    return $result unless $parser;

    $result = $parser->eof( @_ );

    # Drop the finished parser; parser() builds a new one on the next use.
    delete $self->{_parser};
    $self->{_eof_called} = 1;

    return $result;
}

=head2 $lint->errors()

In list context, C<errors> returns all of the errors found in the
parsed text.  Each error is an object of the type L<HTML::Lint::Error>.

In scalar context, it returns the number of errors found.

If nothing has been parsed yet, an C<api-parse-not-called> error is
reported through C<gripe()> before the list is returned; if text was
parsed but C<eof()> was never called, an C<api-eof-not-called> error is
reported instead.  This happens on every call made in that state.

=cut

sub errors {
    my ( $self ) = @_;

    # Work out which API step, if any, the caller has skipped.
    my $missing_step
        = !$self->{_parse_called} ? 'api-parse-not-called'
        : !$self->{_eof_called}   ? 'api-eof-not-called'
        :                           undef;
    $self->gripe( $missing_step ) if defined $missing_step;

    my $found = $self->{_errors};

    return wantarray ? @{$found} : scalar @{$found};
}

=head2 $lint->clear_errors()

Empties the list of errors, in case you want to print and clear,
print and clear.

=cut

sub clear_errors {
    my ( $self ) = @_;

    # Install a brand-new list rather than emptying the old one in place.
    $self->{_errors} = [];

    return;
}

=head2 $lint->only_types( $type1[, $type2...] )

Specifies to only want errors of a certain type.

    $lint->only_types( HTML::Lint::Error::STRUCTURE );

Calling this without parameters makes the object return all possible
errors.

The error types are C<STRUCTURE>, C<HELPER> and C<FLUFF>.
See L<HTML::Lint::Error> for details on these types.

=cut

sub only_types {
    my ( $self, @wanted ) = @_;

    $self->{_types} = \@wanted;

    return;
}

=head2 $lint->gripe( $errcode, [$key1=>$val1, ...]
)

Adds an error message, in the form of an L<HTML::Lint::Error> object,
to the list of error messages for the current object.  The file,
line and column are automatically passed to the L<HTML::Lint::Error>
constructor, as well as whatever other key value pairs are passed.

For example:

    $lint->gripe( 'attr-repeated', tag => $tag, attr => $attr );

If C<only_types> has been used to restrict the error types, the error is
kept only when it is of one of those types.

Usually, the user of the object won't call this directly, but just
in case, here you go.

=cut

sub gripe {
    my $self = shift;

    # The current position comes from the parser, not from this object.
    my $parser = $self->parser;
    my $error  = HTML::Lint::Error->new(
        $self->{_file}, $parser->{_line}, $parser->{_column}, @_,
    );

    # An empty type list means "keep everything".
    my @wanted_types = @{ $self->{_types} };
    return if @wanted_types && !$error->is_type( @wanted_types );

    push @{ $self->{_errors} }, $error;

    return;
}


=head2 $lint->newfile( $filename )

Call C<newfile()> whenever you switch to another file in a batch
of linting.  Otherwise, the object thinks everything is from the
same file.  Note that the list of errors is NOT cleared.

Note that I<$filename> does NOT need to match what's put into C<parse()>
or C<parse_file()>.  It can be a description, a URL, or whatever.

You should call C<newfile()> even if you are only validating one file. If
you do not call C<newfile()> then your errors will not have a filename
attached to them.

Returns the filename that was passed in.

=cut

sub newfile {
    my ( $self, $file ) = @_;

    # Forget the previous parser and the record of parse()/eof() calls.
    delete @{$self}{ qw( _parser _parse_called _eof_called ) };

    $self->{_file}       = $file;
    $self->{_line}       = 0;
    $self->{_column}     = 0;
    $self->{_first_seen} = {};

    return $self->{_file};
}

1;

=head1 MODIFYING HTML::LINT'S BEHAVIOR

Sometimes you'll have HTML that for some reason cannot conform to
HTML::Lint's expectations.  For those instances, you can use HTML
comments to modify HTML::Lint's behavior.

Say you have an image where for whatever reason you can't get
dimensions for the image.  This HTML snippet:

    <img src="this.png" height="120" width="50" alt="Company logo">
    <img src="that.png">

causes this error:

    foo.html (14:20) <img src="that.png"> tag has no HEIGHT and WIDTH attributes

But if for some reason you can't get those dimensions when you build
the page, you can at least stop HTML::Lint complaining about it.

    <img src="this.png" height="120" width="50" alt="Company logo">
    <!-- html-lint elem-img-sizes-missing: off, elem-img-alt-missing: off -->
    <img src="that.png">
    <!-- html-lint elem-img-sizes-missing: on, elem-img-alt-missing: on -->

If you want to turn off all HTML::Lint warnings for a block of code, use

    <!-- html-lint all: off -->

And turn them back on with

    <!-- html-lint all: on -->

You don't have to use "on" and "off".  For "on", you can use "true"
or "1".  For "off", you can use "0" or "false".

For a list of possible errors and their codes, see L<HTML::Lint::Error>,
or run F<perldoc HTML::Lint::Error>.

=head1 BUGS, WISHES AND CORRESPONDENCE

All bugs and requests are now being handled through GitHub.

    https://github.com/petdance/html-lint/issues

DO NOT send bug reports to http://rt.cpan.org/ or http://code.google.com/

=head1 TODO

=over 4

=item * Check for attributes that require values

=item * <TABLE>s that have no rows.

=item * Form fields that aren't in a FORM

=item * DIVs with nothing in them.

=item * HEIGHT= that have percents in them.

=item * Check for goofy stuff like:

    <b><li></b><b>Hello Reader - Spanish Level 1 (K-3)</b>

=back

=head1 COPYRIGHT & LICENSE

Copyright 2005-2018 Andy Lester.

This program is free software; you can redistribute it and/or modify it
under the terms of the Artistic License v2.0.

http://www.opensource.org/licenses/Artistic-2.0

Please note that these modules are not products of or supported by the
employers of the various contributors to the code.

=head1 AUTHOR

Andy Lester, andy at petdance.com

=cut

1;