1# Copyright (C) 2004-2012, Parrot Foundation. 2 3=head1 NAME 4 5Parrot::Docs::File - Docs-Related File 6 7=head1 SYNOPSIS 8 9 use Parrot::Docs::File; 10 my $file = Parrot::Docs::File->new('MANIFEST'); 11 12=head1 DESCRIPTION 13 14This C<Parrot::IO::File> subclass adds a few documentation-related 15methods to do with POD and file type. 16 17It's used by the documentation tools in F<tools/docs>. 18 19=head2 Class Methods 20 21=over 4 22 23=cut 24 25package Parrot::Docs::File; 26 27use strict; 28use warnings; 29 30use base qw( Parrot::IO::File ); 31 32use Pod::Simple::Checker; 33use Parrot::Docs::POD2HTML; 34 35# wrapped here to reset the error timestamp to speed up check_pod() 36sub write { 37 my $self = shift; 38 $self->{POD_ERRORS_TIME} = 0; 39 return $self->SUPER::write(@_); 40} 41 42my $UNDEFINED = 'Undefined'; 43 44# These are the Parrot file types excluding the ICU specific ones. 45 46my %type_for_suffix = ( 47 'BASIC' => 'README file', 48 'C' => 'C code', 49 'PL' => 'Perl script', 50 'SKIP' => 'MANIFEST skip file', 51 'TXT' => 'Text file', 52 'txt' => 'Text file', 53 'a' => 'Library file', 54 'bas' => 'BACIC code', 55 'bef' => 'Befunge code', 56 'bf' => 'BF code', 57 'bnf' => 'Grammar file', 58 'c' => 'C code', 59 'cs' => 'C# code', 60 'declarations' => 'Library declarations file', 61 'def' => 'Library definitions file', 62 'dev' => 'Development documentation', 63 'dump' => 'Dump file', 64 'el' => 'eMacs Lisp code', 65 'exp' => 'Perl 6 expected parse tree', 66 'flag' => 'Some kind of IMCC file', 67 'generated' => 'MANIFEST generated file', 68 'h' => 'C header', 69 'hacking' => 'README file', 70 'il' => 'MSIL assembler code', 71 'imc' => 'IMC code', 72 'in' => 'Configuration file', 73 'jako' => 'Jako code', 74 'jit' => 'JIT file', 75 'l' => 'Lex file', 76 'num' => 'Opcode numbering file', 77 'o' => 'Compiled file', 78 'ook' => 'Ook! code', 79 'ops' => 'Parrot opcode file', 80 'p6' => 'Perl 6 code', 81 'pasm' => 'Parrot assembly code', 82 'pbc' => 'Parrot bytecode', 83 'pl' => 'Perl script', 84 'pm' => 'Perl module', 85 'pmc' => 'PMC code', 86 'pod' => 'POD documentation', 87 'prd' => 'Parse::RecDescent grammar file', 88 'ps' => 'Postscript code', 89 'py' => 'Python code', 90 'rb' => 'Ruby code', 91 's' => 'Some kind of configuration file', 92 'scheme' => 'Scheme code', 93 'sh' => 'Shell script', 94 'spec' => 'RPM build specification', 95 't' => 'Test file', 96 'tbl' => 'Vtable file', 97 'txt' => 'Text file', 98 'urm' => 'URM code', 99 'vim' => 'Vim file', 100 'xml' => 'XML file', 101 'xs' => 'XS code', 102 'y' => 'Yacc file' 103); 104 105# These are the various types of files without suffix. 106 107my %type_for_name = ( 108 'Artistic' => 'Licence file', 109 'BUGS' => 'Project info', 110 'ChangeLog' => 'Project info', 111 'Changes' => 'Project info', 112 'CREDITS' => 'Project info', 113 'DEVELOPING' => 'Project info', 114 'harness' => 'Perl test harness', 115 'INSTALL' => 'Installation documentation', 116 'LICENSE' => 'Licence file', 117 'MAINTAINER' => 'Maintainer info', 118 'Makefile' => 'Makefile', 119 'MANIFEST' => 'Manifest file', 120 'PBC_COMPAT' => 'Bytecode compatibility file', 121 'PLATFORMS' => 'Project info', 122 'README' => 'README file', 123 'RELEASE_INSTRUCTIONS' => 'Project info', 124 'RESPONSIBLE_PARTIES' => 'Project info', 125 'TODO' => 'TODO file', 126 'VERSION' => 'Project info', 127); 128 129=item C<type_for_suffix($suffix)> 130 131This is a class method that converts a file suffix to a description of 132the type of files which have this suffix. 133 134=cut 135 136sub type_for_suffix { 137 my $self = shift; 138 my $suffix = shift; 139 140 return $type_for_suffix{$suffix} if exists $type_for_suffix{$suffix}; 141 142 return $UNDEFINED; 143} 144 145=item C<type_for_name($name)> 146 147This is a class method that converts a file name to a description of the 148type of files which have this name. 149 150=cut 151 152sub type_for_name { 153 my $self = shift; 154 my $name = shift; 155 156 return $type_for_name{$name} if exists $type_for_name{$name}; 157 158 return $UNDEFINED; 159} 160 161=back 162 163=head2 Instance Methods 164 165=over 4 166 167=item C<type()> 168 169This first tries to find a type for the file's suffix, failing that it 170looks at the file's name. If it fails for both it returns 'Undefined'. 171 172=cut 173 174sub type { 175 my $self = shift; 176 my $type = $self->type_for_suffix( $self->suffix ); 177 178 return $type unless $type eq $UNDEFINED; 179 180 $type = $self->type_for_name( $self->name ); 181 182 return $type unless $type eq $UNDEFINED; 183 184 return 'Executable' if $self->is_executable; 185 186 return $UNDEFINED; 187} 188 189=item C<is_of_type($type)> 190 191Returns whether the file is of the specified type. 192 193=cut 194 195sub is_of_type { 196 my $self = shift; 197 my $type = shift; 198 199 return 0 unless defined $type; 200 201 return $self->type eq $type; 202} 203 204=item C<check_pod()> 205 206Runs C<Pod::Simple::Checker> on the contents of the file. Executable 207files, and F<*.dump> files are assumed not to contain POD and therefore 208not checked. 209 210Note that the results are cached and the POD will only be rechecked 211if the file has been modified since it was checked. 212 213=cut 214 215sub check_pod { 216 my $self = shift; 217 218 return 219 if ( $self->is_executable and $self->name ne 'parrotbug' ) 220 or $self->suffix eq 'dump'; 221 222 if ( !exists $self->{POD_ERRORS_TIME} 223 or $self->modified_since( $self->{POD_ERRORS_TIME} ) ) 224 { 225 my $checker = Pod::Simple::Checker->new; 226 227 $self->{POD_ERRORS_TIME} = time; 228 $self->{POD_ERRORS} = ''; 229 $checker->output_string( \$self->{POD_ERRORS} ); 230 $checker->parse_file( $self->path ); 231 $self->{POD_ERRORS_COUNT} = $checker->errors_seen; 232 $self->{CONTAINS_POD} = $checker->content_seen; 233 } 234} 235 236=item C<contains_pod()> 237 238Tells you whether there is any POD formatted documentation in the file. 239Executable files are assumed not to contain POD. 240 241=cut 242 243sub contains_pod { 244 my $self = shift; 245 246 $self->check_pod; 247 248 return $self->{CONTAINS_POD}; 249} 250 251=item C<num_pod_errors()> 252 253Tells you the number of POD errors in the file. 254 255=cut 256 257sub num_pod_errors { 258 my $self = shift; 259 260 $self->check_pod; 261 262 return $self->{POD_ERRORS_COUNT} || 0; 263} 264 265=item C<pod_errors($options)> 266 267Gives you a description of any POD errors in the file. 268 269=cut 270 271sub pod_errors { 272 my $self = shift; 273 274 $self->check_pod; 275 276 return $self->{POD_ERRORS}; 277} 278 279=item C<is_docs_link()> 280 281Returns whether the file is suitable for inclusion in a documentation link. 282 283If a file contains plain text rather than POD it may be directly linked to. 284 285=cut 286 287sub is_docs_link { 288 my $self = shift; 289 290 # GH #626 - This needs more thought. I'm trying to work out which files 291 # it's sensible to link directly to. Suffixes other than txt are a 292 # problem (for me at least) because the browser thinks it should 293 # download the file. 294 295 if ( $self->has_suffix ) { 296 return 0 if $self->suffix !~ m/txt/i; 297 } 298 else { 299 return 1 if $self->name =~ m/^[[:upper:]]+$/; 300 } 301 302 return $self->type =~ /Licence|info|docu|Text|TODO|status|MANIFEST|README/; 303} 304 305=item C<title()> 306 307Returns the title of the file. 308 309=cut 310 311sub title { 312 my $self = shift; 313 314 return $self->name unless $self->contains_pod; 315 316 my $text = $self->read; 317 318 return '' 319 unless $text =~ /^=head1\s+([^\n\r]+)\s*[\n\r]+/smo; 320 321 return $1 322 if ($1 ne 'NAME' and $1 ne 'TITLE'); 323 324 return '' 325 unless $text =~ /^=head1\s+(?:NAME|TITLE)\s*[\n\r]+([^\n\r]+)/smo; 326 327 $text = $1; 328 329 # Tidy it up a bit. 330 $text =~ s/^\s+//; 331 $text =~ s/\s+$//; 332 $text =~ s/\s*-$//; 333 334 # There was not text, just another POD command (=head2 probably). 335 return '' if $text =~ /^=\w/; 336 337 return $text unless $text =~ /-/; 338 339 # There has to be some space each side of the dash. 340 my ( $path, $desc ) = split /\s+--?\s+/, $text, 2; 341 342 return $desc; 343} 344 345=item C<short_description()> 346 347Returns a short description of the file extracted from the C<NAME> section 348of the POD documentation, if it exists. If an C<ABSTRACT> is found then 349that is preferred. 350 351=cut 352 353sub short_description { 354 my $self = shift; 355 356 return '' unless $self->contains_pod; 357 358 my @lines = $self->read; 359 my $firstline = shift @lines; 360 return $self->title unless $firstline =~ /^=head1\s+ABSTRACT/; 361 362 my $all_text = join "\n" => @lines; 363 $all_text =~ s/^\s+//; 364 my @paragraphs = split /\n{2,}/, $all_text; 365 my $desc; 366 # For a short description, we take only the first paragraph of any 367 # ABSTRACT. 368 ($desc = $paragraphs[0]) =~ s/\n/ /g; 369 $desc =~ s/\s+/ /sg; 370 # We eliminate certain POD formatting characters. 371 $desc =~ s/[CFL]<([^>]+)>/$1/sg; 372 return $desc; 373} 374 375=back 376 377=head1 SEE ALSO 378 379=over 4 380 381=item C<Parrot::Docs::Directory> 382 383=item C<Parrot::Docs::POD2HTML> 384 385=item C<Pod::Simple::Checker> 386 387=back 388 389=cut 390 3911; 392 393# Local Variables: 394# mode: cperl 395# cperl-indent-level: 4 396# fill-column: 100 397# End: 398# vim: expandtab shiftwidth=4: 399