1# Copyright (C) 2004-2012, Parrot Foundation.
2
3=head1 NAME
4
5Parrot::Docs::File - Docs-Related File
6
7=head1 SYNOPSIS
8
9        use Parrot::Docs::File;
10        my $file = Parrot::Docs::File->new('MANIFEST');
11
12=head1 DESCRIPTION
13
14This C<Parrot::IO::File> subclass adds a few documentation-related
15methods to do with POD and file type.
16
17It's used by the documentation tools in F<tools/docs>.
18
19=head2 Class Methods
20
21=over 4
22
23=cut
24
25package Parrot::Docs::File;
26
27use strict;
28use warnings;
29
30use base qw( Parrot::IO::File );
31
32use Pod::Simple::Checker;
33use Parrot::Docs::POD2HTML;
34
35# wrapped here to reset the error timestamp to speed up check_pod()
36sub write {
37    my $self                 = shift;
38    $self->{POD_ERRORS_TIME} = 0;
39    return $self->SUPER::write(@_);
40}
41
42my $UNDEFINED = 'Undefined';
43
44# These are the Parrot file types excluding the ICU specific ones.
45
46my %type_for_suffix = (
47    'BASIC'        => 'README file',
48    'C'            => 'C code',
49    'PL'           => 'Perl script',
50    'SKIP'         => 'MANIFEST skip file',
51    'TXT'          => 'Text file',
52    'txt'          => 'Text file',
53    'a'            => 'Library file',
54    'bas'          => 'BACIC code',
55    'bef'          => 'Befunge code',
56    'bf'           => 'BF code',
57    'bnf'          => 'Grammar file',
58    'c'            => 'C code',
59    'cs'           => 'C# code',
60    'declarations' => 'Library declarations file',
61    'def'          => 'Library definitions file',
62    'dev'          => 'Development documentation',
63    'dump'         => 'Dump file',
64    'el'           => 'eMacs Lisp code',
65    'exp'          => 'Perl 6 expected parse tree',
66    'flag'         => 'Some kind of IMCC file',
67    'generated'    => 'MANIFEST generated file',
68    'h'            => 'C header',
69    'hacking'      => 'README file',
70    'il'           => 'MSIL assembler code',
71    'imc'          => 'IMC code',
72    'in'           => 'Configuration file',
73    'jako'         => 'Jako code',
74    'jit'          => 'JIT file',
75    'l'            => 'Lex file',
76    'num'          => 'Opcode numbering file',
77    'o'            => 'Compiled file',
78    'ook'          => 'Ook! code',
79    'ops'          => 'Parrot opcode file',
80    'p6'           => 'Perl 6 code',
81    'pasm'         => 'Parrot assembly code',
82    'pbc'          => 'Parrot bytecode',
83    'pl'           => 'Perl script',
84    'pm'           => 'Perl module',
85    'pmc'          => 'PMC code',
86    'pod'          => 'POD documentation',
87    'prd'          => 'Parse::RecDescent grammar file',
88    'ps'           => 'Postscript code',
89    'py'           => 'Python code',
90    'rb'           => 'Ruby code',
91    's'            => 'Some kind of configuration file',
92    'scheme'       => 'Scheme code',
93    'sh'           => 'Shell script',
94    'spec'         => 'RPM build specification',
95    't'            => 'Test file',
96    'tbl'          => 'Vtable file',
97    'txt'          => 'Text file',
98    'urm'          => 'URM code',
99    'vim'          => 'Vim file',
100    'xml'          => 'XML file',
101    'xs'           => 'XS code',
102    'y'            => 'Yacc file'
103);
104
105# These are the various types of files without suffix.
106
107my %type_for_name = (
108    'Artistic'             => 'Licence file',
109    'BUGS'                 => 'Project info',
110    'ChangeLog'            => 'Project info',
111    'Changes'              => 'Project info',
112    'CREDITS'              => 'Project info',
113    'DEVELOPING'           => 'Project info',
114    'harness'              => 'Perl test harness',
115    'INSTALL'              => 'Installation documentation',
116    'LICENSE'              => 'Licence file',
117    'MAINTAINER'           => 'Maintainer info',
118    'Makefile'             => 'Makefile',
119    'MANIFEST'             => 'Manifest file',
120    'PBC_COMPAT'           => 'Bytecode compatibility file',
121    'PLATFORMS'            => 'Project info',
122    'README'               => 'README file',
123    'RELEASE_INSTRUCTIONS' => 'Project info',
124    'RESPONSIBLE_PARTIES'  => 'Project info',
125    'TODO'                 => 'TODO file',
126    'VERSION'              => 'Project info',
127);
128
129=item C<type_for_suffix($suffix)>
130
131This is a class method that converts a file suffix to a description of
132the type of files which have this suffix.
133
134=cut
135
136sub type_for_suffix {
137    my $self   = shift;
138    my $suffix = shift;
139
140    return $type_for_suffix{$suffix} if exists $type_for_suffix{$suffix};
141
142    return $UNDEFINED;
143}
144
145=item C<type_for_name($name)>
146
147This is a class method that converts a file name to a description of the
148type of files which have this name.
149
150=cut
151
152sub type_for_name {
153    my $self = shift;
154    my $name = shift;
155
156    return $type_for_name{$name} if exists $type_for_name{$name};
157
158    return $UNDEFINED;
159}
160
161=back
162
163=head2 Instance Methods
164
165=over 4
166
167=item C<type()>
168
169This first tries to find a type for the file's suffix, failing that it
170looks at the file's name. If it fails for both it returns 'Undefined'.
171
172=cut
173
174sub type {
175    my $self = shift;
176    my $type = $self->type_for_suffix( $self->suffix );
177
178    return $type unless $type eq $UNDEFINED;
179
180    $type = $self->type_for_name( $self->name );
181
182    return $type unless $type eq $UNDEFINED;
183
184    return 'Executable' if $self->is_executable;
185
186    return $UNDEFINED;
187}
188
189=item C<is_of_type($type)>
190
191Returns whether the file is of the specified type.
192
193=cut
194
195sub is_of_type {
196    my $self = shift;
197    my $type = shift;
198
199    return 0 unless defined $type;
200
201    return $self->type eq $type;
202}
203
204=item C<check_pod()>
205
206Runs C<Pod::Simple::Checker> on the contents of the file. Executable
207files, and F<*.dump> files are assumed not to contain POD and therefore
208not checked.
209
210Note that the results are cached and the POD will only be rechecked
211if the file has been modified since it was checked.
212
213=cut
214
215sub check_pod {
216    my $self = shift;
217
218    return
219        if ( $self->is_executable and $self->name ne 'parrotbug' )
220        or $self->suffix eq 'dump';
221
222    if ( !exists $self->{POD_ERRORS_TIME}
223        or $self->modified_since( $self->{POD_ERRORS_TIME} ) )
224    {
225        my $checker = Pod::Simple::Checker->new;
226
227        $self->{POD_ERRORS_TIME} = time;
228        $self->{POD_ERRORS}      = '';
229        $checker->output_string( \$self->{POD_ERRORS} );
230        $checker->parse_file( $self->path );
231        $self->{POD_ERRORS_COUNT} = $checker->errors_seen;
232        $self->{CONTAINS_POD}     = $checker->content_seen;
233    }
234}
235
236=item C<contains_pod()>
237
238Tells you whether there is any POD formatted documentation in the file.
239Executable files are assumed not to contain POD.
240
241=cut
242
243sub contains_pod {
244    my $self = shift;
245
246    $self->check_pod;
247
248    return $self->{CONTAINS_POD};
249}
250
251=item C<num_pod_errors()>
252
253Tells you the number of POD errors in the file.
254
255=cut
256
257sub num_pod_errors {
258    my $self = shift;
259
260    $self->check_pod;
261
262    return $self->{POD_ERRORS_COUNT} || 0;
263}
264
265=item C<pod_errors($options)>
266
267Gives you a description of any POD errors in the file.
268
269=cut
270
271sub pod_errors {
272    my $self = shift;
273
274    $self->check_pod;
275
276    return $self->{POD_ERRORS};
277}
278
279=item C<is_docs_link()>
280
281Returns whether the file is suitable for inclusion in a documentation link.
282
283If a file contains plain text rather than POD it may be directly linked to.
284
285=cut
286
287sub is_docs_link {
288    my $self = shift;
289
290    # GH #626 - This needs more thought. I'm trying to work out which files
291    # it's sensible to link directly to. Suffixes other than txt are a
292    # problem (for me at least) because the browser thinks it should
293    # download the file.
294
295    if ( $self->has_suffix ) {
296        return 0 if $self->suffix !~ m/txt/i;
297    }
298    else {
299        return 1 if $self->name =~ m/^[[:upper:]]+$/;
300    }
301
302    return $self->type =~ /Licence|info|docu|Text|TODO|status|MANIFEST|README/;
303}
304
305=item C<title()>
306
307Returns the title of the file.
308
309=cut
310
311sub title {
312    my $self = shift;
313
314    return $self->name unless $self->contains_pod;
315
316    my $text = $self->read;
317
318    return ''
319        unless $text =~ /^=head1\s+([^\n\r]+)\s*[\n\r]+/smo;
320
321    return $1
322        if ($1 ne 'NAME' and $1 ne 'TITLE');
323
324    return ''
325        unless $text =~ /^=head1\s+(?:NAME|TITLE)\s*[\n\r]+([^\n\r]+)/smo;
326
327    $text = $1;
328
329    # Tidy it up a bit.
330    $text =~ s/^\s+//;
331    $text =~ s/\s+$//;
332    $text =~ s/\s*-$//;
333
334    # There was not text, just another POD command (=head2 probably).
335    return '' if $text =~ /^=\w/;
336
337    return $text unless $text =~ /-/;
338
339    # There has to be some space each side of the dash.
340    my ( $path, $desc ) = split /\s+--?\s+/, $text, 2;
341
342    return $desc;
343}
344
345=item C<short_description()>
346
347Returns a short description of the file extracted from the C<NAME> section
348of the POD documentation, if it exists. If an C<ABSTRACT> is found then
349that is preferred.
350
351=cut
352
353sub short_description {
354    my $self = shift;
355
356    return '' unless $self->contains_pod;
357
358    my @lines = $self->read;
359    my $firstline = shift @lines;
360    return $self->title unless $firstline =~ /^=head1\s+ABSTRACT/;
361
362    my $all_text = join "\n" => @lines;
363    $all_text =~ s/^\s+//;
364    my @paragraphs = split /\n{2,}/, $all_text;
365    my $desc;
366    # For a short description, we take only the first paragraph of any
367    # ABSTRACT.
368    ($desc = $paragraphs[0]) =~ s/\n/ /g;
369    $desc =~ s/\s+/ /sg;
370    # We eliminate certain POD formatting characters.
371    $desc =~ s/[CFL]<([^>]+)>/$1/sg;
372    return $desc;
373}
374
375=back
376
377=head1 SEE ALSO
378
379=over 4
380
381=item C<Parrot::Docs::Directory>
382
383=item C<Parrot::Docs::POD2HTML>
384
385=item C<Pod::Simple::Checker>
386
387=back
388
389=cut
390
3911;
392
393# Local Variables:
394#   mode: cperl
395#   cperl-indent-level: 4
396#   fill-column: 100
397# End:
398# vim: expandtab shiftwidth=4:
399