1#! perl
2# Copyright (C) 2010-2011, Parrot Foundation.
3
4=head1 NAME
5
6tools/docs/mk_html_docs.pl - Write HTML documentation
7
8=head1 SYNOPSIS
9
10    % perl tools/docs/mk_html_docs.pl [--version=VERSION]
11
12=head1 DESCRIPTION
13
14This script writes the HTML documentation for Parrot by transforming existing POD
15files into pretty HTML output. Index pages are also generated.
16
17This script will look in the F<docs/index> directory for JSON files.  Each JSON
18file will generate a separate "table of contents" page, and each referenced POD
file will generate an HTML page. Referenced files must reside within the Parrot
20repo, but they may be located outside of the F</docs> directory.
21
22=head2 JSON Syntax
23
24Each JSON file should look similar to the following:
25
26    {
27       "page" : "index",
28       "content" : [
29          {
30             "title" : "Introduction",
31             "source" : [
32                "docs/intro.pod",
33                "docs/project/*.pod"
34             ]
35          },
36          {
37             "title" : "Working with Parrot",
38             "source" : [
39                "docs/running.pod",
40                "docs/tests.pod",
41                "docs/gettingstarted.pod",
42                "docs/submissions.pod"
43             ],
44             "resource" : "docs/*.png"
          }
       ]
    }
47
48=head3 Elements
49
50=over
51
52=item C<page>
53
54The name of this page. It must be unique for all JSON pages; the generated HTML
55will reside at "$target_dir/$page.html", where C<$target_dir> is the
56destination for all generated content.
57
58Each page must contain a C<content> element.
59
60=item C<content>
61
62An array of sections shown for this page.
63
64Each section must be a hash that contains a C<title> and C<source> entry.
65
66=item C<title>
67
68The title of each section. It need not be unique.
69
70=item C<source>
71
72A listing of POD files. This may be an array or a string; a string will behave as
73an array of one element.
74
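Each element in the array must be a path relative to the Parrot repo, such as
F<src/pmc/default.pmc>. Globbing is supported, so paths like F<src/pmc/*.pmc> are
also allowed. An element of the form C<:name> is not a file: it produces a link
to the page whose C<page> entry is C<name> (compared case-insensitively).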
78
79=item C<resource>
80
81An optional listing of files that should be copied directly to
82C<$target_dir>. This is useful for things like images that require no
83transformation, but should be accessible to generated output.
84
85This element behaves similarly to C<source>: a string or array may be passed,
86and globbing is performed for each element.
87
88=back
89
90=cut
91
92use strict;
93use warnings;
94
# As long as we have a separate docs/ makefile, this script is run from
# inside docs/, so jump up a directory to catch all the files. Every relative
# path used below ('docs/index', 'docs/html', ...) is resolved against it.
# Note that this statement executes at run time, i.e. only after all "use"
# lines in this file have already been processed at compile time.
# Stop right away if the directory change fails: continuing would read and
# write files relative to the wrong location.
chdir '..'
    or die "Cannot change to parent directory: $!\n";
98
99use lib 'lib';
100use Fatal qw/open close/;
101
102use File::Basename qw/basename dirname/;
103use File::Path;
104use File::Copy;
105use File::Spec;
106use Getopt::Long;
107use JSON;
108use Parrot::Docs::PodToHtml;
109use Parrot::Docs::HTMLPage;
110
# Version string handed to the header and footer of the generated index
# pages; stays empty unless --version=VERSION is given.
my $version = '';

# GetOptions reports unknown or malformed options itself and returns false
# in that case; stop instead of generating documentation with bad options.
GetOptions( 'version=s' => \$version )
    or die "Usage: perl tools/docs/mk_html_docs.pl [--version=VERSION]\n";

# Directory that receives all generated output.
my $target_dir   = 'docs/html';
# Resource location handed to the header and footer of each index page.
my $resource_dir = '../resources';

# Decoder shared by all JSON index files.
my $json = JSON->new();
119
# Transform the json: read every index file and record, for each of its
# sections, which files must be transformed, the order in which they are
# listed, and which resources must be copied.
# %pages maps the lower-cased page name to the decoded JSON structure.
my %pages;
my @json_index_files = glob 'docs/index/*.json';
foreach my $index_file (@json_index_files) {
    # Slurp the whole file (open and close are checked by Fatal).
    open my $fh, '<', $index_file;
    my $contents = do { local $/; <$fh> };
    close $fh;

    my $section = eval { $json->decode($contents) };
    if ($@) {
        print STDERR "Error in $index_file:\n";
        die $@;
    }

    # Give a readable error instead of failing later on a non-hash top-level
    # value or silently storing a page under an empty name.
    if (ref($section) ne 'HASH' || !defined $section->{page}) {
        die "Error in $index_file: expected a JSON object with a \"page\" entry\n";
    }

    foreach my $chunk (@{ $section->{content} }) {
        my @sources_list   = canonicalize_files($chunk->{source});
        my @resources_list = canonicalize_files($chunk->{resource});

        # These are only literals, no globs (for now?). Like "source", a
        # single string is accepted as a list of one element.
        if (exists $chunk->{exclude}) {
            my $exclude    = $chunk->{exclude};
            my @exclusions = ref($exclude) eq 'ARRAY' ? @{$exclude}
                           : defined $exclude         ? ($exclude)
                           :                            ();
            my %excluded = map { $_ => 1 } @exclusions;
            @sources_list = grep { !$excluded{$_} } @sources_list;
        }

        # Each file is transformed only once even when several patterns
        # matched it (first occurrence wins, so the order is reproducible),
        # while the listing keeps every occurrence in its original order.
        my %seen;
        $chunk->{input_files} = [ grep { !$seen{$_}++ } @sources_list ];
        $chunk->{sorted_list} = \@sources_list;
        $chunk->{resources}   = \@resources_list;
    }
    $pages{lc $section->{page}} = $section;
}
166
# Transform every referenced POD file, resolve links between pages and copy
# static resources. Pages are visited in sorted order so that repeated runs
# behave identically (e.g. when one file is referenced from several pages).
foreach my $page_name (sort keys %pages) {
    my $page = $pages{$page_name};
    foreach my $section (@{ $page->{content} }) {
        foreach my $source (@{ $section->{input_files} }) {
            if ($source =~ /^:(.*)/) {
                # Indicates link to other page.
                my $link = lc $1;
                if (! exists $pages{$link}) {
                    die "invalid link $source specified.\n";
                }
                # assuming a link only in one page
                $pages{$link}->{parent} = $page;
            }
            else {
                transform_input($source, $page->{page}, $page->{title});
            }
        }
        foreach my $resource (@{ $section->{resources} }) {
            # Resources keep their repository-relative path below $target_dir.
            my $outfile = File::Spec->catfile($target_dir, $resource);
            File::Path::mkpath(File::Basename::dirname($outfile));
            # A resource that cannot be copied is reported but does not stop
            # the run; previously such a failure went completely unnoticed.
            File::Copy::copy($resource, $outfile)
                or warn "Could not copy $resource to $outfile: $!\n";
        }
    }
}
191
192
# Page titles of the transformed POD files, keyed by input path. The hash is
# filled in by transform_input() and read below to label the links.
# NOTE(review): the transform pass has already stored titles in this hash by
# the time this declaration is executed; that appears to rely on a run-time
# "my" not emptying the variable. Consider declaring it before the first
# call to transform_input().
my %file_titles;
#
# generate index/header pages
#

foreach my $outfilename (sort keys %pages) {
    my $page  = $pages{$outfilename};
    my $title = $page->{title};

    my $outfile = File::Spec->catfile($target_dir, $outfilename) . '.html';

    # Use the legacy functional mkpath instead of make_path to accommodate
    # older versions of Perl.
    File::Path::mkpath(File::Basename::dirname($outfile));

    open my $out_fh, '>', $outfile;

    # set up and output header
    my $nav_HTML = qq{<a href="index.html">Home</a>};
    print $out_fh Parrot::Docs::HTMLPage->header($title, $nav_HTML,
                                                 $resource_dir, $version);

    foreach my $section (@{ $page->{content} }) {
        # output Section title
        print $out_fh "<h2>$section->{title}</h2>\n\n<ul>";

        foreach my $source (@{ $section->{sorted_list} }) {
            if ($source =~ /^:(.*)/) {
                my $link = lc $1;
                # output link with title
                print $out_fh qq(<li><a href="$link.html">$pages{$link}->{title}</a></li>\n);
            }
            else {
                # Output source and name (from title). Fall back to the path
                # when no title was recorded, so that the link never ends up
                # with empty text.
                my $label = $file_titles{$source};
                $label = $source unless defined($label) && length($label);
                print $out_fh qq(<li><a href="$source.html">$label</a></li>\n);
            }
        }
        print $out_fh "</ul>\n\n";
    }

    # ========================================================================#
    # Note: The below is a bit of a hack to output HTML links directly into   #
    #       the 'index.html' file. This script uses JSON formatted files to   #
    #       pull in various *.pod files in order to create 'index.html'       #
    #       (along with the other *.html pages). Unfortunately, there is no   #
    #       simple way to insert HTML markup directly into 'index.html';      #
    #       hence the bits below.  -- acy 01/25/12                            #
    #                                                                         #
    # Note: We only want to do this with the 'index.html' page, and NOT any   #
    #       of the other pages (e.g., 'ops.html' ... 'pct_tutorial.html').    #
    # ========================================================================#
    if ($outfilename eq "index") {
        my $lang_title = "<h2>Development Languages</h2>\n\n";
        my $lang1 = "<ul><li><a href=\"http://whiteknight.github.com/Rosella/winxed/index.html\">The Winxed Programming Language</a></li>\n";
        my $lang2 = "<li><a href=\"https://github.com/perl6/nqp\">The NQP Programming Language</a></li>\n</ul>\n\n";
        print $out_fh Parrot::Docs::HTMLPage->body($lang_title, $lang1, $lang2);
    }

    # output footer
    print $out_fh Parrot::Docs::HTMLPage->footer('', $resource_dir, $version);

    # Close explicitly: with Fatal in effect a failed close (for instance
    # buffered output that could not be written) aborts the run instead of
    # leaving a truncated page behind unnoticed.
    close $out_fh;
}

exit 0;
258
259=head2 Utility Methods
260
261=over
262
263=item C<canonicalize_files($json_chunk)>
264
265Process a given JSON chunk to retrieve a list of input files. Currently, this
266is used to retrieve input POD files and static images.
267
268The JSON chunk may be an array or a string (which is transformed into a
269single-element array).
270
271Each array element should be a file path relative to the parrot repo; it is not
272necessary for resources to live under docs/. Globs are also allowed, so you can
273include many files at once.
274
275=cut
276
sub canonicalize_files {
    my ($file_chunk) = @_;

    # Normalise the argument to a flat list of patterns: an array reference
    # contributes its elements, any other true value counts as one pattern,
    # and a false value (such as a missing JSON entry) yields no patterns.
    my @patterns = ref($file_chunk) eq "ARRAY" ? @{$file_chunk}
                 : $file_chunk                 ? ($file_chunk)
                 :                               ();

    # Expand the patterns one after another. The matches of each pattern are
    # sorted, while the patterns themselves keep the order they were given in.
    my @expanded = map { sort( glob($_) ) } @patterns;

    return @expanded;
}
298
299
300=item C<transform_input($input, $parent, $parent_title)>
301
Transform the specified POD file into HTML. C<$input> should be a path to the
303POD file, relative to the Parrot repo (e.g., "src/pmc/default.pmc"). C<$parent>
304and C<$parent_title> both refer to the parent's "page" JSON chunk and "title"
305JSON chunk, respectively. Parent information will be used to create breadcrumb
306links.
307
308The resulting HTML will be copied to C<$target_dir>, preserving the relative
309location of the input file, for example:
310
    "src/pmc/default.pmc" => "$target_dir/src/pmc/default.pmc.html"
312
313=cut
314
sub transform_input {
    my ($input, $parent, $parent_title) = @_;

    # Inputs come from glob expansion, which can also yield directories or
    # names that do not exist; refuse anything that is not a plain file.
    if (! -f $input) {
        die "$input not found or not a regular file\n" .
            "You might need to restrict your glob specification.";
    }

    my $formatter = Parrot::Docs::PodToHtml->new();
    $formatter->set_parent($parent, $parent_title);

    # Errata is currently noisy; e.g. complains about U<> even after
    # formatting it as expected. skip it until we can properly quiet it down.
    $formatter->no_errata_section(1);

    # The output mirrors the input path below $target_dir, with ".html"
    # appended to the file name.
    my $outfile = File::Spec->catfile($target_dir, $input) . '.html';

    # Use the legacy functional mkpath instead of make_path to accommodate
    # older versions of Perl.
    File::Path::mkpath(File::Basename::dirname($outfile));

    open my $out_fh, '>', $outfile;

    $formatter->output_fh($out_fh);
    $formatter->parse_file($input);
    warn "$input generated no HTML output\n"
        unless $formatter->content_seen;

    # Remember the title so the index pages can use it as link text.
    my $page_title = $formatter->return_page_title();
    $file_titles{$input} = $page_title;

    # Close explicitly: with Fatal in effect a failed close (for instance
    # buffered output that could not be written) aborts the run instead of
    # leaving a truncated page behind unnoticed.
    close $out_fh;

    # As before, the value of the call is the page title.
    return $page_title;
}
346
347=back
348
349=cut
350
351# Local Variables:
352#   mode: cperl
353#   cperl-indent-level: 4
354#   fill-column: 100
355# End:
356# vim: expandtab shiftwidth=4:
357