1#! /usr/bin/perl
2#
3# Copyright (c) 2004  Motoyuki Kasahara
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions
7# are met:
8# 1. Redistributions of source code must retain the above copyright
9#    notice, this list of conditions and the following disclaimer.
10# 2. Redistributions in binary form must reproduce the above copyright
11#    notice, this list of conditions and the following disclaimer in the
12#    documentation and/or other materials provided with the distribution.
13# 3. Neither the name of the project nor the names of its contributors
14#    may be used to endorse or promote products derived from this software
15#    without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20# ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27# SUCH DAMAGE.
28#
29
30#
31# html-toc -- make `table of contents' of HTML files.
32#
33# Usage:
#     html-toc [option...] input-file...
35#
36# `html-toc' reads HTML files, and generates `table of contents' (TOC)
# of the HTML files. The TOC is created from <h1>...<h6> tags and
# <a name="..."> tags in the HTML files.  Since `html-toc' doesn't parse
# HTML precisely, the tags must be in the following form:
40#
41#      <h?><a name="...">heading</a></h?>
42#
43# where `?' is 1..6.  Note that <h?> and </h?> above must be in the same
44# line.
45#
# `html-toc' writes the TOC to standard output by default.
47#
48# Options:
49#     -o file               specify output file.
#     -h                    do not output file name in <a href="....">
#                           (implied when only one input file is given).
51#     -m level              minimum target heading level
52#                           (default: h1)
53#     -M level              maximum target heading level
54#                           (default: h6)
55
# Perl 5.6 or later is required for `use warnings', lexical file handles
# and the three-argument form of open().
require 5.006;
use strict;
use warnings;
use Getopt::Std;

#
# Usage
#
my $usage = "Usage: $0 [option...] input-file...\n";

#
# Variables
#
my $out_file = '-';       # output file name; `-' means standard output
my $fragment_only = 0;    # true: emit href="#name" without the file name
my $min_level = 1;        # lowest heading level listed in the TOC
my $max_level = 6;        # highest heading level listed in the TOC

#
# Subroutines
#

# parse_level(OPTION, VALUE)
#
# Convert VALUE, the argument given to the command line option -OPTION,
# into a heading level number.  Both `3' and `h3' (or `H3') are accepted.
# Dies with the usage message unless the result is in the range 1..6.
sub parse_level {
    my ($option, $raw) = @_;

    my $level = $raw;
    $level =~ s/^h//i;
    if ($level !~ /\A[1-6]\z/) {
        die "$0: invalid heading level for option -" . $option
            . ": " . $raw . "\n" . $usage;
    }
    return $level;
}

# escape_attr(TEXT)
#
# Return a copy of TEXT with the characters `&', `<', `>' and `"'
# replaced by character references, so that TEXT can be placed inside
# a double-quoted HTML attribute value.
sub escape_attr {
    my ($text) = @_;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

#
# Parse command line arguments.
#
my %options;
getopts('o:hm:M:', \%options) or die $usage;
die $usage if (@ARGV == 0);

# With a single input file, the TOC is assumed to refer to that file
# itself, so the file name is left out of the links just as with -h.
$fragment_only = 1 if (defined($options{h}) || @ARGV == 1);
$out_file = $options{o} if (defined($options{o}));
$min_level = parse_level('m', $options{m}) if (defined($options{m}));
$max_level = parse_level('M', $options{M}) if (defined($options{M}));
if ($min_level > $max_level) {
    die "$0: minimum heading level (h" . $min_level
        . ") exceeds maximum heading level (h" . $max_level . ")\n";
}

#
# Open the output file.
#
my $out_fh;
if ($out_file eq '-') {
    $out_file = 'stdout';
    $out_fh = \*STDOUT;
} else {
    # The three-argument form keeps characters such as `>' or `|' in the
    # file name from being interpreted as an open mode.
    open($out_fh, '>', $out_file)
        or die "$0: failed to open the file, $!: $out_file\n";
}

# Write TEXT followed by a newline to the output file, indented
# according to the heading level LEVEL (one space per nesting level,
# starting from one space at the minimum level).
my $put = sub {
    my ($level, $text) = @_;

    print {$out_fh} ' ' x ($level - $min_level + 1), $text, "\n"
        or die "$0: failed to write to the file, $!: $out_file\n";
};

#
# Read HTML files and output the TOC.
#
# $current_level is the heading level whose <ul> is currently open.
#
my $current_level = $min_level;

print {$out_fh} "<ul>\n"
    or die "$0: failed to write to the file, $!: $out_file\n";

foreach my $in_file (@ARGV) {
    my $in_fh;
    if ($in_file eq '-') {
        # The former two-argument open() read standard input for `-';
        # keep that behavior.
        $in_fh = \*STDIN;
    } else {
        open($in_fh, '<', $in_file)
            or die "$0: failed to open the file, $!: $in_file\n";
    }

    # Link prefix for this file; empty when only fragments are wanted.
    my $file_ref = $fragment_only ? '' : escape_attr($in_file);

    while (defined(my $line = <$in_fh>)) {
        chomp $line;
        next unless ($line =~ m|^<h([1-6])><a name="([^\"]+)">(.*)</a>|);
        my ($level, $tag, $heading) = ($1, $2, $3);
        next if ($level < $min_level || $level > $max_level);

        # Close lists that are deeper than this heading ...
        while ($current_level > $level) {
            $current_level--;
            $put->($current_level, '</ul>');
        }
        # ... or open nested lists until its depth is reached.
        while ($current_level < $level) {
            $put->($current_level, '<ul>');
            $current_level++;
        }

        $put->($current_level,
               "<li><a href=\"" . $file_ref . "#" . $tag . "\">"
               . $heading . "</a>");
    }

    close($in_fh);
}

# Close all lists which are still open.
while ($current_level > $min_level) {
    $current_level--;
    $put->($current_level, '</ul>');
}

print {$out_fh} "</ul>\n"
    or die "$0: failed to write to the file, $!: $out_file\n";

# Buffered write errors (e.g. a full disk) are reported at close time.
close($out_fh)
    or die "$0: failed to close the file, $!: $out_file\n";
152