#! /usr/bin/perl
#
# Copyright (c) 2004  Motoyuki Kasahara
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of the project nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

#
# html-toc -- make `table of contents' of HTML files.
#
# Usage:
#     html-toc [option...] input-file...
#
# `html-toc' reads HTML files, and generates `table of contents' (TOC)
# of the HTML files.  The TOC is created from <h1>...<h6> tags and
# <a name="..."> tag in the HTML files.
# Since `html-toc' doesn't parse HTML precisely, the tags must be in the
# following form:
#
#     <h?><a name="...">heading</a></h?>
#
# where `?' is 1..6.  Note that <h?> and </h?> above must be on the same
# line.
#
# `html-toc' outputs TOC to standard out by default.
#
# Options:
#     -o file        specify output file.
#     -h             do not output file name in <a href="....">.
#     -m level       minimum target heading level
#                    (default: h1)
#     -M level       maximum target heading level
#                    (default: h6)

# Three-argument open(), lexical file handles and `use warnings' all
# need Perl 5.6, hence the minimum version is 5.006 rather than 5.005.
use 5.006;
use strict;
use warnings;
use Getopt::Std;

#
# Usage
#
my $usage = "Usage: $0 [option...] input-file...\n";

#
# Convert the argument of `-m' or `-M' to a heading level.
#
# Arguments:
#     $option    option letter (`m' or `M'); used in the error message only.
#     $value     option argument; either `h1'..`h6' or `1'..`6'.
#
# Returns the level as an integer in 1..6.  Dies if $value is anything
# else, so that a typo such as `-m foo' is reported instead of being
# silently treated as level 0.
#
sub parse_level {
    my ($option, $value) = @_;

    if ($value =~ /\Ah?([1-6])\z/) {
        return $1;
    }
    die "$0: invalid heading level for -$option (expected h1..h6): $value\n";
}

#
# Variables
#
my $out_file      = '-';
my $fragment_only = 0;
my $min_level     = 1;
my $max_level     = 6;

#
# Parse command line arguments.
#
my %options;
getopts('o:hm:M:', \%options) or die $usage;
die $usage if (@ARGV == 0);

# With a single input file every link stays within that file, so the
# file name is omitted from href even when `-h' is not given.
$fragment_only = 1 if (defined($options{h}) || @ARGV == 1);
$out_file  = $options{o} if (defined($options{o}));
$min_level = parse_level('m', $options{m}) if (defined($options{m}));
$max_level = parse_level('M', $options{M}) if (defined($options{M}));

if ($min_level > $max_level) {
    die "$0: minimum heading level (h$min_level) exceeds maximum (h$max_level)\n";
}

#
# Open the output file.
#
my $out;
if ($out_file eq '-') {
    # From here on $out_file is only used in error messages.
    $out_file = 'stdout';
    # The mode string is a literal, so the two-argument form is safe here.
    open($out, '>&STDOUT')
        or die "$0: failed to duplicate standard output, $!\n";
} else {
    open($out, '>', $out_file)
        or die "$0: failed to open the file, $!: $out_file\n";
}

#
# Read HTML files and output TOC.
#
# $current_level is the heading level of the innermost <ul> currently
# open.  The outermost <ul> corresponds to $min_level.
#
my $current_level = $min_level;

# Indentation for a line written at the current nesting depth.
my $indent = sub { return ' ' x ($current_level - $min_level + 1) };

print {$out} "<ul>\n";

foreach my $in_file (@ARGV) {
    open(my $in, '<', $in_file)
        or die "$0: failed to open the file, $!: $in_file\n";

    while (my $line = <$in>) {
        chomp $line;

        # The heading is matched non-greedily so that it ends at the
        # first </a>, even if further anchors follow on the same line.
        next unless ($line =~ m|^<h([1-6])><a name="([^"]+)">(.*?)</a>|);
        my ($level, $tag, $heading) = ($1, $2, $3);
        next if ($level < $min_level || $level > $max_level);

        # Close lists deeper than this heading ...
        while ($current_level > $level) {
            $current_level--;
            print {$out} $indent->(), "</ul>\n";
        }
        # ... or open lists until we reach its depth.
        while ($current_level < $level) {
            print {$out} $indent->(), "<ul>\n";
            $current_level++;
        }

        my $target = $fragment_only ? "#$tag" : "$in_file#$tag";
        print {$out} $indent->(), "<li><a href=\"$target\">$heading</a>\n";
    }

    close($in);
}

# Close all lists still open, innermost first.
while ($current_level > $min_level) {
    $current_level--;
    print {$out} $indent->(), "</ul>\n";
}

print {$out} "</ul>\n";

# Buffered write errors (e.g. disk full) only surface at close time.
close($out)
    or die "$0: failed to write the file, $!: $out_file\n";