1#!/usr/bin/perl -w
2# Finds potential problems in tex files, and issues warnings to the console
3#   about what it finds.  Takes a list of files as its only arguments,
4#   and does checks on all the files listed.  The assumption is that these are
5#   valid (or close to valid) LaTeX files.  It follows \include statements
6#   recursively to pick up any included tex files.
7#
8#
9#
10# Currently the following checks are made:
11#
#   -- Multiple hyphens not inside a verbatim environment (or \verb).  These
#      should be placed inside a \verb{} construct so they will not be converted
#      to a single hyphen by latex and latex2html.
15
16
17# Original creation 3-8-05 by Karl Cunningham  karlc -at- keckec -dot- com
18#
19#
20
21use strict;
22
# The following builds the pattern used to scan the tex files for multiple
#   hyphens.  Several constructs are recognized, but only bare multiple
#   hyphens are reported; the other constructs are matched solely so that
#   any hyphens inside them are skipped.
#   The pieces are single-quoted strings, so only a backslash itself needs
#   doubling: '\\\\' yields the regex \\ (a literal backslash), while '\s',
#   '\{' and so on reach the regex engine unchanged.
#   The helper names deliberately avoid $a and $b, which are reserved for
#   use by sort.
my $begin_open  = '\\\\begin\*?\s*\{\s*';  # \begin{  (or \begin*{)
my $end_open    = '\\\\end\*?\s*\{\s*';    # \end{    (or \end*{)
my $close_brace = '\s*\}';                 # closing curly brace

# This captures entire verbatim environments, so that hyphens inside them
#   are not reported.
my $verbatimenv = $begin_open . "verbatim" . $close_brace
	. ".*?"
	. $end_open . "verbatim" . $close_brace;

# This captures \verb constructs: \verb (or \verb*), one delimiter character,
#   and everything up to the next occurrence of that same character.  The
#   delimiter is capture group 1, which must remain the first capture group
#   of the combined pattern for the \1 back-reference to work.
my $verb = '\\\\verb\*?(.).*?\1';

# This captures multiple hyphens with a leading and trailing whitespace
#   character.  These are not reported.
my $hyphsp = '\s\-{2,}\s';

# This identifies other multiple hyphens.  These are the ones reported.
my $hyphens = '\-{2,}';

# This identifies \hyperpage{..} commands, which should be ignored.
my $hyperpage = '\\\\hyperpage\*?\{.*?\}';

# This builds the actual test string from the above strings.  The order
#   matters: at any given position the first alternative that matches wins,
#   so the constructs to be skipped come before the bare-hyphen pattern.
my $teststr = join('|', $verbatimenv, $verb, $hyphsp, $hyperpage, $hyphens);
50
51
sub get_includes {
	# Collect the tex files pulled in by \include{...} statements.
	#
	# Arguments:
	#   $files     - reference to the list of include files found so far.
	#                Each newly discovered file name (with '.tex' appended)
	#                is pushed onto this list.
	#   @filenames - names of the tex files to scan for \include statements.
	#
	# Every newly discovered include file is scanned recursively.  An include
	#   whose name is already in the list is reported on standard output and
	#   is not scanned again.  Include names are opened exactly as written in
	#   the \include statement (plus '.tex').
	# Dies if one of the files cannot be opened.
	my ($files, @filenames) = @_;

	foreach my $filename (@filenames) {
		open my $in, '<', $filename
			or die "Cannot open input file $filename: $!\n";

		my $fileline = 0;
		# A named line variable is used rather than $_, because the recursive
		#   call below runs its own read loop.
		while (my $line = <$in>) {
			$fileline++;

			# Only the first \include statement on a line is considered.
			next unless $line =~ /\\include\s*\{(.*?)\}/;

			# Append .tex to the filename
			my $includefile = $1 . '.tex';

			# If the include file has already been processed, issue a warning
			#   and don't do it again.
			if (grep { $_ eq $includefile } @$files) {
				print "$includefile found at line $fileline in $filename was previously included\n";
				next;
			}

			# The file has not been previously found.  Save it and
			#   recursively process it.
			push(@$files, $includefile);
			get_includes($files, $includefile);
		}
		close $in;
	}
}
95
96
sub check_hyphens {
	# Report multiple hyphens that appear outside the accepted constructs.
	#
	# Arguments:
	#   @files - names of the tex files to check.
	#
	# Each file is scanned with the file-level pattern $teststr.  A match that
	#   begins with a hyphen is a run of multiple hyphens that was not
	#   consumed by one of the other alternatives in $teststr; for each such
	#   match a warning with the file name and line number is printed to
	#   standard output.  All other matches are skipped over.
	# Dies if one of the files cannot be opened.
	my (@files) = @_;

	foreach my $file (@files) {
		# Load the whole file into $filedata.  A bit wasteful but easier to
		#   deal with, since the constructs being matched can span lines.
		open my $in, '<', $file or die "Cannot open input file $file: $!";
		my $filedata = do { local $/; <$in> };
		close $in;
		$filedata = '' unless defined $filedata;

		# $linecnt is the line number at offset $counted of the file data;
		#   newlines before that offset have already been added to it.
		my $linecnt = 1;
		my $counted = 0;

		# Walk through the file data from beginning to end, one match at a
		#   time.  The /g flag resumes each search where the previous match
		#   ended, so the data never has to be copied or re-scanned.
		while ($filedata =~ /$teststr/gs) {
			my ($start, $end) = ($-[0], $+[0]);

			# Advance the line count to the line on which this match starts.
			my $before = substr($filedata, $counted, $start - $counted);
			$linecnt += ($before =~ tr/\n//);

			# Check if the multiple hyphen is present outside of one of the
			#   acceptable constructs.
			my $this = substr($filedata, $start, $end - $start);
			if ($this =~ /^\-+/) {
				print "Possible unwanted multiple hyphen found in line ",
					"$linecnt of file $file\n";
			}

			# Account for any newlines inside the match itself.
			$linecnt += ($this =~ tr/\n//);
			$counted = $end;
		}
	}
}
##################################################################
#                       MAIN                                  ####
##################################################################

# Follow the \include statements in the files named on the command line
#  (recursively) to get the list of included tex files.
my @includes;
get_includes(\@includes,@ARGV);

# Check the files named on the command line as well as everything they
#  include.  A file that is both listed and included is checked only once.
my %seen;
check_hyphens(grep { !$seen{$_}++ } (@ARGV,@includes));
153