#!/usr/bin/perl -w
# Finds potential problems in tex files, and issues warnings to the console
# about what it finds. Takes a list of files as its only arguments. The
# assumption is that these are valid (or close to valid) LaTeX files. It
# follows \include statements recursively to pick up any included tex files,
# and runs the checks on every file found that way.
#
# NOTE(review): the files named on the command line are only scanned for
# \include statements; they are not themselves checked for hyphens unless
# some file includes them. Confirm whether that is intended.
#
# Currently the following checks are made:
#
# -- Multiple hyphens not inside a verbatim environment (or \verb). These
#    should be placed inside a \verb{} construct so they will not be
#    converted to a single hyphen by latex and latex2html.


# Original creation 3-8-05 by Karl Cunningham  karlc -at- keckec -dot- com
#
#

use strict;
use warnings;

# The following builds the test string used to identify multiple hyphens in
# the tex files. Several constructs are identified, but only multiple
# hyphens found outside of the other constructs are reported; matching the
# other constructs first is what lets hyphens inside them be skipped.
# (The names avoid $a/$b, which are reserved for sort.)
my $begin = '\\\\begin\\*?\\s*\\{\\s*';  # \begin{
my $end   = '\\\\end\\*?\\s*\\{\\s*';    # \end{
my $close = '\\s*\\}';                   # closing curly brace

# This captures entire verbatim environments. Hyphens inside them are not
# reported.
my $verbatimenv = $begin . "verbatim" . $close . ".*?" . $end . "verbatim" . $close;

# This captures \verb|..| constructs (the delimiter is whatever character
# follows \verb or \verb*). Hyphens inside them are not reported.
my $verb = '\\\\verb\\*?(.).*?\\1';

# This captures multiple hyphens with a leading and trailing space. These
# are not reported.
my $hyphsp = '\\s\\-{2,}\\s';

# This identifies other multiple hyphens.
my $hyphens = '\\-{2,}';

# This identifies \hyperpage{..} commands, which should be ignored.
my $hyperpage = '\\\\hyperpage\\*?\\{.*?\\}';

# This builds the actual test string from the above strings.
#my $teststr = "$verbatimenv|$verb|$tocentry|$hyphens";
my $teststr = "$verbatimenv|$verb|$hyphsp|$hyperpage|$hyphens";

# Compile the pattern once. /s lets '.' match newlines so that verbatim
# environments spanning several lines are captured as a single match.
my $test_re = qr/$teststr/s;


# get_includes(\@files, @filenames)
#
# Scans each file in @filenames for \include{name} statements (at most one
# per line is recognized). For every include not already present in @files,
# "name.tex" is appended to @files and that file is scanned recursively.
# An include that is already in the list produces a message on the
# currently selected output handle and is not processed again.
#
# The first argument is a reference to the list of files found; it is
# modified in place. Dies if a file cannot be opened.
sub get_includes {
    my $files = shift;

    foreach my $filename (@_) {
        # Three-argument open: the name may come from an \include statement
        # and must never be interpreted as an open mode.
        open my $in, '<', $filename
            or die "Cannot open input file $filename: $!\n";

        my $fileline = 0;
        while (my $line = <$in>) {
            chomp $line;
            $fileline++;

            # Only lines containing an \include statement are of interest.
            next unless $line =~ /\\include\s*\{(.*?)\}/;

            # Append .tex to the filename
            my $includefile = $1 . '.tex';

            if (grep { $_ eq $includefile } @$files) {
                # Already processed: issue a warning and don't do it again.
                print "$includefile found at line $fileline in $filename was previously included\n";
            }
            else {
                # The file has not been previously found. Save it and
                # recursively process it.
                push @$files, $includefile;
                get_includes($files, $includefile);
            }
        }
        close $in;
    }
    return;
}


# check_hyphens(@files)
#
# Reads each file completely and walks through every match of the test
# pattern from beginning to end. A match that starts with a hyphen is a
# multiple hyphen outside of the acceptable constructs (verbatim
# environment, \verb, space-delimited hyphens, \hyperpage), and a warning
# with its line number is printed. Dies if a file cannot be opened.
sub check_hyphens {
    my (@files) = @_;

    foreach my $file (@files) {
        # Load the whole file. A bit wasteful but easier to deal with, and
        # matches may span several lines.
        open my $in, '<', $file
            or die "Cannot open input file $file: $!\n";
        my $filedata = do { local $/; <$in> };
        close $in;
        $filedata = '' unless defined $filedata;

        my $linecnt = 1;    # line number of the current scan position
        my $scanned = 0;    # offset just past the previous match

        # Scan with /g instead of repeatedly chopping the string; this
        # finds the same non-overlapping matches without copying the
        # remaining text after every match.
        while ($filedata =~ /$test_re/g) {
            my ($from, $to) = ($-[0], $+[0]);

            # Advance the line counter over the text skipped before the
            # match so the warning names the line where the match starts.
            my $skipped = substr $filedata, $scanned, $from - $scanned;
            $linecnt += ($skipped =~ tr/\n//);

            # Check if the multiple hyphen is present outside of one of the
            # acceptable constructs.
            my $matched = substr $filedata, $from, $to - $from;
            if ($matched =~ /\A-/) {
                print "Possible unwanted multiple hyphen found in line ",
                    "$linecnt of file $file\n";
            }

            # A match (e.g. a verbatim environment) may itself span lines.
            $linecnt += ($matched =~ tr/\n//);
            $scanned = $to;
        }
    }
    return;
}

##################################################################
#                       MAIN                                  ####
##################################################################

my @includes;

# Examine the files named on the command line to get a list of
# includes to test.
get_includes(\@includes, @ARGV);

check_hyphens(@includes);