1 import java.io.*;
2 import antlr.*;
3 import java.util.Hashtable;
4
5 class LinkChecker implements LinkListener {
6 /** Which directory is the document in? */
7 private String directory = "."; // default to current dir
8 /** Which document are we to process? */
9 private String document;
10
11 /** Record which files we have seen so that we don't get into an
12 * infinite loop and for efficiency. The absolute path is stored here
13 * to uniquely identify the files. That is, a file can be arrived
14 * at from many different locations such as help.html from .
15 * and ../help.html from a directory below.
16 *
17 * This table is shared by all instances of LinkChecker.
18 */
19 private static Hashtable visited = new Hashtable(100);
20
21 /** A table of the images visited by any document; a cache of correctness */
22 private static Hashtable imgVisited = new Hashtable(100);
23
24 private static int recursionDepth = 0;
25 private static final String separator = "/"; // not OS sensitive in HTML
26 private static final String localSeparator =
27 System.getProperty("file.separator");
28
29
LinkChecker(String document)30 public LinkChecker(String document) {
31 this.document = document;
32 this.directory = pathMinusFile(document);
33 }
checkLinkRules(String fName, int line)34 public boolean checkLinkRules(String fName, int line) {
35 // Check case of path (check for UNIX compatibility on a PC)!
36 String offensive = offensivePathMember(directory + separator + fName);
37 if (offensive != null) {
38 String file="";
39 try {
40 File f = new File(offensive);
41 file = f.getCanonicalPath();
42 error("Case mismatch in reference " + fName + ":"+
43 System.getProperty("line.separator")+"\treal name is "+
44 fileMinusPathLocal(file)+System.getProperty("line.separator")+
45 "\treal absolute path is "+file, line);
46 return false;
47 }
48 catch (IOException io) {
49 error("internal error: cannot get canonical name for "+offensive, line);
50 }
51 }
52 if (new File(fName).isAbsolute()) {
53 error("Reference to " + fName + " with absolute path", line);
54 return false;
55 }
56 return true;
57 }
doCheck()58 public void doCheck() throws IOException {
59 if ( !document.endsWith(".html") ) {
60 return;
61 }
62
63 // prevent infinite recursion to this file
64 if (visited(document)) {
65 return;
66 }
67 visit(document);
68 recursionDepth++;
69 FileReader f = new FileReader(document);
70 LinkExtractor lexer = new LinkExtractor(f);
71 lexer.addLinkListener(this);
72 // this will parse whole file since all tokens are skipped
73 try {
74 lexer.nextToken();
75 }
76 catch (antlr.TokenStreamException e) {
77 error("internal error:" + e,1);
78 }
79 recursionDepth--;
80 }
error(String err, int line)81 public void error(String err, int line) {
82 String d="<internal error>";
83 try {
84 File f = new File(document);
85 d = f.getCanonicalPath();
86 }
87 catch (IOException io) {
88 System.err.println("internal error: cannot find file that has error");
89 System.exit(0);
90 }
91 System.err.println(d+":"+line+":"+System.getProperty("line.separator")+"\t"+err);
92 }
fileAbsolute(String path)93 public static boolean fileAbsolute(String path) {
94 return path.startsWith("/") || path.charAt(1)==':';
95 }
96 /** Return file from end of HTML path; i.e., use '/' separator */
fileMinusPath(String f)97 public static String fileMinusPath(String f) {
98 int endOfPath = f.lastIndexOf(separator);
99 if ( endOfPath == -1 ) {
100 return f; // no path found
101 }
102 return f.substring(endOfPath+1);
103 }
104 /** Return file from end of locally correct path; i.e., use '/' or '\' separator */
fileMinusPathLocal(String f)105 public static String fileMinusPathLocal(String f) {
106 int endOfPath = f.lastIndexOf(localSeparator);
107 if ( endOfPath == -1 ) {
108 return f; // no path found
109 }
110 return f.substring(endOfPath+1);
111 }
fileProtocolURL(String target)112 public static boolean fileProtocolURL(String target) {
113 return target.indexOf("://") == -1 &&
114 !target.startsWith("mailto:") &&
115 !target.startsWith("news:");
116 }
getParent(String path)117 public static String getParent(String path) {
118 int index = path.lastIndexOf(separator);
119 if (index < 0) {
120 return null;
121 }
122 if ( !fileAbsolute(path) || path.indexOf(separator) != index ) {
123 return path.substring(0, index);
124 }
125 if (index < path.length() - 1) {
126 return path.substring(0, index + 1);
127 }
128 return null;
129 }
hrefReference(String target, int line)130 public void hrefReference(String target, int line) {
131 // System.out.println(document+":"+line+": href to "+target);
132 // recursively check the target document unless non-file ref
133 if (fileProtocolURL(target)) {
134 // prune off any #name reference on end of file
135 int pound = target.indexOf('#');
136 String path = target;
137 if (pound != -1) {
138 path = target.substring(0, pound); // rip off #name on end, leave file
139 if (path.length() == 0) {
140 return; // ref to name in this file
141 }
142 }
143
144 // first check existence on disk
145 File f = new File(directory + separator + path);
146 if (!f.exists()) {
147 error("Reference to missing file " + path, line);
148 return;
149 }
150
151 // check the case
152 checkLinkRules(path, line);
153
154 try {
155 // Link is ok, now follow the link
156 LinkChecker chk = new LinkChecker(directory + separator + path);
157 chk.doCheck();
158 } catch (IOException io) {
159 error("Document does not exist: " + target, line);
160 }
161 }
162 }
imageLinkIsOk(String file)163 public static boolean imageLinkIsOk(String file) throws IOException {
164 File f = new File(file);
165 file = f.getCanonicalPath();
166 Boolean b = (Boolean)imgVisited.get(file);
167 if ( b!=null ) {
168 return b.booleanValue();
169 }
170 return false;
171 }
imageReference(String imageFileName, int line)172 public void imageReference(String imageFileName, int line) {
173 // first check if we have seen this exact file
174 try {
175 if (imageLinkIsOk(directory+separator+imageFileName)) {
176 return;
177 }
178 File f = new File(directory + separator + imageFileName);
179 if (!f.exists()) {
180 error("Reference to missing file " + imageFileName, line);
181 return;
182 }
183 if (checkLinkRules(imageFileName, line)) {
184 visitImage(directory+separator+imageFileName);
185 }
186 } catch (IOException io) {
187 if (!(io instanceof FileNotFoundException)) {
188 System.err.println("internal error: " + io.getMessage());
189 }
190 }
191 }
192 /** Given a path to a file or dir, is the case of the reference
193 * the same as the actual path on the disk? This is only
194 * meaningful on a PC which is case-insensitive (not a real
195 * file system).
196 *
197 * Returns null if there is nothing offensive and the file exists.
198 * Returns offending file/dir if it does not exist or
199 * it has there is a case mismatch for it. The last file is checked
200 * first followed by the parent directory, recursively, all the way
201 * to the absolute or relative path root in that String; i.e., we parse
202 * from right to left.
203 *
204 * Because the File object won't actually go get the real filename
205 * from the disk so we can compare, we must get a directory listing
206 * of the directory and then look for the referenced file or dir.
207 * For example, for "./images/logo.gif" we would check "./images" dir
208 * listing for "logo.gif" with the appropriate case and then check
209 * directory "." for a dir called images with the right case. When
210 * no parent exists, we can stop looking for case problems.
211 */
offensivePathMember(String fName)212 public static String offensivePathMember(String fName) {
213 // System.out.println("caseMismatch(" + fName + ")");
214 // have we reached the root? (stopping condition)
215 if (fName==null || getParent(fName) == null) {
216 return null;
217 }
218 String parent = getParent(fName);
219 fName = fileMinusPath(fName);
220 File f = new File(parent);
221 String[] parentFiles = f.list();
222 // System.out.println("checking dir " + parent + " for " + fName);
223
224 // handle weird stuff like "c:/doc/../foo"; skip this parent dir
225 if ( fName.equals("..") ) {
226 return offensivePathMember(getParent(parent));
227 }
228
229 for (int i = 0; i < parentFiles.length; i++) {
230 // System.out.println("is it " + parentFiles[i] + "?");
231 if (parentFiles[i].equalsIgnoreCase(fName)) {
232 if (!parentFiles[i].equals(fName)) {
233 // System.out.println("case mismatch " + fName + " in " + parent);
234 return parent + separator + fName;
235 }
236 // found a match, verify parent is ok
237 return offensivePathMember(parent);
238 }
239 }
240 // System.out.println("can't find " + fName + " in " + parent);
241 return parent + separator + fName;
242 }
pathMinusFile(String f)243 public static String pathMinusFile(String f) {
244 int endOfPath = f.lastIndexOf(separator);
245 if ( endOfPath == -1 ) {
246 return "."; // no path found: use current directory
247 }
248 return f.substring(0, endOfPath);
249 }
visit(String file)250 public static void visit(String file) throws IOException {
251 File f = new File(file);
252 file = f.getCanonicalPath();
253 visited.put(file, new Boolean(true));
254 }
visited(String file)255 public static boolean visited(String file) throws IOException {
256 File f = new File(file);
257 file = f.getCanonicalPath();
258 return visited.get(file) != null;
259 }
visitImage(String file)260 public static void visitImage(String file) throws IOException {
261 File f = new File(file);
262 file = f.getCanonicalPath();
263 // System.out.println("caching image "+file);
264 imgVisited.put(file, new Boolean(true));
265 }
266 }
267