1 import java.io.*;
2 import antlr.*;
3 import java.util.Hashtable;
4 
5 class LinkChecker implements LinkListener {
6   /** Which directory is the document in? */
7   private String directory = "."; // default to current dir
8   /** Which document are we to process? */
9   private String document;
10 
11   /** Record which files we have seen so that we don't get into an
12    *  infinite loop and for efficiency.  The absolute path is stored here
13    *  to uniquely identify the files.  That is, a file can be arrived
14    *  at from many different locations such as help.html from .
15    *  and ../help.html from a directory below.
16    *
17    *  This table is shared by all instances of LinkChecker.
18    */
19   private static Hashtable visited = new Hashtable(100);
20 
21   /** A table of the images visited by any document; a cache of correctness */
22   private static Hashtable imgVisited = new Hashtable(100);
23 
24   private static int recursionDepth = 0;
25   private static final String separator = "/"; // not OS sensitive in HTML
26   private static final String localSeparator =
27 	  System.getProperty("file.separator");
28 
29 
LinkChecker(String document)30   public LinkChecker(String document) {
31 	this.document = document;
32 	this.directory = pathMinusFile(document);
33   }
checkLinkRules(String fName, int line)34 public boolean checkLinkRules(String fName, int line) {
35 	// Check case of path (check for UNIX compatibility on a PC)!
36 	String offensive = offensivePathMember(directory + separator + fName);
37 	if (offensive != null) {
38 		String file="";
39 		try {
40 			File f = new File(offensive);
41 			file = f.getCanonicalPath();
42 			error("Case mismatch in reference " + fName + ":"+
43 					System.getProperty("line.separator")+"\treal name is "+
44 					fileMinusPathLocal(file)+System.getProperty("line.separator")+
45 					"\treal absolute path is "+file, line);
46 			return false;
47 		}
48 		catch (IOException io) {
49 			error("internal error: cannot get canonical name for "+offensive, line);
50 		}
51 	}
52 	if (new File(fName).isAbsolute()) {
53 		error("Reference to " + fName + " with absolute path", line);
54 		return false;
55 	}
56 	return true;
57 }
doCheck()58 public void doCheck() throws IOException {
59 	if ( !document.endsWith(".html") ) {
60 		return;
61 	}
62 
63 	// prevent infinite recursion to this file
64 	if (visited(document)) {
65 		return;
66 	}
67 	visit(document);
68 	recursionDepth++;
69 	FileReader f = new FileReader(document);
70 	LinkExtractor lexer = new LinkExtractor(f);
71 	lexer.addLinkListener(this);
72 	// this will parse whole file since all tokens are skipped
73   try {
74     lexer.nextToken();
75   }
76   catch (antlr.TokenStreamException e) {
77     error("internal error:" + e,1);
78   }
79 	recursionDepth--;
80 }
error(String err, int line)81   public void error(String err, int line) {
82 	String d="<internal error>";
83 	try {
84 		File f = new File(document);
85 		d = f.getCanonicalPath();
86 	}
87 	catch (IOException io) {
88 		System.err.println("internal error: cannot find file that has error");
89 		System.exit(0);
90 	}
91 	System.err.println(d+":"+line+":"+System.getProperty("line.separator")+"\t"+err);
92   }
fileAbsolute(String path)93   public static boolean fileAbsolute(String path) {
94 	return path.startsWith("/") || path.charAt(1)==':';
95   }
96   /** Return file from end of HTML path; i.e., use '/' separator */
fileMinusPath(String f)97   public static String fileMinusPath(String f) {
98 	int endOfPath = f.lastIndexOf(separator);
99 	if ( endOfPath == -1 ) {
100 	  return f;	// no path found
101 	}
102 	return f.substring(endOfPath+1);
103   }
104   /** Return file from end of locally correct path; i.e., use '/' or '\' separator */
fileMinusPathLocal(String f)105   public static String fileMinusPathLocal(String f) {
106 	int endOfPath = f.lastIndexOf(localSeparator);
107 	if ( endOfPath == -1 ) {
108 	  return f;	// no path found
109 	}
110 	return f.substring(endOfPath+1);
111   }
fileProtocolURL(String target)112   public static boolean fileProtocolURL(String target) {
113 	return target.indexOf("://") == -1 &&
114 		!target.startsWith("mailto:") &&
115 		!target.startsWith("news:");
116   }
getParent(String path)117   public static String getParent(String path) {
118 	int index = path.lastIndexOf(separator);
119 	if (index < 0) {
120 	  return null;
121 	}
122 	if ( !fileAbsolute(path) || path.indexOf(separator) != index ) {
123 	  return path.substring(0, index);
124 	}
125 	if (index < path.length() - 1) {
126 	  return path.substring(0, index + 1);
127 	}
128 	return null;
129   }
hrefReference(String target, int line)130 public void hrefReference(String target, int line) {
131 	// System.out.println(document+":"+line+": href to "+target);
132 	// recursively check the target document unless non-file ref
133 	if (fileProtocolURL(target)) {
134 		// prune off any #name reference on end of file
135 		int pound = target.indexOf('#');
136 		String path = target;
137 		if (pound != -1) {
138 			path = target.substring(0, pound); // rip off #name on end, leave file
139 			if (path.length() == 0) {
140 				return; // ref to name in this file
141 			}
142 		}
143 
144 		// first check existence on disk
145 		File f = new File(directory + separator + path);
146 		if (!f.exists()) {
147 			error("Reference to missing file " + path, line);
148 			return;
149 		}
150 
151 		// check the case
152 		checkLinkRules(path, line);
153 
154 		try {
155 			// Link is ok, now follow the link
156 			LinkChecker chk = new LinkChecker(directory + separator + path);
157 			chk.doCheck();
158 		} catch (IOException io) {
159 			error("Document does not exist: " + target, line);
160 		}
161 	}
162 }
imageLinkIsOk(String file)163   public static boolean imageLinkIsOk(String file) throws IOException {
164 	File f = new File(file);
165 	file = f.getCanonicalPath();
166 	Boolean b = (Boolean)imgVisited.get(file);
167 	if ( b!=null ) {
168 		return b.booleanValue();
169 	}
170 	return false;
171   }
imageReference(String imageFileName, int line)172 public void imageReference(String imageFileName, int line) {
173 	// first check if we have seen this exact file
174 	try {
175 		if (imageLinkIsOk(directory+separator+imageFileName)) {
176 			return;
177 		}
178 		File f = new File(directory + separator + imageFileName);
179 		if (!f.exists()) {
180 			error("Reference to missing file " + imageFileName, line);
181 			return;
182 		}
183 		if (checkLinkRules(imageFileName, line)) {
184 			visitImage(directory+separator+imageFileName);
185 		}
186 	} catch (IOException io) {
187 		if (!(io instanceof FileNotFoundException)) {
188 			System.err.println("internal error: " + io.getMessage());
189 		}
190 	}
191 }
192 /** Given a path to a file or dir, is the case of the reference
193    *  the same as the actual path on the disk?  This is only
194    *  meaningful on a PC which is case-insensitive (not a real
195    *  file system).
196    *
197    *  Returns null if there is nothing offensive and the file exists.
198    *  Returns offending file/dir if it does not exist or
199    *  it has there is a case mismatch for it.  The last file is checked
200    *  first followed by the parent directory, recursively, all the way
201    *  to the absolute or relative path root in that String; i.e., we parse
202    *  from right to left.
203    *
204    *  Because the File object won't actually go get the real filename
205    *  from the disk so we can compare, we must get a directory listing
206    *  of the directory and then look for the referenced file or dir.
207    *  For example, for "./images/logo.gif" we would check "./images" dir
208    *  listing for "logo.gif" with the appropriate case and then check
209    *  directory "." for a dir called images with the right case.  When
210    *  no parent exists, we can stop looking for case problems.
211    */
offensivePathMember(String fName)212 public static String offensivePathMember(String fName) {
213 	// System.out.println("caseMismatch(" + fName + ")");
214 	// have we reached the root? (stopping condition)
215 	if (fName==null || getParent(fName) == null) {
216 		return null;
217 	}
218 	String parent = getParent(fName);
219 	fName = fileMinusPath(fName);
220 	File f = new File(parent);
221 	String[] parentFiles = f.list();
222 	// System.out.println("checking dir " + parent + " for " + fName);
223 
224 	// handle weird stuff like "c:/doc/../foo"; skip this parent dir
225 	if ( fName.equals("..") ) {
226 		return offensivePathMember(getParent(parent));
227 	}
228 
229 	for (int i = 0; i < parentFiles.length; i++) {
230 		// System.out.println("is it " + parentFiles[i] + "?");
231 		if (parentFiles[i].equalsIgnoreCase(fName)) {
232 			if (!parentFiles[i].equals(fName)) {
233 				// System.out.println("case mismatch " + fName + " in " + parent);
234 				return parent + separator + fName;
235 			}
236 			// found a match, verify parent is ok
237 			return offensivePathMember(parent);
238 		}
239 	}
240 	// System.out.println("can't find " + fName + " in " + parent);
241 	return parent + separator + fName;
242 }
pathMinusFile(String f)243   public static String pathMinusFile(String f) {
244 	int endOfPath = f.lastIndexOf(separator);
245 	if ( endOfPath == -1 ) {
246 	  return "."; // no path found: use current directory
247 	}
248 	return f.substring(0, endOfPath);
249   }
visit(String file)250   public static void visit(String file) throws IOException {
251 	File f = new File(file);
252 	file = f.getCanonicalPath();
253 	visited.put(file, new Boolean(true));
254   }
visited(String file)255   public static boolean visited(String file) throws IOException {
256 	File f = new File(file);
257 	file = f.getCanonicalPath();
258 	return visited.get(file) != null;
259   }
visitImage(String file)260   public static void visitImage(String file) throws IOException {
261 	File f = new File(file);
262 	file = f.getCanonicalPath();
263 	// System.out.println("caching image "+file);
264 	imgVisited.put(file, new Boolean(true));
265   }
266 }
267