1 /*
2  * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 import java.io.BufferedReader;
25 import java.io.BufferedWriter;
26 import java.io.ByteArrayOutputStream;
27 import java.io.File;
28 import java.io.FileNotFoundException;
29 import java.io.FileWriter;
30 import java.io.FilenameFilter;
31 import java.io.InputStreamReader;
32 import java.io.IOException;
33 import java.io.PrintStream;
34 import java.io.PrintWriter;
35 import java.io.StringReader;
36 import java.io.StringWriter;
37 import java.lang.annotation.Annotation;
38 import java.lang.annotation.Retention;
39 import java.lang.annotation.RetentionPolicy;
40 import java.lang.ref.SoftReference;
41 import java.lang.reflect.InvocationTargetException;
42 import java.lang.reflect.Method;
43 import java.net.URI;
44 import java.net.URISyntaxException;
45 import java.nio.charset.Charset;
46 import java.nio.charset.CharsetDecoder;
47 import java.nio.charset.CodingErrorAction;
48 import java.nio.charset.UnsupportedCharsetException;
49 import java.nio.file.FileVisitResult;
50 import java.nio.file.Files;
51 import java.nio.file.Path;
52 import java.nio.file.Paths;
53 import java.nio.file.SimpleFileVisitor;
54 import java.nio.file.attribute.BasicFileAttributes;
55 import java.util.ArrayList;
56 import java.util.Arrays;
57 import java.util.Collection;
58 import java.util.Collections;
59 import java.util.Comparator;
60 import java.util.EnumMap;
61 import java.util.HashMap;
62 import java.util.LinkedHashMap;
63 import java.util.List;
64 import java.util.Locale;
65 import java.util.Map;
66 import java.util.Objects;
67 import java.util.Set;
68 import java.util.TreeMap;
69 import java.util.TreeSet;
70 import java.util.function.Function;
71 import java.util.regex.Pattern;
72 import java.util.stream.Collectors;
73 
74 
75 /**
76  * Test framework for running javadoc and performing tests on the resulting output.
77  *
78  * <p>
79  * Tests are typically written as subtypes of JavadocTester, with a main
80  * method that creates an instance of the test class and calls the runTests()
 * method. The runTests() method calls all the test methods declared in the class,
82  * and then calls a method to print a summary, and throw an exception if
83  * any of the test methods reported a failure.
84  *
85  * <p>
86  * Test methods are identified with a @Test annotation. They have no parameters.
87  * The name of the method is not important, but if you have more than one, it is
88  * recommended that the names be meaningful and suggestive of the test case
89  * contained therein.
90  *
91  * <p>
92  * Typically, a test method will invoke javadoc, and then perform various
93  * checks on the results. The standard checks are:
94  *
95  * <dl>
 * <dt>checkExit
97  * <dd>Check the exit code returned from javadoc.
98  * <dt>checkOutput
 * <dd>Perform a series of checks on the contents of a file or output stream
100  *     generated by javadoc.
101  *     The checks can be either that a series of strings are found or are not found.
102  * <dt>checkFiles
103  * <dd>Perform a series of checks on the files generated by javadoc.
104  *     The checks can be that a series of files are found or are not found.
105  * </dl>
106  *
107  * <pre><code>
108  *  public class MyTester extends JavadocTester {
109  *      public static void main(String... args) throws Exception {
110  *          MyTester tester = new MyTester();
111  *          tester.runTests();
112  *      }
113  *
114  *      // test methods...
115  *      @Test
116  *      void test() {
117  *          javadoc(<i>args</i>);
118  *          checkExit(Exit.OK);
119  *          checkOutput(<i>file</i>, true,
120  *              <i>strings-to-find</i>);
121  *          checkOutput(<i>file</i>, false,
122  *              <i>strings-to-not-find</i>);
123  *      }
124  *  }
125  * </code></pre>
126  *
127  * <p>
128  * If javadoc is run more than once in a test method, you can compare the
129  * results that are generated with the diff method. Since files written by
130  * javadoc typically contain a timestamp, you may want to use the -notimestamp
131  * option if you are going to compare the results from two runs of javadoc.
132  *
133  * <p>
134  * If you have many calls of checkOutput that are very similar, you can write
135  * your own check... method to reduce the amount of duplication. For example,
136  * if you want to check that many files contain the same string, you could
137  * write a method that takes a varargs list of files and calls checkOutput
138  * on each file in turn with the string to be checked.
139  *
140  * <p>
 * You can also write your own custom check methods, which can use
142  * readFile to get the contents of a file generated by javadoc,
143  * and then use pass(...) or fail(...) to report whether the check
144  * succeeded or not.
145  *
146  * <p>
147  * You can have many separate test methods, each identified with a @Test
148  * annotation. However, you should <b>not</b> assume they will be called
149  * in the order declared in your source file.  If the order of a series
150  * of javadoc invocations is important, do that within a single method.
151  * If the invocations are independent, for better clarity, use separate
152  * test methods, each with their own set of checks on the results.
153  *
154  * @author Doug Kramer
155  * @author Jamie Ho
156  * @author Jonathan Gibbons (rewrite)
157  */
158 public abstract class JavadocTester {
159 
160     public static final String FS = System.getProperty("file.separator");
161     public static final String PS = System.getProperty("path.separator");
162     public static final String NL = System.getProperty("line.separator");
163     public static final Path currDir = Paths.get(".").toAbsolutePath().normalize();
164 
165     public enum Output {
166         /** The name of the output stream from javadoc. */
167         OUT,
168         /** The name for any output written to System.out. */
169         STDOUT,
170         /** The name for any output written to System.err. */
171         STDERR
172     }
173 
174     /** The output directory used in the most recent call of javadoc. */
175     protected File outputDir;
176 
177     /** The output charset used in the most recent call of javadoc. */
178     protected Charset charset = Charset.defaultCharset();
179 
180     /** The exit code of the most recent call of javadoc. */
181     private int exitCode;
182 
183     /** The output generated by javadoc to the various writers and streams. */
184     private final Map<Output, String> outputMap = new EnumMap<>(Output.class);
185 
186     /** A cache of file content, to avoid reading files unnecessarily. */
187     private final Map<File,SoftReference<String>> fileContentCache = new HashMap<>();
188     /** The charset used for files in the fileContentCache. */
189     private Charset fileContentCacheCharset = null;
190 
191     /** Stream used for logging messages. */
192     protected final PrintStream out = System.out;
193 
194     /** The directory containing the source code for the test. */
195     public static final String testSrc = System.getProperty("test.src");
196 
197     /**
198      * Get the path for a source file in the test source directory.
199      * @param path the path of a file or directory in the source directory
200      * @return the full path of the specified file
201      */
testSrc(String path)202     public static String testSrc(String path) {
203         return new File(testSrc, path).getPath();
204     }
205 
206     /**
207      * Alternatives for checking the contents of a directory.
208      */
209     public enum DirectoryCheck {
210         /**
211          * Check that the directory is empty.
212          */
213         EMPTY((file, name) -> true),
214         /**
215          * Check that the directory does not contain any HTML files,
216          * such as may have been generated by a prior run of javadoc
217          * using this directory.
218          * For now, the check is only performed on the top level directory.
219          */
220         NO_HTML_FILES((file, name) -> name.endsWith(".html")),
221         /**
222          * No check is performed on the directory contents.
223          */
check(File dir)224         NONE(null) { @Override void check(File dir) { } };
225 
226         /** The filter used to detect that files should <i>not</i> be present. */
227         FilenameFilter filter;
228 
DirectoryCheck(FilenameFilter f)229         DirectoryCheck(FilenameFilter f) {
230             filter = f;
231         }
232 
check(File dir)233         void check(File dir) {
234             if (dir.isDirectory()) {
235                 String[] contents = dir.list(filter);
236                 if (contents == null)
237                     throw new Error("cannot list directory: " + dir);
238                 if (contents.length > 0) {
239                     System.err.println("Found extraneous files in dir:" + dir.getAbsolutePath());
240                     for (String x : contents) {
241                         System.err.println(x);
242                     }
243                     throw new Error("directory has unexpected content: " + dir);
244                 }
245             }
246         }
247     }
248 
249     private DirectoryCheck outputDirectoryCheck = DirectoryCheck.EMPTY;
250 
251     private boolean automaticCheckLinks = true;
252 
253     /** The current subtest number. Incremented when checking(...) is called. */
254     private int numTestsRun = 0;
255 
256     /** The number of subtests passed. Incremented when passed(...) is called. */
257     private int numTestsPassed = 0;
258 
259     /** The current run of javadoc. Incremented when javadoc is called. */
260     private int javadocRunNum = 0;
261 
262     /** The current subtest number for this run of javadoc. Incremented when checking(...) is called. */
263     private int javadocTestNum = 0;
264 
265     /** Marker annotation for test methods to be invoked by runTests. */
266     @Retention(RetentionPolicy.RUNTIME)
267     @interface Test { }
268 
269     /**
270      * Run all methods annotated with @Test, followed by printSummary.
271      * Typically called on a tester object in main()
272      * @throws Exception if any errors occurred
273      */
runTests()274     public void runTests() throws Exception {
275         runTests(m -> new Object[0]);
276     }
277 
278     /**
279      * Run all methods annotated with @Test, followed by printSummary.
280      * Typically called on a tester object in main()
281      * @param f a function which will be used to provide arguments to each
282      *          invoked method
283      * @throws Exception if any errors occurred
284      */
runTests(Function<Method, Object[]> f)285     public void runTests(Function<Method, Object[]> f) throws Exception {
286         for (Method m: getClass().getDeclaredMethods()) {
287             Annotation a = m.getAnnotation(Test.class);
288             if (a != null) {
289                 try {
290                     out.println("Running test " + m.getName());
291                     m.invoke(this, f.apply(m));
292                 } catch (InvocationTargetException e) {
293                     Throwable cause = e.getCause();
294                     throw (cause instanceof Exception) ? ((Exception) cause) : e;
295                 }
296                 out.println();
297             }
298         }
299         printSummary();
300     }
301 
302     /**
303      * Run javadoc.
304      * The output directory used by this call and the final exit code
305      * will be saved for later use.
306      * To aid the reader, it is recommended that calls to this method
307      * put each option and the arguments it takes on a separate line.
308      *
309      * Example:
310      * <pre><code>
311      *  javadoc("-d", "out",
312      *          "-sourcepath", testSrc,
313      *          "-notimestamp",
314      *          "pkg1", "pkg2", "pkg3/C.java");
315      * </code></pre>
316      *
317      * @param args the arguments to pass to javadoc
318      */
javadoc(String... args)319     public void javadoc(String... args) {
320         outputMap.clear();
321         fileContentCache.clear();
322 
323         javadocRunNum++;
324         javadocTestNum = 0; // reset counter for this run of javadoc
325         if (javadocRunNum == 1) {
326             out.println("Running javadoc...");
327         } else {
328             out.println("Running javadoc (run "+ javadocRunNum + ")...");
329         }
330 
331         outputDir = new File(".");
332         String charsetArg = null;
333         String docencodingArg = null;
334         String encodingArg = null;
335         for (int i = 0; i < args.length - 2; i++) {
336             switch (args[i]) {
337                 case "-d":
338                     outputDir = new File(args[++i]);
339                     break;
340                 case "-charset":
341                     charsetArg = args[++i];
342                     break;
343                 case "-docencoding":
344                     docencodingArg = args[++i];
345                     break;
346                 case "-encoding":
347                     encodingArg = args[++i];
348                     break;
349             }
350         }
351 
352         // The following replicates HtmlConfiguration.finishOptionSettings0
353         // and sets up the charset used to read files.
354         String cs;
355         if (docencodingArg == null) {
356             if (charsetArg == null) {
357                 cs = (encodingArg == null) ? "UTF-8" : encodingArg;
358             } else {
359                 cs = charsetArg;
360             }
361         } else {
362            cs = docencodingArg;
363         }
364         try {
365             charset = Charset.forName(cs);
366         } catch (UnsupportedCharsetException e) {
367             charset = Charset.defaultCharset();
368         }
369 
370         out.println("args: " + Arrays.toString(args));
371 //        log.setOutDir(outputDir);
372 
373         outputDirectoryCheck.check(outputDir);
374 
375         // This is the sole stream used by javadoc
376         WriterOutput outOut = new WriterOutput();
377 
378         // These are to catch output to System.out and System.err,
379         // in case these are used instead of the primary streams
380         StreamOutput sysOut = new StreamOutput(System.out, System::setOut);
381         StreamOutput sysErr = new StreamOutput(System.err, System::setErr);
382 
383         try {
384             exitCode = jdk.javadoc.internal.tool.Main.execute(args, outOut.pw);
385         } finally {
386             outputMap.put(Output.STDOUT, sysOut.close());
387             outputMap.put(Output.STDERR, sysErr.close());
388             outputMap.put(Output.OUT, outOut.close());
389         }
390 
391         outputMap.forEach((name, text) -> {
392             if (!text.isEmpty()) {
393                 out.println("javadoc " + name + ":");
394                 out.println(text);
395             }
396         });
397 
398         if (automaticCheckLinks && exitCode == Exit.OK.code && outputDir.exists()) {
399             checkLinks();
400         }
401     }
402 
403     /**
404      * Set the kind of check for the initial contents of the output directory
405      * before javadoc is run.
406      * The filter should return true for files that should <b>not</b> appear.
407      * @param c the kind of check to perform
408      */
setOutputDirectoryCheck(DirectoryCheck c)409     public void setOutputDirectoryCheck(DirectoryCheck c) {
410         outputDirectoryCheck = c;
411     }
412 
413     /**
414      * Set whether or not to perform an automatic call of checkLinks.
415      */
setAutomaticCheckLinks(boolean b)416     public void setAutomaticCheckLinks(boolean b) {
417         automaticCheckLinks = b;
418     }
419 
420     /**
421      * The exit codes returned by the javadoc tool.
422      * @see jdk.javadoc.internal.tool.Main.Result
423      */
424     public enum Exit {
425         OK(0),        // Javadoc completed with no errors.
426         ERROR(1),     // Completed but reported errors.
427         CMDERR(2),    // Bad command-line arguments
428         SYSERR(3),    // System error or resource exhaustion.
429         ABNORMAL(4);  // Javadoc terminated abnormally
430 
Exit(int code)431         Exit(int code) {
432             this.code = code;
433         }
434 
435         final int code;
436 
437         @Override
toString()438         public String toString() {
439             return name() + '(' + code + ')';
440         }
441     }
442 
443     /**
444      * Check the exit code of the most recent call of javadoc.
445      *
446      * @param expected the exit code that is required for the test
447      * to pass.
448      */
checkExit(Exit expected)449     public void checkExit(Exit expected) {
450         checking("check exit code");
451         if (exitCode == expected.code) {
452             passed("return code " + exitCode);
453         } else {
454             failed("return code " + exitCode +"; expected " + expected);
455         }
456     }
457 
458     /**
459      * Check for content in (or not in) the generated output.
460      * Within the search strings, the newline character \n
461      * will be translated to the platform newline character sequence.
462      * @param path a path within the most recent output directory
463      *  or the name of one of the output buffers, identifying
464      *  where to look for the search strings.
465      * @param expectedFound true if all of the search strings are expected
466      *  to be found, or false if the file is not expected to be found
467      * @param strings the strings to be searched for
468      */
checkFileAndOutput(String path, boolean expectedFound, String... strings)469     public void checkFileAndOutput(String path, boolean expectedFound, String... strings) {
470         if (expectedFound) {
471             checkOutput(path, true, strings);
472         } else {
473             checkFiles(false, path);
474         }
475     }
476 
477     /**
478      * Check for content in (or not in) the generated output.
479      * Within the search strings, the newline character \n
480      * will be translated to the platform newline character sequence.
481      * @param path a path within the most recent output directory, identifying
482      *  where to look for the search strings.
483      * @param expectedFound true if all of the search strings are expected
484      *  to be found, or false if all of the strings are expected to be
485      *  not found
486      * @param strings the strings to be searched for
487      */
checkOutput(String path, boolean expectedFound, String... strings)488     public void checkOutput(String path, boolean expectedFound, String... strings) {
489         // Read contents of file
490         try {
491             String fileString = readFile(outputDir, path);
492             checkOutput(new File(outputDir, path).getPath(), fileString, expectedFound, strings);
493         } catch (Error e) {
494             checking("Read file");
495             failed("Error reading file: " + e);
496         }
497     }
498 
499     /**
500      * Check for content in (or not in) the one of the output streams written by
501      * javadoc. Within the search strings, the newline character \n
502      * will be translated to the platform newline character sequence.
503      * @param output the output stream to check
504      * @param expectedFound true if all of the search strings are expected
505      *  to be found, or false if all of the strings are expected to be
506      *  not found
507      * @param strings the strings to be searched for
508      */
checkOutput(Output output, boolean expectedFound, String... strings)509     public void checkOutput(Output output, boolean expectedFound, String... strings) {
510         checkOutput(output.toString(), outputMap.get(output), expectedFound, strings);
511     }
512 
513     // NOTE: path may be the name of an Output stream as well as a file path
checkOutput(String path, String fileString, boolean expectedFound, String... strings)514     private void checkOutput(String path, String fileString, boolean expectedFound, String... strings) {
515         for (String stringToFind : strings) {
516 //            log.logCheckOutput(path, expectedFound, stringToFind);
517             checking("checkOutput");
518             // Find string in file's contents
519             boolean isFound = findString(fileString, stringToFind);
520             if (isFound == expectedFound) {
521                 passed(path + ": following text " + (isFound ? "found:" : "not found:") + "\n"
522                         + stringToFind);
523             } else {
524                 failed(path + ": following text " + (isFound ? "found:" : "not found:") + "\n"
525                         + stringToFind + '\n' +
526                         "found \n" +
527                         fileString);
528             }
529         }
530     }
531 
checkLinks()532     public void checkLinks() {
533         checking("Check links");
534         LinkChecker c = new LinkChecker(out, this::readFile);
535         try {
536             c.checkDirectory(outputDir.toPath());
537             c.report();
538             int errors = c.getErrorCount();
539             if (errors == 0) {
540                 passed("Links are OK");
541             } else {
542                 failed(errors + " errors found when checking links");
543             }
544         } catch (IOException e) {
545             failed("exception thrown when reading files: " + e);
546         }
547     }
548 
549     /**
550      * Get the content of the one of the output streams written by javadoc.
551      * @param output the name of the output stream
552      * @return the content of the output stream
553      */
getOutput(Output output)554     public String getOutput(Output output) {
555         return outputMap.get(output);
556     }
557 
558     /**
559      * Get the content of the one of the output streams written by javadoc.
560      * @param output the name of the output stream
561      * @return the content of the output stream, as a line of lines
562      */
getOutputLines(Output output)563     public List<String> getOutputLines(Output output) {
564         String text = outputMap.get(output);
565         return (text == null) ? Collections.emptyList() : Arrays.asList(text.split(NL));
566     }
567 
568     /**
569      * Check for files in (or not in) the generated output.
570      * @param expectedFound true if all of the files are expected
571      *  to be found, or false if all of the files are expected to be
572      *  not found
573      * @param paths the files to check, within the most recent output directory.
574      * */
checkFiles(boolean expectedFound, String... paths)575     public void checkFiles(boolean expectedFound, String... paths) {
576         checkFiles(expectedFound, Arrays.asList(paths));
577     }
578 
579     /**
580      * Check for files in (or not in) the generated output.
581      * @param expectedFound true if all of the files are expected
582      *  to be found, or false if all of the files are expected to be
583      *  not found
584      * @param paths the files to check, within the most recent output directory.
585      * */
checkFiles(boolean expectedFound, Collection<String> paths)586     public void checkFiles(boolean expectedFound, Collection<String> paths) {
587         for (String path: paths) {
588 //            log.logCheckFile(path, expectedFound);
589             checking("checkFile");
590             File file = new File(outputDir, path);
591             boolean isFound = file.exists();
592             if (isFound == expectedFound) {
593                 passed(file, "file " + (isFound ? "found:" : "not found:") + "\n");
594             } else {
595                 failed(file, "file " + (isFound ? "found:" : "not found:") + "\n");
596             }
597         }
598     }
599 
600     /**
601      * Check that a series of strings are found in order in a file in
602      * the generated output.
603      * @param path the file to check
604      * @param strings  the strings whose order to check
605      */
checkOrder(String path, String... strings)606     public void checkOrder(String path, String... strings) {
607         File file = new File(outputDir, path);
608         String fileString = readOutputFile(path);
609         int prevIndex = -1;
610         for (String s : strings) {
611             s = s.replace("\n", NL); // normalize new lines
612             int currentIndex = fileString.indexOf(s, prevIndex + 1);
613             checking("file: " + file + ": " + s + " at index " + currentIndex);
614             if (currentIndex == -1) {
615                 failed(file, s + " not found.");
616                 continue;
617             }
618             if (currentIndex > prevIndex) {
619                 passed(file, s + " is in the correct order");
620             } else {
621                 failed(file, s + " is in the wrong order.");
622             }
623             prevIndex = currentIndex;
624         }
625     }
626 
627     /**
628      * Ensures that a series of strings appear only once, in the generated output,
629      * noting that, this test does not exhaustively check for all other possible
630      * duplicates once one is found.
631      * @param path the file to check
632      * @param strings ensure each are unique
633      */
checkUnique(String path, String... strings)634     public void checkUnique(String path, String... strings) {
635         File file = new File(outputDir, path);
636         String fileString = readOutputFile(path);
637         for (String s : strings) {
638             int currentIndex = fileString.indexOf(s);
639             checking(s + " at index " + currentIndex);
640             if (currentIndex == -1) {
641                 failed(file, s + " not found.");
642                 continue;
643             }
644             int nextindex = fileString.indexOf(s, currentIndex + s.length());
645             if (nextindex == -1) {
646                 passed(file, s + " is unique");
647             } else {
648                 failed(file, s + " is not unique, found at " + nextindex);
649             }
650         }
651     }
652 
653     /**
654      * Compare a set of files in each of two directories.
655      *
656      * @param baseDir1 the directory containing the first set of files
657      * @param baseDir2 the directory containing the second set of files
658      * @param files the set of files to be compared
659      */
diff(String baseDir1, String baseDir2, String... files)660     public void diff(String baseDir1, String baseDir2, String... files) {
661         File bd1 = new File(baseDir1);
662         File bd2 = new File(baseDir2);
663         for (String file : files) {
664             diff(bd1, bd2, file);
665         }
666     }
667 
668     /**
669      * A utility to copy a directory from one place to another.
670      *
671      * @param targetDir the directory to copy.
672      * @param destDir the destination to copy the directory to.
673      */
674     // TODO: convert to using java.nio.Files.walkFileTree
copyDir(String targetDir, String destDir)675     public void copyDir(String targetDir, String destDir) {
676         try {
677             File targetDirObj = new File(targetDir);
678             File destDirParentObj = new File(destDir);
679             File destDirObj = new File(destDirParentObj, targetDirObj.getName());
680             if (! destDirParentObj.exists()) {
681                 destDirParentObj.mkdir();
682             }
683             if (! destDirObj.exists()) {
684                 destDirObj.mkdir();
685             }
686             String[] files = targetDirObj.list();
687             for (String file : files) {
688                 File srcFile = new File(targetDirObj, file);
689                 File destFile = new File(destDirObj, file);
690                 if (srcFile.isFile()) {
691                     out.println("Copying " + srcFile + " to " + destFile);
692                     copyFile(destFile, srcFile);
693                 } else if(srcFile.isDirectory()) {
694                     copyDir(srcFile.getAbsolutePath(), destDirObj.getAbsolutePath());
695                 }
696             }
697         } catch (IOException exc) {
698             throw new Error("Could not copy " + targetDir + " to " + destDir);
699         }
700     }
701 
702     /**
703      * Copy source file to destination file.
704      *
705      * @param destfile the destination file
706      * @param srcfile the source file
707      * @throws IOException
708      */
copyFile(File destfile, File srcfile)709     public void copyFile(File destfile, File srcfile) throws IOException {
710         Files.copy(srcfile.toPath(), destfile.toPath());
711     }
712 
713     /**
714      * Read a file from the output directory.
715      *
716      * @param fileName  the name of the file to read
717      * @return          the file in string format
718      */
readOutputFile(String fileName)719     public String readOutputFile(String fileName) throws Error {
720         return readFile(outputDir, fileName);
721     }
722 
readFile(String fileName)723     protected String readFile(String fileName) throws Error {
724         return readFile(outputDir, fileName);
725     }
726 
readFile(String baseDir, String fileName)727     protected String readFile(String baseDir, String fileName) throws Error {
728         return readFile(new File(baseDir), fileName);
729     }
730 
readFile(Path file)731     private String readFile(Path file) {
732         File baseDir;
733         if (file.startsWith(outputDir.toPath())) {
734             baseDir = outputDir;
735         } else if (file.startsWith(currDir)) {
736             baseDir = currDir.toFile();
737         } else {
738             baseDir = file.getParent().toFile();
739         }
740         String fileName = baseDir.toPath().relativize(file).toString();
741         return readFile(baseDir, fileName);
742     }
743 
744     /**
745      * Read the file and return it as a string.
746      *
747      * @param baseDir   the directory in which to locate the file
748      * @param fileName  the name of the file to read
749      * @return          the file in string format
750      */
readFile(File baseDir, String fileName)751     private String readFile(File baseDir, String fileName) throws Error {
752         if (!Objects.equals(fileContentCacheCharset, charset)) {
753             fileContentCache.clear();
754             fileContentCacheCharset = charset;
755         }
756         try {
757             File file = new File(baseDir, fileName);
758             SoftReference<String> ref = fileContentCache.get(file);
759             String content = (ref == null) ? null : ref.get();
760             if (content != null)
761                 return content;
762 
763             // charset defaults to a value inferred from latest javadoc run
764             content = new String(Files.readAllBytes(file.toPath()), charset);
765             fileContentCache.put(file, new SoftReference<>(content));
766             return content;
767         } catch (FileNotFoundException e) {
768             throw new Error("File not found: " + fileName + ": " + e);
769         } catch (IOException e) {
770             throw new Error("Error reading file: " + fileName + ": " + e);
771         }
772     }
773 
checking(String message)774     protected void checking(String message) {
775         numTestsRun++;
776         javadocTestNum++;
777         print("Starting subtest " + javadocRunNum + "." + javadocTestNum, message);
778     }
779 
passed(File file, String message)780     protected void passed(File file, String message) {
781         passed(file + ": " + message);
782     }
783 
passed(String message)784     protected void passed(String message) {
785         numTestsPassed++;
786         print("Passed", message);
787         out.println();
788     }
789 
failed(File file, String message)790     protected void failed(File file, String message) {
791         failed(file + ": " + message);
792     }
793 
failed(String message)794     protected void failed(String message) {
795         print("FAILED", message);
796         StackWalker.getInstance().walk(s -> {
797             s.dropWhile(f -> f.getMethodName().equals("failed"))
798                     .takeWhile(f -> !f.getMethodName().equals("runTests"))
799                     .forEach(f -> out.println("        at "
800                             + f.getClassName() + "." + f.getMethodName()
801                             + "(" + f.getFileName() + ":" + f.getLineNumber() + ")"));
802             return null;
803         });
804         out.println();
805     }
806 
print(String prefix, String message)807     private void print(String prefix, String message) {
808         if (message.isEmpty())
809             out.println(prefix);
810         else {
811             out.print(prefix);
812             out.print(": ");
813             out.print(message.replace("\n", NL));
814             if (!(message.endsWith("\n") || message.endsWith(NL))) {
815                 out.println();
816             }
817         }
818     }
819 
820     /**
821      * Print a summary of the test results.
822      */
printSummary()823     protected void printSummary() {
824         String javadocRuns = (javadocRunNum <= 1) ? ""
825                 : ", in " + javadocRunNum + " runs of javadoc";
826 
827         if (numTestsRun != 0 && numTestsPassed == numTestsRun) {
828             // Test passed
829             out.println();
830             out.println("All " + numTestsPassed + " subtests passed" + javadocRuns);
831         } else {
832             // Test failed
833             throw new Error((numTestsRun - numTestsPassed)
834                     + " of " + (numTestsRun)
835                     + " subtests failed"
836                     + javadocRuns);
837         }
838     }
839 
840     /**
841      * Search for the string in the given file and return true
842      * if the string was found.
843      *
844      * @param fileString    the contents of the file to search through
845      * @param stringToFind  the string to search for
846      * @return              true if the string was found
847      */
findString(String fileString, String stringToFind)848     private boolean findString(String fileString, String stringToFind) {
849         // javadoc (should) always use the platform newline sequence,
850         // but in the strings to find it is more convenient to use the Java
851         // newline character. So we translate \n to NL before we search.
852         stringToFind = stringToFind.replace("\n", NL);
853         return fileString.contains(stringToFind);
854     }
855 
856     /**
857      * Compare the two given files.
858      *
859      * @param baseDir1 the directory in which to locate the first file
860      * @param baseDir2 the directory in which to locate the second file
861      * @param file the file to compare in the two base directories
862      * @param throwErrorIFNoMatch flag to indicate whether or not to throw
863      * an error if the files do not match.
864      * @return true if the files are the same and false otherwise.
865      */
diff(File baseDir1, File baseDir2, String file)866     private void diff(File baseDir1, File baseDir2, String file) {
867         String file1Contents = readFile(baseDir1, file);
868         String file2Contents = readFile(baseDir2, file);
869         checking("diff " + new File(baseDir1, file) + ", " + new File(baseDir2, file));
870         if (file1Contents.trim().compareTo(file2Contents.trim()) == 0) {
871             passed("files are equal");
872         } else {
873             failed("files differ");
874         }
875     }
876 
877     /**
878      * Utility class to simplify the handling of temporarily setting a
879      * new stream for System.out or System.err.
880      */
881     private static class StreamOutput {
882         // functional interface to set a stream.
883         private interface Initializer {
set(PrintStream s)884             void set(PrintStream s);
885         }
886 
887         private final ByteArrayOutputStream baos = new ByteArrayOutputStream();
888         private final PrintStream ps = new PrintStream(baos);
889         private final PrintStream prev;
890         private final Initializer init;
891 
StreamOutput(PrintStream s, Initializer init)892         StreamOutput(PrintStream s, Initializer init) {
893             prev = s;
894             init.set(ps);
895             this.init = init;
896         }
897 
close()898         String close() {
899             init.set(prev);
900             ps.close();
901             return baos.toString();
902         }
903     }
904 
905     /**
906      * Utility class to simplify the handling of creating an in-memory PrintWriter.
907      */
908     private static class WriterOutput {
909         private final StringWriter sw = new StringWriter();
910         final PrintWriter pw = new PrintWriter(sw);
close()911         String close() {
912             pw.close();
913             return sw.toString();
914         }
915     }
916 
917 
918 //    private final Logger log = new Logger();
919 
920     //--------- Logging --------------------------------------------------------
921     //
922     // This class writes out the details of calls to checkOutput and checkFile
923     // in a canonical way, so that the resulting file can be checked against
924     // similar files from other versions of JavadocTester using the same logging
925     // facilities.
926 
927     static class Logger {
928         private static final int PREFIX = 40;
929         private static final int SUFFIX = 20;
930         private static final int MAX = PREFIX + SUFFIX;
931         List<String> tests = new ArrayList<>();
932         String outDir;
933         String rootDir = rootDir();
934 
rootDir()935         static String rootDir() {
936             File f = new File(".").getAbsoluteFile();
937             while (!new File(f, ".hg").exists())
938                 f = f.getParentFile();
939             return f.getPath();
940         }
941 
setOutDir(File outDir)942         void setOutDir(File outDir) {
943             this.outDir = outDir.getPath();
944         }
945 
logCheckFile(String file, boolean positive)946         void logCheckFile(String file, boolean positive) {
947             // Strip the outdir because that will typically not be the same
948             if (file.startsWith(outDir + "/"))
949                 file = file.substring(outDir.length() + 1);
950             tests.add(file + " " + positive);
951         }
952 
logCheckOutput(String file, boolean positive, String text)953         void logCheckOutput(String file, boolean positive, String text) {
954             // Compress the string to be displayed in the log file
955             String simpleText = text.replaceAll("\\s+", " ").replace(rootDir, "[ROOT]");
956             if (simpleText.length() > MAX)
957                 simpleText = simpleText.substring(0, PREFIX)
958                         + "..." + simpleText.substring(simpleText.length() - SUFFIX);
959             // Strip the outdir because that will typically not be the same
960             if (file.startsWith(outDir + "/"))
961                 file = file.substring(outDir.length() + 1);
962             // The use of text.hashCode ensure that all of "text" is taken into account
963             tests.add(file + " " + positive + " " + text.hashCode() + " " + simpleText);
964         }
965 
write()966         void write() {
967             // sort the log entries because the subtests may not be executed in the same order
968             tests.sort((a, b) -> a.compareTo(b));
969             try (BufferedWriter bw = new BufferedWriter(new FileWriter("tester.log"))) {
970                 for (String t: tests) {
971                     bw.write(t);
972                     bw.newLine();
973                 }
974             } catch (IOException e) {
975                 throw new Error("problem writing log: " + e);
976             }
977         }
978     }
979 
980     // Support classes for checkLinks
981 
982     /**
983      * A basic HTML parser. Override the protected methods as needed to get notified
984      * of significant items in any file that is read.
985      */
986     static abstract class HtmlParser {
987 
988         protected final PrintStream out;
989         protected final Function<Path,String> fileReader;
990 
991         private Path file;
992         private StringReader in;
993         private int ch;
994         private int lineNumber;
995         private boolean inScript;
996         private boolean xml;
997 
HtmlParser(PrintStream out, Function<Path,String> fileReader)998         HtmlParser(PrintStream out, Function<Path,String> fileReader) {
999             this.out = out;
1000             this.fileReader = fileReader;
1001         }
1002 
1003         /**
1004          * Read a file.
1005          * @param file the file to be read
1006          * @throws IOException if an error occurs while reading the file
1007          */
read(Path file)1008         void read(Path file) throws IOException {
1009             try (StringReader r = new StringReader(fileReader.apply(file))) {
1010                 this.file = file;
1011                 this.in = r;
1012 
1013                 startFile(file);
1014                 try {
1015                     lineNumber = 1;
1016                     xml = false;
1017                     nextChar();
1018 
1019                     while (ch != -1) {
1020                         switch (ch) {
1021 
1022                             case '<':
1023                                 html();
1024                                 break;
1025 
1026                             default:
1027                                 nextChar();
1028                         }
1029                     }
1030                 } finally {
1031                     endFile();
1032                 }
1033             } catch (IOException e) {
1034                 error(file, lineNumber, e);
1035             } catch (Throwable t) {
1036                 error(file, lineNumber, t);
1037                 t.printStackTrace(out);
1038             }
1039         }
1040 
1041 
getLineNumber()1042         int getLineNumber() {
1043             return lineNumber;
1044         }
1045 
1046         /**
1047          * Called when a file has been opened, before parsing begins.
1048          * This is always the first notification when reading a file.
1049          * This implementation does nothing.
1050          *
1051          * @param file the file
1052          */
startFile(Path file)1053         protected void startFile(Path file) { }
1054 
1055         /**
1056          * Called when the parser has finished reading a file.
1057          * This is always the last notification when reading a file,
1058          * unless any errors occur while closing the file.
1059          * This implementation does nothing.
1060          */
endFile()1061         protected void endFile() { }
1062 
1063         /**
1064          * Called when a doctype declaration is found, at the beginning of the file.
1065          * This implementation does nothing.
1066          * @param s the doctype declaration
1067          */
docType(String s)1068         protected void docType(String s) { }
1069 
1070         /**
1071          * Called when the opening tag of an HTML element is encountered.
1072          * This implementation does nothing.
1073          * @param name the name of the tag
1074          * @param attrs the attribute
1075          * @param selfClosing whether or not this is a self-closing tag
1076          */
startElement(String name, Map<String,String> attrs, boolean selfClosing)1077         protected void startElement(String name, Map<String,String> attrs, boolean selfClosing) { }
1078 
1079         /**
1080          * Called when the closing tag of an HTML tag is encountered.
1081          * This implementation does nothing.
1082          * @param name the name of the tag
1083          */
endElement(String name)1084         protected void endElement(String name) { }
1085 
1086         /**
1087          * Called when an error has been encountered.
1088          * @param file the file being read
1089          * @param lineNumber the line number of line containing the error
1090          * @param message a description of the error
1091          */
error(Path file, int lineNumber, String message)1092         protected void error(Path file, int lineNumber, String message) {
1093             out.println(file + ":" + lineNumber + ": " + message);
1094         }
1095 
1096         /**
1097          * Called when an exception has been encountered.
1098          * @param file the file being read
1099          * @param lineNumber the line number of the line being read when the exception was found
1100          * @param t the exception
1101          */
error(Path file, int lineNumber, Throwable t)1102         protected void error(Path file, int lineNumber, Throwable t) {
1103             out.println(file + ":" + lineNumber + ": " + t);
1104         }
1105 
nextChar()1106         private void nextChar() throws IOException {
1107             ch = in.read();
1108             if (ch == '\n')
1109                 lineNumber++;
1110         }
1111 
1112         /**
1113          * Read the start or end of an HTML tag, or an HTML comment
1114          * {@literal <identifier attrs> } or {@literal </identifier> }
1115          * @throws java.io.IOException if there is a problem reading the file
1116          */
html()1117         private void html() throws IOException {
1118             nextChar();
1119             if (isIdentifierStart((char) ch)) {
1120                 String name = readIdentifier().toLowerCase(Locale.US);
1121                 Map<String,String> attrs = htmlAttrs();
1122                 if (attrs != null) {
1123                     boolean selfClosing = false;
1124                     if (ch == '/') {
1125                         nextChar();
1126                         selfClosing = true;
1127                     }
1128                     if (ch == '>') {
1129                         nextChar();
1130                         startElement(name, attrs, selfClosing);
1131                         if (name.equals("script")) {
1132                             inScript = true;
1133                         }
1134                         return;
1135                     }
1136                 }
1137             } else if (ch == '/') {
1138                 nextChar();
1139                 if (isIdentifierStart((char) ch)) {
1140                     String name = readIdentifier().toLowerCase(Locale.US);
1141                     skipWhitespace();
1142                     if (ch == '>') {
1143                         nextChar();
1144                         endElement(name);
1145                         if (name.equals("script")) {
1146                             inScript = false;
1147                         }
1148                         return;
1149                     }
1150                 }
1151             } else if (ch == '!') {
1152                 nextChar();
1153                 if (ch == '-') {
1154                     nextChar();
1155                     if (ch == '-') {
1156                         nextChar();
1157                         while (ch != -1) {
1158                             int dash = 0;
1159                             while (ch == '-') {
1160                                 dash++;
1161                                 nextChar();
1162                             }
1163                             // Strictly speaking, a comment should not contain "--"
1164                             // so dash > 2 is an error, dash == 2 implies ch == '>'
1165                             // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
1166                             // for more details.
1167                             if (dash >= 2 && ch == '>') {
1168                                 nextChar();
1169                                 return;
1170                             }
1171 
1172                             nextChar();
1173                         }
1174                     }
1175                 } else if (ch == '[') {
1176                     nextChar();
1177                     if (ch == 'C') {
1178                         nextChar();
1179                         if (ch == 'D') {
1180                             nextChar();
1181                             if (ch == 'A') {
1182                                 nextChar();
1183                                 if (ch == 'T') {
1184                                     nextChar();
1185                                     if (ch == 'A') {
1186                                         nextChar();
1187                                         if (ch == '[') {
1188                                             while (true) {
1189                                                 nextChar();
1190                                                 if (ch == ']') {
1191                                                     nextChar();
1192                                                     if (ch == ']') {
1193                                                         nextChar();
1194                                                         if (ch == '>') {
1195                                                             nextChar();
1196                                                             return;
1197                                                         }
1198                                                     }
1199                                                 }
1200                                             }
1201 
1202                                         }
1203                                     }
1204                                 }
1205                             }
1206                         }
1207                     }
1208                 } else {
1209                     StringBuilder sb = new StringBuilder();
1210                     while (ch != -1 && ch != '>') {
1211                         sb.append((char) ch);
1212                         nextChar();
1213                     }
1214                     Pattern p = Pattern.compile("(?is)doctype\\s+html\\s?.*");
1215                     String s = sb.toString();
1216                     if (p.matcher(s).matches()) {
1217                         docType(s);
1218                         return;
1219                     }
1220                 }
1221             } else if (ch == '?') {
1222                 nextChar();
1223                 if (ch == 'x') {
1224                     nextChar();
1225                     if (ch == 'm') {
1226                         nextChar();
1227                         if (ch == 'l') {
1228                             Map<String,String> attrs = htmlAttrs();
1229                             if (ch == '?') {
1230                                 nextChar();
1231                                 if (ch == '>') {
1232                                     nextChar();
1233                                     xml = true;
1234                                     return;
1235                                 }
1236                             }
1237                         }
1238                     }
1239 
1240                 }
1241             }
1242 
1243             if (!inScript) {
1244                 error(file, lineNumber, "bad html");
1245             }
1246         }
1247 
1248         /**
1249          * Read a series of HTML attributes, terminated by {@literal > }.
1250          * Each attribute is of the form {@literal identifier[=value] }.
1251          * "value" may be unquoted, single-quoted, or double-quoted.
1252          */
htmlAttrs()1253         private Map<String,String> htmlAttrs() throws IOException {
1254             Map<String, String> map = new LinkedHashMap<>();
1255             skipWhitespace();
1256 
1257             loop:
1258             while (isIdentifierStart((char) ch)) {
1259                 String name = readAttributeName().toLowerCase(Locale.US);
1260                 skipWhitespace();
1261                 String value = null;
1262                 if (ch == '=') {
1263                     nextChar();
1264                     skipWhitespace();
1265                     if (ch == '\'' || ch == '"') {
1266                         char quote = (char) ch;
1267                         nextChar();
1268                         StringBuilder sb = new StringBuilder();
1269                         while (ch != -1 && ch != quote) {
1270                             sb.append((char) ch);
1271                             nextChar();
1272                         }
1273                         value = sb.toString() // hack to replace common entities
1274                                 .replace("&lt;", "<")
1275                                 .replace("&gt;", ">")
1276                                 .replace("&amp;", "&");
1277                         nextChar();
1278                     } else {
1279                         StringBuilder sb = new StringBuilder();
1280                         while (ch != -1 && !isUnquotedAttrValueTerminator((char) ch)) {
1281                             sb.append((char) ch);
1282                             nextChar();
1283                         }
1284                         value = sb.toString();
1285                     }
1286                     skipWhitespace();
1287                 }
1288                 map.put(name, value);
1289             }
1290 
1291             return map;
1292         }
1293 
isIdentifierStart(char ch)1294         private boolean isIdentifierStart(char ch) {
1295             return Character.isUnicodeIdentifierStart(ch);
1296         }
1297 
readIdentifier()1298         private String readIdentifier() throws IOException {
1299             StringBuilder sb = new StringBuilder();
1300             sb.append((char) ch);
1301             nextChar();
1302             while (ch != -1 && Character.isUnicodeIdentifierPart(ch)) {
1303                 sb.append((char) ch);
1304                 nextChar();
1305             }
1306             return sb.toString();
1307         }
1308 
readAttributeName()1309         private String readAttributeName() throws IOException {
1310             StringBuilder sb = new StringBuilder();
1311             sb.append((char) ch);
1312             nextChar();
1313             while (ch != -1 && Character.isUnicodeIdentifierPart(ch)
1314                     || ch == '-'
1315                     || xml && ch == ':') {
1316                 sb.append((char) ch);
1317                 nextChar();
1318             }
1319             return sb.toString();
1320         }
1321 
isWhitespace(char ch)1322         private boolean isWhitespace(char ch) {
1323             return Character.isWhitespace(ch);
1324         }
1325 
skipWhitespace()1326         private void skipWhitespace() throws IOException {
1327             while (isWhitespace((char) ch)) {
1328                 nextChar();
1329             }
1330         }
1331 
isUnquotedAttrValueTerminator(char ch)1332         private boolean isUnquotedAttrValueTerminator(char ch) {
1333             switch (ch) {
1334                 case '\f': case '\n': case '\r': case '\t':
1335                 case ' ':
1336                 case '"': case '\'': case '`':
1337                 case '=': case '<': case '>':
1338                     return true;
1339                 default:
1340                     return false;
1341             }
1342         }
1343     }
1344 
1345     /**
1346      * A class to check the links in a set of HTML files.
1347      */
1348     static class LinkChecker extends HtmlParser {
1349         private final Map<Path, IDTable> allFiles;
1350         private final Map<URI, IDTable> allURIs;
1351 
1352         private int files;
1353         private int links;
1354         private int badSchemes;
1355         private int duplicateIds;
1356         private int missingIds;
1357 
1358         private Path currFile;
1359         private IDTable currTable;
1360         private boolean html5;
1361         private boolean xml;
1362 
1363         private int errors;
1364 
LinkChecker(PrintStream out, Function<Path,String> fileReader)1365         LinkChecker(PrintStream out, Function<Path,String> fileReader) {
1366             super(out, fileReader);
1367             allFiles = new HashMap<>();
1368             allURIs = new HashMap<>();
1369         }
1370 
checkDirectory(Path dir)1371         void checkDirectory(Path dir) throws IOException {
1372             checkFiles(List.of(dir), false, Collections.emptySet());
1373         }
1374 
checkFiles(List<Path> files, boolean skipSubdirs, Set<Path> excludeFiles)1375         void checkFiles(List<Path> files, boolean skipSubdirs, Set<Path> excludeFiles) throws IOException {
1376             for (Path file : files) {
1377                 Files.walkFileTree(file, new SimpleFileVisitor<Path>() {
1378                     int depth = 0;
1379 
1380                     @Override
1381                     public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
1382                         if ((skipSubdirs && depth > 0) || excludeFiles.contains(dir)) {
1383                             return FileVisitResult.SKIP_SUBTREE;
1384                         }
1385                         depth++;
1386                         return FileVisitResult.CONTINUE;
1387                     }
1388 
1389                     @Override
1390                     public FileVisitResult visitFile(Path p, BasicFileAttributes attrs) {
1391                         if (excludeFiles.contains(p)) {
1392                             return FileVisitResult.CONTINUE;
1393                         }
1394 
1395                         if (Files.isRegularFile(p) && p.getFileName().toString().endsWith(".html")) {
1396                             checkFile(p);
1397                         }
1398                         return FileVisitResult.CONTINUE;
1399                     }
1400 
1401                     @Override
1402                     public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException {
1403                         depth--;
1404                         return super.postVisitDirectory(dir, e);
1405                     }
1406                 });
1407             }
1408         }
1409 
checkFile(Path file)1410         void checkFile(Path file) {
1411             try {
1412                 read(file);
1413             } catch (IOException e) {
1414                 error(file, 0, e);
1415             }
1416         }
1417 
getErrorCount()1418         int getErrorCount() {
1419             return errors;
1420         }
1421 
report()1422         public void report() {
1423             List<Path> missingFiles = getMissingFiles();
1424             if (!missingFiles.isEmpty()) {
1425                 report("Missing files: (" + missingFiles.size() + ")");
1426                 missingFiles.stream()
1427                         .sorted()
1428                         .forEach(this::reportMissingFile);
1429 
1430             }
1431 
1432             if (!allURIs.isEmpty()) {
1433                 report(false, "External URLs:");
1434                 allURIs.keySet().stream()
1435                         .sorted(new URIComparator())
1436                         .forEach(uri -> report(false, "  %s", uri.toString()));
1437             }
1438 
1439             int anchors = 0;
1440             for (IDTable t : allFiles.values()) {
1441                 anchors += t.map.values().stream()
1442                         .filter(e -> !e.getReferences().isEmpty())
1443                         .count();
1444             }
1445             for (IDTable t : allURIs.values()) {
1446                 anchors += t.map.values().stream()
1447                         .filter(e -> !e.references.isEmpty())
1448                         .count();
1449             }
1450 
1451             report(false, "Checked " + files + " files.");
1452             report(false, "Found " + links + " references to " + anchors + " anchors "
1453                     + "in " + allFiles.size() + " files and " + allURIs.size() + " other URIs.");
1454             report(!missingFiles.isEmpty(),   "%6d missing files", missingFiles.size());
1455             report(duplicateIds > 0, "%6d duplicate ids", duplicateIds);
1456             report(missingIds > 0,   "%6d missing ids", missingIds);
1457 
1458             Map<String, Integer> schemeCounts = new TreeMap<>();
1459             Map<String, Integer> hostCounts = new TreeMap<>(new HostComparator());
1460             for (URI uri : allURIs.keySet()) {
1461                 String scheme = uri.getScheme();
1462                 if (scheme != null) {
1463                     schemeCounts.put(scheme, schemeCounts.computeIfAbsent(scheme, s -> 0) + 1);
1464                 }
1465                 String host = uri.getHost();
1466                 if (host != null) {
1467                     hostCounts.put(host, hostCounts.computeIfAbsent(host, h -> 0) + 1);
1468                 }
1469             }
1470 
1471             if (schemeCounts.size() > 0) {
1472                 report(false, "Schemes");
1473                 schemeCounts.forEach((s, n) -> report(!isSchemeOK(s), "%6d %s", n, s));
1474             }
1475 
1476             if (hostCounts.size() > 0) {
1477                 report(false, "Hosts");
1478                 hostCounts.forEach((h, n) -> report(false, "%6d %s", n, h));
1479             }
1480         }
1481 
report(String message, Object... args)1482         private void report(String message, Object... args) {
1483             out.println(String.format(message, args));
1484         }
1485 
report(boolean highlight, String message, Object... args)1486         private void report(boolean highlight, String message, Object... args) {
1487             out.print(highlight ? "* " : "  ");
1488             out.println(String.format(message, args));
1489         }
1490 
reportMissingFile(Path file)1491         private void reportMissingFile(Path file) {
1492             report("%s", relativePath(file));
1493             IDTable table = allFiles.get(file);
1494             Set<Path> refs = new TreeSet<>();
1495             for (ID id : table.map.values()) {
1496                 if (id.references != null) {
1497                     for (Position p : id.references) {
1498                         refs.add(p.path);
1499                     }
1500                 }
1501             }
1502             int n = 0;
1503             int MAX_REFS = 10;
1504             for (Path ref : refs) {
1505                 report("    in " + relativePath(ref));
1506                 if (++n == MAX_REFS) {
1507                     report("    ... and %d more", refs.size() - n);
1508                     break;
1509                 }
1510             }
1511         }
1512 
1513         @Override
startFile(Path path)1514         public void startFile(Path path) {
1515             currFile = path.toAbsolutePath().normalize();
1516             currTable = allFiles.computeIfAbsent(currFile, p -> new IDTable(p));
1517             html5 = false;
1518             files++;
1519         }
1520 
1521         @Override
endFile()1522         public void endFile() {
1523             currTable.check();
1524         }
1525 
1526         @Override
docType(String doctype)1527         public void docType(String doctype) {
1528             html5 = doctype.matches("(?i)<\\?doctype\\s+html>");
1529         }
1530 
1531         @Override @SuppressWarnings("fallthrough")
startElement(String name, Map<String, String> attrs, boolean selfClosing)1532         public void startElement(String name, Map<String, String> attrs, boolean selfClosing) {
1533             int line = getLineNumber();
1534             switch (name) {
1535                 case "a":
1536                     String nameAttr = html5 ? null : attrs.get("name");
1537                     if (nameAttr != null) {
1538                         foundAnchor(line, nameAttr);
1539                     }
1540                     // fallthrough
1541                 case "link":
1542                     String href = attrs.get("href");
1543                     if (href != null) {
1544                         foundReference(line, href);
1545                     }
1546                     break;
1547             }
1548 
1549             String idAttr = attrs.get("id");
1550             if (idAttr != null) {
1551                 foundAnchor(line, idAttr);
1552             }
1553         }
1554 
1555         @Override
endElement(String name)1556         public void endElement(String name) { }
1557 
foundAnchor(int line, String name)1558         private void foundAnchor(int line, String name) {
1559             currTable.addID(line, name);
1560         }
1561 
foundReference(int line, String ref)1562         private void foundReference(int line, String ref) {
1563             links++;
1564             try {
1565                 URI uri = new URI(ref);
1566                 if (uri.isAbsolute()) {
1567                     foundReference(line, uri);
1568                 } else {
1569                     Path p;
1570                     String uriPath = uri.getPath();
1571                     if (uriPath == null || uriPath.isEmpty()) {
1572                         p = currFile;
1573                     } else {
1574                         p = currFile.getParent().resolve(uriPath).normalize();
1575                     }
1576                     foundReference(line, p, uri.getFragment());
1577                 }
1578             } catch (URISyntaxException e) {
1579                 error(currFile, line, "invalid URI: " + e);
1580             }
1581         }
1582 
foundReference(int line, Path p, String fragment)1583         private void foundReference(int line, Path p, String fragment) {
1584             IDTable t = allFiles.computeIfAbsent(p, key -> new IDTable(key));
1585             t.addReference(fragment, currFile, line);
1586         }
1587 
foundReference(int line, URI uri)1588         private void foundReference(int line, URI uri) {
1589             if (!isSchemeOK(uri.getScheme())) {
1590                 error(currFile, line, "bad scheme in URI");
1591                 badSchemes++;
1592             }
1593 
1594             String fragment = uri.getFragment();
1595             try {
1596                 URI noFrag = new URI(uri.toString().replaceAll("#\\Q" + fragment + "\\E$", ""));
1597                 IDTable t = allURIs.computeIfAbsent(noFrag, key -> new IDTable(key.toString()));
1598                 t.addReference(fragment, currFile, line);
1599             } catch (URISyntaxException e) {
1600                 throw new Error(e);
1601             }
1602         }
1603 
isSchemeOK(String uriScheme)1604         private boolean isSchemeOK(String uriScheme) {
1605             if (uriScheme == null) {
1606                 return true;
1607             }
1608 
1609             switch (uriScheme) {
1610                 case "file":
1611                 case "ftp":
1612                 case "http":
1613                 case "https":
1614                 case "javascript":
1615                 case "mailto":
1616                     return true;
1617 
1618                 default:
1619                     return false;
1620             }
1621         }
1622 
getMissingFiles()1623         private List<Path> getMissingFiles() {
1624             return allFiles.entrySet().stream()
1625                     .filter(e -> !Files.exists(e.getKey()))
1626                     .map(e -> e.getKey())
1627                     .collect(Collectors.toList());
1628         }
1629 
1630         @Override
error(Path file, int lineNumber, String message)1631         protected void error(Path file, int lineNumber, String message) {
1632             super.error(relativePath(file), lineNumber, message);
1633             errors++;
1634         }
1635 
1636         @Override
error(Path file, int lineNumber, Throwable t)1637         protected void error(Path file, int lineNumber, Throwable t) {
1638             super.error(relativePath(file), lineNumber, t);
1639             errors++;
1640         }
1641 
relativePath(Path path)1642         private Path relativePath(Path path) {
1643             return path.startsWith(currDir) ? currDir.relativize(path) : path;
1644         }
1645 
1646         /**
1647          * A position in a file, as identified by a file name and line number.
1648          */
1649         static class Position implements Comparable<Position> {
1650             Path path;
1651             int line;
1652 
Position(Path path, int line)1653             Position(Path path, int line) {
1654                 this.path = path;
1655                 this.line = line;
1656             }
1657 
1658             @Override
compareTo(Position o)1659             public int compareTo(Position o) {
1660                 int v = path.compareTo(o.path);
1661                 return v != 0 ? v : Integer.compare(line, o.line);
1662             }
1663 
1664             @Override
equals(Object obj)1665             public boolean equals(Object obj) {
1666                 if (this == obj) {
1667                     return true;
1668                 } else if (obj == null || getClass() != obj.getClass()) {
1669                     return false;
1670                 } else {
1671                     final Position other = (Position) obj;
1672                     return Objects.equals(this.path, other.path)
1673                             && this.line == other.line;
1674                 }
1675             }
1676 
1677             @Override
hashCode()1678             public int hashCode() {
1679                 return Objects.hashCode(path) * 37 + line;
1680             }
1681         }
1682 
1683         /**
1684          * Infor for an ID within an HTML file, and a set of positions that reference it.
1685          */
1686         static class ID {
1687             boolean declared;
1688             Set<Position> references;
1689 
getReferences()1690             Set<Position> getReferences() {
1691                 return (references) == null ? Collections.emptySet() : references;
1692             }
1693         }
1694 
1695         /**
1696          * A table for the set of IDs in an HTML file.
1697          */
1698         class IDTable {
1699             private String name;
1700             private boolean checked;
1701             private final Map<String, ID> map = new HashMap<>();
1702 
IDTable(Path p)1703             IDTable(Path p) {
1704                 this(relativePath(p).toString());
1705             }
1706 
IDTable(String name)1707             IDTable(String name) {
1708                 this.name = name;
1709             }
1710 
addID(int line, String name)1711             void addID(int line, String name) {
1712                 if (checked) {
1713                     throw new IllegalStateException("Adding ID after file has been read");
1714                 }
1715                 Objects.requireNonNull(name);
1716                 ID id = map.computeIfAbsent(name, x -> new ID());
1717                 if (id.declared) {
1718                     error(currFile, line, "name already declared: " + name);
1719                     duplicateIds++;
1720                 } else {
1721                     id.declared = true;
1722                 }
1723             }
1724 
addReference(String name, Path from, int line)1725             void addReference(String name, Path from, int line) {
1726                 if (checked) {
1727                     if (name != null) {
1728                         ID id = map.get(name);
1729                         if (id == null || !id.declared) {
1730                             error(from, line, "id not found: " + this.name + "#" + name);
1731                         }
1732                     }
1733                 } else {
1734                     ID id = map.computeIfAbsent(name, x -> new ID());
1735                     if (id.references == null) {
1736                         id.references = new TreeSet<>();
1737                     }
1738                     id.references.add(new Position(from, line));
1739                 }
1740             }
1741 
check()1742             void check() {
1743                 map.forEach((name, id) -> {
1744                     if (name != null && !id.declared) {
1745                         //log.error(currFile, 0, "id not declared: " + name);
1746                         for (Position ref : id.references) {
1747                             error(ref.path, ref.line, "id not found: " + this.name + "#" + name);
1748                         }
1749                         missingIds++;
1750                     }
1751                 });
1752                 checked = true;
1753             }
1754         }
1755 
1756         static class URIComparator implements Comparator<URI> {
1757             final HostComparator hostComparator = new HostComparator();
1758 
1759             @Override
compare(URI o1, URI o2)1760             public int compare(URI o1, URI o2) {
1761                 if (o1.isOpaque() || o2.isOpaque()) {
1762                     return o1.compareTo(o2);
1763                 }
1764                 String h1 = o1.getHost();
1765                 String h2 = o2.getHost();
1766                 String s1 = o1.getScheme();
1767                 String s2 = o2.getScheme();
1768                 if (h1 == null || h1.isEmpty() || s1 == null || s1.isEmpty()
1769                         || h2 == null || h2.isEmpty() || s2 == null || s2.isEmpty()) {
1770                     return o1.compareTo(o2);
1771                 }
1772                 int v = hostComparator.compare(h1, h2);
1773                 if (v != 0) {
1774                     return v;
1775                 }
1776                 v = s1.compareTo(s2);
1777                 if (v != 0) {
1778                     return v;
1779                 }
1780                 return o1.compareTo(o2);
1781             }
1782         }
1783 
1784         static class HostComparator implements Comparator<String> {
1785             @Override
compare(String h1, String h2)1786             public int compare(String h1, String h2) {
1787                 List<String> l1 = new ArrayList<>(Arrays.asList(h1.split("\\.")));
1788                 Collections.reverse(l1);
1789                 String r1 = String.join(".", l1);
1790                 List<String> l2 = new ArrayList<>(Arrays.asList(h2.split("\\.")));
1791                 Collections.reverse(l2);
1792                 String r2 = String.join(".", l2);
1793                 return r1.compareTo(r2);
1794             }
1795         }
1796 
1797     }
1798 }
1799