1 /*
2  * Copyright (c) 2012,2016, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package com.sun.tools.javadoc.main;
27 
28 import java.util.Arrays;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.Locale;
32 import java.util.Map;
33 import java.util.Set;
34 
35 import com.sun.tools.javadoc.main.JavaScriptScanner.TagParser.Kind;
36 
37 import static com.sun.tools.javac.util.LayoutCharacters.EOI;
38 
39 /**
40  * Parser to detect use of JavaScript in documentation comments.
41  */
42 @Deprecated(since="9", forRemoval=true)
43 @SuppressWarnings("removal")
44 public class JavaScriptScanner {
45     public static interface Reporter {
report()46         void report();
47     }
48 
49     static class ParseException extends Exception {
50         private static final long serialVersionUID = 0;
ParseException(String key)51         ParseException(String key) {
52             super(key);
53         }
54     }
55 
    // Callback notified whenever suspicious content is found;
    // assigned from the argument of parse().
    private Reporter reporter;

    /** The input buffer, index of most recent character read,
     *  index of one past last character in buffer.
     */
    protected char[] buf;
    protected int bp;
    protected int buflen;

    /** The current character.
     */
    protected char ch;

    // Set when a line terminator is read and cleared by most scanning loops
    // when they consume non-whitespace text. An '@' is only treated as the
    // start of a block tag while this flag is set.
    private boolean newline = true;

    // Tag parsers, looked up by tag name in blockTag() and inlineTag().
    Map<String, TagParser> tagParsers;
    // Attribute names consulted by checkHtmlAttr() after lower-casing;
    // presumably the URI-valued HTML attributes (populated by initURIAttrs()).
    Set<String> uriAttrs;
73 
JavaScriptScanner()74     public JavaScriptScanner() {
75         initTagParsers();
76         initURIAttrs();
77     }
78 
parse(String comment, Reporter r)79     public void parse(String comment, Reporter r) {
80         reporter = r;
81         String c = comment;
82         buf = new char[c.length() + 1];
83         c.getChars(0, c.length(), buf, 0);
84         buf[buf.length - 1] = EOI;
85         buflen = buf.length - 1;
86         bp = -1;
87         newline = true;
88         nextChar();
89 
90         blockContent();
91         blockTags();
92     }
93 
checkHtmlTag(String tag)94     private void checkHtmlTag(String tag) {
95         if (tag.equalsIgnoreCase("script")) {
96             reporter.report();
97         }
98     }
99 
checkHtmlAttr(String name, String value)100     private void checkHtmlAttr(String name, String value) {
101         String n = name.toLowerCase(Locale.ENGLISH);
102         // See https://www.w3.org/TR/html52/webappapis.html#events-event-handlers
103         // An event handler has a name, which always starts with "on" and is followed by
104         // the name of the event for which it is intended.
105         if (n.startsWith("on")
106                 || uriAttrs.contains(n)
107                     && value != null && value.toLowerCase(Locale.ENGLISH).trim().startsWith("javascript:")) {
108             reporter.report();
109         }
110     }
111 
nextChar()112     void nextChar() {
113         ch = buf[bp < buflen ? ++bp : buflen];
114         switch (ch) {
115             case '\f': case '\n': case '\r':
116                 newline = true;
117         }
118     }
119 
120     /**
121      * Read block content, consisting of text, html and inline tags.
122      * Terminated by the end of input, or the beginning of the next block tag:
123      * i.e. @ as the first non-whitespace character on a line.
124      */
125     @SuppressWarnings("fallthrough")
blockContent()126     protected void blockContent() {
127 
128         loop:
129         while (bp < buflen) {
130             switch (ch) {
131                 case '\n': case '\r': case '\f':
132                     newline = true;
133                     // fallthrough
134 
135                 case ' ': case '\t':
136                     nextChar();
137                     break;
138 
139                 case '&':
140                     entity(null);
141                     break;
142 
143                 case '<':
144                     html();
145                     break;
146 
147                 case '>':
148                     newline = false;
149                     nextChar();
150                     break;
151 
152                 case '{':
153                     inlineTag(null);
154                     break;
155 
156                 case '@':
157                     if (newline) {
158                         break loop;
159                     }
160                     // fallthrough
161 
162                 default:
163                     newline = false;
164                     nextChar();
165             }
166         }
167     }
168 
169     /**
170      * Read a series of block tags, including their content.
171      * Standard tags parse their content appropriately.
172      * Non-standard tags are represented by {@link UnknownBlockTag}.
173      */
blockTags()174     protected void blockTags() {
175         while (ch == '@')
176             blockTag();
177     }
178 
179     /**
180      * Read a single block tag, including its content.
181      * Standard tags parse their content appropriately.
182      * Non-standard tags are represented by {@link UnknownBlockTag}.
183      */
blockTag()184     protected void blockTag() {
185         int p = bp;
186         try {
187             nextChar();
188             if (isIdentifierStart(ch)) {
189                 String name = readTagName();
190                 TagParser tp = tagParsers.get(name);
191                 if (tp == null) {
192                     blockContent();
193                 } else {
194                     switch (tp.getKind()) {
195                         case BLOCK:
196                             tp.parse(p);
197                             return;
198                         case INLINE:
199                             return;
200                     }
201                 }
202             }
203             blockContent();
204         } catch (ParseException e) {
205             blockContent();
206         }
207     }
208 
inlineTag(Void list)209     protected void inlineTag(Void list) {
210         newline = false;
211         nextChar();
212         if (ch == '@') {
213             inlineTag();
214         }
215     }
216 
217     /**
218      * Read a single inline tag, including its content.
219      * Standard tags parse their content appropriately.
220      * Non-standard tags are represented by {@link UnknownBlockTag}.
221      * Malformed tags may be returned as {@link Erroneous}.
222      */
inlineTag()223     protected void inlineTag() {
224         int p = bp - 1;
225         try {
226             nextChar();
227             if (isIdentifierStart(ch)) {
228                 String name = readTagName();
229                 TagParser tp = tagParsers.get(name);
230 
231                 if (tp == null) {
232                     skipWhitespace();
233                     inlineText(WhitespaceRetentionPolicy.REMOVE_ALL);
234                     nextChar();
235                 } else {
236                     skipWhitespace();
237                     if (tp.getKind() == TagParser.Kind.INLINE) {
238                         tp.parse(p);
239                     } else { // handle block tags (ex: @see) in inline content
240                         inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip content
241                         nextChar();
242                     }
243                 }
244             }
245         } catch (ParseException e) {
246         }
247     }
248 
249     private static enum WhitespaceRetentionPolicy {
250         RETAIN_ALL,
251         REMOVE_FIRST_SPACE,
252         REMOVE_ALL
253     }
254 
255     /**
256      * Read plain text content of an inline tag.
257      * Matching pairs of { } are skipped; the text is terminated by the first
258      * unmatched }. It is an error if the beginning of the next tag is detected.
259      */
inlineText(WhitespaceRetentionPolicy whitespacePolicy)260     private void inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException {
261         switch (whitespacePolicy) {
262             case REMOVE_ALL:
263                 skipWhitespace();
264                 break;
265             case REMOVE_FIRST_SPACE:
266                 if (ch == ' ')
267                     nextChar();
268                 break;
269             case RETAIN_ALL:
270             default:
271                 // do nothing
272                 break;
273 
274         }
275         int pos = bp;
276         int depth = 1;
277 
278         loop:
279         while (bp < buflen) {
280             switch (ch) {
281                 case '\n': case '\r': case '\f':
282                     newline = true;
283                     break;
284 
285                 case ' ': case '\t':
286                     break;
287 
288                 case '{':
289                     newline = false;
290                     depth++;
291                     break;
292 
293                 case '}':
294                     if (--depth == 0) {
295                         return;
296                     }
297                     newline = false;
298                     break;
299 
300                 case '@':
301                     if (newline)
302                         break loop;
303                     newline = false;
304                     break;
305 
306                 default:
307                     newline = false;
308                     break;
309             }
310             nextChar();
311         }
312         throw new ParseException("dc.unterminated.inline.tag");
313     }
314 
315     /**
316      * Read Java class name, possibly followed by member
317      * Matching pairs of {@literal < >} are skipped. The text is terminated by the first
318      * unmatched }. It is an error if the beginning of the next tag is detected.
319      */
320     // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
321     // TODO: improve quality of parse to forbid bad constructions.
322     // TODO: update to use ReferenceParser
323     @SuppressWarnings("fallthrough")
reference(boolean allowMember)324     protected void reference(boolean allowMember) throws ParseException {
325         int pos = bp;
326         int depth = 0;
327 
328         // scan to find the end of the signature, by looking for the first
329         // whitespace not enclosed in () or <>, or the end of the tag
330         loop:
331         while (bp < buflen) {
332             switch (ch) {
333                 case '\n': case '\r': case '\f':
334                     newline = true;
335                     // fallthrough
336 
337                 case ' ': case '\t':
338                     if (depth == 0)
339                         break loop;
340                     break;
341 
342                 case '(':
343                 case '<':
344                     newline = false;
345                     depth++;
346                     break;
347 
348                 case ')':
349                 case '>':
350                     newline = false;
351                     --depth;
352                     break;
353 
354                 case '}':
355                     if (bp == pos)
356                         return;
357                     newline = false;
358                     break loop;
359 
360                 case '@':
361                     if (newline)
362                         break loop;
363                     // fallthrough
364 
365                 default:
366                     newline = false;
367 
368             }
369             nextChar();
370         }
371 
372         if (depth != 0)
373             throw new ParseException("dc.unterminated.signature");
374     }
375 
376     /**
377      * Read Java identifier
378      * Matching pairs of { } are skipped; the text is terminated by the first
379      * unmatched }. It is an error if the beginning of the next tag is detected.
380      */
381     @SuppressWarnings("fallthrough")
identifier()382     protected void identifier() throws ParseException {
383         skipWhitespace();
384         int pos = bp;
385 
386         if (isJavaIdentifierStart(ch)) {
387             readJavaIdentifier();
388             return;
389         }
390 
391         throw new ParseException("dc.identifier.expected");
392     }
393 
394     /**
395      * Read a quoted string.
396      * It is an error if the beginning of the next tag is detected.
397      */
398     @SuppressWarnings("fallthrough")
quotedString()399     protected void quotedString() {
400         int pos = bp;
401         nextChar();
402 
403         loop:
404         while (bp < buflen) {
405             switch (ch) {
406                 case '\n': case '\r': case '\f':
407                     newline = true;
408                     break;
409 
410                 case ' ': case '\t':
411                     break;
412 
413                 case '"':
414                     nextChar();
415                     // trim trailing white-space?
416                     return;
417 
418                 case '@':
419                     if (newline)
420                         break loop;
421 
422             }
423             nextChar();
424         }
425     }
426 
427     /**
428      * Read a term ie. one word.
429      * It is an error if the beginning of the next tag is detected.
430      */
431     @SuppressWarnings("fallthrough")
inlineWord()432     protected void inlineWord() {
433         int pos = bp;
434         int depth = 0;
435         loop:
436         while (bp < buflen) {
437             switch (ch) {
438                 case '\n':
439                     newline = true;
440                     // fallthrough
441 
442                 case '\r': case '\f': case ' ': case '\t':
443                     return;
444 
445                 case '@':
446                     if (newline)
447                         break loop;
448 
449                 case '{':
450                     depth++;
451                     break;
452 
453                 case '}':
454                     if (depth == 0 || --depth == 0)
455                         return;
456                     break;
457             }
458             newline = false;
459             nextChar();
460         }
461     }
462 
463     /**
464      * Read general text content of an inline tag, including HTML entities and elements.
465      * Matching pairs of { } are skipped; the text is terminated by the first
466      * unmatched }. It is an error if the beginning of the next tag is detected.
467      */
468     @SuppressWarnings("fallthrough")
inlineContent()469     private void inlineContent() {
470 
471         skipWhitespace();
472         int pos = bp;
473         int depth = 1;
474 
475         loop:
476         while (bp < buflen) {
477 
478             switch (ch) {
479                 case '\n': case '\r': case '\f':
480                     newline = true;
481                     // fall through
482 
483                 case ' ': case '\t':
484                     nextChar();
485                     break;
486 
487                 case '&':
488                     entity(null);
489                     break;
490 
491                 case '<':
492                     newline = false;
493                     html();
494                     break;
495 
496                 case '{':
497                     newline = false;
498                     depth++;
499                     nextChar();
500                     break;
501 
502                 case '}':
503                     newline = false;
504                     if (--depth == 0) {
505                         nextChar();
506                         return;
507                     }
508                     nextChar();
509                     break;
510 
511                 case '@':
512                     if (newline)
513                         break loop;
514                     // fallthrough
515 
516                 default:
517                     nextChar();
518                     break;
519             }
520         }
521 
522     }
523 
entity(Void list)524     protected void entity(Void list) {
525         newline = false;
526         entity();
527     }
528 
529     /**
530      * Read an HTML entity.
531      * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
532      */
entity()533     protected void entity() {
534         nextChar();
535         String name = null;
536         if (ch == '#') {
537             int namep = bp;
538             nextChar();
539             if (isDecimalDigit(ch)) {
540                 nextChar();
541                 while (isDecimalDigit(ch))
542                     nextChar();
543                 name = new String(buf, namep, bp - namep);
544             } else if (ch == 'x' || ch == 'X') {
545                 nextChar();
546                 if (isHexDigit(ch)) {
547                     nextChar();
548                     while (isHexDigit(ch))
549                         nextChar();
550                     name = new String(buf, namep, bp - namep);
551                 }
552             }
553         } else if (isIdentifierStart(ch)) {
554             name = readIdentifier();
555         }
556 
557         if (name != null) {
558             if (ch != ';')
559                 return;
560             nextChar();
561         }
562     }
563 
564     /**
565      * Read the start or end of an HTML tag, or an HTML comment
566      * {@literal <identifier attrs> } or {@literal </identifier> }
567      */
html()568     protected void html() {
569         int p = bp;
570         nextChar();
571         if (isIdentifierStart(ch)) {
572             String name = readIdentifier();
573             checkHtmlTag(name);
574             htmlAttrs();
575             if (ch == '/') {
576                 nextChar();
577             }
578             if (ch == '>') {
579                 nextChar();
580                 return;
581             }
582         } else if (ch == '/') {
583             nextChar();
584             if (isIdentifierStart(ch)) {
585                 readIdentifier();
586                 skipWhitespace();
587                 if (ch == '>') {
588                     nextChar();
589                     return;
590                 }
591             }
592         } else if (ch == '!') {
593             nextChar();
594             if (ch == '-') {
595                 nextChar();
596                 if (ch == '-') {
597                     nextChar();
598                     while (bp < buflen) {
599                         int dash = 0;
600                         while (ch == '-') {
601                             dash++;
602                             nextChar();
603                         }
604                         // Strictly speaking, a comment should not contain "--"
605                         // so dash > 2 is an error, dash == 2 implies ch == '>'
606                         // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
607                         // for more details.
608                         if (dash >= 2 && ch == '>') {
609                             nextChar();
610                             return;
611                         }
612 
613                         nextChar();
614                     }
615                 }
616             }
617         }
618 
619         bp = p + 1;
620         ch = buf[bp];
621     }
622 
623     /**
624      * Read a series of HTML attributes, terminated by {@literal > }.
625      * Each attribute is of the form {@literal identifier[=value] }.
626      * "value" may be unquoted, single-quoted, or double-quoted.
627      */
htmlAttrs()628     protected void htmlAttrs() {
629         skipWhitespace();
630 
631         loop:
632         while (isIdentifierStart(ch)) {
633             int namePos = bp;
634             String name = readAttributeName();
635             skipWhitespace();
636             StringBuilder value = new StringBuilder();
637             if (ch == '=') {
638                 nextChar();
639                 skipWhitespace();
640                 if (ch == '\'' || ch == '"') {
641                     char quote = ch;
642                     nextChar();
643                     while (bp < buflen && ch != quote) {
644                         if (newline && ch == '@') {
645                             // No point trying to read more.
646                             // In fact, all attrs get discarded by the caller
647                             // and superseded by a malformed.html node because
648                             // the html tag itself is not terminated correctly.
649                             break loop;
650                         }
651                         value.append(ch);
652                         nextChar();
653                     }
654                     nextChar();
655                 } else {
656                     while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
657                         value.append(ch);
658                         nextChar();
659                     }
660                 }
661                 skipWhitespace();
662             }
663             checkHtmlAttr(name, value.toString());
664         }
665     }
666 
attrValueChar(Void list)667     protected void attrValueChar(Void list) {
668         switch (ch) {
669             case '&':
670                 entity(list);
671                 break;
672 
673             case '{':
674                 inlineTag(list);
675                 break;
676 
677             default:
678                 nextChar();
679         }
680     }
681 
isIdentifierStart(char ch)682     protected boolean isIdentifierStart(char ch) {
683         return Character.isUnicodeIdentifierStart(ch);
684     }
685 
readIdentifier()686     protected String readIdentifier() {
687         int start = bp;
688         nextChar();
689         while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
690             nextChar();
691         return new String(buf, start, bp - start);
692     }
693 
readAttributeName()694     protected String readAttributeName() {
695         int start = bp;
696         nextChar();
697         while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-'))
698             nextChar();
699         return new String(buf, start, bp - start);
700     }
701 
readTagName()702     protected String readTagName() {
703         int start = bp;
704         nextChar();
705         while (bp < buflen
706                 && (Character.isUnicodeIdentifierPart(ch) || ch == '.'
707                 || ch == '-' || ch == ':')) {
708             nextChar();
709         }
710         return new String(buf, start, bp - start);
711     }
712 
isJavaIdentifierStart(char ch)713     protected boolean isJavaIdentifierStart(char ch) {
714         return Character.isJavaIdentifierStart(ch);
715     }
716 
readJavaIdentifier()717     protected String readJavaIdentifier() {
718         int start = bp;
719         nextChar();
720         while (bp < buflen && Character.isJavaIdentifierPart(ch))
721             nextChar();
722         return new String(buf, start, bp - start);
723     }
724 
isDecimalDigit(char ch)725     protected boolean isDecimalDigit(char ch) {
726         return ('0' <= ch && ch <= '9');
727     }
728 
isHexDigit(char ch)729     protected boolean isHexDigit(char ch) {
730         return ('0' <= ch && ch <= '9')
731                 || ('a' <= ch && ch <= 'f')
732                 || ('A' <= ch && ch <= 'F');
733     }
734 
isUnquotedAttrValueTerminator(char ch)735     protected boolean isUnquotedAttrValueTerminator(char ch) {
736         switch (ch) {
737             case '\f': case '\n': case '\r': case '\t':
738             case ' ':
739             case '"': case '\'': case '`':
740             case '=': case '<': case '>':
741                 return true;
742             default:
743                 return false;
744         }
745     }
746 
isWhitespace(char ch)747     protected boolean isWhitespace(char ch) {
748         return Character.isWhitespace(ch);
749     }
750 
skipWhitespace()751     protected void skipWhitespace() {
752         while (isWhitespace(ch)) {
753             nextChar();
754         }
755     }
756 
757     /**
758      * @param start position of first character of string
759      * @param end position of character beyond last character to be included
760      */
newString(int start, int end)761     String newString(int start, int end) {
762         return new String(buf, start, end - start);
763     }
764 
765     static abstract class TagParser {
766         enum Kind { INLINE, BLOCK }
767 
768         final Kind kind;
769         final String name;
770 
771 
TagParser(Kind k, String tk)772         TagParser(Kind k, String tk) {
773             kind = k;
774             name = tk;
775         }
776 
TagParser(Kind k, String tk, boolean retainWhiteSpace)777         TagParser(Kind k, String tk, boolean retainWhiteSpace) {
778             this(k, tk);
779         }
780 
getKind()781         Kind getKind() {
782             return kind;
783         }
784 
getName()785         String getName() {
786             return name;
787         }
788 
parse(int pos)789         abstract void parse(int pos) throws ParseException;
790     }
791 
792     /**
793      * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/tools/unix/javadoc.html#CHDJGIJB">Javadoc Tags</a>
794      */
795     @SuppressWarnings("deprecation")
initTagParsers()796     private void initTagParsers() {
797         TagParser[] parsers = {
798             // @author name-text
799             new TagParser(Kind.BLOCK, "author") {
800                 @Override
801                 public void parse(int pos) {
802                     blockContent();
803                 }
804             },
805 
806             // {@code text}
807             new TagParser(Kind.INLINE, "code", true) {
808                 @Override
809                 public void parse(int pos) throws ParseException {
810                     inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
811                     nextChar();
812                 }
813             },
814 
815             // @deprecated deprecated-text
816             new TagParser(Kind.BLOCK, "deprecated") {
817                 @Override
818                 public void parse(int pos) {
819                     blockContent();
820                 }
821             },
822 
823             // {@docRoot}
824             new TagParser(Kind.INLINE, "docRoot") {
825                 @Override
826                 public void parse(int pos) throws ParseException {
827                     if (ch == '}') {
828                         nextChar();
829                         return;
830                     }
831                     inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
832                     nextChar();
833                     throw new ParseException("dc.unexpected.content");
834                 }
835             },
836 
837             // @exception class-name description
838             new TagParser(Kind.BLOCK, "exception") {
839                 @Override
840                 public void parse(int pos) throws ParseException {
841                     skipWhitespace();
842                     reference(false);
843                     blockContent();
844                 }
845             },
846 
847             // @hidden hidden-text
848             new TagParser(Kind.BLOCK, "hidden") {
849                 @Override
850                 public void parse(int pos) {
851                     blockContent();
852                 }
853             },
854 
855             // @index search-term options-description
856             new TagParser(Kind.INLINE, "index") {
857                 @Override
858                 public void parse(int pos) throws ParseException {
859                     skipWhitespace();
860                     if (ch == '}') {
861                         throw new ParseException("dc.no.content");
862                     }
863                     if (ch == '"') quotedString(); else inlineWord();
864                     skipWhitespace();
865                     if (ch != '}') {
866                         inlineContent();
867                     } else {
868                         nextChar();
869                     }
870                 }
871             },
872 
873             // {@inheritDoc}
874             new TagParser(Kind.INLINE, "inheritDoc") {
875                 @Override
876                 public void parse(int pos) throws ParseException {
877                     if (ch == '}') {
878                         nextChar();
879                         return;
880                     }
881                     inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content
882                     nextChar();
883                     throw new ParseException("dc.unexpected.content");
884                 }
885             },
886 
887             // {@link package.class#member label}
888             new TagParser(Kind.INLINE, "link") {
889                 @Override
890                 public void parse(int pos) throws ParseException {
891                     reference(true);
892                     inlineContent();
893                 }
894             },
895 
896             // {@linkplain package.class#member label}
897             new TagParser(Kind.INLINE, "linkplain") {
898                 @Override
899                 public void parse(int pos) throws ParseException {
900                     reference(true);
901                     inlineContent();
902                 }
903             },
904 
905             // {@literal text}
906             new TagParser(Kind.INLINE, "literal", true) {
907                 @Override
908                 public void parse(int pos) throws ParseException {
909                     inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE);
910                     nextChar();
911                 }
912             },
913 
914             // @param parameter-name description
915             new TagParser(Kind.BLOCK, "param") {
916                 @Override
917                 public void parse(int pos) throws ParseException {
918                     skipWhitespace();
919 
920                     boolean typaram = false;
921                     if (ch == '<') {
922                         typaram = true;
923                         nextChar();
924                     }
925 
926                     identifier();
927 
928                     if (typaram) {
929                         if (ch != '>')
930                             throw new ParseException("dc.gt.expected");
931                         nextChar();
932                     }
933 
934                     skipWhitespace();
935                     blockContent();
936                 }
937             },
938 
            // @return description
            // Block tag with no structured argument: everything after the tag name is
            // handed to blockContent().
            new TagParser(Kind.BLOCK, "return") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },
946 
947             // @see reference | quoted-string | HTML
948             new TagParser(Kind.BLOCK, "see") {
949                 @Override
950                 public void parse(int pos) throws ParseException {
951                     skipWhitespace();
952                     switch (ch) {
953                         case '"':
954                             quotedString();
955                             skipWhitespace();
956                             if (ch == '@'
957                                     || ch == EOI && bp == buf.length - 1) {
958                                 return;
959                             }
960                             break;
961 
962                         case '<':
963                             blockContent();
964                             return;
965 
966                         case '@':
967                             if (newline)
968                                 throw new ParseException("dc.no.content");
969                             break;
970 
971                         case EOI:
972                             if (bp == buf.length - 1)
973                                 throw new ParseException("dc.no.content");
974                             break;
975 
976                         default:
977                             if (isJavaIdentifierStart(ch) || ch == '#') {
978                                 reference(true);
979                                 blockContent();
980                             }
981                     }
982                     throw new ParseException("dc.unexpected.content");
983                 }
984             },
985 
986             // @serialData data-description
987             new TagParser(Kind.BLOCK, "@serialData") {
988                 @Override
989                 public void parse(int pos) {
990                     blockContent();
991                 }
992             },
993 
994             // @serialField field-name field-type description
995             new TagParser(Kind.BLOCK, "serialField") {
996                 @Override
997                 public void parse(int pos) throws ParseException {
998                     skipWhitespace();
999                     identifier();
1000                     skipWhitespace();
1001                     reference(false);
1002                     if (isWhitespace(ch)) {
1003                         skipWhitespace();
1004                         blockContent();
1005                     }
1006                 }
1007             },
1008 
            // @serial field-description | include | exclude
            // All three forms are handled the same way here: everything after the tag
            // name is handed to blockContent().
            new TagParser(Kind.BLOCK, "serial") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },
1016 
            // @since since-text
            // Block tag with no structured argument: everything after the tag name is
            // handed to blockContent().
            new TagParser(Kind.BLOCK, "since") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },
1024 
            // @throws class-name description
            // Skips leading whitespace, calls reference(false) for the class name, then
            // blockContent() for the description.
            new TagParser(Kind.BLOCK, "throws") {
                @Override
                public void parse(int pos) throws ParseException {
                    skipWhitespace();
                    // NOTE(review): the 'false' argument presumably rejects a "#member"
                    // part in the reference -- confirm against reference().
                    reference(false);
                    blockContent();
                }
            },
1034 
1035             // {@value package.class#field}
1036             new TagParser(Kind.INLINE, "value") {
1037                 @Override
1038                 public void parse(int pos) throws ParseException {
1039                     reference(true);
1040                     skipWhitespace();
1041                     if (ch == '}') {
1042                         nextChar();
1043                         return;
1044                     }
1045                     nextChar();
1046                     throw new ParseException("dc.unexpected.content");
1047                 }
1048             },
1049 
            // @version version-text
            // Block tag with no structured argument: everything after the tag name is
            // handed to blockContent().
            new TagParser(Kind.BLOCK, "version") {
                @Override
                public void parse(int pos) {
                    blockContent();
                }
            },
1057         };
1058 
1059         tagParsers = new HashMap<>();
1060         for (TagParser p: parsers)
1061             tagParsers.put(p.getName(), p);
1062 
1063     }
1064 
initURIAttrs()1065     private void initURIAttrs() {
1066         uriAttrs = new HashSet<>(Arrays.asList(
1067             // See https://www.w3.org/TR/html4/sgml/dtd.html
1068             //     https://www.w3.org/TR/html5/
1069             // These are all the attributes that take a %URI or a valid URL potentially surrounded
1070             // by spaces
1071             "action",  "cite",  "classid",  "codebase",  "data",
1072             "datasrc",  "for",  "href",  "longdesc",  "profile",
1073             "src",  "usemap"
1074         ));
1075     }
1076 
1077 }
1078