1 /*
2  * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package com.sun.xml.internal.dtdparser;
27 
28 import org.xml.sax.EntityResolver;
29 import org.xml.sax.InputSource;
30 import org.xml.sax.Locator;
31 import org.xml.sax.SAXException;
32 import org.xml.sax.SAXParseException;
33 
34 import java.io.IOException;
35 import java.util.ArrayList;
36 import java.util.Enumeration;
37 import java.util.Hashtable;
38 import java.util.Locale;
39 import java.util.Set;
40 import java.util.Vector;
41 
42 /**
43  * This implements parsing of XML 1.0 DTDs.
44  * <p/>
45  * This conforms to the portion of the XML 1.0 specification related
46  * to the external DTD subset.
47  * <p/>
48  * For multi-language applications (such as web servers using XML
49  * processing to create dynamic content), a method supports choosing
50  * a locale for parser diagnostics which is both understood by the
51  * message recipient and supported by the parser.
52  * <p/>
53  * This parser produces a stream of parse events.  It supports some
54  * features (exposing comments, CDATA sections, and entity references)
55  * which are not required to be reported by conformant XML processors.
56  *
57  * @author David Brownell
58  * @author Janet Koenig
59  * @author Kohsuke KAWAGUCHI
60  * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
61  */
62 public class DTDParser {
    // Attribute type names.  All but TYPE_ENUMERATION spell a keyword of
    // the XML 1.0 attribute declaration syntax.
    // NOTE(review): presumably these are the strings reported to the
    // DTDEventListener for attribute declarations -- confirm against the
    // ATTLIST handling, which is outside this part of the file.
    public final static String TYPE_CDATA = "CDATA";
    public final static String TYPE_ID = "ID";
    public final static String TYPE_IDREF = "IDREF";
    public final static String TYPE_IDREFS = "IDREFS";
    public final static String TYPE_ENTITY = "ENTITY";
    public final static String TYPE_ENTITIES = "ENTITIES";
    public final static String TYPE_NMTOKEN = "NMTOKEN";
    public final static String TYPE_NMTOKENS = "NMTOKENS";
    public final static String TYPE_NOTATION = "NOTATION";
    public final static String TYPE_ENUMERATION = "ENUMERATION";
73 
74 
    // stack of input entities being merged; this field is the entity
    // currently being read, and in.pop() yields the one beneath it
    private InputEntity in;

    // temporaries reused during parsing; strTmp, nameTmp and nameCache are
    // allocated by init() and set back to null when parseInternal() finishes
    private StringBuffer strTmp;
    // name buffer filled by nameCharString(), which enlarges it on demand
    private char nameTmp [];
    private NameCache nameCache;
    // two-char scratch area read by parseLiteral() after calling
    // surrogatesToCharTmp() for character references above 0xFFFF
    private char charTmp [] = new char[2];

    // temporary DTD parsing state; while false, maybeWhitespace() delegates
    // straight to the input entity.  maybeComment() and maybePI() save,
    // clear and restore it.
    private boolean doLexicalPE;

    // DTD state, used during parsing
//    private SimpleHashtable    elements = new SimpleHashtable (47);
    protected final Set declaredElements = new java.util.HashSet();
    // table consulted for "%name;" references (see parseLiteral())
    private SimpleHashtable params = new SimpleHashtable(7);

    // exposed to package-private subclass
    Hashtable notations = new Hashtable(7);
    // table consulted for "&name;" references; init() seeds it with the
    // five predefined entities
    SimpleHashtable entities = new SimpleHashtable(17);

    // scanned by afterRoot(): a key whose value is Boolean.FALSE is
    // reported as error V-024
    private SimpleHashtable ids = new SimpleHashtable();

    // listeners for DTD parsing events; init() installs a DTDHandlerBase
    // when none was set
    private DTDEventListener dtdHandler;

    // init() installs a Resolver / the default Locale when these are null
    private EntityResolver resolver;
    private Locale locale;

    // string constants -- use these copies so "==" works
    // package private
    static final String strANY = "ANY";
    static final String strEMPTY = "EMPTY";
108 
109     /**
110      * Used by applications to request locale for diagnostics.
111      *
112      * @param l The locale to use, or null to use system defaults
113      *          (which may include only message IDs).
114      */
setLocale(Locale l)115     public void setLocale(Locale l) throws SAXException {
116 
117         if (l != null && !messages.isLocaleSupported(l.toString())) {
118             throw new SAXException(messages.getMessage(locale,
119                     "P-078", new Object[]{l}));
120         }
121         locale = l;
122     }
123 
124     /**
125      * Returns the diagnostic locale.
126      */
getLocale()127     public Locale getLocale() {
128         return locale;
129     }
130 
131     /**
132      * Chooses a client locale to use for diagnostics, using the first
133      * language specified in the list that is supported by this parser.
134      * That locale is then set using <a href="#setLocale(java.util.Locale)">
135      * setLocale()</a>.  Such a list could be provided by a variety of user
136      * preference mechanisms, including the HTTP <em>Accept-Language</em>
137      * header field.
138      *
139      * @param languages Array of language specifiers, ordered with the most
140      *                  preferable one at the front.  For example, "en-ca" then "fr-ca",
141      *                  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
142      * @return The chosen locale, or null.
143      * @see MessageCatalog
144      */
chooseLocale(String languages [])145     public Locale chooseLocale(String languages [])
146             throws SAXException {
147 
148         Locale l = messages.chooseLocale(languages);
149 
150         if (l != null) {
151             setLocale(l);
152         }
153         return l;
154     }
155 
156     /**
157      * Lets applications control entity resolution.
158      */
setEntityResolver(EntityResolver r)159     public void setEntityResolver(EntityResolver r) {
160 
161         resolver = r;
162     }
163 
164     /**
165      * Returns the object used to resolve entities
166      */
getEntityResolver()167     public EntityResolver getEntityResolver() {
168 
169         return resolver;
170     }
171 
172     /**
173      * Used by applications to set handling of DTD parsing events.
174      */
setDtdHandler(DTDEventListener handler)175     public void setDtdHandler(DTDEventListener handler) {
176         dtdHandler = handler;
177         if (handler != null)
178             handler.setDocumentLocator(new Locator() {
179                 public String getPublicId() {
180                     return DTDParser.this.getPublicId();
181                 }
182 
183                 public String getSystemId() {
184                     return DTDParser.this.getSystemId();
185                 }
186 
187                 public int getLineNumber() {
188                     return DTDParser.this.getLineNumber();
189                 }
190 
191                 public int getColumnNumber() {
192                     return DTDParser.this.getColumnNumber();
193                 }
194             });
195     }
196 
197     /**
     * Returns the handler used for DTD parsing events.
199      */
getDtdHandler()200     public DTDEventListener getDtdHandler() {
201         return dtdHandler;
202     }
203 
204     /**
205      * Parse a DTD.
206      */
parse(InputSource in)207     public void parse(InputSource in)
208             throws IOException, SAXException {
209         init();
210         parseInternal(in);
211     }
212 
213     /**
214      * Parse a DTD.
215      */
parse(String uri)216     public void parse(String uri)
217             throws IOException, SAXException {
218         InputSource in;
219 
220         init();
221         // System.out.println ("parse (\"" + uri + "\")");
222         in = resolver.resolveEntity(null, uri);
223 
224         // If custom resolver punts resolution to parser, handle it ...
225         if (in == null) {
226             in = Resolver.createInputSource(new java.net.URL(uri), false);
227 
228             // ... or if custom resolver doesn't correctly construct the
229             // input entity, patch it up enough so relative URIs work, and
230             // issue a warning to minimize later confusion.
231         } else if (in.getSystemId() == null) {
232             warning("P-065", null);
233             in.setSystemId(uri);
234         }
235 
236         parseInternal(in);
237     }
238 
239     // makes sure the parser is reset to "before a document"
init()240     private void init() {
241         in = null;
242 
243         // alloc temporary data used in parsing
244         strTmp = new StringBuffer();
245         nameTmp = new char[20];
246         nameCache = new NameCache();
247 
248         // reset doc info
249 //        isInAttribute = false;
250 
251         doLexicalPE = false;
252 
253         entities.clear();
254         notations.clear();
255         params.clear();
256         //    elements.clear ();
257         declaredElements.clear();
258 
259         // initialize predefined references ... re-interpreted later
260         builtin("amp", "&#38;");
261         builtin("lt", "&#60;");
262         builtin("gt", ">");
263         builtin("quot", "\"");
264         builtin("apos", "'");
265 
266         if (locale == null)
267             locale = Locale.getDefault();
268         if (resolver == null)
269             resolver = new Resolver();
270         if (dtdHandler == null)
271             dtdHandler = new DTDHandlerBase();
272     }
273 
builtin(String entityName, String entityValue)274     private void builtin(String entityName, String entityValue) {
275         InternalEntity entity;
276         entity = new InternalEntity(entityName, entityValue.toCharArray());
277         entities.put(entityName, entity);
278     }
279 
280 
281     ////////////////////////////////////////////////////////////////
282     //
283     // parsing is by recursive descent, code roughly
284     // following the BNF rules except tweaked for simple
285     // lookahead.  rules are more or less in numeric order,
286     // except where code sharing suggests other structures.
287     //
288     // a classic benefit of recursive descent parsers:  it's
289     // relatively easy to get diagnostics that make sense.
290     //
291     ////////////////////////////////////////////////////////////////
292 
293 
parseInternal(InputSource input)294     private void parseInternal(InputSource input)
295             throws IOException, SAXException {
296 
297         if (input == null)
298             fatal("P-000");
299 
300         try {
301             in = InputEntity.getInputEntity(dtdHandler, locale);
302             in.init(input, null, null, false);
303 
304             dtdHandler.startDTD(in);
305 
306             // [30] extSubset ::= TextDecl? extSubsetDecl
307             // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
308             //        | PEReference | S )*
309             //    ... same as [79] extPE, which is where the code is
310 
311             ExternalEntity externalSubset = new ExternalEntity(in);
312             externalParameterEntity(externalSubset);
313 
314             if (!in.isEOF()) {
315                 fatal("P-001", new Object[]
316                 {Integer.toHexString(((int) getc()))});
317             }
318             afterRoot();
319             dtdHandler.endDTD();
320 
321         } catch (EndOfInputException e) {
322             if (!in.isDocument()) {
323                 String name = in.getName();
324                 do {    // force a relevant URI and line number
325                     in = in.pop();
326                 } while (in.isInternal());
327                 fatal("P-002", new Object[]{name});
328             } else {
329                 fatal("P-003", null);
330             }
331         } catch (RuntimeException e) {
332             // Don't discard location that triggered the exception
333             // ## Should properly wrap exception
334             System.err.print("Internal DTD parser error: "); // ##
335             e.printStackTrace();
336             throw new SAXParseException(e.getMessage() != null
337                     ? e.getMessage() : e.getClass().getName(),
338                     getPublicId(), getSystemId(),
339                     getLineNumber(), getColumnNumber());
340 
341         } finally {
342             // recycle temporary data used during parsing
343             strTmp = null;
344             nameTmp = null;
345             nameCache = null;
346 
347             // ditto input sources etc
348             if (in != null) {
349                 in.close();
350                 in = null;
351             }
352 
353             // get rid of all DTD info ... some of it would be
354             // useful for editors etc, investigate later.
355 
356             params.clear();
357             entities.clear();
358             notations.clear();
359             declaredElements.clear();
360 //        elements.clear();
361             ids.clear();
362         }
363     }
364 
afterRoot()365     void afterRoot() throws SAXException {
366         // Make sure all IDREFs match declared ID attributes.  We scan
367         // after the document element is parsed, since XML allows forward
368         // references, and only now can we know if they're all resolved.
369 
370         for (Enumeration e = ids.keys();
371              e.hasMoreElements();
372                 ) {
373             String id = (String) e.nextElement();
374             Boolean value = (Boolean) ids.get(id);
375             if (Boolean.FALSE == value)
376                 error("V-024", new Object[]{id});
377         }
378     }
379 
380 
381     // role is for diagnostics
whitespace(String roleId)382     private void whitespace(String roleId)
383             throws IOException, SAXException {
384 
385         // [3] S ::= (#x20 | #x9 | #xd | #xa)+
386         if (!maybeWhitespace()) {
387             fatal("P-004", new Object[]
388             {messages.getMessage(locale, roleId)});
389         }
390     }
391 
392     // S?
maybeWhitespace()393     private boolean maybeWhitespace()
394             throws IOException, SAXException {
395 
396         if (!doLexicalPE)
397             return in.maybeWhitespace();
398 
399         // see getc() for the PE logic -- this lets us splice
400         // expansions of PEs in "anywhere".  getc() has smarts,
401         // so for external PEs we don't bypass it.
402 
403         // XXX we can marginally speed PE handling, and certainly
404         // be cleaner (hence potentially more correct), by using
405         // the observations that expanded PEs only start and stop
406         // where whitespace is allowed.  getc wouldn't need any
407         // "lexical" PE expansion logic, and no other method needs
408         // to handle termination of PEs.  (parsing of literals would
409         // still need to pop entities, but not parsing of references
410         // in content.)
411 
412         char c = getc();
413         boolean saw = false;
414 
415         while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
416             saw = true;
417 
418             // this gracefully ends things when we stop playing
419             // with internal parameters.  caller should have a
420             // grammar rule allowing whitespace at end of entity.
421             if (in.isEOF() && !in.isInternal())
422                 return saw;
423             c = getc();
424         }
425         ungetc();
426         return saw;
427     }
428 
maybeGetName()429     private String maybeGetName()
430             throws IOException, SAXException {
431 
432         NameCacheEntry entry = maybeGetNameCacheEntry();
433         return (entry == null) ? null : entry.name;
434     }
435 
maybeGetNameCacheEntry()436     private NameCacheEntry maybeGetNameCacheEntry()
437             throws IOException, SAXException {
438 
439         // [5] Name ::= (Letter|'_'|':') (Namechar)*
440         char c = getc();
441 
442         if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
443             ungetc();
444             return null;
445         }
446         return nameCharString(c);
447     }
448 
449     // Used when parsing enumerations
getNmtoken()450     private String getNmtoken()
451             throws IOException, SAXException {
452 
453         // [7] Nmtoken ::= (Namechar)+
454         char c = getc();
455         if (!XmlChars.isNameChar(c))
456             fatal("P-006", new Object[]{new Character(c)});
457         return nameCharString(c).name;
458     }
459 
460     // n.b. this gets used when parsing attribute values (for
461     // internal references) so we can't use strTmp; it's also
462     // a hotspot for CPU and memory in the parser (called at least
463     // once for each element) so this has been optimized a bit.
464 
nameCharString(char c)465     private NameCacheEntry nameCharString(char c)
466             throws IOException, SAXException {
467 
468         int i = 1;
469 
470         nameTmp[0] = c;
471         for (; ;) {
472             if ((c = in.getNameChar()) == 0)
473                 break;
474             if (i >= nameTmp.length) {
475                 char tmp [] = new char[nameTmp.length + 10];
476                 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
477                 nameTmp = tmp;
478             }
479             nameTmp[i++] = c;
480         }
481         return nameCache.lookupEntry(nameTmp, i);
482     }
483 
484     //
485     // much similarity between parsing entity values in DTD
486     // and attribute values (in DTD or content) ... both follow
487     // literal parsing rules, newline canonicalization, etc
488     //
489     // leaves value in 'strTmp' ... either a "replacement text" (4.5),
490     // or else partially normalized attribute value (the first bit
491     // of 3.3.3's spec, without the "if not CDATA" bits).
492     //
    /**
     * Parses a quoted literal and leaves its text in {@code strTmp}.
     * The opening quote is read first (fatal P-007 if it is neither
     * {@code '} nor {@code "}); the literal ends at the matching quote,
     * but only when that quote is read from the entity the literal
     * started in.
     *
     * @param isEntityValue {@code true} for an entity value: general entity
     *        references are copied through as text and {@code %name;}
     *        references are expanded; {@code false} for an attribute value:
     *        general entity references are expanded, whitespace becomes a
     *        space and {@code '<'} is fatal (P-012)
     */
    private void parseLiteral(boolean isEntityValue)
            throws IOException, SAXException {

        // [9] EntityValue ::=
        //    '"' ([^"&%] | Reference | PEReference)* '"'
        //    |    "'" ([^'&%] | Reference | PEReference)* "'"
        // [10] AttValue ::=
        //    '"' ([^"&]  | Reference             )* '"'
        //    |    "'" ([^'&]  | Reference             )* "'"
        char quote = getc();
        char c;
        // remember where the literal began; entities pushed during the
        // scan sit on top of this one
        InputEntity source = in;

        if (quote != '\'' && quote != '"') {
            fatal("P-007");
        }

        // don't report entity expansions within attributes,
        // they're reported "fully expanded" via SAX
//    isInAttribute = !isEntityValue;

        // get value into strTmp
        strTmp = new StringBuffer();

        // scan, allowing entity push/pop wherever ...
        // expanded entities can't terminate the literal!
        for (; ;) {
            if (in != source && in.isEOF()) {
                // we don't report end of parsed entities
                // within attributes (no SAX hooks)
                in = in.pop();
                continue;
            }
            // a quote character inside an expanded entity is ordinary text
            if ((c = getc()) == quote && in == source) {
                break;
            }

            //
            // Basically the "reference in attribute value"
            // row of the chart in section 4.4 of the spec
            //
            if (c == '&') {
                String entityName = maybeGetName();

                if (entityName != null) {
                    nextChar(';', "F-020", entityName);

                    // 4.4 says:  bypass these here ... we'll catch
                    // forbidden refs to unparsed entities on use
                    if (isEntityValue) {
                        strTmp.append('&');
                        strTmp.append(entityName);
                        strTmp.append(';');
                        continue;
                    }
                    // only reached for attribute values
                    expandEntityInLiteral(entityName, entities, isEntityValue);


                    // character references are always included immediately
                } else if ((c = getc()) == '#') {
                    int tmp = parseCharNumber();

                    if (tmp > 0xffff) {
                        // above 0xFFFF: the chars to append are taken from
                        // charTmp; the return value is compared with 2 to
                        // decide whether charTmp[1] is appended as well
                        tmp = surrogatesToCharTmp(tmp);
                        strTmp.append(charTmp[0]);
                        if (tmp == 2)
                            strTmp.append(charTmp[1]);
                    } else
                        strTmp.append((char) tmp);
                } else
                    fatal("P-009");
                continue;

            }

            // expand parameter entities only within entity value literals
            if (c == '%' && isEntityValue) {
                String entityName = maybeGetName();

                if (entityName != null) {
                    nextChar(';', "F-021", entityName);
                    expandEntityInLiteral(entityName, params, isEntityValue);
                    continue;
                } else
                    fatal("P-011");
            }

            // For attribute values ...
            if (!isEntityValue) {
                // 3.3.3 says whitespace normalizes to space...
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                    strTmp.append(' ');
                    continue;
                }

                // "<" not legal in parsed literals ...
                if (c == '<')
                    fatal("P-012");
            }

            // anything else is literal text
            strTmp.append(c);
        }
//    isInAttribute = false;
    }
597 
598     // does a SINGLE expansion of the entity (often reparsed later)
expandEntityInLiteral(String name, SimpleHashtable table, boolean isEntityValue)599     private void expandEntityInLiteral(String name, SimpleHashtable table,
600                                        boolean isEntityValue)
601             throws IOException, SAXException {
602 
603         Object entity = table.get(name);
604 
605         if (entity instanceof InternalEntity) {
606             InternalEntity value = (InternalEntity) entity;
607             pushReader(value.buf, name, !value.isPE);
608 
609         } else if (entity instanceof ExternalEntity) {
610             if (!isEntityValue)    // must be a PE ...
611                 fatal("P-013", new Object[]{name});
612             // XXX if this returns false ...
613             pushReader((ExternalEntity) entity);
614 
615         } else if (entity == null) {
616             //
617             // Note:  much confusion about whether spec requires such
618             // errors to be fatal in many cases, but none about whether
619             // it allows "normal" errors to be unrecoverable!
620             //
621             fatal((table == params) ? "V-022" : "P-014",
622                     new Object[]{name});
623         }
624     }
625 
626     // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
627     // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
628 
629     // NOTE:  XML spec should explicitly say that PE ref syntax is
630     // ignored in PIs, comments, SystemLiterals, and Pubid Literal
631     // values ... can't process the XML spec's own DTD without doing
632     // that for comments.
633 
getQuotedString(String type, String extra)634     private String getQuotedString(String type, String extra)
635             throws IOException, SAXException {
636 
637         // use in.getc to bypass PE processing
638         char quote = in.getc();
639 
640         if (quote != '\'' && quote != '"')
641             fatal("P-015", new Object[]{
642                 messages.getMessage(locale, type, new Object[]{extra})
643             });
644 
645         char c;
646 
647         strTmp = new StringBuffer();
648         while ((c = in.getc()) != quote)
649             strTmp.append((char) c);
650         return strTmp.toString();
651     }
652 
653 
parsePublicId()654     private String parsePublicId() throws IOException, SAXException {
655 
656         // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
657         // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
658         String retval = getQuotedString("F-033", null);
659         for (int i = 0; i < retval.length(); i++) {
660             char c = retval.charAt(i);
661             if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
662                     && !(c >= 'A' && c <= 'Z')
663                     && !(c >= 'a' && c <= 'z'))
664                 fatal("P-016", new Object[]{new Character(c)});
665         }
666         strTmp = new StringBuffer();
667         strTmp.append(retval);
668         return normalize(false);
669     }
670 
671     // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
672     // handled by:  InputEntity.parsedContent()
673 
    /**
     * Consumes a comment if one starts at the current position.
     * {@code doLexicalPE} is switched off while the comment is scanned and
     * restored afterwards.  The comment text is not collected:
     * {@code saveCommentText} is hard-wired to {@code false}, so
     * {@code dtdHandler.comment()} is never called from here.
     *
     * @param skipStart {@code true} if the leading {@code '<'} has already
     *                  been consumed, so only {@code "!--"} is expected
     * @return {@code false} if no comment starts here, {@code true} once a
     *         comment has been consumed
     */
    private boolean maybeComment(boolean skipStart)
            throws IOException, SAXException {

        // [15] Comment ::= '<!--'
        //        ( (Char - '-') | ('-' (Char - '-')) )*
        //        '-->'
        if (!in.peek(skipStart ? "!--" : "<!--", null))
            return false;

        boolean savedLexicalPE = doLexicalPE;
        boolean saveCommentText;

        doLexicalPE = false;
        // flip to true to collect the text and report it to the handler
        saveCommentText = false;
        if (saveCommentText)
            strTmp = new StringBuffer();

        // the outer loop only repeats if the catch block below returns
        // normally; the labeled break is the regular way out
        oneComment:
        for (; ;) {
            try {
                // bypass PE expansion, but permit PEs
                // to complete ... valid docs won't care.
                for (; ;) {
                    int c = getc();
                    if (c == '-') {
                        c = getc();
                        if (c != '-') {
                            // lone '-': part of the text; re-read the
                            // character that followed it
                            if (saveCommentText)
                                strTmp.append('-');
                            ungetc();
                            continue;
                        }
                        // "--" must be followed by '>'
                        nextChar('>', "F-022", null);
                        break oneComment;
                    }
                    if (saveCommentText)
                        strTmp.append((char) c);
                }
            } catch (EndOfInputException e) {
                //
                // This is fatal EXCEPT when we're processing a PE...
                // in which case a validating processor reports an error.
                // External PEs are easy to detect; internal ones we
                // infer by being an internal entity outside an element.
                //
                if (in.isInternal()) {
                    error("V-021", null);
                }
                fatal("P-017");
            }
        }
        doLexicalPE = savedLexicalPE;
        if (saveCommentText)
            dtdHandler.comment(strTmp.toString());
        return true;
    }
730 
maybePI(boolean skipStart)731     private boolean maybePI(boolean skipStart)
732             throws IOException, SAXException {
733 
734         // [16] PI ::= '<?' PITarget
735         //        (S (Char* - (Char* '?>' Char*)))?
736         //        '?>'
737         // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
738         boolean savedLexicalPE = doLexicalPE;
739 
740         if (!in.peek(skipStart ? "?" : "<?", null))
741             return false;
742         doLexicalPE = false;
743 
744         String target = maybeGetName();
745 
746         if (target == null) {
747             fatal("P-018");
748         }
749         if ("xml".equals(target)) {
750             fatal("P-019");
751         }
752         if ("xml".equalsIgnoreCase(target)) {
753             fatal("P-020", new Object[]{target});
754         }
755 
756         if (maybeWhitespace()) {
757             strTmp = new StringBuffer();
758             try {
759                 for (; ;) {
760                     // use in.getc to bypass PE processing
761                     char c = in.getc();
762                     //Reached the end of PI.
763                     if (c == '?' && in.peekc('>'))
764                         break;
765                     strTmp.append(c);
766                 }
767             } catch (EndOfInputException e) {
768                 fatal("P-021");
769             }
770             dtdHandler.processingInstruction(target, strTmp.toString());
771         } else {
772             if (!in.peek("?>", null)) {
773                 fatal("P-022");
774             }
775             dtdHandler.processingInstruction(target, "");
776         }
777 
778         doLexicalPE = savedLexicalPE;
779         return true;
780     }
781 
782     // [18] CDSect ::= CDStart CData CDEnd
783     // [19] CDStart ::= '<![CDATA['
784     // [20] CData ::= (Char* - (Char* ']]>' Char*))
785     // [21] CDEnd ::= ']]>'
786     //
787     //    ... handled by InputEntity.unparsedContent()
788 
789     // collapsing several rules together ...
790     // simpler than attribute literals -- no reference parsing!
maybeReadAttribute(String name, boolean must)791     private String maybeReadAttribute(String name, boolean must)
792             throws IOException, SAXException {
793 
794         // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
795         // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
796         // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
797         if (!maybeWhitespace()) {
798             if (!must) {
799                 return null;
800             }
801             fatal("P-024", new Object[]{name});
802             // NOTREACHED
803         }
804 
805         if (!peek(name)) {
806             if (must) {
807                 fatal("P-024", new Object[]{name});
808             } else {
809                 // To ensure that the whitespace is there so that when we
810                 // check for the next attribute we assure that the
811                 // whitespace still exists.
812                 ungetc();
813                 return null;
814             }
815         }
816 
817         // [25] Eq ::= S? '=' S?
818         maybeWhitespace();
819         nextChar('=', "F-023", null);
820         maybeWhitespace();
821 
822         return getQuotedString("F-035", name);
823     }
824 
readVersion(boolean must, String versionNum)825     private void readVersion(boolean must, String versionNum)
826             throws IOException, SAXException {
827 
828         String value = maybeReadAttribute("version", must);
829 
830         // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
831 
832         if (must && value == null)
833             fatal("P-025", new Object[]{versionNum});
834         if (value != null) {
835             int length = value.length();
836             for (int i = 0; i < length; i++) {
837                 char c = value.charAt(i);
838                 if (!((c >= '0' && c <= '9')
839                         || c == '_' || c == '.'
840                         || (c >= 'a' && c <= 'z')
841                         || (c >= 'A' && c <= 'Z')
842                         || c == ':' || c == '-')
843                 )
844                     fatal("P-026", new Object[]{value});
845             }
846         }
847         if (value != null && !value.equals(versionNum))
848             error("P-027", new Object[]{versionNum, value});
849     }
850 
851     // common code used by most markup declarations
852     // ... S (Q)Name ...
getMarkupDeclname(String roleId, boolean qname)853     private String getMarkupDeclname(String roleId, boolean qname)
854             throws IOException, SAXException {
855 
856         String name;
857 
858         whitespace(roleId);
859         name = maybeGetName();
860         if (name == null)
861             fatal("P-005", new Object[]
862             {messages.getMessage(locale, roleId)});
863         return name;
864     }
865 
maybeMarkupDecl()866     private boolean maybeMarkupDecl()
867             throws IOException, SAXException {
868 
869         // [29] markupdecl ::= elementdecl | Attlistdecl
870         //           | EntityDecl | NotationDecl | PI | Comment
871         return maybeElementDecl()
872                 || maybeAttlistDecl()
873                 || maybeEntityDecl()
874                 || maybeNotationDecl()
875                 || maybePI(false)
876                 || maybeComment(false);
877     }
878 
    // Name of the attribute whose declared default value, when present, is
    // checked against the LanguageId grammar (see isXmlLang) while ATTLIST
    // declarations are parsed.
    private static final String XmlLang = "xml:lang";
880 
isXmlLang(String value)881     private boolean isXmlLang(String value) {
882 
883         // [33] LanguageId ::= Langcode ('-' Subcode)*
884         // [34] Langcode ::= ISO639Code | IanaCode | UserCode
885         // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
886         // [36] IanaCode ::= [iI] '-' SubCode
887         // [37] UserCode ::= [xX] '-' SubCode
888         // [38] SubCode ::= [a-zA-Z]+
889 
890         // the ISO and IANA codes (and subcodes) are registered,
891         // but that's neither a WF nor a validity constraint.
892 
893         int nextSuffix;
894         char c;
895 
896         if (value.length() < 2)
897             return false;
898         c = value.charAt(1);
899         if (c == '-') {        // IANA, or user, code
900             c = value.charAt(0);
901             if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
902                 return false;
903             nextSuffix = 1;
904         } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
905             // 2 letter ISO code, or error
906             c = value.charAt(0);
907             if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
908                 return false;
909             nextSuffix = 2;
910         } else
911             return false;
912 
913         // here "suffix" ::= '-' [a-zA-Z]+ suffix*
914         while (nextSuffix < value.length()) {
915             c = value.charAt(nextSuffix);
916             if (c != '-')
917                 break;
918             while (++nextSuffix < value.length()) {
919                 c = value.charAt(nextSuffix);
920                 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
921                     break;
922             }
923         }
924         return value.length() == nextSuffix && c != '-';
925     }
926 
927 
928     //
929     // CHAPTER 3:  Logical Structures
930     //
931 
932     /**
933      * To validate, subclassers should at this time make sure that
934      * values are of the declared types:<UL>
935      * <LI> ID and IDREF(S) values are Names
936      * <LI> NMTOKEN(S) are Nmtokens
937      * <LI> ENUMERATION values match one of the tokens
938      * <LI> NOTATION values match a notation name
     * <LI> ENTITY(IES) values match an unparsed external entity
940      * </UL>
941      * <p/>
942      * <P> Separately, make sure IDREF values match some ID
943      * provided in the document (in the afterRoot method).
944      */
945 /*    void validateAttributeSyntax (Attribute attr, String value)
946          throws DTDParseException {
947         // ID, IDREF(S) ... values are Names
948         if (Attribute.ID == attr.type()) {
949             if (!XmlNames.isName (value))
950                 error ("V-025", new Object [] { value });
951 
952             Boolean             b = (Boolean) ids.getNonInterned (value);
953             if (b == null || b.equals (Boolean.FALSE))
954                 ids.put (value.intern (), Boolean.TRUE);
955             else
956                 error ("V-026", new Object [] { value });
957 
958         } else if (Attribute.IDREF == attr.type()) {
959             if (!XmlNames.isName (value))
960                 error ("V-027", new Object [] { value });
961 
962             Boolean             b = (Boolean) ids.getNonInterned (value);
963             if (b == null)
964                 ids.put (value.intern (), Boolean.FALSE);
965 
966         } else if (Attribute.IDREFS == attr.type()) {
967             StringTokenizer     tokenizer = new StringTokenizer (value);
968             Boolean             b;
969             boolean             sawValue = false;
970 
971             while (tokenizer.hasMoreTokens ()) {
972                 value = tokenizer.nextToken ();
973                 if (!XmlNames.isName (value))
974                     error ("V-027", new Object [] { value });
975                 b = (Boolean) ids.getNonInterned (value);
976                 if (b == null)
977                     ids.put (value.intern (), Boolean.FALSE);
978                 sawValue = true;
979             }
980             if (!sawValue)
981                 error ("V-039", null);
982 
983 
984         // NMTOKEN(S) ... values are Nmtoken(s)
985         } else if (Attribute.NMTOKEN == attr.type()) {
986             if (!XmlNames.isNmtoken (value))
987                 error ("V-028", new Object [] { value });
988 
989         } else if (Attribute.NMTOKENS == attr.type()) {
990             StringTokenizer     tokenizer = new StringTokenizer (value);
991             boolean             sawValue = false;
992 
993             while (tokenizer.hasMoreTokens ()) {
994                 value = tokenizer.nextToken ();
995                 if (!XmlNames.isNmtoken (value))
996                     error ("V-028", new Object [] { value });
997                 sawValue = true;
998             }
999             if (!sawValue)
1000                 error ("V-032", null);
1001 
1002         // ENUMERATION ... values match one of the tokens
1003         } else if (Attribute.ENUMERATION == attr.type()) {
1004             for (int i = 0; i < attr.values().length; i++)
1005                 if (value.equals (attr.values()[i]))
1006                     return;
1007             error ("V-029", new Object [] { value });
1008 
1009         // NOTATION values match a notation name
1010         } else if (Attribute.NOTATION == attr.type()) {
1011             //
1012             // XXX XML 1.0 spec should probably list references to
1013             // externally defined notations in standalone docs as
1014             // validity errors.  Ditto externally defined unparsed
1015             // entities; neither should show up in attributes, else
1016             // one needs to read the external declarations in order
1017             // to make sense of the document (exactly what tagging
1018             // a doc as "standalone" intends you won't need to do).
1019             //
1020             for (int i = 0; i < attr.values().length; i++)
1021                 if (value.equals (attr.values()[i]))
1022                     return;
1023             error ("V-030", new Object [] { value });
1024 
1025         // ENTITY(IES) values match an unparsed entity(ies)
1026         } else if (Attribute.ENTITY == attr.type()) {
1027             // see note above re standalone
1028             if (!isUnparsedEntity (value))
1029                 error ("V-031", new Object [] { value });
1030 
1031         } else if (Attribute.ENTITIES == attr.type()) {
1032             StringTokenizer     tokenizer = new StringTokenizer (value);
1033             boolean             sawValue = false;
1034 
1035             while (tokenizer.hasMoreTokens ()) {
1036                 value = tokenizer.nextToken ();
1037                 // see note above re standalone
1038                 if (!isUnparsedEntity (value))
1039                     error ("V-031", new Object [] { value });
1040                 sawValue = true;
1041             }
1042             if (!sawValue)
1043                 error ("V-040", null);
1044 
1045         } else if (Attribute.CDATA != attr.type())
1046             throw new InternalError (attr.type());
1047     }
1048 */
1049 /*
1050     private boolean isUnparsedEntity (String name)
1051     {
1052         Object e = entities.getNonInterned (name);
1053         if (e == null || !(e instanceof ExternalEntity))
1054             return false;
1055         return ((ExternalEntity)e).notation != null;
1056     }
1057 */
maybeElementDecl()1058     private boolean maybeElementDecl()
1059             throws IOException, SAXException {
1060 
1061         // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1062         // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1063         InputEntity start = peekDeclaration("!ELEMENT");
1064 
1065         if (start == null)
1066             return false;
1067 
1068         // n.b. for content models where inter-element whitespace is
1069         // ignorable, we mark that fact here.
1070         String name = getMarkupDeclname("F-015", true);
1071 //    Element        element = (Element) elements.get (name);
1072 //    boolean        declEffective = false;
1073 
1074 /*
1075     if (element != null) {
1076         if (element.contentModel() != null) {
1077             error ("V-012", new Object [] { name });
1078         } // else <!ATTLIST name ...> came first
1079     } else {
1080         element = new Element(name);
1081         elements.put (element.name(), element);
1082         declEffective = true;
1083     }
1084 */
1085         if (declaredElements.contains(name))
1086             error("V-012", new Object[]{name});
1087         else {
1088             declaredElements.add(name);
1089 //        declEffective = true;
1090         }
1091 
1092         short modelType;
1093         whitespace("F-000");
1094         if (peek(strEMPTY)) {
1095 ///        // leave element.contentModel as null for this case.
1096             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
1097         } else if (peek(strANY)) {
1098 ///        element.setContentModel(new StringModel(StringModelType.ANY));
1099             dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
1100         } else {
1101             modelType = getMixedOrChildren(name);
1102         }
1103 
1104         dtdHandler.endContentModel(name, modelType);
1105 
1106         maybeWhitespace();
1107         char c = getc();
1108         if (c != '>')
1109             fatal("P-036", new Object[]{name, new Character(c)});
1110         if (start != in)
1111             error("V-013", null);
1112 
1113 ///        dtdHandler.elementDecl(element);
1114 
1115         return true;
1116     }
1117 
1118     // We're leaving the content model as a regular expression;
1119     // it's an efficient natural way to express such things, and
1120     // libraries often interpret them.  No whitespace in the
1121     // model we store, though!
1122 
1123     /**
1124      * returns content model type.
1125      */
getMixedOrChildren(String elementName )1126     private short getMixedOrChildren(String elementName/*Element element*/)
1127             throws IOException, SAXException {
1128 
1129         InputEntity start;
1130 
1131         // [47] children ::= (choice|seq) ('?'|'*'|'+')?
1132         strTmp = new StringBuffer();
1133 
1134         nextChar('(', "F-028", elementName);
1135         start = in;
1136         maybeWhitespace();
1137         strTmp.append('(');
1138 
1139         short modelType;
1140         if (peek("#PCDATA")) {
1141             strTmp.append("#PCDATA");
1142             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
1143             getMixed(elementName, start);
1144         } else {
1145             dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
1146             getcps(elementName, start);
1147         }
1148 
1149         return modelType;
1150     }
1151 
1152     // '(' S? already consumed
1153     // matching ')' must be in "start" entity if validating
    /**
     * Parses the content particles of one choice or sequence group and
     * reports them to the DTD handler; nested groups are handled by
     * recursion.
     * <p/>
     * On entry the opening parenthesis and any whitespace after it have
     * been consumed. The textual form of the group is appended to
     * {@code strTmp} as it is parsed. The group is bracketed by
     * {@code startModelGroup()} and {@code endModelGroup(...)}, the latter
     * receiving the occurrence indicator read after the closing parenthesis.
     *
     * @param elementName name of the element being declared, used in
     *                    diagnostics
     * @param start       input entity in which the group was opened; error
     *                    "V-014" is reported if the group closes in a
     *                    different one
     */
    private void getcps(/*Element element,*/String elementName, InputEntity start)
            throws IOException, SAXException {

        // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
        // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
        // [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'

        // "decided" becomes true once the first connector has been seen;
        // "type" then holds that connector ('|' or ',') and every later
        // connector in this group has to match it.
        boolean decided = false;
        char type = 0;
//        ContentModel       retval, temp, current;

//        retval = temp = current = null;

        dtdHandler.startModelGroup();

        // Each iteration parses one cp and whatever follows it. Note that
        // "continue" inside a do/while jumps to the loop condition, i.e. to
        // the test for the closing parenthesis.
        do {
            String tag;

            tag = maybeGetName();
            if (tag != null) {
                // cp is an element name followed by an optional occurrence
                // indicator
                strTmp.append(tag);
//                temp = new ElementModel(tag);
//                getFrequency((RepeatableContent)temp);
///->
                dtdHandler.childElement(tag, getFrequency());
///<-
            } else if (peek("(")) {
                // cp is a nested group; remember the entity it opened in
                InputEntity next = in;
                strTmp.append('(');
                maybeWhitespace();
//                temp = getcps(element, next);
//                getFrequency(temp);
///->
                getcps(elementName, next);
                // No getFrequency() call here: the recursive call reads the
                // nested group's occurrence indicator itself when it calls
                // endModelGroup(getFrequency()).
///<-
            } else
                // Neither a name nor a group; the message depends on whether
                // a connector has been seen and which one.
                fatal((type == 0) ? "P-039" :
                        ((type == ',') ? "P-037" : "P-038"),
                        new Object[]{new Character(getc())});

            maybeWhitespace();
            if (decided) {
                char c = getc();

//                if (current != null) {
//                    current.addChild(temp);
//                }
                if (c == type) {
                    // same connector as before: report it and go on to the
                    // loop condition
                    strTmp.append(type);
                    maybeWhitespace();
                    reportConnector(type);
                    continue;
                } else if (c == '\u0029') {    // rparen
                    // push it back so that the loop condition consumes it
                    ungetc();
                    continue;
                } else {
                    fatal((type == 0) ? "P-041" : "P-040",
                            new Object[]{
                                new Character(c),
                                new Character(type)
                            });
                }
            } else {
                type = getc();
                switch (type) {
                case '|':
                case ',':
                    reportConnector(type);
                    break;
                default:
                    // Not a connector: push it back and let the loop
                    // condition look for the closing parenthesis.
                    // NOTE(review): if the character is not a right
                    // parenthesis the loop simply parses another cp, so a
                    // model such as "(a b)" appears to be accepted without
                    // any connector, and "type" keeps this arbitrary
                    // character -- confirm whether that is intended.
//                        retval = temp;
                    ungetc();
                    continue;
                }
//                retval = (ContentModel)current;
                decided = true;
//                current.addChild(temp);
                strTmp.append(type);
            }
            maybeWhitespace();
        } while (!peek(")"));

        if (in != start)
            error("V-014", new Object[]{elementName});
        strTmp.append(')');

        // the occurrence indicator after the closing parenthesis belongs to
        // this group
        dtdHandler.endModelGroup(getFrequency());
//        return retval;
    }
1243 
reportConnector(char type)1244     private void reportConnector(char type) throws SAXException {
1245         switch (type) {
1246         case '|':
1247             dtdHandler.connector(DTDEventListener.CHOICE);    ///<-
1248             return;
1249         case ',':
1250             dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
1251             return;
1252         default:
1253             throw new Error();    //assertion failed.
1254         }
1255     }
1256 
getFrequency()1257     private short getFrequency()
1258             throws IOException, SAXException {
1259 
1260         final char c = getc();
1261 
1262         if (c == '?') {
1263             strTmp.append(c);
1264             return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
1265             //        original.setRepeat(Repeat.ZERO_OR_ONE);
1266         } else if (c == '+') {
1267             strTmp.append(c);
1268             return DTDEventListener.OCCURENCE_ONE_OR_MORE;
1269             //        original.setRepeat(Repeat.ONE_OR_MORE);
1270         } else if (c == '*') {
1271             strTmp.append(c);
1272             return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
1273             //        original.setRepeat(Repeat.ZERO_OR_MORE);
1274         } else {
1275             ungetc();
1276             return DTDEventListener.OCCURENCE_ONCE;
1277         }
1278     }
1279 
1280     // '(' S? '#PCDATA' already consumed
1281     // matching ')' must be in "start" entity if validating
getMixed(String elementName, InputEntity start)1282     private void getMixed(String elementName, /*Element element,*/ InputEntity start)
1283             throws IOException, SAXException {
1284 
1285         // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
1286         //        | '(' S? '#PCDATA'                   S? ')'
1287         maybeWhitespace();
1288         if (peek("\u0029*") || peek("\u0029")) {
1289             if (in != start)
1290                 error("V-014", new Object[]{elementName});
1291             strTmp.append(')');
1292 //            element.setContentModel(new StringModel(StringModelType.PCDATA));
1293             return;
1294         }
1295 
1296         ArrayList l = new ArrayList();
1297 //    l.add(new StringModel(StringModelType.PCDATA));
1298 
1299 
1300         while (peek("|")) {
1301             String name;
1302 
1303             strTmp.append('|');
1304             maybeWhitespace();
1305 
1306             doLexicalPE = true;
1307             name = maybeGetName();
1308             if (name == null)
1309                 fatal("P-042", new Object[]
1310                 {elementName, Integer.toHexString(getc())});
1311             if (l.contains(name)) {
1312                 error("V-015", new Object[]{name});
1313             } else {
1314                 l.add(name);
1315                 dtdHandler.mixedElement(name);
1316             }
1317             strTmp.append(name);
1318             maybeWhitespace();
1319         }
1320 
1321         if (!peek("\u0029*"))    // right paren
1322             fatal("P-043", new Object[]
1323             {elementName, new Character(getc())});
1324         if (in != start)
1325             error("V-014", new Object[]{elementName});
1326         strTmp.append(')');
1327 //        ChoiceModel cm = new ChoiceModel((Collection)l);
1328 //    cm.setRepeat(Repeat.ZERO_OR_MORE);
1329 //       element.setContentModel(cm);
1330     }
1331 
maybeAttlistDecl()1332     private boolean maybeAttlistDecl()
1333             throws IOException, SAXException {
1334 
1335         // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1336         InputEntity start = peekDeclaration("!ATTLIST");
1337 
1338         if (start == null)
1339             return false;
1340 
1341         String elementName = getMarkupDeclname("F-016", true);
1342 //    Element    element = (Element) elements.get (name);
1343 
1344 //    if (element == null) {
1345 //        // not yet declared -- no problem.
1346 //        element = new Element(name);
1347 //        elements.put(name, element);
1348 //    }
1349 
1350         while (!peek(">")) {
1351 
1352             // [53] AttDef ::= S Name S AttType S DefaultDecl
1353             // [54] AttType ::= StringType | TokenizedType | EnumeratedType
1354 
1355             // look for global attribute definitions, don't expand for now...
1356             maybeWhitespace();
1357             char c = getc();
1358             if (c == '%') {
1359                 String entityName = maybeGetName();
1360                 if (entityName != null) {
1361                     nextChar(';', "F-021", entityName);
1362                     whitespace("F-021");
1363                     continue;
1364                 } else
1365                     fatal("P-011");
1366             }
1367 
1368             ungetc();
1369             // look for attribute name otherwise
1370             String attName = maybeGetName();
1371             if (attName == null) {
1372                 fatal("P-044", new Object[]{new Character(getc())});
1373             }
1374             whitespace("F-001");
1375 
1376 ///        Attribute    a = new Attribute (name);
1377 
1378             String typeName;
1379             Vector values = null;    // notation/enumeration values
1380 
1381             // Note:  use the type constants from Attribute
1382             // so that "==" may be used (faster)
1383 
1384             // [55] StringType ::= 'CDATA'
1385             if (peek(TYPE_CDATA))
1386 ///            a.setType(Attribute.CDATA);
1387                 typeName = TYPE_CDATA;
1388 
1389             // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
1390             //        | 'ENTITY' | 'ENTITIES'
1391             //        | 'NMTOKEN' | 'NMTOKENS'
1392             // n.b. if "IDREFS" is there, both "ID" and "IDREF"
1393             // match peekahead ... so this order matters!
1394             else if (peek(TYPE_IDREFS))
1395                 typeName = TYPE_IDREFS;
1396             else if (peek(TYPE_IDREF))
1397                 typeName = TYPE_IDREF;
1398             else if (peek(TYPE_ID)) {
1399                 typeName = TYPE_ID;
1400 // TODO: should implement this error check?
1401 ///        if (element.id() != null) {
1402 ///                    error ("V-016", new Object [] { element.id() });
1403 ///        } else
1404 ///            element.setId(name);
1405             } else if (peek(TYPE_ENTITY))
1406                 typeName = TYPE_ENTITY;
1407             else if (peek(TYPE_ENTITIES))
1408                 typeName = TYPE_ENTITIES;
1409             else if (peek(TYPE_NMTOKENS))
1410                 typeName = TYPE_NMTOKENS;
1411             else if (peek(TYPE_NMTOKEN))
1412                 typeName = TYPE_NMTOKEN;
1413 
1414             // [57] EnumeratedType ::= NotationType | Enumeration
1415             // [58] NotationType ::= 'NOTATION' S '(' S? Name
1416             //        (S? '|' S? Name)* S? ')'
1417             else if (peek(TYPE_NOTATION)) {
1418                 typeName = TYPE_NOTATION;
1419                 whitespace("F-002");
1420                 nextChar('(', "F-029", null);
1421                 maybeWhitespace();
1422 
1423                 values = new Vector();
1424                 do {
1425                     String name;
1426                     if ((name = maybeGetName()) == null)
1427                         fatal("P-068");
1428                     // permit deferred declarations
1429                     if (notations.get(name) == null)
1430                         notations.put(name, name);
1431                     values.addElement(name);
1432                     maybeWhitespace();
1433                     if (peek("|"))
1434                         maybeWhitespace();
1435                 } while (!peek(")"));
1436 ///            a.setValues(new String [v.size ()]);
1437 ///            for (int i = 0; i < v.size (); i++)
1438 ///                a.setValue(i, (String)v.elementAt(i));
1439 
1440                 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
1441             } else if (peek("(")) {
1442 ///            a.setType(Attribute.ENUMERATION);
1443                 typeName = TYPE_ENUMERATION;
1444 
1445                 maybeWhitespace();
1446 
1447 ///            Vector v = new Vector ();
1448                 values = new Vector();
1449                 do {
1450                     String name = getNmtoken();
1451 ///                v.addElement (name);
1452                     values.addElement(name);
1453                     maybeWhitespace();
1454                     if (peek("|"))
1455                         maybeWhitespace();
1456                 } while (!peek(")"));
1457 ///            a.setValues(new String [v.size ()]);
1458 ///            for (int i = 0; i < v.size (); i++)
1459 ///                a.setValue(i, (String)v.elementAt(i));
1460             } else {
1461                 fatal("P-045",
1462                         new Object[]{attName, new Character(getc())});
1463                 typeName = null;
1464             }
1465 
1466             short attributeUse;
1467             String defaultValue = null;
1468 
1469             // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1470             //        | (('#FIXED' S)? AttValue)
1471             whitespace("F-003");
1472             if (peek("#REQUIRED"))
1473                 attributeUse = DTDEventListener.USE_REQUIRED;
1474 ///            a.setIsRequired(true);
1475             else if (peek("#FIXED")) {
1476 ///            if (a.type() == Attribute.ID)
1477                 if (typeName == TYPE_ID)
1478                     error("V-017", new Object[]{attName});
1479 ///            a.setIsFixed(true);
1480                 attributeUse = DTDEventListener.USE_FIXED;
1481                 whitespace("F-004");
1482                 parseLiteral(false);
1483 ///            if (a.type() != Attribute.CDATA)
1484 ///                a.setDefaultValue(normalize(false));
1485 ///            else
1486 ///                a.setDefaultValue(strTmp.toString());
1487 
1488                 if (typeName == TYPE_CDATA)
1489                     defaultValue = normalize(false);
1490                 else
1491                     defaultValue = strTmp.toString();
1492 
1493 // TODO: implement this check
1494 ///            if (a.type() != Attribute.CDATA)
1495 ///                validateAttributeSyntax (a, a.defaultValue());
1496             } else if (!peek("#IMPLIED")) {
1497                 attributeUse = DTDEventListener.USE_IMPLIED;
1498 
1499 ///            if (a.type() == Attribute.ID)
1500                 if (typeName == TYPE_ID)
1501                     error("V-018", new Object[]{attName});
1502                 parseLiteral(false);
1503 ///            if (a.type() != Attribute.CDATA)
1504 ///                a.setDefaultValue(normalize(false));
1505 ///            else
1506 ///                a.setDefaultValue(strTmp.toString());
1507                 if (typeName == TYPE_CDATA)
1508                     defaultValue = normalize(false);
1509                 else
1510                     defaultValue = strTmp.toString();
1511 
1512 // TODO: implement this check
1513 ///            if (a.type() != Attribute.CDATA)
1514 ///                validateAttributeSyntax (a, a.defaultValue());
1515             } else {
1516                 // TODO: this looks like an fatal error.
1517                 attributeUse = DTDEventListener.USE_NORMAL;
1518             }
1519 
1520             if (XmlLang.equals(attName)
1521                     && defaultValue/* a.defaultValue()*/ != null
1522                     && !isXmlLang(defaultValue/*a.defaultValue()*/))
1523                 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
1524 
1525 // TODO: isn't it an error to specify the same attribute twice?
1526 ///        if (!element.attributes().contains(a)) {
1527 ///            element.addAttribute(a);
1528 ///            dtdHandler.attributeDecl(a);
1529 ///        }
1530 
1531             String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
1532             dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
1533             maybeWhitespace();
1534         }
1535         if (start != in)
1536             error("V-013", null);
1537         return true;
1538     }
1539 
1540     // used when parsing literal attribute values,
1541     // or public identifiers.
1542     //
1543     // input in strTmp
normalize(boolean invalidIfNeeded)1544     private String normalize(boolean invalidIfNeeded) {
1545 
1546         // this can allocate an extra string...
1547 
1548         String s = strTmp.toString();
1549         String s2 = s.trim();
1550         boolean didStrip = false;
1551 
1552         if (s != s2) {
1553             s = s2;
1554             s2 = null;
1555             didStrip = true;
1556         }
1557         strTmp = new StringBuffer();
1558         for (int i = 0; i < s.length(); i++) {
1559             char c = s.charAt(i);
1560             if (!XmlChars.isSpace(c)) {
1561                 strTmp.append(c);
1562                 continue;
1563             }
1564             strTmp.append(' ');
1565             while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
1566                 didStrip = true;
1567             i--;
1568         }
1569         if (didStrip)
1570             return strTmp.toString();
1571         else
1572             return s;
1573     }
1574 
maybeConditionalSect()1575     private boolean maybeConditionalSect()
1576             throws IOException, SAXException {
1577 
1578         // [61] conditionalSect ::= includeSect | ignoreSect
1579 
1580         if (!peek("<!["))
1581             return false;
1582 
1583         String keyword;
1584         InputEntity start = in;
1585 
1586         maybeWhitespace();
1587 
1588         if ((keyword = maybeGetName()) == null)
1589             fatal("P-046");
1590         maybeWhitespace();
1591         nextChar('[', "F-030", null);
1592 
1593         // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
1594         //                extSubsetDecl ']]>'
1595         if ("INCLUDE".equals(keyword)) {
1596             for (; ;) {
1597                 while (in.isEOF() && in != start)
1598                     in = in.pop();
1599                 if (in.isEOF()) {
1600                     error("V-020", null);
1601                 }
1602                 if (peek("]]>"))
1603                     break;
1604 
1605                 doLexicalPE = false;
1606                 if (maybeWhitespace())
1607                     continue;
1608                 if (maybePEReference())
1609                     continue;
1610                 doLexicalPE = true;
1611                 if (maybeMarkupDecl() || maybeConditionalSect())
1612                     continue;
1613 
1614                 fatal("P-047");
1615             }
1616 
1617             // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
1618             //            ignoreSectcontents ']]>'
1619             // [64] ignoreSectcontents ::= Ignore ('<!['
1620             //            ignoreSectcontents ']]>' Ignore)*
1621             // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
1622         } else if ("IGNORE".equals(keyword)) {
1623             int nestlevel = 1;
1624             // ignoreSectcontents
1625             doLexicalPE = false;
1626             while (nestlevel > 0) {
1627                 char c = getc();    // will pop input entities
1628                 if (c == '<') {
1629                     if (peek("!["))
1630                         nestlevel++;
1631                 } else if (c == ']') {
1632                     if (peek("]>"))
1633                         nestlevel--;
1634                 } else
1635                     continue;
1636             }
1637         } else
1638             fatal("P-048", new Object[]{keyword});
1639         return true;
1640     }
1641 
1642 
1643     //
1644     // CHAPTER 4:  Physical Structures
1645     //
1646 
1647     // parse decimal or hex numeric character reference
parseCharNumber()1648     private int parseCharNumber()
1649             throws IOException, SAXException {
1650 
1651         char c;
1652         int retval = 0;
1653 
1654         // n.b. we ignore overflow ...
1655         if (getc() != 'x') {
1656             ungetc();
1657             for (; ;) {
1658                 c = getc();
1659                 if (c >= '0' && c <= '9') {
1660                     retval *= 10;
1661                     retval += (c - '0');
1662                     continue;
1663                 }
1664                 if (c == ';')
1665                     return retval;
1666                 fatal("P-049");
1667             }
1668         } else
1669             for (; ;) {
1670                 c = getc();
1671                 if (c >= '0' && c <= '9') {
1672                     retval <<= 4;
1673                     retval += (c - '0');
1674                     continue;
1675                 }
1676                 if (c >= 'a' && c <= 'f') {
1677                     retval <<= 4;
1678                     retval += 10 + (c - 'a');
1679                     continue;
1680                 }
1681                 if (c >= 'A' && c <= 'F') {
1682                     retval <<= 4;
1683                     retval += 10 + (c - 'A');
1684                     continue;
1685                 }
1686                 if (c == ';')
1687                     return retval;
1688                 fatal("P-050");
1689             }
1690     }
1691 
1692     // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
1693     // though still subject to the 'Char' construct in XML
surrogatesToCharTmp(int ucs4)1694     private int surrogatesToCharTmp(int ucs4)
1695             throws SAXException {
1696 
1697         if (ucs4 <= 0xffff) {
1698             if (XmlChars.isChar(ucs4)) {
1699                 charTmp[0] = (char) ucs4;
1700                 return 1;
1701             }
1702         } else if (ucs4 <= 0x0010ffff) {
1703             // we represent these as UNICODE surrogate pairs
1704             ucs4 -= 0x10000;
1705             charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
1706             charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
1707             return 2;
1708         }
1709         fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
1710         // NOTREACHED
1711         return -1;
1712     }
1713 
maybePEReference()1714     private boolean maybePEReference()
1715             throws IOException, SAXException {
1716 
1717         // This is the SYNTACTIC version of this construct.
1718         // When processing external entities, there is also
1719         // a LEXICAL version; see getc() and doLexicalPE.
1720 
1721         // [69] PEReference ::= '%' Name ';'
1722         if (!in.peekc('%'))
1723             return false;
1724 
1725         String name = maybeGetName();
1726         Object entity;
1727 
1728         if (name == null)
1729             fatal("P-011");
1730         nextChar(';', "F-021", name);
1731         entity = params.get(name);
1732 
1733         if (entity instanceof InternalEntity) {
1734             InternalEntity value = (InternalEntity) entity;
1735             pushReader(value.buf, name, false);
1736 
1737         } else if (entity instanceof ExternalEntity) {
1738             pushReader((ExternalEntity) entity);
1739             externalParameterEntity((ExternalEntity) entity);
1740 
1741         } else if (entity == null) {
1742             error("V-022", new Object[]{name});
1743         }
1744         return true;
1745     }
1746 
maybeEntityDecl()1747     private boolean maybeEntityDecl()
1748             throws IOException, SAXException {
1749 
1750         // [70] EntityDecl ::= GEDecl | PEDecl
1751         // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
1752         // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
1753         // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1754         // [74] PEDef     ::= EntityValue |  ExternalID
1755         //
1756         InputEntity start = peekDeclaration("!ENTITY");
1757 
1758         if (start == null)
1759             return false;
1760 
1761         String entityName;
1762         SimpleHashtable defns;
1763         ExternalEntity externalId;
1764         boolean doStore;
1765 
1766         // PE expansion gets selectively turned off several places:
1767         // in ENTITY declarations (here), in comments, in PIs.
1768 
1769         // Here, we allow PE entities to be declared, and allows
1770         // literals to include PE refs without the added spaces
1771         // required with their expansion in markup decls.
1772 
1773         doLexicalPE = false;
1774         whitespace("F-005");
1775         if (in.peekc('%')) {
1776             whitespace("F-006");
1777             defns = params;
1778         } else
1779             defns = entities;
1780 
1781         ungetc();    // leave some whitespace
1782         doLexicalPE = true;
1783         entityName = getMarkupDeclname("F-017", false);
1784         whitespace("F-007");
1785         externalId = maybeExternalID();
1786 
1787         //
1788         // first definition sticks ... e.g. internal subset PEs are used
1789         // to override DTD defaults.  It's also an "error" to incorrectly
1790         // redefine builtin internal entities, but since reporting such
1791         // errors is optional we only give warnings ("just in case") for
1792         // non-parameter entities.
1793         //
1794         doStore = (defns.get(entityName) == null);
1795         if (!doStore && defns == entities)
1796             warning("P-054", new Object[]{entityName});
1797 
1798         // internal entities
1799         if (externalId == null) {
1800             char value [];
1801             InternalEntity entity;
1802 
1803             doLexicalPE = false;        // "ab%bar;cd" -maybe-> "abcd"
1804             parseLiteral(true);
1805             doLexicalPE = true;
1806             if (doStore) {
1807                 value = new char[strTmp.length()];
1808                 if (value.length != 0)
1809                     strTmp.getChars(0, value.length, value, 0);
1810                 entity = new InternalEntity(entityName, value);
1811                 entity.isPE = (defns == params);
1812                 entity.isFromInternalSubset = false;
1813                 defns.put(entityName, entity);
1814                 if (defns == entities)
1815                     dtdHandler.internalGeneralEntityDecl(entityName,
1816                             new String(value));
1817             }
1818 
1819             // external entities (including unparsed)
1820         } else {
1821             // [76] NDataDecl ::= S 'NDATA' S Name
1822             if (defns == entities && maybeWhitespace()
1823                     && peek("NDATA")) {
1824                 externalId.notation = getMarkupDeclname("F-018", false);
1825 
1826                 // flag undeclared notation for checking after
1827                 // the DTD is fully processed
1828                 if (notations.get(externalId.notation) == null)
1829                     notations.put(externalId.notation, Boolean.TRUE);
1830             }
1831             externalId.name = entityName;
1832             externalId.isPE = (defns == params);
1833             externalId.isFromInternalSubset = false;
1834             if (doStore) {
1835                 defns.put(entityName, externalId);
1836                 if (externalId.notation != null)
1837                     dtdHandler.unparsedEntityDecl(entityName,
1838                             externalId.publicId, externalId.systemId,
1839                             externalId.notation);
1840                 else if (defns == entities)
1841                     dtdHandler.externalGeneralEntityDecl(entityName,
1842                             externalId.publicId, externalId.systemId);
1843             }
1844         }
1845         maybeWhitespace();
1846         nextChar('>', "F-031", entityName);
1847         if (start != in)
1848             error("V-013", null);
1849         return true;
1850     }
1851 
maybeExternalID()1852     private ExternalEntity maybeExternalID()
1853             throws IOException, SAXException {
1854 
1855         // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1856         //        | 'PUBLIC' S' PubidLiteral S Systemliteral
1857         String temp = null;
1858         ExternalEntity retval;
1859 
1860         if (peek("PUBLIC")) {
1861             whitespace("F-009");
1862             temp = parsePublicId();
1863         } else if (!peek("SYSTEM"))
1864             return null;
1865 
1866         retval = new ExternalEntity(in);
1867         retval.publicId = temp;
1868         whitespace("F-008");
1869         retval.systemId = parseSystemId();
1870         return retval;
1871     }
1872 
parseSystemId()1873     private String parseSystemId()
1874             throws IOException, SAXException {
1875 
1876         String uri = getQuotedString("F-034", null);
1877         int temp = uri.indexOf(':');
1878 
1879         // resolve relative URIs ... must do it here since
1880         // it's relative to the source file holding the URI!
1881 
1882         // "new java.net.URL (URL, string)" conforms to RFC 1630,
1883         // but we can't use that except when the URI is a URL.
1884         // The entity resolver is allowed to handle URIs that are
1885         // not URLs, so we pass URIs through with scheme intact
1886         if (temp == -1 || uri.indexOf('/') < temp) {
1887             String baseURI;
1888 
1889             baseURI = in.getSystemId();
1890             if (baseURI == null)
1891                 fatal("P-055", new Object[]{uri});
1892             if (uri.length() == 0)
1893                 uri = ".";
1894             baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
1895             if (uri.charAt(0) != '/')
1896                 uri = baseURI + uri;
1897             else {
1898                 // XXX slashes at the beginning of a relative URI are
1899                 // a special case we don't handle.
1900                 throw new InternalError();
1901             }
1902 
1903             // letting other code map any "/xxx/../" or "/./" to "/",
1904             // since all URIs must handle it the same.
1905         }
1906         // check for fragment ID in URI
1907         if (uri.indexOf('#') != -1)
1908             error("P-056", new Object[]{uri});
1909         return uri;
1910     }
1911 
maybeTextDecl()1912     private void maybeTextDecl()
1913             throws IOException, SAXException {
1914 
1915         // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1916         if (peek("<?xml")) {
1917             readVersion(false, "1.0");
1918             readEncoding(true);
1919             maybeWhitespace();
1920             if (!peek("?>"))
1921                 fatal("P-057");
1922         }
1923     }
1924 
externalParameterEntity(ExternalEntity next)1925     private void externalParameterEntity(ExternalEntity next)
1926             throws IOException, SAXException {
1927 
1928         //
1929         // Reap the intended benefits of standalone declarations:
1930         // don't deal with external parameter entities, except to
1931         // validate the standalone declaration.
1932         //
1933 
1934         // n.b. "in external parameter entities" (and external
1935         // DTD subset, same grammar) parameter references can
1936         // occur "within" markup declarations ... expansions can
1937         // cross syntax rules.  Flagged here; affects getc().
1938 
1939         // [79] ExtPE ::= TextDecl? extSubsetDecl
1940         // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
1941         //        | PEReference | S )*
1942         InputEntity pe;
1943 
1944         // XXX if this returns false ...
1945 
1946         pe = in;
1947         maybeTextDecl();
1948         while (!pe.isEOF()) {
1949             // pop internal PEs (and whitespace before/after)
1950             if (in.isEOF()) {
1951                 in = in.pop();
1952                 continue;
1953             }
1954             doLexicalPE = false;
1955             if (maybeWhitespace())
1956                 continue;
1957             if (maybePEReference())
1958                 continue;
1959             doLexicalPE = true;
1960             if (maybeMarkupDecl() || maybeConditionalSect())
1961                 continue;
1962             break;
1963         }
1964         // if (in != pe) throw new InternalError("who popped my PE?");
1965         if (!pe.isEOF())
1966             fatal("P-059", new Object[]{in.getName()});
1967     }
1968 
readEncoding(boolean must)1969     private void readEncoding(boolean must)
1970             throws IOException, SAXException {
1971 
1972         // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1973         String name = maybeReadAttribute("encoding", must);
1974 
1975         if (name == null)
1976             return;
1977         for (int i = 0; i < name.length(); i++) {
1978             char c = name.charAt(i);
1979             if ((c >= 'A' && c <= 'Z')
1980                     || (c >= 'a' && c <= 'z'))
1981                 continue;
1982             if (i != 0
1983                     && ((c >= '0' && c <= '9')
1984                     || c == '-'
1985                     || c == '_'
1986                     || c == '.'
1987                     ))
1988                 continue;
1989             fatal("P-060", new Object[]{new Character(c)});
1990         }
1991 
1992         //
1993         // This should be the encoding in use, and it's even an error for
1994         // it to be anything else (in certain cases that are impractical to
1995         // to test, and may even be insufficient).  So, we do the best we
1996         // can, and warn if things look suspicious.  Note that Java doesn't
1997         // uniformly expose the encodings, and that the names it uses
1998         // internally are nonstandard.  Also, that the XML spec allows
1999         // such "errors" not to be reported at all.
2000         //
2001         String currentEncoding = in.getEncoding();
2002 
2003         if (currentEncoding != null
2004                 && !name.equalsIgnoreCase(currentEncoding))
2005             warning("P-061", new Object[]{name, currentEncoding});
2006     }
2007 
maybeNotationDecl()2008     private boolean maybeNotationDecl()
2009             throws IOException, SAXException {
2010 
2011         // [82] NotationDecl ::= '<!NOTATION' S Name S
2012         //        (ExternalID | PublicID) S? '>'
2013         // [83] PublicID ::= 'PUBLIC' S PubidLiteral
2014         InputEntity start = peekDeclaration("!NOTATION");
2015 
2016         if (start == null)
2017             return false;
2018 
2019         String name = getMarkupDeclname("F-019", false);
2020         ExternalEntity entity = new ExternalEntity(in);
2021 
2022         whitespace("F-011");
2023         if (peek("PUBLIC")) {
2024             whitespace("F-009");
2025             entity.publicId = parsePublicId();
2026             if (maybeWhitespace()) {
2027                 if (!peek(">"))
2028                     entity.systemId = parseSystemId();
2029                 else
2030                     ungetc();
2031             }
2032         } else if (peek("SYSTEM")) {
2033             whitespace("F-008");
2034             entity.systemId = parseSystemId();
2035         } else
2036             fatal("P-062");
2037         maybeWhitespace();
2038         nextChar('>', "F-032", name);
2039         if (start != in)
2040             error("V-013", null);
2041         if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
2042             error("P-056", new Object[]{entity.systemId});
2043 
2044         Object value = notations.get(name);
2045         if (value != null && value instanceof ExternalEntity)
2046             warning("P-063", new Object[]{name});
2047 
2048         else {
2049             notations.put(name, entity);
2050             dtdHandler.notationDecl(name, entity.publicId,
2051                     entity.systemId);
2052         }
2053         return true;
2054     }
2055 
2056 
2057     ////////////////////////////////////////////////////////////////
2058     //
2059     //    UTILITIES
2060     //
2061     ////////////////////////////////////////////////////////////////
2062 
getc()2063     private char getc() throws IOException, SAXException {
2064 
2065         if (!doLexicalPE) {
2066             char c = in.getc();
2067             return c;
2068         }
2069 
2070         //
2071         // External parameter entities get funky processing of '%param;'
2072         // references.  It's not clearly defined in the XML spec; but it
2073         // boils down to having those refs be _lexical_ in most cases to
2074         // include partial syntax productions.  It also needs selective
2075         // enabling; "<!ENTITY % foo ...>" must work, for example, and
2076         // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
2077         // if it's expanded in a literal, else "ab  cd".  PEs also do
2078         // not expand within comments or PIs, and external PEs are only
2079         // allowed to have markup decls (and so aren't handled lexically).
2080         //
2081         // This PE handling should be merged into maybeWhitespace, where
2082         // it can be dealt with more consistently.
2083         //
2084         // Also, there are some validity constraints in this area.
2085         //
2086         char c;
2087 
2088         while (in.isEOF()) {
2089             if (in.isInternal() || (doLexicalPE && !in.isDocument()))
2090                 in = in.pop();
2091             else {
2092                 fatal("P-064", new Object[]{in.getName()});
2093             }
2094         }
2095         if ((c = in.getc()) == '%' && doLexicalPE) {
2096             // PE ref ::= '%' name ';'
2097             String name = maybeGetName();
2098             Object entity;
2099 
2100             if (name == null)
2101                 fatal("P-011");
2102             nextChar(';', "F-021", name);
2103             entity = params.get(name);
2104 
2105             // push a magic "entity" before and after the
2106             // real one, so ungetc() behaves uniformly
2107             pushReader(" ".toCharArray(), null, false);
2108             if (entity instanceof InternalEntity)
2109                 pushReader(((InternalEntity) entity).buf, name, false);
2110             else if (entity instanceof ExternalEntity)
2111             // PEs can't be unparsed!
2112             // XXX if this returns false ...
2113                 pushReader((ExternalEntity) entity);
2114             else if (entity == null)
2115             // see note in maybePEReference re making this be nonfatal.
2116                 fatal("V-022");
2117             else
2118                 throw new InternalError();
2119             pushReader(" ".toCharArray(), null, false);
2120             return in.getc();
2121         }
2122         return c;
2123     }
2124 
ungetc()2125     private void ungetc() {
2126 
2127         in.ungetc();
2128     }
2129 
peek(String s)2130     private boolean peek(String s)
2131             throws IOException, SAXException {
2132 
2133         return in.peek(s, null);
2134     }
2135 
2136     // Return the entity starting the specified declaration
2137     // (for validating declaration nesting) else null.
2138 
peekDeclaration(String s)2139     private InputEntity peekDeclaration(String s)
2140             throws IOException, SAXException {
2141 
2142         InputEntity start;
2143 
2144         if (!in.peekc('<'))
2145             return null;
2146         start = in;
2147         if (in.peek(s, null))
2148             return start;
2149         in.ungetc();
2150         return null;
2151     }
2152 
nextChar(char c, String location, String near)2153     private void nextChar(char c, String location, String near)
2154             throws IOException, SAXException {
2155 
2156         while (in.isEOF() && !in.isDocument())
2157             in = in.pop();
2158         if (!in.peekc(c))
2159             fatal("P-008", new Object[]
2160             {new Character(c),
2161              messages.getMessage(locale, location),
2162              (near == null ? "" : ('"' + near + '"'))});
2163     }
2164 
2165 
pushReader(char buf [], String name, boolean isGeneral)2166     private void pushReader(char buf [], String name, boolean isGeneral)
2167             throws SAXException {
2168 
2169         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2170         r.init(buf, name, in, !isGeneral);
2171         in = r;
2172     }
2173 
pushReader(ExternalEntity next)2174     private boolean pushReader(ExternalEntity next)
2175             throws IOException, SAXException {
2176 
2177         InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2178         InputSource s;
2179         try {
2180             s = next.getInputSource(resolver);
2181         } catch (IOException e) {
2182             String msg =
2183                     "unable to open the external entity from :" + next.systemId;
2184             if (next.publicId != null)
2185                 msg += " (public id:" + next.publicId + ")";
2186 
2187             SAXParseException spe = new SAXParseException(msg,
2188                     getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
2189             dtdHandler.fatalError(spe);
2190             throw e;
2191         }
2192 
2193         r.init(s, next.name, in, next.isPE);
2194         in = r;
2195         return true;
2196     }
2197 
getPublicId()2198     public String getPublicId() {
2199 
2200         return (in == null) ? null : in.getPublicId();
2201     }
2202 
getSystemId()2203     public String getSystemId() {
2204 
2205         return (in == null) ? null : in.getSystemId();
2206     }
2207 
getLineNumber()2208     public int getLineNumber() {
2209 
2210         return (in == null) ? -1 : in.getLineNumber();
2211     }
2212 
getColumnNumber()2213     public int getColumnNumber() {
2214 
2215         return (in == null) ? -1 : in.getColumnNumber();
2216     }
2217 
2218     // error handling convenience routines
2219 
warning(String messageId, Object parameters [])2220     private void warning(String messageId, Object parameters [])
2221             throws SAXException {
2222 
2223         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2224                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2225 
2226         dtdHandler.warning(e);
2227     }
2228 
error(String messageId, Object parameters [])2229     void error(String messageId, Object parameters [])
2230             throws SAXException {
2231 
2232         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2233                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2234 
2235         dtdHandler.error(e);
2236     }
2237 
fatal(String messageId)2238     private void fatal(String messageId) throws SAXException {
2239 
2240         fatal(messageId, null);
2241     }
2242 
fatal(String messageId, Object parameters [])2243     private void fatal(String messageId, Object parameters [])
2244             throws SAXException {
2245 
2246         SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2247                 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2248 
2249         dtdHandler.fatalError(e);
2250 
2251         throw e;
2252     }
2253 
2254     //
2255     // Map char arrays to strings ... cuts down both on memory and
2256     // CPU usage for element/attribute/other names that are reused.
2257     //
2258     // Documents typically repeat names a lot, so we more or less
2259     // intern all the strings within the document; since some strings
2260     // are repeated in multiple documents (e.g. stylesheets) we go
2261     // a bit further, and intern globally.
2262     //
2263     static class NameCache {
2264         //
2265         // Unless we auto-grow this, the default size should be a
2266         // reasonable bit larger than needed for most XML files
2267         // we've yet seen (and be prime).  If it's too small, the
2268         // penalty is just excess cache collisions.
2269         //
2270         NameCacheEntry hashtable [] = new NameCacheEntry[541];
2271 
2272         //
2273         // Usually we just want to get the 'symbol' for these chars
2274         //
lookup(char value [], int len)2275         String lookup(char value [], int len) {
2276 
2277             return lookupEntry(value, len).name;
2278         }
2279 
2280         //
2281         // Sometimes we need to scan the chars in the resulting
2282         // string, so there's an accessor which exposes them.
2283         // (Mostly for element end tags.)
2284         //
lookupEntry(char value [], int len)2285         NameCacheEntry lookupEntry(char value [], int len) {
2286 
2287             int index = 0;
2288             NameCacheEntry entry;
2289 
2290             // hashing to get index
2291             for (int i = 0; i < len; i++)
2292                 index = index * 31 + value[i];
2293             index &= 0x7fffffff;
2294             index %= hashtable.length;
2295 
2296             // return entry if one's there ...
2297             for (entry = hashtable[index];
2298                  entry != null;
2299                  entry = entry.next) {
2300                 if (entry.matches(value, len))
2301                     return entry;
2302             }
2303 
2304             // else create new one
2305             entry = new NameCacheEntry();
2306             entry.chars = new char[len];
2307             System.arraycopy(value, 0, entry.chars, 0, len);
2308             entry.name = new String(entry.chars);
2309             //
2310             // NOTE:  JDK 1.1 has a fixed size string intern table,
2311             // with non-GC'd entries.  It can panic here; that's a
2312             // JDK problem, use 1.2 or later with many identifiers.
2313             //
2314             entry.name = entry.name.intern();        // "global" intern
2315             entry.next = hashtable[index];
2316             hashtable[index] = entry;
2317             return entry;
2318         }
2319     }
2320 
2321     static class NameCacheEntry {
2322 
2323         String name;
2324         char chars [];
2325         NameCacheEntry next;
2326 
matches(char value [], int len)2327         boolean matches(char value [], int len) {
2328 
2329             if (chars.length != len)
2330                 return false;
2331             for (int i = 0; i < len; i++)
2332                 if (value[i] != chars[i])
2333                     return false;
2334             return true;
2335         }
2336     }
2337 
2338     //
2339     // Message catalog for diagnostics.
2340     //
2341     static final Catalog messages = new Catalog();
2342 
2343     static final class Catalog extends MessageCatalog {
2344 
Catalog()2345         Catalog() {
2346             super(DTDParser.class);
2347         }
2348     }
2349 
2350 }
2351