1 /*
2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package jdk.internal.util.xml.impl;
27 
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.io.InputStreamReader;
31 import java.io.Reader;
32 import java.io.UnsupportedEncodingException;
33 import java.util.HashMap;
34 import java.util.Map;
35 import jdk.internal.org.xml.sax.InputSource;
36 import jdk.internal.org.xml.sax.SAXException;
37 
38 /**
39  * XML non-validating parser engine.
40  */
41 public abstract class Parser {
42 
43     public final static String FAULT = "";
44     protected final static int BUFFSIZE_READER = 512;
45     protected final static int BUFFSIZE_PARSER = 128;
46     /**
47      * The end of stream character.
48      */
49     public final static char EOS = 0xffff;
50     private Pair mNoNS; // there is no namespace
51     private Pair mXml;  // the xml namespace
52     private Map<String, Input> mEnt;  // the entities look up table
53     private Map<String, Input> mPEnt; // the parmeter entities look up table
54     protected boolean mIsSAlone;     // xml decl standalone flag
55     protected boolean mIsSAloneSet;  // standalone is explicitely set
56     protected boolean mIsNSAware;    // if true - namespace aware mode
57     protected int mPh;  // current phase of document processing
58     protected final static int PH_BEFORE_DOC = -1;  // before parsing
59     protected final static int PH_DOC_START = 0;   // document start
60     protected final static int PH_MISC_DTD = 1;   // misc before DTD
61     protected final static int PH_DTD = 2;   // DTD
62     protected final static int PH_DTD_MISC = 3;   // misc after DTD
63     protected final static int PH_DOCELM = 4;   // document's element
64     protected final static int PH_DOCELM_MISC = 5;   // misc after element
65     protected final static int PH_AFTER_DOC = 6;   // after parsing
66     protected int mEvt;  // current event type
67     protected final static int EV_NULL = 0;   // unknown
68     protected final static int EV_ELM = 1;   // empty element
69     protected final static int EV_ELMS = 2;   // start element
70     protected final static int EV_ELME = 3;   // end element
71     protected final static int EV_TEXT = 4;   // textual content
72     protected final static int EV_WSPC = 5;   // white space content
73     protected final static int EV_PI = 6;   // processing instruction
74     protected final static int EV_CDAT = 7;   // character data
75     protected final static int EV_COMM = 8;   // comment
76     protected final static int EV_DTD = 9;   // document type definition
77     protected final static int EV_ENT = 10;  // skipped entity
78     private char mESt; // built-in entity recognizer state
79     // mESt values:
80     //   0x100   : the initial state
81     //   > 0x100 : unrecognized name
82     //   < 0x100 : replacement character
83     protected char[] mBuff;       // parser buffer
84     protected int mBuffIdx;    // index of the last char
85     protected Pair mPref;       // stack of prefixes
86     protected Pair mElm;        // stack of elements
87     // mAttL.chars - element qname
88     // mAttL.next  - next element
89     // mAttL.list  - list of attributes defined on this element
90     // mAttL.list.chars - attribute qname
91     // mAttL.list.id    - a char representing attribute's type see below
92     // mAttL.list.next  - next attribute defined on the element
93     // mAttL.list.list  - devault value structure or null
94     // mAttL.list.list.chars - "name='value' " chars array for Input
95     //
96     // Attribute type character values:
97     // 'i' - "ID"
98     // 'r' - "IDREF"
99     // 'R' - "IDREFS"
100     // 'n' - "ENTITY"
101     // 'N' - "ENTITIES"
102     // 't' - "NMTOKEN"
103     // 'T' - "NMTOKENS"
104     // 'u' - enumeration type
105     // 'o' - "NOTATION"
106     // 'c' - "CDATA"
107     // see also: bkeyword() and atype()
108     //
109     protected Pair mAttL;       // list of defined attrs by element name
110     protected Input mDoc;        // document entity
111     protected Input mInp;        // stack of entities
112     private char[] mChars;      // reading buffer
113     private int mChLen;      // current capacity
114     private int mChIdx;      // index to the next char
115     protected Attrs mAttrs;      // attributes of the curr. element
116     private String[] mItems;      // attributes array of the curr. element
117     private char mAttrIdx;    // attributes counter/index
118     private String mUnent;  // unresolved entity name
119     private Pair mDltd;   // deleted objects for reuse
120     /**
121      * Default prefixes
122      */
123     private final static char NONS[];
124     private final static char XML[];
125     private final static char XMLNS[];
126 
127     static {
128         NONS = new char[1];
129         NONS[0] = (char) 0;
130 
131         XML = new char[4];
132         XML[0] = (char) 4;
133         XML[1] = 'x';
134         XML[2] = 'm';
135         XML[3] = 'l';
136 
137         XMLNS = new char[6];
138         XMLNS[0] = (char) 6;
139         XMLNS[1] = 'x';
140         XMLNS[2] = 'm';
141         XMLNS[3] = 'l';
142         XMLNS[4] = 'n';
143         XMLNS[5] = 's';
144     }
145     /**
146      * ASCII character type array.
147      *
148      * This array maps an ASCII (7 bit) character to the character type.<br />
149      * Possible character type values are:<br /> - ' ' for any kind of white
150      * space character;<br /> - 'a' for any lower case alphabetical character
151      * value;<br /> - 'A' for any upper case alphabetical character value;<br />
152      * - 'd' for any decimal digit character value;<br /> - 'z' for any
153      * character less then ' ' except '\t', '\n', '\r';<br /> An ASCII (7 bit)
154      * character which does not fall in any category listed above is mapped to
155      * it self.
156      */
157     private static final byte asctyp[];
158     /**
159      * NMTOKEN character type array.
160      *
161      * This array maps an ASCII (7 bit) character to the character type.<br />
162      * Possible character type values are:<br /> - 0 for underscore ('_') or any
163      * lower and upper case alphabetical character value;<br /> - 1 for colon
164      * (':') character;<br /> - 2 for dash ('-') and dot ('.') or any decimal
165      * digit character value;<br /> - 3 for any kind of white space character<br
166      * /> An ASCII (7 bit) character which does not fall in any category listed
167      * above is mapped to 0xff.
168      */
169     private static final byte nmttyp[];
170 
171     /**
172      * Static constructor.
173      *
174      * Sets up the ASCII character type array which is used by
175      * {@link #asctyp asctyp} method and NMTOKEN character type array.
176      */
177     static {
178         short i = 0;
179 
180         asctyp = new byte[0x80];
181         while (i < ' ') {
182             asctyp[i++] = (byte) 'z';
183         }
184         asctyp['\t'] = (byte) ' ';
185         asctyp['\r'] = (byte) ' ';
186         asctyp['\n'] = (byte) ' ';
187         while (i < '0') {
188             asctyp[i] = (byte) i++;
189         }
190         while (i <= '9') {
191             asctyp[i++] = (byte) 'd';
192         }
193         while (i < 'A') {
194             asctyp[i] = (byte) i++;
195         }
196         while (i <= 'Z') {
197             asctyp[i++] = (byte) 'A';
198         }
199         while (i < 'a') {
200             asctyp[i] = (byte) i++;
201         }
202         while (i <= 'z') {
203             asctyp[i++] = (byte) 'a';
204         }
205         while (i < 0x80) {
206             asctyp[i] = (byte) i++;
207         }
208 
209         nmttyp = new byte[0x80];
210         for (i = 0; i < '0'; i++) {
211             nmttyp[i] = (byte) 0xff;
212         }
213         while (i <= '9') {
214             nmttyp[i++] = (byte) 2;  // digits
215         }
216         while (i < 'A') {
217             nmttyp[i++] = (byte) 0xff;
218         }
219         // skiped upper case alphabetical character are already 0
220         for (i = '['; i < 'a'; i++) {
221             nmttyp[i] = (byte) 0xff;
222         }
223         // skiped lower case alphabetical character are already 0
224         for (i = '{'; i < 0x80; i++) {
225             nmttyp[i] = (byte) 0xff;
226         }
227         nmttyp['_'] = 0;
228         nmttyp[':'] = 1;
229         nmttyp['.'] = 2;
230         nmttyp['-'] = 2;
231         nmttyp[' '] = 3;
232         nmttyp['\t'] = 3;
233         nmttyp['\r'] = 3;
234         nmttyp['\n'] = 3;
235     }
236 
237     /**
238      * Constructor.
239      */
Parser()240     protected Parser() {
241         mPh = PH_BEFORE_DOC;  // before parsing
242 
243         //              Initialize the parser
244         mBuff = new char[BUFFSIZE_PARSER];
245         mAttrs = new Attrs();
246 
247         //              Default namespace
248         mPref = pair(mPref);
249         mPref.name = "";
250         mPref.value = "";
251         mPref.chars = NONS;
252         mNoNS = mPref;  // no namespace
253         //              XML namespace
254         mPref = pair(mPref);
255         mPref.name = "xml";
256         mPref.value = "http://www.w3.org/XML/1998/namespace";
257         mPref.chars = XML;
258         mXml = mPref;  // XML namespace
259     }
260 
261     /**
262      * Initializes parser's internals. Note, current input has to be set before
263      * this method is called.
264      */
init()265     protected void init() {
266         mUnent = null;
267         mElm = null;
268         mPref = mXml;
269         mAttL = null;
270         mPEnt = new HashMap<>();
271         mEnt = new HashMap<>();
272         mDoc = mInp;          // current input is document entity
273         mChars = mInp.chars;    // use document entity buffer
274         mPh = PH_DOC_START;  // the begining of the document
275     }
276 
277     /**
278      * Cleans up parser internal resources.
279      */
cleanup()280     protected void cleanup() {
281         //              Default attributes
282         while (mAttL != null) {
283             while (mAttL.list != null) {
284                 if (mAttL.list.list != null) {
285                     del(mAttL.list.list);
286                 }
287                 mAttL.list = del(mAttL.list);
288             }
289             mAttL = del(mAttL);
290         }
291         //              Element stack
292         while (mElm != null) {
293             mElm = del(mElm);
294         }
295         //              Namespace prefixes
296         while (mPref != mXml) {
297             mPref = del(mPref);
298         }
299         //              Inputs
300         while (mInp != null) {
301             pop();
302         }
303         //              Document reader
304         if ((mDoc != null) && (mDoc.src != null)) {
305             try {
306                 mDoc.src.close();
307             } catch (IOException ioe) {
308             }
309         }
310         mPEnt = null;
311         mEnt = null;
312         mDoc = null;
313         mPh = PH_AFTER_DOC;  // before documnet processing
314     }
315 
316     /**
317      * Processes a portion of document. This method returns one of EV_*
318      * constants as an identifier of the portion of document have been read.
319      *
320      * @return Identifier of processed document portion.
321      * @exception Exception is parser specific exception form panic method.
322      * @exception IOException
323      */
324     @SuppressWarnings("fallthrough")
step()325     protected int step() throws Exception {
326         mEvt = EV_NULL;
327         int st = 0;
328         while (mEvt == EV_NULL) {
329             char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
330             switch (st) {
331                 case 0:     // all sorts of markup (dispetcher)
332                     if (ch != '<') {
333                         bkch();
334                         mBuffIdx = -1;  // clean parser buffer
335                         st = 1;
336                         break;
337                     }
338                     switch (getch()) {
339                         case '/':  // the end of the element content
340                             mEvt = EV_ELME;
341                             if (mElm == null) {
342                                 panic(FAULT);
343                             }
344                             //          Check element's open/close tags balance
345                             mBuffIdx = -1;  // clean parser buffer
346                             bname(mIsNSAware);
347                             char[] chars = mElm.chars;
348                             if (chars.length == (mBuffIdx + 1)) {
349                                 for (char i = 1; i <= mBuffIdx; i += 1) {
350                                     if (chars[i] != mBuff[i]) {
351                                         panic(FAULT);
352                                     }
353                                 }
354                             } else {
355                                 panic(FAULT);
356                             }
357                             //          Skip white spaces before '>'
358                             if (wsskip() != '>') {
359                                 panic(FAULT);
360                             }
361                             getch();  // read '>'
362                             break;
363 
364                         case '!':  // a comment or a CDATA
365                             ch = getch();
366                             bkch();
367                             switch (ch) {
368                                 case '-':  // must be a comment
369                                     mEvt = EV_COMM;
370                                     comm();
371                                     break;
372 
373                                 case '[':  // must be a CDATA section
374                                     mEvt = EV_CDAT;
375                                     cdat();
376                                     break;
377 
378                                 default:   // must be 'DOCTYPE'
379                                     mEvt = EV_DTD;
380                                     dtd();
381                                     break;
382                             }
383                             break;
384 
385                         case '?':  // processing instruction
386                             mEvt = EV_PI;
387                             pi();
388                             break;
389 
390                         default:  // must be the first char of an xml name
391                             bkch();
392                             //          Read an element name and put it on top of the
393                             //          element stack
394                             mElm = pair(mElm);  // add new element to the stack
395                             mElm.chars = qname(mIsNSAware);
396                             mElm.name = mElm.local();
397                             mElm.id = (mElm.next != null) ? mElm.next.id : 0;  // flags
398                             mElm.num = 0;     // namespace counter
399                             //          Find the list of defined attributs of the current
400                             //          element
401                             Pair elm = find(mAttL, mElm.chars);
402                             mElm.list = (elm != null) ? elm.list : null;
403                             //          Read attributes till the end of the element tag
404                             mAttrIdx = 0;
405                             Pair att = pair(null);
406                             att.num = 0;  // clear attribute's flags
407                             attr(att);     // get all attributes inc. defaults
408                             del(att);
409                             mElm.value = (mIsNSAware) ? rslv(mElm.chars) : null;
410                             //          Skip white spaces before '>'
411                             switch (wsskip()) {
412                                 case '>':
413                                     getch();  // read '>'
414                                     mEvt = EV_ELMS;
415                                     break;
416 
417                                 case '/':
418                                     getch();  // read '/'
419                                     if (getch() != '>') // read '>'
420                                     {
421                                         panic(FAULT);
422                                     }
423                                     mEvt = EV_ELM;
424                                     break;
425 
426                                 default:
427                                     panic(FAULT);
428                             }
429                             break;
430                     }
431                     break;
432 
433                 case 1:     // read white space
434                     switch (ch) {
435                         case ' ':
436                         case '\t':
437                         case '\n':
438                             bappend(ch);
439                             break;
440 
441                         case '\r':              // EOL processing [#2.11]
442                             if (getch() != '\n') {
443                                 bkch();
444                             }
445                             bappend('\n');
446                             break;
447 
448                         case '<':
449                             mEvt = EV_WSPC;
450                             bkch();
451                             bflash_ws();
452                             break;
453 
454                         default:
455                             bkch();
456                             st = 2;
457                             break;
458                     }
459                     break;
460 
461                 case 2:     // read the text content of the element
462                     switch (ch) {
463                         case '&':
464                             if (mUnent == null) {
465                                 //              There was no unresolved entity on previous step.
466                                 if ((mUnent = ent('x')) != null) {
467                                     mEvt = EV_TEXT;
468                                     bkch();      // move back to ';' after entity name
469                                     setch('&');  // parser must be back on next step
470                                     bflash();
471                                 }
472                             } else {
473                                 //              There was unresolved entity on previous step.
474                                 mEvt = EV_ENT;
475                                 skippedEnt(mUnent);
476                                 mUnent = null;
477                             }
478                             break;
479 
480                         case '<':
481                             mEvt = EV_TEXT;
482                             bkch();
483                             bflash();
484                             break;
485 
486                         case '\r':  // EOL processing [#2.11]
487                             if (getch() != '\n') {
488                                 bkch();
489                             }
490                             bappend('\n');
491                             break;
492 
493                         case EOS:
494                             panic(FAULT);
495 
496                         default:
497                             bappend(ch);
498                             break;
499                     }
500                     break;
501 
502                 default:
503                     panic(FAULT);
504             }
505         }
506 
507         return mEvt;
508     }
509 
510     /**
511      * Parses the document type declaration.
512      *
513      * @exception Exception is parser specific exception form panic method.
514      * @exception IOException
515      */
dtd()516     private void dtd() throws Exception {
517         char ch;
518         String str = null;
519         String name = null;
520         Pair psid = null;
521         // read 'DOCTYPE'
522         if ("DOCTYPE".equals(name(false)) != true) {
523             panic(FAULT);
524         }
525         mPh = PH_DTD;  // DTD
526         for (short st = 0; st >= 0;) {
527             ch = getch();
528             switch (st) {
529                 case 0:     // read the document type name
530                     if (chtyp(ch) != ' ') {
531                         bkch();
532                         name = name(mIsNSAware);
533                         wsskip();
534                         st = 1;  // read 'PUPLIC' or 'SYSTEM'
535                     }
536                     break;
537 
538                 case 1:     // read 'PUPLIC' or 'SYSTEM'
539                     switch (chtyp(ch)) {
540                         case 'A':
541                             bkch();
542                             psid = pubsys(' ');
543                             st = 2;  // skip spaces before internal subset
544                             docType(name, psid.name, psid.value);
545                             break;
546 
547                         case '[':
548                             bkch();
549                             st = 2;    // skip spaces before internal subset
550                             docType(name, null, null);
551                             break;
552 
553                         case '>':
554                             bkch();
555                             st = 3;    // skip spaces after internal subset
556                             docType(name, null, null);
557                             break;
558 
559                         default:
560                             panic(FAULT);
561                     }
562                     break;
563 
564                 case 2:     // skip spaces before internal subset
565                     switch (chtyp(ch)) {
566                         case '[':
567                             //          Process internal subset
568                             dtdsub();
569                             st = 3;  // skip spaces after internal subset
570                             break;
571 
572                         case '>':
573                             //          There is no internal subset
574                             bkch();
575                             st = 3;  // skip spaces after internal subset
576                             break;
577 
578                         case ' ':
579                             // skip white spaces
580                             break;
581 
582                         default:
583                             panic(FAULT);
584                     }
585                     break;
586 
587                 case 3:     // skip spaces after internal subset
588                     switch (chtyp(ch)) {
589                         case '>':
590                             if (psid != null) {
591                                 //              Report the DTD external subset
592                                 InputSource is = resolveEnt(name, psid.name, psid.value);
593                                 if (is != null) {
594                                     if (mIsSAlone == false) {
595                                         //              Set the end of DTD external subset char
596                                         bkch();
597                                         setch(']');
598                                         //              Set the DTD external subset InputSource
599                                         push(new Input(BUFFSIZE_READER));
600                                         setinp(is);
601                                         mInp.pubid = psid.name;
602                                         mInp.sysid = psid.value;
603                                         //              Parse the DTD external subset
604                                         dtdsub();
605                                     } else {
606                                         //              Unresolved DTD external subset
607                                         skippedEnt("[dtd]");
608                                         //              Release reader and stream
609                                         if (is.getCharacterStream() != null) {
610                                             try {
611                                                 is.getCharacterStream().close();
612                                             } catch (IOException ioe) {
613                                             }
614                                         }
615                                         if (is.getByteStream() != null) {
616                                             try {
617                                                 is.getByteStream().close();
618                                             } catch (IOException ioe) {
619                                             }
620                                         }
621                                     }
622                                 } else {
623                                     //          Unresolved DTD external subset
624                                     skippedEnt("[dtd]");
625                                 }
626                                 del(psid);
627                             }
628                             st = -1;  // end of DTD
629                             break;
630 
631                         case ' ':
632                             // skip white spaces
633                             break;
634 
635                         default:
636                             panic(FAULT);
637                     }
638                     break;
639 
640                 default:
641                     panic(FAULT);
642             }
643         }
644     }
645 
646     /**
647      * Parses the document type declaration subset.
648      *
649      * @exception Exception is parser specific exception form panic method.
650      * @exception IOException
651      */
dtdsub()652     private void dtdsub() throws Exception {
653         char ch;
654         for (short st = 0; st >= 0;) {
655             ch = getch();
656             switch (st) {
657                 case 0:     // skip white spaces before a declaration
658                     switch (chtyp(ch)) {
659                         case '<':
660                             ch = getch();
661                             switch (ch) {
662                                 case '?':
663                                     pi();
664                                     break;
665 
666                                 case '!':
667                                     ch = getch();
668                                     bkch();
669                                     if (ch == '-') {
670                                         comm();
671                                         break;
672                                     }
673                                     //          A markup or an entity declaration
674                                     bntok();
675                                     switch (bkeyword()) {
676                                         case 'n':
677                                             dtdent();
678                                             break;
679 
680                                         case 'a':
681                                             dtdattl();    // parse attributes declaration
682                                             break;
683 
684                                         case 'e':
685                                             dtdelm();     // parse element declaration
686                                             break;
687 
688                                         case 'o':
689                                             dtdnot();     // parse notation declaration
690                                             break;
691 
692                                         default:
693                                             panic(FAULT); // unsupported markup declaration
694                                             break;
695                                     }
696                                     st = 1;  // read the end of declaration
697                                     break;
698 
699                                 default:
700                                     panic(FAULT);
701                                     break;
702                             }
703                             break;
704 
705                         case '%':
706                             //          A parameter entity reference
707                             pent(' ');
708                             break;
709 
710                         case ']':
711                             //          End of DTD subset
712                             st = -1;
713                             break;
714 
715                         case ' ':
716                             //          Skip white spaces
717                             break;
718 
719                         case 'Z':
720                             //          End of stream
721                             if (getch() != ']') {
722                                 panic(FAULT);
723                             }
724                             st = -1;
725                             break;
726 
727                         default:
728                             panic(FAULT);
729                     }
730                     break;
731 
732                 case 1:     // read the end of declaration
733                     switch (ch) {
734                         case '>':   // there is no notation
735                             st = 0; // skip white spaces before a declaration
736                             break;
737 
738                         case ' ':
739                         case '\n':
740                         case '\r':
741                         case '\t':
742                             //          Skip white spaces
743                             break;
744 
745                         default:
746                             panic(FAULT);
747                             break;
748                     }
749                     break;
750 
751                 default:
752                     panic(FAULT);
753             }
754         }
755     }
756 
757     /**
758      * Parses an entity declaration. This method fills the general
759      * (<code>mEnt</code>) and parameter (<code>mPEnt</code>) entity look up
760      * tables.
761      *
         * <p>A name that already has an entry in the corresponding table is not
         * replaced, so the first declaration that was read stays in effect
         * [#4.2]. An external unparsed entity (<code>NDATA</code>) is not
         * stored in a table; it is passed to <code>unparsedEntDecl</code>
         * instead. A <code>%</code> that is not followed by white space is
         * handled as a parameter entity reference through <code>pent</code>.</p>
762      *
763      * @exception Exception is parser specific exception from panic method.
764      * @exception IOException presumably when reading the input fails; note
         * that the signature itself only declares <code>Exception</code>.
765      */
766     @SuppressWarnings("fallthrough")
dtdent()767     private void dtdent() throws Exception {
768         String str = null;  // entity name, used as the look up table key
769         char[] val = null;  // replacement text of an internal entity
770         Input inp = null;  // table entry being built
771         Pair ids = null;  // external identifier as returned by pubsys
772         char ch;
773         for (short st = 0; st >= 0;) {  // a negative state ends the declaration
774             ch = getch();
775             switch (st) {
776                 case 0:     // skip white spaces before entity name
777                     switch (chtyp(ch)) {
778                         case ' ':
779                             //          Skip white spaces
780                             break;
781 
782                         case '%':
783                             //          Parameter entity or parameter entity declaration.
                                //          The next character decides which one it is.
                                //          NOTE(review): bkch presumably un-reads that
                                //          character again - confirm.
784                             ch = getch();
785                             bkch();
786                             if (chtyp(ch) == ' ') {
787                                 //              Parameter entity declaration.
788                                 wsskip();
789                                 str = name(false);
790                                 switch (chtyp(wsskip())) {
791                                     case 'A':
792                                         //              Read the external identifier
793                                         ids = pubsys(' ');
794                                         if (wsskip() == '>') {
795                                             //          External parsed entity
796                                             if (mPEnt.containsKey(str) == false) {      // [#4.2]
797                                                 inp = new Input();
798                                                 inp.pubid = ids.name;
799                                                 inp.sysid = ids.value;
800                                                 mPEnt.put(str, inp);
801                                             }
802                                         } else {
803                                             panic(FAULT);
804                                         }
805                                         del(ids);
806                                         st = -1;  // the end of declaration
807                                         break;
808 
809                                     case '\"':
810                                     case '\'':
811                                         //              Read the parameter entity value
812                                         bqstr('d');
813                                         //              Create the parameter entity value
                                            //              The array is one longer than the
                                            //              buffered value; slot 0 is kept free
                                            //              for the space written below.
814                                         val = new char[mBuffIdx + 1];
815                                         System.arraycopy(mBuff, 1, val, 1, val.length - 1);
816                                         //              Add surrounding spaces [#4.4.8]
                                            //              NOTE(review): only the leading space
                                            //              is written here; check whether bqstr
                                            //              supplies the trailing one.
817                                         val[0] = ' ';
818                                         //              Add the entity to the entity look up table
819                                         if (mPEnt.containsKey(str) == false) {  // [#4.2]
820                                             inp = new Input(val);
                                                //          The new entry copies the identifiers
                                                //          and XML declaration data of the
                                                //          current input (mInp).
821                                             inp.pubid = mInp.pubid;
822                                             inp.sysid = mInp.sysid;
823                                             inp.xmlenc = mInp.xmlenc;
824                                             inp.xmlver = mInp.xmlver;
825                                             mPEnt.put(str, inp);
826                                         }
827                                         st = -1;  // the end of declaration
828                                         break;
829 
830                                     default:
831                                         panic(FAULT);
832                                         break;
833                                 }
834                             } else {
835                                 //              Parameter entity reference.
836                                 pent(' ');
837                             }
838                             break;
839 
840                         default:
                                //          First character of a general entity name.
841                             bkch();
842                             str = name(false);
843                             st = 1;  // read entity declaration value
844                             break;
845                     }
846                     break;
847 
848                 case 1:     // read entity declaration value
849                     switch (chtyp(ch)) {
850                         case '\"':  // internal entity
851                         case '\'':
852                             bkch();
853                             bqstr('d');  // read a string into the buffer
854                             if (mEnt.get(str) == null) {  // no entry for this name yet
855                                 //              Create general entity value
                                    //              (no extra space, unlike the parameter
                                    //              entity value above)
856                                 val = new char[mBuffIdx];
857                                 System.arraycopy(mBuff, 1, val, 0, val.length);
858                                 //              Add the entity to the entity look up table
859                                 if (mEnt.containsKey(str) == false) {   // [#4.2]
860                                     inp = new Input(val);
861                                     inp.pubid = mInp.pubid;
862                                     inp.sysid = mInp.sysid;
863                                     inp.xmlenc = mInp.xmlenc;
864                                     inp.xmlver = mInp.xmlver;
865                                     mEnt.put(str, inp);
866                                 }
867                             }
868                             st = -1;  // the end of declaration
869                             break;
870 
871                         case 'A':  // external entity
872                             bkch();
873                             ids = pubsys(' ');
874                             switch (wsskip()) {
875                                 case '>':  // external parsed entity
876                                     if (mEnt.containsKey(str) == false) {  // [#4.2]
877                                         inp = new Input();
878                                         inp.pubid = ids.name;
879                                         inp.sysid = ids.value;
880                                         mEnt.put(str, inp);
881                                     }
882                                     break;
883 
884                                 case 'N':  // external general unparsed entity
885                                     if ("NDATA".equals(name(false)) == true) {
886                                         wsskip();
                                            //          The last argument is the name read
                                            //          after the NDATA keyword.
887                                         unparsedEntDecl(str, ids.name, ids.value, name(false));
888                                         break;
889                                     }
                                    //          Any other keyword: intentional fall through
                                    //          to the panic below.
890                                 default:
891                                     panic(FAULT);
892                                     break;
893                             }
894                             del(ids);  // NOTE(review): presumably hands the pair back for reuse - confirm
895                             st = -1;  // the end of declaration
896                             break;
897 
898                         case ' ':
899                             //          Skip white spaces
900                             break;
901 
902                         default:
903                             panic(FAULT);
904                             break;
905                     }
906                     break;
907 
908                 default:
909                     panic(FAULT);
910             }
911         }
912     }
913 
914     /**
915      * Parses an element declaration.
916      *
917      * This method parses the declaration up to the closing angle bracket.
918      *
919      * @exception Exception is parser specific exception form panic method.
920      * @exception IOException
921      */
922     @SuppressWarnings("fallthrough")
dtdelm()923     private void dtdelm() throws Exception {
924         //              This is stub implementation which skips an element
925         //              declaration.
926         wsskip();
927         name(mIsNSAware);
928 
929         char ch;
930         while (true) {
931             ch = getch();
932             switch (ch) {
933                 case '>':
934                     bkch();
935                     return;
936 
937                 case EOS:
938                     panic(FAULT);
939 
940                 default:
941                     break;
942             }
943         }
944     }
945 
946     /**
947      * Parses an attribute list declaration.
948      *
949      * This method parses the declaration up to the closing angle bracket.
950      *
951      * @exception Exception is parser specific exception form panic method.
952      * @exception IOException
953      */
dtdattl()954     private void dtdattl() throws Exception {
955         char elmqn[] = null;
956         Pair elm = null;
957         char ch;
958         for (short st = 0; st >= 0;) {
959             ch = getch();
960             switch (st) {
961                 case 0:     // read the element name
962                     switch (chtyp(ch)) {
963                         case 'a':
964                         case 'A':
965                         case '_':
966                         case 'X':
967                         case ':':
968                             bkch();
969                             //          Get the element from the list or add a new one.
970                             elmqn = qname(mIsNSAware);
971                             elm = find(mAttL, elmqn);
972                             if (elm == null) {
973                                 elm = pair(mAttL);
974                                 elm.chars = elmqn;
975                                 mAttL = elm;
976                             }
977                             st = 1;  // read an attribute declaration
978                             break;
979 
980                         case ' ':
981                             break;
982 
983                         case '%':
984                             pent(' ');
985                             break;
986 
987                         default:
988                             panic(FAULT);
989                             break;
990                     }
991                     break;
992 
993                 case 1:     // read an attribute declaration
994                     switch (chtyp(ch)) {
995                         case 'a':
996                         case 'A':
997                         case '_':
998                         case 'X':
999                         case ':':
1000                             bkch();
1001                             dtdatt(elm);
1002                             if (wsskip() == '>') {
1003                                 return;
1004                             }
1005                             break;
1006 
1007                         case ' ':
1008                             break;
1009 
1010                         case '%':
1011                             pent(' ');
1012                             break;
1013 
1014                         default:
1015                             panic(FAULT);
1016                             break;
1017                     }
1018                     break;
1019 
1020                 default:
1021                     panic(FAULT);
1022                     break;
1023             }
1024         }
1025     }
1026 
1027     /**
1028      * Parses an attribute declaration.
1029      *
1030      * The attribute uses the following fields of Pair object:
1031      * <ul>
1032      * <li>chars - characters of qualified name;</li>
          * <li>id - the type identifier of the attribute;</li>
          * <li>list - a pair which holds the default value (chars field).</li>
          * </ul>
          *
          * <p>If the element already has a declaration for the attribute name,
          * the new declaration is still read to its end, but into a pair that is
          * not linked into <code>elm.list</code>, so the earlier declaration is
          * kept [#3.3].</p>
1033      *
1034      * @param elm An object which represents all defined attributes on an
1035      * element.
1036      * @exception Exception is parser specific exception from panic method.
1037      * @exception IOException
1038      */
1039     @SuppressWarnings("fallthrough")
dtdatt(Pair elm)1040     private void dtdatt(Pair elm) throws Exception {
1041         char attqn[] = null;  // qualified name of the attribute
1042         Pair att = null;  // the declaration being filled in
1043         char ch;
1044         for (short st = 0; st >= 0;) {  // a negative state ends the declaration
1045             ch = getch();
1046             switch (st) {
1047                 case 0:     // the attribute name
1048                     switch (chtyp(ch)) {
1049                         case 'a':
1050                         case 'A':
1051                         case '_':
1052                         case 'X':
1053                         case ':':
1054                             bkch();
1055                             //          Get the attribute from the list or add a new one.
1056                             attqn = qname(mIsNSAware);
1057                             att = find(elm.list, attqn);
1058                             if (att == null) {
1059                                 //              New attribute declaration
                                     //              (becomes the new head of elm.list)
1060                                 att = pair(elm.list);
1061                                 att.chars = attqn;
1062                                 elm.list = att;
1063                             } else {
1064                                 //              Do not override the attribute declaration [#3.3]
                                     //              The pair below is never assigned to
                                     //              elm.list, so whatever is parsed into it
                                     //              does not touch the earlier declaration.
1065                                 att = pair(null);
1066                                 att.chars = attqn;
1067                                 att.id = 'c';
1068                             }
1069                             wsskip();
1070                             st = 1;
1071                             break;
1072 
1073                         case '%':
1074                             pent(' ');
1075                             break;
1076 
1077                         case ' ':
1078                             break;
1079 
1080                         default:
1081                             panic(FAULT);
1082                             break;
1083                     }
1084                     break;
1085 
1086                 case 1:     // the attribute type
1087                     switch (chtyp(ch)) {
1088                         case '(':
1089                             att.id = 'u';  // enumeration type
1090                             st = 2;        // read the first element of the list
1091                             break;
1092 
1093                         case '%':
1094                             pent(' ');
1095                             break;
1096 
1097                         case ' ':
1098                             break;
1099 
1100                         default:
                                 //          A type keyword: read it as a token and map it
                                 //          to its one character identifier.
1101                             bkch();
1102                             bntok();  // read type id
1103                             att.id = bkeyword();
1104                             switch (att.id) {
1105                                 case 'o':   // NOTATION
1106                                     if (wsskip() != '(') {
1107                                         panic(FAULT);
1108                                     }
                                         //          NOTE(review): this read presumably consumes
                                         //          the parenthesis that wsskip only reported -
                                         //          confirm against wsskip.
1109                                     ch = getch();
1110                                     st = 2;  // read the first element of the list
1111                                     break;
1112 
1113                                 case 'i':     // ID
1114                                 case 'r':     // IDREF
1115                                 case 'R':     // IDREFS
1116                                 case 'n':     // ENTITY
1117                                 case 'N':     // ENTITIES
1118                                 case 't':     // NMTOKEN
1119                                 case 'T':     // NMTOKENS
1120                                 case 'c':     // CDATA
1121                                     wsskip();
1122                                     st = 4;  // read default declaration
1123                                     break;
1124 
1125                                 default:
1126                                     panic(FAULT);
1127                                     break;
1128                             }
1129                             break;
1130                     }
1131                     break;
1132 
1133                 case 2:     // read the first element of the list
1134                     switch (chtyp(ch)) {
1135                         case 'a':
1136                         case 'A':
1137                         case 'd':
1138                         case '.':
1139                         case ':':
1140                         case '-':
1141                         case '_':
1142                         case 'X':
1143                             bkch();
                                 //          The list item is read into the buffer only;
                                 //          nothing in this method stores it afterwards.
1144                             switch (att.id) {
1145                                 case 'u':  // enumeration type
1146                                     bntok();
1147                                     break;
1148 
1149                                 case 'o':  // NOTATION
1150                                     mBuffIdx = -1;  // reset the buffer index before bname
1151                                     bname(false);
1152                                     break;
1153 
1154                                 default:
1155                                     panic(FAULT);
1156                                     break;
1157                             }
1158                             wsskip();
1159                             st = 3;  // read next element of the list
1160                             break;
1161 
1162                         case '%':
1163                             pent(' ');
1164                             break;
1165 
1166                         case ' ':
1167                             break;
1168 
1169                         default:
1170                             panic(FAULT);
1171                             break;
1172                     }
1173                     break;
1174 
1175                 case 3:     // read next element of the list
1176                     switch (ch) {  // the raw character, not its chtyp class
1177                         case ')':
1178                             wsskip();
1179                             st = 4;  // read default declaration
1180                             break;
1181 
1182                         case '|':
1183                             wsskip();
1184                             switch (att.id) {
1185                                 case 'u':  // enumeration type
1186                                     bntok();
1187                                     break;
1188 
1189                                 case 'o':  // NOTATION
1190                                     mBuffIdx = -1;  // reset the buffer index before bname
1191                                     bname(false);
1192                                     break;
1193 
1194                                 default:
1195                                     panic(FAULT);
1196                                     break;
1197                             }
1198                             wsskip();
1199                             break;
1200 
1201                         case '%':
1202                             pent(' ');
1203                             break;
1204 
1205                         default:
1206                             panic(FAULT);
1207                             break;
1208                     }
1209                     break;
1210 
1211                 case 4:     // read default declaration
1212                     switch (ch) {  // the raw character, not its chtyp class
1213                         case '#':
1214                             bntok();
1215                             switch (bkeyword()) {
1216                                 case 'F':  // FIXED
1217                                     switch (wsskip()) {
1218                                         case '\"':
1219                                         case '\'':
1220                                             st = 5;  // read the default value
1221                                             break;
1222 
1223                                         case EOS:
1224                                             panic(FAULT);
                                                 //          no break: continues into default
                                                 //          if panic returns
1225 
1226                                         default:
1227                                             st = -1;
1228                                             break;
1229                                     }
1230                                     break;
1231 
1232                                 case 'Q':  // REQUIRED
1233                                 case 'I':  // IMPLIED
1234                                     st = -1;
1235                                     break;
1236 
1237                                 default:
1238                                     panic(FAULT);
1239                                     break;
1240                             }
1241                             break;
1242 
1243                         case '\"':
1244                         case '\'':
                                 //          Give the quote back so that state 5 reads it
                                 //          again as the opening delimiter.
1245                             bkch();
1246                             st = 5;  // read the default value
1247                             break;
1248 
1249                         case ' ':
1250                         case '\n':
1251                         case '\r':
1252                         case '\t':
1253                             break;
1254 
1255                         case '%':
1256                             pent(' ');
1257                             break;
1258 
1259                         default:
                                 //          Anything else ends this attribute declaration;
                                 //          bkch is called on the character first.
1260                             bkch();
1261                             st = -1;
1262                             break;
1263                     }
1264                     break;
1265 
1266                 case 5:     // read the default value
1267                     switch (ch) {
1268                         case '\"':
1269                         case '\'':
1270                             bkch();
1271                             bqstr('d');  // the value in the mBuff now
1272                             att.list = pair(null);
1273                             //          Create a string like "attqname='value' "
                                 //          Layout: att.chars from index 1 on (index 0 is
                                 //          skipped), '=', the quote character in ch,
                                 //          mBuffIdx characters of mBuff starting at
                                 //          index 1, the same quote again, one space.
1274                             att.list.chars = new char[att.chars.length + mBuffIdx + 3];
1275                             System.arraycopy(
1276                                     att.chars, 1, att.list.chars, 0, att.chars.length - 1);
1277                             att.list.chars[att.chars.length - 1] = '=';
1278                             att.list.chars[att.chars.length] = ch;
1279                             System.arraycopy(
1280                                     mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx);
1281                             att.list.chars[att.chars.length + mBuffIdx + 1] = ch;
1282                             att.list.chars[att.chars.length + mBuffIdx + 2] = ' ';
1283                             st = -1;
1284                             break;
1285 
1286                         default:
1287                             panic(FAULT);
1288                             break;
1289                     }
1290                     break;
1291 
1292                 default:
1293                     panic(FAULT);
1294                     break;
1295             }
1296         }
1297     }
1298 
1299     /**
1300      * Parses a notation declaration.
1301      *
1302      * This method parses the declaration up to the closing angle bracket.
1303      *
1304      * @exception Exception is parser specific exception form panic method.
1305      * @exception IOException
1306      */
dtdnot()1307     private void dtdnot() throws Exception {
1308         wsskip();
1309         String name = name(false);
1310         wsskip();
1311         Pair ids = pubsys('N');
1312         notDecl(name, ids.name, ids.value);
1313         del(ids);
1314     }
1315 
1316     /**
1317      * Parses an attribute.
1318      *
1319      * This recursive method is responsible for prefix addition
1320      * (<code>mPref</code>) on the way down. The element's start tag end
1321      * triggers the return process. The method then on its way back resolves
1322      * prefixes and accumulates attributes.
1323      *
          * <p>The entry of an attribute starts at index
          * <code>mAttrIdx &lt;&lt; 3</code> of <code>mItems</code>. This method
          * fills offsets 0 to 5 of the entry: resolved prefix (or an empty
          * string), qualified name, local name (empty string when the parser is
          * not namespace aware), value, type and declaration flag
          * (<code>null</code>, <code>"d"</code> or <code>"D"</code>).</p>
          *
1324      *
1325      * <p><code>att.num</code> carries attribute flags where: 0x1 - attribute is
1326      * declared in DTD (attribute declaration had been read); 0x2 - attribute's
1327      * default value is used.</p>
1328      *
1329      * @param att An object which represents current attribute.
1330      * @exception Exception is parser specific exception from panic method.
1331      * @exception IOException
1332      */
1333     @SuppressWarnings("fallthrough")
attr(Pair att)1334     private void attr(Pair att) throws Exception {
1335         switch (wsskip()) {
1336             case '/':
1337             case '>':
1338                 if ((att.num & 0x2) == 0) {  // all attributes have been read
1339                     att.num |= 0x2;  // set default attribute flag
1340                     Input inp = mInp;  // remembered to detect pushed defaults below
1341                     //          Go through all attributes defined on current element.
1342                     for (Pair def = mElm.list; def != null; def = def.next) {
1343                         if (def.list == null) // no default value
1344                         {
1345                             continue;
1346                         }
1347                         //              Go through all attributes defined on current
1348                         //              element and add defaults.
                             //              A default is pushed as new input only when
                             //              find reports no pair with the same name in
                             //              the chain starting at att.next.
1349                         Pair act = find(att.next, def.chars);
1350                         if (act == null) {
1351                             push(new Input(def.list.chars));
1352                         }
1353                     }
1354                     if (mInp != inp) {  // defaults have been added
                             //          Read the pushed text as further attributes; the
                             //          0x2 flag set above keeps this branch from
                             //          running a second time for att.
1355                         attr(att);
1356                         return;
1357                     }
1358                 }
1359                 //              Ensure the attribute string array capacity
1360                 mAttrs.setLength(mAttrIdx);
1361                 mItems = mAttrs.mItems;
1362                 return;
1363 
1364             case EOS:
1365                 panic(FAULT);  // no break: continues into default if panic returns
1366 
1367             default:
1368                 //              Read the attribute name and value
1369                 att.chars = qname(mIsNSAware);
1370                 att.name = att.local();
1371                 String type = atype(att);  // sets attribute's type on att.id
1372                 wsskip();
1373                 if (getch() != '=') {
1374                     panic(FAULT);
1375                 }
1376                 bqstr((char) att.id);   // read the value with normalization.
1377                 String val = new String(mBuff, 1, mBuffIdx);
1378                 Pair next = pair(att);  // pair for the following attribute
1379                 next.num = (att.num & ~0x1);  // inherit attribute flags
1380                 //              Put a namespace declaration on top of the prefix stack
1381                 if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
1382                     //          An ordinary attribute
                         //          mAttrIdx is raised only for the duration of the
                         //          recursive call, so after it this attribute owns
                         //          the entry at the restored index.
1383                     mAttrIdx++;
1384                     attr(next);     // recursive call to parse the next attribute
1385                     mAttrIdx--;
1386                     //          Add the attribute to the attributes string array
1387                     char idx = (char) (mAttrIdx << 3);  // entries are eight slots apart
1388                     mItems[idx + 1] = att.qname();  // attr qname
1389                     mItems[idx + 2] = (mIsNSAware) ? att.name : ""; // attr local name
1390                     mItems[idx + 3] = val;          // attr value
1391                     mItems[idx + 4] = type;         // attr type
1392                     switch (att.num & 0x3) {
1393                         case 0x0:
1394                             mItems[idx + 5] = null;
1395                             break;
1396 
1397                         case 0x1:  // declared attribute
1398                             mItems[idx + 5] = "d";
1399                             break;
1400 
1401                         default:  // 0x2, 0x3 - default attribute always declared
1402                             mItems[idx + 5] = "D";
1403                             break;
1404                     }
1405                     //          Resolve the prefix if any and report the attribute
1406                     //          NOTE: The attribute does not accept the default namespace.
1407                     mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars) : "";
1408                 } else {
1409                     //          A namespace declaration. mPref.name contains prefix and
1410                     //          mPref.value contains namespace URI set by isdecl method.
1411                     //          Report a start of the new mapping
1412                     newPrefix();
1413                     //          Recursive call to parse the next attribute
1414                     attr(next);
1415                     //          NOTE: The namespace declaration is not reported.
1416                 }
1417                 del(next);
1418                 break;
1419         }
1420     }
1421 
1422     /**
1423      * Retrieves attribute type.
1424      *
1425      * This method sets the type of normalization in the attribute
1426      * <code>id</code> field and returns the name of attribute type.
1427      *
1428      * @param att An object which represents current attribute.
1429      * @return The name of the attribute type.
1430      * @exception Exception is parser specific exception form panic method.
1431      */
atype(Pair att)1432     private String atype(Pair att)
1433             throws Exception {
1434         Pair attr;
1435 
1436         // CDATA-type normalization by default [#3.3.3]
1437         att.id = 'c';
1438         if (mElm.list == null || (attr = find(mElm.list, att.chars)) == null) {
1439             return "CDATA";
1440         }
1441 
1442         att.num |= 0x1;  // attribute is declared
1443 
1444         // Non-CDATA normalization except when the attribute type is CDATA.
1445         att.id = 'i';
1446         switch (attr.id) {
1447             case 'i':
1448                 return "ID";
1449 
1450             case 'r':
1451                 return "IDREF";
1452 
1453             case 'R':
1454                 return "IDREFS";
1455 
1456             case 'n':
1457                 return "ENTITY";
1458 
1459             case 'N':
1460                 return "ENTITIES";
1461 
1462             case 't':
1463                 return "NMTOKEN";
1464 
1465             case 'T':
1466                 return "NMTOKENS";
1467 
1468             case 'u':
1469                 return "NMTOKEN";
1470 
1471             case 'o':
1472                 return "NOTATION";
1473 
1474             case 'c':
1475                 att.id = 'c';
1476                 return "CDATA";
1477 
1478             default:
1479                 panic(FAULT);
1480         }
1481         return null;
1482     }
1483 
1484     /**
1485      * Parses a comment.
1486      *
1487      * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1488      * with first &apos;-&apos; after &apos;&lt;!&apos;.
1489      *
1490      * @exception Exception is parser specific exception from panic method.
1491      */
1492     @SuppressWarnings("fallthrough")
comm()1493     private void comm() throws Exception {
1494         if (mPh == PH_DOC_START) {
1495             mPh = PH_MISC_DTD;  // misc before DTD
1496         }               // '<!' has been already read by dispetcher.
1497         char ch;
1498         mBuffIdx = -1;
1499         for (short st = 0; st >= 0;) {
1500             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
1501             if (ch == EOS) {
1502                 panic(FAULT);
1503             }
1504             switch (st) {
1505                 case 0:     // first '-' of the comment open
1506                     if (ch == '-') {
1507                         st = 1;
1508                     } else {
1509                         panic(FAULT);
1510                     }
1511                     break;
1512 
1513                 case 1:     // secind '-' of the comment open
1514                     if (ch == '-') {
1515                         st = 2;
1516                     } else {
1517                         panic(FAULT);
1518                     }
1519                     break;
1520 
1521                 case 2:     // skip the comment body
1522                     switch (ch) {
1523                         case '-':
1524                             st = 3;
1525                             break;
1526 
1527                         default:
1528                             bappend(ch);
1529                             break;
1530                     }
1531                     break;
1532 
1533                 case 3:     // second '-' of the comment close
1534                     switch (ch) {
1535                         case '-':
1536                             st = 4;
1537                             break;
1538 
1539                         default:
1540                             bappend('-');
1541                             bappend(ch);
1542                             st = 2;
1543                             break;
1544                     }
1545                     break;
1546 
1547                 case 4:     // '>' of the comment close
1548                     if (ch == '>') {
1549                         comm(mBuff, mBuffIdx + 1);
1550                         st = -1;
1551                         break;
1552                     }
1553                 // else - panic [#2.5 compatibility note]
1554 
1555                 default:
1556                     panic(FAULT);
1557             }
1558         }
1559     }
1560 
1561     /**
1562      * Parses a processing instruction.
1563      *
1564      * The &apos;&lt;?&apos; is read in dispatcher so the method starts with
1565      * first character of PI target name after &apos;&lt;?&apos;.
1566      *
1567      * @exception Exception is parser specific exception form panic method.
1568      * @exception IOException
1569      */
pi()1570     private void pi() throws Exception {
1571         // '<?' has been already read by dispetcher.
1572         char ch;
1573         String str = null;
1574         mBuffIdx = -1;
1575         for (short st = 0; st >= 0;) {
1576             ch = getch();
1577             if (ch == EOS) {
1578                 panic(FAULT);
1579             }
1580             switch (st) {
1581                 case 0:     // read the PI target name
1582                     switch (chtyp(ch)) {
1583                         case 'a':
1584                         case 'A':
1585                         case '_':
1586                         case ':':
1587                         case 'X':
1588                             bkch();
1589                             str = name(false);
1590                             //          PI target name may not be empty string [#2.6]
1591                             //          PI target name 'XML' is reserved [#2.6]
1592                             if ((str.length() == 0)
1593                                     || (mXml.name.equals(str.toLowerCase()) == true)) {
1594                                 panic(FAULT);
1595                             }
1596                             //          This is processing instruction
1597                             if (mPh == PH_DOC_START) // the begining of the document
1598                             {
1599                                 mPh = PH_MISC_DTD;    // misc before DTD
1600                             }
1601                             wsskip();  // skip spaces after the PI target name
1602                             st = 1;    // accumulate the PI body
1603                             mBuffIdx = -1;
1604                             break;
1605 
1606                         default:
1607                             panic(FAULT);
1608                     }
1609                     break;
1610 
1611                 case 1:     // accumulate the PI body
1612                     switch (ch) {
1613                         case '?':
1614                             st = 2;  // end of the PI body
1615                             break;
1616 
1617                         default:
1618                             bappend(ch);
1619                             break;
1620                     }
1621                     break;
1622 
1623                 case 2:     // end of the PI body
1624                     switch (ch) {
1625                         case '>':
1626                             //          PI has been read.
1627                             pi(str, new String(mBuff, 0, mBuffIdx + 1));
1628                             st = -1;
1629                             break;
1630 
1631                         case '?':
1632                             bappend('?');
1633                             break;
1634 
1635                         default:
1636                             bappend('?');
1637                             bappend(ch);
1638                             st = 1;  // accumulate the PI body
1639                             break;
1640                     }
1641                     break;
1642 
1643                 default:
1644                     panic(FAULT);
1645             }
1646         }
1647     }
1648 
1649     /**
1650      * Parses a character data.
1651      *
1652      * The &apos;&lt;!&apos; part is read in dispatcher so the method starts
1653      * with first &apos;[&apos; after &apos;&lt;!&apos;.
1654      *
1655      * @exception Exception is parser specific exception form panic method.
1656      * @exception IOException
1657      */
cdat()1658     private void cdat()
1659             throws Exception {
1660         // '<!' has been already read by dispetcher.
1661         char ch;
1662         mBuffIdx = -1;
1663         for (short st = 0; st >= 0;) {
1664             ch = getch();
1665             switch (st) {
1666                 case 0:     // the first '[' of the CDATA open
1667                     if (ch == '[') {
1668                         st = 1;
1669                     } else {
1670                         panic(FAULT);
1671                     }
1672                     break;
1673 
1674                 case 1:     // read "CDATA"
1675                     if (chtyp(ch) == 'A') {
1676                         bappend(ch);
1677                     } else {
1678                         if ("CDATA".equals(
1679                                 new String(mBuff, 0, mBuffIdx + 1)) != true) {
1680                             panic(FAULT);
1681                         }
1682                         bkch();
1683                         st = 2;
1684                     }
1685                     break;
1686 
1687                 case 2:     // the second '[' of the CDATA open
1688                     if (ch != '[') {
1689                         panic(FAULT);
1690                     }
1691                     mBuffIdx = -1;
1692                     st = 3;
1693                     break;
1694 
1695                 case 3:     // read data before the first ']'
1696                     if (ch != ']') {
1697                         bappend(ch);
1698                     } else {
1699                         st = 4;
1700                     }
1701                     break;
1702 
1703                 case 4:     // read the second ']' or continue to read the data
1704                     if (ch != ']') {
1705                         bappend(']');
1706                         bappend(ch);
1707                         st = 3;
1708                     } else {
1709                         st = 5;
1710                     }
1711                     break;
1712 
1713                 case 5:     // read '>' or continue to read the data
1714                     switch (ch) {
1715                         case ']':
1716                             bappend(']');
1717                             break;
1718 
1719                         case '>':
1720                             bflash();
1721                             st = -1;
1722                             break;
1723 
1724                         default:
1725                             bappend(']');
1726                             bappend(']');
1727                             bappend(ch);
1728                             st = 3;
1729                             break;
1730                     }
1731                     break;
1732 
1733                 default:
1734                     panic(FAULT);
1735             }
1736         }
1737     }
1738 
1739     /**
1740      * Reads a xml name.
1741      *
1742      * The xml name must conform "Namespaces in XML" specification. Therefore
1743      * the ':' character is not allowed in the name. This method should be used
1744      * for PI and entity names which may not have a namespace according to the
1745      * specification mentioned above.
1746      *
1747      * @param ns The true value turns namespace conformance on.
1748      * @return The name has been read.
1749      * @exception Exception When incorrect character appear in the name.
1750      * @exception IOException
1751      */
name(boolean ns)1752     protected String name(boolean ns)
1753             throws Exception {
1754         mBuffIdx = -1;
1755         bname(ns);
1756         return new String(mBuff, 1, mBuffIdx);
1757     }
1758 
1759     /**
1760      * Reads a qualified xml name.
1761      *
1762      * The characters of a qualified name is an array of characters. The first
1763      * (chars[0]) character is the index of the colon character which separates
1764      * the prefix from the local name. If the index is zero, the name does not
1765      * contain separator or the parser works in the namespace unaware mode. The
1766      * length of qualified name is the length of the array minus one.
1767      *
1768      * @param ns The true value turns namespace conformance on.
1769      * @return The characters of a qualified name.
1770      * @exception Exception When incorrect character appear in the name.
1771      * @exception IOException
1772      */
qname(boolean ns)1773     protected char[] qname(boolean ns)
1774             throws Exception {
1775         mBuffIdx = -1;
1776         bname(ns);
1777         char chars[] = new char[mBuffIdx + 1];
1778         System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
1779         return chars;
1780     }
1781 
1782     /**
1783      * Reads the public or/and system identifiers.
1784      *
1785      * @param inp The input object.
1786      * @exception Exception is parser specific exception form panic method.
1787      * @exception IOException
1788      */
pubsys(Input inp)1789     private void pubsys(Input inp)
1790             throws Exception {
1791         Pair pair = pubsys(' ');
1792         inp.pubid = pair.name;
1793         inp.sysid = pair.value;
1794         del(pair);
1795     }
1796 
1797     /**
1798      * Reads the public or/and system identifiers.
1799      *
1800      * @param flag The 'N' allows public id be without system id.
1801      * @return The public or/and system identifiers pair.
1802      * @exception Exception is parser specific exception form panic method.
1803      * @exception IOException
1804      */
1805     @SuppressWarnings("fallthrough")
pubsys(char flag)1806     private Pair pubsys(char flag) throws Exception {
1807         Pair ids = pair(null);
1808         String str = name(false);
1809         if ("PUBLIC".equals(str) == true) {
1810             bqstr('i');  // non-CDATA normalization [#4.2.2]
1811             ids.name = new String(mBuff, 1, mBuffIdx);
1812             switch (wsskip()) {
1813                 case '\"':
1814                 case '\'':
1815                     bqstr(' ');
1816                     ids.value = new String(mBuff, 1, mBuffIdx);
1817                     break;
1818 
1819                 case EOS:
1820                     panic(FAULT);
1821 
1822                 default:
1823                     if (flag != 'N') // [#4.7]
1824                     {
1825                         panic(FAULT);
1826                     }
1827                     ids.value = null;
1828                     break;
1829             }
1830             return ids;
1831         } else if ("SYSTEM".equals(str) == true) {
1832             ids.name = null;
1833             bqstr(' ');
1834             ids.value = new String(mBuff, 1, mBuffIdx);
1835             return ids;
1836         }
1837         panic(FAULT);
1838         return null;
1839     }
1840 
1841     /**
1842      * Reads an attribute value.
1843      *
1844      * The grammar which this method can read is:<br />
1845      * <code>eqstr := S &quot;=&quot; qstr</code><br />
1846      * <code>qstr  := S (&quot;'&quot; string &quot;'&quot;) |
1847      *  ('&quot;' string '&quot;')</code><br /> This method resolves entities
1848      * inside a string unless the parser parses DTD.
1849      *
1850      * @param flag The '=' character forces the method to accept the '='
1851      * character before quoted string and read the following string as not an
1852      * attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization;
1853      * '-' - not an attribute value; 'd' - in DTD context.
1854      * @return The content of the quoted strign as a string.
1855      * @exception Exception is parser specific exception form panic method.
1856      * @exception IOException
1857      */
eqstr(char flag)1858     protected String eqstr(char flag) throws Exception {
1859         if (flag == '=') {
1860             wsskip();
1861             if (getch() != '=') {
1862                 panic(FAULT);
1863             }
1864         }
1865         bqstr((flag == '=') ? '-' : flag);
1866         return new String(mBuff, 1, mBuffIdx);
1867     }
1868 
    /**
     * Resolves an entity reference.
     *
     * This method resolves built-in and character entity references. General
     * entities found in the entity table are pushed as a new input; for
     * external ones the application is asked for an input source through
     * <code>resolveEnt</code>. While the parser is in the DTD phase, named
     * references other than the built-in ones are left in the buffer as is.
     *
     * The reference text is collected in the parser buffer behind a leading
     * {@code &}; once the reference is handled the buffer offset is restored
     * so that only the replacement character (if any) remains.
     *
     * @param flag The 'x' character forces the method to report a skipped
     * entity: an unknown or unresolved entity does not cause panic and its
     * name is returned instead; 'i' character - indicates non-CDATA
     * normalization.
     * @return Name of unresolved entity or <code>null</code> if entity had been
     * resolved successfully.
     * @exception Exception is parser specific exception from panic method.
     * @exception IOException
     */
    @SuppressWarnings("fallthrough")
    private String ent(char flag) throws Exception {
        char ch;
        // Buffer position of the '&' appended below; the reference text
        // starts at idx + 1
        int idx = mBuffIdx + 1;
        Input inp = null;
        String str = null;
        mESt = 0x100;  // reset the built-in entity recognizer
        bappend('&');
        for (short st = 0; st >= 0;) {
            ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
            switch (st) {
                case 0:     // the first character of the entity name
                case 1:     // read built-in entity name
                    switch (chtyp(ch)) {
                        case 'd':
                        case '.':
                        case '-':
                            // These characters may not start a name
                            if (st != 1) {
                                panic(FAULT);
                            }
                            // fall through: otherwise a regular name character
                        case 'a':
                        case 'A':
                        case '_':
                        case 'X':
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ':':
                            // A colon is rejected in namespace aware mode
                            if (mIsNSAware != false) {
                                panic(FAULT);
                            }
                            bappend(ch);
                            eappend(ch);
                            st = 1;
                            break;

                        case ';':
                            if (mESt < 0x100) {
                                // The entity is a built-in entity: replace
                                // the reference text with the recognized
                                // character
                                mBuffIdx = idx - 1;
                                bappend(mESt);
                                st = -1;
                                break;
                            } else if (mPh == PH_DTD) {
                                // In DTD entity declaration has to resolve character
                                // entities and include "as is" others. [#4.4.7]
                                bappend(';');
                                st = -1;
                                break;
                            }
                            // Convert an entity name to a string
                            str = new String(mBuff, idx + 1, mBuffIdx - idx);
                            inp = mEnt.get(str);
                            // Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (inp != null) {
                                if (inp.chars == null) {
                                    // External entity
                                    InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
                                    if (is != null) {
                                        push(new Input(BUFFSIZE_READER));
                                        setinp(is);
                                        mInp.pubid = inp.pubid;
                                        mInp.sysid = inp.sysid;
                                        str = null;  // the entity is resolved
                                    } else {
                                        // Unresolved external entity
                                        if (flag != 'x') {
                                            panic(FAULT);  // unknown entity within markup
                                        }
                                        // str is name of unresolved entity
                                    }
                                } else {
                                    // Internal entity
                                    push(inp);
                                    str = null;  // the entity is resolved
                                }
                            } else {
                                // Unknown or general unparsed entity
                                if (flag != 'x') {
                                    panic(FAULT);  // unknown entity within markup
                                }
                                // str is name of unresolved entity
                            }
                            st = -1;
                            break;

                        case '#':
                            // '#' is valid only right after the '&'
                            if (st != 0) {
                                panic(FAULT);
                            }
                            st = 2;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                case 2:     // read character entity
                    switch (chtyp(ch)) {
                        case 'd':
                            bappend(ch);
                            break;

                        case ';':
                            // Convert the character entity to a character;
                            // values of 0xffff and above are rejected
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 10);
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                panic(FAULT);
                            }
                            // Restore the buffer offset
                            mBuffIdx = idx - 1;
                            // NOTE(review): the two-argument bappend
                            // presumably applies the normalization selected
                            // by flag - confirm against its definition
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        case 'a':
                            // If the entity buffer is empty and ch == 'x'
                            // this is a hexadecimal character reference
                            if ((mBuffIdx == idx) && (ch == 'x')) {
                                st = 3;
                                break;
                            }
                            // fall through: any other letter is an error
                        default:
                            panic(FAULT);
                    }
                    break;

                case 3:     // read hex character entity
                    switch (chtyp(ch)) {
                        case 'A':
                        case 'a':
                        case 'd':
                            // Letters that are not hex digits are caught by
                            // parseInt below
                            bappend(ch);
                            break;

                        case ';':
                            // Convert the character entity to a character;
                            // values of 0xffff and above are rejected
                            try {
                                int i = Integer.parseInt(
                                        new String(mBuff, idx + 1, mBuffIdx - idx), 16);
                                if (i >= 0xffff) {
                                    panic(FAULT);
                                }
                                ch = (char) i;
                            } catch (NumberFormatException nfe) {
                                panic(FAULT);
                            }
                            // Restore the buffer offset
                            mBuffIdx = idx - 1;
                            if (ch == ' ' || mInp.next != null) {
                                bappend(ch, flag);
                            } else {
                                bappend(ch);
                            }
                            st = -1;
                            break;

                        default:
                            panic(FAULT);
                    }
                    break;

                default:
                    panic(FAULT);
            }
        }

        return str;
    }
2062 
2063     /**
2064      * Resoves a parameter entity.
2065      *
2066      * This method resolves a parameter entity references. It is also reports
2067      * external entities to the application.
2068      *
2069      * @param flag The '-' instruct the method to do not set up surrounding
2070      * spaces [#4.4.8].
2071      * @exception Exception is parser specific exception form panic method.
2072      * @exception IOException
2073      */
2074     @SuppressWarnings("fallthrough")
pent(char flag)2075     private void pent(char flag) throws Exception {
2076         char ch;
2077         int idx = mBuffIdx + 1;
2078         Input inp = null;
2079         String str = null;
2080         bappend('%');
2081         if (mPh != PH_DTD) // the DTD internal subset
2082         {
2083             return;         // Not Recognized [#4.4.1]
2084         }               //              Read entity name
2085         bname(false);
2086         str = new String(mBuff, idx + 2, mBuffIdx - idx - 1);
2087         if (getch() != ';') {
2088             panic(FAULT);
2089         }
2090         inp = mPEnt.get(str);
2091         //              Restore the buffer offset
2092         mBuffIdx = idx - 1;
2093         if (inp != null) {
2094             if (inp.chars == null) {
2095                 //              External parameter entity
2096                 InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
2097                 if (is != null) {
2098                     if (flag != '-') {
2099                         bappend(' ');  // tail space
2100                     }
2101                     push(new Input(BUFFSIZE_READER));
2102                     // BUG: there is no leading space! [#4.4.8]
2103                     setinp(is);
2104                     mInp.pubid = inp.pubid;
2105                     mInp.sysid = inp.sysid;
2106                 } else {
2107                     //          Unresolved external parameter entity
2108                     skippedEnt("%" + str);
2109                 }
2110             } else {
2111                 //              Internal parameter entity
2112                 if (flag == '-') {
2113                     //          No surrounding spaces
2114                     inp.chIdx = 1;
2115                 } else {
2116                     //          Insert surrounding spaces
2117                     bappend(' ');  // tail space
2118                     inp.chIdx = 0;
2119                 }
2120                 push(inp);
2121             }
2122         } else {
2123             //          Unknown parameter entity
2124             skippedEnt("%" + str);
2125         }
2126     }
2127 
2128     /**
2129      * Recognizes and handles a namespace declaration.
2130      *
2131      * This method identifies a type of namespace declaration if any and puts
2132      * new mapping on top of prefix stack.
2133      *
2134      * @param name The attribute qualified name (<code>name.value</code> is a
2135      * <code>String</code> object which represents the attribute prefix).
2136      * @param value The attribute value.
2137      * @return <code>true</code> if a namespace declaration is recognized.
2138      */
isdecl(Pair name, String value)2139     private boolean isdecl(Pair name, String value) {
2140         if (name.chars[0] == 0) {
2141             if ("xmlns".equals(name.name) == true) {
2142                 //              New default namespace declaration
2143                 mPref = pair(mPref);
2144                 mPref.list = mElm;  // prefix owner element
2145                 mPref.value = value;
2146                 mPref.name = "";
2147                 mPref.chars = NONS;
2148                 mElm.num++;  // namespace counter
2149                 return true;
2150             }
2151         } else {
2152             if (name.eqpref(XMLNS) == true) {
2153                 //              New prefix declaration
2154                 int len = name.name.length();
2155                 mPref = pair(mPref);
2156                 mPref.list = mElm;  // prefix owner element
2157                 mPref.value = value;
2158                 mPref.name = name.name;
2159                 mPref.chars = new char[len + 1];
2160                 mPref.chars[0] = (char) (len + 1);
2161                 name.name.getChars(0, len, mPref.chars, 1);
2162                 mElm.num++;  // namespace counter
2163                 return true;
2164             }
2165         }
2166         return false;
2167     }
2168 
2169     /**
2170      * Resolves a prefix.
2171      *
2172      * @return The namespace assigned to the prefix.
2173      * @exception Exception When mapping for specified prefix is not found.
2174      */
rslv(char[] qname)2175     private String rslv(char[] qname)
2176             throws Exception {
2177         for (Pair pref = mPref; pref != null; pref = pref.next) {
2178             if (pref.eqpref(qname) == true) {
2179                 return pref.value;
2180             }
2181         }
2182         if (qname[0] == 1) {  // QNames like ':local'
2183             for (Pair pref = mPref; pref != null; pref = pref.next) {
2184                 if (pref.chars[0] == 0) {
2185                     return pref.value;
2186                 }
2187             }
2188         }
2189         panic(FAULT);
2190         return null;
2191     }
2192 
2193     /**
2194      * Skips xml white space characters.
2195      *
2196      * This method skips white space characters (' ', '\t', '\n', '\r') and
2197      * looks ahead not white space character.
2198      *
2199      * @return The first not white space look ahead character.
2200      * @exception IOException
2201      */
wsskip()2202     protected char wsskip()
2203             throws IOException {
2204         char ch;
2205         while (true) {
2206             //          Read next character
2207             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
2208             if (ch < 0x80) {
2209                 if (nmttyp[ch] != 3) // [ \t\n\r]
2210                 {
2211                     break;
2212                 }
2213             } else {
2214                 break;
2215             }
2216         }
2217         mChIdx--;  // bkch();
2218         return ch;
2219     }
2220 
    /**
     * Reports document type.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity or <code>null</code>.
     * @param sysid The system identifier of the entity or <code>null</code>.
     * @exception SAXException may be thrown by the implementation.
     */
    protected abstract void docType(String name, String pubid, String sysid)
            throws SAXException;
2230 
    /**
     * Reports a comment.
     *
     * The comment parser of this class calls this method with the parser's
     * own character buffer once the closing of the comment has been read.
     * NOTE(review): since the buffer is shared, an implementation that keeps
     * the text presumably has to copy it - confirm against the subclasses.
     *
     * @param text The comment text starting from first character.
     * @param length The number of characters in comment.
     */
    protected abstract void comm(char[] text, int length);
2238 
    /**
     * Reports a processing instruction.
     *
     * The PI parser of this class calls this method after the closing
     * "?&gt;" has been read.
     *
     * @param target The processing instruction target name.
     * @param body The processing instruction body text.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void pi(String target, String body)
            throws Exception;
2247 
    /**
     * Reports new namespace prefix.
     *
     * The Namespace prefix being declared is <code>mPref.name</code> and the
     * Namespace URI the prefix is mapped to is <code>mPref.value</code>. An
     * empty string is used for the default element namespace, which has no
     * prefix.
     *
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void newPrefix()
            throws Exception;
2256 
    /**
     * Reports skipped entity name.
     *
     * For parameter entities the parser passes the entity name prefixed
     * with '%'.
     *
     * @param name The entity name.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract void skippedEnt(String name)
            throws Exception;
2264 
    /**
     * Returns an <code>InputSource</code> for specified entity or
     * <code>null</code>.
     *
     * The entity parsers of this class treat a <code>null</code> result as
     * an unresolved external entity.
     *
     * @param name The name of the entity.
     * @param pubid The public identifier of the entity.
     * @param sysid The system identifier of the entity.
     * @return The input source to read the entity from or <code>null</code>.
     * @exception Exception may be thrown by the implementation.
     */
    protected abstract InputSource resolveEnt(
            String name, String pubid, String sysid)
            throws Exception;
2277 
    /**
     * Reports a notation declaration.
     *
     * This is a callback implemented by the concrete parser.
     *
     * @param name The notation's name.
     * @param pubid The notation's public identifier, or null if none was given.
     * @param sysid The notation's system identifier, or null if none was given.
     * @exception Exception is implementation specific.
     */
    protected abstract void notDecl(String name, String pubid, String sysid)
            throws Exception;
2287 
    /**
     * Reports an unparsed entity declaration.
     *
     * This is a callback implemented by the concrete parser.
     *
     * @param name The unparsed entity's name.
     * @param pubid The entity's public identifier, or null if none was given.
     * @param sysid The entity's system identifier.
     * @param notation The name of the associated notation.
     * @exception Exception is implementation specific.
     */
    protected abstract void unparsedEntDecl(
            String name, String pubid, String sysid, String notation)
            throws Exception;
2299 
    /**
     * Notifies the handler about a fatal parsing error.
     *
     * NOTE(review): the parsing routines of this class keep executing after a
     * call to this method, so an implementation is presumably expected to
     * throw rather than return - confirm against the concrete parsers.
     *
     * @param msg The problem description message.
     * @exception Exception is implementation specific.
     */
    protected abstract void panic(String msg)
            throws Exception;
2307 
2308     /**
2309      * Reads a qualified xml name.
2310      *
2311      * This is low level routine which leaves a qName in the buffer. The
2312      * characters of a qualified name is an array of characters. The first
2313      * (chars[0]) character is the index of the colon character which separates
2314      * the prefix from the local name. If the index is zero, the name does not
2315      * contain separator or the parser works in the namespace unaware mode. The
2316      * length of qualified name is the length of the array minus one.
2317      *
2318      * @param ns The true value turns namespace conformance on.
2319      * @exception Exception is parser specific exception form panic method.
2320      * @exception IOException
2321      */
bname(boolean ns)2322     private void bname(boolean ns)
2323             throws Exception {
2324         char ch;
2325         char type;
2326         mBuffIdx++;  // allocate a char for colon offset
2327         int bqname = mBuffIdx;
2328         int bcolon = bqname;
2329         int bchidx = bqname + 1;
2330         int bstart = bchidx;
2331         int cstart = mChIdx;
2332         short st = (short) ((ns == true) ? 0 : 2);
2333         while (true) {
2334             //          Read next character
2335             if (mChIdx >= mChLen) {
2336                 bcopy(cstart, bstart);
2337                 getch();
2338                 mChIdx--;  // bkch();
2339                 cstart = mChIdx;
2340                 bstart = bchidx;
2341             }
2342             ch = mChars[mChIdx++];
2343             type = (char) 0;  // [X]
2344             if (ch < 0x80) {
2345                 type = (char) nmttyp[ch];
2346             } else if (ch == EOS) {
2347                 panic(FAULT);
2348             }
2349             //          Parse QName
2350             switch (st) {
2351                 case 0:     // read the first char of the prefix
2352                 case 2:     // read the first char of the suffix
2353                     switch (type) {
2354                         case 0:  // [aA_X]
2355                             bchidx++;  // append char to the buffer
2356                             st++;      // (st == 0)? 1: 3;
2357                             break;
2358 
2359                         case 1:  // [:]
2360                             mChIdx--;  // bkch();
2361                             st++;      // (st == 0)? 1: 3;
2362                             break;
2363 
2364                         default:
2365                             panic(FAULT);
2366                     }
2367                     break;
2368 
2369                 case 1:     // read the prefix
2370                 case 3:     // read the suffix
2371                     switch (type) {
2372                         case 0:  // [aA_X]
2373                         case 2:  // [.-d]
2374                             bchidx++;  // append char to the buffer
2375                             break;
2376 
2377                         case 1:  // [:]
2378                             bchidx++;  // append char to the buffer
2379                             if (ns == true) {
2380                                 if (bcolon != bqname) {
2381                                     panic(FAULT);  // it must be only one colon
2382                                 }
2383                                 bcolon = bchidx - 1;
2384                                 if (st == 1) {
2385                                     st = 2;
2386                                 }
2387                             }
2388                             break;
2389 
2390                         default:
2391                             mChIdx--;  // bkch();
2392                             bcopy(cstart, bstart);
2393                             mBuff[bqname] = (char) (bcolon - bqname);
2394                             return;
2395                     }
2396                     break;
2397 
2398                 default:
2399                     panic(FAULT);
2400             }
2401         }
2402     }
2403 
2404     /**
2405      * Reads a nmtoken.
2406      *
2407      * This is low level routine which leaves a nmtoken in the buffer.
2408      *
2409      * @exception Exception is parser specific exception form panic method.
2410      * @exception IOException
2411      */
2412     @SuppressWarnings("fallthrough")
bntok()2413     private void bntok() throws Exception {
2414         char ch;
2415         mBuffIdx = -1;
2416         bappend((char) 0);  // default offset to the colon char
2417         while (true) {
2418             ch = getch();
2419             switch (chtyp(ch)) {
2420                 case 'a':
2421                 case 'A':
2422                 case 'd':
2423                 case '.':
2424                 case ':':
2425                 case '-':
2426                 case '_':
2427                 case 'X':
2428                     bappend(ch);
2429                     break;
2430 
2431                 case 'Z':
2432                     panic(FAULT);
2433 
2434                 default:
2435                     bkch();
2436                     return;
2437             }
2438         }
2439     }
2440 
2441     /**
2442      * Recognizes a keyword.
2443      *
2444      * This is low level routine which recognizes one of keywords in the buffer.
2445      * Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN -
2446      * t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED -
2447      * Q IMPLIED - I FIXED - F
2448      *
2449      * @return an id of a keyword or '?'.
2450      * @exception Exception is parser specific exception form panic method.
2451      * @exception IOException
2452      */
bkeyword()2453     private char bkeyword()
2454             throws Exception {
2455         String str = new String(mBuff, 1, mBuffIdx);
2456         switch (str.length()) {
2457             case 2:  // ID
2458                 return ("ID".equals(str) == true) ? 'i' : '?';
2459 
2460             case 5:  // IDREF, CDATA, FIXED
2461                 switch (mBuff[1]) {
2462                     case 'I':
2463                         return ("IDREF".equals(str) == true) ? 'r' : '?';
2464                     case 'C':
2465                         return ("CDATA".equals(str) == true) ? 'c' : '?';
2466                     case 'F':
2467                         return ("FIXED".equals(str) == true) ? 'F' : '?';
2468                     default:
2469                         break;
2470                 }
2471                 break;
2472 
2473             case 6:  // IDREFS, ENTITY
2474                 switch (mBuff[1]) {
2475                     case 'I':
2476                         return ("IDREFS".equals(str) == true) ? 'R' : '?';
2477                     case 'E':
2478                         return ("ENTITY".equals(str) == true) ? 'n' : '?';
2479                     default:
2480                         break;
2481                 }
2482                 break;
2483 
2484             case 7:  // NMTOKEN, IMPLIED, ATTLIST, ELEMENT
2485                 switch (mBuff[1]) {
2486                     case 'I':
2487                         return ("IMPLIED".equals(str) == true) ? 'I' : '?';
2488                     case 'N':
2489                         return ("NMTOKEN".equals(str) == true) ? 't' : '?';
2490                     case 'A':
2491                         return ("ATTLIST".equals(str) == true) ? 'a' : '?';
2492                     case 'E':
2493                         return ("ELEMENT".equals(str) == true) ? 'e' : '?';
2494                     default:
2495                         break;
2496                 }
2497                 break;
2498 
2499             case 8:  // ENTITIES, NMTOKENS, NOTATION, REQUIRED
2500                 switch (mBuff[2]) {
2501                     case 'N':
2502                         return ("ENTITIES".equals(str) == true) ? 'N' : '?';
2503                     case 'M':
2504                         return ("NMTOKENS".equals(str) == true) ? 'T' : '?';
2505                     case 'O':
2506                         return ("NOTATION".equals(str) == true) ? 'o' : '?';
2507                     case 'E':
2508                         return ("REQUIRED".equals(str) == true) ? 'Q' : '?';
2509                     default:
2510                         break;
2511                 }
2512                 break;
2513 
2514             default:
2515                 break;
2516         }
2517         return '?';
2518     }
2519 
2520     /**
2521      * Reads a single or double quotted string in to the buffer.
2522      *
2523      * This method resolves entities inside a string unless the parser parses
2524      * DTD.
2525      *
2526      * @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' -
2527      * not an attribute value; 'd' - in DTD context.
2528      * @exception Exception is parser specific exception form panic method.
2529      * @exception IOException
2530      */
2531     @SuppressWarnings("fallthrough")
bqstr(char flag)2532     private void bqstr(char flag) throws Exception {
2533         Input inp = mInp;  // remember the original input
2534         mBuffIdx = -1;
2535         bappend((char) 0);  // default offset to the colon char
2536         char ch;
2537         for (short st = 0; st >= 0;) {
2538             ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
2539             switch (st) {
2540                 case 0:     // read a single or double quote
2541                     switch (ch) {
2542                         case ' ':
2543                         case '\n':
2544                         case '\r':
2545                         case '\t':
2546                             break;
2547 
2548                         case '\'':
2549                             st = 2;  // read a single quoted string
2550                             break;
2551 
2552                         case '\"':
2553                             st = 3;  // read a double quoted string
2554                             break;
2555 
2556                         default:
2557                             panic(FAULT);
2558                             break;
2559                     }
2560                     break;
2561 
2562                 case 2:     // read a single quoted string
2563                 case 3:     // read a double quoted string
2564                     switch (ch) {
2565                         case '\'':
2566                             if ((st == 2) && (mInp == inp)) {
2567                                 st = -1;
2568                             } else {
2569                                 bappend(ch);
2570                             }
2571                             break;
2572 
2573                         case '\"':
2574                             if ((st == 3) && (mInp == inp)) {
2575                                 st = -1;
2576                             } else {
2577                                 bappend(ch);
2578                             }
2579                             break;
2580 
2581                         case '&':
2582                             if (flag != 'd') {
2583                                 ent(flag);
2584                             } else {
2585                                 bappend(ch);
2586                             }
2587                             break;
2588 
2589                         case '%':
2590                             if (flag == 'd') {
2591                                 pent('-');
2592                             } else {
2593                                 bappend(ch);
2594                             }
2595                             break;
2596 
2597                         case '<':
2598                             if ((flag == '-') || (flag == 'd')) {
2599                                 bappend(ch);
2600                             } else {
2601                                 panic(FAULT);
2602                             }
2603                             break;
2604 
2605                         case EOS:               // EOS before single/double quote
2606                             panic(FAULT);
2607 
2608                         case '\r':     // EOL processing [#2.11 & #3.3.3]
2609                             if (flag != ' ' && mInp.next == null) {
2610                                 if (getch() != '\n') {
2611                                     bkch();
2612                                 }
2613                                 ch = '\n';
2614                             }
2615                         default:
2616                             bappend(ch, flag);
2617                             break;
2618                     }
2619                     break;
2620 
2621                 default:
2622                     panic(FAULT);
2623             }
2624         }
2625         //              There is maximum one space at the end of the string in
2626         //              i-mode (non CDATA normalization) and it has to be removed.
2627         if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) {
2628             mBuffIdx -= 1;
2629         }
2630     }
2631 
    /**
     * Reports characters and empties the parser's buffer. This method is called
     * only if parser is going to return control to the main loop. This means
     * that this method may use parser buffer to report white space without
     * copying characters to temporary buffer.
     *
     * @exception Exception is implementation specific.
     */
    protected abstract void bflash()
            throws Exception;
2640 
    /**
     * Reports white space characters and empties the parser's buffer. This
     * method is called only if parser is going to return control to the main
     * loop. This means that this method may use parser buffer to report white
     * space without copying characters to temporary buffer.
     *
     * @exception Exception is implementation specific.
     */
    protected abstract void bflash_ws()
            throws Exception;
2649 
2650     /**
2651      * Appends a character to parser's buffer with normalization.
2652      *
2653      * @param ch The character to append to the buffer.
2654      * @param mode The normalization mode.
2655      */
bappend(char ch, char mode)2656     private void bappend(char ch, char mode) {
2657         //              This implements attribute value normalization as
2658         //              described in the XML specification [#3.3.3].
2659         switch (mode) {
2660             case 'i':  // non CDATA normalization
2661                 switch (ch) {
2662                     case ' ':
2663                     case '\n':
2664                     case '\r':
2665                     case '\t':
2666                         if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) {
2667                             bappend(' ');
2668                         }
2669                         return;
2670 
2671                     default:
2672                         break;
2673                 }
2674                 break;
2675 
2676             case 'c':  // CDATA normalization
2677                 switch (ch) {
2678                     case '\n':
2679                     case '\r':
2680                     case '\t':
2681                         ch = ' ';
2682                         break;
2683 
2684                     default:
2685                         break;
2686                 }
2687                 break;
2688 
2689             default:  // no normalization
2690                 break;
2691         }
2692         mBuffIdx++;
2693         if (mBuffIdx < mBuff.length) {
2694             mBuff[mBuffIdx] = ch;
2695         } else {
2696             mBuffIdx--;
2697             bappend(ch);
2698         }
2699     }
2700 
2701     /**
2702      * Appends a character to parser's buffer.
2703      *
2704      * @param ch The character to append to the buffer.
2705      */
bappend(char ch)2706     private void bappend(char ch) {
2707         try {
2708             mBuff[++mBuffIdx] = ch;
2709         } catch (Exception exp) {
2710             //          Double the buffer size
2711             char buff[] = new char[mBuff.length << 1];
2712             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2713             mBuff = buff;
2714             mBuff[mBuffIdx] = ch;
2715         }
2716     }
2717 
2718     /**
2719      * Appends (mChIdx - cidx) characters from character buffer (mChars) to
2720      * parser's buffer (mBuff).
2721      *
2722      * @param cidx The character buffer (mChars) start index.
2723      * @param bidx The parser buffer (mBuff) start index.
2724      */
bcopy(int cidx, int bidx)2725     private void bcopy(int cidx, int bidx) {
2726         int length = mChIdx - cidx;
2727         if ((bidx + length + 1) >= mBuff.length) {
2728             //          Expand the buffer
2729             char buff[] = new char[mBuff.length + length];
2730             System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
2731             mBuff = buff;
2732         }
2733         System.arraycopy(mChars, cidx, mBuff, bidx, length);
2734         mBuffIdx += length;
2735     }
2736 
2737     /**
2738      * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>,
2739      * <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state belowe
2740      * 0x100 is a built-in entity replacement character.
2741      *
2742      * @param ch the next character of an entity name.
2743      */
2744     @SuppressWarnings("fallthrough")
eappend(char ch)2745     private void eappend(char ch) {
2746         switch (mESt) {
2747             case 0x100:  // "l" or "g" or "a" or "q"
2748                 switch (ch) {
2749                     case 'l':
2750                         mESt = 0x101;
2751                         break;
2752                     case 'g':
2753                         mESt = 0x102;
2754                         break;
2755                     case 'a':
2756                         mESt = 0x103;
2757                         break;
2758                     case 'q':
2759                         mESt = 0x107;
2760                         break;
2761                     default:
2762                         mESt = 0x200;
2763                         break;
2764                 }
2765                 break;
2766 
2767             case 0x101:  // "lt"
2768                 mESt = (ch == 't') ? '<' : (char) 0x200;
2769                 break;
2770 
2771             case 0x102:  // "gt"
2772                 mESt = (ch == 't') ? '>' : (char) 0x200;
2773                 break;
2774 
2775             case 0x103:  // "am" or "ap"
2776                 switch (ch) {
2777                     case 'm':
2778                         mESt = 0x104;
2779                         break;
2780                     case 'p':
2781                         mESt = 0x105;
2782                         break;
2783                     default:
2784                         mESt = 0x200;
2785                         break;
2786                 }
2787                 break;
2788 
2789             case 0x104:  // "amp"
2790                 mESt = (ch == 'p') ? '&' : (char) 0x200;
2791                 break;
2792 
2793             case 0x105:  // "apo"
2794                 mESt = (ch == 'o') ? (char) 0x106 : (char) 0x200;
2795                 break;
2796 
2797             case 0x106:  // "apos"
2798                 mESt = (ch == 's') ? '\'' : (char) 0x200;
2799                 break;
2800 
2801             case 0x107:  // "qu"
2802                 mESt = (ch == 'u') ? (char) 0x108 : (char) 0x200;
2803                 break;
2804 
2805             case 0x108:  // "quo"
2806                 mESt = (ch == 'o') ? (char) 0x109 : (char) 0x200;
2807                 break;
2808 
2809             case 0x109:  // "quot"
2810                 mESt = (ch == 't') ? '\"' : (char) 0x200;
2811                 break;
2812 
2813             case '<':   // "lt"
2814             case '>':   // "gt"
2815             case '&':   // "amp"
2816             case '\'':  // "apos"
2817             case '\"':  // "quot"
2818                 mESt = 0x200;
2819             default:
2820                 break;
2821         }
2822     }
2823 
2824     /**
2825      * Sets up a new input source on the top of the input stack. Note, the first
2826      * byte returned by the entity's byte stream has to be the first byte in the
2827      * entity. However, the parser does not expect the byte order mask in both
2828      * cases when encoding is provided by the input source.
2829      *
2830      * @param is A new input source to set up.
2831      * @exception IOException If any IO errors occur.
2832      * @exception Exception is parser specific exception form panic method.
2833      */
setinp(InputSource is)2834     protected void setinp(InputSource is)
2835             throws Exception {
2836         Reader reader = null;
2837         mChIdx = 0;
2838         mChLen = 0;
2839         mChars = mInp.chars;
2840         mInp.src = null;
2841         if (mPh < PH_DOC_START) {
2842             mIsSAlone = false;  // default [#2.9]
2843         }
2844         mIsSAloneSet = false;
2845         if (is.getCharacterStream() != null) {
2846             //          Ignore encoding in the xml text decl.
2847             reader = is.getCharacterStream();
2848             xml(reader);
2849         } else if (is.getByteStream() != null) {
2850             String expenc;
2851             if (is.getEncoding() != null) {
2852                 //              Ignore encoding in the xml text decl.
2853                 expenc = is.getEncoding().toUpperCase();
2854                 if (expenc.equals("UTF-16")) {
2855                     reader = bom(is.getByteStream(), 'U');  // UTF-16 [#4.3.3]
2856                 } else {
2857                     reader = enc(expenc, is.getByteStream());
2858                 }
2859                 xml(reader);
2860             } else {
2861                 //              Get encoding from BOM or the xml text decl.
2862                 reader = bom(is.getByteStream(), ' ');
2863                 if (reader == null) {
2864                     //          Encoding is defined by the xml text decl.
2865                     reader = enc("UTF-8", is.getByteStream());
2866                     expenc = xml(reader);
2867                     if (expenc.startsWith("UTF-16")) {
2868                         panic(FAULT);  // UTF-16 must have BOM [#4.3.3]
2869                     }
2870                     reader = enc(expenc, is.getByteStream());
2871                 } else {
2872                     //          Encoding is defined by the BOM.
2873                     xml(reader);
2874                 }
2875             }
2876         } else {
2877             //          There is no support for public/system identifiers.
2878             panic(FAULT);
2879         }
2880         mInp.src = reader;
2881         mInp.pubid = is.getPublicId();
2882         mInp.sysid = is.getSystemId();
2883     }
2884 
2885     /**
2886      * Determines the entity encoding.
2887      *
2888      * This method gets encoding from Byte Order Mask [#4.3.3] if any. Note, the
2889      * first byte returned by the entity's byte stream has to be the first byte
2890      * in the entity. Also, there is no support for UCS-4.
2891      *
2892      * @param is A byte stream of the entity.
2893      * @param hint An encoding hint, character U means UTF-16.
2894      * @return a reader constructed from the BOM or UTF-8 by default.
2895      * @exception Exception is parser specific exception form panic method.
2896      * @exception IOException
2897      */
bom(InputStream is, char hint)2898     private Reader bom(InputStream is, char hint)
2899             throws Exception {
2900         int val = is.read();
2901         switch (val) {
2902             case 0xef:     // UTF-8
2903                 if (hint == 'U') // must be UTF-16
2904                 {
2905                     panic(FAULT);
2906                 }
2907                 if (is.read() != 0xbb) {
2908                     panic(FAULT);
2909                 }
2910                 if (is.read() != 0xbf) {
2911                     panic(FAULT);
2912                 }
2913                 return new ReaderUTF8(is);
2914 
2915             case 0xfe:     // UTF-16, big-endian
2916                 if (is.read() != 0xff) {
2917                     panic(FAULT);
2918                 }
2919                 return new ReaderUTF16(is, 'b');
2920 
2921             case 0xff:     // UTF-16, little-endian
2922                 if (is.read() != 0xfe) {
2923                     panic(FAULT);
2924                 }
2925                 return new ReaderUTF16(is, 'l');
2926 
2927             case -1:
2928                 mChars[mChIdx++] = EOS;
2929                 return new ReaderUTF8(is);
2930 
2931             default:
2932                 if (hint == 'U') // must be UTF-16
2933                 {
2934                     panic(FAULT);
2935                 }
2936                 //              Read the rest of UTF-8 character
2937                 switch (val & 0xf0) {
2938                     case 0xc0:
2939                     case 0xd0:
2940                         mChars[mChIdx++] = (char) (((val & 0x1f) << 6) | (is.read() & 0x3f));
2941                         break;
2942 
2943                     case 0xe0:
2944                         mChars[mChIdx++] = (char) (((val & 0x0f) << 12)
2945                                 | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
2946                         break;
2947 
2948                     case 0xf0:  // UCS-4 character
2949                         throw new UnsupportedEncodingException();
2950 
2951                     default:
2952                         mChars[mChIdx++] = (char) val;
2953                         break;
2954                 }
2955                 return null;
2956         }
2957     }
2958 
2959     /**
2960      * Parses the xml text declaration.
2961      *
2962      * This method gets encoding from the xml text declaration [#4.3.1] if any.
2963      * The method assumes the buffer (mChars) is big enough to accommodate whole
2964      * xml text declaration.
2965      *
2966      * @param reader is entity reader.
2967      * @return The xml text declaration encoding or default UTF-8 encoding.
2968      * @exception Exception is parser specific exception form panic method.
2969      * @exception IOException
2970      */
xml(Reader reader)2971     private String xml(Reader reader)
2972             throws Exception {
2973         String str = null;
2974         String enc = "UTF-8";
2975         char ch;
2976         int val;
2977         short st;
2978         //              Read the xml text declaration into the buffer
2979         if (mChIdx != 0) {
2980             //          The bom method have read ONE char into the buffer.
2981             st = (short) ((mChars[0] == '<') ? 1 : -1);
2982         } else {
2983             st = 0;
2984         }
2985         while (st >= 0 && mChIdx < mChars.length) {
2986             ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
2987             mChars[mChIdx++] = ch;
2988             switch (st) {
2989                 case 0:     // read '<' of xml declaration
2990                     switch (ch) {
2991                         case '<':
2992                             st = 1;
2993                             break;
2994 
2995                         case 0xfeff:    // the byte order mask
2996                             ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
2997                             mChars[mChIdx - 1] = ch;
2998                             st = (short) ((ch == '<') ? 1 : -1);
2999                             break;
3000 
3001                         default:
3002                             st = -1;
3003                             break;
3004                     }
3005                     break;
3006 
3007                 case 1:     // read '?' of xml declaration [#4.3.1]
3008                     st = (short) ((ch == '?') ? 2 : -1);
3009                     break;
3010 
3011                 case 2:     // read 'x' of xml declaration [#4.3.1]
3012                     st = (short) ((ch == 'x') ? 3 : -1);
3013                     break;
3014 
3015                 case 3:     // read 'm' of xml declaration [#4.3.1]
3016                     st = (short) ((ch == 'm') ? 4 : -1);
3017                     break;
3018 
3019                 case 4:     // read 'l' of xml declaration [#4.3.1]
3020                     st = (short) ((ch == 'l') ? 5 : -1);
3021                     break;
3022 
3023                 case 5:     // read white space after 'xml'
3024                     switch (ch) {
3025                         case ' ':
3026                         case '\t':
3027                         case '\r':
3028                         case '\n':
3029                             st = 6;
3030                             break;
3031 
3032                         default:
3033                             st = -1;
3034                             break;
3035                     }
3036                     break;
3037 
3038                 case 6:     // read content of xml declaration
3039                     switch (ch) {
3040                         case '?':
3041                             st = 7;
3042                             break;
3043 
3044                         case EOS:
3045                             st = -2;
3046                             break;
3047 
3048                         default:
3049                             break;
3050                     }
3051                     break;
3052 
3053                 case 7:     // read '>' after '?' of xml declaration
3054                     switch (ch) {
3055                         case '>':
3056                         case EOS:
3057                             st = -2;
3058                             break;
3059 
3060                         default:
3061                             st = 6;
3062                             break;
3063                     }
3064                     break;
3065 
3066                 default:
3067                     panic(FAULT);
3068                     break;
3069             }
3070         }
3071         mChLen = mChIdx;
3072         mChIdx = 0;
3073         //              If there is no xml text declaration, the encoding is default.
3074         if (st == -1) {
3075             return enc;
3076         }
3077         mChIdx = 5;  // the first white space after "<?xml"
3078         //              Parse the xml text declaration
3079         for (st = 0; st >= 0;) {
3080             ch = getch();
3081             switch (st) {
3082                 case 0:     // skip spaces after the xml declaration name
3083                     if (chtyp(ch) != ' ') {
3084                         bkch();
3085                         st = 1;
3086                     }
3087                     break;
3088 
3089                 case 1:     // read xml declaration version
3090                 case 2:     // read xml declaration encoding or standalone
3091                 case 3:     // read xml declaration standalone
3092                     switch (chtyp(ch)) {
3093                         case 'a':
3094                         case 'A':
3095                         case '_':
3096                             bkch();
3097                             str = name(false).toLowerCase();
3098                             if ("version".equals(str) == true) {
3099                                 if (st != 1) {
3100                                     panic(FAULT);
3101                                 }
3102                                 if ("1.0".equals(eqstr('=')) != true) {
3103                                     panic(FAULT);
3104                                 }
3105                                 mInp.xmlver = 0x0100;
3106                                 st = 2;
3107                             } else if ("encoding".equals(str) == true) {
3108                                 if (st != 2) {
3109                                     panic(FAULT);
3110                                 }
3111                                 mInp.xmlenc = eqstr('=').toUpperCase();
3112                                 enc = mInp.xmlenc;
3113                                 st = 3;
3114                             } else if ("standalone".equals(str) == true) {
3115                                 if ((st == 1) || (mPh >= PH_DOC_START)) // [#4.3.1]
3116                                 {
3117                                     panic(FAULT);
3118                                 }
3119                                 str = eqstr('=').toLowerCase();
3120                                 //              Check the 'standalone' value and use it [#5.1]
3121                                 if (str.equals("yes") == true) {
3122                                     mIsSAlone = true;
3123                                 } else if (str.equals("no") == true) {
3124                                     mIsSAlone = false;
3125                                 } else {
3126                                     panic(FAULT);
3127                                 }
3128                                 mIsSAloneSet = true;
3129                                 st = 4;
3130                             } else {
3131                                 panic(FAULT);
3132                             }
3133                             break;
3134 
3135                         case ' ':
3136                             break;
3137 
3138                         case '?':
3139                             if (st == 1) {
3140                                 panic(FAULT);
3141                             }
3142                             bkch();
3143                             st = 4;
3144                             break;
3145 
3146                         default:
3147                             panic(FAULT);
3148                     }
3149                     break;
3150 
3151                 case 4:     // end of xml declaration
3152                     switch (chtyp(ch)) {
3153                         case '?':
3154                             if (getch() != '>') {
3155                                 panic(FAULT);
3156                             }
3157                             if (mPh <= PH_DOC_START) {
3158                                 mPh = PH_MISC_DTD;  // misc before DTD
3159                             }
3160                             st = -1;
3161                             break;
3162 
3163                         case ' ':
3164                             break;
3165 
3166                         default:
3167                             panic(FAULT);
3168                     }
3169                     break;
3170 
3171                 default:
3172                     panic(FAULT);
3173             }
3174         }
3175         return enc;
3176     }
3177 
3178     /**
3179      * Sets up the document reader.
3180      *
3181      * @param name an encoding name.
3182      * @param is the document byte input stream.
3183      * @return a reader constructed from encoding name and input stream.
3184      * @exception UnsupportedEncodingException
3185      */
enc(String name, InputStream is)3186     private Reader enc(String name, InputStream is)
3187             throws UnsupportedEncodingException {
3188         //              DO NOT CLOSE current reader if any!
3189         if (name.equals("UTF-8")) {
3190             return new ReaderUTF8(is);
3191         } else if (name.equals("UTF-16LE")) {
3192             return new ReaderUTF16(is, 'l');
3193         } else if (name.equals("UTF-16BE")) {
3194             return new ReaderUTF16(is, 'b');
3195         } else {
3196             return new InputStreamReader(is, name);
3197         }
3198     }
3199 
3200     /**
3201      * Sets up current input on the top of the input stack.
3202      *
3203      * @param inp A new input to set up.
3204      */
push(Input inp)3205     protected void push(Input inp) {
3206         mInp.chLen = mChLen;
3207         mInp.chIdx = mChIdx;
3208         inp.next = mInp;
3209         mInp = inp;
3210         mChars = inp.chars;
3211         mChLen = inp.chLen;
3212         mChIdx = inp.chIdx;
3213     }
3214 
3215     /**
3216      * Restores previous input on the top of the input stack.
3217      */
pop()3218     protected void pop() {
3219         if (mInp.src != null) {
3220             try {
3221                 mInp.src.close();
3222             } catch (IOException ioe) {
3223             }
3224             mInp.src = null;
3225         }
3226         mInp = mInp.next;
3227         if (mInp != null) {
3228             mChars = mInp.chars;
3229             mChLen = mInp.chLen;
3230             mChIdx = mInp.chIdx;
3231         } else {
3232             mChars = null;
3233             mChLen = 0;
3234             mChIdx = 0;
3235         }
3236     }
3237 
    /**
     * Maps a character to its type.
     *
     * Possible character type values are:<br> - ' ' for any kind of white
     * space character;<br> - 'a' for any lower case alphabetical character
     * value;<br> - 'A' for any upper case alphabetical character value;<br>
     * - 'd' for any decimal digit character value;<br> - 'z' for any
     * character less than ' ' except '\t', '\n', '\r';<br> - 'X' for any
     * non-ASCII character;<br> - 'Z' for EOS character.<br> An ASCII (7 bit)
     * character which does not fall in any category listed above is mapped to
     * itself.
     *
     * @param ch The character to map.
     * @return The type of character.
     */
chtyp(char ch)3253     protected char chtyp(char ch) {
3254         if (ch < 0x80) {
3255             return (char) asctyp[ch];
3256         }
3257         return (ch != EOS) ? 'X' : 'Z';
3258     }
3259 
    /**
     * Retrieves the next character in the document.
     *
     * @return The next character in the document.
     */
getch()3265     protected char getch()
3266             throws IOException {
3267         if (mChIdx >= mChLen) {
3268             if (mInp.src == null) {
3269                 pop();  // remove internal entity
3270                 return getch();
3271             }
3272             //          Read new portion of the document characters
3273             int Num = mInp.src.read(mChars, 0, mChars.length);
3274             if (Num < 0) {
3275                 if (mInp != mDoc) {
3276                     pop();  // restore the previous input
3277                     return getch();
3278                 } else {
3279                     mChars[0] = EOS;
3280                     mChLen = 1;
3281                 }
3282             } else {
3283                 mChLen = Num;
3284             }
3285             mChIdx = 0;
3286         }
3287         return mChars[mChIdx++];
3288     }
3289 
    /**
     * Puts back the last read character.
     *
     * This method <strong>MUST NOT</strong> be called more than once after each
     * call of {@link #getch getch} method.
     */
bkch()3296     protected void bkch()
3297             throws Exception {
3298         if (mChIdx <= 0) {
3299             panic(FAULT);
3300         }
3301         mChIdx--;
3302     }
3303 
3304     /**
3305      * Sets the current character.
3306      *
3307      * @param ch The character to set.
3308      */
setch(char ch)3309     protected void setch(char ch) {
3310         mChars[mChIdx] = ch;
3311     }
3312 
3313     /**
3314      * Finds a pair in the pair chain by a qualified name.
3315      *
3316      * @param chain The first element of the chain of pairs.
3317      * @param qname The qualified name.
3318      * @return A pair with the specified qualified name or null.
3319      */
find(Pair chain, char[] qname)3320     protected Pair find(Pair chain, char[] qname) {
3321         for (Pair pair = chain; pair != null; pair = pair.next) {
3322             if (pair.eqname(qname) == true) {
3323                 return pair;
3324             }
3325         }
3326         return null;
3327     }
3328 
    /**
     * Provides an instance of a pair.
     *
     * @param next The reference to a next pair.
     * @return An instance of a pair.
     */
pair(Pair next)3335     protected Pair pair(Pair next) {
3336         Pair pair;
3337 
3338         if (mDltd != null) {
3339             pair = mDltd;
3340             mDltd = pair.next;
3341         } else {
3342             pair = new Pair();
3343         }
3344         pair.next = next;
3345 
3346         return pair;
3347     }
3348 
3349     /**
3350      * Deletes an instance of a pair.
3351      *
3352      * @param pair The pair to delete.
3353      * @return A reference to the next pair in a chain.
3354      */
del(Pair pair)3355     protected Pair del(Pair pair) {
3356         Pair next = pair.next;
3357 
3358         pair.name = null;
3359         pair.value = null;
3360         pair.chars = null;
3361         pair.list = null;
3362         pair.next = mDltd;
3363         mDltd = pair;
3364 
3365         return next;
3366     }
3367 }
3368