1 package com.jclark.xml.tok;
2 
3 /**
4  * An XML TextDecl.
5  * @version $Revision: 1.4 $ $Date: 1998/12/30 02:42:01 $
6  */
7 public class TextDecl {
8   private String version;
9   private String encoding;
10 
11   /**
12    * Creates a <code>TextDecl</code> from the specified byte subarray.
13    * The specified encoding is used to convert bytes to characters.
14    * The byte subarray should be a <code>TOK_XML_DECL</code> token
15    * returned from Encoding.tokenizeProlog or Encoding.tokenizeContent,
16    * starting with <code>&lt;?</code> and ending with <code>?&gt;</code>.
17    * @exception InvalidTokenException if the specified byte subarray
18    * is not a legal XML TextDecl.
19    */
TextDecl(Encoding enc, byte[] buf, int off, int end)20   public TextDecl(Encoding enc, byte[] buf, int off, int end)
21        throws InvalidTokenException {
22     init(false, enc, buf, off, end);
23   }
24 
25   /**
26    * Return the encoding specified in the declaration, or null
27    * if no encoding was specified.
28    */
getEncoding()29   public String getEncoding() {
30     return encoding;
31   }
32 
33   /**
34    * Return the version specified in the declaration, or null
35    * if no version was specified.
36    */
getVersion()37   public String getVersion() {
38     return version;
39   }
40 
TextDecl()41   TextDecl() { }
42 
init(boolean isXmlDecl, Encoding enc, byte[] buf, int off, int end)43   boolean init(boolean isXmlDecl, Encoding enc, byte[] buf, int off, int end) throws InvalidTokenException {
44     // Skip <?xml
45     off += enc.getMinBytesPerChar()*5;
46     // Skip ?>
47     end -= enc.getMinBytesPerChar()*2;
48     ContentToken ct = new ContentToken();
49     int firstErrorIndex = -1;
50     try {
51       parsePseudoAttributes(enc, buf, off, end, ct);
52     }
53     catch (InvalidTokenException e) {
54       firstErrorIndex = e.getOffset();
55     }
56     int nAtts = ct.getAttributeSpecifiedCount();
57     if (nAtts == 0) {
58       if (firstErrorIndex == -1)
59 	firstErrorIndex = end;
60       throw new InvalidTokenException(firstErrorIndex);
61     }
62     String[] names = new String[nAtts];
63     String[] values = new String[nAtts];
64     char[] cbuf = new char[32];
65     for (int i = 0; i < nAtts; i++) {
66       int s = ct.getAttributeNameStart(i);
67       int e = ct.getAttributeNameEnd(i);
68       if (e - s > cbuf.length)
69 	cbuf = new char[e - s];
70       names[i] = new String(cbuf, 0, enc.convert(buf, s, e, cbuf, 0));
71       s = ct.getAttributeValueStart(i);
72       e = ct.getAttributeValueEnd(i);
73       if (e - s > cbuf.length)
74 	cbuf = new char[e - s];
75       values[i] = new String(cbuf, 0, enc.convert(buf, s, e, cbuf, 0));
76     }
77     int att = 0;
78     if (names[0].equals("version")) {
79       version = values[0];
80       att++;
81     }
82     if ((att == 1 || !isXmlDecl)
83 	&& att < nAtts && names[att].equals("encoding")) {
84       encoding = values[att];
85       if (values[att].length() == 0
86 	  || !Character.isLetter(values[att].charAt(0))
87 	  || values[att].indexOf(':') >= 0) {
88 	int k = ct.getAttributeValueStart(att);
89 	if (firstErrorIndex == -1 || k < firstErrorIndex)
90 	  firstErrorIndex = k;
91       }
92       att++;
93     }
94     else if (!isXmlDecl)
95       firstErrorIndex = 0;	// encoding is required in a TextDecl
96     boolean standalone = false;
97     if (isXmlDecl && att > 0 && att < nAtts
98 	&& names[att].equals("standalone")) {
99       if (values[att].equals("yes"))
100 	standalone = true;
101       else if (!values[att].equals("no")) {
102 	int k = ct.getAttributeValueStart(att);
103 	if (firstErrorIndex == -1 || k < firstErrorIndex)
104 	  firstErrorIndex = k;
105       }
106       att++;
107     }
108     if (att < nAtts) {
109       int k = ct.getAttributeNameStart(att);
110       if (firstErrorIndex == -1 || k < firstErrorIndex)
111 	firstErrorIndex = k;
112     }
113     if (firstErrorIndex != -1)
114       throw new InvalidTokenException(firstErrorIndex);
115     return standalone;
116   }
117 
118   private final
parsePseudoAttributes(Encoding enc, byte[] buf, int off, int end, ContentToken ct)119   void parsePseudoAttributes(Encoding enc, byte[] buf, int off, int end,
120 			     ContentToken ct) throws InvalidTokenException {
121     final int minBPC = enc.getMinBytesPerChar();
122     for (;;) {
123       off = skipWS(enc, buf, off, end);
124       if (off == end)
125 	break;
126       int nameStart = off;
127       int nameEnd;
128     nameLoop:
129       for (;;) {
130 	switch (enc.byteType(buf, off)) {
131 	case Encoding.BT_NMSTRT:
132 	  break;
133 	case Encoding.BT_EQUALS:
134 	  nameEnd = off;
135 	  break nameLoop;
136 	case Encoding.BT_S:
137 	case Encoding.BT_LF:
138 	case Encoding.BT_CR:
139 	  nameEnd = off;
140 	  off += minBPC;
141 	  off = skipWS(enc, buf, off, end);
142 	  if (off == end || !enc.charMatches(buf, off, '='))
143 	    throw new InvalidTokenException(off);
144 	  break nameLoop;
145 	default:
146 	  throw new InvalidTokenException(off);
147 	}
148 	off += minBPC;
149 	if (off == end)
150 	  throw new InvalidTokenException(off);
151       }
152       off += minBPC;
153       off = skipWS(enc, buf, off, end);
154       if (off == end || !(enc.charMatches(buf, off, '\'')
155 			  || enc.charMatches(buf, off, '"')))
156 	throw new InvalidTokenException(off);
157       off += minBPC;
158       int valueStart = off;
159     valueLoop:
160       for (;;) {
161 	if (off == end)
162 	  throw new InvalidTokenException(off);
163 	switch (enc.byteType(buf, off)) {
164 	case Encoding.BT_NMSTRT:
165 	case Encoding.BT_NAME:
166 	case Encoding.BT_MINUS:
167 	  if ((enc.byteToAscii(buf, off) & ~0x7F) != 0)
168 	    throw new InvalidTokenException(off);
169 	  off += minBPC;
170 	  break;
171 	case Encoding.BT_QUOT:
172 	case Encoding.BT_APOS:
173 	  if (enc.byteType(buf, off) != enc.byteType(buf, valueStart - minBPC))
174 	    throw new InvalidTokenException(off);
175 	  break valueLoop;
176 	default:
177 	  throw new InvalidTokenException(off);
178 	}
179       }
180       ct.appendAttribute(nameStart, nameEnd, valueStart, off, true);
181       off += minBPC;
182       if (off == end)
183 	break;
184       switch (enc.byteType(buf, off)) {
185       case Encoding.BT_S:
186       case Encoding.BT_LF:
187       case Encoding.BT_CR:
188 	off += minBPC;
189 	break;
190       default:
191 	throw new InvalidTokenException(off);
192       }
193     }
194   }
195 
skipWS(Encoding enc, byte[] buf, int off, int end)196   private int skipWS(Encoding enc, byte[] buf, int off, int end) {
197   loop:
198     while (off != end) {
199       switch (enc.byteType(buf, off)) {
200       case Encoding.BT_S:
201       case Encoding.BT_LF:
202       case Encoding.BT_CR:
203 	off += enc.getMinBytesPerChar();
204 	break;
205       default:
206 	break loop;
207       }
208     }
209     return off;
210   }
211 }
212