1 package com.jclark.xml.tok; 2 3 /** 4 * An XML TextDecl. 5 * @version $Revision: 1.4 $ $Date: 1998/12/30 02:42:01 $ 6 */ 7 public class TextDecl { 8 private String version; 9 private String encoding; 10 11 /** 12 * Creates a <code>TextDecl</code> from the specified byte subarray. 13 * The specified encoding is used to convert bytes to characters. 14 * The byte subarray should be a <code>TOK_XML_DECL</code> token 15 * returned from Encoding.tokenizeProlog or Encoding.tokenizeContent, 16 * starting with <code><?</code> and ending with <code>?></code>. 17 * @exception InvalidTokenException if the specified byte subarray 18 * is not a legal XML TextDecl. 19 */ TextDecl(Encoding enc, byte[] buf, int off, int end)20 public TextDecl(Encoding enc, byte[] buf, int off, int end) 21 throws InvalidTokenException { 22 init(false, enc, buf, off, end); 23 } 24 25 /** 26 * Return the encoding specified in the declaration, or null 27 * if no encoding was specified. 28 */ getEncoding()29 public String getEncoding() { 30 return encoding; 31 } 32 33 /** 34 * Return the version specified in the declaration, or null 35 * if no version was specified. 36 */ getVersion()37 public String getVersion() { 38 return version; 39 } 40 TextDecl()41 TextDecl() { } 42 init(boolean isXmlDecl, Encoding enc, byte[] buf, int off, int end)43 boolean init(boolean isXmlDecl, Encoding enc, byte[] buf, int off, int end) throws InvalidTokenException { 44 // Skip <?xml 45 off += enc.getMinBytesPerChar()*5; 46 // Skip ?> 47 end -= enc.getMinBytesPerChar()*2; 48 ContentToken ct = new ContentToken(); 49 int firstErrorIndex = -1; 50 try { 51 parsePseudoAttributes(enc, buf, off, end, ct); 52 } 53 catch (InvalidTokenException e) { 54 firstErrorIndex = e.getOffset(); 55 } 56 int nAtts = ct.getAttributeSpecifiedCount(); 57 if (nAtts == 0) { 58 if (firstErrorIndex == -1) 59 firstErrorIndex = end; 60 throw new InvalidTokenException(firstErrorIndex); 61 } 62 String[] names = new String[nAtts]; 63 String[] values = new String[nAtts]; 64 char[] cbuf = new char[32]; 65 for (int i = 0; i < nAtts; i++) { 66 int s = ct.getAttributeNameStart(i); 67 int e = ct.getAttributeNameEnd(i); 68 if (e - s > cbuf.length) 69 cbuf = new char[e - s]; 70 names[i] = new String(cbuf, 0, enc.convert(buf, s, e, cbuf, 0)); 71 s = ct.getAttributeValueStart(i); 72 e = ct.getAttributeValueEnd(i); 73 if (e - s > cbuf.length) 74 cbuf = new char[e - s]; 75 values[i] = new String(cbuf, 0, enc.convert(buf, s, e, cbuf, 0)); 76 } 77 int att = 0; 78 if (names[0].equals("version")) { 79 version = values[0]; 80 att++; 81 } 82 if ((att == 1 || !isXmlDecl) 83 && att < nAtts && names[att].equals("encoding")) { 84 encoding = values[att]; 85 if (values[att].length() == 0 86 || !Character.isLetter(values[att].charAt(0)) 87 || values[att].indexOf(':') >= 0) { 88 int k = ct.getAttributeValueStart(att); 89 if (firstErrorIndex == -1 || k < firstErrorIndex) 90 firstErrorIndex = k; 91 } 92 att++; 93 } 94 else if (!isXmlDecl) 95 firstErrorIndex = 0; // encoding is required in a TextDecl 96 boolean standalone = false; 97 if (isXmlDecl && att > 0 && att < nAtts 98 && names[att].equals("standalone")) { 99 if (values[att].equals("yes")) 100 standalone = true; 101 else if (!values[att].equals("no")) { 102 int k = ct.getAttributeValueStart(att); 103 if (firstErrorIndex == -1 || k < firstErrorIndex) 104 firstErrorIndex = k; 105 } 106 att++; 107 } 108 if (att < nAtts) { 109 int k = ct.getAttributeNameStart(att); 110 if (firstErrorIndex == -1 || k < firstErrorIndex) 111 firstErrorIndex = k; 112 } 113 if (firstErrorIndex != -1) 114 throw new InvalidTokenException(firstErrorIndex); 115 return standalone; 116 } 117 118 private final parsePseudoAttributes(Encoding enc, byte[] buf, int off, int end, ContentToken ct)119 void parsePseudoAttributes(Encoding enc, byte[] buf, int off, int end, 120 ContentToken ct) throws InvalidTokenException { 121 final int minBPC = enc.getMinBytesPerChar(); 122 for (;;) { 123 off = skipWS(enc, buf, off, end); 124 if (off == end) 125 break; 126 int nameStart = off; 127 int nameEnd; 128 nameLoop: 129 for (;;) { 130 switch (enc.byteType(buf, off)) { 131 case Encoding.BT_NMSTRT: 132 break; 133 case Encoding.BT_EQUALS: 134 nameEnd = off; 135 break nameLoop; 136 case Encoding.BT_S: 137 case Encoding.BT_LF: 138 case Encoding.BT_CR: 139 nameEnd = off; 140 off += minBPC; 141 off = skipWS(enc, buf, off, end); 142 if (off == end || !enc.charMatches(buf, off, '=')) 143 throw new InvalidTokenException(off); 144 break nameLoop; 145 default: 146 throw new InvalidTokenException(off); 147 } 148 off += minBPC; 149 if (off == end) 150 throw new InvalidTokenException(off); 151 } 152 off += minBPC; 153 off = skipWS(enc, buf, off, end); 154 if (off == end || !(enc.charMatches(buf, off, '\'') 155 || enc.charMatches(buf, off, '"'))) 156 throw new InvalidTokenException(off); 157 off += minBPC; 158 int valueStart = off; 159 valueLoop: 160 for (;;) { 161 if (off == end) 162 throw new InvalidTokenException(off); 163 switch (enc.byteType(buf, off)) { 164 case Encoding.BT_NMSTRT: 165 case Encoding.BT_NAME: 166 case Encoding.BT_MINUS: 167 if ((enc.byteToAscii(buf, off) & ~0x7F) != 0) 168 throw new InvalidTokenException(off); 169 off += minBPC; 170 break; 171 case Encoding.BT_QUOT: 172 case Encoding.BT_APOS: 173 if (enc.byteType(buf, off) != enc.byteType(buf, valueStart - minBPC)) 174 throw new InvalidTokenException(off); 175 break valueLoop; 176 default: 177 throw new InvalidTokenException(off); 178 } 179 } 180 ct.appendAttribute(nameStart, nameEnd, valueStart, off, true); 181 off += minBPC; 182 if (off == end) 183 break; 184 switch (enc.byteType(buf, off)) { 185 case Encoding.BT_S: 186 case Encoding.BT_LF: 187 case Encoding.BT_CR: 188 off += minBPC; 189 break; 190 default: 191 throw new InvalidTokenException(off); 192 } 193 } 194 } 195 skipWS(Encoding enc, byte[] buf, int off, int end)196 private int skipWS(Encoding enc, byte[] buf, int off, int end) { 197 loop: 198 while (off != end) { 199 switch (enc.byteType(buf, off)) { 200 case Encoding.BT_S: 201 case Encoding.BT_LF: 202 case Encoding.BT_CR: 203 off += enc.getMinBytesPerChar(); 204 break; 205 default: 206 break loop; 207 } 208 } 209 return off; 210 } 211 } 212