1 // Mozilla has modified this file - see https://hg.mozilla.org/ for details. 2 /* 3 * Licensed to the Apache Software Foundation (ASF) under one or more 4 * contributor license agreements. See the NOTICE file distributed with 5 * this work for additional information regarding copyright ownership. 6 * The ASF licenses this file to You under the Apache License, Version 2.0 7 * (the "License"); you may not use this file except in compliance with 8 * the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.mozilla.apache.commons.codec.binary; 20 21 import org.mozilla.apache.commons.codec.BinaryDecoder; 22 import org.mozilla.apache.commons.codec.BinaryEncoder; 23 import org.mozilla.apache.commons.codec.DecoderException; 24 import org.mozilla.apache.commons.codec.EncoderException; 25 26 /** 27 * Abstract superclass for Base-N encoders and decoders. 28 * 29 * <p> 30 * This class is not thread-safe. 31 * Each thread should use its own instance. 32 * </p> 33 */ 34 public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { 35 36 /** 37 * MIME chunk size per RFC 2045 section 6.8. 38 * 39 * <p> 40 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 41 * equal signs. 42 * </p> 43 * 44 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 45 */ 46 public static final int MIME_CHUNK_SIZE = 76; 47 48 /** 49 * PEM chunk size per RFC 1421 section 4.3.2.4. 50 * 51 * <p> 52 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 53 * equal signs. 54 * </p> 55 * 56 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> 57 */ 58 public static final int PEM_CHUNK_SIZE = 64; 59 60 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 61 62 /** 63 * Defines the default buffer size - currently {@value} 64 * - must be large enough for at least one encoded block+separator 65 */ 66 private static final int DEFAULT_BUFFER_SIZE = 8192; 67 68 /** Mask used to extract 8 bits, used in decoding bytes */ 69 protected static final int MASK_8BITS = 0xff; 70 71 /** 72 * Byte used to pad output. 73 */ 74 protected static final byte PAD_DEFAULT = '='; // Allow static access to default 75 76 protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later 77 78 /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */ 79 private final int unencodedBlockSize; 80 81 /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */ 82 private final int encodedBlockSize; 83 84 /** 85 * Chunksize for encoding. Not used when decoding. 86 * A value of zero or less implies no chunking of the encoded data. 87 * Rounded down to nearest multiple of encodedBlockSize. 88 */ 89 protected final int lineLength; 90 91 /** 92 * Size of chunk separator. Not used unless {@link #lineLength} > 0. 93 */ 94 private final int chunkSeparatorLength; 95 96 /** 97 * Buffer for streaming. 98 */ 99 protected byte[] buffer; 100 101 /** 102 * Position where next character should be written in the buffer. 103 */ 104 protected int pos; 105 106 /** 107 * Position where next character should be read from the buffer. 108 */ 109 private int readPos; 110 111 /** 112 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, 113 * and must be thrown away. 114 */ 115 protected boolean eof; 116 117 /** 118 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use it to 119 * make sure each encoded line never goes beyond lineLength (if lineLength > 0). 120 */ 121 protected int currentLinePos; 122 123 /** 124 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. 125 * This variable helps track that. 126 */ 127 protected int modulus; 128 129 /** 130 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize} 131 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled. 132 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3) 133 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4) 134 * @param lineLength if > 0, use chunking with a length <code>lineLength</code> 135 * @param chunkSeparatorLength the chunk separator length, if relevant 136 */ BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength)137 protected BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength){ 138 this.unencodedBlockSize = unencodedBlockSize; 139 this.encodedBlockSize = encodedBlockSize; 140 this.lineLength = (lineLength > 0 && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; 141 this.chunkSeparatorLength = chunkSeparatorLength; 142 } 143 144 /** 145 * Returns true if this object has buffered data for reading. 146 * 147 * @return true if there is data still available for reading. 148 */ hasData()149 boolean hasData() { // package protected for access from I/O streams 150 return this.buffer != null; 151 } 152 153 /** 154 * Returns the amount of buffered data available for reading. 155 * 156 * @return The amount of buffered data available for reading. 157 */ available()158 int available() { // package protected for access from I/O streams 159 return buffer != null ? pos - readPos : 0; 160 } 161 162 /** 163 * Get the default buffer size. Can be overridden. 164 * 165 * @return {@link #DEFAULT_BUFFER_SIZE} 166 */ getDefaultBufferSize()167 protected int getDefaultBufferSize() { 168 return DEFAULT_BUFFER_SIZE; 169 } 170 171 /** Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. */ resizeBuffer()172 private void resizeBuffer() { 173 if (buffer == null) { 174 buffer = new byte[getDefaultBufferSize()]; 175 pos = 0; 176 readPos = 0; 177 } else { 178 byte[] b = new byte[buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; 179 System.arraycopy(buffer, 0, b, 0, buffer.length); 180 buffer = b; 181 } 182 } 183 184 /** 185 * Ensure that the buffer has room for <code>size</code> bytes 186 * 187 * @param size minimum spare space required 188 */ ensureBufferSize(int size)189 protected void ensureBufferSize(int size){ 190 if ((buffer == null) || (buffer.length < pos + size)){ 191 resizeBuffer(); 192 } 193 } 194 195 /** 196 * Extracts buffered data into the provided byte[] array, starting at position bPos, 197 * up to a maximum of bAvail bytes. Returns how many bytes were actually extracted. 198 * 199 * @param b 200 * byte[] array to extract the buffered data into. 201 * @param bPos 202 * position in byte[] array to start extraction at. 203 * @param bAvail 204 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). 205 * @return The number of bytes successfully extracted into the provided byte[] array. 206 */ readResults(byte[] b, int bPos, int bAvail)207 int readResults(byte[] b, int bPos, int bAvail) { // package protected for access from I/O streams 208 if (buffer != null) { 209 int len = Math.min(available(), bAvail); 210 System.arraycopy(buffer, readPos, b, bPos, len); 211 readPos += len; 212 if (readPos >= pos) { 213 buffer = null; // so hasData() will return false, and this method can return -1 214 } 215 return len; 216 } 217 return eof ? -1 : 0; 218 } 219 220 /** 221 * Checks if a byte value is whitespace or not. 222 * Whitespace is taken to mean: space, tab, CR, LF 223 * @param byteToCheck 224 * the byte to check 225 * @return true if byte is whitespace, false otherwise 226 */ isWhiteSpace(byte byteToCheck)227 protected static boolean isWhiteSpace(byte byteToCheck) { 228 switch (byteToCheck) { 229 case ' ' : 230 case '\n' : 231 case '\r' : 232 case '\t' : 233 return true; 234 default : 235 return false; 236 } 237 } 238 239 /** 240 * Resets this object to its initial newly constructed state. 241 */ reset()242 private void reset() { 243 buffer = null; 244 pos = 0; 245 readPos = 0; 246 currentLinePos = 0; 247 modulus = 0; 248 eof = false; 249 } 250 251 /** 252 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of the 253 * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. 254 * 255 * @param pObject 256 * Object to encode 257 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. 258 * @throws EncoderException 259 * if the parameter supplied is not of type byte[] 260 */ encode(Object pObject)261 public Object encode(Object pObject) throws EncoderException { 262 if (!(pObject instanceof byte[])) { 263 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); 264 } 265 return encode((byte[]) pObject); 266 } 267 268 /** 269 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. 270 * 271 * @param pArray 272 * a byte array containing binary data 273 * @return A String containing only Base-N character data 274 */ encodeToString(byte[] pArray)275 public String encodeToString(byte[] pArray) { 276 return StringUtils.newStringUtf8(encode(pArray)); 277 } 278 279 /** 280 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of the 281 * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. 282 * 283 * @param pObject 284 * Object to decode 285 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String supplied. 286 * @throws DecoderException 287 * if the parameter supplied is not of type byte[] 288 */ decode(Object pObject)289 public Object decode(Object pObject) throws DecoderException { 290 if (pObject instanceof byte[]) { 291 return decode((byte[]) pObject); 292 } else if (pObject instanceof String) { 293 return decode((String) pObject); 294 } else { 295 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); 296 } 297 } 298 299 /** 300 * Decodes a String containing characters in the Base-N alphabet. 301 * 302 * @param pArray 303 * A String containing Base-N character data 304 * @return a byte array containing binary data 305 */ decode(String pArray)306 public byte[] decode(String pArray) { 307 return decode(StringUtils.getBytesUtf8(pArray)); 308 } 309 310 /** 311 * Decodes a byte[] containing characters in the Base-N alphabet. 312 * 313 * @param pArray 314 * A byte array containing Base-N character data 315 * @return a byte array containing binary data 316 */ decode(byte[] pArray)317 public byte[] decode(byte[] pArray) { 318 reset(); 319 if (pArray == null || pArray.length == 0) { 320 return pArray; 321 } 322 decode(pArray, 0, pArray.length); 323 decode(pArray, 0, -1); // Notify decoder of EOF. 324 byte[] result = new byte[pos]; 325 readResults(result, 0, result.length); 326 return result; 327 } 328 329 /** 330 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. 331 * 332 * @param pArray 333 * a byte array containing binary data 334 * @return A byte array containing only the basen alphabetic character data 335 */ encode(byte[] pArray)336 public byte[] encode(byte[] pArray) { 337 reset(); 338 if (pArray == null || pArray.length == 0) { 339 return pArray; 340 } 341 encode(pArray, 0, pArray.length); 342 encode(pArray, 0, -1); // Notify encoder of EOF. 343 byte[] buf = new byte[pos - readPos]; 344 readResults(buf, 0, buf.length); 345 return buf; 346 } 347 348 /** 349 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. 350 * Uses UTF8 encoding. 351 * 352 * @param pArray a byte array containing binary data 353 * @return String containing only character data in the appropriate alphabet. 354 */ encodeAsString(byte[] pArray)355 public String encodeAsString(byte[] pArray){ 356 return StringUtils.newStringUtf8(encode(pArray)); 357 } 358 encode(byte[] pArray, int i, int length)359 abstract void encode(byte[] pArray, int i, int length); // package protected for access from I/O streams 360 decode(byte[] pArray, int i, int length)361 abstract void decode(byte[] pArray, int i, int length); // package protected for access from I/O streams 362 363 /** 364 * Returns whether or not the <code>octet</code> is in the current alphabet. 365 * Does not allow whitespace or pad. 366 * 367 * @param value The value to test 368 * 369 * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise. 370 */ isInAlphabet(byte value)371 protected abstract boolean isInAlphabet(byte value); 372 373 /** 374 * Tests a given byte array to see if it contains only valid characters within the alphabet. 375 * The method optionally treats whitespace and pad as valid. 376 * 377 * @param arrayOctet byte array to test 378 * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed 379 * 380 * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty; 381 * <code>false</code>, otherwise 382 */ isInAlphabet(byte[] arrayOctet, boolean allowWSPad)383 public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) { 384 for (int i = 0; i < arrayOctet.length; i++) { 385 if (!isInAlphabet(arrayOctet[i]) && 386 (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) { 387 return false; 388 } 389 } 390 return true; 391 } 392 393 /** 394 * Tests a given String to see if it contains only valid characters within the alphabet. 395 * The method treats whitespace and PAD as valid. 396 * 397 * @param basen String to test 398 * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if 399 * the String is empty; <code>false</code>, otherwise 400 * @see #isInAlphabet(byte[], boolean) 401 */ isInAlphabet(String basen)402 public boolean isInAlphabet(String basen) { 403 return isInAlphabet(StringUtils.getBytesUtf8(basen), true); 404 } 405 406 /** 407 * Tests a given byte array to see if it contains any characters within the alphabet or PAD. 408 * 409 * Intended for use in checking line-ending arrays 410 * 411 * @param arrayOctet 412 * byte array to test 413 * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise 414 */ containsAlphabetOrPad(byte[] arrayOctet)415 protected boolean containsAlphabetOrPad(byte[] arrayOctet) { 416 if (arrayOctet == null) { 417 return false; 418 } 419 for (int i = 0; i < arrayOctet.length; i++) { 420 if (PAD == arrayOctet[i] || isInAlphabet(arrayOctet[i])) { 421 return true; 422 } 423 } 424 return false; 425 } 426 427 /** 428 * Calculates the amount of space needed to encode the supplied array. 429 * 430 * @param pArray byte[] array which will later be encoded 431 * 432 * @return amount of space needed to encoded the supplied array. 433 * Returns a long since a max-len array will require > Integer.MAX_VALUE 434 */ getEncodedLength(byte[] pArray)435 public long getEncodedLength(byte[] pArray) { 436 // Calculate non-chunked size - rounded up to allow for padding 437 // cast to long is needed to avoid possibility of overflow 438 long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize; 439 if (lineLength > 0) { // We're using chunking 440 // Round up to nearest multiple 441 len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength; 442 } 443 return len; 444 } 445 } 446