1 // Mozilla has modified this file - see https://hg.mozilla.org/ for details. 2 /* 3 * Licensed to the Apache Software Foundation (ASF) under one or more 4 * contributor license agreements. See the NOTICE file distributed with 5 * this work for additional information regarding copyright ownership. 6 * The ASF licenses this file to You under the Apache License, Version 2.0 7 * (the "License"); you may not use this file except in compliance with 8 * the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.mozilla.apache.commons.codec.binary; 20 21 /** 22 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>. 23 * 24 * <p> 25 * The class can be parameterized in the following manner with various constructors: 26 * <ul> 27 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li> 28 * <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of 29 * 8 in the encoded data. 30 * <li>Line separator: Default is CRLF ("\r\n")</li> 31 * </ul> 32 * </p> 33 * <p> 34 * This class operates directly on byte streams, and not character streams. 35 * </p> 36 * <p> 37 * This class is not thread-safe. Each thread should use its own instance. 38 * </p> 39 * 40 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> 41 * 42 * @since 1.5 43 * @version $Revision: 1080712 $ 44 */ 45 public class Base32 extends BaseNCodec { 46 47 /** 48 * BASE32 characters are 5 bits in length. 49 * They are formed by taking a block of five octets to form a 40-bit string, 50 * which is converted into eight BASE32 characters. 51 */ 52 private static final int BITS_PER_ENCODED_BYTE = 5; 53 private static final int BYTES_PER_ENCODED_BLOCK = 8; 54 private static final int BYTES_PER_UNENCODED_BLOCK = 5; 55 56 /** 57 * Chunk separator per RFC 2045 section 2.1. 58 * 59 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 60 */ 61 private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; 62 63 /** 64 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in 65 * Table 3 of RFC 2045) into their 5-bit positive integer equivalents. Characters that are not in the Base32 66 * alphabet but fall within the bounds of the array are translated to -1. 67 * 68 */ 69 private static final byte[] DECODE_TABLE = { 70 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 71 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 72 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 73 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, // 20-2f 74 -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 75 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-N 76 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a O-Z 77 }; 78 79 /** 80 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" 81 * equivalents as specified in Table 3 of RFC 2045. 82 */ 83 private static final byte[] ENCODE_TABLE = { 84 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 85 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 86 '2', '3', '4', '5', '6', '7', 87 }; 88 89 /** 90 * This array is a lookup table that translates Unicode characters drawn from the "Base32 |Hex Alphabet" (as specified in 91 * Table 3 of RFC 2045) into their 5-bit positive integer equivalents. Characters that are not in the Base32 Hex 92 * alphabet but fall within the bounds of the array are translated to -1. 93 * 94 */ 95 private static final byte[] HEX_DECODE_TABLE = { 96 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 97 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f 98 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f 99 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, // 20-2f 100 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 101 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-N 102 25, 26, 27, 28, 29, 30, 31, 32, // 50-57 O-V 103 }; 104 105 /** 106 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Hex Alphabet" 107 * equivalents as specified in Table 3 of RFC 2045. 108 */ 109 private static final byte[] HEX_ENCODE_TABLE = { 110 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 111 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 112 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 113 }; 114 115 /** Mask used to extract 5 bits, used when encoding Base32 bytes */ 116 private static final int MASK_5BITS = 0x1f; 117 118 // The static final fields above are used for the original static byte[] methods on Base32. 119 // The private member fields below are used with the new streaming approach, which requires 120 // some state be preserved between calls of encode() and decode(). 121 122 /** 123 * Place holder for the bytes we're dealing with for our based logic. 124 * Bitwise operations store and extract the encoding or decoding from this variable. 125 */ 126 private long bitWorkArea; 127 128 /** 129 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 130 * <code>decodeSize = {@link BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code> 131 */ 132 private final int decodeSize; 133 134 /** 135 * Decode table to use. 136 */ 137 private final byte[] decodeTable; 138 139 /** 140 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 141 * <code>encodeSize = {@link BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code> 142 */ 143 private final int encodeSize; 144 145 /** 146 * Encode table to use. 147 */ 148 private final byte[] encodeTable; 149 150 /** 151 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 152 */ 153 private final byte[] lineSeparator; 154 155 /** 156 * Creates a Base32 codec used for decoding and encoding. 157 * <p> 158 * When encoding the line length is 0 (no chunking). 159 * </p> 160 * 161 */ Base32()162 public Base32() { 163 this(false); 164 } 165 166 /** 167 * Creates a Base32 codec used for decoding and encoding. 168 * <p> 169 * When encoding the line length is 0 (no chunking). 170 * </p> 171 * @param useHex if <code>true</code> then use Base32 Hex alphabet 172 */ Base32(boolean useHex)173 public Base32(boolean useHex) { 174 this(0, null, useHex); 175 } 176 177 /** 178 * Creates a Base32 codec used for decoding and encoding. 179 * <p> 180 * When encoding the line length is given in the constructor, the line separator is CRLF. 181 * </p> 182 * 183 * @param lineLength 184 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 8). 185 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 186 */ Base32(int lineLength)187 public Base32(int lineLength) { 188 this(lineLength, CHUNK_SEPARATOR); 189 } 190 191 /** 192 * Creates a Base32 codec used for decoding and encoding. 193 * <p> 194 * When encoding the line length and line separator are given in the constructor. 195 * </p> 196 * <p> 197 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 198 * </p> 199 * 200 * @param lineLength 201 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 8). 202 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 203 * @param lineSeparator 204 * Each line of encoded data will end with this sequence of bytes. 205 * @throws IllegalArgumentException 206 * The provided lineSeparator included some Base32 characters. That's not going to work! 207 */ Base32(int lineLength, byte[] lineSeparator)208 public Base32(int lineLength, byte[] lineSeparator) { 209 this(lineLength, lineSeparator, false); 210 } 211 212 /** 213 * Creates a Base32 / Base32 Hex codec used for decoding and encoding. 214 * <p> 215 * When encoding the line length and line separator are given in the constructor. 216 * </p> 217 * <p> 218 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. 219 * </p> 220 * 221 * @param lineLength 222 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 8). 223 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 224 * @param lineSeparator 225 * Each line of encoded data will end with this sequence of bytes. 226 * @param useHex if <code>true</code>, then use Base32 Hex alphabet, otherwise use Base32 alphabet 227 * @throws IllegalArgumentException 228 * The provided lineSeparator included some Base32 characters. That's not going to work! 229 * Or the lineLength > 0 and lineSeparator is null. 230 */ Base32(int lineLength, byte[] lineSeparator, boolean useHex)231 public Base32(int lineLength, byte[] lineSeparator, boolean useHex) { 232 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 233 lineLength, 234 lineSeparator == null ? 0 : lineSeparator.length); 235 if (useHex){ 236 this.encodeTable = HEX_ENCODE_TABLE; 237 this.decodeTable = HEX_DECODE_TABLE; 238 } else { 239 this.encodeTable = ENCODE_TABLE; 240 this.decodeTable = DECODE_TABLE; 241 } 242 if (lineLength > 0) { 243 if (lineSeparator == null) { 244 throw new IllegalArgumentException("lineLength "+lineLength+" > 0, but lineSeparator is null"); 245 } 246 // Must be done after initializing the tables 247 if (containsAlphabetOrPad(lineSeparator)) { 248 String sep = StringUtils.newStringUtf8(lineSeparator); 249 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]"); 250 } 251 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; 252 this.lineSeparator = new byte[lineSeparator.length]; 253 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); 254 } else { 255 this.encodeSize = BYTES_PER_ENCODED_BLOCK; 256 this.lineSeparator = null; 257 } 258 this.decodeSize = this.encodeSize - 1; 259 } 260 261 /** 262 * <p> 263 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once 264 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" 265 * call is not necessary when decoding, but it doesn't hurt, either. 266 * </p> 267 * <p> 268 * Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are 269 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, 270 * garbage-out philosophy: it will not check the provided data for validity. 271 * </p> 272 * 273 * @param in 274 * byte[] array of ascii data to Base32 decode. 275 * @param inPos 276 * Position to start reading data from. 277 * @param inAvail 278 * Amount of bytes available from input for encoding. 279 * 280 * Output is written to {@link #buffer} as 8-bit octets, using {@link pos} as the buffer position 281 */ decode(byte[] in, int inPos, int inAvail)282 void decode(byte[] in, int inPos, int inAvail) { // package protected for access from I/O streams 283 if (eof) { 284 return; 285 } 286 if (inAvail < 0) { 287 eof = true; 288 } 289 for (int i = 0; i < inAvail; i++) { 290 byte b = in[inPos++]; 291 if (b == PAD) { 292 // We're done. 293 eof = true; 294 break; 295 } else { 296 ensureBufferSize(decodeSize); 297 if (b >= 0 && b < this.decodeTable.length) { 298 int result = this.decodeTable[b]; 299 if (result >= 0) { 300 modulus = (modulus+1) % BYTES_PER_ENCODED_BLOCK; 301 bitWorkArea = (bitWorkArea << BITS_PER_ENCODED_BYTE) + result; // collect decoded bytes 302 if (modulus == 0) { // we can output the 5 bytes 303 buffer[pos++] = (byte) ((bitWorkArea >> 32) & MASK_8BITS); 304 buffer[pos++] = (byte) ((bitWorkArea >> 24) & MASK_8BITS); 305 buffer[pos++] = (byte) ((bitWorkArea >> 16) & MASK_8BITS); 306 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 307 buffer[pos++] = (byte) (bitWorkArea & MASK_8BITS); 308 } 309 } 310 } 311 } 312 } 313 314 // Two forms of EOF as far as Base32 decoder is concerned: actual 315 // EOF (-1) and first time '=' character is encountered in stream. 316 // This approach makes the '=' padding characters completely optional. 317 if (eof && modulus >= 2) { // if modulus < 2, nothing to do 318 ensureBufferSize(decodeSize); 319 320 // we ignore partial bytes, i.e. only multiples of 8 count 321 switch (modulus) { 322 case 2 : // 10 bits, drop 2 and output one byte 323 buffer[pos++] = (byte) ((bitWorkArea >> 2) & MASK_8BITS); 324 break; 325 case 3 : // 15 bits, drop 7 and output 1 byte 326 buffer[pos++] = (byte) ((bitWorkArea >> 7) & MASK_8BITS); 327 break; 328 case 4 : // 20 bits = 2*8 + 4 329 bitWorkArea = bitWorkArea >> 4; // drop 4 bits 330 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 331 buffer[pos++] = (byte) ((bitWorkArea) & MASK_8BITS); 332 break; 333 case 5 : // 25bits = 3*8 + 1 334 bitWorkArea = bitWorkArea >> 1; 335 buffer[pos++] = (byte) ((bitWorkArea >> 16) & MASK_8BITS); 336 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 337 buffer[pos++] = (byte) ((bitWorkArea) & MASK_8BITS); 338 break; 339 case 6 : // 30bits = 3*8 + 6 340 bitWorkArea = bitWorkArea >> 6; 341 buffer[pos++] = (byte) ((bitWorkArea >> 16) & MASK_8BITS); 342 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 343 buffer[pos++] = (byte) ((bitWorkArea) & MASK_8BITS); 344 break; 345 case 7 : // 35 = 4*8 +3 346 bitWorkArea = bitWorkArea >> 3; 347 buffer[pos++] = (byte) ((bitWorkArea >> 24) & MASK_8BITS); 348 buffer[pos++] = (byte) ((bitWorkArea >> 16) & MASK_8BITS); 349 buffer[pos++] = (byte) ((bitWorkArea >> 8) & MASK_8BITS); 350 buffer[pos++] = (byte) ((bitWorkArea) & MASK_8BITS); 351 break; 352 } 353 } 354 } 355 356 /** 357 * <p> 358 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with 359 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last 360 * remaining bytes (if not multiple of 5). 361 * </p> 362 * 363 * @param in 364 * byte[] array of binary data to Base32 encode. 365 * @param inPos 366 * Position to start reading data from. 367 * @param inAvail 368 * Amount of bytes available from input for encoding. 369 */ encode(byte[] in, int inPos, int inAvail)370 void encode(byte[] in, int inPos, int inAvail) { // package protected for access from I/O streams 371 if (eof) { 372 return; 373 } 374 // inAvail < 0 is how we're informed of EOF in the underlying data we're 375 // encoding. 376 if (inAvail < 0) { 377 eof = true; 378 if (0 == modulus && lineLength == 0) { 379 return; // no leftovers to process and not using chunking 380 } 381 ensureBufferSize(encodeSize); 382 int savedPos = pos; 383 switch (modulus) { // % 5 384 case 1 : // Only 1 octet; take top 5 bits then remainder 385 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3 386 buffer[pos++] = encodeTable[(int)(bitWorkArea << 2) & MASK_5BITS]; // 5-3=2 387 buffer[pos++] = PAD; 388 buffer[pos++] = PAD; 389 buffer[pos++] = PAD; 390 buffer[pos++] = PAD; 391 buffer[pos++] = PAD; 392 buffer[pos++] = PAD; 393 break; 394 395 case 2 : // 2 octets = 16 bits to use 396 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11 397 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6 398 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1 399 buffer[pos++] = encodeTable[(int)(bitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4 400 buffer[pos++] = PAD; 401 buffer[pos++] = PAD; 402 buffer[pos++] = PAD; 403 buffer[pos++] = PAD; 404 break; 405 case 3 : // 3 octets = 24 bits to use 406 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19 407 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14 408 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9 409 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4 410 buffer[pos++] = encodeTable[(int)(bitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1 411 buffer[pos++] = PAD; 412 buffer[pos++] = PAD; 413 buffer[pos++] = PAD; 414 break; 415 case 4 : // 4 octets = 32 bits to use 416 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27 417 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22 418 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17 419 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12 420 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7 421 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2 422 buffer[pos++] = encodeTable[(int)(bitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3 423 buffer[pos++] = PAD; 424 break; 425 } 426 currentLinePos += pos - savedPos; // keep track of current line position 427 // if currentPos == 0 we are at the start of a line, so don't add CRLF 428 if (lineLength > 0 && currentLinePos > 0){ // add chunk separator if required 429 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length); 430 pos += lineSeparator.length; 431 } 432 } else { 433 for (int i = 0; i < inAvail; i++) { 434 ensureBufferSize(encodeSize); 435 modulus = (modulus+1) % BYTES_PER_UNENCODED_BLOCK; 436 int b = in[inPos++]; 437 if (b < 0) { 438 b += 256; 439 } 440 bitWorkArea = (bitWorkArea << 8) + b; // BITS_PER_BYTE 441 if (0 == modulus) { // we have enough bytes to create our output 442 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 35) & MASK_5BITS]; 443 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 30) & MASK_5BITS]; 444 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 25) & MASK_5BITS]; 445 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 20) & MASK_5BITS]; 446 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 15) & MASK_5BITS]; 447 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 10) & MASK_5BITS]; 448 buffer[pos++] = encodeTable[(int)(bitWorkArea >> 5) & MASK_5BITS]; 449 buffer[pos++] = encodeTable[(int)bitWorkArea & MASK_5BITS]; 450 currentLinePos += BYTES_PER_ENCODED_BLOCK; 451 if (lineLength > 0 && lineLength <= currentLinePos) { 452 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length); 453 pos += lineSeparator.length; 454 currentLinePos = 0; 455 } 456 } 457 } 458 } 459 } 460 461 /** 462 * Returns whether or not the <code>octet</code> is in the Base32 alphabet. 463 * 464 * @param octet 465 * The value to test 466 * @return <code>true</code> if the value is defined in the the Base32 alphabet <code>false</code> otherwise. 467 */ isInAlphabet(byte octet)468 public boolean isInAlphabet(byte octet) { 469 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; 470 } 471 } 472