1 // Mozilla has modified this file - see https://hg.mozilla.org/ for details.
2 /*
3  * Licensed to the Apache Software Foundation (ASF) under one or more
4  * contributor license agreements.  See the NOTICE file distributed with
5  * this work for additional information regarding copyright ownership.
6  * The ASF licenses this file to You under the Apache License, Version 2.0
7  * (the "License"); you may not use this file except in compliance with
8  * the License.  You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 package org.mozilla.apache.commons.codec.binary;
20 
21 import org.mozilla.apache.commons.codec.BinaryDecoder;
22 import org.mozilla.apache.commons.codec.BinaryEncoder;
23 import org.mozilla.apache.commons.codec.DecoderException;
24 import org.mozilla.apache.commons.codec.EncoderException;
25 
26 /**
27  * Abstract superclass for Base-N encoders and decoders.
28  *
29  * <p>
30  * This class is not thread-safe.
31  * Each thread should use its own instance.
32  * </p>
33  */
34 public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
35 
36     /**
37      *  MIME chunk size per RFC 2045 section 6.8.
38      *
39      * <p>
40      * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
41      * equal signs.
42      * </p>
43      *
44      * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
45      */
46     public static final int MIME_CHUNK_SIZE = 76;
47 
48     /**
49      * PEM chunk size per RFC 1421 section 4.3.2.4.
50      *
51      * <p>
52      * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
53      * equal signs.
54      * </p>
55      *
56      * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
57      */
58     public static final int PEM_CHUNK_SIZE = 64;
59 
60     private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
61 
62     /**
63      * Defines the default buffer size - currently {@value}
64      * - must be large enough for at least one encoded block+separator
65      */
66     private static final int DEFAULT_BUFFER_SIZE = 8192;
67 
68     /** Mask used to extract 8 bits, used in decoding bytes */
69     protected static final int MASK_8BITS = 0xff;
70 
71     /**
72      * Byte used to pad output.
73      */
74     protected static final byte PAD_DEFAULT = '='; // Allow static access to default
75 
76     protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
77 
78     /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */
79     private final int unencodedBlockSize;
80 
81     /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */
82     private final int encodedBlockSize;
83 
84     /**
85      * Chunksize for encoding. Not used when decoding.
86      * A value of zero or less implies no chunking of the encoded data.
87      * Rounded down to nearest multiple of encodedBlockSize.
88      */
89     protected final int lineLength;
90 
91     /**
92      * Size of chunk separator. Not used unless {@link #lineLength} > 0.
93      */
94     private final int chunkSeparatorLength;
95 
96     /**
97      * Buffer for streaming.
98      */
99     protected byte[] buffer;
100 
101     /**
102      * Position where next character should be written in the buffer.
103      */
104     protected int pos;
105 
106     /**
107      * Position where next character should be read from the buffer.
108      */
109     private int readPos;
110 
111     /**
112      * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
113      * and must be thrown away.
114      */
115     protected boolean eof;
116 
117     /**
118      * Variable tracks how many characters have been written to the current line. Only used when encoding. We use it to
119      * make sure each encoded line never goes beyond lineLength (if lineLength > 0).
120      */
121     protected int currentLinePos;
122 
123     /**
124      * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding.
125      * This variable helps track that.
126      */
127     protected int modulus;
128 
129     /**
130      * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
131      * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
132      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
133      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
134      * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
135      * @param chunkSeparatorLength the chunk separator length, if relevant
136      */
BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength)137     protected BaseNCodec(int unencodedBlockSize, int encodedBlockSize, int lineLength, int chunkSeparatorLength){
138         this.unencodedBlockSize = unencodedBlockSize;
139         this.encodedBlockSize = encodedBlockSize;
140         this.lineLength = (lineLength > 0  && chunkSeparatorLength > 0) ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
141         this.chunkSeparatorLength = chunkSeparatorLength;
142     }
143 
144     /**
145      * Returns true if this object has buffered data for reading.
146      *
147      * @return true if there is data still available for reading.
148      */
hasData()149     boolean hasData() {  // package protected for access from I/O streams
150         return this.buffer != null;
151     }
152 
153     /**
154      * Returns the amount of buffered data available for reading.
155      *
156      * @return The amount of buffered data available for reading.
157      */
available()158     int available() {  // package protected for access from I/O streams
159         return buffer != null ? pos - readPos : 0;
160     }
161 
162     /**
163      * Get the default buffer size. Can be overridden.
164      *
165      * @return {@link #DEFAULT_BUFFER_SIZE}
166      */
getDefaultBufferSize()167     protected int getDefaultBufferSize() {
168         return DEFAULT_BUFFER_SIZE;
169     }
170 
171     /** Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. */
resizeBuffer()172     private void resizeBuffer() {
173         if (buffer == null) {
174             buffer = new byte[getDefaultBufferSize()];
175             pos = 0;
176             readPos = 0;
177         } else {
178             byte[] b = new byte[buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
179             System.arraycopy(buffer, 0, b, 0, buffer.length);
180             buffer = b;
181         }
182     }
183 
184     /**
185      * Ensure that the buffer has room for <code>size</code> bytes
186      *
187      * @param size minimum spare space required
188      */
ensureBufferSize(int size)189     protected void ensureBufferSize(int size){
190         if ((buffer == null) || (buffer.length < pos + size)){
191             resizeBuffer();
192         }
193     }
194 
195     /**
196      * Extracts buffered data into the provided byte[] array, starting at position bPos,
197      * up to a maximum of bAvail bytes. Returns how many bytes were actually extracted.
198      *
199      * @param b
200      *            byte[] array to extract the buffered data into.
201      * @param bPos
202      *            position in byte[] array to start extraction at.
203      * @param bAvail
204      *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
205      * @return The number of bytes successfully extracted into the provided byte[] array.
206      */
readResults(byte[] b, int bPos, int bAvail)207     int readResults(byte[] b, int bPos, int bAvail) {  // package protected for access from I/O streams
208         if (buffer != null) {
209             int len = Math.min(available(), bAvail);
210             System.arraycopy(buffer, readPos, b, bPos, len);
211             readPos += len;
212             if (readPos >= pos) {
213                 buffer = null; // so hasData() will return false, and this method can return -1
214             }
215             return len;
216         }
217         return eof ? -1 : 0;
218     }
219 
220     /**
221      * Checks if a byte value is whitespace or not.
222      * Whitespace is taken to mean: space, tab, CR, LF
223      * @param byteToCheck
224      *            the byte to check
225      * @return true if byte is whitespace, false otherwise
226      */
isWhiteSpace(byte byteToCheck)227     protected static boolean isWhiteSpace(byte byteToCheck) {
228         switch (byteToCheck) {
229             case ' ' :
230             case '\n' :
231             case '\r' :
232             case '\t' :
233                 return true;
234             default :
235                 return false;
236         }
237     }
238 
239     /**
240      * Resets this object to its initial newly constructed state.
241      */
reset()242     private void reset() {
243         buffer = null;
244         pos = 0;
245         readPos = 0;
246         currentLinePos = 0;
247         modulus = 0;
248         eof = false;
249     }
250 
251     /**
252      * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of the
253      * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
254      *
255      * @param pObject
256      *            Object to encode
257      * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
258      * @throws EncoderException
259      *             if the parameter supplied is not of type byte[]
260      */
encode(Object pObject)261     public Object encode(Object pObject) throws EncoderException {
262         if (!(pObject instanceof byte[])) {
263             throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
264         }
265         return encode((byte[]) pObject);
266     }
267 
268     /**
269      * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
270      *
271      * @param pArray
272      *            a byte array containing binary data
273      * @return A String containing only Base-N character data
274      */
encodeToString(byte[] pArray)275     public String encodeToString(byte[] pArray) {
276         return StringUtils.newStringUtf8(encode(pArray));
277     }
278 
279     /**
280      * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of the
281      * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
282      *
283      * @param pObject
284      *            Object to decode
285      * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String supplied.
286      * @throws DecoderException
287      *             if the parameter supplied is not of type byte[]
288      */
decode(Object pObject)289     public Object decode(Object pObject) throws DecoderException {
290         if (pObject instanceof byte[]) {
291             return decode((byte[]) pObject);
292         } else if (pObject instanceof String) {
293             return decode((String) pObject);
294         } else {
295             throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
296         }
297     }
298 
299     /**
300      * Decodes a String containing characters in the Base-N alphabet.
301      *
302      * @param pArray
303      *            A String containing Base-N character data
304      * @return a byte array containing binary data
305      */
decode(String pArray)306     public byte[] decode(String pArray) {
307         return decode(StringUtils.getBytesUtf8(pArray));
308     }
309 
310     /**
311      * Decodes a byte[] containing characters in the Base-N alphabet.
312      *
313      * @param pArray
314      *            A byte array containing Base-N character data
315      * @return a byte array containing binary data
316      */
decode(byte[] pArray)317     public byte[] decode(byte[] pArray) {
318         reset();
319         if (pArray == null || pArray.length == 0) {
320             return pArray;
321         }
322         decode(pArray, 0, pArray.length);
323         decode(pArray, 0, -1); // Notify decoder of EOF.
324         byte[] result = new byte[pos];
325         readResults(result, 0, result.length);
326         return result;
327     }
328 
329     /**
330      * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
331      *
332      * @param pArray
333      *            a byte array containing binary data
334      * @return A byte array containing only the basen alphabetic character data
335      */
encode(byte[] pArray)336     public byte[] encode(byte[] pArray) {
337         reset();
338         if (pArray == null || pArray.length == 0) {
339             return pArray;
340         }
341         encode(pArray, 0, pArray.length);
342         encode(pArray, 0, -1); // Notify encoder of EOF.
343         byte[] buf = new byte[pos - readPos];
344         readResults(buf, 0, buf.length);
345         return buf;
346     }
347 
348     /**
349      * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
350      * Uses UTF8 encoding.
351      *
352      * @param pArray a byte array containing binary data
353      * @return String containing only character data in the appropriate alphabet.
354     */
encodeAsString(byte[] pArray)355     public String encodeAsString(byte[] pArray){
356         return StringUtils.newStringUtf8(encode(pArray));
357     }
358 
encode(byte[] pArray, int i, int length)359     abstract void encode(byte[] pArray, int i, int length);  // package protected for access from I/O streams
360 
decode(byte[] pArray, int i, int length)361     abstract void decode(byte[] pArray, int i, int length); // package protected for access from I/O streams
362 
363     /**
364      * Returns whether or not the <code>octet</code> is in the current alphabet.
365      * Does not allow whitespace or pad.
366      *
367      * @param value The value to test
368      *
369      * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
370      */
isInAlphabet(byte value)371     protected abstract boolean isInAlphabet(byte value);
372 
373     /**
374      * Tests a given byte array to see if it contains only valid characters within the alphabet.
375      * The method optionally treats whitespace and pad as valid.
376      *
377      * @param arrayOctet byte array to test
378      * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
379      *
380      * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
381      *         <code>false</code>, otherwise
382      */
isInAlphabet(byte[] arrayOctet, boolean allowWSPad)383     public boolean isInAlphabet(byte[] arrayOctet, boolean allowWSPad) {
384         for (int i = 0; i < arrayOctet.length; i++) {
385             if (!isInAlphabet(arrayOctet[i]) &&
386                     (!allowWSPad || (arrayOctet[i] != PAD) && !isWhiteSpace(arrayOctet[i]))) {
387                 return false;
388             }
389         }
390         return true;
391     }
392 
393     /**
394      * Tests a given String to see if it contains only valid characters within the alphabet.
395      * The method treats whitespace and PAD as valid.
396      *
397      * @param basen String to test
398      * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
399      *         the String is empty; <code>false</code>, otherwise
400      * @see #isInAlphabet(byte[], boolean)
401      */
isInAlphabet(String basen)402     public boolean isInAlphabet(String basen) {
403         return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
404     }
405 
406     /**
407      * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
408      *
409      * Intended for use in checking line-ending arrays
410      *
411      * @param arrayOctet
412      *            byte array to test
413      * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
414      */
containsAlphabetOrPad(byte[] arrayOctet)415     protected boolean containsAlphabetOrPad(byte[] arrayOctet) {
416         if (arrayOctet == null) {
417             return false;
418         }
419         for (int i = 0; i < arrayOctet.length; i++) {
420             if (PAD == arrayOctet[i] || isInAlphabet(arrayOctet[i])) {
421                 return true;
422             }
423         }
424         return false;
425     }
426 
427     /**
428      * Calculates the amount of space needed to encode the supplied array.
429      *
430      * @param pArray byte[] array which will later be encoded
431      *
432      * @return amount of space needed to encoded the supplied array.
433      * Returns a long since a max-len array will require > Integer.MAX_VALUE
434      */
getEncodedLength(byte[] pArray)435     public long getEncodedLength(byte[] pArray) {
436         // Calculate non-chunked size - rounded up to allow for padding
437         // cast to long is needed to avoid possibility of overflow
438         long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
439         if (lineLength > 0) { // We're using chunking
440             // Round up to nearest multiple
441             len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
442         }
443         return len;
444     }
445 }
446