1 // Mozilla has modified this file - see http://hg.mozilla.org/ for details.
2 /*
3  * Licensed to the Apache Software Foundation (ASF) under one or more
4  * contributor license agreements.  See the NOTICE file distributed with
5  * this work for additional information regarding copyright ownership.
6  * The ASF licenses this file to You under the Apache License, Version 2.0
7  * (the "License"); you may not use this file except in compliance with
8  * the License.  You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 package org.mozilla.apache.commons.codec.net;
20 
21 import java.io.UnsupportedEncodingException;
22 
23 import org.mozilla.apache.commons.codec.DecoderException;
24 import org.mozilla.apache.commons.codec.EncoderException;
25 import org.mozilla.apache.commons.codec.binary.StringUtils;
26 
27 /**
28  * <p>
29  * Implements methods common to all codecs defined in RFC 1522.
30  * </p>
31  *
32  * <p>
33  * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a>
34  * describes techniques to allow the encoding of non-ASCII text in
35  * various portions of a RFC 822 [2] message header, in a manner which
36  * is unlikely to confuse existing message handling software.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">
 * MIME (Multipurpose Internet Mail Extensions) Part Two:
 * Message Header Extensions for Non-ASCII Text</a>
43  *
44  * @author Apache Software Foundation
45  * @since 1.3
46  * @version $Id: RFC1522Codec.java 798428 2009-07-28 07:32:49Z ggregory $
47  */
48 abstract class RFC1522Codec {
49 
50     /**
51      * Separator.
52      */
53     protected static final char SEP = '?';
54 
55     /**
56      * Prefix
57      */
58     protected static final String POSTFIX = "?=";
59 
60     /**
61      * Postfix
62      */
63     protected static final String PREFIX = "=?";
64 
65     /**
66      * Applies an RFC 1522 compliant encoding scheme to the given string of text with the
67      * given charset. This method constructs the "encoded-word" header common to all the
68      * RFC 1522 codecs and then invokes {@link #doEncoding(byte [])} method of a concrete
69      * class to perform the specific enconding.
70      *
71      * @param text a string to encode
72      * @param charset a charset to be used
73      *
74      * @return RFC 1522 compliant "encoded-word"
75      *
76      * @throws EncoderException thrown if there is an error conidition during the Encoding
77      *  process.
78      * @throws UnsupportedEncodingException thrown if charset is not supported
79      *
80      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
81      */
encodeText(final String text, final String charset)82     protected String encodeText(final String text, final String charset)
83      throws EncoderException, UnsupportedEncodingException
84     {
85         if (text == null) {
86             return null;
87         }
88         StringBuffer buffer = new StringBuffer();
89         buffer.append(PREFIX);
90         buffer.append(charset);
91         buffer.append(SEP);
92         buffer.append(getEncoding());
93         buffer.append(SEP);
94         byte [] rawdata = doEncoding(text.getBytes(charset));
95         buffer.append(StringUtils.newStringUsAscii(rawdata));
96         buffer.append(POSTFIX);
97         return buffer.toString();
98     }
99 
100     /**
101      * Applies an RFC 1522 compliant decoding scheme to the given string of text. This method
102      * processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
103      * {@link #doEncoding(byte [])} method of a concrete class to perform the specific deconding.
104      *
105      * @param text a string to decode
106      * @return A new decoded String or <code>null</code> if the input is <code>null</code>.
107      *
108      * @throws DecoderException thrown if there is an error conidition during the Decoding
109      *  process.
110      * @throws UnsupportedEncodingException thrown if charset specified in the "encoded-word"
111      *  header is not supported
112      */
decodeText(final String text)113     protected String decodeText(final String text)
114      throws DecoderException, UnsupportedEncodingException
115     {
116         if (text == null) {
117             return null;
118         }
119         if ((!text.startsWith(PREFIX)) || (!text.endsWith(POSTFIX))) {
120             throw new DecoderException("RFC 1522 violation: malformed encoded content");
121         }
122         int terminator = text.length() - 2;
123         int from = 2;
124         int to = text.indexOf(SEP, from);
125         if (to == terminator) {
126             throw new DecoderException("RFC 1522 violation: charset token not found");
127         }
128         String charset = text.substring(from, to);
129         if (charset.equals("")) {
130             throw new DecoderException("RFC 1522 violation: charset not specified");
131         }
132         from = to + 1;
133         to = text.indexOf(SEP, from);
134         if (to == terminator) {
135             throw new DecoderException("RFC 1522 violation: encoding token not found");
136         }
137         String encoding = text.substring(from, to);
138         if (!getEncoding().equalsIgnoreCase(encoding)) {
139             throw new DecoderException("This codec cannot decode " +
140                 encoding + " encoded content");
141         }
142         from = to + 1;
143         to = text.indexOf(SEP, from);
144         byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
145         data = doDecoding(data);
146         return new String(data, charset);
147     }
148 
149     /**
150      * Returns the codec name (referred to as encoding in the RFC 1522)
151      *
152      * @return name of the codec
153      */
getEncoding()154     protected abstract String getEncoding();
155 
156     /**
157      * Encodes an array of bytes using the defined encoding scheme
158      *
159      * @param bytes Data to be encoded
160      *
161      * @return A byte array containing the encoded data
162      *
163      * @throws EncoderException thrown if the Encoder encounters a failure condition
164      *  during the encoding process.
165      */
doEncoding(byte[] bytes)166     protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
167 
168     /**
169      * Decodes an array of bytes using the defined encoding scheme
170      *
171      * @param bytes Data to be decoded
172      *
173      * @return a byte array that contains decoded data
174      *
175      * @throws DecoderException A decoder exception is thrown if a Decoder encounters a
176      *  failure condition during the decode process.
177      */
doDecoding(byte[] bytes)178     protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
179 }
180