1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id$
20  */
21 
22 #if !defined(XERCESC_INCLUDE_GUARD_BASE64_HPP)
23 #define XERCESC_INCLUDE_GUARD_BASE64_HPP
24 
25 #include <xercesc/util/XercesDefs.hpp>
26 #include <xercesc/util/XMLUniDefs.hpp>
27 #include <xercesc/framework/MemoryManager.hpp>
28 
29 XERCES_CPP_NAMESPACE_BEGIN
30 
//
// This class provides encoding and decoding of Base64 data as defined by
// RFC 2045 (N. Freed and N. Borenstein, "Multipurpose Internet Mail
// Extensions (MIME) Part One: Format of Internet Message Bodies", 1996),
// available at: http://www.ietf.org/rfc/rfc2045.txt
// This class is used by XML Schema binary format validation.
//
40 class XMLUTIL_EXPORT Base64
41 {
42 public :
43 
44     enum Conformance
45     {
46         Conf_RFC2045
47       , Conf_Schema
48     };
49 
50     //@{
51 
52     /**
53      * Encodes octets into Base64 data
54      *
55      * NOTE: The returned buffer is dynamically allocated and is the
56      * responsibility of the caller to delete it when not longer needed.
57      * Use the memory manager to release the returned buffer or
58      * operator delete() if none was provided.
59      *
60      * @param inputData Binary data in XMLByte stream.
61      * @param inputLength Length of the XMLByte stream.
62      * @param outputLength Length of the encoded Base64 byte stream.
63      * @param memMgr client provided memory manager
64      * @return Encoded Base64 data in XMLByte stream,
65      *      or NULL if input data can not be encoded.
66      */
67     static XMLByte* encode(const XMLByte* const inputData
68                          , const XMLSize_t      inputLength
69                          , XMLSize_t*           outputLength
70                          , MemoryManager* const memMgr = 0);
71 
72     /**
73      * Decodes Base64 data into octets
74      *
75      * NOTE: The returned buffer is dynamically allocated and is the
76      * responsibility of the caller to delete it when not longer needed.
77      * Use the memory manager to release the returned buffer or
78      * operator delete() if none was provided.
79      *
80      * @param inputData Base64 data in XMLByte stream.
81      * @param decodedLength Length of decoded XMLByte stream.
82      * @param memMgr client provided memory manager
83      * @param conform conformance specified: if the input data conforms to the
84      *                RFC 2045 it is allowed to have any number of whitespace
85      *                characters inside; if it conforms to the XMLSchema specs,
86      *                it is allowed to have at most one whitespace character
87      *                between the quartets
88      * @return Decoded binary data in XMLByte stream,
89      *      or NULL if input data can not be decoded.
90      */
91     static XMLByte* decode(
92                            const XMLByte*        const   inputData
93                          ,       XMLSize_t*              decodedLength
94                          ,       MemoryManager*  const   memMgr = 0
95                          ,       Conformance             conform = Conf_RFC2045
96                           );
97 
98    /**
99      * Decodes Base64 data into octets
100      *
101      * NOTE: The returned buffer is dynamically allocated and is the
102      * responsibility of the caller to delete it when not longer needed.
103      * Use the memory manager to release the returned buffer or
104      * operator delete() if none was provided.
105      *
106      * @param inputData Base64 data in XMLCh stream.
107      * @param decodedLength Length of decoded XMLByte stream.
108      * @param memMgr client provided memory manager
109      * @param conform conformance specified: if the input data conforms to the
110      *                RFC 2045 it is allowed to have any number of whitespace
111      *                characters inside; if it conforms to the XMLSchema specs,
112      *                it is allowed to have at most one whitespace character
113      *                between the quartets
114      * @return Decoded binary data in XMLByte stream,
115      *      or NULL if input data can not be decoded.
116      */
117     static XMLByte* decodeToXMLByte(
118                            const XMLCh*          const   inputData
119                          ,       XMLSize_t*              decodedLength
120                          ,       MemoryManager*  const   memMgr = 0
121                          ,       Conformance             conform = Conf_RFC2045
122                           );
123     /**
124      * Get data length
125 	 *
126      * Returns length of decoded data given an array
127      * containing encoded data.
128      *
129      * @param inputData Base64 data in XMLCh stream.
130      * @param memMgr client provided memory manager
131      * @param conform conformance specified
132      * @return Length of decoded data,
133 	 *      or -1 if input data can not be decoded.
134      */
135     static int getDataLength(
136                              const XMLCh*         const  inputData
137                             ,      MemoryManager* const  memMgr = 0
138                             ,      Conformance           conform = Conf_RFC2045
139                              );
140 
141     //@}
142 
143      /**
144      * get canonical representation
145      *
146      * Caller is responsible for the proper deallocation
147      * of the string returned.
148      *
149      * @param inputData A string containing the Base64
150      * @param memMgr client provided memory manager
151      * @param conform conformance specified
152      *
153      * return: the canonical representation of the Base64
154      *         if it is a valid Base64
155      *         0 otherwise
156      */
157 
158     static XMLCh* getCanonicalRepresentation
159                   (
160                       const XMLCh*          const inputData
161                     ,       MemoryManager*  const memMgr = 0
162                     ,       Conformance           conform = Conf_RFC2045
163                   );
164 
165 private :
166 
167     // -----------------------------------------------------------------------
168     //  Helper methods
169     // -----------------------------------------------------------------------
170 
171     static XMLByte* decode(
172                            const XMLByte*        const   inputData
173                          ,       XMLSize_t*              outputLength
174                          ,       XMLByte*&               canRepData
175                          ,       MemoryManager*  const   memMgr = 0
176                          ,       Conformance             conform = Conf_RFC2045
177                           );
178 
179     static bool isData(const XMLByte& octet);
180     static bool isPad(const XMLByte& octet);
181 
182     static XMLByte set1stOctet(const XMLByte&, const XMLByte&);
183     static XMLByte set2ndOctet(const XMLByte&, const XMLByte&);
184     static XMLByte set3rdOctet(const XMLByte&, const XMLByte&);
185 
186     static void split1stOctet(const XMLByte&, XMLByte&, XMLByte&);
187     static void split2ndOctet(const XMLByte&, XMLByte&, XMLByte&);
188     static void split3rdOctet(const XMLByte&, XMLByte&, XMLByte&);
189 
190     // -----------------------------------------------------------------------
191     //  Unimplemented constructors and operators
192     // -----------------------------------------------------------------------
193     Base64();
194     Base64(const Base64&);
195 
196     // -----------------------------------------------------------------------
197     //  Private data members
198     //
199     //  base64Alphabet
200     //     The Base64 alphabet (see RFC 2045).
201     //
202     //  base64Padding
203     //     Padding character (see RFC 2045).
204     //
205     //  base64Inverse
206     //     Table used in decoding base64.
207     //
208     //  isInitialized
209     //     Set once base64Inverse is initialized.
210     //
211     //  quadsPerLine
212     //     Number of quadruplets per one line. The encoded output
213     //     stream must be represented in lines of no more
214     //     than 19 quadruplets each.
215     //
216     // -----------------------------------------------------------------------
217 
218     static const XMLByte  base64Alphabet[];
219     static const XMLByte  base64Padding;
220 
221     static const XMLByte  base64Inverse[];
222 
223     static const unsigned int  quadsPerLine;
224 };
225 
226 // -----------------------------------------------------------------------
227 //  Helper methods
228 // -----------------------------------------------------------------------
isPad(const XMLByte & octet)229 inline bool Base64::isPad(const XMLByte& octet)
230 {
231     return ( octet == base64Padding );
232 }
233 
// Builds one octet from b1 shifted left by two bits OR-ed with b2 shifted
// right by four bits. The combined value is converted back to XMLByte, so
// any bits that do not fit in an XMLByte are discarded.
inline XMLByte Base64::set1stOctet(const XMLByte& b1, const XMLByte& b2)
{
    return static_cast<XMLByte>((b1 << 2) | (b2 >> 4));
}
238 
// Builds one octet from b2 shifted left by four bits OR-ed with b3 shifted
// right by two bits. The combined value is converted back to XMLByte, so
// any bits that do not fit in an XMLByte are discarded.
inline XMLByte Base64::set2ndOctet(const XMLByte& b2, const XMLByte& b3)
{
    return static_cast<XMLByte>((b2 << 4) | (b3 >> 2));
}
243 
// Builds one octet from b3 shifted left by six bits OR-ed with b4 as is.
// The combined value is converted back to XMLByte, so any bits that do not
// fit in an XMLByte are discarded.
inline XMLByte Base64::set3rdOctet(const XMLByte& b3, const XMLByte& b4)
{
    return static_cast<XMLByte>((b3 << 6) | b4);
}
248 
split1stOctet(const XMLByte & ch,XMLByte & b1,XMLByte & b2)249 inline void Base64::split1stOctet(const XMLByte& ch, XMLByte& b1, XMLByte& b2) {
250     b1 = ch >> 2;
251     b2 = ( ch & 0x3 ) << 4;
252 }
253 
split2ndOctet(const XMLByte & ch,XMLByte & b2,XMLByte & b3)254 inline void Base64::split2ndOctet(const XMLByte& ch, XMLByte& b2, XMLByte& b3) {
255     b2 |= ch >> 4;  // combine with previous value
256     b3 = ( ch & 0xf ) << 2;
257 }
258 
split3rdOctet(const XMLByte & ch,XMLByte & b3,XMLByte & b4)259 inline void Base64::split3rdOctet(const XMLByte& ch, XMLByte& b3, XMLByte& b4) {
260     b3 |= ch >> 6;  // combine with previous value
261     b4 = ( ch & 0x3f );
262 }
263 
264 XERCES_CPP_NAMESPACE_END
265 
266 #endif
267