1 /*********************************************************************\
2 
3 MODULE NAME:    b64.c
4 
5 AUTHOR:         Bob Trower 08/04/01
6 
7 PROJECT:        Crypt Data Packaging
8 
9 COPYRIGHT:      Copyright (c) Trantor Standard Systems Inc., 2001
10 
11 NOTE:           This source code may be used as you wish, subject to
12                 the MIT license.  See the LICENCE section below.
13 
14 DESCRIPTION:
15                 This little utility implements the Base64
16                 Content-Transfer-Encoding standard described in
17                 RFC1113 (http://www.faqs.org/rfcs/rfc1113.html).
18 
19                 This is the coding scheme used by MIME to allow
20                 binary data to be transferred by SMTP mail.
21 
22                 Groups of 3 bytes from a binary stream are coded as
23                 groups of 4 bytes in a text stream.
24 
25                 The input stream is 'padded' with zeros to create
26                 an input whose length is a multiple of 3 bytes.
27 
28                 A special character ('=') is used to denote padding so
29                 that the stream can be decoded back to its exact size.
30 
31                 Encoded output is formatted in lines which should
32                 be a maximum of 72 characters to conform to the
33                 specification.  This program defaults to 72 characters,
34                 but will allow more or less through the use of a
35                 switch.  The program enforces a minimum line size
36                 of 4 characters.
37 
38                 Example encoding:
39 
40                 The stream 'ABCD' is 32 bits long.  It is mapped as
41                 follows:
42 
43                 ABCD
44 
45                  A (65)     B (66)     C (67)     D (68)   (None) (None)
46                 01000001   01000010   01000011   01000100
47 
48                 16 (Q)  20 (U)  9 (J)   3 (D)    17 (R) 0 (A)  NA (=) NA (=)
49                 010000  010100  001001  000011   010001 000000 000000 000000
50 
51 
52                 QUJDRA==
53 
54                 Decoding is the process in reverse.  A 'decode' lookup
55                 table has been created to avoid string scans.
56 
57 DESIGN GOALS:	Specifically:
58 		Code is a stand-alone utility to perform base64
59 		encoding/decoding. It should be genuinely useful
60 		when the need arises and it meets a need that is
61 		likely to occur for some users.
62 		Code acts as sample code to show the author's
63 		design and coding style.
64 
65 		Generally:
66 		This program is designed to survive:
67 		Everything you need is in a single source file.
68 		It compiles cleanly using a vanilla ANSI C compiler.
69 		It does its job correctly with a minimum of fuss.
70 		The code is not overly clever, not overly simplistic
71 		and not overly verbose.
72 		Access is 'cut and paste' from a web page.
73 		Terms of use are reasonable.
74 
75 VALIDATION:     Non-trivial code is never without errors.  This
76                 file likely has some problems, since it has only
77                 been tested by the author.  It is expected with most
78                 source code that there is a period of 'burn-in' when
79                 problems are identified and corrected.  That being
80                 said, it is possible to have 'reasonably correct'
81                 code by following a regime of unit test that covers
82                 the most likely cases and regression testing prior
83                 to release.  This has been done with this code and
84                 it has a good probability of performing as expected.
85 
86                 Unit Test Cases:
87 
88                 case 0:empty file:
89                     CASE0.DAT  ->  ->
90                     (Zero length target file created
91                     on both encode and decode.)
92 
93                 case 1:One input character:
94                     CASE1.DAT A -> QQ== -> A
95 
96                 case 2:Two input characters:
97                     CASE2.DAT AB -> QUI= -> AB
98 
99                 case 3:Three input characters:
100                     CASE3.DAT ABC -> QUJD -> ABC
101 
102                 case 4:Four input characters:
103                     case4.dat ABCD -> QUJDRA== -> ABCD
104 
105                 case 5:All chars from 0 to ff, linesize set to 50:
106 
107                     AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIj
108                     JCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZH
109                     SElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWpr
110                     bG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6P
111                     kJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKz
112                     tLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX
113                     2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7
114                     /P3+/w==
115 
116                 case 6:Mime Block from e-mail:
117                     (Data same as test case 5)
118 
119                 case 7: Large files:
120                     Tested 28 MB file in/out.
121 
122                 case 8: Random Binary Integrity:
123                     This binary program (b64.exe) was encoded to base64,
124                     back to binary and then executed.
125 
126                 case 9 Stress:
127                     All files in a working directory encoded/decoded
128                     and compared with file comparison utility to
129                     ensure that multiple runs do not cause problems
130                     such as exhausting file handles, tmp storage, etc.
131 
132                 -------------
133 
134                 Syntax, operation and failure:
135                     All options/switches tested.  Performs as
136                     expected.
137 
138                 case 10:
139                     No Args -- Shows Usage Screen
140                     Return Code 1 (Invalid Syntax)
141                 case 11:
142                     One Arg (invalid) -- Shows Usage Screen
143                     Return Code 1 (Invalid Syntax)
144                 case 12:
145                     One Arg Help (-?) -- Shows detailed Usage Screen.
146                     Return Code 0 (Success -- help request is valid).
147                 case 13:
148                     One Arg Help (-h) -- Shows detailed Usage Screen.
149                     Return Code 0 (Success -- help request is valid).
150                 case 14:
151                     One Arg (valid) -- Uses stdin/stdout (filter)
152                     Return Code 0 (Success)
153                 case 15:
154                     Two Args (invalid file) -- shows system error.
155                     Return Code 2 (File Error)
156                 case 16:
157                     Encode non-existent file -- shows system error.
158                     Return Code 2 (File Error)
159                 case 17:
160                     Out of disk space -- shows system error.
161                     Return Code 3 (File I/O Error)
162 
163                 -------------
164 
165                 Compile/Regression test:
166                     gcc compiled binary under Cygwin
167                     Microsoft Visual Studio under Windows 2000
168                     Microsoft Version 6.0 C under Windows 2000
169 
170 DEPENDENCIES:   None
171 
172 LICENCE:        Copyright (c) 2001 Bob Trower, Trantor Standard Systems Inc.
173 
174                 Permission is hereby granted, free of charge, to any person
175                 obtaining a copy of this software and associated
176                 documentation files (the "Software"), to deal in the
177                 Software without restriction, including without limitation
178                 the rights to use, copy, modify, merge, publish, distribute,
179                 sublicense, and/or sell copies of the Software, and to
180                 permit persons to whom the Software is furnished to do so,
181                 subject to the following conditions:
182 
183                 The above copyright notice and this permission notice shall
184                 be included in all copies or substantial portions of the
185                 Software.
186 
187                 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
188                 KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
189                 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
190                 PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
191                 OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
192                 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
193                 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
194                 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
195 
196 VERSION HISTORY:
197                 Bob Trower 08/04/01 -- Create Version 0.00.00B
198 
199 \******************************************************************* */
200 
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include "b64.h"
206 
/*
** Encoding alphabet as described in RFC1113: the character at index n
** represents the 6-bit value n.
*/
static const char cb64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"	/* values  0 to 25 */
	"abcdefghijklmnopqrstuvwxyz"	/* values 26 to 51 */
	"0123456789"			/* values 52 to 61 */
	"+/";				/* values 62 and 63 */
211 
/*
** Decoding table (created by author), indexed by (character - 43) and
** covering the character codes 43 ('+') up to 122 ('z').
**
** A '$' entry marks a character that is not part of the base64 alphabet.
** Any other entry holds the character's 6-bit value plus 62, so that no
** valid entry is ever zero or '$'.
*/
static const char cd64[] =
	"|$$$}"				/* plus sign up to slash */
	"rstuvwxyz{"			/* digits 0 to 9 */
	"$$$$$$$"			/* colon up to at-sign, includes '=' */
	">?@ABCDEFGHIJKLMNOPQRSTUVW"	/* upper case A to Z */
	"$$$$$$"			/* the six codes between Z and a */
	"XYZ[\\]^_`abcdefghijklmnopq";	/* lower case a to z */
216 
217 /*
218 ** encodeblock
219 **
220 ** encode 3 8-bit binary bytes as 4 '6-bit' characters
221 */
222 static void
223 encodeblock(uint8_t *wordin, uint8_t *wordout, int wordlen)
224 {
225 	wordout[0] = cb64[(unsigned)wordin[0] >> 2];
226 	wordout[1] = cb64[((unsigned)(wordin[0] & 0x03) << 4) | ((unsigned)(wordin[1] & 0xf0) >> 4)];
227 	wordout[2] = (uint8_t)(wordlen > 1) ?
228 		cb64[((unsigned)(wordin[1] & 0x0f) << 2) | ((unsigned)(wordin[2] & 0xc0) >> 6)] : '=';
229 	wordout[3] = (uint8_t)(wordlen > 2) ? cb64[wordin[2] & 0x3f] : '=';
230 }
231 
/*
** b64encode
**
** Base64 encode insize bytes starting at in, storing the result in the
** buffer vp, which has room for outsize bytes.
**
** Each group of three input bytes becomes four output characters; a
** final group of one or two bytes is zero padded and marked with '='.
** When linesize is greater than zero, a CR LF pair is written after
** every (linesize / 4) groups and after the last group.  A linesize of
** 1 to 3 gives a quotient of zero, so CR LF then follows every group.
**
** No byte beyond in[insize - 1] is read and no byte beyond
** vp[outsize - 1] is written: encoding stops on a group boundary as
** soon as the next group, or the line break that follows it, does not
** fit.  The output is not NUL terminated.
**
** Returns the number of bytes stored in vp, or 0 when in or vp is NULL.
*/
int
b64encode(const char *in, const size_t insize, void *vp, size_t outsize, int linesize)
{
	uint8_t		 wordout[4];
	uint8_t		 wordin[3];
	size_t		 blocksperline;
	size_t		 blocksout;
	size_t		 inpos;
	size_t		 outpos;
	unsigned	 i;
	char		*out = vp;
	int		 wordlen;

	if (in == NULL || vp == NULL) {
		return 0;
	}
	/* never produce more than the int return value can report */
	if (outsize > (size_t)INT_MAX) {
		outsize = (size_t)INT_MAX;
	}
	blocksperline = (linesize > 0) ? (size_t)linesize / sizeof(wordout) : 0;
	blocksout = 0;
	inpos = 0;
	outpos = 0;
	while (inpos < insize) {
		/* outpos never exceeds outsize, so the subtraction cannot wrap */
		if (outsize - outpos < sizeof(wordout)) {
			break;
		}
		/* collect up to three bytes, zero filling a short final group */
		for (wordlen = 0, i = 0; i < sizeof(wordin); i++) {
			if (inpos < insize) {
				wordin[i] = (uint8_t) in[inpos++];
				wordlen++;
			} else {
				wordin[i] = 0x0;
			}
		}
		encodeblock(wordin, wordout, wordlen);
		for (i = 0; i < sizeof(wordout); i++) {
			out[outpos++] = (char) wordout[i];
		}
		blocksout++;
		/* end the line when it is full or when the input is used up */
		if (linesize > 0 && (blocksout >= blocksperline || inpos >= insize)) {
			if (outsize - outpos < 2) {
				break;
			}
			out[outpos++] = '\r';
			out[outpos++] = '\n';
			blocksout = 0;
		}
	}
	return (int)outpos;
}
282 
/*
** decodeblock
**
** Pack four 6-bit values into three 8-bit bytes.
**
** wordin  - four values, normally each in the range 0 to 63
** wordout - receives the three resulting bytes
*/
static void
decodeblock(uint8_t wordin[4], uint8_t wordout[3])
{
	uint32_t	bits;

	/* line the four values up as one 24-bit quantity, first value on top */
	bits = ((uint32_t)wordin[0] << 18) |
	       ((uint32_t)wordin[1] << 12) |
	       ((uint32_t)wordin[2] << 6) |
	        (uint32_t)wordin[3];

	/* then peel it off eight bits at a time */
	wordout[0] = (uint8_t)(bits >> 16);
	wordout[1] = (uint8_t)(bits >> 8);
	wordout[2] = (uint8_t)bits;
}
295 
296 /*
297 ** decode
298 **
299 ** decode a base64 encoded stream discarding padding, line breaks and noise
300 */
301 int
302 b64decode(const char *in, const size_t insize, void *vp, size_t outsize)
303 {
304 	const char	*inp;
305 	unsigned	 wordlen;
306 	unsigned	 i;
307 	uint8_t    	 wordout[3];
308 	uint8_t    	 wordin[4];
309 	uint8_t    	 v;
310 	char		*out = vp;
311 	char		*outp;
312 
313 	if (in == NULL || vp == NULL) {
314 		return 0;
315 	}
316 	for (inp = in, outp = out ; (size_t)(inp - in) < insize && (size_t)(outp - out) < outsize ; ) {
317 		for (wordlen = 0, i = 0 ; i < sizeof(wordin) && (size_t)(inp - in) < insize ; i++) {
318 			/* get a single character */
319 			for (v = 0; (size_t)(inp - in) <= insize && v == 0 ; ) {
320 				if (*inp == '\r' && *(inp + 1) == '\n') {
321 					inp += 2;
322 				} else {
323 					v = (uint8_t) *inp++;
324 					v = (uint8_t) ((v < 43 || v > 122) ? 0 : cd64[v - 43]);
325 					if (v) {
326 						v = (uint8_t) ((v == '$') ? 0 : v - 61);
327 					}
328 				}
329 			}
330 			/* perhaps 0x0 pad */
331 			if ((size_t)(inp - in) <= insize) {
332 				wordlen += 1;
333 				if (v) {
334 					wordin[i] = (uint8_t) (v - 1);
335 				}
336 			} else {
337 				wordin[i] = 0x0;
338 			}
339 		}
340 		if (wordlen > 0) {
341 			decodeblock(wordin, wordout);
342 			for (i = 0; i < wordlen - 1 ; i++) {
343 				*outp++ = wordout[i];
344 			}
345 		}
346 	}
347 	return (int)(outp - out);
348 }
349 
/*
** b64_encsize
**
** Return a buffer size that is large enough to hold the base64 encoding
** of n input bytes: (4 * n) / 3 + 4.  The CR LF pairs that b64encode
** adds when it is given a positive linesize are NOT included.
**
** The value is computed without forming 4 * n, which would wrap around
** in unsigned arithmetic for large n and give a size that is far too
** small.  A result that does not fit in an int is reported as INT_MAX.
*/
int
b64_encsize(unsigned n)
{
	const unsigned	maxsize = (unsigned)INT_MAX;
	const unsigned	groups = n / 3;
	/* (4 * n) / 3 == 4 * (n / 3) + (4 * (n % 3)) / 3 */
	const unsigned	partial = (4 * (n % 3)) / 3;

	if (groups > (maxsize - partial - 4) / 4) {
		return INT_MAX;
	}
	return (int)(4 * groups + partial + 4);
}
356