/*********************************************************************\
213b8cf66Sagc 
313b8cf66Sagc MODULE NAME:    b64.c
413b8cf66Sagc 
513b8cf66Sagc AUTHOR:         Bob Trower 08/04/01
613b8cf66Sagc 
713b8cf66Sagc PROJECT:        Crypt Data Packaging
813b8cf66Sagc 
913b8cf66Sagc COPYRIGHT:      Copyright (c) Trantor Standard Systems Inc., 2001
1013b8cf66Sagc 
1113b8cf66Sagc NOTE:           This source code may be used as you wish, subject to
1213b8cf66Sagc                 the MIT license.  See the LICENCE section below.
1313b8cf66Sagc 
1413b8cf66Sagc DESCRIPTION:
1513b8cf66Sagc                 This little utility implements the Base64
1613b8cf66Sagc                 Content-Transfer-Encoding standard described in
1713b8cf66Sagc                 RFC1113 (http://www.faqs.org/rfcs/rfc1113.html).
1813b8cf66Sagc 
1913b8cf66Sagc                 This is the coding scheme used by MIME to allow
2013b8cf66Sagc                 binary data to be transferred by SMTP mail.
2113b8cf66Sagc 
2213b8cf66Sagc                 Groups of 3 bytes from a binary stream are coded as
2313b8cf66Sagc                 groups of 4 bytes in a text stream.
2413b8cf66Sagc 
2513b8cf66Sagc                 The input stream is 'padded' with zeros to create
2613b8cf66Sagc                 an input that is an even multiple of 3.
2713b8cf66Sagc 
2813b8cf66Sagc                 A special character ('=') is used to denote padding so
2913b8cf66Sagc                 that the stream can be decoded back to its exact size.
3013b8cf66Sagc 
3113b8cf66Sagc                 Encoded output is formatted in lines which should
3213b8cf66Sagc                 be a maximum of 72 characters to conform to the
3313b8cf66Sagc                 specification.  This program defaults to 72 characters,
3413b8cf66Sagc                 but will allow more or less through the use of a
3513b8cf66Sagc                 switch.  The program enforces a minimum line size
3613b8cf66Sagc                 of 4 characters.
3713b8cf66Sagc 
3813b8cf66Sagc                 Example encoding:
3913b8cf66Sagc 
4013b8cf66Sagc                 The stream 'ABCD' is 32 bits long.  It is mapped as
4113b8cf66Sagc                 follows:
4213b8cf66Sagc 
4313b8cf66Sagc                 ABCD
4413b8cf66Sagc 
4513b8cf66Sagc                  A (65)     B (66)     C (67)     D (68)   (None) (None)
4613b8cf66Sagc                 01000001   01000010   01000011   01000100
4713b8cf66Sagc 
4813b8cf66Sagc                 16 (Q)  20 (U)  9 (J)   3 (D)    17 (R) 0 (A)  NA (=) NA (=)
4913b8cf66Sagc                 010000  010100  001001  000011   010001 000000 000000 000000
5013b8cf66Sagc 
5113b8cf66Sagc 
5213b8cf66Sagc                 QUJDRA==
5313b8cf66Sagc 
5413b8cf66Sagc                 Decoding is the process in reverse.  A 'decode' lookup
5513b8cf66Sagc                 table has been created to avoid string scans.
5613b8cf66Sagc 
5713b8cf66Sagc DESIGN GOALS:	Specifically:
5813b8cf66Sagc 		Code is a stand-alone utility to perform base64
5913b8cf66Sagc 		encoding/decoding. It should be genuinely useful
6013b8cf66Sagc 		when the need arises and it meets a need that is
6113b8cf66Sagc 		likely to occur for some users.
6213b8cf66Sagc 		Code acts as sample code to show the author's
6313b8cf66Sagc 		design and coding style.
6413b8cf66Sagc 
6513b8cf66Sagc 		Generally:
6613b8cf66Sagc 		This program is designed to survive:
6713b8cf66Sagc 		Everything you need is in a single source file.
6813b8cf66Sagc 		It compiles cleanly using a vanilla ANSI C compiler.
6913b8cf66Sagc 		It does its job correctly with a minimum of fuss.
7013b8cf66Sagc 		The code is not overly clever, not overly simplistic
7113b8cf66Sagc 		and not overly verbose.
7213b8cf66Sagc 		Access is 'cut and paste' from a web page.
7313b8cf66Sagc 		Terms of use are reasonable.
7413b8cf66Sagc 
7513b8cf66Sagc VALIDATION:     Non-trivial code is never without errors.  This
7613b8cf66Sagc                 file likely has some problems, since it has only
7713b8cf66Sagc                 been tested by the author.  It is expected with most
7813b8cf66Sagc                 source code that there is a period of 'burn-in' when
7913b8cf66Sagc                 problems are identified and corrected.  That being
8013b8cf66Sagc                 said, it is possible to have 'reasonably correct'
8113b8cf66Sagc                 code by following a regime of unit test that covers
8213b8cf66Sagc                 the most likely cases and regression testing prior
8313b8cf66Sagc                 to release.  This has been done with this code and
8413b8cf66Sagc                 it has a good probability of performing as expected.
8513b8cf66Sagc 
8613b8cf66Sagc                 Unit Test Cases:
8713b8cf66Sagc 
8813b8cf66Sagc                 case 0:empty file:
8913b8cf66Sagc                     CASE0.DAT  ->  ->
9013b8cf66Sagc                     (Zero length target file created
9113b8cf66Sagc                     on both encode and decode.)
9213b8cf66Sagc 
9313b8cf66Sagc                 case 1:One input character:
9413b8cf66Sagc                     CASE1.DAT A -> QQ== -> A
9513b8cf66Sagc 
9613b8cf66Sagc                 case 2:Two input characters:
                    CASE2.DAT AB -> QUI= -> AB
9813b8cf66Sagc 
9913b8cf66Sagc                 case 3:Three input characters:
10013b8cf66Sagc                     CASE3.DAT ABC -> QUJD -> ABC
10113b8cf66Sagc 
10213b8cf66Sagc                 case 4:Four input characters:
10313b8cf66Sagc                     case4.dat ABCD -> QUJDRA== -> ABCD
10413b8cf66Sagc 
10513b8cf66Sagc                 case 5:All chars from 0 to ff, linesize set to 50:
10613b8cf66Sagc 
10713b8cf66Sagc                     AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIj
10813b8cf66Sagc                     JCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZH
10913b8cf66Sagc                     SElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWpr
11013b8cf66Sagc                     bG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6P
11113b8cf66Sagc                     kJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKz
11213b8cf66Sagc                     tLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX
11313b8cf66Sagc                     2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7
11413b8cf66Sagc                     /P3+/w==
11513b8cf66Sagc 
11613b8cf66Sagc                 case 6:Mime Block from e-mail:
11713b8cf66Sagc                     (Data same as test case 5)
11813b8cf66Sagc 
11913b8cf66Sagc                 case 7: Large files:
12013b8cf66Sagc                     Tested 28 MB file in/out.
12113b8cf66Sagc 
12213b8cf66Sagc                 case 8: Random Binary Integrity:
12313b8cf66Sagc                     This binary program (b64.exe) was encoded to base64,
12413b8cf66Sagc                     back to binary and then executed.
12513b8cf66Sagc 
12613b8cf66Sagc                 case 9 Stress:
12713b8cf66Sagc                     All files in a working directory encoded/decoded
12813b8cf66Sagc                     and compared with file comparison utility to
12913b8cf66Sagc                     ensure that multiple runs do not cause problems
13013b8cf66Sagc                     such as exhausting file handles, tmp storage, etc.
13113b8cf66Sagc 
13213b8cf66Sagc                 -------------
13313b8cf66Sagc 
13413b8cf66Sagc                 Syntax, operation and failure:
13513b8cf66Sagc                     All options/switches tested.  Performs as
13613b8cf66Sagc                     expected.
13713b8cf66Sagc 
13813b8cf66Sagc                 case 10:
13913b8cf66Sagc                     No Args -- Shows Usage Screen
14013b8cf66Sagc                     Return Code 1 (Invalid Syntax)
14113b8cf66Sagc                 case 11:
14213b8cf66Sagc                     One Arg (invalid) -- Shows Usage Screen
14313b8cf66Sagc                     Return Code 1 (Invalid Syntax)
14413b8cf66Sagc                 case 12:
14513b8cf66Sagc                     One Arg Help (-?) -- Shows detailed Usage Screen.
14613b8cf66Sagc                     Return Code 0 (Success -- help request is valid).
14713b8cf66Sagc                 case 13:
14813b8cf66Sagc                     One Arg Help (-h) -- Shows detailed Usage Screen.
14913b8cf66Sagc                     Return Code 0 (Success -- help request is valid).
15013b8cf66Sagc                 case 14:
15113b8cf66Sagc                     One Arg (valid) -- Uses stdin/stdout (filter)
                    Return Code 0 (Success)
15313b8cf66Sagc                 case 15:
15413b8cf66Sagc                     Two Args (invalid file) -- shows system error.
15513b8cf66Sagc                     Return Code 2 (File Error)
15613b8cf66Sagc                 case 16:
15713b8cf66Sagc                     Encode non-existent file -- shows system error.
15813b8cf66Sagc                     Return Code 2 (File Error)
15913b8cf66Sagc                 case 17:
16013b8cf66Sagc                     Out of disk space -- shows system error.
16113b8cf66Sagc                     Return Code 3 (File I/O Error)
16213b8cf66Sagc 
16313b8cf66Sagc                 -------------
16413b8cf66Sagc 
16513b8cf66Sagc                 Compile/Regression test:
16613b8cf66Sagc                     gcc compiled binary under Cygwin
16713b8cf66Sagc                     Microsoft Visual Studio under Windows 2000
16813b8cf66Sagc                     Microsoft Version 6.0 C under Windows 2000
16913b8cf66Sagc 
17013b8cf66Sagc DEPENDENCIES:   None
17113b8cf66Sagc 
17213b8cf66Sagc LICENCE:        Copyright (c) 2001 Bob Trower, Trantor Standard Systems Inc.
17313b8cf66Sagc 
17413b8cf66Sagc                 Permission is hereby granted, free of charge, to any person
17513b8cf66Sagc                 obtaining a copy of this software and associated
17613b8cf66Sagc                 documentation files (the "Software"), to deal in the
17713b8cf66Sagc                 Software without restriction, including without limitation
17813b8cf66Sagc                 the rights to use, copy, modify, merge, publish, distribute,
17913b8cf66Sagc                 sublicense, and/or sell copies of the Software, and to
18013b8cf66Sagc                 permit persons to whom the Software is furnished to do so,
18113b8cf66Sagc                 subject to the following conditions:
18213b8cf66Sagc 
18313b8cf66Sagc                 The above copyright notice and this permission notice shall
18413b8cf66Sagc                 be included in all copies or substantial portions of the
18513b8cf66Sagc                 Software.
18613b8cf66Sagc 
18713b8cf66Sagc                 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
18813b8cf66Sagc                 KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
18913b8cf66Sagc                 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
19013b8cf66Sagc                 PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
19113b8cf66Sagc                 OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19213b8cf66Sagc                 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19313b8cf66Sagc                 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
19413b8cf66Sagc                 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19513b8cf66Sagc 
19613b8cf66Sagc VERSION HISTORY:
19713b8cf66Sagc                 Bob Trower 08/04/01 -- Create Version 0.00.00B
19813b8cf66Sagc 
19913b8cf66Sagc \******************************************************************* */

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

#include "b64.h"

/*
** Translation Table as described in RFC1113
**
** Indexed by a 6-bit value (0..63); yields the output character.
*/
static const char cb64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
** Translation Table to decode (created by author)
**
** Indexed by (c - 43) for input characters 43 ('+') through 122 ('z').
** An entry of '$' marks a character outside the base64 alphabet
** (this includes the '=' pad character); any other entry e stands
** for the 6-bit value (e - 62).
*/
static const char cd64[] = "|$$$}rstuvwxyz{$$$$$$$>?@ABCDEFGHIJKLMNOPQRSTUVW$$$$$$XYZ[\\]^_`abcdefghijklmnopq";

/*
** encodeblock
**
** encode 3 8-bit binary bytes as 4 '6-bit' characters
**
** wordin must hold 3 bytes (unused trailing bytes zeroed by the
** caller); wordlen is the number of real input bytes (1..3) and
** selects how many '=' pad characters are written.
*/
static void
encodeblock(const uint8_t *wordin, uint8_t *wordout, int wordlen)
{
	wordout[0] = (uint8_t)cb64[(unsigned)wordin[0] >> 2];
	wordout[1] = (uint8_t)cb64[((unsigned)(wordin[0] & 0x03) << 4) |
	    ((unsigned)(wordin[1] & 0xf0) >> 4)];
	wordout[2] = (uint8_t)((wordlen > 1) ?
	    cb64[((unsigned)(wordin[1] & 0x0f) << 2) | ((unsigned)(wordin[2] & 0xc0) >> 6)] :
	    '=');
	wordout[3] = (uint8_t)((wordlen > 2) ? cb64[wordin[2] & 0x3f] : '=');
}

/*
** encode
**
** base64 encode a stream adding padding and line breaks as per spec.
**
** in/insize	input bytes and their count
** vp/outsize	output buffer and its capacity in bytes
** linesize	if > 0, a CRLF is written after every (linesize / 4)
**		encoded blocks (at least one block per line) and after
**		the final block; if <= 0 no line breaks are written
**
** Returns the number of bytes written to vp.  The output is not NUL
** terminated.  Encoding stops early, on a block boundary, if the next
** block or CRLF would not fit in the output buffer; nothing is ever
** read beyond in[insize - 1] or written beyond vp[outsize - 1].
*/
int
b64encode(const char *in, const size_t insize, void *vp, size_t outsize, int linesize)
{
	uint8_t		 wordout[4];
	uint8_t		 wordin[3];
	size_t		 inpos;
	size_t		 outpos;
	size_t		 i;
	char		*out = vp;
	int		 blocksperline;
	int		 blocksout;
	int		 wordlen;

	blocksperline = (linesize > 0) ? (int)((size_t)linesize / sizeof(wordout)) : 0;
	for (blocksout = 0, inpos = 0, outpos = 0; inpos < insize; ) {
		/* never start a block that cannot be stored completely */
		if (outsize - outpos < sizeof(wordout)) {
			break;
		}
		/* gather up to 3 input bytes, 0x0 padding a short final group */
		for (wordlen = 0, i = 0; i < sizeof(wordin); i++) {
			if (inpos < insize) {
				wordin[i] = (uint8_t)in[inpos++];
				wordlen++;
			} else {
				wordin[i] = 0x0;
			}
		}
		encodeblock(wordin, wordout, wordlen);
		for (i = 0; i < sizeof(wordout); i++) {
			out[outpos++] = (char)wordout[i];
		}
		blocksout++;
		/* end the line when it is full or the input is exhausted */
		if (linesize > 0 && (blocksout >= blocksperline || inpos >= insize)) {
			if (outsize - outpos < 2) {
				break;
			}
			out[outpos++] = '\r';
			out[outpos++] = '\n';
			blocksout = 0;
		}
	}
	return (int)outpos;
}

/*
** decodeblock
**
** decode 4 '6-bit' characters into 3 8-bit binary bytes
**
** wordin holds four 6-bit values (0..63), not the original characters.
*/
static void
decodeblock(const uint8_t wordin[4], uint8_t wordout[3])
{
	wordout[0] = (uint8_t) ((unsigned)wordin[0] << 2 | (unsigned)wordin[1] >> 4);
	wordout[1] = (uint8_t) ((unsigned)wordin[1] << 4 | (unsigned)wordin[2] >> 2);
	wordout[2] = (uint8_t) ((((unsigned)wordin[2] << 6) & 0xc0) | (unsigned)wordin[3]);
}

/*
** decode
**
** decode a base64 encoded stream discarding padding, line breaks and noise
**
** in/insize	encoded input and its length in bytes
** vp/outsize	output buffer and its capacity in bytes
**
** Every input byte that is not in the base64 alphabet ('=' pad
** characters, CR/LF, anything else) is skipped.  A final group of k
** alphabet characters (k < 4) yields k - 1 output bytes.
**
** Returns the number of bytes written to vp.  Decoding stops when the
** output buffer is full; nothing is ever read beyond in[insize - 1] or
** written beyond vp[outsize - 1].
*/
int
b64decode(const char *in, const size_t insize, void *vp, size_t outsize)
{
	unsigned	 wordlen;
	unsigned	 i;
	uint8_t    	 wordout[3];
	uint8_t    	 wordin[4];
	uint8_t    	 c;
	uint8_t    	 v;
	size_t		 inpos;
	size_t		 outpos;
	char		*out = vp;

	for (inpos = 0, outpos = 0; inpos < insize && outpos < outsize; ) {
		/* 0x0 pad up front so a short final group is fully defined */
		for (i = 0; i < sizeof(wordin); i++) {
			wordin[i] = 0x0;
		}
		/* collect up to 4 alphabet characters, skipping everything else */
		for (wordlen = 0; wordlen < sizeof(wordin) && inpos < insize; ) {
			c = (uint8_t)in[inpos++];
			if (c < 43 || c > 122) {
				continue;
			}
			v = (uint8_t)cd64[c - 43];
			if (v == '$') {
				continue;
			}
			wordin[wordlen++] = (uint8_t)(v - 62);
		}
		/* k characters carry k - 1 whole bytes; a lone character carries none */
		if (wordlen > 1) {
			decodeblock(wordin, wordout);
			for (i = 0; i < wordlen - 1 && outpos < outsize; i++) {
				out[outpos++] = (char)wordout[i];
			}
		}
	}
	return (int)outpos;
}