113b8cf66Sagc /*********************************************************************\
213b8cf66Sagc
313b8cf66Sagc MODULE NAME: b64.c
413b8cf66Sagc
513b8cf66Sagc AUTHOR: Bob Trower 08/04/01
613b8cf66Sagc
713b8cf66Sagc PROJECT: Crypt Data Packaging
813b8cf66Sagc
913b8cf66Sagc COPYRIGHT: Copyright (c) Trantor Standard Systems Inc., 2001
1013b8cf66Sagc
1113b8cf66Sagc NOTE: This source code may be used as you wish, subject to
1213b8cf66Sagc the MIT license. See the LICENCE section below.
1313b8cf66Sagc
1413b8cf66Sagc DESCRIPTION:
1513b8cf66Sagc This little utility implements the Base64
1613b8cf66Sagc Content-Transfer-Encoding standard described in
1713b8cf66Sagc RFC1113 (http://www.faqs.org/rfcs/rfc1113.html).
1813b8cf66Sagc
1913b8cf66Sagc This is the coding scheme used by MIME to allow
2013b8cf66Sagc binary data to be transferred by SMTP mail.
2113b8cf66Sagc
2213b8cf66Sagc Groups of 3 bytes from a binary stream are coded as
2313b8cf66Sagc groups of 4 bytes in a text stream.
2413b8cf66Sagc
2513b8cf66Sagc The input stream is 'padded' with zeros to create
2613b8cf66Sagc an input that is an even multiple of 3.
2713b8cf66Sagc
2813b8cf66Sagc A special character ('=') is used to denote padding so
2913b8cf66Sagc that the stream can be decoded back to its exact size.
3013b8cf66Sagc
3113b8cf66Sagc Encoded output is formatted in lines which should
3213b8cf66Sagc be a maximum of 72 characters to conform to the
3313b8cf66Sagc specification. This program defaults to 72 characters,
3413b8cf66Sagc but will allow more or less through the use of a
3513b8cf66Sagc switch. The program enforces a minimum line size
3613b8cf66Sagc of 4 characters.
3713b8cf66Sagc
3813b8cf66Sagc Example encoding:
3913b8cf66Sagc
4013b8cf66Sagc The stream 'ABCD' is 32 bits long. It is mapped as
4113b8cf66Sagc follows:
4213b8cf66Sagc
4313b8cf66Sagc ABCD
4413b8cf66Sagc
4513b8cf66Sagc A (65) B (66) C (67) D (68) (None) (None)
4613b8cf66Sagc 01000001 01000010 01000011 01000100
4713b8cf66Sagc
4813b8cf66Sagc 16 (Q) 20 (U) 9 (J) 3 (D) 17 (R) 0 (A) NA (=) NA (=)
4913b8cf66Sagc 010000 010100 001001 000011 010001 000000 000000 000000
5013b8cf66Sagc
5113b8cf66Sagc
5213b8cf66Sagc QUJDRA==
5313b8cf66Sagc
5413b8cf66Sagc Decoding is the process in reverse. A 'decode' lookup
5513b8cf66Sagc table has been created to avoid string scans.
5613b8cf66Sagc
5713b8cf66Sagc DESIGN GOALS: Specifically:
5813b8cf66Sagc Code is a stand-alone utility to perform base64
5913b8cf66Sagc encoding/decoding. It should be genuinely useful
6013b8cf66Sagc when the need arises and it meets a need that is
6113b8cf66Sagc likely to occur for some users.
6213b8cf66Sagc Code acts as sample code to show the author's
6313b8cf66Sagc design and coding style.
6413b8cf66Sagc
6513b8cf66Sagc Generally:
6613b8cf66Sagc This program is designed to survive:
6713b8cf66Sagc Everything you need is in a single source file.
6813b8cf66Sagc It compiles cleanly using a vanilla ANSI C compiler.
6913b8cf66Sagc It does its job correctly with a minimum of fuss.
7013b8cf66Sagc The code is not overly clever, not overly simplistic
7113b8cf66Sagc and not overly verbose.
7213b8cf66Sagc Access is 'cut and paste' from a web page.
7313b8cf66Sagc Terms of use are reasonable.
7413b8cf66Sagc
7513b8cf66Sagc VALIDATION: Non-trivial code is never without errors. This
7613b8cf66Sagc file likely has some problems, since it has only
7713b8cf66Sagc been tested by the author. It is expected with most
7813b8cf66Sagc source code that there is a period of 'burn-in' when
7913b8cf66Sagc problems are identified and corrected. That being
8013b8cf66Sagc said, it is possible to have 'reasonably correct'
8113b8cf66Sagc code by following a regime of unit test that covers
8213b8cf66Sagc the most likely cases and regression testing prior
8313b8cf66Sagc to release. This has been done with this code and
8413b8cf66Sagc it has a good probability of performing as expected.
8513b8cf66Sagc
8613b8cf66Sagc Unit Test Cases:
8713b8cf66Sagc
8813b8cf66Sagc case 0:empty file:
8913b8cf66Sagc CASE0.DAT -> ->
9013b8cf66Sagc (Zero length target file created
9113b8cf66Sagc on both encode and decode.)
9213b8cf66Sagc
9313b8cf66Sagc case 1:One input character:
9413b8cf66Sagc CASE1.DAT A -> QQ== -> A
9513b8cf66Sagc
9613b8cf66Sagc case 2:Two input characters:
CASE2.DAT AB -> QUI= -> AB
9813b8cf66Sagc
9913b8cf66Sagc case 3:Three input characters:
10013b8cf66Sagc CASE3.DAT ABC -> QUJD -> ABC
10113b8cf66Sagc
10213b8cf66Sagc case 4:Four input characters:
10313b8cf66Sagc case4.dat ABCD -> QUJDRA== -> ABCD
10413b8cf66Sagc
10513b8cf66Sagc case 5:All chars from 0 to ff, linesize set to 50:
10613b8cf66Sagc
10713b8cf66Sagc AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIj
10813b8cf66Sagc JCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZH
10913b8cf66Sagc SElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWpr
11013b8cf66Sagc bG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6P
11113b8cf66Sagc kJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKz
11213b8cf66Sagc tLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX
11313b8cf66Sagc 2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7
11413b8cf66Sagc /P3+/w==
11513b8cf66Sagc
11613b8cf66Sagc case 6:Mime Block from e-mail:
11713b8cf66Sagc (Data same as test case 5)
11813b8cf66Sagc
11913b8cf66Sagc case 7: Large files:
12013b8cf66Sagc Tested 28 MB file in/out.
12113b8cf66Sagc
12213b8cf66Sagc case 8: Random Binary Integrity:
12313b8cf66Sagc This binary program (b64.exe) was encoded to base64,
12413b8cf66Sagc back to binary and then executed.
12513b8cf66Sagc
12613b8cf66Sagc case 9 Stress:
12713b8cf66Sagc All files in a working directory encoded/decoded
12813b8cf66Sagc and compared with file comparison utility to
12913b8cf66Sagc ensure that multiple runs do not cause problems
13013b8cf66Sagc such as exhausting file handles, tmp storage, etc.
13113b8cf66Sagc
13213b8cf66Sagc -------------
13313b8cf66Sagc
13413b8cf66Sagc Syntax, operation and failure:
13513b8cf66Sagc All options/switches tested. Performs as
13613b8cf66Sagc expected.
13713b8cf66Sagc
13813b8cf66Sagc case 10:
13913b8cf66Sagc No Args -- Shows Usage Screen
14013b8cf66Sagc Return Code 1 (Invalid Syntax)
14113b8cf66Sagc case 11:
14213b8cf66Sagc One Arg (invalid) -- Shows Usage Screen
14313b8cf66Sagc Return Code 1 (Invalid Syntax)
14413b8cf66Sagc case 12:
14513b8cf66Sagc One Arg Help (-?) -- Shows detailed Usage Screen.
14613b8cf66Sagc Return Code 0 (Success -- help request is valid).
14713b8cf66Sagc case 13:
14813b8cf66Sagc One Arg Help (-h) -- Shows detailed Usage Screen.
14913b8cf66Sagc Return Code 0 (Success -- help request is valid).
15013b8cf66Sagc case 14:
15113b8cf66Sagc One Arg (valid) -- Uses stdin/stdout (filter)
Return Code 0 (Success)
15313b8cf66Sagc case 15:
15413b8cf66Sagc Two Args (invalid file) -- shows system error.
15513b8cf66Sagc Return Code 2 (File Error)
15613b8cf66Sagc case 16:
15713b8cf66Sagc Encode non-existent file -- shows system error.
15813b8cf66Sagc Return Code 2 (File Error)
15913b8cf66Sagc case 17:
16013b8cf66Sagc Out of disk space -- shows system error.
16113b8cf66Sagc Return Code 3 (File I/O Error)
16213b8cf66Sagc
16313b8cf66Sagc -------------
16413b8cf66Sagc
16513b8cf66Sagc Compile/Regression test:
16613b8cf66Sagc gcc compiled binary under Cygwin
16713b8cf66Sagc Microsoft Visual Studio under Windows 2000
16813b8cf66Sagc Microsoft Version 6.0 C under Windows 2000
16913b8cf66Sagc
17013b8cf66Sagc DEPENDENCIES: None
17113b8cf66Sagc
17213b8cf66Sagc LICENCE: Copyright (c) 2001 Bob Trower, Trantor Standard Systems Inc.
17313b8cf66Sagc
17413b8cf66Sagc Permission is hereby granted, free of charge, to any person
17513b8cf66Sagc obtaining a copy of this software and associated
17613b8cf66Sagc documentation files (the "Software"), to deal in the
17713b8cf66Sagc Software without restriction, including without limitation
17813b8cf66Sagc the rights to use, copy, modify, merge, publish, distribute,
17913b8cf66Sagc sublicense, and/or sell copies of the Software, and to
18013b8cf66Sagc permit persons to whom the Software is furnished to do so,
18113b8cf66Sagc subject to the following conditions:
18213b8cf66Sagc
18313b8cf66Sagc The above copyright notice and this permission notice shall
18413b8cf66Sagc be included in all copies or substantial portions of the
18513b8cf66Sagc Software.
18613b8cf66Sagc
18713b8cf66Sagc THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
18813b8cf66Sagc KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
18913b8cf66Sagc WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
19013b8cf66Sagc PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
19113b8cf66Sagc OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19213b8cf66Sagc OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19313b8cf66Sagc OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
19413b8cf66Sagc SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19513b8cf66Sagc
19613b8cf66Sagc VERSION HISTORY:
19713b8cf66Sagc Bob Trower 08/04/01 -- Create Version 0.00.00B
19813b8cf66Sagc
19913b8cf66Sagc \******************************************************************* */
20013b8cf66Sagc
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include "b64.h"
20613b8cf66Sagc
/*
** Translation Table as described in RFC1113
**
** Indexed by a 6-bit value (0..63); yields the base64 character.
*/
static const char cb64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
** Translation Table to decode (created by author)
**
** Indexed by (c - 43) for input characters 43 ('+') through 122 ('z').
** An entry of '$' marks a character that is not in the base64 alphabet
** (this includes the '=' pad character); any other entry e stands for
** the 6-bit value (e - 62).
*/
static const char cd64[] = "|$$$}rstuvwxyz{$$$$$$$>?@ABCDEFGHIJKLMNOPQRSTUVW$$$$$$XYZ[\\]^_`abcdefghijklmnopq";

/*
** encodeblock
**
** encode 3 8-bit binary bytes as 4 '6-bit' characters
**
** wordin  - 3 input bytes; bytes beyond wordlen must be zero
** wordout - receives exactly 4 output characters
** wordlen - number of real input bytes (1..3); positions that carry no
**           input data are emitted as the '=' pad character
*/
static void
encodeblock(const uint8_t *wordin, uint8_t *wordout, int wordlen)
{
	wordout[0] = (uint8_t)cb64[wordin[0] >> 2];
	wordout[1] = (uint8_t)cb64[((wordin[0] & 0x03) << 4) | ((wordin[1] & 0xf0) >> 4)];
	wordout[2] = (uint8_t)((wordlen > 1) ?
	    cb64[((wordin[1] & 0x0f) << 2) | ((wordin[2] & 0xc0) >> 6)] : '=');
	wordout[3] = (uint8_t)((wordlen > 2) ? cb64[wordin[2] & 0x3f] : '=');
}

/*
** encode
**
** base64 encode a buffer adding padding and line breaks as per spec.
**
** in       - input bytes
** insize   - number of input bytes
** vp       - output buffer
** outsize  - capacity of the output buffer in bytes
** linesize - if > 0, a CRLF is written after every (linesize / 4)
**            4-character groups and after the final group; values 1..3
**            give a CRLF after every group; <= 0 disables line breaks
**
** Returns the number of bytes written to vp.  No NUL terminator is
** written.  Nothing is read beyond in[insize - 1] and nothing is written
** beyond vp[outsize - 1]: encoding stops before the first 4-character
** group (or CRLF) that does not fit completely, so truncated output
** always ends on a whole group.  At most INT_MAX bytes are produced so
** that the return value is always representable.
*/
int
b64encode(const char *in, const size_t insize, void *vp, size_t outsize, int linesize)
{
	const char *inp;
	const char *inend;
	uint8_t wordout[4];
	uint8_t wordin[3];
	char *out = vp;
	char *outp;
	unsigned i;
	int blocksperline;
	int blocksout;
	int wordlen;

	if (in == NULL || out == NULL || insize == 0 || outsize == 0) {
		return 0;
	}
	if (outsize > (size_t)INT_MAX) {
		outsize = (size_t)INT_MAX;
	}
	blocksperline = linesize / (int)sizeof(wordout);
	inend = in + insize;
	blocksout = 0;
	for (inp = in, outp = out; inp < inend; ) {
		/* never start a group that cannot be written in full */
		if (outsize - (size_t)(outp - out) < sizeof(wordout)) {
			break;
		}
		/* gather up to 3 input bytes, zero-filling a short final group */
		for (wordlen = 0, i = 0; i < sizeof(wordin); i++) {
			if (inp < inend) {
				wordin[i] = (uint8_t)*inp++;
				wordlen++;
			} else {
				wordin[i] = 0x0;
			}
		}
		encodeblock(wordin, wordout, wordlen);
		for (i = 0; i < sizeof(wordout); i++) {
			*outp++ = (char)wordout[i];
		}
		blocksout++;
		/* break the line when it is full or the input is exhausted */
		if (linesize > 0 && (blocksout >= blocksperline || inp == inend)) {
			if (outsize - (size_t)(outp - out) < 2) {
				break;
			}
			*outp++ = '\r';
			*outp++ = '\n';
			blocksout = 0;
		}
	}
	return (int)(outp - out);
}

/*
** decodeblock
**
** decode 4 '6-bit' characters into 3 8-bit binary bytes
**
** wordin  - four 6-bit values (0..63), already translated from text
** wordout - receives the three reassembled bytes
*/
static void
decodeblock(const uint8_t wordin[4], uint8_t wordout[3])
{
	wordout[0] = (uint8_t)((unsigned)wordin[0] << 2 | (unsigned)wordin[1] >> 4);
	wordout[1] = (uint8_t)((unsigned)wordin[1] << 4 | (unsigned)wordin[2] >> 2);
	wordout[2] = (uint8_t)((((unsigned)wordin[2] << 6) & 0xc0) | wordin[3]);
}

/*
** b64value
**
** translate one input character into its 6-bit value (0..63), or return
** -1 if the character is not part of the base64 alphabet ('=', CR, LF
** and any other noise).
*/
static int
b64value(uint8_t c)
{
	char e;

	if (c < 43 || c > 122) {
		return -1;
	}
	e = cd64[c - 43];
	return (e == '$') ? -1 : e - 62;
}

/*
** decode
**
** decode a base64 encoded buffer discarding padding, line breaks and noise
**
** in      - base64 text
** insize  - number of input characters
** vp      - output buffer
** outsize - capacity of the output buffer in bytes
**
** Characters outside the base64 alphabet, including '=' and CR/LF, are
** skipped.  Valid characters are collected in groups of four; a final
** short group of k characters yields k - 1 bytes.
**
** Returns the number of bytes written to vp.  Nothing is read beyond
** in[insize - 1] and nothing is written beyond vp[outsize - 1]; output
** that does not fit is dropped.  At most INT_MAX bytes are produced so
** that the return value is always representable.
*/
int
b64decode(const char *in, const size_t insize, void *vp, size_t outsize)
{
	const char *inp;
	const char *inend;
	unsigned wordlen;
	unsigned i;
	uint8_t wordout[3];
	uint8_t wordin[4];
	char *out = vp;
	char *outp;
	int v;

	if (in == NULL || out == NULL || insize == 0 || outsize == 0) {
		return 0;
	}
	if (outsize > (size_t)INT_MAX) {
		outsize = (size_t)INT_MAX;
	}
	inend = in + insize;
	for (inp = in, outp = out; inp < inend && (size_t)(outp - out) < outsize; ) {
		/* unused slots of a short final group must read as zero */
		for (i = 0; i < sizeof(wordin); i++) {
			wordin[i] = 0x0;
		}
		/* collect up to 4 alphabet characters, skipping everything else */
		for (wordlen = 0; wordlen < sizeof(wordin) && inp < inend; ) {
			v = b64value((uint8_t)*inp++);
			if (v >= 0) {
				wordin[wordlen++] = (uint8_t)v;
			}
		}
		if (wordlen > 0) {
			decodeblock(wordin, wordout);
			/* k characters carry k - 1 complete bytes */
			for (i = 0; i + 1 < wordlen && (size_t)(outp - out) < outsize; i++) {
				*outp++ = (char)wordout[i];
			}
		}
	}
	return (int)(outp - out);
}
343