1%
2%                            B A S E 6 4
3%
4%                           by John Walker
5%                      http://www.fourmilab.ch/
6%
7%   What's all this, you ask?  Well, this is a "literate program",
8%   written in the CWEB language created by Donald E. Knuth and
9%   Silvio Levy.  This file includes both the C source code for
10%   the program and internal documentation in TeX.  Processing
11%   this file with the CTANGLE utility produces the C source file,
12%   while the CWEAVE program emits documentation in TeX.  The
13%   current version of these programs may be downloaded from:
14%
15%       http://www-cs-faculty.stanford.edu/~knuth/cweb.html
16%
17%   where you will find additional information on literate
18%   programming and examples of other programs written in this
19%   manner.
20%
21%   If you don't want to wade through all these details, don't
22%   worry; this distribution includes a .c file already
23%   extracted and ready to compile.  If "make" complains that it
24%   can't find "ctangle" or "cweave", just "touch *.c"
25%   and re-make--apparently the process of extracting the files
26%   from the archive messed up the date and time, misleading
27%   make into believing it needed to rebuild those files.
28
29@** Introduction.
30
31\vskip 15pt
32\centerline{\ttitlefont BASE64}
33\vskip 10pt
34\centerline{\titlefont Encode or decode file as MIME base64 (RFC 1341)}
35\vskip 15pt
36\centerline{by John Walker}
37\centerline{\.{http://www.fourmilab.ch/}}
38
39\vskip 15pt
40\centerline{This program is in the public domain.}
41
42\vskip 15pt
43\centerline{EBCDIC support courtesy of Christian.Ferrari@@fccrt.it, 2000-12-20.}
44\vskip 30pt
45
46@d REVDATE "10th June 2007"
47
48@** Program global context.
49@d TRUE  1
50@d FALSE 0
51@d LINELEN 72  /* Encoded line length (max 76) */
52@d MAXINLINE 256  /* Maximum input line length */
53
54@c
55#include "config.h"                   /* System-dependent configuration */
56
57@h
58
59@<System include files@>@/
60@<Windows-specific include files@>@/
61@<Global variables@>@/
62
63@ We include the following POSIX-standard C library files.
64  Conditionals based on a probe of the system by the
65  \.{configure} program allow us to cope with the
66  peculiarities of specific systems.
67
68@<System include files@>=
69#include <stdio.h>
70#include <stdlib.h>
71#include <ctype.h>
72#ifdef HAVE_STRING_H
73#include <string.h>
74#else
75#ifdef HAVE_STRINGS_H
76#include <strings.h>
77#endif
78#endif
79#ifdef HAVE_GETOPT
80#ifdef HAVE_UNISTD_H
81#include <unistd.h>
82#endif
83#else
84#include "getopt.h"     /* No system \.{getopt}--use our own */
85#endif
86
87@ The following include files are needed in WIN32 builds
88  to permit setting already-open I/O streams to binary mode.
89
90@<Windows-specific include files@>=
91#ifdef _WIN32
92#define FORCE_BINARY_IO
93#include <io.h>
94#include <fcntl.h>
95#endif
96
97@ These variables are global to all procedures; many are used
98  as ``hidden arguments'' to functions in order to simplify
99  calling sequences.
100
101@<Global variables@>=
102typedef unsigned char byte;           /* Byte type */
103
104static FILE *fi;                      /* Input file */
105static FILE *fo;                      /* Output file */
106static byte iobuf[MAXINLINE];         /* I/O buffer */
107static int iolen = 0;                 /* Bytes left in I/O buffer */
108static int iocp = MAXINLINE;          /* Character removal pointer */
109static int ateof = FALSE;             /* EOF encountered */
110static byte dtable[256];              /* Encode / decode table */
111static int linelength = 0;            /* Length of encoded output line */
112static char eol[] =                   /* End of line sequence */
113#ifdef FORCE_BINARY_IO
114    "\n"
115#else
116    "\r\n"
117#endif
118    ;
119static int errcheck = TRUE;           /* Check decode input for errors ? */
120
121@** Input/output functions.
122
123@ Procedure |inbuf|
124fills the input buffer with data from the input stream |fi|.
125
126@c
127
128static int inbuf(void)
129{
130    int l;
131
132    if (ateof) {
133        return FALSE;
134    }
135    l = fread(iobuf, 1, MAXINLINE, fi);     /* Read input buffer */
136    if (l <= 0) {
137        if (ferror(fi)) {
138            exit(1);
139        }
140        ateof = TRUE;
141        return FALSE;
142    }
143    iolen = l;
144    iocp = 0;
145    return TRUE;
146}
147
@ Procedure |inchar|
returns the next character from the input buffer.  When the
buffer is exhausted, it calls |inbuf| to refill it, returning
|EOF| at end of file.
152
153@c
154
155static int inchar(void)
156{
157    if (iocp >= iolen) {
158       if (!inbuf()) {
159          return EOF;
160        }
161    }
162
163    return iobuf[iocp++];
164}
165
166@ Procedure |insig|
167returns the next significant input character, ignoring
168white space and control characters.  This procedure uses
169|inchar| to read the input stream and returns |EOF| when
170the end of the input file is reached.
171
172@c
173
/* Return the next input character whose code is greater than that of
   a blank, or EOF once the input is exhausted. */
static int insig(void)
{
    int ch;

    do {
        ch = inchar();
    } while (ch != EOF && ch <= ' ');   /* Skip blanks and anything below */

    return ch;
}
185
186@ Procedure |ochar|
187outputs an encoded character, inserting line breaks
188as required so that no line exceeds |LINELEN|
189characters.
190
191@c
192
193static void ochar(int c)
194{
195    if (linelength >= LINELEN) {
196        if (fputs(eol, fo) == EOF) {
197            exit(1);
198        }
199        linelength = 0;
200    }
201    if (putc(((byte) c), fo) == EOF) {
202        exit(1);
203    }
204    linelength++;
205}
206
207@** Encoding.
208
209Procedure |encode|
210encodes the binary file opened as |fi| into base64, writing
211the output to |fo|.
212
213@c
214
215static void encode(void)
216{
217    int i, hiteof = FALSE;
218
219    @<initialise encoding table@>;@\
220
221    while (!hiteof) {
222        byte igroup[3], ogroup[4];
223        int c, n;
224
225        igroup[0] = igroup[1] = igroup[2] = 0;
226        for (n = 0; n < 3; n++) {
227            c = inchar();
228            if (c == EOF) {
229                hiteof = TRUE;
230                break;
231            }
232            igroup[n] = (byte) c;
233        }
234        if (n > 0) {
235            ogroup[0] = dtable[igroup[0] >> 2];
236            ogroup[1] = dtable[((igroup[0] & 3) << 4) | (igroup[1] >> 4)];
237            ogroup[2] = dtable[((igroup[1] & 0xF) << 2) | (igroup[2] >> 6)];
238            ogroup[3] = dtable[igroup[2] & 0x3F];
239
240            /* Replace characters in output stream with "=" pad
241               characters if fewer than three characters were
242               read from the end of the input stream. */
243
244            if (n < 3) {
245                ogroup[3] = '=';
246                if (n < 2) {
247                    ogroup[2] = '=';
248                }
249            }
250            for (i = 0; i < 4; i++) {
251                ochar(ogroup[i]);
252            }
253        }
254    }
255    if (fputs(eol, fo) == EOF) {
256        exit(1);
257    }
258}
259
260@ Procedure |initialise_encoding_table|
261  fills the binary encoding table with the characters
262  the 6 bit values are mapped into.  The curious
263  and disparate sequences used to fill this table
264  permit this code to work both on ASCII and EBCDIC
265  systems, the latter thanks to Ch.F.
266
267  In EBCDIC systems character codes for letters are not
268  consecutive; the initialisation must be split to accommodate
269  the EBCDIC consecutive letters:
270
271  \centerline{ A--I J--R S--Z a--i j--r s--z}
272
273  This code works on ASCII as well as EBCDIC systems.
274
275@<initialise encoding table@>=
276
277    for (i = 0; i < 9; i++) {
278        dtable[i] = 'A' + i;
279        dtable[i + 9] = 'J' + i;
280        dtable[26 + i] = 'a' + i;
281        dtable[26 + i + 9] = 'j' + i;
282    }
283    for (i = 0; i < 8; i++) {
284        dtable[i + 18] = 'S' + i;
285        dtable[26 + i + 18] = 's' + i;
286    }
287    for (i = 0; i < 10; i++) {
288        dtable[52 + i] = '0' + i;
289    }
290    dtable[62] = '+';
291    dtable[63] = '/';
292
293
294@** Decoding.
295
296Procedure |decode| decodes a base64 encoded stream from
297|fi| and emits the binary result on |fo|.
298
299@c
300
301static void decode(void)
302{
303    int i;
304
305    @<Initialise decode table@>;
306
307    while (TRUE) {
308        byte a[4], b[4], o[3];
309
310        for (i = 0; i < 4; i++) {
311            int c = insig();
312
313            if (c == EOF) {
314                if (errcheck && (i > 0)) {
315                    fprintf(stderr, "Input file incomplete.\n");
316                    exit(1);
317                }
318                return;
319            }
320            if (dtable[c] & 0x80) {
321                if (errcheck) {
322                    fprintf(stderr, "Illegal character '%c' in input file.\n", c);
323                    exit(1);
324                }
325                /* Ignoring errors: discard invalid character. */
326                i--;
327                continue;
328            }
329            a[i] = (byte) c;
330            b[i] = (byte) dtable[c];
331        }
332        o[0] = (b[0] << 2) | (b[1] >> 4);
333        o[1] = (b[1] << 4) | (b[2] >> 2);
334        o[2] = (b[2] << 6) | b[3];
335        i = a[2] == '=' ? 1 : (a[3] == '=' ? 2 : 3);
336        if (fwrite(o, i, 1, fo) == EOF) {
337            exit(1);
338        }
339        if (i < 3) {
340            return;
341        }
342    }
343    @t\4@>  @q Fix bad tab thanks to lint comment. @>
344}
345
346@ Procedure |initialise decode table| creates the lookup table
347  used to map base64 characters into their binary values from
348  0 to 63.  The table is built in this rather curious way in
349  order to be properly initialised for both ASCII-based
350  systems and those using EBCDIC, where the letters are not
351  contiguous.  (EBCDIC fixes courtesy of Ch.F.)
352
353
354  In EBCDIC systems character codes for letters are not
355  consecutive; the initialisation must be split to accommodate
356  the EBCDIC consecutive letters:
357
358  \centerline{ A--I J--R S--Z a--i j--r s--z}
359
360  This code works on ASCII as well as EBCDIC systems.
361
362@<Initialise decode table@>=
363
364    for (i = 0; i < 255; i++) {
365        dtable[i] = 0x80;
366    }
367    for (i = 'A'; i <= 'I'; i++) {
368        dtable[i] = 0 + (i - 'A');
369    }
370    for (i = 'J'; i <= 'R'; i++) {
371        dtable[i] = 9 + (i - 'J');
372    }
373    for (i = 'S'; i <= 'Z'; i++) {
374        dtable[i] = 18 + (i - 'S');
375    }
376    for (i = 'a'; i <= 'i'; i++) {
377        dtable[i] = 26 + (i - 'a');
378    }
379    for (i = 'j'; i <= 'r'; i++) {
380        dtable[i] = 35 + (i - 'j');
381    }
382    for (i = 's'; i <= 'z'; i++) {
383        dtable[i] = 44 + (i - 's');
384    }
385    for (i = '0'; i <= '9'; i++) {
386        dtable[i] = 52 + (i - '0');
387    }
388    dtable['+'] = 62;
389    dtable['/'] = 63;
390    dtable['='] = 0;
391
392
393@** Utility functions.
394
395@ Procedure |usage|
396prints how-to-call information.
397
398@c
399
400static void usage(void)
401{
402    printf("%s  --  Encode/decode file as base64.  Call:\n", PRODUCT);
403    printf("            %s [-e / -d] [options] [infile] [outfile]\n", PRODUCT);
404    printf("\n");
405    printf("Options:\n");
406    printf("           --copyright       Print copyright information\n");
407    printf("           -d, --decode      Decode base64 encoded file\n");
408    printf("           -e, --encode      Encode file into base64\n");
409    printf("           -n, --noerrcheck  Ignore errors when decoding\n");
410    printf("           -u, --help        Print this message\n");
411    printf("           --version         Print version number\n");
412    printf("\n");
413    printf("by John Walker\n");
414    printf("http://www.fourmilab.ch/\n");
415}
416
417@** Main program.
418
419@c
420
421int main(int argc, char *argv[])
422{
423    extern char *optarg;            /* Imported from |getopt| */
424    extern int optind;
425
426    int f, decoding = FALSE, opt;
427#ifdef FORCE_BINARY_IO
428    int in_std = TRUE, out_std = TRUE;
429#endif
430    char *cp;
431
432    /* 2000-12-20 Ch.F.
433       UNIX/390 C compiler (cc) does not allow initialisation of
434       static variables with non static right-value during variable
435       declaration; it was moved from declaration to main function
436       start.  */
437
438    fi = stdin;
439    fo = stdout;
440
441@<Process command-line options@>;@\
442@<Process command-line arguments@>;@\
443@<Force binary I/O where required@>;@\
444
445    if (decoding) {
446       decode();
447    } else {
448       encode();
449    }
450    return 0;
451}
452
453@
454We use |getopt| to process command line options.  This
455permits aggregation of options without arguments and
456both \.{-d}{\it arg} and \.{-d} {\it arg} syntax.
457@<Process command-line options@>=
458    while ((opt = getopt(argc, argv, "denu-:")) != -1) {
459        switch (opt) {
460            case 'd':             /* -d  Decode */
461                decoding = TRUE;
462                break;
463
464            case 'e':             /* -e  Encode */
465                decoding = FALSE;
466                break;
467
468            case 'n':             /* -n  Suppress error checking */
469                errcheck = FALSE;
470                break;
471
472            case 'u':             /* -u  Print how-to-call information */
473            case '?':
474                usage();
475                return 0;
476
477            case '-':             /* --  Extended options */
478                switch (optarg[0]) {
479                    case 'c':     /* --copyright */
480                        printf("This program is in the public domain.\n");
481                        return 0;
482
483                    case 'd':     /* --decode */
484                        decoding = TRUE;
485                        break;
486
487                    case 'e':     /* -encode */
488                        decoding = FALSE;
489                        break;
490
491                    case 'h':     /* --help */
492                        usage();
493                        return 0;
494
495                    case 'n':             /* --noerrcheck */
496                        errcheck = FALSE;
497                        break;
498
499                    case 'v':     /* --version */
500                        printf("%s %s\n", PRODUCT, VERSION);
501                        printf("Last revised: %s\n", REVDATE);
502                        printf("The latest version is always available\n");
503                        printf("at http://www.fourmilab.ch/webtools/base64\n");
504                        return 0;
505                }
506        }
507    }
508
509@
510This code is executed after |getopt| has completed parsing
511command line options.  At this point the external variable
512|optind| in |getopt| contains the index of the first
513argument in the |argv[]| array.
514@<Process command-line arguments@>=
515    f = 0;
516    for (; optind < argc; optind++) {
517        cp = argv[optind];
518        switch (f) {
519
520            /** Warning!  On systems which distinguish text mode and
521                binary I/O (MS-DOS, Macintosh, etc.) the modes in these
522                open statements will have to be made conditional based
523                upon whether an encode or decode is being done, which
524                will have to be specified earlier.  But it's worse: if
525                input or output is from standard input or output, the
526                mode will have to be changed on the fly, which is
527                generally system and compiler dependent.  'Twasn't me
528                who couldn't conform to Unix CR/LF convention, so
529                don't ask me to write the code to work around
530                Apple and Microsoft's incompatible standards. **/
531
532            case 0:
533                if (strcmp(cp, "-") != 0) {
534                    if ((fi = fopen(cp,
535#ifdef FORCE_BINARY_IO
536                                        decoding ? "r" : "rb"
537#else
538                                        "r"
539#endif
540                                       )) == NULL) {
541                        fprintf(stderr, "Cannot open input file %s\n", cp);
542                        return 2;
543                    }
544#ifdef FORCE_BINARY_IO
545                    in_std = FALSE;
546#endif
547                }
548                f++;
549                break;
550
551            case 1:
552                if (strcmp(cp, "-") != 0) {
553                    if ((fo = fopen(cp,
554#ifdef FORCE_BINARY_IO
555                                        decoding ? "wb" : "w"
556#else
557                                        "w"
558#endif
559                                       )) == NULL) {
560                        fprintf(stderr, "Cannot open output file %s\n", cp);
561                        return 2;
562                    }
563#ifdef FORCE_BINARY_IO
564                    out_std = FALSE;
565#endif
566                }
567                f++;
568                break;
569
570            default:
571                fprintf(stderr, "Too many file names specified.\n");
572                usage();
573                return 2;
574        }
575    }
576
577@
On WIN32, if the binary stream is the default of \.{stdin}/\.{stdout},
we must place this stream, opened in text mode (translation
of LF to CR/LF) by default, into binary mode (no EOL
translation).  If you port this code to other platforms
which distinguish between text and binary file I/O
(for example, the Macintosh), you'll need to add equivalent
code here.
585
586The following code sets the already-open standard stream to
587binary mode on Microsoft Visual C 5.0 (Monkey C).  If you're
588using a different version or compiler, you may need some
589other incantation to cancel the text translation spell.
590@<Force binary I/O where required@>=
#ifdef FORCE_BINARY_IO
    /* Only the raw data side needs binary mode, and only when it is
       still a standard stream: output when decoding, input when
       encoding. */
    if (decoding) {
        if (out_std) {
#ifdef _WIN32
            _setmode(_fileno(fo), O_BINARY);
#endif
        }
    } else if (in_std) {
#ifdef _WIN32
        _setmode(_fileno(fi), O_BINARY);
#endif
    }
#endif
600
601
602@** Index.
603The following is a cross-reference table for \.{base64}.
604Single-character identifiers are not indexed, nor are
605reserved words.  Underlined entries indicate where
606an identifier was declared.
607