1% 2% B A S E 6 4 3% 4% by John Walker 5% http://www.fourmilab.ch/ 6% 7% What's all this, you ask? Well, this is a "literate program", 8% written in the CWEB language created by Donald E. Knuth and 9% Silvio Levy. This file includes both the C source code for 10% the program and internal documentation in TeX. Processing 11% this file with the CTANGLE utility produces the C source file, 12% while the CWEAVE program emits documentation in TeX. The 13% current version of these programs may be downloaded from: 14% 15% http://www-cs-faculty.stanford.edu/~knuth/cweb.html 16% 17% where you will find additional information on literate 18% programming and examples of other programs written in this 19% manner. 20% 21% If you don't want to wade through all these details, don't 22% worry; this distribution includes a .c file already 23% extracted and ready to compile. If "make" complains that it 24% can't find "ctangle" or "cweave", just "touch *.c" 25% and re-make--apparently the process of extracting the files 26% from the archive messed up the date and time, misleading 27% make into believing it needed to rebuild those files. 28 29@** Introduction. 30 31\vskip 15pt 32\centerline{\ttitlefont BASE64} 33\vskip 10pt 34\centerline{\titlefont Encode or decode file as MIME base64 (RFC 1341)} 35\vskip 15pt 36\centerline{by John Walker} 37\centerline{\.{http://www.fourmilab.ch/}} 38 39\vskip 15pt 40\centerline{This program is in the public domain.} 41 42\vskip 15pt 43\centerline{EBCDIC support courtesy of Christian.Ferrari@@fccrt.it, 2000-12-20.} 44\vskip 30pt 45 46@d REVDATE "10th June 2007" 47 48@** Program global context. 49@d TRUE 1 50@d FALSE 0 51@d LINELEN 72 /* Encoded line length (max 76) */ 52@d MAXINLINE 256 /* Maximum input line length */ 53 54@c 55#include "config.h" /* System-dependent configuration */ 56 57@h 58 59@<System include files@>@/ 60@<Windows-specific include files@>@/ 61@<Global variables@>@/ 62 63@ We include the following POSIX-standard C library files. 
Conditionals based on a probe of the system by the
  \.{configure} program allow us to cope with the
  peculiarities of specific systems.

@<System include files@>=
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#ifdef HAVE_STRING_H
#include <string.h>
#else
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#endif
#ifdef HAVE_GETOPT
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#else
#include "getopt.h"     /* No system \.{getopt}--use our own */
#endif

@ The following include files are needed in WIN32 builds
  to permit setting already-open I/O streams to binary mode.

@<Windows-specific include files@>=
#ifdef _WIN32
#define FORCE_BINARY_IO
#include <io.h>
#include <fcntl.h>
#endif

@ These variables are global to all procedures; many are used
  as ``hidden arguments'' to functions in order to simplify
  calling sequences.

@<Global variables@>=
typedef unsigned char byte;           /* Raw 8 bit value; used for buffers and table entries */

static FILE *fi;                      /* Input stream; |main| sets it to |stdin| or a named file */
static FILE *fo;                      /* Output stream; |main| sets it to |stdout| or a named file */
static byte iobuf[MAXINLINE];         /* Input buffer filled by |inbuf| */
static int iolen = 0;                 /* Number of valid bytes currently held in |iobuf| */
static int iocp = MAXINLINE;          /* Index of the next byte |inchar| will return; starts
                                         past |iolen| so the first call refills the buffer */
static int ateof = FALSE;             /* Set by |inbuf| once the input is exhausted */
static byte dtable[256];              /* Encode / decode table; filled by |encode| or |decode|
                                         before use */
static int linelength = 0;            /* Characters written so far on the current encoded line */
/* End of line sequence written by the encoder: a bare newline
   when |FORCE_BINARY_IO| is defined, otherwise CR/LF. */
static char eol[] =
#ifdef FORCE_BINARY_IO
    "\n"
#else
    "\r\n"
#endif
    ;
static int errcheck = TRUE;           /* Check decode input for errors?  Cleared by \.{-n} */

@** Input/output functions.

@ Procedure |inbuf|
fills the input buffer with data from the input stream |fi|.

@c

static int inbuf(void)
{
    int nread;

    if (!ateof) {
        nread = fread(iobuf, 1, MAXINLINE, fi);   /* Refill the whole buffer */
        if (nread > 0) {
            iolen = nread;
            iocp = 0;
            return TRUE;
        }
        if (ferror(fi)) {             /* Read error, as opposed to plain end of file */
            exit(1);
        }
        ateof = TRUE;                 /* Remember, so that we never read again */
    }
    return FALSE;
}

@ Procedure |inchar|
returns the next byte from the input buffer.  When the buffer
is exhausted it calls |inbuf| to refill it, returning |EOF| at
end of file.

@c

static int inchar(void)
{
    if ((iocp >= iolen) && !inbuf()) {
        return EOF;
    }
    return iobuf[iocp++];
}

@ Procedure |insig|
returns the next significant input character, ignoring
white space and control characters (every character whose
code does not exceed that of the blank).  This procedure uses
|inchar| to read the input stream and returns |EOF| when
the end of the input file is reached.

@c

static int insig(void)
{
    int ch;

    do {
        ch = inchar();
    } while ((ch != EOF) && (ch <= ' '));
    return ch;
}

@ Procedure |ochar|
outputs an encoded character, inserting line breaks
as required so that no line exceeds |LINELEN|
characters.  A failure to write terminates the program
with exit status 1.

@c

static void ochar(int c)
{
    int line_full = (linelength >= LINELEN);

    if (line_full) {
        if (fputs(eol, fo) == EOF) {
            exit(1);
        }
        linelength = 0;
    }
    if (putc(((byte) c), fo) == EOF) {
        exit(1);
    }
    linelength += 1;
}

@** Encoding.

Procedure |encode|
encodes the binary file opened as |fi| into base64, writing
the output to |fo|.

@c

static void encode(void)
{
    int i, done = FALSE;

    @<initialise encoding table@>;@\

    do {
        byte in3[3], out4[4];
        int ch, got = 0;

        in3[0] = in3[1] = in3[2] = 0;

        /* Collect up to three input bytes; bytes missing at end
           of file stay zero. */

        while (got < 3) {
            ch = inchar();
            if (ch == EOF) {
                done = TRUE;
                break;
            }
            in3[got++] = (byte) ch;
        }

        if (got > 0) {

            /* Split the 24 bits into four 6 bit table indices.
               Positions which carry no input data at the end of
               the stream are written as the "=" pad character
               instead. */

            out4[0] = dtable[in3[0] >> 2];
            out4[1] = dtable[((in3[0] & 3) << 4) | (in3[1] >> 4)];
            if (got < 2) {
                out4[2] = '=';
            } else {
                out4[2] = dtable[((in3[1] & 0xF) << 2) | (in3[2] >> 6)];
            }
            if (got < 3) {
                out4[3] = '=';
            } else {
                out4[3] = dtable[in3[2] & 0x3F];
            }

            for (i = 0; i < 4; i++) {
                ochar(out4[i]);
            }
        }
    } while (!done);

    if (fputs(eol, fo) == EOF) {      /* Terminate the last line */
        exit(1);
    }
}

@ Procedure |initialise_encoding_table|
  fills the binary encoding table with the characters
  the 6 bit values are mapped into.  The table is copied
  from a string literal which spells out the base64
  alphabet in order.  The compiler stores the literal in
  the character set of the target system, so this
  code works both on ASCII and EBCDIC
  systems, the latter support thanks to Ch.F.

  In EBCDIC systems character codes for letters are not
  consecutive, so the table cannot be computed by stepping
  through all 26 letters arithmetically; only these runs
  of letters are consecutive:

  \centerline{ A--I J--R S--Z a--i j--r s--z}

  This code works on ASCII as well as EBCDIC systems.

@<initialise encoding table@>=

    {
        static const char alphabet[] =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

        for (i = 0; i < 64; i++) {
            dtable[i] = (byte) alphabet[i];
        }
    }


@** Decoding.

Procedure |decode| decodes a base64 encoded stream from
|fi| and emits the binary result on |fo|.  Input is consumed
four significant characters at a time, each group yielding up
to three output bytes; a group containing the |'='| pad
character ends the decode.  With |errcheck| set, a character
outside the base64 alphabet or a truncated final group is
reported on |stderr| and the program exits with status 1.
With |errcheck| clear, invalid characters are skipped and a
truncated final group is dropped.  A failure to write the
output also exits with status 1.

@c

static void decode(void)
{
    int i;

    @<Initialise decode table@>;

    while (TRUE) {
        byte a[4], b[4], o[3];        /* Raw characters, their 6 bit values, output bytes */

        for (i = 0; i < 4; i++) {
            int c = insig();

            if (c == EOF) {
                if (errcheck && (i > 0)) {
                    fprintf(stderr, "Input file incomplete.\n");
                    exit(1);
                }
                return;
            }
            if (dtable[c] & 0x80) {   /* High bit marks a character outside the alphabet */
                if (errcheck) {
                    fprintf(stderr, "Illegal character '%c' in input file.\n", c);
                    exit(1);
                }
                /* Ignoring errors: discard invalid character. */
                i--;
                continue;
            }
            a[i] = (byte) c;
            b[i] = (byte) dtable[c];
        }
        o[0] = (b[0] << 2) | (b[1] >> 4);
        o[1] = (b[1] << 4) | (b[2] >> 2);
        o[2] = (b[2] << 6) | b[3];

        /* Padding in the third or fourth position shortens the
           final group to one or two bytes respectively. */

        i = a[2] == '=' ? 1 : (a[3] == '=' ? 2 : 3);

        /* |fwrite| returns the number of complete items written
           (here 0 or 1), never |EOF|, so test the count. */

        if (fwrite(o, i, 1, fo) != 1) {
            exit(1);
        }
        if (i < 3) {                  /* A padded group is the last one */
            return;
        }
    }
    @t\4@> @q Fix bad tab thanks to lint comment. @>
}

@ Procedure |initialise decode table| creates the lookup table
  used to map base64 characters into their binary values from
  0 to 63.  The table is built in this rather curious way in
  order to be properly initialised for both ASCII-based
  systems and those using EBCDIC, where the letters are not
  contiguous.  (EBCDIC fixes courtesy of Ch.F.)


  In EBCDIC systems character codes for letters are not
  consecutive; the initialisation must be split to accommodate
  the EBCDIC consecutive letters:

  \centerline{ A--I J--R S--Z a--i j--r s--z}

  This code works on ASCII as well as EBCDIC systems.

@<Initialise decode table@>=

    /* Mark all 256 entries invalid first, so that every byte
       value outside the alphabet, 255 included, is rejected. */

    for (i = 0; i < 256; i++) {
        dtable[i] = 0x80;
    }
    for (i = 'A'; i <= 'I'; i++) {
        dtable[i] = 0 + (i - 'A');
    }
    for (i = 'J'; i <= 'R'; i++) {
        dtable[i] = 9 + (i - 'J');
    }
    for (i = 'S'; i <= 'Z'; i++) {
        dtable[i] = 18 + (i - 'S');
    }
    for (i = 'a'; i <= 'i'; i++) {
        dtable[i] = 26 + (i - 'a');
    }
    for (i = 'j'; i <= 'r'; i++) {
        dtable[i] = 35 + (i - 'j');
    }
    for (i = 's'; i <= 'z'; i++) {
        dtable[i] = 44 + (i - 's');
    }
    for (i = '0'; i <= '9'; i++) {
        dtable[i] = 52 + (i - '0');
    }
    dtable['+'] = 62;
    dtable['/'] = 63;
    dtable['='] = 0;                  /* The pad character is legal and contributes zero bits */


@** Utility functions.

@ Procedure |usage|
prints how-to-call information.

@c

static void usage(void)
{
    printf("%s -- Encode/decode file as base64. Call:\n", PRODUCT);
    printf(" %s [-e / -d] [options] [infile] [outfile]\n", PRODUCT);
    printf("\n");
    printf("Options:\n");
    printf(" --copyright Print copyright information\n");
    printf(" -d, --decode Decode base64 encoded file\n");
    printf(" -e, --encode Encode file into base64\n");
    printf(" -n, --noerrcheck Ignore errors when decoding\n");
    printf(" -u, --help Print this message\n");
    printf(" --version Print version number\n");
    printf("\n");
    printf("by John Walker\n");
    printf("http://www.fourmilab.ch/\n");
}

@** Main program.

The main program parses the options and file names, opens the
streams, runs |encode| or |decode|, and finally flushes the
output.  It returns 0 on success, 1 if the output cannot be
written, and 2 for a command line error or a file which cannot
be opened.

@c

int main(int argc, char *argv[])
{
    extern char *optarg;              /* Imported from |getopt| */
    extern int optind;

    int f, decoding = FALSE, opt;
#ifdef FORCE_BINARY_IO
    int in_std = TRUE, out_std = TRUE;
#endif
    char *cp;

    /* 2000-12-20 Ch.F.
       UNIX/390 C compiler (cc) does not allow initialisation of
       static variables with non static right-value during variable
       declaration; it was moved from declaration to main function
       start. */

    fi = stdin;
    fo = stdout;

@<Process command-line options@>;@\
@<Process command-line arguments@>;@\
@<Force binary I/O where required@>;@\

    if (decoding) {
       decode();
    } else {
       encode();
    }

    /* Output is buffered, so a write failure may only show up
       when the buffer is flushed.  Flush explicitly so that the
       failure is reflected in the exit status. */

    if (fflush(fo) == EOF) {
        fprintf(stderr, "Error writing output file.\n");
        return 1;
    }
    return 0;
}

@
We use |getopt| to process command line options.  This
permits aggregation of options without arguments and
both \.{-d}{\it arg} and \.{-d} {\it arg} syntax.
Extended options are recognised by their first letter only;
an extended option whose first letter matches none of them
is reported as an error.
@<Process command-line options@>=
    while ((opt = getopt(argc, argv, "denu-:")) != -1) {
        switch (opt) {
            case 'd':                 /* -d  Decode */
                decoding = TRUE;
                break;

            case 'e':                 /* -e  Encode */
                decoding = FALSE;
                break;

            case 'n':                 /* -n  Suppress error checking */
                errcheck = FALSE;
                break;

            case 'u':                 /* -u  Print how-to-call information */
            case '?':
                usage();
                return 0;

            case '-':                 /* --  Extended options */
                switch (optarg[0]) {
                    case 'c':         /* --copyright */
                        printf("This program is in the public domain.\n");
                        return 0;

                    case 'd':         /* --decode */
                        decoding = TRUE;
                        break;

                    case 'e':         /* --encode */
                        decoding = FALSE;
                        break;

                    case 'h':         /* --help */
                        usage();
                        return 0;

                    case 'n':         /* --noerrcheck */
                        errcheck = FALSE;
                        break;

                    case 'v':         /* --version */
                        printf("%s %s\n", PRODUCT, VERSION);
                        printf("Last revised: %s\n", REVDATE);
                        printf("The latest version is always available\n");
                        printf("at http://www.fourmilab.ch/webtools/base64\n");
                        return 0;

                    default:          /* Unrecognised extended option */
                        fprintf(stderr, "Unknown option --%s\n", optarg);
                        usage();
                        return 2;
                }
        }
    }

@
This code is executed after |getopt| has completed parsing
command line options.  At this point the external variable
|optind| in |getopt| contains the index of the first
argument in the |argv[]| array.  The first argument names the
input file and the second the output file; a name of \.{-}
leaves the corresponding standard stream in use.
@<Process command-line arguments@>=
    f = 0;
    for (; optind < argc; optind++) {
        cp = argv[optind];
        switch (f) {

            /** Warning!  On systems which distinguish text mode and
                binary I/O (MS-DOS, Macintosh, etc.) the modes in these
                open statements will have to be made conditional based
                upon whether an encode or decode is being done, which
                will have to be specified earlier.  But it's worse: if
                input or output is from standard input or output, the
                mode will have to be changed on the fly, which is
                generally system and compiler dependent.  'Twasn't me
                who couldn't conform to Unix CR/LF convention, so
                don't ask me to write the code to work around
                Apple and Microsoft's incompatible standards. **/

            case 0:
                if (strcmp(cp, "-") != 0) {
                    if ((fi = fopen(cp,
#ifdef FORCE_BINARY_IO
                                        decoding ? "r" : "rb"
#else
                                        "r"
#endif
                                   )) == NULL) {
                        fprintf(stderr, "Cannot open input file %s\n", cp);
                        return 2;
                    }
#ifdef FORCE_BINARY_IO
                    in_std = FALSE;
#endif
                }
                f++;
                break;

            case 1:
                if (strcmp(cp, "-") != 0) {
                    if ((fo = fopen(cp,
#ifdef FORCE_BINARY_IO
                                        decoding ? "wb" : "w"
#else
                                        "w"
#endif
                                   )) == NULL) {
                        fprintf(stderr, "Cannot open output file %s\n", cp);
                        return 2;
                    }
#ifdef FORCE_BINARY_IO
                    out_std = FALSE;
#endif
                }
                f++;
                break;

            default:
                fprintf(stderr, "Too many file names specified.\n");
                usage();
                return 2;
        }
    }

@
On WIN32, if the binary stream is the default of \.{stdin}/\.{stdout},
we must place this stream, opened in text mode (translation
of LF to CR/LF) by default, into binary mode (no EOL
translation).  If you port this code to other platforms
which distinguish between text and binary file I/O
(for example, the Macintosh), you'll need to add equivalent
code here.

The following code sets the already-open standard stream to
binary mode on Microsoft Visual C 5.0 (Monkey C).  If you're
using a different version or compiler, you may need some
other incantation to cancel the text translation spell.
@<Force binary I/O where required@>=
#ifdef FORCE_BINARY_IO
    if ((decoding && out_std) || ((!decoding) && in_std)) {
#ifdef _WIN32


        _setmode(_fileno(decoding ? fo : fi), O_BINARY);
#endif
    }
#endif


@** Index.
The following is a cross-reference table for \.{base64}.
Single-character identifiers are not indexed, nor are
reserved words.  Underlined entries indicate where
an identifier was declared.