/* unship.c -- for unpacking ship files via trn */
/* Based on ship.c -- Not copyrighted 1991 Mark Adler. */
/* Modified by Wayne Davison, but still not copyrighted. */

/* The authors make no claims as to the fitness or correctness of this software
 * for any use whatsoever, and it is provided as is. Any use of this software
 * is at the user's own risk.
 */
9 
10 #include "EXTERN.h"
11 #include "common.h"
12 #include "respond.h"
13 #include "decode.h"
14 
15 typedef unsigned long ulg;	/* 32-bit unsigned integer */
16 
17 /* Function prototypes */
18 
19 static void decode_line _((unsigned char *));
20 static void err _((int));
21 
22 /* Globals for ship() */
23 ulg ccnt;		/* count of bytes read or written */
24 ulg crc;		/* CRC register */
25 ulg buf4;		/* four byte buffer */
26 int bcnt;		/* buffer count */
27 
28 unsigned int decb;	/* bit buffer for decode */
29 unsigned int decn;	/* number of bits in decb */
30 
31 bool fast;		/* true for arithmetic coding, else base 85 */
32 bool overwrite = 1;	/* should we overwrite existing files? */
33 
/* Error codes passed to err().  err() prints errors[code - 1], so the codes
   must stay consecutive from 1 and in the same order as the messages. */
#define SE_FORM 1       /* input is not in ship format */
#define SE_CONT 2       /* continuation piece does not follow the last one */
#define SE_CRC 3        /* CRC of the decoded data does not match */
#define SE_OVER 4       /* output file exists and overwrite is off */
#define SE_FULL 5       /* could not create or write the output file */
char *errors[] = {
  /* SE_FORM */ "Invalid ship format.",
  /* SE_CONT */ "This piece is out of sequence.",
  /* SE_CRC  */ "CRC check failed.",
  /* SE_OVER */ "File already exists.",
  /* SE_FULL */ "Error writing file.",
};
47 
/* Set of 86 characters used for the base 85 digits (last one not used), and
   for the 86 character arithmetic coding.  Selected to be part of both the
   ASCII printable characters, and the common EBCDIC printable characters
   whose ASCII translations are universal.  A character's index in this
   table is its digit value. */
unsigned char safe[] = {
  '{', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  ':', ';', '<', '=', '>', '?', '@',
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_',
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '}'
};

/* Low set size for fast coding: the number of symbols beyond 64 (86 - 64). */
#define LOWSZ (sizeof(safe)-64)

/* Special replacement pairs--if the first of each pair is received, it is
   treated like the second member of the pair.  The list is terminated by
   a zero byte.  You're probably wondering why.  The first pair is for
   compatibility with an earlier version of ship that used ! for the
   base 85 zero digit.  However, there exist ASCII-EBCDIC translation
   tables that don't know about exclamation marks.  The second set has
   mysterious historical origins that are best left unspoken ... */
unsigned char aliases[] = {'!', '{', '|', '+', 0};

/* Inverse of safe[], filled in by unship_init() */
unsigned char invsafe[256];
73 
74 /* Table of CRC-32's of all single byte values (made by makecrc.c) */
75 ulg crctab[] = {
76   0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
77   0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
78   0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
79   0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
80   0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
81   0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
82   0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
83   0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
84   0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
85   0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
86   0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
87   0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
88   0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
89   0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
90   0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
91   0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
92   0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
93   0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
94   0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
95   0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
96   0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
97   0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
98   0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
99   0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
100   0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
101   0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
102   0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
103   0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
104   0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
105   0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
106   0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
107   0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
108   0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
109   0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
110   0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
111   0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
112   0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
113   0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
114   0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
115   0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
116   0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
117   0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
118   0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
119   0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
120   0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
121   0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
122   0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
123   0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
124   0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
125   0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
126   0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
127   0x2d02ef8dL
128 };
129 
/* Macro to update the CRC shift register one byte at a time.  c is the
   current register value and the low 8 bits of b are the next data byte;
   the result is the new register value.  The xor is done on the ulg values
   and masked before the conversion to int, so no out-of-range value is ever
   converted to a signed type. */
#define CRC(c,b) (crctab[(int)(((c)^(b))&0xff)]^((c)>>8))

/* cputc(d,fp)--like putc(d,fp), but delays four bytes and computes a CRC.
   fp is a FILE *, and d is expected to be an ulg of which only the low
   8 bits are used.

   buf4 is a four byte fifo: a new byte enters at bits 24..31 and the oldest
   byte sits in bits 0..7.  Until four bytes are queued (bcnt != 4) cputc
   only counts them; after that each call first hands the oldest byte to
   cputf(), which folds it into crc, writes it to fp and increments ccnt.
   The four bytes left in buf4 at the end are never written.

   d is masked to 8 bits before the shift.  With a ulg of exactly 32 bits
   the shift itself discards the upper bits, but with a wider ulg (e.g.
   64-bit unsigned long) they would stay in buf4 and be added into the
   next byte. */
#define cputf(fp) (int)(crc=CRC(crc,buf4),putc((int)(buf4&0xff),fp),ccnt++)
#define cputc(d,fp) (bcnt!=4?bcnt++:cputf(fp),buf4=(buf4>>8)+(((ulg)(d)&0xff)<<24))
137 
138 /* Build invsafe[], the inverse of safe[]. */
139 void
unship_init()140 unship_init()
141 {
142   int i;
143 
144   for (i = 0; i < 256; i++)
145     invsafe[i] = 127;
146   for (i = 0; i < sizeof(safe); i++)
147     invsafe[safe[i]] = (char)i;
148   for (i = 0; aliases[i]; i += 2)
149     invsafe[aliases[i]] = invsafe[aliases[i + 1]];
150 }
151 
152 int
unship(in)153 unship(in)
154 FILE *in;
155 {
156   int b;		/* state of line loop */
157   char l[LBUFLEN];	/* line buffer on input */
158   char *p;		/* modifies line buffer */
159   char *q;		/* scans continuation line */
160 
161   /* Loop on the latest article's lines */
162   b = 2;				/* not in body yet */
163   while (1)				/* return on end of last file */
164   {
165     /* Get next line from file */
166     if (fgets(l, LBUFLEN, in) == Nullch)
167       break;
168 
169     /* Strip control characters and leading blank space, if any */
170     for (q = l; *q && *q <= ' ' && *q != '\n'; q++)
171       ;
172     for (p = l; *q; q++)
173       if (*q >= ' ' || *q == '\n')
174 	*p++ = *q;
175     *p = 0;
176 
177     /* Based on current state, end or start on terminator.  States are:
178 	 b == 0:  at start of body or body terminator line
179 	 b == 1:  in middle of body line
180 	 b == 2:  at start of non-body line
181 	 b == 3:  in middle of non-body line
182 	 b == 4:  at information line
183     */
184     switch (b)
185     {
186     case 0:
187       if ((!fast && strEQ(l, "$\n")) ||
188 	  (fast && strEQ(l, "$ f\n")))
189       {
190 	b = 4;
191 	break;
192       }
193       /* fall through to case 1 */
194     case 1:
195       decode_line((unsigned char *)l);
196       b = l[strlen(l) - 1] != '\n';
197       break;
198     case 2:
199       if (strEQ(l, "$\n") || strEQ(l, "$ f\n"))
200       {
201 	fast = l[1] == ' ';
202 	b = 4;
203 	break;
204       }
205       /* fall through to case 3 */
206     case 3:
207       b = l[strlen(l)-1] == '\n' ? 2 : 3;
208       break;
209     case 4:
210       /* Possible information lines are ship, more, cont, and end */
211       if (l[b = strlen(l) - 1] != '\n')
212       {
213 	err(SE_FORM);
214 	decode_end();
215 	return -1;
216       }
217       l[b] = 0;
218       if (strnEQ(l, "ship ", 5))
219       {
220 	/* get name, open new output file */
221 	if (decode_fp != Nullfp)
222 	  decode_end();			/* outputs an "incomplete" warning */
223 	if (strEQ(l + 5, "-"))
224 	  strcpy(decode_fname, "unnamed");
225 	else
226 	  strcpy(decode_fname, l + 5);
227 	sprintf(decode_dest, "%s/%s", extractdest, decode_fname);
228 	printf("Decoding: %s\n", decode_fname);
229 #ifndef VMS	/* shouldn't have explicit version #, so VMS won't overwrite */
230 	if (!overwrite && (decode_fp = fopen(decode_dest, "r")) != Nullfp)
231 	{
232 	  fclose(decode_fp);
233 	  decode_fp = Nullfp;
234 	  err(SE_OVER);
235 	  return -1;
236 	}
237 #endif /* !VMS */
238 	if ((decode_fp = fopen(decode_dest, FOPEN_WB)) == Nullfp)
239 	{
240 	  err(SE_FULL);
241 	  return -1;
242 	}
243 	crc = 0xffffffffL;		/* preload CRC register */
244 	buf4 = 0;			/* empty fifo (for output) */
245 	bcnt = 0;			/* fifo is empty (output) */
246 	b = decb = decn = 0;
247 	ccnt = 0;
248       }
249       else if (strEQ(l, "more"))
250       {
251 	/* check if currently writing */
252 	if (decode_fp == Nullfp)
253 	{
254 	  err(SE_FORM);
255 	  return -1;
256 	}
257 	b = 2;
258       }
259       else if (strnEQ(l, "cont ", 5))
260       {
261 	/* check name and file offset */
262 	if (decode_fp == Nullfp)
263 	{
264 	  err(SE_CONT);
265 	  return -1;
266 	}
267 	for (q = l + 5; *q && *q != ' '; q++)
268 	  ;
269 	if (*q == 0 || atol(l + 5) != ccnt + 4 + (decn != 0) ||
270 	    strNE(q + 1, decode_fname))
271 	{
272 	  err(SE_CONT);
273 	  return -1;
274 	}
275 	b = 0;
276       }
277       else if (strcmp(l, "end") == 0)
278       {
279 	/* check crc, close output file */
280 	if (decode_fp == Nullfp)
281 	{
282 	  err(SE_FORM);
283 	  return -1;
284 	}
285 	if (bcnt != 4 || buf4 != ~crc)
286 	  err(SE_CRC);
287 	else
288 	  printf("CRC verified -- Done.\n");
289 	if (ferror(decode_fp) || fclose(decode_fp))
290 	{
291 	  err(SE_FULL);
292 	  decode_end();
293 	  return -1;
294 	}
295 	decode_fp = Nullfp;
296 	b = 2;
297       }
298       else
299       {
300 	for (q = l; *q && *q != ' '; q++)
301 	  ;
302 	*q = 0;
303 	printf("Ignoring unsupported ship keyword: '%s'\n", l);
304 	b = 4;
305       }
306       break;
307     }
308   }
309   if (!(b & 2)) {
310     err(SE_FORM);
311     return -1;
312   }
313   if (decode_fp)
314     printf("(Continued)\n");
315   return 0;
316 }
317 
318 /* Decode s, a string of base 85 digits or, if fast is true, a string of safe
319    characters generated arithmetically, into its binary equivalent, writing
320    the result to decode_fp, using cputc(). */
321 static void
decode_line(s)322 decode_line(s)
323 unsigned char *s;	/* data to decode */
324 {
325   int b;		/* state of line loop, next character */
326   int k;		/* counts bits or digits read */
327   /* powers of 85 table for decoding */
328   static ulg m[] = {1L,85L,85L*85L,85L*85L*85L,85L*85L*85L*85L};
329 
330   if (fast)
331   {
332     unsigned int d;	/* disperses bits */
333 
334     d = decb;
335     k = decn;
336     while ((b = *s++) != 0)
337       if ((b = invsafe[b]) < sizeof(safe))
338       {
339 	if (b < LOWSZ)
340 	{
341 	  d |= b << k;
342 	  k += 7;
343 	}
344 	else if ((b -= LOWSZ) < LOWSZ)
345 	{
346 	  d |= (b + 0x40) << k;
347 	  k += 7;
348 	}
349 	else
350 	{
351 	  d |= b << k;
352 	  k += 6;
353 	}
354 	if (k >= 8)
355 	{
356 	  cputc(d, decode_fp);
357 	  d >>= 8;
358 	  k -= 8;
359 	}
360       }
361     decb = d;
362     decn = k;
363   }
364   else
365   {
366     ulg d;		/* disperses bytes */
367 
368     d = k = 0;
369     while ((b = *s++) != 0)
370       if ((b = invsafe[b]) < 85)
371       {
372 	d += m[k] * b;
373 	if (++k == 5)
374 	{
375 	  cputc(d, decode_fp);  d >>= 8;
376 	  cputc(d, decode_fp);  d >>= 8;
377 	  cputc(d, decode_fp);  d >>= 8;
378 	  cputc(d, decode_fp);
379 	  d = k = 0;
380 	}
381       }
382     if (--k > 0)
383     {
384       while (--k)
385       {
386 	cputc(d, decode_fp);
387 	d >>= 8;
388       }
389       cputc(d, decode_fp);
390     }
391   }
392 }
393 
394 static void
err(n)395 err(n)
396 int n;			/* error number */
397 {
398   if (n == SE_FULL)
399     perror("ship");
400   fputs(errors[n - 1], stdout);
401   putchar('\n') FLUSH;
402 }
403