1 /*
2  * This program compresses a file without loosing information.
3  * The "usq" program is required to unsqueeze the file
4  * before it can be used.
5  *
6  * Formerly written in BDS C and quite pupular as a CP/M tool
7  * in the earlier eighties.
8  *
9  * Ported to modern c compilers and z88dk by Stefano Bodrato
10  *
11  *
12  * $Id: sq.c,v 1.2 2012-03-29 15:28:36 stefano Exp $
13  *
14  * --------------------------------------------------------------
15  *
16  * Build (z88dk - OSCA):
17  * zcc +osca -osq.exe -lflosxdos sq.c
18  *
19  * Build (z88dk - CP/M):
20  * zcc +osca -osq.com sq.c
21  *
22  * Build (gcc):
23  * gcc -osq sq.c
24  *
25  * --------------------------------------------------------------
26  *
27  * Typical compression rates are between 30 and 50 percent for text files.
28  *
29  * Squeezing a really big file takes a few minutes.
30  *
31  * Useage:
32  *	sq [file1] [file2] ... [filen]
33  *
34  * where file1 through filen are the names of the files to be squeezed.
35  * The file type (under CP/M or MS-DOS) is changed to ".?Q?"; under UN*X,
36  * ".SQ" is appended to the file name. The original file name is stored
37  * in the squeezed file.
38  *
39  * If no file name is given on the command line you will be
40  * prompted for commands (one at a time). An empty command
41  * terminates the program.
42  *
43  * The transformations compress strings of identical bytes and
44  * then encode each resulting byte value and EOF as bit strings
45  * having lengths in inverse proportion to their frequency of
46  * occurrance in the intermediate input stream. The latter uses
47  * the Huffman algorithm. Decoding information is included in
48  * the squeezed file, so squeezing short files or files with
49  * uniformly distributed byte values will actually increase size.
50  */
51 
52 /* CHANGE HISTORY:
53  * 1.3	Close files properly in case of error exit.
54  * 1.4	Break up long introductory lines.
55  * 1.4	Send introduction only to console.
56  * 1.4	Send errors only to console.
57  * 1.5  Fix BUG that caused a rare few squeezed files
58  *	to be incorrect and fail the USQ crc check.
59  *	The problem was that some 17 bit codes were
60  *	generated but are not supported by other code.
61  *	THIS IS A MAJOR CHANGE affecting TR2.C and SQ.H and
62  *	requires recompilation of all files which are part
63  *	of SQ. Two basic changes were made: tree depth is now
64  *	used as a tie breaker when weights are equal. This
65  *	makes the tree shallower. Although that may always be
66  *	sufficient, an error trap was added to cause rescaling
67  *	of the counts if any code > 16 bits long is generated.
68  * 1.5	Add debugging displays option '-'.
69  * 1.6  Fixed to work correctly under MP/M II.  Also shortened
70  *      signon message.
71  * 2.0	New version for use with CI-C86 compiler (CP/M-86 and MS-DOS)
72  * 2.1  Converted for use in MLINK
73  * 2.2  Converted for use with optimizing CI-C86 compiler (MS-DOS)
74  * 3.0  Generalized for UN*X use, changed output file naming convention
75  * 3.2  Change conversion of type from .SQ to .?Q? on non-UNIX machines
76  *      (found release date: 03/12/85)
77  * 3.3  More generalized for use under modern UNIX and z88dk
78  */
79 
80 
81 /*
82  * The following define MUST be set to the maximum length of a file name
83  * on the system "sq" is being compiled for.  If not, "sq" will not be
84  * able to check for whether the output file name it creates is valid
85  * or not.
86  */
87 
88 #define FNM_LEN 12
89 /* #define UNIX				/* comment out for CP/M, MS-DOS versions */
90 #define SQMAIN
91 
92 #define VERSION "3.3   23/03/2012"
93 
94 #include <stdio.h>
95 #include <stdlib.h>
96 #include <fcntl.h>
97 #include <ctype.h>
98 #include <string.h>
99 #include "sqcom.h"
100 
101 #ifdef __OSCA__
102 #include "flos.h"
103 #undef gets(a)
104 #define gets(a) flos_get_input_string(a,16); putchar('\n');
105 #endif
106 
107 
108 /* Definitions and external declarations */
109 
110 EXTERN char	debug;	/* Boolean flag */
111 
112 /* *** Stuff for first translation module *** */
113 
114 int likect;	/*count of consecutive identical chars */
115 int lastchar, newchar;
116 char state;
117 
118 /* states */
119 
120 #define NOHIST	0 	/*don't consider previous input*/
121 #define SENTCHAR 1 	/*lastchar set, no lookahead yet */
122 #define SENDNEWC 2 	/*newchar set, previous sequence done */
123 #define SENDCNT 3 	/*newchar set, DLE sent, send count next */
124 
125 /* *** Stuff for second translation module *** */
126 
127 #define NOCHILD -1	/* indicates end of path through tree */
128 #define NUMNODES (NUMVALS + NUMVALS - 1)	/* nbr of nodes */
129 
130 #define MAXCOUNT 0xFFFF	/* biggest unsigned integer */
131 
132 /* The following array of structures are the nodes of the
133  * binary trees. The first NUMVALS nodes becomethe leaves of the
134  * final tree and represent the values of the data bytes being
135  * encoded and the special endfile, SPEOF.
136  * The remaining nodes become the internal nodes of the final tree.
137  */
138 
139 struct	nd {
140 	unsigned int weight;	/* number of appearances */
141 	char tdepth;		/* length on longest path in tre*/
142 	int lchild, rchild;	/* indexes to next level */
143 } node[NUMNODES];
144 
145 int dctreehd;	/*index to head node of final tree */
146 
147 
148 /* This is the encoding table:
149  * The bit strings have first bit in  low bit.
150  * Note that counts were scaled so code fits unsigned integer
151  */
152 
153 char codelen[NUMVALS];		/* number of bits in code */
154 unsigned int code[NUMVALS];	/* code itself, right adjusted */
155 unsigned int tcode;		/* temporary code value */
156 
157 
158 /* Variables used by encoding process */
159 
160 int curin;		/* Value currently being encoded */
161 char cbitsrem;		/* Number of code string bits remaining */
162 unsigned int ccode;	/* Current code shifted so next code bit is at right */
163 
164 
165 /* More vars to handle file output */
166 
167 static char obuf[128];
168 static int oblen = 0;
169 
170 
171 
172 /* -- DEBUGGING TOOLS -- */
173 #ifdef DEBUG
pcounts()174 void pcounts()
175 {
176 	int i;
177 
178 	if(debug) {
179 		printf("\nCounts after 1st algorithm and maybe scaling");
180 		for(i = 0; i < NUMVALS; ++i) {
181 			if(i%8 == 0)
182 				printf("\n%4x  ", i);
183 			printf("%5u  ", node[i].weight);
184 		}
185 	printf("\n\n");
186 	}
187 }
188 
phuff()189 void phuff()
190 {
191 	int i;
192 
193 	if(debug) {
194 		printf("\nEncoding tree - root=%3d\n", dctreehd);
195 		for(i = 0; i < NUMNODES; ++i)
196 			if(node[i].weight != 0)
197 				printf("%3d  w=%5u d=%3d l=%3d r=%3d\n", i, node[i].weight, node[i].tdepth, node[i].lchild, node[i].rchild);
198 
199 		printf("\nHuffman codes\n");
200 		for(i = 0; i < NUMVALS; ++i) {
201 			if(codelen[i] > 0)
202 				printf("%3d  %4x l=%2d c=%4x\n", i, i, codelen[i], code[i]);
203 		}
204 	}
205 }
206 #endif
207 /* -- (END) DEBUGGING TOOLS -- */
208 
209 /* Get next byte from file and update checksum */
210 
getc_crc(FILE * ib)211 int getc_crc(FILE *ib)
212 {
213 	int c;
214 
215 	c = getc(ib);
216 	if (c != EOF)
217 		crc += c;		/* checksum */
218 	return c;
219 }
220 
221 /* flush output buffer */
oflush(FILE * iob)222 void oflush(FILE *iob)
223 {
224 	if (oblen && !fwrite(obuf, oblen, 1, iob)) {
225 		printf("Error writing output file\n");
226 		exit(1);
227 	}
228 	oblen = 0;
229 }
230 
231 /* Output functions with error reporting */
232 
putce(int c,FILE * iob)233 void putce(int c,  FILE *iob)
234 {
235 	obuf[oblen++] = c;
236 	if (oblen >= sizeof(obuf)) oflush(iob);
237 }
238 
putwe(int w,FILE * iob)239 void putwe(int w, FILE *iob)
240 {
241 	obuf[oblen++] = w;
242 	if (oblen >= sizeof(obuf)) oflush(iob);
243 	obuf[oblen++] = w >> 8;
244 	if (oblen >= sizeof(obuf)) oflush(iob);
245 }
246 
247 
248 /* Compare two trees, if a > b return true, else return false
249  * note comparison rules in previous comments.
250  */
251 
252 /* Boolean */
cmptrees(int a,int b)253 char cmptrees(int a, int b)
254 /* a,b = root nodes of trees */
255 {
256 	if(node[a].weight > node[b].weight)
257 		return TRUE;
258 	if(node[a].weight == node[b].weight)
259 		if(node[a].tdepth > node[b].tdepth)
260 			return TRUE;
261 	return FALSE;
262 }
263 
264 
maxchar(char a,char b)265 char maxchar(char a, char b)
266 {
267 	return a > b ? a : b;
268 }
269 
270 /******** Second translation - bytes to variable length bit strings *********/
271 
272 
273 /* HUFFMAN ALGORITHM: develops the single element trees
274  * into a single binary tree by forming subtrees rooted in
275  * interior nodes having weights equal to the sum of weights of all
276  * their descendents and having depth counts indicating the
277  * depth of their longest paths.
278  *
279  * When all trees have been formed into a single tree satisfying
280  * the heap property (on weight, with depth as a tie breaker)
281  * then the binary code assigned to a leaf (value to be encoded)
282  * is then the series of left (0) and right (1)
283  * paths leading from the root to the leaf.
284  * Note that trees are removed from the heaped list by
285  * moving the last element over the top element and
286  * reheaping the shorter list.
287  */
288 
289 /* Make a heap from a heap with a new top */
290 
adjust(list,top,bottom)291 adjust(list, top, bottom)
292 int list[], top, bottom;
293 {
294 	int k, temp;
295 
296 	k = 2 * top + 1;	/* left child of top */
297 	temp = list[top];	/* remember root node of top tree */
298 	if( k <= bottom) {
299 		if( k < bottom && cmptrees(list[k], list[k + 1]))
300 			++k;
301 
302 		/* k indexes "smaller" child (in heap of trees) of top */
303 		/* now make top index "smaller" of old top and smallest child */
304 		if(cmptrees(temp, list[k])) {
305 			list[top] = list[k];
306 			list[k] = temp;
307 			/* Make the changed list a heap */
308 			adjust(list, k, bottom); /*recursive*/
309 		}
310 	}
311 }
312 
313 /* heap() and adjust() maintain a list of binary trees as a
314  * heap with the top indexing the binary tree on the list
315  * which has the least weight or, in case of equal weights,
316  * least depth in its longest path. The depth part is not
317  * strictly necessary, but tends to avoid long codes which
318  * might provoke rescaling.
319  */
320 
heap(int list[],int length)321 void heap(int list[], int length)
322 {
323 	int i;
324 
325 	for(i = (length - 2) / 2; i >= 0; --i)
326 		adjust(list, i, length - 1);
327 }
328 
329 
330 
bld_tree(int list[],int len)331 void bld_tree(int list[], int len)
332 {
333 	int freenode;		/* next free node in tree */
334 	int lch, rch;		/* temporaries for left, right children */
335 	struct nd *frnp;	/* free node pointer */
336 	int i;
337 
338 	/* Initialize index to next available (non-leaf) node.
339 	 * Lower numbered nodes correspond to leaves (data values).
340 	 */
341 	freenode = NUMVALS;
342 
343 	while(len > 1) {
344 		/* Take from list two btrees with least weight
345 		 * and build an interior node pointing to them.
346 		 * This forms a new tree.
347 		 */
348 		lch = list[0];	/* This one will be left child */
349 
350 		/* delete top (least) tree from the list of trees */
351 		list[0] = list[--len];
352 		adjust(list, 0, len - 1);
353 
354 		/* Take new top (least) tree. Reuse list slot later */
355 		rch = list[0];	/* This one will be right child */
356 
357 		/* Form new tree from the two least trees using
358 		 * a free node as root. Put the new tree in the list.
359 		 */
360 		frnp = &node[freenode];	/* address of next free node */
361 		list[0] = freenode++;	/* put at top for now */
362 		frnp->lchild = lch;
363 		frnp->rchild = rch;
364 		frnp->weight = node[lch].weight + node[rch].weight;
365 		frnp->tdepth = 1 + maxchar(node[lch].tdepth, node[rch].tdepth);
366 		/* reheap list  to get least tree at top*/
367 		adjust(list, 0, len - 1);
368 	}
369 	dctreehd = list[0];	/*head of final tree */
370 }
371 
372 
373 /* The count of number of occurrances of each input value
374  * have already been prevented from exceeding MAXCOUNT.
375  * Now we must scale them so that their sum doesn't exceed
376  * ceiling and yet no non-zero count can become zero.
377  * This scaling prevents errors in the weights of the
378  * interior nodes of the Huffman tree and also ensures that
379  * the codes will fit in an unsigned integer. Rescaling is
380  * used if necessary to limit the code length.
381  */
382 
scale(unsigned int ceil)383 void scale(unsigned int ceil)
384 /* ceil: upper limit on total weight */
385 {
386 	int c, ovflw, divisor, i;
387 	unsigned int w, sum;
388 	char increased;		/* flag */
389 
390 	do {
391 		for(i = sum = ovflw = 0; i < NUMVALS; ++i) {
392 			if(node[i].weight > (ceil - sum))
393 				++ovflw;
394 			sum += node[i].weight;
395 		}
396 
397 		divisor = ovflw + 1;
398 
399 		/* Ensure no non-zero values are lost */
400 		increased = FALSE;
401 		for(i = 0; i < NUMVALS; ++i) {
402 			w = node[i].weight;
403 			if (w < divisor && w > 0) {
404 				/* Don't fail to provide a code if it's used at all */
405 				node[i].weight = divisor;
406 				increased = TRUE;
407 			}
408 		}
409 	} while(increased);
410 
411 	/* Scaling factor choosen, now scale */
412 	if(divisor > 1)
413 		for(i = 0; i < NUMVALS; ++i)
414 			node[i].weight /= divisor;
415 }
416 
417 
418 
419 /* This translation uses the Huffman algorithm to develop a
420  * binary tree representing the decoding information for
421  * a variable length bit string code for each input value.
422  * Each string's length is in inverse proportion to its
423  * frequency of appearance in the incoming data stream.
424  * The encoding table is derived from the decoding table.
425  *
426  * The range of valid values into the Huffman algorithm are
427  * the values of a byte stored in an integer plus the special
428  * endfile value chosen to be an adjacent value. Overall, 0-SPEOF.
429  *
430  * The "node" array of structures contains the nodes of the
431  * binary tree. The first NUMVALS nodes are the leaves of the
432  * tree and represent the values of the data bytes being
433  * encoded and the special endfile, SPEOF.
434  * The remaining nodes become the internal nodes of the tree.
435  *
436  * In the original design it was believed that
437  * a Huffman code would fit in the same number of
438  * bits that will hold the sum of all the counts.
439  * That was disproven by a user's file and was a rare but
440  * infamous bug. This version attempts to choose among equally
441  * weighted subtrees according to their maximum depths to avoid
442  * unnecessarily long codes. In case that is not sufficient
443  * to guarantee codes <= 16 bits long, we initially scale
444  * the counts so the total fits in an unsigned integer, but
445  * if codes longer than 16 bits are generated the counts are
446  * rescaled to a lower ceiling and code generation is retried.
447  */
448 
449 
450 /* Initialize all nodes to single element binary trees
451  * with zero weight and depth.
452  */
453 
init_tree()454 void init_tree()
455 {
456 	int i;
457 
458 	for(i = 0; i < NUMNODES; ++i) {
459 		node[i].weight = 0;
460 		node[i].tdepth = 0;
461 		node[i].lchild = NOCHILD;
462 		node[i].rchild = NOCHILD;
463 	}
464 }
465 
init_enc()466 void init_enc()
467 {
468 	int i;
469 
470 	/* Initialize encoding table */
471 	for(i = 0; i < NUMVALS; ++i) {
472 		codelen[i] = 0;
473 	}
474 }
475 
476 
477 /* Initialize the Huffman translation. This requires reading
478  * the input file through any preceding translation functions
479  * to get the frequency distribution of the various values.
480  */
481 
init_huff(FILE * ib)482 void init_huff(FILE *ib)
483 {
484 	int c, i;
485 	int btlist[NUMVALS];	/* list of intermediate binary trees */
486 	int listlen;		/* length of btlist */
487 	unsigned int *wp;	/* simplifies weight counting */
488 	unsigned int ceiling;	/* limit for scaling */
489 
490 	/* Initialize tree nodes to no weight, no children */
491 	init_tree();
492 
493 	/* Build frequency info in tree */
494 	do {
495 		c = getcnr(ib);
496 		if(c == EOF)
497 			c = SPEOF;
498 		if(*(wp = &node[c].weight) !=  MAXCOUNT)
499 			++(*wp);
500 	} while(c != SPEOF);
501 #ifdef DEBUG
502 	pcounts();	/* debugging aid */
503 #endif
504 	ceiling = MAXCOUNT;
505 
506 	do {	/* Keep trying to scale and encode */
507 		if(ceiling != MAXCOUNT)
508 			printf("*** rescaling ***, ");
509 		scale(ceiling);
510 		ceiling /= 2;	/* in case we rescale */
511 #ifdef DEBUG
512 		pcounts();	/* debugging aid */
513 #endif
514 
515 		/* Build list of single node binary trees having
516 		 * leaves for the input values with non-zero counts
517 		 */
518 		for(i = listlen = 0; i < NUMVALS; ++i)
519 			if(node[i].weight != 0) {
520 				node[i].tdepth = 0;
521 				btlist[listlen++] = i;
522 			}
523 
524 		/* Arrange list of trees into a heap with the entry
525 		 * indexing the node with the least weight at the top.
526 		 */
527 		heap(btlist, listlen);
528 
529 		/* Convert the list of trees to a single decoding tree */
530 		bld_tree(btlist, listlen);
531 
532 		/* Initialize the encoding table */
533 		init_enc();
534 
535 		/* Try to build encoding table.
536 		 * Fail if any code is > 16 bits long.
537 		 */
538 	} while(buildenc(0, dctreehd) == ERROR);
539 #ifdef DEBUG
540 	phuff();	/* debugging aid */
541 #endif
542 	/* Initialize encoding variables */
543 	cbitsrem = 0;	/*force initial read */
544 	curin = 0;	/*anything but endfile*/
545 }
546 
547 
548 
549 /* Recursive routine to walk the indicated subtree and level
550  * and maintain the current path code in bstree. When a leaf
551  * is found the entire code string and length are put into
552  * the encoding table entry for the leaf's data value.
553  *
554  * Returns ERROR if codes are too long.
555  */
556 
557 /* returns ERROR or OK */
558 /* level of tree being examined, from zero */
559 /* root of subtree is also data value if leaf */
buildenc(int level,int root)560 int buildenc(int level, int root)
561 {
562 	int l, r;
563 
564 	l = node[root].lchild;
565 	r = node[root].rchild;
566 #ifdef DEBUG
567 	if (debug) printf("level=%d,root=%d,l=%d,r=%d,tcode=%04x\n",level,root,l,r,tcode);
568 #endif
569 	if( l == NOCHILD && r == NOCHILD) {
570 		/* Leaf. Previous path determines bit string
571 		 * code of length level (bits 0 to level - 1).
572 		 * Ensures unused code bits are zero.
573 		 */
574 		codelen[root] = level;
575 		code[root] = tcode & ((unsigned int)(~0) >> ((sizeof(int) * 8) - level));
576 #ifdef DEBUG
577 		if (debug) printf("  codelen[%d]=%d,code[%d]=%02x\n",root,codelen[root],root,code[root]);
578 #endif
579 		return ((level > 16) ? ERROR : OK);
580 	} else {
581 		if( l != NOCHILD) {
582 			/* Clear path bit and continue deeper */
583 			tcode &= ~(1 << level);
584 			/* NOTE RECURSION */
585 			if(buildenc(level + 1, l) == ERROR)
586 				return ERROR;
587 		}
588 		if(r != NOCHILD) {
589 			/* Set path bit and continue deeper */
590 			tcode |= 1 << level;
591 			/* NOTE RECURSION */
592 			if(buildenc(level + 1, r) == ERROR)
593 				return ERROR;
594 		}
595 	}
596 	return (OK);	/* if we got here we're ok so far */
597 }
598 
599 
600 /* Write out the header of the compressed file */
601 /* input file name (w/ or w/o drive) */
602 
wrt_head(FILE * ob,char * infile)603 void wrt_head(FILE *ob, char *infile)
604 {
605 	int i, k, l, r;
606 	int numnodes;		/* nbr of nodes in simplified tree */
607 
608 	putwe(RECOGNIZE, ob);	/* identifies as compressed */
609 	putwe(crc, ob);		/* unsigned sum of original data */
610 
611 	/* Record the original file name w/o drive */
612 	if(*(infile + 1) == ':')
613 		infile += 2;	/* skip drive */
614 
615 	do {
616 		putce(*infile, ob);
617 	} while(*(infile++) != '\0');
618 
619 
620 	/* Write out a simplified decoding tree. Only the interior
621 	 * nodes are written. When a child is a leaf index
622 	 * (representing a data value) it is recoded as
623 	 * -(index + 1) to distinguish it from interior indexes
624 	 * which are recoded as positive indexes in the new tree.
625 	 * Note that this tree will be empty for an empty file.
626 	 */
627 
628 	numnodes = dctreehd < NUMVALS ? 0 : dctreehd - (NUMVALS -1);
629 	putwe(numnodes, ob);
630 
631 	for(k = 0, i = dctreehd; k < numnodes; ++k, --i) {
632 		l = node[i].lchild;
633 		r = node[i].rchild;
634 		l = l < NUMVALS ? -(l + 1) : dctreehd - l;
635 		r = r < NUMVALS ? -(r + 1) : dctreehd - r;
636 		putwe(l, ob);	/* left child */
637 		putwe(r, ob);	/* right child */
638 	}
639 }
640 
641 /* Get an encoded byte or EOF. Reads from specified stream AS NEEDED.
642  *
643  * There are two unsynchronized bit-byte relationships here.
644  * The input stream bytes are converted to bit strings of
645  * various lengths via the static variables named c...
646  * These bit strings are concatenated without padding to
647  * become the stream of encoded result bytes, which this
648  * function returns one at a time. The EOF (end of file) is
649  * converted to SPEOF for convenience and encoded like any
650  * other input value. True EOF is returned after that.
651  *
652  * The original gethuff() called a seperate function,
653  * getbit(), but that more readable version was too slow.
654  */
655 
656 /*  Returns byte values except for EOF */
gethuff(FILE * ib)657 int gethuff(FILE *ib)
658 {
659 	unsigned int rbyte;	/* Result byte value */
660 	char need, take;	/* numbers of bits */
661 
662 	rbyte = 0;
663 	need = 8;		/* build one byte per call */
664 
665 	/* Loop to build a byte of encoded data
666 	 * Initialization forces read the first time
667 	 */
668 
669 loop:
670 	if(cbitsrem >= need) {
671 		/* Current code fullfills our needs */
672 		if(need == 0)
673 			return rbyte & 0xff;
674 		/* Take what we need */
675  		rbyte |= ccode << (8 - need);
676 		/* And leave the rest */
677 		ccode >>= need;
678 		cbitsrem -= need;
679 		return rbyte & 0xff;
680 	}
681 
682 	/* We need more than current code */
683 	if(cbitsrem > 0) {
684 		/* Take what there is */
685 		rbyte |= ccode << (8 - need);
686 		need -= cbitsrem;
687 	}
688 	/* No more bits in current code string */
689 	if(curin == SPEOF) {
690 		/* The end of file token has been encoded. If
691 		 * result byte has data return it and do EOF next time
692 		 */
693 		cbitsrem = 0;
694 
695 		return (need == 8) ? EOF : rbyte & 0xff;
696 	}
697 
698 	/* Get an input byte */
699 	if((curin = getcnr(ib)) == EOF)
700 		curin = SPEOF;	/* convenient for encoding */
701 
702 	/* Get the new byte's code */
703 	ccode = code[curin];
704 	cbitsrem = codelen[curin];
705 
706 	goto loop;
707 }
708 
709 
710 /* First translation - encoding of repeated characters
711  * The code is byte for byte pass through except that
712  * DLE is encoded as DLE, zero and repeated byte values
713  * are encoded as value, DLE, count for count >= 3.
714  */
715 
init_ncr()716 void init_ncr()	/*initialize getcnr() */
717 {
718 	state = NOHIST;
719 }
720 
getcnr(FILE * iob)721 int getcnr(FILE *iob)
722 {
723 	switch(state) {
724 	case NOHIST:
725 		/* No relevant history */
726 		state = SENTCHAR;
727 		return lastchar = getc_crc(iob);
728 	case SENTCHAR:
729 		/* Lastchar is set, need lookahead */
730 		switch(lastchar) {
731 		case DLE:
732 			state = NOHIST;
733 			return 0;	/* indicates DLE was the data */
734 		case EOF:
735 			return EOF;
736 		default:
737 			for(likect = 1; (newchar = getc_crc(iob)) == lastchar && likect < 255; ++likect)
738 				;
739 			switch(likect) {
740 			case 1:
741 				return lastchar = newchar;
742 			case 2:
743 				/* just pass through */
744 				state = SENDNEWC;
745 				return lastchar;
746 			default:
747 				state = SENDCNT;
748 				return DLE;
749 			}
750 		}
751 	case SENDNEWC:
752 		/* Previous sequence complete, newchar set */
753 		state = SENTCHAR;
754 		return lastchar = newchar;
755 	case SENDCNT:
756 		/* Sent DLE for repeat sequence, send count */
757 		state = SENDNEWC;
758 		return likect;
759 	default:
760 		puts("Bug - bad state\n");
761 		exit(1);
762 	}
763 }
764 
765 
squeeze(char * infile,char * outfile)766 void squeeze(char *infile, char *outfile)
767 {
768 	int i, c,c2;
769 	FILE *inbuff;
770 	FILE *outbuff;		/* file buffers */
771 
772 	printf("%s -> %s: ", infile, outfile);
773 
774 	if(!(inbuff=fopen(infile, "rb"))) {
775 		printf("Can't open %s for input pass 1\n", infile);
776 		return;
777 	}
778 	if(!(outbuff=fopen(outfile, "wb"))) {
779 		printf("Can't create %s\n", outfile);
780 		fclose(inbuff);
781 		return;
782 	}
783 
784 	/* First pass - get properties of file */
785 	crc = 0;	/* initialize checksum */
786 	printf("analyzing, ");
787 	init_ncr();
788 	init_huff(inbuff);
789 	fclose(inbuff);
790 
791 	/* Write output file header with decoding info */
792 	wrt_head(outbuff, infile);
793 
794 	/* Second pass - encode the file */
795 	printf("squeezing,");
796 	if(!(inbuff=fopen(infile, "rb"))) {
797 		printf("Can't open %s for input pass 2\n", infile);
798 		goto closeout;
799 	}
800 	init_ncr();	/* For second pass */
801 
802 	/* Translate the input file into the output file */
803 	while((c = gethuff(inbuff)) != EOF)
804 		putce(c, outbuff);
805 	oflush(outbuff);
806 	printf(" done.\n");
807 closeall:
808 	fclose(inbuff);
809 closeout:
810 	fclose(outbuff);
811 }
812 
813 #ifdef __OSCA__
814 /*
815  * Wildcard comparison tool
816  * Found in the BDS C sources, (wildexp..),written by Leor Zolman.
817  * contributed by: W. Earnest, Dave Hardy, Gary P. Novosielski, Bob Mathias and others
818  *
819 */
820 
match(char * wildnam,char * filnam)821 int match(char *wildnam, char *filnam)
822 {
823    char c;
824    while (c = *wildnam++)
825 	if (c == '?')
826 		if ((c = *filnam++) && c != '.')
827 			continue;
828 		else
829 			return FALSE;
830 	else if (c == '*')
831 	{
832 		while (c = *wildnam)
833 		{ 	wildnam++;
834 			if (c == '.') break;
835 		}
836 		while (c = *filnam)
837 		{	filnam++;
838 			if (c == '.') break;
839 		}
840 	}
841 	else if (c == *filnam++)
842 	 	continue;
843 	else return FALSE;
844 
845    if (!*filnam)
846 	return TRUE;
847    else
848 	return FALSE;
849 }
850 #endif
851 
obey(char * p)852 void obey(char *p)
853 {
854 	char *q;
855 	char outfile[128];	/* output file spec. */
856 	int x;
857 
858 	if(*p == '-') {
859 		/* toggle debug option */
860 		debug = !debug;
861 		return;
862 	}
863 
864 	/* Check for ambiguous (wild-card) name */
865 	for(q = p; *q != '\0'; ++q)
866 		if(*q == '*' || *q == '?') {
867 		#ifdef __OSCA__
868 			if ((x=dir_move_first())!=0) return;
869 			while (x == 0) {
870 				if (match(p,dir_get_entry_name()))
871 					obey(dir_get_entry_name());
872 				x = dir_move_next();
873 			}
874 		#else
875 			printf("\nAmbiguous name %s ignored", p);
876 		#endif
877 			return;
878 	}
879 	/* First build output file name */
880 	strcpy(outfile, p);		/* copy input name to output */
881 
882 	/* Find and change output file suffix */
883 
884 	if (strlen(outfile) > FNM_LEN) {	/* check for long file name */
885 		q = outfile + FNM_LEN - 3;
886 		*q = '\0';		/* make room for suffix */
887 	}
888 	else {
889 		q = outfile + strlen(outfile);
890 #ifndef UNIX
891 		for(; --q >= outfile;)
892 			if (*q == '.') {
893 				if (strlen(q) == 1)	/* if . */
894 					strcat(outfile,"SQ");
895 				else if (strlen(q) == 2) /* if .X */
896 					strcat(outfile,"Q");
897 				else			/* must be .XX or .XXX */
898 					*(q+2) = 'Q';
899 				break;
900 			}
901 		if (q < outfile)
902 			strcat(outfile, ".SQ");
903 #else
904 		--q;
905 #endif
906 	}
907 
908 #ifdef UNIX
909 	strcat(outfile, ".SQ");
910 #endif
911 
912 	squeeze(p, outfile);
913 }
914 
915 
main(int argc,char * argv[])916 main(int argc, char *argv[])
917 {
918 	int i,c;
919 	char inparg[128];	/* parameter from input */
920 
921 	debug = FALSE;
922 	printf("File squeezer version %s   (original author: R. Greenlaw)\n\n", VERSION);
923 
924 	/* Process the parameters in order */
925 	for(i = 1; i < argc; ++i)
926 		obey(argv[i]);
927 
928 	if(argc < 2) {
929 		printf("Enter file names, one line at a time, or type <RETURN> to quit.\n");
930 		do {
931 			gets(inparg);
932 			if(inparg[0] != '\0')
933 				obey(inparg);
934 		} while(inparg[0] != '\0');
935 	}
936 }
937 
938