1 /* This program compresses a file without losing information.
2  * The "usq" program is required to unsqueeze the file
3  * before it can be used.
4  *
5  * Typical compression rates are between 30 and 50 percent for text files.
6  *
7  * Squeezing a really big file takes a few minutes.
8  *
9  * Usage:
10  *	sq [file1] [file2] ... [filen]
11  *
12  * where file1 through filen are the names of the files to be squeezed.
13  * The file type (under CP/M or MS-DOS) is changed to ".SQ"; under UN*X,
14  * ".SQ" is appended to the file name. The original file name is stored
15  * in the squeezed file.
16  *
17  * If no file name is given on the command line you will be
18  * prompted for commands (one at a time). An empty command
19  * terminates the program.
20  *
21  * The transformations compress strings of identical bytes and
22  * then encode each resulting byte value and EOF as bit strings
23  * having lengths in inverse proportion to their frequency of
24  * occurrence in the intermediate input stream. The latter uses
25  * the Huffman algorithm. Decoding information is included in
26  * the squeezed file, so squeezing short files or files with
27  * uniformly distributed byte values will actually increase size.
28  */
29 
30 /* CHANGE HISTORY:
31  * 1.3	Close files properly in case of error exit.
32  * 1.4	Break up long introductory lines.
33  * 1.4	Send introduction only to console.
34  * 1.4	Send errors only to console.
35  * 1.5  Fix BUG that caused a rare few squeezed files
36  *	to be incorrect and fail the USQ crc check.
37  *	The problem was that some 17 bit codes were
38  *	generated but are not supported by other code.
39  *	THIS IS A MAJOR CHANGE affecting TR2.C and SQ.H and
40  *	requires recompilation of all files which are part
41  *	of SQ. Two basic changes were made: tree depth is now
42  *	used as a tie breaker when weights are equal. This
43  *	makes the tree shallower. Although that may always be
44  *	sufficient, an error trap was added to cause rescaling
45  *	of the counts if any code > 16 bits long is generated.
46  * 1.5	Add debugging displays option '-'.
47  * 1.6  Fixed to work correctly under MP/M II.  Also shortened
48  *      signon message.
49  * 2.0	New version for use with CI-C86 compiler (CP/M-86 and MS-DOS)
50  * 2.1  Converted for use in MLINK
51  * 2.2  Converted for use with optimizing CI-C86 compiler (MS-DOS)
52  * 3.0  Generalized for UN*X use, changed output file naming convention
53  * 3.3  Modified to work with ULTRIX, as per Tom Reid.
54  */
55 
56 /* ejecteject */
57 
58 /*
59  * The following define MUST be set to the maximum length of a file name
60  * on the system "sq" is being compiled for.  If not, "sq" will not be
61  * able to check for whether the output file name it creates is valid
62  * or not.
63  */
64 
65 #ifdef TOPS20
66 #define FNM_LEN 79
67 #else
68 #define FNM_LEN 14
69 #endif
70 #define SQMAIN
71 
72 #define VERSION "3.3   10/29/86"
73 
74 #include <stdio.h>
75 #include "sqcom.h"
76 #include "sq.h"
77 #ifdef TOPS20
78 #include <libt20.h>
79 #include <jsys.h>
80 #endif
81 
82 #define FALSE 0
83 
#ifdef TOPS20
/*
 * File name request block (TOPS-20 builds only).  dofil() copies the
 * requested name into it and has fullfname() complete it; squeeze()
 * later passes its address to wrt_head().
 */
struct	filename    req;
#endif
87 
main(argc,argv)88 main(argc, argv)
89 int argc;
90 char *argv[];
91 {
92 	int i,c;
93 	char inparg[128];	/* parameter from input */
94 
95 	debug = FALSE;
96 	printf("File squeezer version %s (original author: R. Greenlaw)\n\n", VERSION);
97 
98 	/* Process the parameters in order */
99 	for(i = 1; i < argc; ++i)
100 		obey(argv[i]);
101 
102 	if(argc < 2) {
103 		printf("Enter file names, one line at a time, or type <RETURN> to quit.");
104 		do {
105 			printf("\n*");
106 #ifdef TOPS20
107 			fflush (stdout);
108 #endif
109 			for(i = 0; i < 16; ++i) {
110 				if((c = getchar()) == EOF)
111 					c = '\n';	/* fake empty (exit) command */
112 				if((inparg[i] = c) == '\n') {
113 					inparg[i] = '\0';
114 					break;
115 				}
116 			}
117 			if(inparg[0] != '\0')
118 				obey(inparg);
119 		} while(inparg[0] != '\0');
120 	}
121 }
122 
123 /* ejecteject */
124 
obey(p)125 obey(p)
126 char *p;
127 {
128 #ifdef TOPS20
129     char   *d, *dir ();		/* directory junk */
130 #else
131 	char *q;
132 	char outfile[128];	/* output file spec. */
133 #endif
134 
135 	if(*p == '-') {
136 		/* toggle debug option */
137 		debug = !debug;
138 		return;
139 	}
140 
141 #ifdef TOPS20
142 	for (d = dir (p, 0); *d; d = dir (NULL, 0)) {
143 	    dofil (d);
144 	}
145 	return (0);
146 }
147 
148 int
dofil(p)149 dofil(p)
150 char *p;
151 {
152 	char	outfile[128];		/* output file spec. */
153 	char    ftype[40];
154 	char    fname[40];
155 	int	r;
156 
157 	strcpy (req.name, p);
158 	if ((r = fullfname(&req, "RAW")) != 0) {
159 	    if (r == 1) {
160 		printf ("JFNS error encountered\n");
161 	    }
162 	    printf ("File, %s, not found\n", p);
163 	    return (-1);
164 	}
165 	strcpy (fname, req.nm);
166 	strcpy (ftype, req.typ);
167 	if (strlen (ftype) == 0) {
168 	    strcpy (ftype, "QQQ");
169 	} else {
170 	    switch(*(ftype+1)) {
171 		case '\0':
172 		    *(ftype+2) = '\0';
173 			/* fall thru */
174 		default:
175 		    *(ftype + 1) = 'Q';
176 	    }
177 	}
178 	sprintf (outfile, "%.8s.%.3s", fname, ftype);
179 
180 	squeeze (req.qname, outfile);
181 #else
182 	/* Check for ambiguous (wild-card) name */
183 	for(q = p; *q != '\0'; ++q)
184 		if(*q == '*' || *q == '?') {
185 			printf("\nAmbiguous name %s ignored", p);
186 			return;
187 	}
188 	/* First build output file name */
189 	strcpy(outfile, p);		/* copy input name to output */
190 
191 	/* Find and change output file suffix */
192 
193 	if (strlen(outfile) + 3 > FNM_LEN) {	/* check for long file name */
194 		q = outfile + FNM_LEN - 3;
195 		*q = '\0';		/* make room for suffix */
196 	}
197 	else {
198 		q = outfile + strlen(outfile);
199 #ifndef UNIX
200 		for(; --q >= outfile;)
201 			if (*q == '.') {
202 				*q = '\0';	/* delete file type */
203 				break;
204 			}
205 #else
206 		--q;
207 #endif
208 	}
209 
210 	strcat(outfile, ".SQ");
211 
212 	squeeze(p, outfile);
213 #endif
214 }
215 
216 /* ejecteject */
217 
squeeze(infile,outfile)218 squeeze(infile, outfile)
219 char *infile, *outfile;
220 {
221 #ifdef TOPS20
222 	extern	int _uioch[];
223 	extern	int _uioufx[];
224 	int jfn;
225 #endif
226 	int i, c,c2;
227 	FILE *inbuff, *outbuff;		/* file buffers */
228 
229 	printf("%s -> %s: ", infile, outfile);
230 #ifdef TOPS20
231 	if(!(inbuff=fopen(infile, "rC-"))) {
232 #else
233 #ifdef ULTRIX
234 	if(!(inbuff=fopen(infile, "r"))) {
235 #else
236 	if(!(inbuff=fopen(infile, "rb"))) {
237 #endif
238 #endif
239 		printf("Can't open %s for input pass 1\n", infile);
240 		return;
241 	}
242 #ifdef TOPS20
243 	jfn = _uioch[_uioufx[inbuff->siofd]];
244 	bytsiz = (_gtfdb (jfn, _FBBYV) << FBBSZ_S) & FBBSZ_M;
245 	if ((outbuff = fopen (outfile, "w8")) == NULL) {
246 #else
247 #ifdef ULTRIX
248 	if(!(outbuff=fopen(outfile, "w"))) {
249 #else
250 	if(!(outbuff=fopen(outfile, "wb"))) {
251 #endif
252 #endif
253 		printf("Can't create %s\n", outfile);
254 		fclose(inbuff);
255 		return;
256 	}
257 
258 	/* First pass - get properties of file */
259 	crc = 0;	/* initialize checksum */
260 	printf("analyzing, ");
261 #ifdef TOPS20
262 	fflush (stdout);
263 #endif
264 	init_ncr();
265 	init_huff(inbuff);
266 	fclose(inbuff);
267 
268 	/* Write output file header with decoding info */
269 #ifdef TOPS20
270 	wrt_head(outbuff, &req);
271 #else
272 	wrt_head(outbuff, infile);
273 #endif
274 
275 	/* Second pass - encode the file */
276 	printf("squeezing,");
277 #ifdef TOPS20
278 	fflush (stdout);
279 	if(!(inbuff=fopen(infile, "rC-"))) {
280 #else
281 	if(!(inbuff=fopen(infile, "rb"))) {
282 #endif
283 		printf("Can't open %s for input pass 2\n", infile);
284 		goto closeout;
285 	}
286 	init_ncr();	/* For second pass */
287 
288 	/* Translate the input file into the output file */
289 	while((c = gethuff(inbuff)) != EOF)
290 		putce(c, outbuff);
291 	oflush(outbuff);
292 	printf(" done.\n");
293 closeall:
294 	fclose(inbuff);
295 closeout:
296 	fclose(outbuff);
297 }
298