xref: /original-bsd/usr.bin/ptx/ptx.c (revision c3e32dec)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.proprietary.c%
6  */
7 
8 #ifndef lint
9 static char copyright[] =
10 "@(#) Copyright (c) 1989, 1993\n\
11 	The Regents of the University of California.  All rights reserved.\n";
12 #endif /* not lint */
13 
14 #ifndef lint
15 static char sccsid[] = "@(#)ptx.c	8.1 (Berkeley) 06/06/93";
16 #endif /* not lint */
17 
18 /*	permuted title index
19 	ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output]
20 	Ptx reads the input file and permutes on words in it.
21 	It excludes all words in the ignore file.
22 	Alternately it includes words in the only file.
23 	if neither is given it excludes the words in _PATH_EIGN.
24 
25 	The width of the output line can be changed to num
	characters.  If omitted, the default is 72, or 100 for troff (-t) output.
27 	the -f flag tells the program to fold the output
28 	the -t flag says the output is for troff and the
29 	output is then wider.
30 
31 	*/
32 
33 #include <stdio.h>
34 #include <ctype.h>
35 #include <signal.h>
36 #include "pathnames.h"
37 
/*
 * Tunable constants.
 *
 * TILDE  DEL character; written between the two halves of every record
 *        handed to sort(1) and used as sort's field separator.
 * N      number of BUFSIZ-sized units allocated for the word list.
 * MAX    size in bytes of the word-list buffer.  Parenthesized so that
 *        it expands correctly inside larger expressions (x / MAX, etc.).
 * LMAX   maximum length of an input line.
 * MAXT   number of slots in the hash table.
 * MASK   mask applied to a hash value; equals MAXT-1.
 * SET    value stored in btable[] for a break character.
 */
#define TILDE 0177
#define	N 30
#define	MAX	(N*BUFSIZ)
#define LMAX	200
#define MAXT	2048
#define MASK	03777
#define SET	1
45 
46 #define isabreak(c) (btable[c])
47 
48 extern char *calloc(), *mktemp();
49 extern char *getline();
50 int status;
51 
52 
53 char *hasht[MAXT];
54 char line[LMAX];
55 char btable[128];
56 int ignore;
57 int only;
58 int llen = 72;
59 int gap = 3;
60 int gutter = 3;
61 int mlen = LMAX;
62 int wlen;
63 int rflag;
64 int halflen;
65 char *strtbufp, *endbufp;
66 char *empty = "";
67 
68 char *infile;
69 FILE *inptr = stdin;
70 
71 char *outfile;
72 FILE *outptr = stdout;
73 
74 char sortfile[] = _PATH_TMP;		/* output of sort program */
75 char nofold[] = {'-', 'd', 't', TILDE, 0};
76 char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
77 char *sortopt = nofold;
78 FILE *sortptr;
79 
80 char *bfile;	/*contains user supplied break chars */
81 FILE *bptr;
82 
83 main(argc,argv)
84 int argc;
85 char **argv;
86 {
87 	register int c;
88 	register char *bufp;
89 	int pid;
90 	char *pend;
91 	extern void onintr();
92 
93 	char *xfile;
94 	FILE *xptr;
95 
96 	if(signal(SIGHUP,onintr)==SIG_IGN)
97 		signal(SIGHUP,SIG_IGN);
98 	if(signal(SIGINT,onintr)==SIG_IGN)
99 		signal(SIGINT,SIG_IGN);
100 	signal(SIGPIPE,onintr);
101 	signal(SIGTERM,onintr);
102 
103 /*	argument decoding	*/
104 
105 	xfile = _PATH_EIGN;
106 	argv++;
107 	while(argc>1 && **argv == '-') {
108 		switch (*++*argv){
109 
110 		case 'r':
111 			rflag++;
112 			break;
113 		case 'f':
114 			sortopt = fold;
115 			break;
116 
117 		case 'w':
118 			if(argc >= 2) {
119 				argc--;
120 				wlen++;
121 				llen = atoi(*++argv);
122 				if(llen == 0)
123 					diag("Wrong width:",*argv);
124 				if(llen > LMAX) {
125 					llen = LMAX;
126 					msg("Lines truncated to 200 chars.",empty);
127 				}
128 				break;
129 			}
130 
131 		case 't':
132 			if(wlen == 0)
133 				llen = 100;
134 			break;
135 		case 'g':
136 			if(argc >=2) {
137 				argc--;
138 				gap = gutter = atoi(*++argv);
139 			}
140 			break;
141 
142 		case 'i':
143 			if(only)
144 				diag("Only file already given.",empty);
145 			if (argc>=2){
146 				argc--;
147 				ignore++;
148 				xfile = *++argv;
149 			}
150 			break;
151 
152 		case 'o':
153 			if(ignore)
154 				diag("Ignore file already given",empty);
155 			if (argc>=2){
156 				only++;
157 				argc--;
158 				xfile = *++argv;
159 			}
160 			break;
161 
162 		case 'b':
163 			if(argc>=2) {
164 				argc--;
165 				bfile = *++argv;
166 			}
167 			break;
168 
169 		default:
170 			msg("Illegal argument:",*argv);
171 		}
172 		argc--;
173 		argv++;
174 	}
175 
176 	if(argc>3)
177 		diag("Too many filenames",empty);
178 	else if(argc==3){
179 		infile = *argv++;
180 		outfile = *argv;
181 		if((outptr = fopen(outfile,"w")) == NULL)
182 			diag("Cannot open output file:",outfile);
183 	} else if(argc==2) {
184 		infile = *argv;
185 		outfile = 0;
186 	}
187 
188 
189 	/* Default breaks of blank, tab and newline */
190 	btable[' '] = SET;
191 	btable['\t'] = SET;
192 	btable['\n'] = SET;
193 	if(bfile) {
194 		if((bptr = fopen(bfile,"r")) == NULL)
195 			diag("Cannot open break char file",bfile);
196 
197 		while((c = getc(bptr)) != EOF)
198 			btable[c] = SET;
199 	}
200 
201 /*	Allocate space for a buffer.  If only or ignore file present
202 	read it into buffer. Else read in default ignore file
203 	and put resulting words in buffer.
204 	*/
205 
206 
207 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
208 		diag("Out of memory space",empty);
209 	bufp = strtbufp;
210 	endbufp = strtbufp+MAX;
211 
212 	if((xptr = fopen(xfile,"r")) == NULL)
213 		diag("Cannot open  file",xfile);
214 
215 	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
216 		if(isabreak(c)) {
217 			if(storeh(hash(strtbufp,bufp),strtbufp))
218 				diag("Too many words",xfile);
219 			*bufp++ = '\0';
220 			strtbufp = bufp;
221 		}
222 		else {
223 			*bufp++ = (isupper(c)?tolower(c):c);
224 		}
225 	}
226 	if (bufp >= endbufp)
227 		diag("Too many words in file",xfile);
228 	endbufp = --bufp;
229 
230 	/* open output file for sorting */
231 
232 	mktemp(sortfile);
233 	if((sortptr = fopen(sortfile, "w")) == NULL)
234 		diag("Cannot open output for sorting:",sortfile);
235 
236 /*	get a line of data and compare each word for
237 	inclusion or exclusion in the sort phase
238 */
239 
240 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
241 		diag("Cannot open data: ",infile);
242 	while(pend=getline())
243 		cmpline(pend);
244 	fclose(sortptr);
245 
246 	switch (pid = fork()){
247 
248 	case -1:	/* cannot fork */
249 		diag("Cannot fork",empty);
250 
251 	case 0:		/* child */
252 		execl(_PATH_SORT, "sort", sortopt, "+0", "-1", "+1",
253 			sortfile, "-o", sortfile, 0);
254 
255 	default:	/* parent */
256 		while(wait(&status) != pid);
257 	}
258 
259 
260 	getsort();
261 	unlink(sortfile);
262 	exit(0);
263 }
264 
/*
 * Write a diagnostic to stderr: the message text, a blank, the
 * argument and a newline.  Callers with nothing to add pass an
 * empty string as arg.
 */
msg(s,arg)
char *s, *arg;
{
	fprintf(stderr,"%s %s\n",s,arg);
}
/*
 * Fatal diagnostic: print "s arg" on stderr exactly as msg() does,
 * then terminate the program with exit status 1.
 */
diag(s,arg)
char *s;
char *arg;
{
	fprintf(stderr,"%s %s\n",s,arg);
	exit(1);
}
279 
280 
281 char *getline()
282 {
283 
284 	register c;
285 	register char *linep;
286 	char *endlinep;
287 
288 
289 	endlinep= line + mlen;
290 	linep = line;
291 	/* Throw away leading white space */
292 
293 	while(isspace(c=getc(inptr)))
294 		;
295 	if(c==EOF)
296 		return(0);
297 	ungetc(c,inptr);
298 	while(( c=getc(inptr)) != EOF) {
299 		switch (c) {
300 
301 			case '\t':
302 				if(linep<endlinep)
303 					*linep++ = ' ';
304 				break;
305 			case '\n':
306 				while(isspace(*--linep));
307 				*++linep = '\n';
308 				return(linep);
309 			default:
310 				if(linep < endlinep)
311 					*linep++ = c;
312 		}
313 	}
314 	return(0);
315 }
316 
317 cmpline(pend)
318 char *pend;
319 {
320 
321 	char *pstrt, *pchar, *cp;
322 	char **hp;
323 	int flag;
324 
325 	pchar = line;
326 	if(rflag)
327 		while(pchar<pend&&!isspace(*pchar))
328 			pchar++;
329 	while(pchar<pend){
330 	/* eliminate white space */
331 		if(isabreak(*pchar++))
332 			continue;
333 		pstrt = --pchar;
334 
335 		flag = 1;
336 		while(flag){
337 			if(isabreak(*pchar)) {
338 				hp = &hasht[hash(pstrt,pchar)];
339 				pchar--;
340 				while(cp = *hp++){
341 					if(hp == &hasht[MAXT])
342 						hp = hasht;
343 	/* possible match */
344 					if(cmpword(pstrt,pchar,cp)){
345 	/* exact match */
346 						if(!ignore && only)
347 							putline(pstrt,pend);
348 						flag = 0;
349 						break;
350 					}
351 				}
352 	/* no match */
353 				if(flag){
354 					if(ignore || !only)
355 						putline(pstrt,pend);
356 					flag = 0;
357 				}
358 			}
359 		pchar++;
360 		}
361 	}
362 }
363 
/*
 * Compare the word starting at cpp, whose last character is at pend,
 * with the NUL-terminated (already lower-case) table word hpp.  Upper
 * case letters in the text are folded to lower case first.
 *
 * Returns 1 when the two words are identical, otherwise 0.
 *
 * Characters are handled as unsigned char: passing a negative plain
 * char to isupper()/tolower() is undefined.
 */
int
cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	int ch;

	while(*hpp != '\0'){
		ch = (unsigned char)*cpp++;
		if(isupper(ch))
			ch = tolower(ch);
		if(ch != (unsigned char)*hpp++)
			return(0);
	}
	/* every table character matched; the text word must end here too */
	return(cpp - 1 == pend);
}
377 
378 putline(strt, end)
379 char *strt, *end;
380 {
381 	char *cp;
382 
383 	for(cp=strt; cp<end; cp++)
384 		putc(*cp, sortptr);
385 	/* Add extra blank before TILDE to sort correctly
386 	   with -fd option */
387 	putc(' ',sortptr);
388 	putc(TILDE,sortptr);
389 	for (cp=line; cp<strt; cp++)
390 		putc(*cp,sortptr);
391 	putc('\n',sortptr);
392 }
393 
394 getsort()
395 {
396 	register c;
397 	register char *tilde, *linep, *ref;
398 	char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
399 	int w;
400 	char *rtrim(), *ltrim();
401 
402 	if((sortptr = fopen(sortfile,"r")) == NULL)
403 		diag("Cannot open sorted data:",sortfile);
404 
405 	halflen = (llen-gutter)/2;
406 	linep = line;
407 	while((c = getc(sortptr)) != EOF) {
408 		switch(c) {
409 
410 		case TILDE:
411 			tilde = linep;
412 			break;
413 
414 		case '\n':
415 			while(isspace(linep[-1]))
416 				linep--;
417 			ref = tilde;
418 			if(rflag) {
419 				while(ref<linep&&!isspace(*ref))
420 					ref++;
421 				*ref++ = 0;
422 			}
423 		/* the -1 is an overly conservative test to leave
424 		   space for the / that signifies truncation*/
425 			p3b = rtrim(p3a=line,tilde,halflen-1);
426 			if(p3b-p3a>halflen-1)
427 				p3b = p3a+halflen-1;
428 			p2a = ltrim(ref,p2b=linep,halflen-1);
429 			if(p2b-p2a>halflen-1)
430 				p2a = p2b-halflen-1;
431 			p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
432 				w=halflen-(p2b-p2a)-gap);
433 			if(p1b-p1a>w)
434 				p1b = p1a;
435 			p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
436 				w=halflen-(p3b-p3a)-gap);
437 			if(p4b-p4a>w)
438 				p4a = p4b;
439 			fprintf(outptr,".xx \"");
440 			putout(p1a,p1b);
441 	/* tilde-1 to account for extra space before TILDE */
442 			if(p1b!=(tilde-1) && p1a!=p1b)
443 				fprintf(outptr,"/");
444 			fprintf(outptr,"\" \"");
445 			if(p4a==p4b && p2a!=ref && p2a!=p2b)
446 				fprintf(outptr,"/");
447 			putout(p2a,p2b);
448 			fprintf(outptr,"\" \"");
449 			putout(p3a,p3b);
450 	/* ++p3b to account for extra blank after TILDE */
451 	/* ++p3b to account for extra space before TILDE */
452 			if(p1a==p1b && ++p3b!=tilde)
453 				fprintf(outptr,"/");
454 			fprintf(outptr,"\" \"");
455 			if(p1a==p1b && p4a!=ref && p4a!=p4b)
456 				fprintf(outptr,"/");
457 			putout(p4a,p4b);
458 			if(rflag)
459 				fprintf(outptr,"\" %s\n",tilde);
460 			else
461 				fprintf(outptr,"\"\n");
462 			linep = line;
463 			break;
464 
465 		case '"':
466 	/* put double " for "  */
467 			*linep++ = c;
468 		default:
469 			*linep++ = c;
470 		}
471 	}
472 }
473 
/*
 * Find where to cut the text a..c on the right so that at most d
 * characters remain and the cut falls at the end of a word.
 *
 * Positions x with a < x <= c and x-a <= d are examined; the result is
 * the last such x that is either c itself or a white-space character
 * and is preceded by a non-space character.  If there is none, c is
 * returned and the caller must check the length itself.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	char *b,*x;

	b = c;
	for(x=a+1; x<=c && x-a<=d; x++)
		if((x==c || isspace((unsigned char)x[0]))
		    && !isspace((unsigned char)x[-1]))
			b = x;
	return(b);
}
486 
/*
 * Find where to cut the text c..b on the left so that at most d
 * characters remain and the cut falls at the start of a word.
 *
 * Positions x with c <= x < b and b-x <= d are examined from the right;
 * the result is the leftmost such x that holds a non-space character
 * and is either c itself or preceded by white space.  If there is
 * none, c is returned and the caller must check the length itself.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	char *a,*x;

	a = c;
	for(x=b-1; x>=c && b-x<=d; x--)
		if(!isspace((unsigned char)x[0])
		    && (x==c || isspace((unsigned char)x[-1])))
			a = x;
	return(a);
}
499 
500 putout(strt,end)
501 char *strt, *end;
502 {
503 	char *cp;
504 
505 	cp = strt;
506 
507 	for(cp=strt; cp<end; cp++) {
508 		putc(*cp,outptr);
509 	}
510 }
511 
512 void
513 onintr()
514 {
515 
516 	unlink(sortfile);
517 	exit(1);
518 }
519 
520 hash(strtp,endp)
521 char *strtp, *endp;
522 {
523 	char *cp, c;
524 	int i, j, k;
525 
526 	/* Return zero hash number for single letter words */
527 	if((endp - strtp) == 1)
528 		return(0);
529 
530 	cp = strtp;
531 	c = *cp++;
532 	i = (isupper(c)?tolower(c):c);
533 	c = *cp;
534 	j = (isupper(c)?tolower(c):c);
535 	i = i*j;
536 	cp = --endp;
537 	c = *cp--;
538 	k = (isupper(c)?tolower(c):c);
539 	c = *cp;
540 	j = (isupper(c)?tolower(c):c);
541 	j = k*j;
542 
543 	k = (i ^ (j>>2)) & MASK;
544 	return(k);
545 }
546 
547 storeh(num,strtp)
548 int num;
549 char *strtp;
550 {
551 	int i;
552 
553 	for(i=num; i<MAXT; i++) {
554 		if(hasht[i] == 0) {
555 			hasht[i] = strtp;
556 			return(0);
557 		}
558 	}
559 	for(i=0; i<num; i++) {
560 		if(hasht[i] == 0) {
561 			hasht[i] = strtp;
562 			return(0);
563 		}
564 	}
565 	return(1);
566 }
567