xref: /original-bsd/usr.bin/ptx/ptx.c (revision b0ceb3f2)
1 #ifndef lint
2 static char *sccsid = "@(#)ptx.c	4.5 (Berkeley) 05/11/89";
3 #endif /* not lint */
4 
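/*	permuted title index
	ptx [-t] [-r] [-f] [-w num] [-g num] [-i ignore] [-o only]
	    [-b break] [input] [output]
	Ptx reads the input file and permutes on words in it.
	It excludes all words in the ignore file.
	Alternatively it includes only the words in the only file.
	If neither is given it excludes the words in _PATH_EIGN.

	The width of the output line can be changed to num
	characters.  If omitted, 72 is the default, unless the
	output is for troff, in which case it is 100.
	The -f flag tells the program to fold upper and lower case
	when sorting the output.
	The -t flag says the output is for troff and the
	output is then wider.
	The -r flag treats the first word of every input line as a
	reference: it is not indexed and is emitted as a trailing field.
	The -g flag sets the gap (and gutter) between output fields to num.
	The -b flag names a file of additional word break characters.

	*/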
19 
20 #include <stdio.h>
21 #include <ctype.h>
22 #include <signal.h>
23 #include "pathnames.h"
24 
25 #define TILDE 0177
26 #define	N 30
27 #define	MAX	N*BUFSIZ
28 #define LMAX	200
29 #define MAXT	2048
30 #define MASK	03777
31 #define SET	1
32 
33 #define isabreak(c) (btable[c])
34 
35 extern char *calloc(), *mktemp();
36 extern char *getline();
37 int status;
38 
39 
40 char *hasht[MAXT];
41 char line[LMAX];
42 char btable[128];
43 int ignore;
44 int only;
45 int llen = 72;
46 int gap = 3;
47 int gutter = 3;
48 int mlen = LMAX;
49 int wlen;
50 int rflag;
51 int halflen;
52 char *strtbufp, *endbufp;
53 char *empty = "";
54 
55 char *infile;
56 FILE *inptr = stdin;
57 
58 char *outfile;
59 FILE *outptr = stdout;
60 
61 char sortfile[] = _PATH_TMP;		/* output of sort program */
62 char nofold[] = {'-', 'd', 't', TILDE, 0};
63 char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
64 char *sortopt = nofold;
65 FILE *sortptr;
66 
67 char *bfile;	/*contains user supplied break chars */
68 FILE *bptr;
69 
70 main(argc,argv)
71 int argc;
72 char **argv;
73 {
74 	register int c;
75 	register char *bufp;
76 	int pid;
77 	char *pend;
78 	extern onintr();
79 
80 	char *xfile;
81 	FILE *xptr;
82 
83 	if(signal(SIGHUP,onintr)==SIG_IGN)
84 		signal(SIGHUP,SIG_IGN);
85 	if(signal(SIGINT,onintr)==SIG_IGN)
86 		signal(SIGINT,SIG_IGN);
87 	signal(SIGPIPE,onintr);
88 	signal(SIGTERM,onintr);
89 
90 /*	argument decoding	*/
91 
92 	xfile = _PATH_EIGN;
93 	argv++;
94 	while(argc>1 && **argv == '-') {
95 		switch (*++*argv){
96 
97 		case 'r':
98 			rflag++;
99 			break;
100 		case 'f':
101 			sortopt = fold;
102 			break;
103 
104 		case 'w':
105 			if(argc >= 2) {
106 				argc--;
107 				wlen++;
108 				llen = atoi(*++argv);
109 				if(llen == 0)
110 					diag("Wrong width:",*argv);
111 				if(llen > LMAX) {
112 					llen = LMAX;
113 					msg("Lines truncated to 200 chars.",empty);
114 				}
115 				break;
116 			}
117 
118 		case 't':
119 			if(wlen == 0)
120 				llen = 100;
121 			break;
122 		case 'g':
123 			if(argc >=2) {
124 				argc--;
125 				gap = gutter = atoi(*++argv);
126 			}
127 			break;
128 
129 		case 'i':
130 			if(only)
131 				diag("Only file already given.",empty);
132 			if (argc>=2){
133 				argc--;
134 				ignore++;
135 				xfile = *++argv;
136 			}
137 			break;
138 
139 		case 'o':
140 			if(ignore)
141 				diag("Ignore file already given",empty);
142 			if (argc>=2){
143 				only++;
144 				argc--;
145 				xfile = *++argv;
146 			}
147 			break;
148 
149 		case 'b':
150 			if(argc>=2) {
151 				argc--;
152 				bfile = *++argv;
153 			}
154 			break;
155 
156 		default:
157 			msg("Illegal argument:",*argv);
158 		}
159 		argc--;
160 		argv++;
161 	}
162 
163 	if(argc>3)
164 		diag("Too many filenames",empty);
165 	else if(argc==3){
166 		infile = *argv++;
167 		outfile = *argv;
168 		if((outptr = fopen(outfile,"w")) == NULL)
169 			diag("Cannot open output file:",outfile);
170 	} else if(argc==2) {
171 		infile = *argv;
172 		outfile = 0;
173 	}
174 
175 
176 	/* Default breaks of blank, tab and newline */
177 	btable[' '] = SET;
178 	btable['\t'] = SET;
179 	btable['\n'] = SET;
180 	if(bfile) {
181 		if((bptr = fopen(bfile,"r")) == NULL)
182 			diag("Cannot open break char file",bfile);
183 
184 		while((c = getc(bptr)) != EOF)
185 			btable[c] = SET;
186 	}
187 
188 /*	Allocate space for a buffer.  If only or ignore file present
189 	read it into buffer. Else read in default ignore file
190 	and put resulting words in buffer.
191 	*/
192 
193 
194 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
195 		diag("Out of memory space",empty);
196 	bufp = strtbufp;
197 	endbufp = strtbufp+MAX;
198 
199 	if((xptr = fopen(xfile,"r")) == NULL)
200 		diag("Cannot open  file",xfile);
201 
202 	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
203 		if(isabreak(c)) {
204 			if(storeh(hash(strtbufp,bufp),strtbufp))
205 				diag("Too many words",xfile);
206 			*bufp++ = '\0';
207 			strtbufp = bufp;
208 		}
209 		else {
210 			*bufp++ = (isupper(c)?tolower(c):c);
211 		}
212 	}
213 	if (bufp >= endbufp)
214 		diag("Too many words in file",xfile);
215 	endbufp = --bufp;
216 
217 	/* open output file for sorting */
218 
219 	mktemp(sortfile);
220 	if((sortptr = fopen(sortfile, "w")) == NULL)
221 		diag("Cannot open output for sorting:",sortfile);
222 
223 /*	get a line of data and compare each word for
224 	inclusion or exclusion in the sort phase
225 */
226 
227 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
228 		diag("Cannot open data: ",infile);
229 	while(pend=getline())
230 		cmpline(pend);
231 	fclose(sortptr);
232 
233 	switch (pid = fork()){
234 
235 	case -1:	/* cannot fork */
236 		diag("Cannot fork",empty);
237 
238 	case 0:		/* child */
239 		execl(_PATH_SORT, "sort", sortopt, "+0", "-1", "+1",
240 			sortfile, "-o", sortfile, 0);
241 
242 	default:	/* parent */
243 		while(wait(&status) != pid);
244 	}
245 
246 
247 	getsort();
248 	unlink(sortfile);
249 	exit(0);
250 }
251 
/*
 * Print a diagnostic on stderr: the text s, one blank, the argument arg
 * and a newline.  Pass the empty string when there is no argument.
 */
int
msg(s,arg)
char *s;
char *arg;
{
	fprintf(stderr, "%s %s\n", s, arg);
}
/*
 * Fatal diagnostic: report s and arg through msg(), then terminate the
 * process with exit status 1.  Does not return.
 */
int
diag(s,arg)
char *s, *arg;
{
	msg(s, arg);
	exit(1);
}
266 
267 
268 char *getline()
269 {
270 
271 	register c;
272 	register char *linep;
273 	char *endlinep;
274 
275 
276 	endlinep= line + mlen;
277 	linep = line;
278 	/* Throw away leading white space */
279 
280 	while(isspace(c=getc(inptr)))
281 		;
282 	if(c==EOF)
283 		return(0);
284 	ungetc(c,inptr);
285 	while(( c=getc(inptr)) != EOF) {
286 		switch (c) {
287 
288 			case '\t':
289 				if(linep<endlinep)
290 					*linep++ = ' ';
291 				break;
292 			case '\n':
293 				while(isspace(*--linep));
294 				*++linep = '\n';
295 				return(linep);
296 			default:
297 				if(linep < endlinep)
298 					*linep++ = c;
299 		}
300 	}
301 	return(0);
302 }
303 
304 cmpline(pend)
305 char *pend;
306 {
307 
308 	char *pstrt, *pchar, *cp;
309 	char **hp;
310 	int flag;
311 
312 	pchar = line;
313 	if(rflag)
314 		while(pchar<pend&&!isspace(*pchar))
315 			pchar++;
316 	while(pchar<pend){
317 	/* eliminate white space */
318 		if(isabreak(*pchar++))
319 			continue;
320 		pstrt = --pchar;
321 
322 		flag = 1;
323 		while(flag){
324 			if(isabreak(*pchar)) {
325 				hp = &hasht[hash(pstrt,pchar)];
326 				pchar--;
327 				while(cp = *hp++){
328 					if(hp == &hasht[MAXT])
329 						hp = hasht;
330 	/* possible match */
331 					if(cmpword(pstrt,pchar,cp)){
332 	/* exact match */
333 						if(!ignore && only)
334 							putline(pstrt,pend);
335 						flag = 0;
336 						break;
337 					}
338 				}
339 	/* no match */
340 				if(flag){
341 					if(ignore || !only)
342 						putline(pstrt,pend);
343 					flag = 0;
344 				}
345 			}
346 		pchar++;
347 		}
348 	}
349 }
350 
/*
 * Compare the text word starting at cpp, whose last character is at
 * pend, with the NUL-terminated table word hpp.  Upper case letters of
 * the text word are folded to lower case before comparing; hpp is used
 * as is.  Returns 1 when both words have the same length and contents,
 * 0 otherwise.
 */
int
cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	register int c;

	for(; *hpp != '\0'; hpp++) {
		c = *cpp++;
		/* ctype functions need an unsigned char value, not a plain char */
		if(isupper((unsigned char)c))
			c = tolower((unsigned char)c);
		if(c != *hpp)
			return(0);
	}
	/* equal only if the table word ended exactly on the last character */
	return(cpp - 1 == pend);
}
364 
365 putline(strt, end)
366 char *strt, *end;
367 {
368 	char *cp;
369 
370 	for(cp=strt; cp<end; cp++)
371 		putc(*cp, sortptr);
372 	/* Add extra blank before TILDE to sort correctly
373 	   with -fd option */
374 	putc(' ',sortptr);
375 	putc(TILDE,sortptr);
376 	for (cp=line; cp<strt; cp++)
377 		putc(*cp,sortptr);
378 	putc('\n',sortptr);
379 }
380 
381 getsort()
382 {
383 	register c;
384 	register char *tilde, *linep, *ref;
385 	char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
386 	int w;
387 	char *rtrim(), *ltrim();
388 
389 	if((sortptr = fopen(sortfile,"r")) == NULL)
390 		diag("Cannot open sorted data:",sortfile);
391 
392 	halflen = (llen-gutter)/2;
393 	linep = line;
394 	while((c = getc(sortptr)) != EOF) {
395 		switch(c) {
396 
397 		case TILDE:
398 			tilde = linep;
399 			break;
400 
401 		case '\n':
402 			while(isspace(linep[-1]))
403 				linep--;
404 			ref = tilde;
405 			if(rflag) {
406 				while(ref<linep&&!isspace(*ref))
407 					ref++;
408 				*ref++ = 0;
409 			}
410 		/* the -1 is an overly conservative test to leave
411 		   space for the / that signifies truncation*/
412 			p3b = rtrim(p3a=line,tilde,halflen-1);
413 			if(p3b-p3a>halflen-1)
414 				p3b = p3a+halflen-1;
415 			p2a = ltrim(ref,p2b=linep,halflen-1);
416 			if(p2b-p2a>halflen-1)
417 				p2a = p2b-halflen-1;
418 			p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
419 				w=halflen-(p2b-p2a)-gap);
420 			if(p1b-p1a>w)
421 				p1b = p1a;
422 			p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
423 				w=halflen-(p3b-p3a)-gap);
424 			if(p4b-p4a>w)
425 				p4a = p4b;
426 			fprintf(outptr,".xx \"");
427 			putout(p1a,p1b);
428 	/* tilde-1 to account for extra space before TILDE */
429 			if(p1b!=(tilde-1) && p1a!=p1b)
430 				fprintf(outptr,"/");
431 			fprintf(outptr,"\" \"");
432 			if(p4a==p4b && p2a!=ref && p2a!=p2b)
433 				fprintf(outptr,"/");
434 			putout(p2a,p2b);
435 			fprintf(outptr,"\" \"");
436 			putout(p3a,p3b);
437 	/* ++p3b to account for extra blank after TILDE */
438 	/* ++p3b to account for extra space before TILDE */
439 			if(p1a==p1b && ++p3b!=tilde)
440 				fprintf(outptr,"/");
441 			fprintf(outptr,"\" \"");
442 			if(p1a==p1b && p4a!=ref && p4a!=p4b)
443 				fprintf(outptr,"/");
444 			putout(p4a,p4b);
445 			if(rflag)
446 				fprintf(outptr,"\" %s\n",tilde);
447 			else
448 				fprintf(outptr,"\"\n");
449 			linep = line;
450 			break;
451 
452 		case '"':
453 	/* put double " for "  */
454 			*linep++ = c;
455 		default:
456 			*linep++ = c;
457 		}
458 	}
459 }
460 
/*
 * Choose where to cut the text a..c on its right side so that whole
 * words are kept.
 *
 * Returns the end of the last word that ends no more than d characters
 * after a.  When no word ends within that distance the result is c,
 * i.e. nothing is trimmed; the caller has to check the length.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	register char *p, *cut;

	cut = c;
	p = a + 1;
	while(p <= c && p - a <= d) {
		/* p is just past a word: c itself is never dereferenced */
		if((p == c || isspace((unsigned char)p[0])) &&
		    !isspace((unsigned char)p[-1]))
			cut = p;
		p++;
	}
	if(cut < c && !isspace((unsigned char)cut[0]))
		cut++;
	return(cut);
}
473 
/*
 * Choose where to cut the text c..b on its left side so that whole
 * words are kept.
 *
 * Returns the start of the earliest word that begins no more than d
 * characters before b.  When no word begins within that distance the
 * result is c, i.e. nothing is trimmed; the caller has to check the
 * length.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	register char *p, *cut;

	cut = c;
	p = b - 1;
	while(p >= c && b - p <= d) {
		/* p starts a word: p[-1] is only read when p is not c */
		if(!isspace((unsigned char)p[0]) &&
		    (p == c || isspace((unsigned char)p[-1])))
			cut = p;
		p--;
	}
	if(cut > c && !isspace((unsigned char)cut[-1]))
		cut--;
	return(cut);
}
486 
487 putout(strt,end)
488 char *strt, *end;
489 {
490 	char *cp;
491 
492 	cp = strt;
493 
494 	for(cp=strt; cp<end; cp++) {
495 		putc(*cp,outptr);
496 	}
497 }
498 
499 onintr()
500 {
501 
502 	unlink(sortfile);
503 	exit(1);
504 }
505 
506 hash(strtp,endp)
507 char *strtp, *endp;
508 {
509 	char *cp, c;
510 	int i, j, k;
511 
512 	/* Return zero hash number for single letter words */
513 	if((endp - strtp) == 1)
514 		return(0);
515 
516 	cp = strtp;
517 	c = *cp++;
518 	i = (isupper(c)?tolower(c):c);
519 	c = *cp;
520 	j = (isupper(c)?tolower(c):c);
521 	i = i*j;
522 	cp = --endp;
523 	c = *cp--;
524 	k = (isupper(c)?tolower(c):c);
525 	c = *cp;
526 	j = (isupper(c)?tolower(c):c);
527 	j = k*j;
528 
529 	k = (i ^ (j>>2)) & MASK;
530 	return(k);
531 }
532 
533 storeh(num,strtp)
534 int num;
535 char *strtp;
536 {
537 	int i;
538 
539 	for(i=num; i<MAXT; i++) {
540 		if(hasht[i] == 0) {
541 			hasht[i] = strtp;
542 			return(0);
543 		}
544 	}
545 	for(i=0; i<num; i++) {
546 		if(hasht[i] == 0) {
547 			hasht[i] = strtp;
548 			return(0);
549 		}
550 	}
551 	return(1);
552 }
553