xref: /original-bsd/usr.bin/ptx/ptx.c (revision 552e81d8)
1 static char *sccsid = "@(#)ptx.c	4.1 (Berkeley) 10/01/80";
2 #
3 
4 /*	permuted title index
5 	ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output]
6 	Ptx reads the input file and permutes on words in it.
7 	It excludes all words in the ignore file.
8 	Alternately it includes words in the only file.
9 	if neither is given it excludes the words in /usr/lib/eign.
10 
11 	The width of the output line can be changed to num
12 	characters.  If omitted, 72 is the default unless troff, then 100.
13 	the -f flag tells the program to fold the output
14 	the -t flag says the output is for troff and the
15 	output is then wider.
16 
17 	make: cc ptx.c -lS
18 	*/
19 
20 #include <stdio.h>
21 #include <ctype.h>
22 #include <signal.h>
#define DEFLTX "/usr/lib/eign"	/* word list read when neither -i nor -o is given */
#define TILDE 0177		/* separator byte written into every sort record */
#define SORT "/usr/bin/sort"	/* program exec'ed to order the records */
#define	N 30
#define	MAX	(N*BUFSIZ)	/* bytes in the word-list buffer; parenthesized so it
				   survives use inside a larger expression */
#define LMAX	200		/* capacity of line[] */
#define MAXT	2048		/* number of slots in hasht[] */
#define MASK	03777		/* folds a hash value into a hasht[] index */
#define SET	1		/* value stored in btable[] for a break character */

/*
 * Nonzero when c is a break character.  The argument is reduced to an
 * unsigned char so that a plain char holding a byte >= 0200 (negative where
 * char is signed) still selects a valid entry of the 256-entry table.
 */
#define isabreak(c) (btable[(unsigned char)(c)])

extern char *calloc(), *mktemp();
extern char *getline();
int status;		/* receives the child's status from wait() */


char *hasht[MAXT];	/* open-addressed table of pointers to listed words */
char line[LMAX];	/* current input line, later reused for sorted records */
char btable[256];	/* one flag per byte value: SET marks a break character */
43 int ignore;
44 int only;
45 int llen = 72;
46 int gap = 3;
47 int gutter = 3;
48 int mlen = LMAX;
49 int wlen;
50 int rflag;
51 int halflen;
52 char *strtbufp, *endbufp;
53 char *empty = "";
54 
55 char *infile;
56 FILE *inptr = stdin;
57 
58 char *outfile;
59 FILE *outptr = stdout;
60 
61 char *sortfile;	/* output of sort program */
62 char nofold[] = {'-', 'd', 't', TILDE, 0};
63 char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
64 char *sortopt = nofold;
65 FILE *sortptr;
66 
67 char *bfile;	/*contains user supplied break chars */
68 FILE *bptr;
69 
70 main(argc,argv)
71 int argc;
72 char **argv;
73 {
74 	register int c;
75 	register char *bufp;
76 	int pid;
77 	char *pend;
78 	extern onintr();
79 
80 	char *xfile;
81 	FILE *xptr;
82 
83 	if(signal(SIGHUP,onintr)==SIG_IGN)
84 		signal(SIGHUP,SIG_IGN);
85 	if(signal(SIGINT,onintr)==SIG_IGN)
86 		signal(SIGINT,SIG_IGN);
87 	signal(SIGPIPE,onintr);
88 	signal(SIGTERM,onintr);
89 
90 /*	argument decoding	*/
91 
92 	xfile = DEFLTX;
93 	argv++;
94 	while(argc>1 && **argv == '-') {
95 		switch (*++*argv){
96 
97 		case 'r':
98 			rflag++;
99 			break;
100 		case 'f':
101 			sortopt = fold;
102 			break;
103 
104 		case 'w':
105 			if(argc >= 2) {
106 				argc--;
107 				wlen++;
108 				llen = atoi(*++argv);
109 				if(llen == 0)
110 					diag("Wrong width:",*argv);
111 				if(llen > LMAX) {
112 					llen = LMAX;
113 					msg("Lines truncated to 200 chars.",empty);
114 				}
115 				break;
116 			}
117 
118 		case 't':
119 			if(wlen == 0)
120 				llen = 100;
121 			break;
122 		case 'g':
123 			if(argc >=2) {
124 				argc--;
125 				gap = gutter = atoi(*++argv);
126 			}
127 			break;
128 
129 		case 'i':
130 			if(only)
131 				diag("Only file already given.",empty);
132 			if (argc>=2){
133 				argc--;
134 				ignore++;
135 				xfile = *++argv;
136 			}
137 			break;
138 
139 		case 'o':
140 			if(ignore)
141 				diag("Ignore file already given",empty);
142 			if (argc>=2){
143 				only++;
144 				argc--;
145 				xfile = *++argv;
146 			}
147 			break;
148 
149 		case 'b':
150 			if(argc>=2) {
151 				argc--;
152 				bfile = *++argv;
153 			}
154 			break;
155 
156 		default:
157 			msg("Illegal argument:",*argv);
158 		}
159 		argc--;
160 		argv++;
161 	}
162 
163 	if(argc>3)
164 		diag("Too many filenames",empty);
165 	else if(argc==3){
166 		infile = *argv++;
167 		outfile = *argv;
168 		if((outptr = fopen(outfile,"w")) == NULL)
169 			diag("Cannot open output file:",outfile);
170 	} else if(argc==2) {
171 		infile = *argv;
172 		outfile = 0;
173 	}
174 
175 
176 	/* Default breaks of blank, tab and newline */
177 	btable[' '] = SET;
178 	btable['\t'] = SET;
179 	btable['\n'] = SET;
180 	if(bfile) {
181 		if((bptr = fopen(bfile,"r")) == NULL)
182 			diag("Cannot open break char file",bfile);
183 
184 		while((c = getc(bptr)) != EOF)
185 			btable[c] = SET;
186 	}
187 
188 /*	Allocate space for a buffer.  If only or ignore file present
189 	read it into buffer. Else read in default ignore file
190 	and put resulting words in buffer.
191 	*/
192 
193 
194 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
195 		diag("Out of memory space",empty);
196 	bufp = strtbufp;
197 	endbufp = strtbufp+MAX;
198 
199 	if((xptr = fopen(xfile,"r")) == NULL)
200 		diag("Cannot open  file",xfile);
201 
202 	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
203 		if(isabreak(c)) {
204 			if(storeh(hash(strtbufp,bufp),strtbufp))
205 				diag("Too many words",xfile);
206 			*bufp++ = '\0';
207 			strtbufp = bufp;
208 		}
209 		else {
210 			*bufp++ = (isupper(c)?tolower(c):c);
211 		}
212 	}
213 	if (bufp >= endbufp)
214 		diag("Too many words in file",xfile);
215 	endbufp = --bufp;
216 
217 	/* open output file for sorting */
218 
219 	sortfile = mktemp("/tmp/ptxsXXXXX");
220 	if((sortptr = fopen(sortfile, "w")) == NULL)
221 		diag("Cannot open output for sorting:",sortfile);
222 
223 /*	get a line of data and compare each word for
224 	inclusion or exclusion in the sort phase
225 */
226 
227 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
228 		diag("Cannot open data: ",infile);
229 	while(pend=getline())
230 		cmpline(pend);
231 	fclose(sortptr);
232 
233 	switch (pid = fork()){
234 
235 	case -1:	/* cannot fork */
236 		diag("Cannot fork",empty);
237 
238 	case 0:		/* child */
239 		execl(SORT, SORT, sortopt, "+0", "-1", "+1",
240 			sortfile, "-o", sortfile, 0);
241 
242 	default:	/* parent */
243 		while(wait(&status) != pid);
244 	}
245 
246 
247 	getsort();
248 	onintr();
249 }
250 
/*
 * Write the two strings to stderr separated by one blank and followed by
 * a newline.
 */
int msg(s,arg)
char *s;
char *arg;
{
	fputs(s, stderr);
	putc(' ', stderr);
	fputs(arg, stderr);
	putc('\n', stderr);
}
/*
 * Fatal diagnostic: print "s arg" on stderr in the same layout msg() uses,
 * then terminate with exit status 1.
 */
int diag(s,arg)
char *s, *arg;
{
	fprintf(stderr,"%s %s\n",s,arg);
	exit(1);
}
265 
266 
267 char *getline()
268 {
269 
270 	register c;
271 	register char *linep;
272 	char *endlinep;
273 
274 
275 	endlinep= line + mlen;
276 	linep = line;
277 	/* Throw away leading white space */
278 
279 	while(isspace(c=getc(inptr)))
280 		;
281 	if(c==EOF)
282 		return(0);
283 	ungetc(c,inptr);
284 	while(( c=getc(inptr)) != EOF) {
285 		switch (c) {
286 
287 			case '\t':
288 				if(linep<endlinep)
289 					*linep++ = ' ';
290 				break;
291 			case '\n':
292 				while(isspace(*--linep));
293 				*++linep = '\n';
294 				return(linep);
295 			default:
296 				if(linep < endlinep)
297 					*linep++ = c;
298 		}
299 	}
300 	return(0);
301 }
302 
303 cmpline(pend)
304 char *pend;
305 {
306 
307 	char *pstrt, *pchar, *cp;
308 	char **hp;
309 	int flag;
310 
311 	pchar = line;
312 	if(rflag)
313 		while(pchar<pend&&!isspace(*pchar))
314 			pchar++;
315 	while(pchar<pend){
316 	/* eliminate white space */
317 		if(isabreak(*pchar++))
318 			continue;
319 		pstrt = --pchar;
320 
321 		flag = 1;
322 		while(flag){
323 			if(isabreak(*pchar)) {
324 				hp = &hasht[hash(pstrt,pchar)];
325 				pchar--;
326 				while(cp = *hp++){
327 					if(hp == &hasht[MAXT])
328 						hp = hasht;
329 	/* possible match */
330 					if(cmpword(pstrt,pchar,cp)){
331 	/* exact match */
332 						if(!ignore && only)
333 							putline(pstrt,pend);
334 						flag = 0;
335 						break;
336 					}
337 				}
338 	/* no match */
339 				if(flag){
340 					if(ignore || !only)
341 						putline(pstrt,pend);
342 					flag = 0;
343 				}
344 			}
345 		pchar++;
346 		}
347 	}
348 }
349 
/*
 * Compare the text word starting at cpp, whose last character is at pend,
 * with the NUL-terminated list word hpp.  Upper-case letters of the text
 * word are folded to lower case before comparing.  Returns 1 when every
 * character of hpp matches and the text word has exactly that length,
 * 0 otherwise.
 */
int cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	int ch;

	for(; *hpp != '\0'; hpp++, cpp++) {
		ch = *cpp;
		if(isupper(ch))
			ch = tolower(ch);
		if(ch != *hpp)
			return(0);
	}
	/* cpp is now one past the last character that was compared */
	return(cpp - 1 == pend);
}
363 
364 putline(strt, end)
365 char *strt, *end;
366 {
367 	char *cp;
368 
369 	for(cp=strt; cp<end; cp++)
370 		putc(*cp, sortptr);
371 	/* Add extra blank before TILDE to sort correctly
372 	   with -fd option */
373 	putc(' ',sortptr);
374 	putc(TILDE,sortptr);
375 	for (cp=line; cp<strt; cp++)
376 		putc(*cp,sortptr);
377 	putc('\n',sortptr);
378 }
379 
380 getsort()
381 {
382 	register c;
383 	register char *tilde, *linep, *ref;
384 	char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
385 	int w;
386 	char *rtrim(), *ltrim();
387 
388 	if((sortptr = fopen(sortfile,"r")) == NULL)
389 		diag("Cannot open sorted data:",sortfile);
390 
391 	halflen = (llen-gutter)/2;
392 	linep = line;
393 	while((c = getc(sortptr)) != EOF) {
394 		switch(c) {
395 
396 		case TILDE:
397 			tilde = linep;
398 			break;
399 
400 		case '\n':
401 			while(isspace(linep[-1]))
402 				linep--;
403 			ref = tilde;
404 			if(rflag) {
405 				while(ref<linep&&!isspace(*ref))
406 					ref++;
407 				*ref++ = 0;
408 			}
409 		/* the -1 is an overly conservative test to leave
410 		   space for the / that signifies truncation*/
411 			p3b = rtrim(p3a=line,tilde,halflen-1);
412 			if(p3b-p3a>halflen-1)
413 				p3b = p3a+halflen-1;
414 			p2a = ltrim(ref,p2b=linep,halflen-1);
415 			if(p2b-p2a>halflen-1)
416 				p2a = p2b-halflen-1;
417 			p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
418 				w=halflen-(p2b-p2a)-gap);
419 			if(p1b-p1a>w)
420 				p1b = p1a;
421 			p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
422 				w=halflen-(p3b-p3a)-gap);
423 			if(p4b-p4a>w)
424 				p4a = p4b;
425 			fprintf(outptr,".xx \"");
426 			putout(p1a,p1b);
427 	/* tilde-1 to account for extra space before TILDE */
428 			if(p1b!=(tilde-1) && p1a!=p1b)
429 				fprintf(outptr,"/");
430 			fprintf(outptr,"\" \"");
431 			if(p4a==p4b && p2a!=ref && p2a!=p2b)
432 				fprintf(outptr,"/");
433 			putout(p2a,p2b);
434 			fprintf(outptr,"\" \"");
435 			putout(p3a,p3b);
436 	/* ++p3b to account for extra blank after TILDE */
437 	/* ++p3b to account for extra space before TILDE */
438 			if(p1a==p1b && ++p3b!=tilde)
439 				fprintf(outptr,"/");
440 			fprintf(outptr,"\" \"");
441 			if(p1a==p1b && p4a!=ref && p4a!=p4b)
442 				fprintf(outptr,"/");
443 			putout(p4a,p4b);
444 			if(rflag)
445 				fprintf(outptr,"\" %s\n",tilde);
446 			else
447 				fprintf(outptr,"\"\n");
448 			linep = line;
449 			break;
450 
451 		case '"':
452 	/* put double " for "  */
453 			*linep++ = c;
454 		default:
455 			*linep++ = c;
456 		}
457 	}
458 }
459 
/*
 * Choose where to end a field that starts at a and may extend to c.
 * Among the positions a+1 .. a+d (never beyond c), the last one that
 * directly follows a non-blank character and is either c itself or a
 * blank is returned.  When there is none, c is returned and the caller
 * deals with the excess length.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	char *best, *p;
	long k;

	best = c;
	for(k = 1; k <= c - a && k <= d; k++) {
		p = a + k;
		if(isspace(p[-1]))
			continue;
		if(p == c || isspace(p[0]))
			best = p;
	}
	if(best < c && !isspace(best[0]))
		best++;
	return(best);
}
472 
/*
 * Choose where to begin a field that ends at b and may reach back to c.
 * Among the positions b-1 .. b-d (never before c), the earliest one that
 * holds a non-blank character and is either c itself or preceded by a
 * blank is returned.  When there is none, c is returned and the caller
 * deals with the excess length.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	char *best, *p;
	long k;

	best = c;
	for(k = 1; k <= b - c && k <= d; k++) {
		p = b - k;
		if(isspace(p[0]))
			continue;
		if(p == c || isspace(p[-1]))
			best = p;
	}
	if(best > c && !isspace(best[-1]))
		best--;
	return(best);
}
485 
486 putout(strt,end)
487 char *strt, *end;
488 {
489 	char *cp;
490 
491 	cp = strt;
492 
493 	for(cp=strt; cp<end; cp++) {
494 		putc(*cp,outptr);
495 	}
496 }
497 
498 onintr()
499 {
500 
501 	if(*sortfile)
502 		unlink(sortfile);
503 	exit(1);
504 }
505 
506 hash(strtp,endp)
507 char *strtp, *endp;
508 {
509 	char *cp, c;
510 	int i, j, k;
511 
512 	/* Return zero hash number for single letter words */
513 	if((endp - strtp) == 1)
514 		return(0);
515 
516 	cp = strtp;
517 	c = *cp++;
518 	i = (isupper(c)?tolower(c):c);
519 	c = *cp;
520 	j = (isupper(c)?tolower(c):c);
521 	i = i*j;
522 	cp = --endp;
523 	c = *cp--;
524 	k = (isupper(c)?tolower(c):c);
525 	c = *cp;
526 	j = (isupper(c)?tolower(c):c);
527 	j = k*j;
528 
529 	k = (i ^ (j>>2)) & MASK;
530 	return(k);
531 }
532 
533 storeh(num,strtp)
534 int num;
535 char *strtp;
536 {
537 	int i;
538 
539 	for(i=num; i<MAXT; i++) {
540 		if(hasht[i] == 0) {
541 			hasht[i] = strtp;
542 			return(0);
543 		}
544 	}
545 	for(i=0; i<num; i++) {
546 		if(hasht[i] == 0) {
547 			hasht[i] = strtp;
548 			return(0);
549 		}
550 	}
551 	return(1);
552 }
553