1 /*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.proprietary.c%
6 */
7
8 #ifndef lint
9 static char copyright[] =
10 "@(#) Copyright (c) 1989, 1993\n\
11 The Regents of the University of California. All rights reserved.\n";
12 #endif /* not lint */
13
14 #ifndef lint
15 static char sccsid[] = "@(#)ptx.c 8.1 (Berkeley) 06/06/93";
16 #endif /* not lint */
17
/* permuted title index
   ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output]
   Ptx reads the input file and permutes on the words in it.
   It excludes all words in the ignore file.
   Alternatively, it includes only the words in the only file.
   If neither is given, it excludes the words in _PATH_EIGN.

   The width of the output line can be changed to num
   characters. If omitted, the default is 72, or 100 for troff.
   The -f flag tells the program to fold the output.
   The -t flag says the output is for troff, and the
   output is then wider.

*/
32
33 #include <stdio.h>
34 #include <ctype.h>
35 #include <signal.h>
36 #include "pathnames.h"
37
38 #define TILDE 0177
39 #define N 30
40 #define MAX N*BUFSIZ
41 #define LMAX 200
42 #define MAXT 2048
43 #define MASK 03777
44 #define SET 1
45
46 #define isabreak(c) (btable[c])
47
48 extern char *calloc(), *mktemp();
49 extern char *getline();
50 int status;
51
52
53 char *hasht[MAXT];
54 char line[LMAX];
55 char btable[128];
56 int ignore;
57 int only;
58 int llen = 72;
59 int gap = 3;
60 int gutter = 3;
61 int mlen = LMAX;
62 int wlen;
63 int rflag;
64 int halflen;
65 char *strtbufp, *endbufp;
66 char *empty = "";
67
68 char *infile;
69 FILE *inptr = stdin;
70
71 char *outfile;
72 FILE *outptr = stdout;
73
74 char sortfile[] = _PATH_TMP; /* output of sort program */
75 char nofold[] = {'-', 'd', 't', TILDE, 0};
76 char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
77 char *sortopt = nofold;
78 FILE *sortptr;
79
80 char *bfile; /*contains user supplied break chars */
81 FILE *bptr;
82
main(argc,argv)83 main(argc,argv)
84 int argc;
85 char **argv;
86 {
87 register int c;
88 register char *bufp;
89 int pid;
90 char *pend;
91 extern void onintr();
92
93 char *xfile;
94 FILE *xptr;
95
96 if(signal(SIGHUP,onintr)==SIG_IGN)
97 signal(SIGHUP,SIG_IGN);
98 if(signal(SIGINT,onintr)==SIG_IGN)
99 signal(SIGINT,SIG_IGN);
100 signal(SIGPIPE,onintr);
101 signal(SIGTERM,onintr);
102
103 /* argument decoding */
104
105 xfile = _PATH_EIGN;
106 argv++;
107 while(argc>1 && **argv == '-') {
108 switch (*++*argv){
109
110 case 'r':
111 rflag++;
112 break;
113 case 'f':
114 sortopt = fold;
115 break;
116
117 case 'w':
118 if(argc >= 2) {
119 argc--;
120 wlen++;
121 llen = atoi(*++argv);
122 if(llen == 0)
123 diag("Wrong width:",*argv);
124 if(llen > LMAX) {
125 llen = LMAX;
126 msg("Lines truncated to 200 chars.",empty);
127 }
128 break;
129 }
130
131 case 't':
132 if(wlen == 0)
133 llen = 100;
134 break;
135 case 'g':
136 if(argc >=2) {
137 argc--;
138 gap = gutter = atoi(*++argv);
139 }
140 break;
141
142 case 'i':
143 if(only)
144 diag("Only file already given.",empty);
145 if (argc>=2){
146 argc--;
147 ignore++;
148 xfile = *++argv;
149 }
150 break;
151
152 case 'o':
153 if(ignore)
154 diag("Ignore file already given",empty);
155 if (argc>=2){
156 only++;
157 argc--;
158 xfile = *++argv;
159 }
160 break;
161
162 case 'b':
163 if(argc>=2) {
164 argc--;
165 bfile = *++argv;
166 }
167 break;
168
169 default:
170 msg("Illegal argument:",*argv);
171 }
172 argc--;
173 argv++;
174 }
175
176 if(argc>3)
177 diag("Too many filenames",empty);
178 else if(argc==3){
179 infile = *argv++;
180 outfile = *argv;
181 if((outptr = fopen(outfile,"w")) == NULL)
182 diag("Cannot open output file:",outfile);
183 } else if(argc==2) {
184 infile = *argv;
185 outfile = 0;
186 }
187
188
189 /* Default breaks of blank, tab and newline */
190 btable[' '] = SET;
191 btable['\t'] = SET;
192 btable['\n'] = SET;
193 if(bfile) {
194 if((bptr = fopen(bfile,"r")) == NULL)
195 diag("Cannot open break char file",bfile);
196
197 while((c = getc(bptr)) != EOF)
198 btable[c] = SET;
199 }
200
201 /* Allocate space for a buffer. If only or ignore file present
202 read it into buffer. Else read in default ignore file
203 and put resulting words in buffer.
204 */
205
206
207 if((strtbufp = calloc(N,BUFSIZ)) == NULL)
208 diag("Out of memory space",empty);
209 bufp = strtbufp;
210 endbufp = strtbufp+MAX;
211
212 if((xptr = fopen(xfile,"r")) == NULL)
213 diag("Cannot open file",xfile);
214
215 while(bufp < endbufp && (c = getc(xptr)) != EOF) {
216 if(isabreak(c)) {
217 if(storeh(hash(strtbufp,bufp),strtbufp))
218 diag("Too many words",xfile);
219 *bufp++ = '\0';
220 strtbufp = bufp;
221 }
222 else {
223 *bufp++ = (isupper(c)?tolower(c):c);
224 }
225 }
226 if (bufp >= endbufp)
227 diag("Too many words in file",xfile);
228 endbufp = --bufp;
229
230 /* open output file for sorting */
231
232 mktemp(sortfile);
233 if((sortptr = fopen(sortfile, "w")) == NULL)
234 diag("Cannot open output for sorting:",sortfile);
235
236 /* get a line of data and compare each word for
237 inclusion or exclusion in the sort phase
238 */
239
240 if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
241 diag("Cannot open data: ",infile);
242 while(pend=getline())
243 cmpline(pend);
244 fclose(sortptr);
245
246 switch (pid = fork()){
247
248 case -1: /* cannot fork */
249 diag("Cannot fork",empty);
250
251 case 0: /* child */
252 execl(_PATH_SORT, "sort", sortopt, "+0", "-1", "+1",
253 sortfile, "-o", sortfile, 0);
254
255 default: /* parent */
256 while(wait(&status) != pid);
257 }
258
259
260 getsort();
261 unlink(sortfile);
262 exit(0);
263 }
264
/*
 * msg: print the text s followed by a blank, arg and a newline on the
 * standard error stream.
 */
int
msg(s, arg)
	char *s;
	char *arg;
{
	(void)fprintf(stderr, "%s %s\n", s, arg);
	return(0);
}
/*
 * diag: fatal diagnostic.  Reports s and arg through msg() and then
 * terminates the program with exit status 1; it does not return.
 */
int
diag(s, arg)
	char *s;
	char *arg;
{
	msg(s, arg);
	exit(1);
}
279
280
getline()281 char *getline()
282 {
283
284 register c;
285 register char *linep;
286 char *endlinep;
287
288
289 endlinep= line + mlen;
290 linep = line;
291 /* Throw away leading white space */
292
293 while(isspace(c=getc(inptr)))
294 ;
295 if(c==EOF)
296 return(0);
297 ungetc(c,inptr);
298 while(( c=getc(inptr)) != EOF) {
299 switch (c) {
300
301 case '\t':
302 if(linep<endlinep)
303 *linep++ = ' ';
304 break;
305 case '\n':
306 while(isspace(*--linep));
307 *++linep = '\n';
308 return(linep);
309 default:
310 if(linep < endlinep)
311 *linep++ = c;
312 }
313 }
314 return(0);
315 }
316
cmpline(pend)317 cmpline(pend)
318 char *pend;
319 {
320
321 char *pstrt, *pchar, *cp;
322 char **hp;
323 int flag;
324
325 pchar = line;
326 if(rflag)
327 while(pchar<pend&&!isspace(*pchar))
328 pchar++;
329 while(pchar<pend){
330 /* eliminate white space */
331 if(isabreak(*pchar++))
332 continue;
333 pstrt = --pchar;
334
335 flag = 1;
336 while(flag){
337 if(isabreak(*pchar)) {
338 hp = &hasht[hash(pstrt,pchar)];
339 pchar--;
340 while(cp = *hp++){
341 if(hp == &hasht[MAXT])
342 hp = hasht;
343 /* possible match */
344 if(cmpword(pstrt,pchar,cp)){
345 /* exact match */
346 if(!ignore && only)
347 putline(pstrt,pend);
348 flag = 0;
349 break;
350 }
351 }
352 /* no match */
353 if(flag){
354 if(ignore || !only)
355 putline(pstrt,pend);
356 flag = 0;
357 }
358 }
359 pchar++;
360 }
361 }
362 }
363
/*
 * cmpword: compare the word starting at cpp, whose last character is at
 * pend, with the NUL-terminated string hpp.  Upper-case letters of the
 * word are folded to lower case before comparing; hpp is used as is.
 * Returns 1 when every character of hpp matches and the word ends
 * exactly where hpp does, 0 otherwise.
 */
int
cmpword(cpp, pend, hpp)
	char *cpp, *pend, *hpp;
{
	char *text;
	char *key;
	char ch;

	text = cpp;
	for(key = hpp; *key != '\0'; key++) {
		ch = *text++;
		if((isupper(ch) ? tolower(ch) : ch) != *key)
			return(0);
	}
	/* text is one past the last compared character */
	return((text - 1 == pend) ? 1 : 0);
}
377
putline(strt,end)378 putline(strt, end)
379 char *strt, *end;
380 {
381 char *cp;
382
383 for(cp=strt; cp<end; cp++)
384 putc(*cp, sortptr);
385 /* Add extra blank before TILDE to sort correctly
386 with -fd option */
387 putc(' ',sortptr);
388 putc(TILDE,sortptr);
389 for (cp=line; cp<strt; cp++)
390 putc(*cp,sortptr);
391 putc('\n',sortptr);
392 }
393
getsort()394 getsort()
395 {
396 register c;
397 register char *tilde, *linep, *ref;
398 char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
399 int w;
400 char *rtrim(), *ltrim();
401
402 if((sortptr = fopen(sortfile,"r")) == NULL)
403 diag("Cannot open sorted data:",sortfile);
404
405 halflen = (llen-gutter)/2;
406 linep = line;
407 while((c = getc(sortptr)) != EOF) {
408 switch(c) {
409
410 case TILDE:
411 tilde = linep;
412 break;
413
414 case '\n':
415 while(isspace(linep[-1]))
416 linep--;
417 ref = tilde;
418 if(rflag) {
419 while(ref<linep&&!isspace(*ref))
420 ref++;
421 *ref++ = 0;
422 }
423 /* the -1 is an overly conservative test to leave
424 space for the / that signifies truncation*/
425 p3b = rtrim(p3a=line,tilde,halflen-1);
426 if(p3b-p3a>halflen-1)
427 p3b = p3a+halflen-1;
428 p2a = ltrim(ref,p2b=linep,halflen-1);
429 if(p2b-p2a>halflen-1)
430 p2a = p2b-halflen-1;
431 p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
432 w=halflen-(p2b-p2a)-gap);
433 if(p1b-p1a>w)
434 p1b = p1a;
435 p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
436 w=halflen-(p3b-p3a)-gap);
437 if(p4b-p4a>w)
438 p4a = p4b;
439 fprintf(outptr,".xx \"");
440 putout(p1a,p1b);
441 /* tilde-1 to account for extra space before TILDE */
442 if(p1b!=(tilde-1) && p1a!=p1b)
443 fprintf(outptr,"/");
444 fprintf(outptr,"\" \"");
445 if(p4a==p4b && p2a!=ref && p2a!=p2b)
446 fprintf(outptr,"/");
447 putout(p2a,p2b);
448 fprintf(outptr,"\" \"");
449 putout(p3a,p3b);
450 /* ++p3b to account for extra blank after TILDE */
451 /* ++p3b to account for extra space before TILDE */
452 if(p1a==p1b && ++p3b!=tilde)
453 fprintf(outptr,"/");
454 fprintf(outptr,"\" \"");
455 if(p1a==p1b && p4a!=ref && p4a!=p4b)
456 fprintf(outptr,"/");
457 putout(p4a,p4b);
458 if(rflag)
459 fprintf(outptr,"\" %s\n",tilde);
460 else
461 fprintf(outptr,"\"\n");
462 linep = line;
463 break;
464
465 case '"':
466 /* put double " for " */
467 *linep++ = c;
468 default:
469 *linep++ = c;
470 }
471 }
472 }
473
/*
 * rtrim: choose a right-hand cut point for the text a..c.
 *
 * Positions a+1 .. c that lie at most d characters from a are examined
 * in order; the last one that is either c itself or a white-space
 * character, and whose predecessor is not white space, is returned.
 * When no such position exists the result is c.
 */
char *
rtrim(a, c, d)
	char *a, *c;
	int d;
{
	char *best, *scan;
	int boundary;

	best = c;
	scan = a + 1;
	while(scan <= c && scan - a <= d) {
		boundary = (scan == c) || isspace(scan[0]);
		if(boundary && !isspace(scan[-1]))
			best = scan;
		scan++;
	}
	if(best < c && !isspace(best[0]))
		best++;
	return(best);
}
486
/*
 * ltrim: choose a left-hand cut point for the text c..b.
 *
 * Positions b-1 down to c that lie at most d characters from b are
 * examined in order; the last one that is not white space and is either
 * c itself or preceded by white space is returned.  When no such
 * position exists the result is c.
 */
char *
ltrim(c, b, d)
	char *c, *b;
	int d;
{
	char *best, *scan;

	best = c;
	scan = b - 1;
	while(scan >= c && b - scan <= d) {
		if(!isspace(scan[0]) && (scan == c || isspace(scan[-1])))
			best = scan;
		scan--;
	}
	if(best > c && !isspace(best[-1]))
		best--;
	return(best);
}
499
putout(strt,end)500 putout(strt,end)
501 char *strt, *end;
502 {
503 char *cp;
504
505 cp = strt;
506
507 for(cp=strt; cp<end; cp++) {
508 putc(*cp,outptr);
509 }
510 }
511
512 void
onintr()513 onintr()
514 {
515
516 unlink(sortfile);
517 exit(1);
518 }
519
hash(strtp,endp)520 hash(strtp,endp)
521 char *strtp, *endp;
522 {
523 char *cp, c;
524 int i, j, k;
525
526 /* Return zero hash number for single letter words */
527 if((endp - strtp) == 1)
528 return(0);
529
530 cp = strtp;
531 c = *cp++;
532 i = (isupper(c)?tolower(c):c);
533 c = *cp;
534 j = (isupper(c)?tolower(c):c);
535 i = i*j;
536 cp = --endp;
537 c = *cp--;
538 k = (isupper(c)?tolower(c):c);
539 c = *cp;
540 j = (isupper(c)?tolower(c):c);
541 j = k*j;
542
543 k = (i ^ (j>>2)) & MASK;
544 return(k);
545 }
546
storeh(num,strtp)547 storeh(num,strtp)
548 int num;
549 char *strtp;
550 {
551 int i;
552
553 for(i=num; i<MAXT; i++) {
554 if(hasht[i] == 0) {
555 hasht[i] = strtp;
556 return(0);
557 }
558 }
559 for(i=0; i<num; i++) {
560 if(hasht[i] == 0) {
561 hasht[i] = strtp;
562 return(0);
563 }
564 }
565 return(1);
566 }
567