1 /*- 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.proprietary.c% 6 */ 7 8 #ifndef lint 9 char copyright[] = 10 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ 11 All rights reserved.\n"; 12 #endif /* not lint */ 13 14 #ifndef lint 15 static char sccsid[] = "@(#)ptx.c 4.7 (Berkeley) 04/18/91"; 16 #endif /* not lint */ 17 18 /* permuted title index 19 ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output] 20 Ptx reads the input file and permutes on words in it. 21 It excludes all words in the ignore file. 22 Alternately it includes words in the only file. 23 if neither is given it excludes the words in _PATH_EIGN. 24 25 The width of the output line can be changed to num 26 characters. If omitted 72 is default unless troff than 100. 27 the -f flag tells the program to fold the output 28 the -t flag says the output is for troff and the 29 output is then wider. 30 31 */ 32 33 #include <stdio.h> 34 #include <ctype.h> 35 #include <signal.h> 36 #include "pathnames.h" 37 38 #define TILDE 0177 39 #define N 30 40 #define MAX N*BUFSIZ 41 #define LMAX 200 42 #define MAXT 2048 43 #define MASK 03777 44 #define SET 1 45 46 #define isabreak(c) (btable[c]) 47 48 extern char *calloc(), *mktemp(); 49 extern char *getline(); 50 int status; 51 52 53 char *hasht[MAXT]; 54 char line[LMAX]; 55 char btable[128]; 56 int ignore; 57 int only; 58 int llen = 72; 59 int gap = 3; 60 int gutter = 3; 61 int mlen = LMAX; 62 int wlen; 63 int rflag; 64 int halflen; 65 char *strtbufp, *endbufp; 66 char *empty = ""; 67 68 char *infile; 69 FILE *inptr = stdin; 70 71 char *outfile; 72 FILE *outptr = stdout; 73 74 char sortfile[] = _PATH_TMP; /* output of sort program */ 75 char nofold[] = {'-', 'd', 't', TILDE, 0}; 76 char fold[] = {'-', 'd', 'f', 't', TILDE, 0}; 77 char *sortopt = nofold; 78 FILE *sortptr; 79 80 char *bfile; /*contains user supplied break chars */ 81 FILE *bptr; 82 83 main(argc,argv) 84 int argc; 85 char **argv; 86 { 87 register int c; 88 register char *bufp; 89 int pid; 90 char *pend; 91 extern void onintr(); 92 93 char *xfile; 94 FILE *xptr; 95 96 if(signal(SIGHUP,onintr)==SIG_IGN) 97 signal(SIGHUP,SIG_IGN); 98 if(signal(SIGINT,onintr)==SIG_IGN) 99 signal(SIGINT,SIG_IGN); 100 signal(SIGPIPE,onintr); 101 signal(SIGTERM,onintr); 102 103 /* argument decoding */ 104 105 xfile = _PATH_EIGN; 106 argv++; 107 while(argc>1 && **argv == '-') { 108 switch (*++*argv){ 109 110 case 'r': 111 rflag++; 112 break; 113 case 'f': 114 sortopt = fold; 115 break; 116 117 case 'w': 118 if(argc >= 2) { 119 argc--; 120 wlen++; 121 llen = atoi(*++argv); 122 if(llen == 0) 123 diag("Wrong width:",*argv); 124 if(llen > LMAX) { 125 llen = LMAX; 126 msg("Lines truncated to 200 chars.",empty); 127 } 128 break; 129 } 130 131 case 't': 132 if(wlen == 0) 133 llen = 100; 134 break; 135 case 'g': 136 if(argc >=2) { 137 argc--; 138 gap = gutter = atoi(*++argv); 139 } 140 break; 141 142 case 'i': 143 if(only) 144 diag("Only file already given.",empty); 145 if (argc>=2){ 146 argc--; 147 ignore++; 148 xfile = *++argv; 149 } 150 break; 151 152 case 'o': 153 if(ignore) 154 diag("Ignore file already given",empty); 155 if (argc>=2){ 156 only++; 157 argc--; 158 xfile = *++argv; 159 } 160 break; 161 162 case 'b': 163 if(argc>=2) { 164 argc--; 165 bfile = *++argv; 166 } 167 break; 168 169 default: 170 msg("Illegal argument:",*argv); 171 } 172 argc--; 173 argv++; 174 } 175 176 if(argc>3) 177 diag("Too many filenames",empty); 178 else if(argc==3){ 179 infile = *argv++; 180 outfile = *argv; 181 if((outptr = fopen(outfile,"w")) == NULL) 182 diag("Cannot open output file:",outfile); 183 } else if(argc==2) { 184 infile = *argv; 185 outfile = 0; 186 } 187 188 189 /* Default breaks of blank, tab and newline */ 190 btable[' '] = SET; 191 btable['\t'] = SET; 192 btable['\n'] = SET; 193 if(bfile) { 194 if((bptr = fopen(bfile,"r")) == NULL) 195 diag("Cannot open break char file",bfile); 196 197 while((c = getc(bptr)) != EOF) 198 btable[c] = SET; 199 } 200 201 /* Allocate space for a buffer. If only or ignore file present 202 read it into buffer. Else read in default ignore file 203 and put resulting words in buffer. 204 */ 205 206 207 if((strtbufp = calloc(N,BUFSIZ)) == NULL) 208 diag("Out of memory space",empty); 209 bufp = strtbufp; 210 endbufp = strtbufp+MAX; 211 212 if((xptr = fopen(xfile,"r")) == NULL) 213 diag("Cannot open file",xfile); 214 215 while(bufp < endbufp && (c = getc(xptr)) != EOF) { 216 if(isabreak(c)) { 217 if(storeh(hash(strtbufp,bufp),strtbufp)) 218 diag("Too many words",xfile); 219 *bufp++ = '\0'; 220 strtbufp = bufp; 221 } 222 else { 223 *bufp++ = (isupper(c)?tolower(c):c); 224 } 225 } 226 if (bufp >= endbufp) 227 diag("Too many words in file",xfile); 228 endbufp = --bufp; 229 230 /* open output file for sorting */ 231 232 mktemp(sortfile); 233 if((sortptr = fopen(sortfile, "w")) == NULL) 234 diag("Cannot open output for sorting:",sortfile); 235 236 /* get a line of data and compare each word for 237 inclusion or exclusion in the sort phase 238 */ 239 240 if (infile!=0 && (inptr = fopen(infile,"r")) == NULL) 241 diag("Cannot open data: ",infile); 242 while(pend=getline()) 243 cmpline(pend); 244 fclose(sortptr); 245 246 switch (pid = fork()){ 247 248 case -1: /* cannot fork */ 249 diag("Cannot fork",empty); 250 251 case 0: /* child */ 252 execl(_PATH_SORT, "sort", sortopt, "+0", "-1", "+1", 253 sortfile, "-o", sortfile, 0); 254 255 default: /* parent */ 256 while(wait(&status) != pid); 257 } 258 259 260 getsort(); 261 unlink(sortfile); 262 exit(0); 263 } 264 265 msg(s,arg) 266 char *s; 267 char *arg; 268 { 269 fprintf(stderr,"%s %s\n",s,arg); 270 return; 271 } 272 diag(s,arg) 273 char *s, *arg; 274 { 275 276 msg(s,arg); 277 exit(1); 278 } 279 280 281 char *getline() 282 { 283 284 register c; 285 register char *linep; 286 char *endlinep; 287 288 289 endlinep= line + mlen; 290 linep = line; 291 /* Throw away leading white space */ 292 293 while(isspace(c=getc(inptr))) 294 ; 295 if(c==EOF) 296 return(0); 297 ungetc(c,inptr); 298 while(( c=getc(inptr)) != EOF) { 299 switch (c) { 300 301 case '\t': 302 if(linep<endlinep) 303 *linep++ = ' '; 304 break; 305 case '\n': 306 while(isspace(*--linep)); 307 *++linep = '\n'; 308 return(linep); 309 default: 310 if(linep < endlinep) 311 *linep++ = c; 312 } 313 } 314 return(0); 315 } 316 317 cmpline(pend) 318 char *pend; 319 { 320 321 char *pstrt, *pchar, *cp; 322 char **hp; 323 int flag; 324 325 pchar = line; 326 if(rflag) 327 while(pchar<pend&&!isspace(*pchar)) 328 pchar++; 329 while(pchar<pend){ 330 /* eliminate white space */ 331 if(isabreak(*pchar++)) 332 continue; 333 pstrt = --pchar; 334 335 flag = 1; 336 while(flag){ 337 if(isabreak(*pchar)) { 338 hp = &hasht[hash(pstrt,pchar)]; 339 pchar--; 340 while(cp = *hp++){ 341 if(hp == &hasht[MAXT]) 342 hp = hasht; 343 /* possible match */ 344 if(cmpword(pstrt,pchar,cp)){ 345 /* exact match */ 346 if(!ignore && only) 347 putline(pstrt,pend); 348 flag = 0; 349 break; 350 } 351 } 352 /* no match */ 353 if(flag){ 354 if(ignore || !only) 355 putline(pstrt,pend); 356 flag = 0; 357 } 358 } 359 pchar++; 360 } 361 } 362 } 363 364 cmpword(cpp,pend,hpp) 365 char *cpp, *pend, *hpp; 366 { 367 char c; 368 369 while(*hpp != '\0'){ 370 c = *cpp++; 371 if((isupper(c)?tolower(c):c) != *hpp++) 372 return(0); 373 } 374 if(--cpp == pend) return(1); 375 return(0); 376 } 377 378 putline(strt, end) 379 char *strt, *end; 380 { 381 char *cp; 382 383 for(cp=strt; cp<end; cp++) 384 putc(*cp, sortptr); 385 /* Add extra blank before TILDE to sort correctly 386 with -fd option */ 387 putc(' ',sortptr); 388 putc(TILDE,sortptr); 389 for (cp=line; cp<strt; cp++) 390 putc(*cp,sortptr); 391 putc('\n',sortptr); 392 } 393 394 getsort() 395 { 396 register c; 397 register char *tilde, *linep, *ref; 398 char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b; 399 int w; 400 char *rtrim(), *ltrim(); 401 402 if((sortptr = fopen(sortfile,"r")) == NULL) 403 diag("Cannot open sorted data:",sortfile); 404 405 halflen = (llen-gutter)/2; 406 linep = line; 407 while((c = getc(sortptr)) != EOF) { 408 switch(c) { 409 410 case TILDE: 411 tilde = linep; 412 break; 413 414 case '\n': 415 while(isspace(linep[-1])) 416 linep--; 417 ref = tilde; 418 if(rflag) { 419 while(ref<linep&&!isspace(*ref)) 420 ref++; 421 *ref++ = 0; 422 } 423 /* the -1 is an overly conservative test to leave 424 space for the / that signifies truncation*/ 425 p3b = rtrim(p3a=line,tilde,halflen-1); 426 if(p3b-p3a>halflen-1) 427 p3b = p3a+halflen-1; 428 p2a = ltrim(ref,p2b=linep,halflen-1); 429 if(p2b-p2a>halflen-1) 430 p2a = p2b-halflen-1; 431 p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde, 432 w=halflen-(p2b-p2a)-gap); 433 if(p1b-p1a>w) 434 p1b = p1a; 435 p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0), 436 w=halflen-(p3b-p3a)-gap); 437 if(p4b-p4a>w) 438 p4a = p4b; 439 fprintf(outptr,".xx \""); 440 putout(p1a,p1b); 441 /* tilde-1 to account for extra space before TILDE */ 442 if(p1b!=(tilde-1) && p1a!=p1b) 443 fprintf(outptr,"/"); 444 fprintf(outptr,"\" \""); 445 if(p4a==p4b && p2a!=ref && p2a!=p2b) 446 fprintf(outptr,"/"); 447 putout(p2a,p2b); 448 fprintf(outptr,"\" \""); 449 putout(p3a,p3b); 450 /* ++p3b to account for extra blank after TILDE */ 451 /* ++p3b to account for extra space before TILDE */ 452 if(p1a==p1b && ++p3b!=tilde) 453 fprintf(outptr,"/"); 454 fprintf(outptr,"\" \""); 455 if(p1a==p1b && p4a!=ref && p4a!=p4b) 456 fprintf(outptr,"/"); 457 putout(p4a,p4b); 458 if(rflag) 459 fprintf(outptr,"\" %s\n",tilde); 460 else 461 fprintf(outptr,"\"\n"); 462 linep = line; 463 break; 464 465 case '"': 466 /* put double " for " */ 467 *linep++ = c; 468 default: 469 *linep++ = c; 470 } 471 } 472 } 473 474 char *rtrim(a,c,d) 475 char *a,*c; 476 { 477 char *b,*x; 478 b = c; 479 for(x=a+1; x<=c&&x-a<=d; x++) 480 if((x==c||isspace(x[0]))&&!isspace(x[-1])) 481 b = x; 482 if(b<c&&!isspace(b[0])) 483 b++; 484 return(b); 485 } 486 487 char *ltrim(c,b,d) 488 char *c,*b; 489 { 490 char *a,*x; 491 a = c; 492 for(x=b-1; x>=c&&b-x<=d; x--) 493 if(!isspace(x[0])&&(x==c||isspace(x[-1]))) 494 a = x; 495 if(a>c&&!isspace(a[-1])) 496 a--; 497 return(a); 498 } 499 500 putout(strt,end) 501 char *strt, *end; 502 { 503 char *cp; 504 505 cp = strt; 506 507 for(cp=strt; cp<end; cp++) { 508 putc(*cp,outptr); 509 } 510 } 511 512 void 513 onintr() 514 { 515 516 unlink(sortfile); 517 exit(1); 518 } 519 520 hash(strtp,endp) 521 char *strtp, *endp; 522 { 523 char *cp, c; 524 int i, j, k; 525 526 /* Return zero hash number for single letter words */ 527 if((endp - strtp) == 1) 528 return(0); 529 530 cp = strtp; 531 c = *cp++; 532 i = (isupper(c)?tolower(c):c); 533 c = *cp; 534 j = (isupper(c)?tolower(c):c); 535 i = i*j; 536 cp = --endp; 537 c = *cp--; 538 k = (isupper(c)?tolower(c):c); 539 c = *cp; 540 j = (isupper(c)?tolower(c):c); 541 j = k*j; 542 543 k = (i ^ (j>>2)) & MASK; 544 return(k); 545 } 546 547 storeh(num,strtp) 548 int num; 549 char *strtp; 550 { 551 int i; 552 553 for(i=num; i<MAXT; i++) { 554 if(hasht[i] == 0) { 555 hasht[i] = strtp; 556 return(0); 557 } 558 } 559 for(i=0; i<num; i++) { 560 if(hasht[i] == 0) { 561 hasht[i] = strtp; 562 return(0); 563 } 564 } 565 return(1); 566 } 567