1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.proprietary.c% 6 */ 7 8 #ifndef lint 9 static char copyright[] = 10 "@(#) Copyright (c) 1991, 1993\n\ 11 The Regents of the University of California. All rights reserved.\n"; 12 #endif /* not lint */ 13 14 #ifndef lint 15 static char sccsid[] = "@(#)spell.c 8.1 (Berkeley) 06/06/93"; 16 #endif /* not lint */ 17 18 #include "spell.h" 19 #define DLEV 2 20 21 char *strcat(); 22 int strip(); 23 char *skipv(); 24 int an(); 25 int s(); 26 int es(); 27 int ily(); 28 int ncy(); 29 int CCe(); 30 int VCe(); 31 int bility(); 32 int tion(); 33 int ize(); 34 int y_to_e(); 35 int i_to_y(); 36 int nop(); 37 int metry(); 38 39 struct suftab { 40 char *suf; 41 int (*p1)(); 42 int n1; 43 char *d1; 44 char *a1; 45 int (*p2)(); 46 int n2; 47 char *d2; 48 char *a2; 49 } suftab[] = { 50 {"ssen",ily,4,"-y+iness","+ness" }, 51 {"ssel",ily,4,"-y+i+less","+less" }, 52 {"se",s,1,"","+s", es,2,"-y+ies","+es" }, 53 {"s'",s,2,"","+'s"}, 54 {"s",s,1,"","+s"}, 55 {"ecn",ncy,1,"","-t+ce"}, 56 {"ycn",ncy,1,"","-cy+t"}, 57 {"ytilb",nop,0,"",""}, 58 {"ytilib",bility,5,"-le+ility",""}, 59 {"elbaif",i_to_y,4,"-y+iable",""}, 60 {"elba",CCe,4,"-e+able","+able"}, 61 {"yti",CCe,3,"-e+ity","+ity"}, 62 {"ylb",y_to_e,1,"-e+y",""}, 63 {"yl",ily,2,"-y+ily","+ly"}, 64 {"laci",strip,2,"","+al"}, 65 {"latnem",strip,2,"","+al"}, 66 {"lanoi",strip,2,"","+al"}, 67 {"tnem",strip,4,"","+ment"}, 68 {"gni",CCe,3,"-e+ing","+ing"}, 69 {"reta",nop,0,"",""}, 70 {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, 71 {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, 72 {"citsi",strip,2,"","+ic"}, 73 {"cihparg",i_to_y,1,"-y+ic",""}, 74 {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, 75 {"cirtem",i_to_y,1,"-y+ic",""}, 76 {"yrtem",metry,0,"-ry+er",""}, 77 {"cigol",i_to_y,1,"-y+ic",""}, 78 {"tsigol",i_to_y,2,"-y+ist",""}, 79 {"tsi",VCe,3,"-e+ist","+ist"}, 80 {"msi",VCe,3,"-e+ism","+ist"}, 81 {"noitacif",i_to_y,6,"-y+ication",""}, 82 {"noitazi",ize,5,"-e+ation",""}, 83 {"rota",tion,2,"-e+or",""}, 84 {"noit",tion,3,"-e+ion","+ion"}, 85 {"naino",an,3,"","+ian"}, 86 {"na",an,1,"","+n"}, 87 {"evit",tion,3,"-e+ive","+ive"}, 88 {"ezi",CCe,3,"-e+ize","+ize"}, 89 {"pihs",strip,4,"","+ship"}, 90 {"dooh",ily,4,"-y+hood","+hood"}, 91 {"ekil",strip,4,"","+like"}, 92 0 93 }; 94 95 char *preftab[] = { 96 "anti", 97 "bio", 98 "dis", 99 "electro", 100 "en", 101 "fore", 102 "hyper", 103 "intra", 104 "inter", 105 "iso", 106 "kilo", 107 "magneto", 108 "meta", 109 "micro", 110 "milli", 111 "mis", 112 "mono", 113 "multi", 114 "non", 115 "out", 116 "over", 117 "photo", 118 "poly", 119 "pre", 120 "pseudo", 121 "re", 122 "semi", 123 "stereo", 124 "sub", 125 "super", 126 "thermo", 127 "ultra", 128 "under", /*must precede un*/ 129 "un", 130 0 131 }; 132 133 int vflag; 134 int xflag; 135 char word[100]; 136 char original[100]; 137 char *deriv[40]; 138 char affix[40]; 139 140 main(argc,argv) 141 int argc; 142 char **argv; 143 { 144 register char *ep, *cp; 145 register char *dp; 146 int fold; 147 int j; 148 FILE *file, *found; 149 if(!prime(argc,argv)) { 150 fprintf(stderr, 151 "spell: cannot initialize hash table\n"); 152 exit(1); 153 } 154 found = fopen(argv[2],"w"); 155 for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) 156 switch(argv[0][1]) { 157 case 'b': 158 ise(); 159 break; 160 case 'v': 161 vflag++; 162 break; 163 case 'x': 164 xflag++; 165 break; 166 } 167 for(;; fprintf(file,"%s%s\n",affix,original)) { 168 affix[0] = 0; 169 file = found; 170 for(ep=word;(*ep=j=getchar())!='\n';ep++) 171 if(j == EOF) { 172 fclose(found); 173 exit(0); 174 } 175 for(cp=word,dp=original; cp<ep; ) 176 *dp++ = *cp++; 177 *dp = 0; 178 fold = 0; 179 for(cp=word;cp<ep;cp++) 180 if(islower(*cp)) 181 goto lcase; 182 if(putsuf(ep,".",0)) 183 continue; 184 ++fold; 185 for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) 186 *dp = Tolower(*cp); 187 lcase: 188 if(putsuf(ep,".",0)||suffix(ep,0)) 189 continue; 190 if(isupper(word[0])) { 191 for(cp=original,dp=word; *dp = *cp++; dp++) 192 if (fold) *dp = Tolower(*dp); 193 word[0] = Tolower(word[0]); 194 goto lcase; 195 } 196 file = stdout; 197 } 198 } 199 200 suffix(ep,lev) 201 char *ep; 202 int lev; 203 { 204 register struct suftab *t; 205 register char *cp, *sp; 206 lev += DLEV; 207 deriv[lev] = deriv[lev-1] = 0; 208 for(t= &suftab[0];sp=t->suf;t++) { 209 cp = ep; 210 while(*sp) 211 if(*--cp!=*sp++) 212 goto next; 213 for(sp=cp; --sp>=word&&!vowel(*sp); ) ; 214 if(sp<word) 215 return(0); 216 if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) 217 return(1); 218 if(t->p2!=0) { 219 deriv[lev] = deriv[lev+1] = 0; 220 return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); 221 } 222 return(0); 223 next: ; 224 } 225 return(0); 226 } 227 228 nop() 229 { 230 return(0); 231 } 232 233 strip(ep,d,a,lev) 234 char *ep,*d,*a; 235 int lev; 236 { 237 return(putsuf(ep,a,lev)||suffix(ep,lev)); 238 } 239 240 s(ep,d,a,lev) 241 char *ep,*d,*a; 242 int lev; 243 { 244 if(lev>DLEV+1) 245 return(0); 246 if(*ep=='s'&&ep[-1]=='s') 247 return(0); 248 return(strip(ep,d,a,lev)); 249 } 250 251 an(ep,d,a,lev) 252 char *ep,*d,*a; 253 int lev; 254 { 255 if(!isupper(*word)) /*must be proper name*/ 256 return(0); 257 return(putsuf(ep,a,lev)); 258 } 259 260 ize(ep,d,a,lev) 261 char *ep,*d,*a; 262 int lev; 263 { 264 *ep++ = 'e'; 265 return(strip(ep,"",d,lev)); 266 } 267 268 y_to_e(ep,d,a,lev) 269 char *ep,*d,*a; 270 int lev; 271 { 272 char c = *ep; 273 *ep++ = 'e'; 274 if (strip(ep,"",d,lev)) 275 return (1); 276 ep[-1] = c; 277 return (0); 278 } 279 280 ily(ep,d,a,lev) 281 char *ep,*d,*a; 282 int lev; 283 { 284 if(ep[-1]=='i') 285 return(i_to_y(ep,d,a,lev)); 286 else 287 return(strip(ep,d,a,lev)); 288 } 289 290 ncy(ep,d,a,lev) 291 char *ep, *d, *a; 292 int lev; 293 { 294 if(skipv(skipv(ep-1))<word) 295 return(0); 296 ep[-1] = 't'; 297 return(strip(ep,d,a,lev)); 298 } 299 300 bility(ep,d,a,lev) 301 char *ep,*d,*a; 302 int lev; 303 { 304 *ep++ = 'l'; 305 return(y_to_e(ep,d,a,lev)); 306 } 307 308 i_to_y(ep,d,a,lev) 309 char *ep,*d,*a; 310 int lev; 311 { 312 if(ep[-1]=='i') { 313 ep[-1] = 'y'; 314 a = d; 315 } 316 return(strip(ep,"",a,lev)); 317 } 318 319 es(ep,d,a,lev) 320 char *ep,*d,*a; 321 int lev; 322 { 323 if(lev>DLEV) 324 return(0); 325 switch(ep[-1]) { 326 default: 327 return(0); 328 case 'i': 329 return(i_to_y(ep,d,a,lev)); 330 case 's': 331 case 'h': 332 case 'z': 333 case 'x': 334 return(strip(ep,d,a,lev)); 335 } 336 } 337 338 metry(ep,d,a,lev) 339 char *ep, *d,*a; 340 int lev; 341 { 342 ep[-2] = 'e'; 343 ep[-1] = 'r'; 344 return(strip(ep,d,a,lev)); 345 } 346 347 tion(ep,d,a,lev) 348 char *ep,*d,*a; 349 int lev; 350 { 351 switch(ep[-2]) { 352 case 'c': 353 case 'r': 354 return(putsuf(ep,a,lev)); 355 case 'a': 356 return(y_to_e(ep,d,a,lev)); 357 } 358 return(0); 359 } 360 361 /* possible consonant-consonant-e ending*/ 362 CCe(ep,d,a,lev) 363 char *ep,*d,*a; 364 int lev; 365 { 366 switch(ep[-1]) { 367 case 'l': 368 if(vowel(ep[-2])) 369 break; 370 switch(ep[-2]) { 371 case 'l': 372 case 'r': 373 case 'w': 374 break; 375 default: 376 return(y_to_e(ep,d,a,lev)); 377 } 378 break; 379 case 's': 380 if(ep[-2]=='s') 381 break; 382 case 'c': 383 case 'g': 384 if(*ep=='a') 385 return(0); 386 case 'v': 387 case 'z': 388 if(vowel(ep[-2])) 389 break; 390 case 'u': 391 if(y_to_e(ep,d,a,lev)) 392 return(1); 393 if(!(ep[-2]=='n'&&ep[-1]=='g')) 394 return(0); 395 } 396 return(VCe(ep,d,a,lev)); 397 } 398 399 /* possible consonant-vowel-consonant-e ending*/ 400 VCe(ep,d,a,lev) 401 char *ep,*d,*a; 402 int lev; 403 { 404 char c; 405 c = ep[-1]; 406 if(c=='e') 407 return(0); 408 if(!vowel(c) && vowel(ep[-2])) { 409 c = *ep; 410 *ep++ = 'e'; 411 if(putsuf(ep,d,lev)||suffix(ep,lev)) 412 return(1); 413 ep--; 414 *ep = c; 415 } 416 return(strip(ep,d,a,lev)); 417 } 418 419 char *lookuppref(wp,ep) 420 char **wp; 421 char *ep; 422 { 423 register char **sp; 424 register char *bp,*cp; 425 for(sp=preftab;*sp;sp++) { 426 bp = *wp; 427 for(cp= *sp;*cp;cp++,bp++) 428 if(Tolower(*bp)!=*cp) 429 goto next; 430 for(cp=bp;cp<ep;cp++) 431 if(vowel(*cp)) { 432 *wp = bp; 433 return(*sp); 434 } 435 next: ; 436 } 437 return(0); 438 } 439 440 putsuf(ep,a,lev) 441 char *ep,*a; 442 int lev; 443 { 444 register char *cp; 445 char *bp; 446 register char *pp; 447 int val = 0; 448 char space[20]; 449 deriv[lev] = a; 450 if(putword(word,ep,lev)) 451 return(1); 452 bp = word; 453 pp = space; 454 deriv[lev+1] = pp; 455 while(cp=lookuppref(&bp,ep)) { 456 *pp++ = '+'; 457 while(*pp = *cp++) 458 pp++; 459 if(putword(bp,ep,lev+1)) { 460 val = 1; 461 break; 462 } 463 } 464 deriv[lev+1] = deriv[lev+2] = 0; 465 return(val); 466 } 467 468 putword(bp,ep,lev) 469 char *bp,*ep; 470 int lev; 471 { 472 register i, j; 473 char duple[3]; 474 if(ep-bp<=1) 475 return(0); 476 if(vowel(*ep)) { 477 if(monosyl(bp,ep)) 478 return(0); 479 } 480 i = dict(bp,ep); 481 if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { 482 ep--; 483 deriv[++lev] = duple; 484 duple[0] = '+'; 485 duple[1] = *ep; 486 duple[2] = 0; 487 i = dict(bp,ep); 488 } 489 if(vflag==0||i==0) 490 return(i); 491 j = lev; 492 do { 493 if(deriv[j]) 494 strcat(affix,deriv[j]); 495 } while(--j>0); 496 strcat(affix,"\t"); 497 return(i); 498 } 499 500 501 monosyl(bp,ep) 502 char *bp, *ep; 503 { 504 if(ep<bp+2) 505 return(0); 506 if(vowel(*--ep)||!vowel(*--ep) 507 ||ep[1]=='x'||ep[1]=='w') 508 return(0); 509 while(--ep>=bp) 510 if(vowel(*ep)) 511 return(0); 512 return(1); 513 } 514 515 char * 516 skipv(s) 517 char *s; 518 { 519 if(s>=word&&vowel(*s)) 520 s--; 521 while(s>=word&&!vowel(*s)) 522 s--; 523 return(s); 524 } 525 526 vowel(c) 527 int c; 528 { 529 switch(Tolower(c)) { 530 case 'a': 531 case 'e': 532 case 'i': 533 case 'o': 534 case 'u': 535 case 'y': 536 return(1); 537 } 538 return(0); 539 } 540 541 /* crummy way to Britishise */ 542 ise() 543 { 544 register struct suftab *p; 545 for(p = suftab;p->suf;p++) { 546 ztos(p->suf); 547 ztos(p->d1); 548 ztos(p->a1); 549 } 550 } 551 ztos(s) 552 char *s; 553 { 554 for(;*s;s++) 555 if(*s=='z') 556 *s = 's'; 557 } 558 559 dict(bp,ep) 560 char *bp, *ep; 561 { 562 register char *wp; 563 long h; 564 register long *lp; 565 register i; 566 if(xflag) 567 printf("=%.*s\n",ep-bp,bp); 568 for(i=0; i<NP; i++) { 569 for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) 570 h += *wp * *lp; 571 h += '\n' * *lp; 572 h %= p[i]; 573 if(get(h)==0) 574 return(0); 575 } 576 return(1); 577 } 578