1 #ifndef lint 2 static char sccsid[] = "@(#)spell.c 4.4 02/02/91"; 3 #endif 4 5 #include "spell.h" 6 #define DLEV 2 7 8 char *strcat(); 9 int strip(); 10 char *skipv(); 11 int an(); 12 int s(); 13 int es(); 14 int ily(); 15 int ncy(); 16 int CCe(); 17 int VCe(); 18 int bility(); 19 int tion(); 20 int ize(); 21 int y_to_e(); 22 int i_to_y(); 23 int nop(); 24 int metry(); 25 26 struct suftab { 27 char *suf; 28 int (*p1)(); 29 int n1; 30 char *d1; 31 char *a1; 32 int (*p2)(); 33 int n2; 34 char *d2; 35 char *a2; 36 } suftab[] = { 37 {"ssen",ily,4,"-y+iness","+ness" }, 38 {"ssel",ily,4,"-y+i+less","+less" }, 39 {"se",s,1,"","+s", es,2,"-y+ies","+es" }, 40 {"s'",s,2,"","+'s"}, 41 {"s",s,1,"","+s"}, 42 {"ecn",ncy,1,"","-t+ce"}, 43 {"ycn",ncy,1,"","-cy+t"}, 44 {"ytilb",nop,0,"",""}, 45 {"ytilib",bility,5,"-le+ility",""}, 46 {"elbaif",i_to_y,4,"-y+iable",""}, 47 {"elba",CCe,4,"-e+able","+able"}, 48 {"yti",CCe,3,"-e+ity","+ity"}, 49 {"ylb",y_to_e,1,"-e+y",""}, 50 {"yl",ily,2,"-y+ily","+ly"}, 51 {"laci",strip,2,"","+al"}, 52 {"latnem",strip,2,"","+al"}, 53 {"lanoi",strip,2,"","+al"}, 54 {"tnem",strip,4,"","+ment"}, 55 {"gni",CCe,3,"-e+ing","+ing"}, 56 {"reta",nop,0,"",""}, 57 {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, 58 {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, 59 {"citsi",strip,2,"","+ic"}, 60 {"cihparg",i_to_y,1,"-y+ic",""}, 61 {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, 62 {"cirtem",i_to_y,1,"-y+ic",""}, 63 {"yrtem",metry,0,"-ry+er",""}, 64 {"cigol",i_to_y,1,"-y+ic",""}, 65 {"tsigol",i_to_y,2,"-y+ist",""}, 66 {"tsi",VCe,3,"-e+ist","+ist"}, 67 {"msi",VCe,3,"-e+ism","+ist"}, 68 {"noitacif",i_to_y,6,"-y+ication",""}, 69 {"noitazi",ize,5,"-e+ation",""}, 70 {"rota",tion,2,"-e+or",""}, 71 {"noit",tion,3,"-e+ion","+ion"}, 72 {"naino",an,3,"","+ian"}, 73 {"na",an,1,"","+n"}, 74 {"evit",tion,3,"-e+ive","+ive"}, 75 {"ezi",CCe,3,"-e+ize","+ize"}, 76 {"pihs",strip,4,"","+ship"}, 77 {"dooh",ily,4,"-y+hood","+hood"}, 78 {"ekil",strip,4,"","+like"}, 79 0 80 }; 81 82 char *preftab[] = { 83 "anti", 84 "bio", 85 "dis", 86 "electro", 87 "en", 88 "fore", 89 "hyper", 90 "intra", 91 "inter", 92 "iso", 93 "kilo", 94 "magneto", 95 "meta", 96 "micro", 97 "milli", 98 "mis", 99 "mono", 100 "multi", 101 "non", 102 "out", 103 "over", 104 "photo", 105 "poly", 106 "pre", 107 "pseudo", 108 "re", 109 "semi", 110 "stereo", 111 "sub", 112 "super", 113 "thermo", 114 "ultra", 115 "under", /*must precede un*/ 116 "un", 117 0 118 }; 119 120 int vflag; 121 int xflag; 122 char word[100]; 123 char original[100]; 124 char *deriv[40]; 125 char affix[40]; 126 127 main(argc,argv) 128 int argc; 129 char **argv; 130 { 131 register char *ep, *cp; 132 register char *dp; 133 int fold; 134 int j; 135 FILE *file, *found; 136 if(!prime(argc,argv)) { 137 fprintf(stderr, 138 "spell: cannot initialize hash table\n"); 139 exit(1); 140 } 141 found = fopen(argv[2],"w"); 142 for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) 143 switch(argv[0][1]) { 144 case 'b': 145 ise(); 146 break; 147 case 'v': 148 vflag++; 149 break; 150 case 'x': 151 xflag++; 152 break; 153 } 154 for(;; fprintf(file,"%s%s\n",affix,original)) { 155 affix[0] = 0; 156 file = found; 157 for(ep=word;(*ep=j=getchar())!='\n';ep++) 158 if(j == EOF) { 159 fclose(found); 160 exit(0); 161 } 162 for(cp=word,dp=original; cp<ep; ) 163 *dp++ = *cp++; 164 *dp = 0; 165 fold = 0; 166 for(cp=word;cp<ep;cp++) 167 if(islower(*cp)) 168 goto lcase; 169 if(putsuf(ep,".",0)) 170 continue; 171 ++fold; 172 for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) 173 *dp = Tolower(*cp); 174 lcase: 175 if(putsuf(ep,".",0)||suffix(ep,0)) 176 continue; 177 if(isupper(word[0])) { 178 for(cp=original,dp=word; *dp = *cp++; dp++) 179 if (fold) *dp = Tolower(*dp); 180 word[0] = Tolower(word[0]); 181 goto lcase; 182 } 183 file = stdout; 184 } 185 } 186 187 suffix(ep,lev) 188 char *ep; 189 int lev; 190 { 191 register struct suftab *t; 192 register char *cp, *sp; 193 lev += DLEV; 194 deriv[lev] = deriv[lev-1] = 0; 195 for(t= &suftab[0];sp=t->suf;t++) { 196 cp = ep; 197 while(*sp) 198 if(*--cp!=*sp++) 199 goto next; 200 for(sp=cp; --sp>=word&&!vowel(*sp); ) ; 201 if(sp<word) 202 return(0); 203 if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) 204 return(1); 205 if(t->p2!=0) { 206 deriv[lev] = deriv[lev+1] = 0; 207 return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); 208 } 209 return(0); 210 next: ; 211 } 212 return(0); 213 } 214 215 nop() 216 { 217 return(0); 218 } 219 220 strip(ep,d,a,lev) 221 char *ep,*d,*a; 222 int lev; 223 { 224 return(putsuf(ep,a,lev)||suffix(ep,lev)); 225 } 226 227 s(ep,d,a,lev) 228 char *ep,*d,*a; 229 int lev; 230 { 231 if(lev>DLEV+1) 232 return(0); 233 if(*ep=='s'&&ep[-1]=='s') 234 return(0); 235 return(strip(ep,d,a,lev)); 236 } 237 238 an(ep,d,a,lev) 239 char *ep,*d,*a; 240 int lev; 241 { 242 if(!isupper(*word)) /*must be proper name*/ 243 return(0); 244 return(putsuf(ep,a,lev)); 245 } 246 247 ize(ep,d,a,lev) 248 char *ep,*d,*a; 249 int lev; 250 { 251 *ep++ = 'e'; 252 return(strip(ep,"",d,lev)); 253 } 254 255 y_to_e(ep,d,a,lev) 256 char *ep,*d,*a; 257 int lev; 258 { 259 char c = *ep; 260 *ep++ = 'e'; 261 if (strip(ep,"",d,lev)) 262 return (1); 263 ep[-1] = c; 264 return (0); 265 } 266 267 ily(ep,d,a,lev) 268 char *ep,*d,*a; 269 int lev; 270 { 271 if(ep[-1]=='i') 272 return(i_to_y(ep,d,a,lev)); 273 else 274 return(strip(ep,d,a,lev)); 275 } 276 277 ncy(ep,d,a,lev) 278 char *ep, *d, *a; 279 int lev; 280 { 281 if(skipv(skipv(ep-1))<word) 282 return(0); 283 ep[-1] = 't'; 284 return(strip(ep,d,a,lev)); 285 } 286 287 bility(ep,d,a,lev) 288 char *ep,*d,*a; 289 int lev; 290 { 291 *ep++ = 'l'; 292 return(y_to_e(ep,d,a,lev)); 293 } 294 295 i_to_y(ep,d,a,lev) 296 char *ep,*d,*a; 297 int lev; 298 { 299 if(ep[-1]=='i') { 300 ep[-1] = 'y'; 301 a = d; 302 } 303 return(strip(ep,"",a,lev)); 304 } 305 306 es(ep,d,a,lev) 307 char *ep,*d,*a; 308 int lev; 309 { 310 if(lev>DLEV) 311 return(0); 312 switch(ep[-1]) { 313 default: 314 return(0); 315 case 'i': 316 return(i_to_y(ep,d,a,lev)); 317 case 's': 318 case 'h': 319 case 'z': 320 case 'x': 321 return(strip(ep,d,a,lev)); 322 } 323 } 324 325 metry(ep,d,a,lev) 326 char *ep, *d,*a; 327 int lev; 328 { 329 ep[-2] = 'e'; 330 ep[-1] = 'r'; 331 return(strip(ep,d,a,lev)); 332 } 333 334 tion(ep,d,a,lev) 335 char *ep,*d,*a; 336 int lev; 337 { 338 switch(ep[-2]) { 339 case 'c': 340 case 'r': 341 return(putsuf(ep,a,lev)); 342 case 'a': 343 return(y_to_e(ep,d,a,lev)); 344 } 345 return(0); 346 } 347 348 /* possible consonant-consonant-e ending*/ 349 CCe(ep,d,a,lev) 350 char *ep,*d,*a; 351 int lev; 352 { 353 switch(ep[-1]) { 354 case 'l': 355 if(vowel(ep[-2])) 356 break; 357 switch(ep[-2]) { 358 case 'l': 359 case 'r': 360 case 'w': 361 break; 362 default: 363 return(y_to_e(ep,d,a,lev)); 364 } 365 break; 366 case 's': 367 if(ep[-2]=='s') 368 break; 369 case 'c': 370 case 'g': 371 if(*ep=='a') 372 return(0); 373 case 'v': 374 case 'z': 375 if(vowel(ep[-2])) 376 break; 377 case 'u': 378 if(y_to_e(ep,d,a,lev)) 379 return(1); 380 if(!(ep[-2]=='n'&&ep[-1]=='g')) 381 return(0); 382 } 383 return(VCe(ep,d,a,lev)); 384 } 385 386 /* possible consonant-vowel-consonant-e ending*/ 387 VCe(ep,d,a,lev) 388 char *ep,*d,*a; 389 int lev; 390 { 391 char c; 392 c = ep[-1]; 393 if(c=='e') 394 return(0); 395 if(!vowel(c) && vowel(ep[-2])) { 396 c = *ep; 397 *ep++ = 'e'; 398 if(putsuf(ep,d,lev)||suffix(ep,lev)) 399 return(1); 400 ep--; 401 *ep = c; 402 } 403 return(strip(ep,d,a,lev)); 404 } 405 406 char *lookuppref(wp,ep) 407 char **wp; 408 char *ep; 409 { 410 register char **sp; 411 register char *bp,*cp; 412 for(sp=preftab;*sp;sp++) { 413 bp = *wp; 414 for(cp= *sp;*cp;cp++,bp++) 415 if(Tolower(*bp)!=*cp) 416 goto next; 417 for(cp=bp;cp<ep;cp++) 418 if(vowel(*cp)) { 419 *wp = bp; 420 return(*sp); 421 } 422 next: ; 423 } 424 return(0); 425 } 426 427 putsuf(ep,a,lev) 428 char *ep,*a; 429 int lev; 430 { 431 register char *cp; 432 char *bp; 433 register char *pp; 434 int val = 0; 435 char space[20]; 436 deriv[lev] = a; 437 if(putword(word,ep,lev)) 438 return(1); 439 bp = word; 440 pp = space; 441 deriv[lev+1] = pp; 442 while(cp=lookuppref(&bp,ep)) { 443 *pp++ = '+'; 444 while(*pp = *cp++) 445 pp++; 446 if(putword(bp,ep,lev+1)) { 447 val = 1; 448 break; 449 } 450 } 451 deriv[lev+1] = deriv[lev+2] = 0; 452 return(val); 453 } 454 455 putword(bp,ep,lev) 456 char *bp,*ep; 457 int lev; 458 { 459 register i, j; 460 char duple[3]; 461 if(ep-bp<=1) 462 return(0); 463 if(vowel(*ep)) { 464 if(monosyl(bp,ep)) 465 return(0); 466 } 467 i = dict(bp,ep); 468 if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { 469 ep--; 470 deriv[++lev] = duple; 471 duple[0] = '+'; 472 duple[1] = *ep; 473 duple[2] = 0; 474 i = dict(bp,ep); 475 } 476 if(vflag==0||i==0) 477 return(i); 478 j = lev; 479 do { 480 if(deriv[j]) 481 strcat(affix,deriv[j]); 482 } while(--j>0); 483 strcat(affix,"\t"); 484 return(i); 485 } 486 487 488 monosyl(bp,ep) 489 char *bp, *ep; 490 { 491 if(ep<bp+2) 492 return(0); 493 if(vowel(*--ep)||!vowel(*--ep) 494 ||ep[1]=='x'||ep[1]=='w') 495 return(0); 496 while(--ep>=bp) 497 if(vowel(*ep)) 498 return(0); 499 return(1); 500 } 501 502 char * 503 skipv(s) 504 char *s; 505 { 506 if(s>=word&&vowel(*s)) 507 s--; 508 while(s>=word&&!vowel(*s)) 509 s--; 510 return(s); 511 } 512 513 vowel(c) 514 int c; 515 { 516 switch(Tolower(c)) { 517 case 'a': 518 case 'e': 519 case 'i': 520 case 'o': 521 case 'u': 522 case 'y': 523 return(1); 524 } 525 return(0); 526 } 527 528 /* crummy way to Britishise */ 529 ise() 530 { 531 register struct suftab *p; 532 for(p = suftab;p->suf;p++) { 533 ztos(p->suf); 534 ztos(p->d1); 535 ztos(p->a1); 536 } 537 } 538 ztos(s) 539 char *s; 540 { 541 for(;*s;s++) 542 if(*s=='z') 543 *s = 's'; 544 } 545 546 dict(bp,ep) 547 char *bp, *ep; 548 { 549 register char *wp; 550 long h; 551 register long *lp; 552 register i; 553 if(xflag) 554 printf("=%.*s\n",ep-bp,bp); 555 for(i=0; i<NP; i++) { 556 for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) 557 h += *wp * *lp; 558 h += '\n' * *lp; 559 h %= p[i]; 560 if(get(h)==0) 561 return(0); 562 } 563 return(1); 564 } 565