1 #ifndef lint 2 static char sccsid[] = "@(#)spell.c 4.3 05/15/90"; 3 #endif 4 5 #include "spell.h" 6 #define DLEV 2 7 8 char *strcat(); 9 int strip(); 10 char *skipv(); 11 int an(); 12 int s(); 13 int es(); 14 int ily(); 15 int ncy(); 16 int CCe(); 17 int VCe(); 18 int bility(); 19 int tion(); 20 int ize(); 21 int y_to_e(); 22 int i_to_y(); 23 int nop(); 24 int metry(); 25 26 struct suftab { 27 char *suf; 28 int (*p1)(); 29 int n1; 30 char *d1; 31 char *a1; 32 int (*p2)(); 33 int n2; 34 char *d2; 35 char *a2; 36 } suftab[] = { 37 {"ssen",ily,4,"-y+iness","+ness" }, 38 {"ssel",ily,4,"-y+i+less","+less" }, 39 {"se",s,1,"","+s", es,2,"-y+ies","+es" }, 40 {"s'",s,2,"","+'s"}, 41 {"s",s,1,"","+s"}, 42 {"ecn",ncy,1,"","-t+ce"}, 43 {"ycn",ncy,1,"","-cy+t"}, 44 {"ytilb",nop,0,"",""}, 45 {"ytilib",bility,5,"-le+ility",""}, 46 {"elbaif",i_to_y,4,"-y+iable",""}, 47 {"elba",CCe,4,"-e+able","+able"}, 48 {"yti",CCe,3,"-e+ity","+ity"}, 49 {"ylb",y_to_e,1,"-e+y",""}, 50 {"yl",ily,2,"-y+ily","+ly"}, 51 {"laci",strip,2,"","+al"}, 52 {"latnem",strip,2,"","+al"}, 53 {"lanoi",strip,2,"","+al"}, 54 {"tnem",strip,4,"","+ment"}, 55 {"gni",CCe,3,"-e+ing","+ing"}, 56 {"reta",nop,0,"",""}, 57 {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, 58 {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, 59 {"citsi",strip,2,"","+ic"}, 60 {"cihparg",i_to_y,1,"-y+ic",""}, 61 {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, 62 {"cirtem",i_to_y,1,"-y+ic",""}, 63 {"yrtem",metry,0,"-ry+er",""}, 64 {"cigol",i_to_y,1,"-y+ic",""}, 65 {"tsigol",i_to_y,2,"-y+ist",""}, 66 {"tsi",VCe,3,"-e+ist","+ist"}, 67 {"msi",VCe,3,"-e+ism","+ist"}, 68 {"noitacif",i_to_y,6,"-y+ication",""}, 69 {"noitazi",ize,5,"-e+ation",""}, 70 {"rota",tion,2,"-e+or",""}, 71 {"noit",tion,3,"-e+ion","+ion"}, 72 {"naino",an,3,"","+ian"}, 73 {"na",an,1,"","+n"}, 74 {"evit",tion,3,"-e+ive","+ive"}, 75 {"ezi",CCe,3,"-e+ize","+ize"}, 76 {"pihs",strip,4,"","+ship"}, 77 {"dooh",ily,4,"-y+hood","+hood"}, 78 {"ekil",strip,4,"","+like"}, 79 0 80 }; 81 82 char *preftab[] = { 83 "anti", 84 "bio", 85 "dis", 86 "electro", 87 "en", 88 "fore", 89 "hyper", 90 "intra", 91 "inter", 92 "iso", 93 "kilo", 94 "magneto", 95 "meta", 96 "micro", 97 "milli", 98 "mis", 99 "mono", 100 "multi", 101 "non", 102 "out", 103 "over", 104 "photo", 105 "poly", 106 "pre", 107 "pseudo", 108 "re", 109 "semi", 110 "stereo", 111 "sub", 112 "super", 113 "thermo", 114 "ultra", 115 "under", /*must precede un*/ 116 "un", 117 0 118 }; 119 120 int vflag; 121 int xflag; 122 char word[100]; 123 char original[100]; 124 char *deriv[40]; 125 char affix[40]; 126 127 main(argc,argv) 128 char **argv; 129 { 130 register char *ep, *cp; 131 register char *dp; 132 int fold; 133 int j; 134 FILE *file, *found; 135 if(!prime(argc,argv)) { 136 fprintf(stderr, 137 "spell: cannot initialize hash table\n"); 138 exit(1); 139 } 140 found = fopen(argv[2],"w"); 141 for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) 142 switch(argv[0][1]) { 143 case 'b': 144 ise(); 145 break; 146 case 'v': 147 vflag++; 148 break; 149 case 'x': 150 xflag++; 151 break; 152 } 153 for(;; fprintf(file,"%s%s\n",affix,original)) { 154 affix[0] = 0; 155 file = found; 156 for(ep=word;(*ep=j=getchar())!='\n';ep++) 157 if(j == EOF) { 158 fclose(found); 159 exit(0); 160 } 161 for(cp=word,dp=original; cp<ep; ) 162 *dp++ = *cp++; 163 *dp = 0; 164 fold = 0; 165 for(cp=word;cp<ep;cp++) 166 if(islower(*cp)) 167 goto lcase; 168 if(putsuf(ep,".",0)) 169 continue; 170 ++fold; 171 for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) 172 *dp = Tolower(*cp); 173 lcase: 174 if(putsuf(ep,".",0)||suffix(ep,0)) 175 continue; 176 if(isupper(word[0])) { 177 for(cp=original,dp=word; *dp = *cp++; dp++) 178 if (fold) *dp = Tolower(*dp); 179 word[0] = Tolower(word[0]); 180 goto lcase; 181 } 182 file = stdout; 183 } 184 } 185 186 suffix(ep,lev) 187 char *ep; 188 { 189 register struct suftab *t; 190 register char *cp, *sp; 191 lev += DLEV; 192 deriv[lev] = deriv[lev-1] = 0; 193 for(t= &suftab[0];sp=t->suf;t++) { 194 cp = ep; 195 while(*sp) 196 if(*--cp!=*sp++) 197 goto next; 198 for(sp=cp; --sp>=word&&!vowel(*sp); ) ; 199 if(sp<word) 200 return(0); 201 if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) 202 return(1); 203 if(t->p2!=0) { 204 deriv[lev] = deriv[lev+1] = 0; 205 return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); 206 } 207 return(0); 208 next: ; 209 } 210 return(0); 211 } 212 213 nop() 214 { 215 return(0); 216 } 217 218 strip(ep,d,a,lev) 219 char *ep,*d,*a; 220 { 221 return(putsuf(ep,a,lev)||suffix(ep,lev)); 222 } 223 224 s(ep,d,a,lev) 225 char *ep,*d,*a; 226 { 227 if(lev>DLEV+1) 228 return(0); 229 if(*ep=='s'&&ep[-1]=='s') 230 return(0); 231 return(strip(ep,d,a,lev)); 232 } 233 234 an(ep,d,a,lev) 235 char *ep,*d,*a; 236 { 237 if(!isupper(*word)) /*must be proper name*/ 238 return(0); 239 return(putsuf(ep,a,lev)); 240 } 241 242 ize(ep,d,a,lev) 243 char *ep,*d,*a; 244 { 245 *ep++ = 'e'; 246 return(strip(ep,"",d,lev)); 247 } 248 249 y_to_e(ep,d,a,lev) 250 char *ep,*d,*a; 251 { 252 char c = *ep; 253 *ep++ = 'e'; 254 if (strip(ep,"",d,lev)) 255 return (1); 256 ep[-1] = c; 257 return (0); 258 } 259 260 ily(ep,d,a,lev) 261 char *ep,*d,*a; 262 { 263 if(ep[-1]=='i') 264 return(i_to_y(ep,d,a,lev)); 265 else 266 return(strip(ep,d,a,lev)); 267 } 268 269 ncy(ep,d,a,lev) 270 char *ep, *d, *a; 271 { 272 if(skipv(skipv(ep-1))<word) 273 return(0); 274 ep[-1] = 't'; 275 return(strip(ep,d,a,lev)); 276 } 277 278 bility(ep,d,a,lev) 279 char *ep,*d,*a; 280 { 281 *ep++ = 'l'; 282 return(y_to_e(ep,d,a,lev)); 283 } 284 285 i_to_y(ep,d,a,lev) 286 char *ep,*d,*a; 287 { 288 if(ep[-1]=='i') { 289 ep[-1] = 'y'; 290 a = d; 291 } 292 return(strip(ep,"",a,lev)); 293 } 294 295 es(ep,d,a,lev) 296 char *ep,*d,*a; 297 { 298 if(lev>DLEV) 299 return(0); 300 switch(ep[-1]) { 301 default: 302 return(0); 303 case 'i': 304 return(i_to_y(ep,d,a,lev)); 305 case 's': 306 case 'h': 307 case 'z': 308 case 'x': 309 return(strip(ep,d,a,lev)); 310 } 311 } 312 313 metry(ep,d,a,lev) 314 char *ep, *d,*a; 315 { 316 ep[-2] = 'e'; 317 ep[-1] = 'r'; 318 return(strip(ep,d,a,lev)); 319 } 320 321 tion(ep,d,a,lev) 322 char *ep,*d,*a; 323 { 324 switch(ep[-2]) { 325 case 'c': 326 case 'r': 327 return(putsuf(ep,a,lev)); 328 case 'a': 329 return(y_to_e(ep,d,a,lev)); 330 } 331 return(0); 332 } 333 334 /* possible consonant-consonant-e ending*/ 335 CCe(ep,d,a,lev) 336 char *ep,*d,*a; 337 { 338 switch(ep[-1]) { 339 case 'l': 340 if(vowel(ep[-2])) 341 break; 342 switch(ep[-2]) { 343 case 'l': 344 case 'r': 345 case 'w': 346 break; 347 default: 348 return(y_to_e(ep,d,a,lev)); 349 } 350 break; 351 case 's': 352 if(ep[-2]=='s') 353 break; 354 case 'c': 355 case 'g': 356 if(*ep=='a') 357 return(0); 358 case 'v': 359 case 'z': 360 if(vowel(ep[-2])) 361 break; 362 case 'u': 363 if(y_to_e(ep,d,a,lev)) 364 return(1); 365 if(!(ep[-2]=='n'&&ep[-1]=='g')) 366 return(0); 367 } 368 return(VCe(ep,d,a,lev)); 369 } 370 371 /* possible consonant-vowel-consonant-e ending*/ 372 VCe(ep,d,a,lev) 373 char *ep,*d,*a; 374 { 375 char c; 376 c = ep[-1]; 377 if(c=='e') 378 return(0); 379 if(!vowel(c) && vowel(ep[-2])) { 380 c = *ep; 381 *ep++ = 'e'; 382 if(putsuf(ep,d,lev)||suffix(ep,lev)) 383 return(1); 384 ep--; 385 *ep = c; 386 } 387 return(strip(ep,d,a,lev)); 388 } 389 390 char *lookuppref(wp,ep) 391 char **wp; 392 char *ep; 393 { 394 register char **sp; 395 register char *bp,*cp; 396 for(sp=preftab;*sp;sp++) { 397 bp = *wp; 398 for(cp= *sp;*cp;cp++,bp++) 399 if(Tolower(*bp)!=*cp) 400 goto next; 401 for(cp=bp;cp<ep;cp++) 402 if(vowel(*cp)) { 403 *wp = bp; 404 return(*sp); 405 } 406 next: ; 407 } 408 return(0); 409 } 410 411 putsuf(ep,a,lev) 412 char *ep,*a; 413 { 414 register char *cp; 415 char *bp; 416 register char *pp; 417 int val = 0; 418 char space[20]; 419 deriv[lev] = a; 420 if(putw(word,ep,lev)) 421 return(1); 422 bp = word; 423 pp = space; 424 deriv[lev+1] = pp; 425 while(cp=lookuppref(&bp,ep)) { 426 *pp++ = '+'; 427 while(*pp = *cp++) 428 pp++; 429 if(putw(bp,ep,lev+1)) { 430 val = 1; 431 break; 432 } 433 } 434 deriv[lev+1] = deriv[lev+2] = 0; 435 return(val); 436 } 437 438 putw(bp,ep,lev) 439 char *bp,*ep; 440 { 441 register i, j; 442 char duple[3]; 443 if(ep-bp<=1) 444 return(0); 445 if(vowel(*ep)) { 446 if(monosyl(bp,ep)) 447 return(0); 448 } 449 i = dict(bp,ep); 450 if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { 451 ep--; 452 deriv[++lev] = duple; 453 duple[0] = '+'; 454 duple[1] = *ep; 455 duple[2] = 0; 456 i = dict(bp,ep); 457 } 458 if(vflag==0||i==0) 459 return(i); 460 j = lev; 461 do { 462 if(deriv[j]) 463 strcat(affix,deriv[j]); 464 } while(--j>0); 465 strcat(affix,"\t"); 466 return(i); 467 } 468 469 470 monosyl(bp,ep) 471 char *bp, *ep; 472 { 473 if(ep<bp+2) 474 return(0); 475 if(vowel(*--ep)||!vowel(*--ep) 476 ||ep[1]=='x'||ep[1]=='w') 477 return(0); 478 while(--ep>=bp) 479 if(vowel(*ep)) 480 return(0); 481 return(1); 482 } 483 484 char * 485 skipv(s) 486 char *s; 487 { 488 if(s>=word&&vowel(*s)) 489 s--; 490 while(s>=word&&!vowel(*s)) 491 s--; 492 return(s); 493 } 494 495 vowel(c) 496 { 497 switch(Tolower(c)) { 498 case 'a': 499 case 'e': 500 case 'i': 501 case 'o': 502 case 'u': 503 case 'y': 504 return(1); 505 } 506 return(0); 507 } 508 509 /* crummy way to Britishise */ 510 ise() 511 { 512 register struct suftab *p; 513 for(p = suftab;p->suf;p++) { 514 ztos(p->suf); 515 ztos(p->d1); 516 ztos(p->a1); 517 } 518 } 519 ztos(s) 520 char *s; 521 { 522 for(;*s;s++) 523 if(*s=='z') 524 *s = 's'; 525 } 526 527 dict(bp,ep) 528 char *bp, *ep; 529 { 530 register char *wp; 531 long h; 532 register long *lp; 533 register i; 534 if(xflag) 535 printf("=%.*s\n",ep-bp,bp); 536 for(i=0; i<NP; i++) { 537 for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) 538 h += *wp * *lp; 539 h += '\n' * *lp; 540 h %= p[i]; 541 if(get(h)==0) 542 return(0); 543 } 544 return(1); 545 } 546