1 #ifndef lint 2 static char sccsid[] = "@(#)spell.c 4.1 12/18/82"; 3 #endif 4 5 #include "spell.h" 6 #define DLEV 2 7 8 char *strcat(); 9 int strip(); 10 char *skipv(); 11 int an(); 12 int s(); 13 int es(); 14 int ily(); 15 int ncy(); 16 int CCe(); 17 int VCe(); 18 int bility(); 19 int tion(); 20 int ize(); 21 int y_to_e(); 22 int i_to_y(); 23 int nop(); 24 int metry(); 25 26 struct suftab { 27 char *suf; 28 int (*p1)(); 29 int n1; 30 char *d1; 31 char *a1; 32 int (*p2)(); 33 int n2; 34 char *d2; 35 char *a2; 36 } suftab[] = { 37 {"ssen",ily,4,"-y+iness","+ness" }, 38 {"ssel",ily,4,"-y+i+less","+less" }, 39 {"se",s,1,"","+s", es,2,"-y+ies","+es" }, 40 {"s'",s,2,"","+'s"}, 41 {"s",s,1,"","+s"}, 42 {"ecn",ncy,1,"","-t+ce"}, 43 {"ycn",ncy,1,"","-cy+t"}, 44 {"ytilb",nop,0,"",""}, 45 {"ytilib",bility,5,"-le+ility",""}, 46 {"elbaif",i_to_y,4,"-y+iable",""}, 47 {"elba",CCe,4,"-e+able","+able"}, 48 {"yti",CCe,3,"-e+ity","+ity"}, 49 {"ylb",y_to_e,1,"-e+y",""}, 50 {"yl",ily,2,"-y+ily","+ly"}, 51 {"laci",strip,2,"","+al"}, 52 {"latnem",strip,2,"","+al"}, 53 {"lanoi",strip,2,"","+al"}, 54 {"tnem",strip,4,"","+ment"}, 55 {"gni",CCe,3,"-e+ing","+ing"}, 56 {"reta",nop,0,"",""}, 57 {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, 58 {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, 59 {"citsi",strip,2,"","+ic"}, 60 {"cihparg",i_to_y,1,"-y+ic",""}, 61 {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, 62 {"cirtem",i_to_y,1,"-y+ic",""}, 63 {"yrtem",metry,0,"-ry+er",""}, 64 {"cigol",i_to_y,1,"-y+ic",""}, 65 {"tsigol",i_to_y,2,"-y+ist",""}, 66 {"tsi",VCe,3,"-e+ist","+ist"}, 67 {"msi",VCe,3,"-e+ism","+ist"}, 68 {"noitacif",i_to_y,6,"-y+ication",""}, 69 {"noitazi",ize,5,"-e+ation",""}, 70 {"rota",tion,2,"-e+or",""}, 71 {"noit",tion,3,"-e+ion","+ion"}, 72 {"naino",an,3,"","+ian"}, 73 {"na",an,1,"","+n"}, 74 {"evit",tion,3,"-e+ive","+ive"}, 75 {"ezi",CCe,3,"-e+ize","+ize"}, 76 {"pihs",strip,4,"","+ship"}, 77 {"dooh",ily,4,"-y+hood","+hood"}, 78 {"ekil",strip,4,"","+like"}, 79 0 80 }; 81 82 char *preftab[] = { 83 "anti", 84 "bio", 85 "dis", 86 "electro", 87 "en", 88 "fore", 89 "hyper", 90 "intra", 91 "inter", 92 "iso", 93 "kilo", 94 "magneto", 95 "meta", 96 "micro", 97 "milli", 98 "mis", 99 "mono", 100 "multi", 101 "non", 102 "out", 103 "over", 104 "photo", 105 "poly", 106 "pre", 107 "pseudo", 108 "re", 109 "semi", 110 "stereo", 111 "sub", 112 "super", 113 "thermo", 114 "ultra", 115 "under", /*must precede un*/ 116 "un", 117 0 118 }; 119 120 int vflag; 121 int xflag; 122 char word[100]; 123 char original[100]; 124 char *deriv[40]; 125 char affix[40]; 126 127 main(argc,argv) 128 char **argv; 129 { 130 register char *ep, *cp; 131 register char *dp; 132 int fold; 133 int j; 134 FILE *file, *found; 135 if(!prime(argc,argv)) { 136 fprintf(stderr, 137 "spell: cannot initialize hash table\n"); 138 exit(1); 139 } 140 found = fopen(argv[2],"w"); 141 for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) 142 switch(argv[0][1]) { 143 case 'b': 144 ise(); 145 break; 146 case 'v': 147 vflag++; 148 break; 149 case 'x': 150 xflag++; 151 break; 152 } 153 for(;; fprintf(file,"%s%s\n",affix,original)) { 154 affix[0] = 0; 155 file = found; 156 for(ep=word;(*ep=j=getchar())!='\n';ep++) 157 if(j == EOF) 158 exit(0); 159 for(cp=word,dp=original; cp<ep; ) 160 *dp++ = *cp++; 161 *dp = 0; 162 fold = 0; 163 for(cp=word;cp<ep;cp++) 164 if(islower(*cp)) 165 goto lcase; 166 if(putsuf(ep,".",0)) 167 continue; 168 ++fold; 169 for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) 170 *dp = Tolower(*cp); 171 lcase: 172 if(putsuf(ep,".",0)||suffix(ep,0)) 173 continue; 174 if(isupper(word[0])) { 175 for(cp=original,dp=word; *dp = *cp++; dp++) 176 if (fold) *dp = Tolower(*dp); 177 word[0] = Tolower(word[0]); 178 goto lcase; 179 } 180 file = stdout; 181 } 182 } 183 184 suffix(ep,lev) 185 char *ep; 186 { 187 register struct suftab *t; 188 register char *cp, *sp; 189 lev += DLEV; 190 deriv[lev] = deriv[lev-1] = 0; 191 for(t= &suftab[0];sp=t->suf;t++) { 192 cp = ep; 193 while(*sp) 194 if(*--cp!=*sp++) 195 goto next; 196 for(sp=cp; --sp>=word&&!vowel(*sp); ) ; 197 if(sp<word) 198 return(0); 199 if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) 200 return(1); 201 if(t->p2!=0) { 202 deriv[lev] = deriv[lev+1] = 0; 203 return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); 204 } 205 return(0); 206 next: ; 207 } 208 return(0); 209 } 210 211 nop() 212 { 213 return(0); 214 } 215 216 strip(ep,d,a,lev) 217 char *ep,*d,*a; 218 { 219 return(putsuf(ep,a,lev)||suffix(ep,lev)); 220 } 221 222 s(ep,d,a,lev) 223 char *ep,*d,*a; 224 { 225 if(lev>DLEV+1) 226 return(0); 227 if(*ep=='s'&&ep[-1]=='s') 228 return(0); 229 return(strip(ep,d,a,lev)); 230 } 231 232 an(ep,d,a,lev) 233 char *ep,*d,*a; 234 { 235 if(!isupper(*word)) /*must be proper name*/ 236 return(0); 237 return(putsuf(ep,a,lev)); 238 } 239 240 ize(ep,d,a,lev) 241 char *ep,*d,*a; 242 { 243 *ep++ = 'e'; 244 return(strip(ep,"",d,lev)); 245 } 246 247 y_to_e(ep,d,a,lev) 248 char *ep,*d,*a; 249 { 250 *ep++ = 'e'; 251 return(strip(ep,"",d,lev)); 252 } 253 254 ily(ep,d,a,lev) 255 char *ep,*d,*a; 256 { 257 if(ep[-1]=='i') 258 return(i_to_y(ep,d,a,lev)); 259 else 260 return(strip(ep,d,a,lev)); 261 } 262 263 ncy(ep,d,a,lev) 264 char *ep, *d, *a; 265 { 266 if(skipv(skipv(ep-1))<word) 267 return(0); 268 ep[-1] = 't'; 269 return(strip(ep,d,a,lev)); 270 } 271 272 bility(ep,d,a,lev) 273 char *ep,*d,*a; 274 { 275 *ep++ = 'l'; 276 return(y_to_e(ep,d,a,lev)); 277 } 278 279 i_to_y(ep,d,a,lev) 280 char *ep,*d,*a; 281 { 282 if(ep[-1]=='i') { 283 ep[-1] = 'y'; 284 a = d; 285 } 286 return(strip(ep,"",a,lev)); 287 } 288 289 es(ep,d,a,lev) 290 char *ep,*d,*a; 291 { 292 if(lev>DLEV) 293 return(0); 294 switch(ep[-1]) { 295 default: 296 return(0); 297 case 'i': 298 return(i_to_y(ep,d,a,lev)); 299 case 's': 300 case 'h': 301 case 'z': 302 case 'x': 303 return(strip(ep,d,a,lev)); 304 } 305 } 306 307 metry(ep,d,a,lev) 308 char *ep, *d,*a; 309 { 310 ep[-2] = 'e'; 311 ep[-1] = 'r'; 312 return(strip(ep,d,a,lev)); 313 } 314 315 tion(ep,d,a,lev) 316 char *ep,*d,*a; 317 { 318 switch(ep[-2]) { 319 case 'c': 320 case 'r': 321 return(putsuf(ep,a,lev)); 322 case 'a': 323 return(y_to_e(ep,d,a,lev)); 324 } 325 return(0); 326 } 327 328 /* possible consonant-consonant-e ending*/ 329 CCe(ep,d,a,lev) 330 char *ep,*d,*a; 331 { 332 switch(ep[-1]) { 333 case 'l': 334 if(vowel(ep[-2])) 335 break; 336 switch(ep[-2]) { 337 case 'l': 338 case 'r': 339 case 'w': 340 break; 341 default: 342 return(y_to_e(ep,d,a,lev)); 343 } 344 break; 345 case 's': 346 if(ep[-2]=='s') 347 break; 348 case 'c': 349 case 'g': 350 if(*ep=='a') 351 return(0); 352 case 'v': 353 case 'z': 354 if(vowel(ep[-2])) 355 break; 356 case 'u': 357 if(y_to_e(ep,d,a,lev)) 358 return(1); 359 if(!(ep[-2]=='n'&&ep[-1]=='g')) 360 return(0); 361 } 362 return(VCe(ep,d,a,lev)); 363 } 364 365 /* possible consonant-vowel-consonant-e ending*/ 366 VCe(ep,d,a,lev) 367 char *ep,*d,*a; 368 { 369 char c; 370 c = ep[-1]; 371 if(c=='e') 372 return(0); 373 if(!vowel(c) && vowel(ep[-2])) { 374 c = *ep; 375 *ep++ = 'e'; 376 if(putsuf(ep,d,lev)||suffix(ep,lev)) 377 return(1); 378 ep--; 379 *ep = c; 380 } 381 return(strip(ep,d,a,lev)); 382 } 383 384 char *lookuppref(wp,ep) 385 char **wp; 386 char *ep; 387 { 388 register char **sp; 389 register char *bp,*cp; 390 for(sp=preftab;*sp;sp++) { 391 bp = *wp; 392 for(cp= *sp;*cp;cp++,bp++) 393 if(Tolower(*bp)!=*cp) 394 goto next; 395 for(cp=bp;cp<ep;cp++) 396 if(vowel(*cp)) { 397 *wp = bp; 398 return(*sp); 399 } 400 next: ; 401 } 402 return(0); 403 } 404 405 putsuf(ep,a,lev) 406 char *ep,*a; 407 { 408 register char *cp; 409 char *bp; 410 register char *pp; 411 int val = 0; 412 char space[20]; 413 deriv[lev] = a; 414 if(putw(word,ep,lev)) 415 return(1); 416 bp = word; 417 pp = space; 418 deriv[lev+1] = pp; 419 while(cp=lookuppref(&bp,ep)) { 420 *pp++ = '+'; 421 while(*pp = *cp++) 422 pp++; 423 if(putw(bp,ep,lev+1)) { 424 val = 1; 425 break; 426 } 427 } 428 deriv[lev+1] = deriv[lev+2] = 0; 429 return(val); 430 } 431 432 putw(bp,ep,lev) 433 char *bp,*ep; 434 { 435 register i, j; 436 char duple[3]; 437 if(ep-bp<=1) 438 return(0); 439 if(vowel(*ep)) { 440 if(monosyl(bp,ep)) 441 return(0); 442 } 443 i = dict(bp,ep); 444 if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { 445 ep--; 446 deriv[++lev] = duple; 447 duple[0] = '+'; 448 duple[1] = *ep; 449 duple[2] = 0; 450 i = dict(bp,ep); 451 } 452 if(vflag==0||i==0) 453 return(i); 454 j = lev; 455 do { 456 if(deriv[j]) 457 strcat(affix,deriv[j]); 458 } while(--j>0); 459 strcat(affix,"\t"); 460 return(i); 461 } 462 463 464 monosyl(bp,ep) 465 char *bp, *ep; 466 { 467 if(ep<bp+2) 468 return(0); 469 if(vowel(*--ep)||!vowel(*--ep) 470 ||ep[1]=='x'||ep[1]=='w') 471 return(0); 472 while(--ep>=bp) 473 if(vowel(*ep)) 474 return(0); 475 return(1); 476 } 477 478 char * 479 skipv(s) 480 char *s; 481 { 482 if(s>=word&&vowel(*s)) 483 s--; 484 while(s>=word&&!vowel(*s)) 485 s--; 486 return(s); 487 } 488 489 vowel(c) 490 { 491 switch(Tolower(c)) { 492 case 'a': 493 case 'e': 494 case 'i': 495 case 'o': 496 case 'u': 497 case 'y': 498 return(1); 499 } 500 return(0); 501 } 502 503 /* crummy way to Britishise */ 504 ise() 505 { 506 register struct suftab *p; 507 for(p = suftab;p->suf;p++) { 508 ztos(p->suf); 509 ztos(p->d1); 510 ztos(p->a1); 511 } 512 } 513 ztos(s) 514 char *s; 515 { 516 for(;*s;s++) 517 if(*s=='z') 518 *s = 's'; 519 } 520 521 dict(bp,ep) 522 char *bp, *ep; 523 { 524 register char *wp; 525 long h; 526 register long *lp; 527 register i; 528 if(xflag) 529 printf("=%.*s\n",ep-bp,bp); 530 for(i=0; i<NP; i++) { 531 for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) 532 h += *wp * *lp; 533 h += '\n' * *lp; 534 h %= p[i]; 535 if(get(h)==0) 536 return(0); 537 } 538 return(1); 539 } 540