xref: /original-bsd/usr.bin/spell/spell.c (revision 91abda3c)
1 #ifndef lint
2 static char sccsid[] = "@(#)spell.c	4.3 05/15/90";
3 #endif
4 
#include <stdlib.h>

#include "spell.h"
6 #define DLEV 2
7 
8 char	*strcat();
9 int	strip();
10 char	*skipv();
11 int	an();
12 int	s();
13 int	es();
14 int	ily();
15 int	ncy();
16 int	CCe();
17 int	VCe();
18 int	bility();
19 int	tion();
20 int	ize();
21 int	y_to_e();
22 int	i_to_y();
23 int	nop();
24 int	metry();
25 
26 struct suftab {
27 	char *suf;
28 	int (*p1)();
29 	int n1;
30 	char *d1;
31 	char *a1;
32 	int (*p2)();
33 	int n2;
34 	char *d2;
35 	char *a2;
36 } suftab[] = {
37 	{"ssen",ily,4,"-y+iness","+ness" },
38 	{"ssel",ily,4,"-y+i+less","+less" },
39 	{"se",s,1,"","+s",		es,2,"-y+ies","+es" },
40 	{"s'",s,2,"","+'s"},
41 	{"s",s,1,"","+s"},
42 	{"ecn",ncy,1,"","-t+ce"},
43 	{"ycn",ncy,1,"","-cy+t"},
44 	{"ytilb",nop,0,"",""},
45 	{"ytilib",bility,5,"-le+ility",""},
46 	{"elbaif",i_to_y,4,"-y+iable",""},
47 	{"elba",CCe,4,"-e+able","+able"},
48 	{"yti",CCe,3,"-e+ity","+ity"},
49 	{"ylb",y_to_e,1,"-e+y",""},
50 	{"yl",ily,2,"-y+ily","+ly"},
51 	{"laci",strip,2,"","+al"},
52 	{"latnem",strip,2,"","+al"},
53 	{"lanoi",strip,2,"","+al"},
54 	{"tnem",strip,4,"","+ment"},
55 	{"gni",CCe,3,"-e+ing","+ing"},
56 	{"reta",nop,0,"",""},
57 	{"re",strip,1,"","+r",		i_to_y,2,"-y+ier","+er"},
58 	{"de",strip,1,"","+d",		i_to_y,2,"-y+ied","+ed"},
59 	{"citsi",strip,2,"","+ic"},
60 	{"cihparg",i_to_y,1,"-y+ic",""},
61 	{"tse",strip,2,"","+st",	i_to_y,3,"-y+iest","+est"},
62 	{"cirtem",i_to_y,1,"-y+ic",""},
63 	{"yrtem",metry,0,"-ry+er",""},
64 	{"cigol",i_to_y,1,"-y+ic",""},
65 	{"tsigol",i_to_y,2,"-y+ist",""},
66 	{"tsi",VCe,3,"-e+ist","+ist"},
67 	{"msi",VCe,3,"-e+ism","+ist"},
68 	{"noitacif",i_to_y,6,"-y+ication",""},
69 	{"noitazi",ize,5,"-e+ation",""},
70 	{"rota",tion,2,"-e+or",""},
71 	{"noit",tion,3,"-e+ion","+ion"},
72 	{"naino",an,3,"","+ian"},
73 	{"na",an,1,"","+n"},
74 	{"evit",tion,3,"-e+ive","+ive"},
75 	{"ezi",CCe,3,"-e+ize","+ize"},
76 	{"pihs",strip,4,"","+ship"},
77 	{"dooh",ily,4,"-y+hood","+hood"},
78 	{"ekil",strip,4,"","+like"},
79 	0
80 };
81 
/*
 * Prefixes that lookuppref() may peel off the front of a word.  The list
 * is walked in order up to the null terminator and the first usable entry
 * wins, so a prefix has to appear before any shorter prefix it starts with.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore",
	"hyper", "intra", "inter", "iso", "kilo", "magneto",
	"meta", "micro", "milli", "mis", "mono", "multi",
	"non", "out", "over", "photo", "poly", "pre",
	"pseudo", "re", "semi", "stereo", "sub", "super",
	"thermo", "ultra",
	"under",	/* must precede un */
	"un",
	0
};
119 
/* option flags: vflag makes putw() record derivations, xflag makes dict() echo each probe */
int vflag, xflag;

/* the word being analysed (rules edit it in place) and the untouched input line */
char word[100], original[100];

/* derivation fragments, indexed by recursion level, gathered by putw() into affix */
char *deriv[40];
char affix[40];
126 
127 main(argc,argv)
128 char **argv;
129 {
130 	register char *ep, *cp;
131 	register char *dp;
132 	int fold;
133 	int j;
134 	FILE *file, *found;
135 	if(!prime(argc,argv)) {
136 		fprintf(stderr,
137 		    "spell: cannot initialize hash table\n");
138 		exit(1);
139 	}
140 	found = fopen(argv[2],"w");
141 	for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
142 		switch(argv[0][1]) {
143 		case 'b':
144 			ise();
145 			break;
146 		case 'v':
147 			vflag++;
148 			break;
149 		case 'x':
150 			xflag++;
151 			break;
152 		}
153 	for(;; fprintf(file,"%s%s\n",affix,original)) {
154 		affix[0] = 0;
155 		file = found;
156 		for(ep=word;(*ep=j=getchar())!='\n';ep++)
157 			if(j == EOF) {
158 				fclose(found);
159 				exit(0);
160 			}
161 		for(cp=word,dp=original; cp<ep; )
162 			*dp++ = *cp++;
163 		*dp = 0;
164 		fold = 0;
165 		for(cp=word;cp<ep;cp++)
166 			if(islower(*cp))
167 				goto lcase;
168 		if(putsuf(ep,".",0))
169 			continue;
170 		++fold;
171 		for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
172 			*dp = Tolower(*cp);
173 lcase:
174 		if(putsuf(ep,".",0)||suffix(ep,0))
175 			continue;
176 		if(isupper(word[0])) {
177 			for(cp=original,dp=word; *dp = *cp++; dp++)
178 				if (fold) *dp = Tolower(*dp);
179 			word[0] = Tolower(word[0]);
180 			goto lcase;
181 		}
182 		file = stdout;
183 	}
184 }
185 
186 suffix(ep,lev)
187 char *ep;
188 {
189 	register struct suftab *t;
190 	register char *cp, *sp;
191 	lev += DLEV;
192 	deriv[lev] = deriv[lev-1] = 0;
193 	for(t= &suftab[0];sp=t->suf;t++) {
194 		cp = ep;
195 		while(*sp)
196 			if(*--cp!=*sp++)
197 				goto next;
198 		for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
199 		if(sp<word)
200 			return(0);
201 		if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
202 			return(1);
203 		if(t->p2!=0) {
204 			deriv[lev] = deriv[lev+1] = 0;
205 			return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
206 		}
207 		return(0);
208 next:		;
209 	}
210 	return(0);
211 }
212 
/*
 * Suffix rule that rejects everything.  It is called through the suftab
 * function pointers with the same four arguments as every other rule, so
 * it declares them even though it ignores them.  Always returns 0.
 */
int
nop(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(0);
}
217 
/*
 * Basic rule: accept if the word ending at ep is found by putsuf() with
 * derivation a, otherwise look for a further suffix.  d is unused.
 * Returns 1 on success, 0 on failure.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev)!=0);
}
223 
224 s(ep,d,a,lev)
225 char *ep,*d,*a;
226 {
227 	if(lev>DLEV+1)
228 		return(0);
229 	if(*ep=='s'&&ep[-1]=='s')
230 		return(0);
231 	return(strip(ep,d,a,lev));
232 }
233 
234 an(ep,d,a,lev)
235 char *ep,*d,*a;
236 {
237 	if(!isupper(*word))	/*must be proper name*/
238 		return(0);
239 	return(putsuf(ep,a,lev));
240 }
241 
/*
 * Rule that puts an 'e' at ep and then strips, reporting d as the
 * derivation.  The overwritten character is not put back on failure.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
248 
/*
 * Temporarily replace the character at ep with 'e' and strip with d as the
 * derivation.  Returns 1 on success; on failure the original character is
 * restored and 0 is returned.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char saved;

	saved = ep[0];
	ep[0] = 'e';
	if(!strip(ep+1,"",d,lev)) {
		ep[0] = saved;
		return (0);
	}
	return (1);
}
259 
/*
 * Rule for suffixes that turn a final y into i: a stem ending in 'i' is
 * passed to i_to_y(), any other stem to strip().
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
268 
269 ncy(ep,d,a,lev)
270 char *ep, *d, *a;
271 {
272 	if(skipv(skipv(ep-1))<word)
273 		return(0);
274 	ep[-1] = 't';
275 	return(strip(ep,d,a,lev));
276 }
277 
/*
 * Rule for "-bility": writes 'l' at ep and lets y_to_e() add the 'e'
 * that follows it.
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
284 
/*
 * If the stem ends in 'i', turn that into 'y' and report d as the
 * derivation; otherwise report a.  Either way the stem goes to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char *shown;

	shown = a;
	if(ep[-1]=='i') {
		ep[-1] = 'y';
		shown = d;
	}
	return(strip(ep,"",shown,lev));
}
294 
295 es(ep,d,a,lev)
296 char *ep,*d,*a;
297 {
298 	if(lev>DLEV)
299 		return(0);
300 	switch(ep[-1]) {
301 	default:
302 		return(0);
303 	case 'i':
304 		return(i_to_y(ep,d,a,lev));
305 	case 's':
306 	case 'h':
307 	case 'z':
308 	case 'x':
309 		return(strip(ep,d,a,lev));
310 	}
311 }
312 
/*
 * Rule for "-metry": overwrite the last two characters before ep with
 * "er" and strip the result.
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	char *tail;

	tail = ep-2;
	tail[0] = 'e';
	tail[1] = 'r';
	return(strip(ep,d,a,lev));
}
320 
/*
 * Rule for "-tion"/"-tive"/"-ator": the character two places before ep
 * picks the treatment: 'c' or 'r' checks the stem with putsuf(), 'a' tries
 * y_to_e(), anything else fails.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int key;

	key = ep[-2];
	if(key=='c' || key=='r')
		return(putsuf(ep,a,lev));
	if(key=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
333 
/*
 * Possible consonant-consonant-e ending.  Depending on the last two
 * characters of the stem this either tries the stem with an 'e' put back
 * (y_to_e), gives up, or defers to VCe().
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	int last;

	last = ep[-1];
	if(last=='l') {
		/* "-le" is only tried after a consonant other than l, r, w */
		if(!vowel(ep[-2]) && ep[-2]!='l' && ep[-2]!='r' && ep[-2]!='w')
			return(y_to_e(ep,d,a,lev));
		return(VCe(ep,d,a,lev));
	}
	if(last!='s' && last!='c' && last!='g' &&
	    last!='v' && last!='z' && last!='u')
		return(VCe(ep,d,a,lev));
	if(last=='s' && ep[-2]=='s')
		return(VCe(ep,d,a,lev));
	/* s, c and g are refused when the removed suffix began with 'a' */
	if((last=='s' || last=='c' || last=='g') && *ep=='a')
		return(0);
	/* except after 'u', a vowel in front means VCe() should decide */
	if(last!='u' && vowel(ep[-2]))
		return(VCe(ep,d,a,lev));
	if(y_to_e(ep,d,a,lev))
		return(1);
	/* read the stem again: the failed attempt may have edited it */
	if(ep[-2]=='n' && ep[-1]=='g')
		return(VCe(ep,d,a,lev));
	return(0);
}
370 
/*
 * Possible consonant-vowel-consonant-e ending.  A stem that already ends
 * in 'e' is refused.  When the stem ends vowel-consonant it is first tried
 * with an 'e' put back (derivation d); if that fails the character is
 * restored and the bare stem goes to strip().
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char saved;

	if(ep[-1]=='e')
		return(0);
	if(!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev) || suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
389 
390 char *lookuppref(wp,ep)
391 char **wp;
392 char *ep;
393 {
394 	register char **sp;
395 	register char *bp,*cp;
396 	for(sp=preftab;*sp;sp++) {
397 		bp = *wp;
398 		for(cp= *sp;*cp;cp++,bp++)
399 			if(Tolower(*bp)!=*cp)
400 				goto next;
401 		for(cp=bp;cp<ep;cp++)
402 			if(vowel(*cp)) {
403 				*wp = bp;
404 				return(*sp);
405 			}
406 next:	;
407 	}
408 	return(0);
409 }
410 
411 putsuf(ep,a,lev)
412 char *ep,*a;
413 {
414 	register char *cp;
415 	char *bp;
416 	register char *pp;
417 	int val = 0;
418 	char space[20];
419 	deriv[lev] = a;
420 	if(putw(word,ep,lev))
421 		return(1);
422 	bp = word;
423 	pp = space;
424 	deriv[lev+1] = pp;
425 	while(cp=lookuppref(&bp,ep)) {
426 		*pp++ = '+';
427 		while(*pp = *cp++)
428 			pp++;
429 		if(putw(bp,ep,lev+1)) {
430 			val = 1;
431 			break;
432 		}
433 	}
434 	deriv[lev+1] = deriv[lev+2] = 0;
435 	return(val);
436 }
437 
438 putw(bp,ep,lev)
439 char *bp,*ep;
440 {
441 	register i, j;
442 	char duple[3];
443 	if(ep-bp<=1)
444 		return(0);
445 	if(vowel(*ep)) {
446 		if(monosyl(bp,ep))
447 			return(0);
448 	}
449 	i = dict(bp,ep);
450 	if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
451 		ep--;
452 		deriv[++lev] = duple;
453 		duple[0] = '+';
454 		duple[1] = *ep;
455 		duple[2] = 0;
456 		i = dict(bp,ep);
457 	}
458 	if(vflag==0||i==0)
459 		return(i);
460 	j = lev;
461 	do {
462 		if(deriv[j])
463 			strcat(affix,deriv[j]);
464 	} while(--j>0);
465 	strcat(affix,"\t");
466 	return(i);
467 }
468 
469 
/*
 * Return 1 when the text from bp up to ep is at least two characters
 * long, ends in a vowel followed by one non-vowel other than 'x' or 'w',
 * and has no vowel before that; return 0 otherwise.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *p;

	if(ep-bp<2)
		return(0);
	if(vowel(ep[-1]))
		return(0);
	if(!vowel(ep[-2]))
		return(0);
	if(ep[-1]=='x' || ep[-1]=='w')
		return(0);
	for(p=ep-2; p>bp; )
		if(vowel(*--p))
			return(0);
	return(1);
}
483 
484 char *
485 skipv(s)
486 char *s;
487 {
488 	if(s>=word&&vowel(*s))
489 		s--;
490 	while(s>=word&&!vowel(*s))
491 		s--;
492 	return(s);
493 }
494 
/*
 * Return 1 if c, after Tolower, is one of a e i o u y; otherwise 0.
 */
int
vowel(c)
int c;
{
	int lc;

	lc = Tolower(c);
	if(lc=='a' || lc=='e' || lc=='i' ||
	    lc=='o' || lc=='u' || lc=='y')
		return(1);
	return(0);
}
508 
509 /* crummy way to Britishise */
510 ise()
511 {
512 	register struct suftab *p;
513 	for(p = suftab;p->suf;p++) {
514 		ztos(p->suf);
515 		ztos(p->d1);
516 		ztos(p->a1);
517 	}
518 }
/*
 * Replace every lower-case 'z' in the NUL-terminated string s with 's',
 * in place.
 */
int
ztos(s)
char *s;
{
	register char *p;

	p = s;
	while(*p!=0) {
		if(*p=='z')
			*p = 's';
		p++;
	}
}
526 
527 dict(bp,ep)
528 char *bp, *ep;
529 {
530 	register char *wp;
531 	long h;
532 	register long *lp;
533 	register i;
534 	if(xflag)
535 		printf("=%.*s\n",ep-bp,bp);
536 	for(i=0; i<NP; i++) {
537 		for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
538 			h += *wp * *lp;
539 		h += '\n' * *lp;
540 		h %= p[i];
541 		if(get(h)==0)
542 			return(0);
543 	}
544 	return(1);
545 }
546