xref: /original-bsd/usr.bin/spell/spell.c (revision f0fd5f8a)
1 #ifndef lint
2 static char sccsid[] = "@(#)spell.c	4.1 12/18/82";
3 #endif
4 
5 #include "spell.h"
6 #define DLEV 2
7 
/*
 * Forward declarations: strcat() from the C library, plus skipv() and
 * the suffix-rule functions that are defined later in this file.
 */
char	*strcat(), *skipv();
int	strip(), an(), s(), es();
int	ily(), ncy(), CCe(), VCe();
int	bility(), tion(), ize();
int	y_to_e(), i_to_y(), nop(), metry();
25 
26 struct suftab {
27 	char *suf;
28 	int (*p1)();
29 	int n1;
30 	char *d1;
31 	char *a1;
32 	int (*p2)();
33 	int n2;
34 	char *d2;
35 	char *a2;
36 } suftab[] = {
37 	{"ssen",ily,4,"-y+iness","+ness" },
38 	{"ssel",ily,4,"-y+i+less","+less" },
39 	{"se",s,1,"","+s",		es,2,"-y+ies","+es" },
40 	{"s'",s,2,"","+'s"},
41 	{"s",s,1,"","+s"},
42 	{"ecn",ncy,1,"","-t+ce"},
43 	{"ycn",ncy,1,"","-cy+t"},
44 	{"ytilb",nop,0,"",""},
45 	{"ytilib",bility,5,"-le+ility",""},
46 	{"elbaif",i_to_y,4,"-y+iable",""},
47 	{"elba",CCe,4,"-e+able","+able"},
48 	{"yti",CCe,3,"-e+ity","+ity"},
49 	{"ylb",y_to_e,1,"-e+y",""},
50 	{"yl",ily,2,"-y+ily","+ly"},
51 	{"laci",strip,2,"","+al"},
52 	{"latnem",strip,2,"","+al"},
53 	{"lanoi",strip,2,"","+al"},
54 	{"tnem",strip,4,"","+ment"},
55 	{"gni",CCe,3,"-e+ing","+ing"},
56 	{"reta",nop,0,"",""},
57 	{"re",strip,1,"","+r",		i_to_y,2,"-y+ier","+er"},
58 	{"de",strip,1,"","+d",		i_to_y,2,"-y+ied","+ed"},
59 	{"citsi",strip,2,"","+ic"},
60 	{"cihparg",i_to_y,1,"-y+ic",""},
61 	{"tse",strip,2,"","+st",	i_to_y,3,"-y+iest","+est"},
62 	{"cirtem",i_to_y,1,"-y+ic",""},
63 	{"yrtem",metry,0,"-ry+er",""},
64 	{"cigol",i_to_y,1,"-y+ic",""},
65 	{"tsigol",i_to_y,2,"-y+ist",""},
66 	{"tsi",VCe,3,"-e+ist","+ist"},
67 	{"msi",VCe,3,"-e+ism","+ist"},
68 	{"noitacif",i_to_y,6,"-y+ication",""},
69 	{"noitazi",ize,5,"-e+ation",""},
70 	{"rota",tion,2,"-e+or",""},
71 	{"noit",tion,3,"-e+ion","+ion"},
72 	{"naino",an,3,"","+ian"},
73 	{"na",an,1,"","+n"},
74 	{"evit",tion,3,"-e+ive","+ive"},
75 	{"ezi",CCe,3,"-e+ize","+ize"},
76 	{"pihs",strip,4,"","+ship"},
77 	{"dooh",ily,4,"-y+hood","+hood"},
78 	{"ekil",strip,4,"","+like"},
79 	0
80 };
81 
/*
 * Prefixes recognised by lookuppref(), terminated by a null pointer.
 * The list is searched front to back and the first match wins, so a
 * longer prefix has to come before any shorter one it begins with.
 */
char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/*must precede un*/
	"un",
	0
};
119 
/* Option flags: main() bumps vflag for -v and xflag for -x. */
int	vflag, xflag;
/* word[] is the working copy the rules rewrite; original[] is the line as read. */
char	word[100], original[100];
/* Derivation strings, indexed by level; putw() joins them into affix[]. */
char	*deriv[40];
/* Text main() prints in front of the original word. */
char	affix[40];
126 
127 main(argc,argv)
128 char **argv;
129 {
130 	register char *ep, *cp;
131 	register char *dp;
132 	int fold;
133 	int j;
134 	FILE *file, *found;
135 	if(!prime(argc,argv)) {
136 		fprintf(stderr,
137 		    "spell: cannot initialize hash table\n");
138 		exit(1);
139 	}
140 	found = fopen(argv[2],"w");
141 	for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
142 		switch(argv[0][1]) {
143 		case 'b':
144 			ise();
145 			break;
146 		case 'v':
147 			vflag++;
148 			break;
149 		case 'x':
150 			xflag++;
151 			break;
152 		}
153 	for(;; fprintf(file,"%s%s\n",affix,original)) {
154 		affix[0] = 0;
155 		file = found;
156 		for(ep=word;(*ep=j=getchar())!='\n';ep++)
157 			if(j == EOF)
158 				exit(0);
159 		for(cp=word,dp=original; cp<ep; )
160 			*dp++ = *cp++;
161 		*dp = 0;
162 		fold = 0;
163 		for(cp=word;cp<ep;cp++)
164 			if(islower(*cp))
165 				goto lcase;
166 		if(putsuf(ep,".",0))
167 			continue;
168 		++fold;
169 		for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
170 			*dp = Tolower(*cp);
171 lcase:
172 		if(putsuf(ep,".",0)||suffix(ep,0))
173 			continue;
174 		if(isupper(word[0])) {
175 			for(cp=original,dp=word; *dp = *cp++; dp++)
176 				if (fold) *dp = Tolower(*dp);
177 			word[0] = Tolower(word[0]);
178 			goto lcase;
179 		}
180 		file = stdout;
181 	}
182 }
183 
184 suffix(ep,lev)
185 char *ep;
186 {
187 	register struct suftab *t;
188 	register char *cp, *sp;
189 	lev += DLEV;
190 	deriv[lev] = deriv[lev-1] = 0;
191 	for(t= &suftab[0];sp=t->suf;t++) {
192 		cp = ep;
193 		while(*sp)
194 			if(*--cp!=*sp++)
195 				goto next;
196 		for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
197 		if(sp<word)
198 			return(0);
199 		if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
200 			return(1);
201 		if(t->p2!=0) {
202 			deriv[lev] = deriv[lev+1] = 0;
203 			return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
204 		}
205 		return(0);
206 next:		;
207 	}
208 	return(0);
209 }
210 
/*
 * Rule that never succeeds; an entry using it matches its suffix in
 * suffix() and then fails, so later entries are not reached.
 */
int
nop()
{
	return 0;
}
215 
/*
 * Basic rule: look up the word ending at ep with derivation a, and if
 * that fails try to strip a further suffix from it.  d is unused.
 * Returns 1 on success, 0 on failure.
 */
int
strip(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev) != 0);
}
221 
222 s(ep,d,a,lev)
223 char *ep,*d,*a;
224 {
225 	if(lev>DLEV+1)
226 		return(0);
227 	if(*ep=='s'&&ep[-1]=='s')
228 		return(0);
229 	return(strip(ep,d,a,lev));
230 }
231 
232 an(ep,d,a,lev)
233 char *ep,*d,*a;
234 {
235 	if(!isupper(*word))	/*must be proper name*/
236 		return(0);
237 	return(putsuf(ep,a,lev));
238 }
239 
/*
 * Store an 'e' at ep and hand the word, now one character longer, to
 * strip() with d as its derivation string.  a is unused.
 */
int
ize(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
246 
/*
 * Put an 'e' at ep, then try the lengthened word through strip() using
 * d as the derivation string.  a is unused.
 */
int
y_to_e(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	char *end = ep;

	*end = 'e';
	end++;
	return(strip(end,"",d,lev));
}
253 
/*
 * Choose between i_to_y() and strip(): a stem ending in 'i' goes to
 * i_to_y(), anything else to strip().
 */
int
ily(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
262 
263 ncy(ep,d,a,lev)
264 char *ep, *d, *a;
265 {
266 	if(skipv(skipv(ep-1))<word)
267 		return(0);
268 	ep[-1] = 't';
269 	return(strip(ep,d,a,lev));
270 }
271 
/*
 * Rule for "-bility": store an 'l' at ep and let y_to_e() append the
 * 'e' after it.
 */
int
bility(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
278 
/*
 * When the stem ends in 'i', change that 'i' to 'y' and report d as
 * the derivation; otherwise leave the stem alone and report a.  Either
 * way the result goes to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	char *note = a;

	if(ep[-1]=='i') {
		note = d;
		ep[-1] = 'y';
	}
	return(strip(ep,"",note,lev));
}
288 
289 es(ep,d,a,lev)
290 char *ep,*d,*a;
291 {
292 	if(lev>DLEV)
293 		return(0);
294 	switch(ep[-1]) {
295 	default:
296 		return(0);
297 	case 'i':
298 		return(i_to_y(ep,d,a,lev));
299 	case 's':
300 	case 'h':
301 	case 'z':
302 	case 'x':
303 		return(strip(ep,d,a,lev));
304 	}
305 }
306 
/*
 * Rule for "-metry": overwrite the two characters before ep with "er"
 * and continue with strip().
 */
int
metry(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	char *tail = ep-2;

	tail[0] = 'e';
	tail[1] = 'r';
	return(strip(ep,d,a,lev));
}
314 
/*
 * Rule for "-tion", "-tive" and "-ator", decided by the character two
 * places before ep: 'c' or 'r' means a plain lookup with derivation a,
 * 'a' means y_to_e(), and anything else is rejected.
 */
int
tion(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	char key = ep[-2];

	if(key=='c' || key=='r')
		return(putsuf(ep,a,lev));
	if(key=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
327 
/*
 * Possible consonant-consonant-e ending.
 *
 * Decides from the last one or two characters of the stem whether to
 * try y_to_e() (which writes an 'e' at ep), and whether to hand the
 * stem on to VCe().
 */
int
CCe(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	char last = ep[-1];

	if(last=='l') {
		/* consonant + 'l', other than "ll", "rl", "wl" */
		if(!vowel(ep[-2]) && ep[-2]!='l' && ep[-2]!='r' && ep[-2]!='w')
			return(y_to_e(ep,d,a,lev));
		return(VCe(ep,d,a,lev));
	}
	if(last=='s' && ep[-2]=='s')
		return(VCe(ep,d,a,lev));
	if((last=='s' || last=='c' || last=='g') && *ep=='a')
		return(0);
	if(last=='s' || last=='c' || last=='g' || last=='v' || last=='z') {
		if(vowel(ep[-2]))
			return(VCe(ep,d,a,lev));
	} else if(last!='u')
		return(VCe(ep,d,a,lev));

	if(y_to_e(ep,d,a,lev))
		return(1);
	/*
	 * The word is examined again here, after the failed attempt, not
	 * taken from the values seen on entry: only "...ng" goes on to VCe().
	 */
	if(!(ep[-2]=='n' && ep[-1]=='g'))
		return(0);
	return(VCe(ep,d,a,lev));
}
364 
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem that already ends in 'e' is rejected.  When the stem ends in
 * a vowel followed by a non-vowel, an 'e' is put at ep and the longer
 * word is tried with derivation d; if that fails the overwritten
 * character is put back.  The last resort is strip() on the stem.
 */
int
VCe(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	char saved;

	if(ep[-1]=='e')
		return(0);
	if(!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev) || suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
383 
384 char *lookuppref(wp,ep)
385 char **wp;
386 char *ep;
387 {
388 	register char **sp;
389 	register char *bp,*cp;
390 	for(sp=preftab;*sp;sp++) {
391 		bp = *wp;
392 		for(cp= *sp;*cp;cp++,bp++)
393 			if(Tolower(*bp)!=*cp)
394 				goto next;
395 		for(cp=bp;cp<ep;cp++)
396 			if(vowel(*cp)) {
397 				*wp = bp;
398 				return(*sp);
399 			}
400 next:	;
401 	}
402 	return(0);
403 }
404 
405 putsuf(ep,a,lev)
406 char *ep,*a;
407 {
408 	register char *cp;
409 	char *bp;
410 	register char *pp;
411 	int val = 0;
412 	char space[20];
413 	deriv[lev] = a;
414 	if(putw(word,ep,lev))
415 		return(1);
416 	bp = word;
417 	pp = space;
418 	deriv[lev+1] = pp;
419 	while(cp=lookuppref(&bp,ep)) {
420 		*pp++ = '+';
421 		while(*pp = *cp++)
422 			pp++;
423 		if(putw(bp,ep,lev+1)) {
424 			val = 1;
425 			break;
426 		}
427 	}
428 	deriv[lev+1] = deriv[lev+2] = 0;
429 	return(val);
430 }
431 
432 putw(bp,ep,lev)
433 char *bp,*ep;
434 {
435 	register i, j;
436 	char duple[3];
437 	if(ep-bp<=1)
438 		return(0);
439 	if(vowel(*ep)) {
440 		if(monosyl(bp,ep))
441 			return(0);
442 	}
443 	i = dict(bp,ep);
444 	if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
445 		ep--;
446 		deriv[++lev] = duple;
447 		duple[0] = '+';
448 		duple[1] = *ep;
449 		duple[2] = 0;
450 		i = dict(bp,ep);
451 	}
452 	if(vflag==0||i==0)
453 		return(i);
454 	j = lev;
455 	do {
456 		if(deriv[j])
457 			strcat(affix,deriv[j]);
458 	} while(--j>0);
459 	strcat(affix,"\t");
460 	return(i);
461 }
462 
463 
/*
 * Return 1 when the text from bp up to ep is at least two characters
 * long, ends in a vowel followed by a single non-vowel other than 'x'
 * or 'w', and has no vowel anywhere before that pair; otherwise 0.
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *cp;

	if(ep-bp < 2)
		return(0);
	if(vowel(ep[-1]) || !vowel(ep[-2]) || ep[-1]=='x' || ep[-1]=='w')
		return(0);
	for(cp=ep-3; cp>=bp; cp--)
		if(vowel(*cp))
			return(0);
	return(1);
}
477 
478 char *
479 skipv(s)
480 char *s;
481 {
482 	if(s>=word&&vowel(*s))
483 		s--;
484 	while(s>=word&&!vowel(*s))
485 		s--;
486 	return(s);
487 }
488 
/*
 * Return 1 if c, after Tolower, is one of a e i o u y; otherwise 0.
 */
int
vowel(c)
int c;
{
	int lc = Tolower(c);

	return(lc=='a' || lc=='e' || lc=='i' ||
	    lc=='o' || lc=='u' || lc=='y');
}
502 
503 /* crummy way to Britishise */
504 ise()
505 {
506 	register struct suftab *p;
507 	for(p = suftab;p->suf;p++) {
508 		ztos(p->suf);
509 		ztos(p->d1);
510 		ztos(p->a1);
511 	}
512 }
/*
 * Replace every 'z' in the string s by 's', in place.
 */
int
ztos(s)
char *s;
{
	register char *cp = s;

	while(*cp != 0) {
		if(*cp == 'z')
			*cp = 's';
		cp++;
	}
}
520 
521 dict(bp,ep)
522 char *bp, *ep;
523 {
524 	register char *wp;
525 	long h;
526 	register long *lp;
527 	register i;
528 	if(xflag)
529 		printf("=%.*s\n",ep-bp,bp);
530 	for(i=0; i<NP; i++) {
531 		for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
532 			h += *wp * *lp;
533 		h += '\n' * *lp;
534 		h %= p[i];
535 		if(get(h)==0)
536 			return(0);
537 	}
538 	return(1);
539 }
540