xref: /original-bsd/usr.bin/spell/spell.c (revision 2cb1372a)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.proprietary.c%
6  */
7 
8 #ifndef lint
9 static char copyright[] =
10 "@(#) Copyright (c) 1991, 1993\n\
11 	The Regents of the University of California.  All rights reserved.\n";
12 #endif /* not lint */
13 
14 #ifndef lint
15 static char sccsid[] = "@(#)spell.c	8.1 (Berkeley) 06/06/93";
16 #endif /* not lint */
17 
18 #include "spell.h"
19 #define DLEV 2
20 
/*
 * Forward declarations.  Every definition in this file uses an old-style
 * (K&R) parameter list, so the declarations carry no prototypes.
 */
char	*strcat();

/* helpers that are called before their definitions appear below */
char	*lookuppref();
char	*skipv();
int	suffix();
int	putsuf();
int	putword();
int	monosyl();
int	vowel();
int	dict();
int	ise();
int	ztos();

/* suffix handlers referenced from suftab[] */
int	strip();
int	an();
int	s();
int	es();
int	ily();
int	ncy();
int	CCe();
int	VCe();
int	bility();
int	tion();
int	ize();
int	y_to_e();
int	i_to_y();
int	nop();
int	metry();
38 
39 struct suftab {
40 	char *suf;
41 	int (*p1)();
42 	int n1;
43 	char *d1;
44 	char *a1;
45 	int (*p2)();
46 	int n2;
47 	char *d2;
48 	char *a2;
49 } suftab[] = {
50 	{"ssen",ily,4,"-y+iness","+ness" },
51 	{"ssel",ily,4,"-y+i+less","+less" },
52 	{"se",s,1,"","+s",		es,2,"-y+ies","+es" },
53 	{"s'",s,2,"","+'s"},
54 	{"s",s,1,"","+s"},
55 	{"ecn",ncy,1,"","-t+ce"},
56 	{"ycn",ncy,1,"","-cy+t"},
57 	{"ytilb",nop,0,"",""},
58 	{"ytilib",bility,5,"-le+ility",""},
59 	{"elbaif",i_to_y,4,"-y+iable",""},
60 	{"elba",CCe,4,"-e+able","+able"},
61 	{"yti",CCe,3,"-e+ity","+ity"},
62 	{"ylb",y_to_e,1,"-e+y",""},
63 	{"yl",ily,2,"-y+ily","+ly"},
64 	{"laci",strip,2,"","+al"},
65 	{"latnem",strip,2,"","+al"},
66 	{"lanoi",strip,2,"","+al"},
67 	{"tnem",strip,4,"","+ment"},
68 	{"gni",CCe,3,"-e+ing","+ing"},
69 	{"reta",nop,0,"",""},
70 	{"re",strip,1,"","+r",		i_to_y,2,"-y+ier","+er"},
71 	{"de",strip,1,"","+d",		i_to_y,2,"-y+ied","+ed"},
72 	{"citsi",strip,2,"","+ic"},
73 	{"cihparg",i_to_y,1,"-y+ic",""},
74 	{"tse",strip,2,"","+st",	i_to_y,3,"-y+iest","+est"},
75 	{"cirtem",i_to_y,1,"-y+ic",""},
76 	{"yrtem",metry,0,"-ry+er",""},
77 	{"cigol",i_to_y,1,"-y+ic",""},
78 	{"tsigol",i_to_y,2,"-y+ist",""},
79 	{"tsi",VCe,3,"-e+ist","+ist"},
80 	{"msi",VCe,3,"-e+ism","+ist"},
81 	{"noitacif",i_to_y,6,"-y+ication",""},
82 	{"noitazi",ize,5,"-e+ation",""},
83 	{"rota",tion,2,"-e+or",""},
84 	{"noit",tion,3,"-e+ion","+ion"},
85 	{"naino",an,3,"","+ian"},
86 	{"na",an,1,"","+n"},
87 	{"evit",tion,3,"-e+ive","+ive"},
88 	{"ezi",CCe,3,"-e+ize","+ize"},
89 	{"pihs",strip,4,"","+ship"},
90 	{"dooh",ily,4,"-y+hood","+hood"},
91 	{"ekil",strip,4,"","+like"},
92 	0
93 };
94 
/*
 * Prefixes recognised by lookuppref(), tried in the order listed and
 * terminated by a null pointer.  "under" must precede "un".
 */
char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/*must precede un*/
	"un",
	0
};
132 
/* option counters: main() bumps one for each -v / -x argument */
int vflag, xflag;
/* the word being examined: a working copy that the suffix handlers
 * rewrite in place, and the text exactly as it was read */
char word[100], original[100];
/* affix descriptions, indexed by derivation level */
char *deriv[40];
/* derivation text that main() prints in front of the word */
char affix[40];
139 
140 main(argc,argv)
141 int argc;
142 char **argv;
143 {
144 	register char *ep, *cp;
145 	register char *dp;
146 	int fold;
147 	int j;
148 	FILE *file, *found;
149 	if(!prime(argc,argv)) {
150 		fprintf(stderr,
151 		    "spell: cannot initialize hash table\n");
152 		exit(1);
153 	}
154 	found = fopen(argv[2],"w");
155 	for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
156 		switch(argv[0][1]) {
157 		case 'b':
158 			ise();
159 			break;
160 		case 'v':
161 			vflag++;
162 			break;
163 		case 'x':
164 			xflag++;
165 			break;
166 		}
167 	for(;; fprintf(file,"%s%s\n",affix,original)) {
168 		affix[0] = 0;
169 		file = found;
170 		for(ep=word;(*ep=j=getchar())!='\n';ep++)
171 			if(j == EOF) {
172 				fclose(found);
173 				exit(0);
174 			}
175 		for(cp=word,dp=original; cp<ep; )
176 			*dp++ = *cp++;
177 		*dp = 0;
178 		fold = 0;
179 		for(cp=word;cp<ep;cp++)
180 			if(islower(*cp))
181 				goto lcase;
182 		if(putsuf(ep,".",0))
183 			continue;
184 		++fold;
185 		for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
186 			*dp = Tolower(*cp);
187 lcase:
188 		if(putsuf(ep,".",0)||suffix(ep,0))
189 			continue;
190 		if(isupper(word[0])) {
191 			for(cp=original,dp=word; *dp = *cp++; dp++)
192 				if (fold) *dp = Tolower(*dp);
193 			word[0] = Tolower(word[0]);
194 			goto lcase;
195 		}
196 		file = stdout;
197 	}
198 }
199 
200 suffix(ep,lev)
201 char *ep;
202 int lev;
203 {
204 	register struct suftab *t;
205 	register char *cp, *sp;
206 	lev += DLEV;
207 	deriv[lev] = deriv[lev-1] = 0;
208 	for(t= &suftab[0];sp=t->suf;t++) {
209 		cp = ep;
210 		while(*sp)
211 			if(*--cp!=*sp++)
212 				goto next;
213 		for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
214 		if(sp<word)
215 			return(0);
216 		if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
217 			return(1);
218 		if(t->p2!=0) {
219 			deriv[lev] = deriv[lev+1] = 0;
220 			return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
221 		}
222 		return(0);
223 next:		;
224 	}
225 	return(0);
226 }
227 
/*
 * Suffix handler that always fails; whatever arguments the caller
 * supplies are ignored.
 */
int
nop()
{
	return 0;
}
232 
/*
 * Accept the word ending at ep if putsuf() finds it (recording `a' as the
 * affix description) or, failing that, if suffix() can peel off a further
 * suffix.  `d' is not used.  Returns 1 on success, 0 otherwise.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev)!=0);
}
239 
240 s(ep,d,a,lev)
241 char *ep,*d,*a;
242 int lev;
243 {
244 	if(lev>DLEV+1)
245 		return(0);
246 	if(*ep=='s'&&ep[-1]=='s')
247 		return(0);
248 	return(strip(ep,d,a,lev));
249 }
250 
251 an(ep,d,a,lev)
252 char *ep,*d,*a;
253 int lev;
254 {
255 	if(!isupper(*word))	/*must be proper name*/
256 		return(0);
257 	return(putsuf(ep,a,lev));
258 }
259 
/*
 * Store an 'e' at ep and try the word that now ends just after it,
 * passing `d' to strip() as the affix description.  The overwritten
 * character is not put back.  `a' is not used.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
267 
/*
 * Store an 'e' at ep and try the word that now ends just after it, with
 * `d' as the affix description.  Returns 1 on success; on failure the
 * character that was overwritten is restored and 0 is returned.
 * `a' is not used.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char saved = ep[0];

	ep[0] = 'e';
	if(!strip(ep+1,"",d,lev)) {
		ep[0] = saved;
		return (0);
	}
	return (1);
}
279 
/*
 * Choose a handler by the last letter of the stem: a stem ending in 'i'
 * goes to i_to_y(), anything else to strip().
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,d,a,lev));
	return(i_to_y(ep,d,a,lev));
}
289 
290 ncy(ep,d,a,lev)
291 char *ep, *d, *a;
292 int lev;
293 {
294 	if(skipv(skipv(ep-1))<word)
295 		return(0);
296 	ep[-1] = 't';
297 	return(strip(ep,d,a,lev));
298 }
299 
/*
 * Store an 'l' at ep and let y_to_e() try adding an 'e' after it.
 * The 'l' is left in place whatever the outcome.
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
307 
/*
 * If the stem ends in 'i', change that letter to 'y' (it is not changed
 * back) and describe the derivation with `d'; otherwise leave the stem
 * alone and describe it with `a'.  Either way the stem goes to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
318 
319 es(ep,d,a,lev)
320 char *ep,*d,*a;
321 int lev;
322 {
323 	if(lev>DLEV)
324 		return(0);
325 	switch(ep[-1]) {
326 	default:
327 		return(0);
328 	case 'i':
329 		return(i_to_y(ep,d,a,lev));
330 	case 's':
331 	case 'h':
332 	case 'z':
333 	case 'x':
334 		return(strip(ep,d,a,lev));
335 	}
336 }
337 
/*
 * Overwrite the two characters in front of ep with "er" (they are not
 * restored) and hand the result to strip().
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	char *tail = ep-2;

	tail[0] = 'e';
	tail[1] = 'r';
	return(strip(ep,d,a,lev));
}
346 
/*
 * Dispatch on the character two places before ep: 'c' or 'r' looks the
 * stem up with putsuf() only, 'a' tries y_to_e(), and anything else is
 * rejected.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char key = ep[-2];

	if(key=='c' || key=='r')
		return(putsuf(ep,a,lev));
	if(key=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
360 
/*
 * possible consonant-consonant-e ending
 *
 * Decides from the last two letters of the stem whether to try putting
 * back a dropped 'e' with y_to_e(), to reject the word, or to fall back
 * on VCe().
 */
int
CCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last = ep[-1];
	char prev = ep[-2];

	if(last=='l') {
		/* vowel+l, ll, rl and wl are left to VCe() */
		if(vowel(prev) || prev=='l' || prev=='r' || prev=='w')
			return(VCe(ep,d,a,lev));
		return(y_to_e(ep,d,a,lev));
	}
	if(last=='s' && prev=='s')
		return(VCe(ep,d,a,lev));
	if(last=='s' || last=='c' || last=='g') {
		if(*ep=='a')
			return(0);
	}
	if(last=='s' || last=='c' || last=='g' || last=='v' || last=='z') {
		if(vowel(prev))
			return(VCe(ep,d,a,lev));
	} else if(last!='u')
		return(VCe(ep,d,a,lev));
	if(y_to_e(ep,d,a,lev))
		return(1);
	/*
	 * Look at the word itself again rather than at last/prev: handlers
	 * run underneath y_to_e() may have rewritten these characters.
	 */
	if(!(ep[-2]=='n' && ep[-1]=='g'))
		return(0);
	return(VCe(ep,d,a,lev));
}
398 
/*
 * possible consonant-vowel-consonant-e ending
 *
 * A stem that already ends in 'e' is rejected.  If the stem ends in a
 * vowel followed by a non-vowel, an 'e' is stored at ep and the longer
 * stem is tried first (described by `d'); when that fails the
 * overwritten character is restored.  The last resort is strip() on the
 * stem as it stands.
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char saved;

	if(ep[-1]=='e')
		return(0);
	if(!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev) || suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
418 
419 char *lookuppref(wp,ep)
420 char **wp;
421 char *ep;
422 {
423 	register char **sp;
424 	register char *bp,*cp;
425 	for(sp=preftab;*sp;sp++) {
426 		bp = *wp;
427 		for(cp= *sp;*cp;cp++,bp++)
428 			if(Tolower(*bp)!=*cp)
429 				goto next;
430 		for(cp=bp;cp<ep;cp++)
431 			if(vowel(*cp)) {
432 				*wp = bp;
433 				return(*sp);
434 			}
435 next:	;
436 	}
437 	return(0);
438 }
439 
440 putsuf(ep,a,lev)
441 char *ep,*a;
442 int lev;
443 {
444 	register char *cp;
445 	char *bp;
446 	register char *pp;
447 	int val = 0;
448 	char space[20];
449 	deriv[lev] = a;
450 	if(putword(word,ep,lev))
451 		return(1);
452 	bp = word;
453 	pp = space;
454 	deriv[lev+1] = pp;
455 	while(cp=lookuppref(&bp,ep)) {
456 		*pp++ = '+';
457 		while(*pp = *cp++)
458 			pp++;
459 		if(putword(bp,ep,lev+1)) {
460 			val = 1;
461 			break;
462 		}
463 	}
464 	deriv[lev+1] = deriv[lev+2] = 0;
465 	return(val);
466 }
467 
468 putword(bp,ep,lev)
469 char *bp,*ep;
470 int lev;
471 {
472 	register i, j;
473 	char duple[3];
474 	if(ep-bp<=1)
475 		return(0);
476 	if(vowel(*ep)) {
477 		if(monosyl(bp,ep))
478 			return(0);
479 	}
480 	i = dict(bp,ep);
481 	if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
482 		ep--;
483 		deriv[++lev] = duple;
484 		duple[0] = '+';
485 		duple[1] = *ep;
486 		duple[2] = 0;
487 		i = dict(bp,ep);
488 	}
489 	if(vflag==0||i==0)
490 		return(i);
491 	j = lev;
492 	do {
493 		if(deriv[j])
494 			strcat(affix,deriv[j]);
495 	} while(--j>0);
496 	strcat(affix,"\t");
497 	return(i);
498 }
499 
500 
/*
 * Return 1 if the text from bp up to ep ends in a vowel followed by a
 * single non-vowel other than 'x' or 'w', and has no vowel anywhere
 * before that final vowel; return 0 otherwise (always 0 for fewer than
 * two characters).
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	char *p;

	if(ep-bp<2)
		return(0);
	if(vowel(ep[-1]))
		return(0);
	if(!vowel(ep[-2]))
		return(0);
	if(ep[-1]=='x' || ep[-1]=='w')
		return(0);
	for(p=ep-2; p>bp; )
		if(vowel(*--p))
			return(0);
	return(1);
}
514 
515 char *
516 skipv(s)
517 char *s;
518 {
519 	if(s>=word&&vowel(*s))
520 		s--;
521 	while(s>=word&&!vowel(*s))
522 		s--;
523 	return(s);
524 }
525 
/*
 * Return 1 if c, after Tolower(), is one of a, e, i, o, u or y,
 * and 0 otherwise.
 */
int
vowel(c)
int c;
{
	int lc = Tolower(c);

	return(lc=='a' || lc=='e' || lc=='i' ||
	    lc=='o' || lc=='u' || lc=='y');
}
540 
541 /* crummy way to Britishise */
542 ise()
543 {
544 	register struct suftab *p;
545 	for(p = suftab;p->suf;p++) {
546 		ztos(p->suf);
547 		ztos(p->d1);
548 		ztos(p->a1);
549 	}
550 }
/*
 * Replace every 'z' in the NUL-terminated string s by 's', in place.
 * No value is returned; callers must not use the result.
 */
int
ztos(s)
char *s;
{
	while(*s) {
		if(*s=='z')
			*s = 's';
		s++;
	}
}
558 
559 dict(bp,ep)
560 char *bp, *ep;
561 {
562 	register char *wp;
563 	long h;
564 	register long *lp;
565 	register i;
566 	if(xflag)
567 		printf("=%.*s\n",ep-bp,bp);
568 	for(i=0; i<NP; i++) {
569 		for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
570 			h += *wp * *lp;
571 		h += '\n' * *lp;
572 		h %= p[i];
573 		if(get(h)==0)
574 			return(0);
575 	}
576 	return(1);
577 }
578