1 /*
2 
3   morph.c - WordNet search code morphology functions
4 
5 */
6 
7 #include <stdio.h>
8 #include <ctype.h>
9 #include <string.h>
10 #include <stdlib.h>
11 #include "wn.h"
12 
/* printf-style template for the path of an exception-list file.  The
   first %s receives the search directory and the second an entry of
   partnames[]; only the directory separator differs per platform.
   Exactly one of UNIX, PC or MAC is expected to be defined by the build. */
#if defined(UNIX)
#define EXCFILE	"%s/%s.exc"
#endif
#if defined(PC)
#define EXCFILE	"%s\\%s.exc"
#endif
#if defined(MAC)
#define EXCFILE	"%s:%s.exc"
#endif
22 
23 static char *Id = "$Id: morph.c,v 1.61 2003/06/23 16:15:39 wn Exp $";
24 
25 static char *sufx[] ={
26     /* Noun suffixes */
27     "s", "ses", "xes", "zes", "ches", "shes", "men", "ies",
28     /* Verb suffixes */
29     "s", "ies", "es", "es", "ed", "ed", "ing", "ing",
30     /* Adjective suffixes */
31     "er", "est", "er", "est"
32 };
33 
34 static char *addr[] ={
35     /* Noun endings */
36     "", "s", "x", "z", "ch", "sh", "man", "y",
37     /* Verb endings */
38     "", "y", "e", "", "e", "", "e", "",
39     /* Adjective endings */
40     "", "", "e", "e"
41 };
42 
43 static int offsets[NUMPARTS] = { 0, 0, 8, 16 };
44 static int cnts[NUMPARTS] = { 0, 8, 8, 4 };
45 static char msgbuf[256];
46 
#define NUMPREPS	15

/* Prepositions recognised after a verb in a collocation.  Each entry
   stores the word together with its length so that callers can compare
   with strncmp() without recomputing the length. */
static struct {
    char *str;
    int strlen;
} prepositions[NUMPREPS] = {
    { "to",      2 },
    { "at",      2 },
    { "of",      2 },
    { "on",      2 },
    { "off",     3 },
    { "in",      2 },
    { "out",     3 },
    { "up",      2 },
    { "down",    4 },
    { "from",    4 },
    { "with",    4 },
    { "into",    4 },
    { "for",     3 },
    { "about",   5 },
    { "between", 7 },
};
69 
70 static FILE *exc_fps[NUMPARTS + 1];
71 
72 static int do_init();
73 static int strend(char *, char *);
74 static char *wordbase(char *, int);
75 static int hasprep(char *, int);
76 static char *exc_lookup(char *, int);
77 static char *morphprep(char *);
78 
79 /* Open exception list files */
80 
morphinit(void)81 int morphinit(void)
82 {
83     static int done = 0;
84     static int openerr = 0;
85 
86     if (!done) {
87       if (OpenDB) {		/* make sure WN database files are open */
88             if (!(openerr = do_init()))
89 	        done = 1;
90 	} else
91 	    openerr = -1;
92     }
93 
94     return(openerr);
95 }
96 
97 /* Close exception list files and reopen */
re_morphinit(void)98 int re_morphinit(void)
99 {
100     int i;
101 
102     for (i = 1; i <= NUMPARTS; i++) {
103 	if (exc_fps[i] != NULL) {
104 	    fclose(exc_fps[i]); exc_fps[i] = NULL;
105 	}
106     }
107 
108     return(OpenDB ? do_init() : -1);
109 }
110 
do_init(void)111 static int do_init(void)
112 {
113     int i, openerr;
114     char *env;
115     char searchdir[256], fname[256];
116 
117     openerr = 0;
118 
119     /* Find base directory for database.  If set, use WNSEARCHDIR.
120        If not set, check for WNHOME/dict, otherwise use DEFAULTPATH. */
121 
122     if ((env = getenv("WNSEARCHDIR")) != NULL)
123 	strcpy(searchdir, env);
124     else if ((env = getenv("WNHOME")) != NULL)
125 	sprintf(searchdir, "%s%s", env, DICTDIR);
126     else
127 	strcpy(searchdir, DEFAULTPATH);
128 
129     for (i = 1; i <= NUMPARTS; i++) {
130 	sprintf(fname, EXCFILE, searchdir, partnames[i]);
131 	if ((exc_fps[i] = fopen(fname, "r")) == NULL) {
132 	    sprintf(msgbuf,
133 		    "WordNet library error: Can't open exception file(%s)\n\n",
134 		    fname);
135 	    display_message(msgbuf);
136 	    openerr = -1;
137 	}
138     }
139     return(openerr);
140 }
141 
142 /* Try to find baseform (lemma) of word or collocation in POS.
143    Works like strtok() - first call is with string, subsequent calls
144    with NULL argument return additional baseforms for original string. */
145 
morphstr(char * origstr,int pos)146 char *morphstr(char *origstr, int pos)
147 {
148     static char searchstr[WORDBUF], str[WORDBUF];
149     static int svcnt, svprep;
150     char word[WORDBUF], *tmp;
151     int cnt, st_idx = 0, end_idx;
152     int prep;
153     char *end_idx1, *end_idx2;
154     char *append;
155 
156     if (pos == SATELLITE)
157 	pos = ADJ;
158 
159     /* First time through for this string */
160 
161     if (origstr != NULL) {
162 	/* Assume string hasn't had spaces substitued with '_' */
163 	strtolower(strsubst(strcpy(str, origstr), ' ', '_'));
164 	searchstr[0] = '\0';
165 	cnt = cntwords(str, '_');
166 	svprep = 0;
167 
168 	/* first try exception list */
169 
170 	if ((tmp = exc_lookup(str, pos)) && strcmp(tmp, str)) {
171 	    svcnt = 1;		/* force next time to pass NULL */
172 	    return(tmp);
173 	}
174 
175 	/* Then try simply morph on original string */
176 
177 	if (pos != VERB && (tmp = morphword(str, pos)) && strcmp(tmp, str))
178 	    return(tmp);
179 
180 	if (pos == VERB && cnt > 1 && (prep = hasprep(str, cnt))) {
181 	    /* assume we have a verb followed by a preposition */
182 	    svprep = prep;
183 	    return(morphprep(str));
184 	} else {
185 	    svcnt = cnt = cntwords(str, '-');
186 	    while (origstr && --cnt) {
187 		end_idx1 = strchr(str + st_idx, '_');
188 		end_idx2 = strchr(str + st_idx, '-');
189 		if (end_idx1 && end_idx2) {
190 		    if (end_idx1 < end_idx2) {
191 			end_idx = (int)(end_idx1 - str);
192 			append = "_";
193 		    } else {
194 			end_idx = (int)(end_idx2 - str);
195 			append = "-";
196 		    }
197 		} else {
198 		    if (end_idx1) {
199 			end_idx = (int)(end_idx1 - str);
200 			append = "_";
201 		    } else {
202 			end_idx = (int)(end_idx2 - str);
203 			append = "-";
204 		    }
205 		}
206 		if (end_idx < 0) return(NULL);		/* shouldn't do this */
207 		strncpy(word, str + st_idx, end_idx - st_idx);
208 		word[end_idx - st_idx] = '\0';
209 		if(tmp = morphword(word, pos))
210 		    strcat(searchstr,tmp);
211 		else
212 		    strcat(searchstr,word);
213 		strcat(searchstr, append);
214 		st_idx = end_idx + 1;
215 	    }
216 
217 	    if(tmp = morphword(strcpy(word, str + st_idx), pos))
218 		strcat(searchstr,tmp);
219 	    else
220 		strcat(searchstr,word);
221 	    if(strcmp(searchstr, str) && is_defined(searchstr,pos))
222 		return(searchstr);
223 	    else
224 		return(NULL);
225 	}
226     } else {			/* subsequent call on string */
227 	if (svprep) {		/* if verb has preposition, no more morphs */
228 	    svprep = 0;
229 	    return(NULL);
230 	} else if (svcnt == 1)
231 	    return(exc_lookup(NULL, pos));
232 	else {
233 	    svcnt = 1;
234 	    if ((tmp = exc_lookup(str, pos)) && strcmp(tmp, str))
235 		return(tmp);
236 	    else
237 		return(NULL);
238 	}
239     }
240 }
241 
242 /* Try to find baseform (lemma) of individual word in POS */
morphword(char * word,int pos)243 char *morphword(char *word, int pos)
244 {
245     int offset, cnt;
246     int i;
247     static char retval[WORDBUF];
248     char *tmp, tmpbuf[WORDBUF], *end;
249 
250     sprintf(retval,"");
251     sprintf(tmpbuf, "");
252     end = "";
253 
254     if(word == NULL)
255 	return(NULL);
256 
257     /* first look for word on exception list */
258 
259     if((tmp = exc_lookup(word, pos)) != NULL)
260 	return(tmp);		/* found it in exception list */
261 
262     if (pos == ADV) {		/* only use exception list for adverbs */
263 	return(NULL);
264     }
265     if (pos == NOUN) {
266 	if (strend(word, "ful")) {
267 	    cnt = strrchr(word, 'f') - word;
268 	    strncat(tmpbuf, word, cnt);
269 	    end = "ful";
270 	} else
271 	    /* check for noun ending with 'ss' or short words */
272 	    if (strend(word, "ss") || (strlen(word) <= 2))
273 		return(NULL);
274     }
275 
276 /* If not in exception list, try applying rules from tables */
277 
278     if (tmpbuf[0] == '\0')
279 	strcpy(tmpbuf, word);
280 
281     offset = offsets[pos];
282     cnt = cnts[pos];
283 
284     for(i = 0; i < cnt; i++){
285 	strcpy(retval, wordbase(tmpbuf, (i + offset)));
286 	if(strcmp(retval, tmpbuf) && is_defined(retval, pos)) {
287 	    strcat(retval, end);
288 	    return(retval);
289 	}
290     }
291     return(NULL);
292 }
293 
/* Return 1 if str1 ends with str2 and is strictly longer than str2,
   otherwise 0.  A string is therefore never reported as ending in
   itself, which keeps suffix stripping from producing an empty word. */
static int strend(char *str1, char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);

    if (len2 >= len1)
        return(0);

    return(strcmp(str1 + (len1 - len2), str2) == 0);
}
307 
wordbase(char * word,int ender)308 static char *wordbase(char *word, int ender)
309 {
310     char *pt1;
311     static char copy[WORDBUF];
312 
313     strcpy(copy, word);
314     if(strend(copy,sufx[ender])) {
315 	pt1=strchr(copy,'\0');
316 	pt1 -= strlen(sufx[ender]);
317 	*pt1='\0';
318 	strcat(copy,addr[ender]);
319     }
320     return(copy);
321 }
322 
hasprep(char * s,int wdcnt)323 static int hasprep(char *s, int wdcnt)
324 {
325     /* Find a preposition in the verb string and return its
326        corresponding word number. */
327 
328     int i, wdnum;
329 
330     for (wdnum = 2; wdnum <= wdcnt; wdnum++) {
331 	s = strchr(s, '_');
332 	for (s++, i = 0; i < NUMPREPS; i++)
333 	    if (!strncmp(s, prepositions[i].str, prepositions[i].strlen) &&
334 		(s[prepositions[i].strlen] == '_' ||
335 		 s[prepositions[i].strlen] == '\0'))
336 		return(wdnum);
337     }
338     return(0);
339 }
340 
exc_lookup(char * word,int pos)341 static char *exc_lookup(char *word, int pos)
342 {
343     static char line[WORDBUF], *beglp, *endlp;
344     char *excline;
345     int found = 0;
346 
347     if (exc_fps[pos] == NULL)
348 	return(NULL);
349 
350     /* first time through load line from exception file */
351     if(word != NULL){
352 	if ((excline = bin_search(word, exc_fps[pos])) != NULL) {
353 	    strcpy(line, excline);
354 	    endlp = strchr(line,' ');
355 	} else
356 	    endlp = NULL;
357     }
358     if(endlp && *(endlp + 1) != ' '){
359 	beglp = endlp + 1;
360 	while(*beglp && *beglp == ' ') beglp++;
361 	endlp = beglp;
362 	while(*endlp && *endlp != ' ' && *endlp != '\n') endlp++;
363 	if(endlp != beglp){
364 	    *endlp='\0';
365 	    return(beglp);
366 	}
367     }
368     beglp = NULL;
369     endlp = NULL;
370     return(NULL);
371 }
372 
morphprep(char * s)373 static char *morphprep(char *s)
374 {
375     char *rest, *exc_word, *lastwd = NULL, *last;
376     int i, offset, cnt;
377     char word[WORDBUF], end[WORDBUF];
378     static char retval[WORDBUF];
379 
380     /* Assume that the verb is the first word in the phrase.  Strip it
381        off, check for validity, then try various morphs with the
382        rest of the phrase tacked on, trying to find a match. */
383 
384     rest = strchr(s, '_');
385     last = strrchr(s, '_');
386     if (rest != last) {		/* more than 2 words */
387 	if (lastwd = morphword(last + 1, NOUN)) {
388 	    strncpy(end, rest, last - rest + 1);
389 	    end[last-rest+1] = '\0';
390 	    strcat(end, lastwd);
391 	}
392     }
393 
394     strncpy(word, s, rest - s);
395     word[rest - s] = '\0';
396     for (i = 0, cnt = strlen(word); i < cnt; i++)
397 	if (!isalnum((unsigned char)(word[i]))) return(NULL);
398 
399     offset = offsets[VERB];
400     cnt = cnts[VERB];
401 
402     /* First try to find the verb in the exception list */
403 
404     if ((exc_word = exc_lookup(word, VERB)) &&
405 	strcmp(exc_word, word)) {
406 
407 	sprintf(retval, "%s%s", exc_word, rest);
408 	if(is_defined(retval, VERB))
409 	    return(retval);
410 	else if (lastwd) {
411 	    sprintf(retval, "%s%s", exc_word, end);
412 	    if(is_defined(retval, VERB))
413 		return(retval);
414 	}
415     }
416 
417     for (i = 0; i < cnt; i++) {
418 	if ((exc_word = wordbase(word, (i + offset))) &&
419 	    strcmp(word, exc_word)) { /* ending is different */
420 
421 	    sprintf(retval, "%s%s", exc_word, rest);
422 	    if(is_defined(retval, VERB))
423 		return(retval);
424 	    else if (lastwd) {
425 		sprintf(retval, "%s%s", exc_word, end);
426 		if(is_defined(retval, VERB))
427 		    return(retval);
428 	    }
429 	}
430     }
431     sprintf(retval, "%s%s", word, rest);
432     if (strcmp(s, retval))
433 	return(retval);
434     if (lastwd) {
435 	sprintf(retval, "%s%s", word, end);
436 	if (strcmp(s, retval))
437 	    return(retval);
438     }
439     return(NULL);
440 }
441 
442 /*
443  * Revision 1.1  91/09/25  15:39:47  wn
444  * Initial revision
445  *
446  */
447