1 /*
2 
3   morph.c - WordNet search code morphology functions
4 
5 */
6 
7 #include <stdio.h>
8 #include <ctype.h>
9 #include <string.h>
10 #include <stdlib.h>
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14 #include "wn.h"
15 
16 #ifdef _WINDOWS
17 #include <windows.h>
18 #include <windowsx.h>
19 #define EXCFILE	"%s\\%s.exc"
20 #else
21 #define EXCFILE	"%s/%s.exc"
22 #endif
23 
24 __FBSDID("$Id: morph.c,v 1.67 2006/11/14 21:00:23 wn Exp $");
25 
/*
 * Detachment rules, first half: the ending to strip from an inflected
 * word.  Entry i is paired with entry i of addr[], which supplies the
 * text that wordbase() appends after stripping.
 */
static const char *sufx[] = {
    /* Noun suffixes */
    "s",
    "ses",
    "xes",
    "zes",
    "ches",
    "shes",
    "men",
    "ies",
    /* Verb suffixes */
    "s",
    "ies",
    "es",
    "es",
    "ed",
    "ed",
    "ing",
    "ing",
    /* Adjective suffixes */
    "er",
    "est",
    "er",
    "est"
};

/*
 * Detachment rules, second half: the replacement ending for the
 * same-numbered entry of sufx[].  An empty string means the suffix is
 * simply removed.
 */
static const char *addr[] = {
    /* Noun endings */
    "",
    "s",
    "x",
    "z",
    "ch",
    "sh",
    "man",
    "y",
    /* Verb endings */
    "",
    "y",
    "e",
    "",
    "e",
    "",
    "e",
    "",
    /* Adjective endings */
    "",
    "",
    "e",
    "e"
};
43 
/* Per-part-of-speech window into sufx[]/addr[].  morphword() tries the
   rules numbered offsets[pos] .. offsets[pos] + cnts[pos] - 1.  With the
   initializers below, index 1 selects the 8 noun rules, index 2 the 8
   verb rules and index 3 the 4 adjective rules; index 0 selects none. */
static int offsets[NUMPARTS] = { 0, 0, 8, 16 };
static int cnts[NUMPARTS] = { 0, 8, 8, 4 };
/* Scratch buffer that do_init() formats its error message into before
   handing it to display_message(). */
static char msgbuf[256];
47 
48 
/* Prepositions that hasprep() looks for after the first word of a verb
   phrase.  Each entry stores its own length so the prefix comparison
   does not have to recompute it. */
static struct {
    const char *str;
    int strlen;
} prepositions[] = {
    { "to", 2 },
    { "at", 2 },
    { "of", 2 },
    { "on", 2 },
    { "off", 3 },
    { "in", 2 },
    { "out", 3 },
    { "up", 2 },
    { "down", 4 },
    { "from", 4 },
    { "with", 4 },
    { "into", 4 },
    { "for", 3 },
    { "about", 5 },
    { "between", 7 }
};

/* Number of entries in prepositions[].  The expansion is fully
   parenthesized so it behaves as a single operand inside any larger
   expression (e.g. "x / NUMPREPS"). */
#define NUMPREPS	(sizeof(prepositions)/sizeof(prepositions[0]))
71 
/* Open exception-list streams, one per part of speech.  The loops in
   this file fill and close slots 1 through NUMPARTS; slot 0 is never
   assigned here.  A NULL slot means "no exception file available". */
static FILE *exc_fps[NUMPARTS + 1];

/* Forward declarations of the file-local helpers defined below. */
static int do_init(void);
static int strend(const char *, const char *);
static const char *wordbase(const char *, int);
static int hasprep(const char *, unsigned int);
static const char *exc_lookup(const char *, int);
static const char *morphprep(const char *);
80 
81 /* Open exception list files */
82 
morphinit(void)83 int morphinit(void)
84 {
85     static int done = 0;
86     static int openerr = 0;
87 
88     if (!done) {
89       if (OpenDB) {		/* make sure WN database files are open */
90             if (!(openerr = do_init()))
91 	        done = 1;
92 	} else
93 	    openerr = -1;
94     }
95 
96     return(openerr);
97 }
98 
99 /* Close exception list files and reopen */
re_morphinit(void)100 int re_morphinit(void)
101 {
102     int i;
103 
104     for (i = 1; i <= NUMPARTS; i++) {
105 	if (exc_fps[i] != NULL) {
106 	    fclose(exc_fps[i]); exc_fps[i] = NULL;
107 	}
108     }
109 
110     return(OpenDB ? do_init() : -1);
111 }
112 
do_init(void)113 static int do_init(void)
114 {
115     int i, openerr;
116 #ifdef _WINDOWS
117     HKEY hkey;
118     DWORD dwType, dwSize;
119 #else
120     char *env;
121 #endif
122     char searchdir[256], fname[256];
123 
124     openerr = 0;
125 
126     /* Find base directory for database.  If set, use WNSEARCHDIR.
127        If not set, check for WNHOME/dict, otherwise use DEFAULTPATH. */
128 
129 #ifdef _WINDOWS
130     if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, TEXT("Software\\WordNet\\3.0"),
131 		     0, KEY_READ, &hkey) == ERROR_SUCCESS) {
132 	dwSize = sizeof(searchdir);
133 	RegQueryValueEx(hkey, TEXT("WNHome"),
134 			NULL, &dwType, searchdir, &dwSize);
135 	RegCloseKey(hkey);
136 	strcat(searchdir, DICTDIR);
137     }
138     else if (RegOpenKeyEx(HKEY_CURRENT_USER, TEXT("Software\\WordNet\\3.0"),
139 		     0, KEY_READ, &hkey) == ERROR_SUCCESS) {
140 	dwSize = sizeof(searchdir);
141 	RegQueryValueEx(hkey, TEXT("WNHome"),
142 			NULL, &dwType, searchdir, &dwSize);
143 	RegCloseKey(hkey);
144 	strcat(searchdir, DICTDIR);
145     } else
146 	sprintf(searchdir, DEFAULTPATH);
147 #else
148     if ((env = getenv("WNSEARCHDIR")) != NULL)
149 	strcpy(searchdir, env);
150     else if ((env = getenv("WNHOME")) != NULL)
151 	sprintf(searchdir, "%s%s", env, DICTDIR);
152     else
153 	strcpy(searchdir, DEFAULTPATH);
154 #endif
155 
156     for (i = 1; i <= NUMPARTS; i++) {
157 	sprintf(fname, EXCFILE, searchdir, partnames[i]);
158 	if ((exc_fps[i] = fopen(fname, "r")) == NULL) {
159 	    sprintf(msgbuf,
160 		    "WordNet library error: Can't open exception file(%s)\n\n",
161 		    fname);
162 	    display_message(msgbuf);
163 	    openerr = -1;
164 	}
165     }
166     return(openerr);
167 }
168 
169 /* Try to find baseform (lemma) of word or collocation in POS.
170    Works like strtok() - first call is with string, subsequent calls
171    with NULL argument return additional baseforms for original string. */
172 
173 const char *
morphstr(const char * origstr,int pos)174 morphstr(const char *origstr, int pos)
175 {
176     static char searchstr[WORDBUF], str[WORDBUF];
177     static int svcnt, svprep;
178     char word[WORDBUF];
179     const char *tmp;
180     int cnt, st_idx = 0, end_idx;
181     int prep;
182     const char *end_idx1, *end_idx2;
183     const char *append;
184 
185     if (pos == SATELLITE)
186 	pos = ADJ;
187 
188     /* First time through for this string */
189 
190     if (origstr != NULL) {
191 	/* Assume string hasn't had spaces substitued with '_' */
192 	strtolower(strsubst(strcpy(str, origstr), ' ', '_'));
193 	searchstr[0] = '\0';
194 	cnt = cntwords(str, '_');
195 	svprep = 0;
196 
197 	/* first try exception list */
198 
199 	if ((tmp = exc_lookup(str, pos)) && strcmp(tmp, str)) {
200 	    svcnt = 1;		/* force next time to pass NULL */
201 	    return(tmp);
202 	}
203 
204 	/* Then try simply morph on original string */
205 
206 	if (pos != VERB && (tmp = morphword(str, pos)) && strcmp(tmp, str))
207 	    return(tmp);
208 
209 	if (pos == VERB && cnt > 1 && (prep = hasprep(str, cnt))) {
210 	    /* assume we have a verb followed by a preposition */
211 	    svprep = prep;
212 	    return(morphprep(str));
213 	} else {
214 	    svcnt = cnt = cntwords(str, '-');
215 	    while (origstr && --cnt) {
216 		end_idx1 = strchr(str + st_idx, '_');
217 		end_idx2 = strchr(str + st_idx, '-');
218 		if (end_idx1 && end_idx2) {
219 		    if (end_idx1 < end_idx2) {
220 			end_idx = (int)(end_idx1 - str);
221 			append = "_";
222 		    } else {
223 			end_idx = (int)(end_idx2 - str);
224 			append = "-";
225 		    }
226 		} else {
227 		    if (end_idx1) {
228 			end_idx = (int)(end_idx1 - str);
229 			append = "_";
230 		    } else {
231 			end_idx = (int)(end_idx2 - str);
232 			append = "-";
233 		    }
234 		}
235 		if (end_idx < 0) return(NULL);		/* shouldn't do this */
236 		strncpy(word, str + st_idx, end_idx - st_idx);
237 		word[end_idx - st_idx] = '\0';
238 		tmp = morphword(word, pos);
239 		if(tmp)
240 		    strcat(searchstr,tmp);
241 		else
242 		    strcat(searchstr,word);
243 		strcat(searchstr, append);
244 		st_idx = end_idx + 1;
245 	    }
246 
247 	    tmp = morphword(strcpy(word, str + st_idx), pos);
248 	    if(tmp)
249 		strcat(searchstr,tmp);
250 	    else
251 		strcat(searchstr,word);
252 	    if(strcmp(searchstr, str) && is_defined(searchstr,pos))
253 		return(searchstr);
254 	    else
255 		return(NULL);
256 	}
257     } else {			/* subsequent call on string */
258 	if (svprep) {		/* if verb has preposition, no more morphs */
259 	    svprep = 0;
260 	    return(NULL);
261 	} else if (svcnt == 1)
262 	    return(exc_lookup(NULL, pos));
263 	else {
264 	    svcnt = 1;
265 	    if ((tmp = exc_lookup(str, pos)) && strcmp(tmp, str))
266 		return(tmp);
267 	    else
268 		return(NULL);
269 	}
270     }
271 }
272 
273 /* Try to find baseform (lemma) of individual word in POS */
274 const char *
morphword(const char * word,int pos)275 morphword(const char *word, int pos)
276 {
277     int offset, cnt;
278     int i;
279     static char retval[WORDBUF];
280     char tmpbuf[WORDBUF];
281     const char *tmp, *end;
282 
283     retval[0] = tmpbuf[0] = '\0';
284     end = "";
285 
286     if(word == NULL)
287 	return(NULL);
288 
289     /* first look for word on exception list */
290 
291     if((tmp = exc_lookup(word, pos)) != NULL)
292 	return(tmp);		/* found it in exception list */
293 
294     if (pos == ADV) {		/* only use exception list for adverbs */
295 	return(NULL);
296     }
297     if (pos == NOUN) {
298 	if (strend(word, "ful")) {
299 	    cnt = strrchr(word, 'f') - word;
300 	    strncat(tmpbuf, word, cnt);
301 	    end = "ful";
302 	} else
303 	    /* check for noun ending with 'ss' or short words */
304 	    if (strend(word, "ss") || (strlen(word) <= 2))
305 		return(NULL);
306     }
307 
308 /* If not in exception list, try applying rules from tables */
309 
310     if (tmpbuf[0] == '\0')
311 	strcpy(tmpbuf, word);
312 
313     offset = offsets[pos];
314     cnt = cnts[pos];
315 
316     for(i = 0; i < cnt; i++){
317 	strcpy(retval, wordbase(tmpbuf, (i + offset)));
318 	if(strcmp(retval, tmpbuf) && is_defined(retval, pos)) {
319 	    strcat(retval, end);
320 	    return(retval);
321 	}
322     }
323     return(NULL);
324 }
325 
/* Return 1 when str1 is strictly longer than str2 and ends with str2,
   0 otherwise.  A string is never considered to end with itself. */
static int strend(const char *str1, const char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);

    if (len1 <= len2)
        return(0);

    /* compare the last len2 characters of str1 against str2 */
    return(strcmp(str1 + (len1 - len2), str2) == 0);
}
339 
340 static const char *
wordbase(const char * word,int ender)341 wordbase(const char *word, int ender)
342 {
343     char *pt1;
344     static char copy[WORDBUF];
345 
346     strcpy(copy, word);
347     if(strend(copy,sufx[ender])) {
348 	pt1=strchr(copy,'\0');
349 	pt1 -= strlen(sufx[ender]);
350 	*pt1='\0';
351 	strcat(copy,addr[ender]);
352     }
353     return(copy);
354 }
355 
hasprep(const char * s,unsigned int wdcnt)356 static int hasprep(const char *s, unsigned int wdcnt)
357 {
358     /* Find a preposition in the verb string and return its
359        corresponding word number. */
360 
361     unsigned int i, wdnum;
362 
363     for (wdnum = 2; wdnum <= wdcnt; wdnum++) {
364 	s = strchr(s, '_');
365 	for (s++, i = 0; i < NUMPREPS; i++)
366 	    if (!strncmp(s, prepositions[i].str, prepositions[i].strlen) &&
367 		(s[prepositions[i].strlen] == '_' ||
368 		 s[prepositions[i].strlen] == '\0'))
369 		return(wdnum);
370     }
371     return(0);
372 }
373 
374 static const char *
exc_lookup(const char * word,int pos)375 exc_lookup(const char *word, int pos)
376 {
377     static char line[WORDBUF], *beglp, *endlp;
378     const char *excline;
379 
380     if (exc_fps[pos] == NULL)
381 	return(NULL);
382 
383     /* first time through load line from exception file */
384     if(word != NULL){
385 	if ((excline = bin_search(word, exc_fps[pos])) != NULL) {
386 	    strcpy(line, excline);
387 	    endlp = strchr(line,' ');
388 	} else
389 	    endlp = NULL;
390     }
391     if(endlp && *(endlp + 1) != ' '){
392 	beglp = endlp + 1;
393 	while(*beglp && *beglp == ' ') beglp++;
394 	endlp = beglp;
395 	while(*endlp && *endlp != ' ' && *endlp != '\n') endlp++;
396 	if(endlp != beglp){
397 	    *endlp='\0';
398 	    return(beglp);
399 	}
400     }
401     beglp = NULL;
402     endlp = NULL;
403     return(NULL);
404 }
405 
406 static const char *
morphprep(const char * s)407 morphprep(const char *s)
408 {
409     const char *rest, *exc_word, *lastwd = NULL, *last;
410     int i, offset, cnt;
411     char word[WORDBUF], end[WORDBUF];
412     static char retval[WORDBUF];
413 
414     /* Assume that the verb is the first word in the phrase.  Strip it
415        off, check for validity, then try various morphs with the
416        rest of the phrase tacked on, trying to find a match. */
417 
418     rest = strchr(s, '_');
419     last = strrchr(s, '_');
420     if (rest != last) {		/* more than 2 words */
421 	lastwd = morphword(last + 1, NOUN);
422 	if (lastwd) {
423 	    strncpy(end, rest, last - rest + 1);
424 	    end[last-rest+1] = '\0';
425 	    strcat(end, lastwd);
426 	}
427     }
428 
429     strncpy(word, s, rest - s);
430     word[rest - s] = '\0';
431     for (i = 0, cnt = strlen(word); i < cnt; i++)
432 	if (!isalnum((unsigned char)(word[i]))) return(NULL);
433 
434     offset = offsets[VERB];
435     cnt = cnts[VERB];
436 
437     /* First try to find the verb in the exception list */
438 
439     if ((exc_word = exc_lookup(word, VERB)) &&
440 	strcmp(exc_word, word)) {
441 
442 	sprintf(retval, "%s%s", exc_word, rest);
443 	if(is_defined(retval, VERB))
444 	    return(retval);
445 	else if (lastwd) {
446 	    sprintf(retval, "%s%s", exc_word, end);
447 	    if(is_defined(retval, VERB))
448 		return(retval);
449 	}
450     }
451 
452     for (i = 0; i < cnt; i++) {
453 	if ((exc_word = wordbase(word, (i + offset))) &&
454 	    strcmp(word, exc_word)) { /* ending is different */
455 
456 	    sprintf(retval, "%s%s", exc_word, rest);
457 	    if(is_defined(retval, VERB))
458 		return(retval);
459 	    else if (lastwd) {
460 		sprintf(retval, "%s%s", exc_word, end);
461 		if(is_defined(retval, VERB))
462 		    return(retval);
463 	    }
464 	}
465     }
466     sprintf(retval, "%s%s", word, rest);
467     if (strcmp(s, retval))
468 	return(retval);
469     if (lastwd) {
470 	sprintf(retval, "%s%s", word, end);
471 	if (strcmp(s, retval))
472 	    return(retval);
473     }
474     return(NULL);
475 }
476 
477 /*
478  * Revision 1.1  91/09/25  15:39:47  wn
479  * Initial revision
480  *
481  */
482