1 /*
2
3 wnutil.c - utility functions used by WordNet code
4
5 */
6
7 #ifdef _WINDOWS
8 #include <windows.h>
9 #include <windowsx.h>
10 #endif
11
12 #include <stdio.h>
13 #include <ctype.h>
14
15 #ifdef __unix__
16 #ifndef __MACH__
17 #include <stdlib.h>
18 #include <stdint.h>
19 #endif
20 #endif
21
22 #include <assert.h>
23 #include <string.h>
24 #include <stdlib.h>
25
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif
29 #include "wn.h"
30
31 static int do_init(void);
32
33 static char msgbuf[256]; /* buffer for constructing error messages */
34
35 /* used by the strstr wrapper functions */
36 static char *strstr_word;
37 static char *strstr_stringstart;
38 static char *strstr_stringcurrent;
39
40
41 /* Initialization functions */
42
43 static void closefps(void);
44
wninit(void)45 int wninit(void)
46 {
47 static int done = 0;
48 static int openerr = 0;
49 char *env;
50
51 if (!done) {
52 if ((env = getenv("WNDBVERSION"))) {
53 wnrelease = env; /* set release */
54 }
55 openerr = do_init();
56 if (!openerr) {
57 done = 1;
58 OpenDB = 1;
59 openerr = morphinit();
60 }
61 }
62
63 return(openerr);
64 }
65
re_wninit(void)66 int re_wninit(void)
67 {
68 int openerr;
69 char *env;
70
71 closefps();
72
73 if ((env = getenv("WNDBVERSION"))) {
74 wnrelease = env; /* set release */
75 }
76 openerr = do_init();
77 if (!openerr) {
78 OpenDB = 1;
79 openerr = re_morphinit();
80 }
81
82 return(openerr);
83 }
84
closefps(void)85 static void closefps(void)
86 {
87 int i;
88
89 if (OpenDB) {
90 for (i = 1; i < NUMPARTS + 1; i++) {
91 if (datafps[i] != NULL) {
92 fclose(datafps[i]);
93 datafps[i] = NULL;
94 }
95 if (indexfps[i] != NULL) {
96 fclose(indexfps[i]);
97 indexfps[i] = NULL;
98 }
99 }
100 if (sensefp != NULL) {
101 fclose(sensefp); sensefp = NULL;
102 }
103 if (cntlistfp != NULL) {
104 fclose(cntlistfp); cntlistfp = NULL;
105 }
106 if (keyindexfp != NULL) {
107 fclose(keyindexfp); keyindexfp = NULL;
108 }
109 if (vsentfilefp != NULL) {
110 fclose(vsentfilefp); vsentfilefp = NULL;
111 }
112 if (vidxfilefp != NULL) {
113 fclose(vidxfilefp); vidxfilefp = NULL;
114 }
115 OpenDB = 0;
116 }
117 }
118
do_init(void)119 static int do_init(void)
120 {
121 int i, openerr;
122 char searchdir[256], tmpbuf[256];
123
124 #ifdef _WINDOWS
125 HKEY hkey;
126 DWORD dwType, dwSize;
127 #else
128 char *env;
129 #endif
130
131 openerr = 0;
132
133 /* Find base directory for database. If set, use WNSEARCHDIR.
134 If not set, check for WNHOME/dict, otherwise use DEFAULTPATH. */
135
136 #ifdef _WINDOWS
137 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, TEXT("Software\\WordNet\\3.0"),
138 0, KEY_READ, &hkey) == ERROR_SUCCESS) {
139 dwSize = sizeof(searchdir);
140 RegQueryValueEx(hkey, TEXT("WNHome"),
141 NULL, &dwType, searchdir, &dwSize);
142 RegCloseKey(hkey);
143 strcat(searchdir, DICTDIR);
144 } else if (RegOpenKeyEx(HKEY_CURRENT_USER, TEXT("Software\\WordNet\\3.0"),
145 0, KEY_READ, &hkey) == ERROR_SUCCESS) {
146 dwSize = sizeof(searchdir);
147 RegQueryValueEx(hkey, TEXT("WNHome"),
148 NULL, &dwType, searchdir, &dwSize);
149 RegCloseKey(hkey);
150 strcat(searchdir, DICTDIR);
151 } else
152 sprintf(searchdir, DEFAULTPATH);
153 #else
154 if ((env = getenv("WNSEARCHDIR")) != NULL)
155 strcpy(searchdir, env);
156 else if ((env = getenv("WNHOME")) != NULL)
157 sprintf(searchdir, "%s%s", env, DICTDIR);
158 else
159 strcpy(searchdir, DEFAULTPATH);
160 #endif
161
162 for (i = 1; i < NUMPARTS + 1; i++) {
163 sprintf(tmpbuf, DATAFILE, searchdir, partnames[i]);
164 if((datafps[i] = fopen(tmpbuf, "r")) == NULL) {
165 sprintf(msgbuf,
166 "WordNet library error: Can't open datafile(%s)\n",
167 tmpbuf);
168 display_message(msgbuf);
169 openerr = -1;
170 }
171 sprintf(tmpbuf, INDEXFILE, searchdir, partnames[i]);
172 if((indexfps[i] = fopen(tmpbuf, "r")) == NULL) {
173 sprintf(msgbuf,
174 "WordNet library error: Can't open indexfile(%s)\n",
175 tmpbuf);
176 display_message(msgbuf);
177 openerr = -1;
178 }
179 }
180
181 /* This file isn't used by the library and doesn't have to
182 be present. No error is reported if the open fails. */
183
184 sprintf(tmpbuf, SENSEIDXFILE, searchdir);
185 sensefp = fopen(tmpbuf, "r");
186
187 /* If this file isn't present, the runtime code will skip printint out
188 the number of times each sense was tagged. */
189
190 sprintf(tmpbuf, CNTLISTFILE, searchdir);
191 cntlistfp = fopen(tmpbuf, "r");
192
193 /* This file doesn't have to be present. No error is reported if the
194 open fails. */
195
196 sprintf(tmpbuf, KEYIDXFILE, searchdir);
197 keyindexfp = fopen(tmpbuf, "r");
198
199 sprintf(tmpbuf, REVKEYIDXFILE, searchdir);
200 revkeyindexfp = fopen(tmpbuf, "r");
201
202 sprintf(tmpbuf, VRBSENTFILE, searchdir);
203 if ((vsentfilefp = fopen(tmpbuf, "r")) == NULL) {
204 sprintf(msgbuf,
205 "WordNet library warning: Can't open verb example sentence file(%s)\n",
206 tmpbuf);
207 display_message(msgbuf);
208 }
209
210 sprintf(tmpbuf, VRBIDXFILE, searchdir);
211 if ((vidxfilefp = fopen(tmpbuf, "r")) == NULL) {
212 sprintf(msgbuf,
213 "WordNet library warning: Can't open verb example sentence index file(%s)\n",
214 tmpbuf);
215 display_message(msgbuf);
216 }
217
218 return(openerr);
219 }
220
/* Count the words in 's'.  Words are delimited by 'separator', a space
   or an underscore; a run of consecutive delimiters counts as a single
   break.  The result is the number of delimiter runs plus one, so an
   empty string yields 1 and a leading or trailing run adds one to the
   count. */

int cntwords(char *s, char separator)
{
    const char *p = s;
    int words = 1;

    while (*p != '\0') {
        if (*p != separator && *p != ' ' && *p != '_') {
            p++;
            continue;
        }
        /* Start of a delimiter run: count it once, then skip all of it. */
        words++;
        do {
            p++;
        } while (*p != '\0' &&
                 (*p == separator || *p == ' ' || *p == '_'));
    }
    return words;
}
237
/* Lower-case 'str' in place (ASCII 'A'-'Z' only) and cut it off at the
   first '(' so that a trailing adjective marker is removed.
   Returns 'str'. */

char *strtolower(char *str)
{
    char *p;

    for (p = str; *p != '\0'; p++) {
        if (*p == '(') {
            *p = '\0';          /* drop the marker and everything after it */
            break;
        }
        if (*p >= 'A' && *p <= 'Z')
            *p = (char)(*p + 32);
    }
    return str;
}
255
/* Copy 'from' into 'to', lower-casing ASCII 'A'-'Z' on the way.  The
   copy stops at the end of 'from' or at the first '(', and 'to' is
   always NUL-terminated.  The caller must provide a 'to' buffer large
   enough for the copied text.  Returns 'to'. */
char *strtolower2(const char *from, char *to)
{
    const char *src = from;
    char *dst = to;
    char ch;

    for (;;) {
        ch = *src;
        if (ch == '\0' || ch == '(') {
            *dst = '\0';
            break;
        }
        if (ch >= 'A' && ch <= 'Z')
            ch = (char)(ch + 32);
        *dst++ = ch;
        src++;
    }
    return to;
}
271
/* Lower-case every ASCII 'A'-'Z' in 'str' in place.  All other
   characters are left untouched.  Returns 'str'. */

char *ToLowerCase(char *str)
{
    size_t i;

    for (i = 0; str[i] != '\0'; i++) {
        if (str[i] >= 'A' && str[i] <= 'Z')
            str[i] = (char)(str[i] + 32);
    }
    return str;
}
285
/* Replace every occurrence of the character 'from' in 'str' with 'to',
   in place.  The terminating NUL is never examined, so passing '\0' as
   'from' changes nothing.  Returns 'str'. */

char *strsubst(char *str, char from, char to)
{
    char *cursor = str;

    while (*cursor != '\0') {
        if (*cursor == from)
            *cursor = to;
        cursor++;
    }
    return str;
}
297
298 /* Return pointer code for pointer type characer passed. */
299
300 unsigned short
getptrtype(const char * ptrstr,char ** end)301 getptrtype(const char *ptrstr, char **end)
302 {
303 unsigned short i;
304 const char *ptype, *pstr;
305 for(i = 1; i <= MAXPTR; i++) {
306 ptype = ptrtyp[i];
307 pstr = ptrstr;
308 while (*pstr == *ptype && *ptype) {
309 pstr++;
310 ptype++;
311 }
312 if (*ptype == '\0' &&
313 (*pstr == '\n' || *pstr == ' ' || *pstr == '\0')) {
314 if (end)
315 *end = __DECONST(char *, pstr);
316 return(i);
317 }
318 }
319 fprintf(stderr, "Could not find the type of %s\n", ptrstr);
320 return(0);
321 }
322
323 /* Return part of speech code for string passed */
324
325 int
getpos(const char * s)326 getpos(const char *s)
327 {
328 switch (*s) {
329 case 'n':
330 return(NOUN);
331 case 'a':
332 case 's':
333 return(ADJ);
334 case 'v':
335 return(VERB);
336 case 'r':
337 return(ADV);
338 default:
339 sprintf(msgbuf,
340 "WordNet library error: unknown part of speech %s\n", s);
341 display_message(msgbuf);
342 exit(-1);
343 }
344 }
345
346 /* Return synset type code for string passed. */
347
348 int
getsstype(const char * s)349 getsstype(const char *s)
350 {
351 switch (*s) {
352 case 'n':
353 return(NOUN);
354 case 'a':
355 return(ADJ);
356 case 'v':
357 return(VERB);
358 case 's':
359 return(SATELLITE);
360 case 'r':
361 return(ADV);
362 default:
363 sprintf(msgbuf, "WordNet library error: Unknown synset type %s\n", s);
364 display_message(msgbuf);
365 exit(-1);
366 }
367 }
368
369 /* Pass in string for POS, return corresponding integer value */
370
371 int
StrToPos(const char * str)372 StrToPos(const char *str)
373 {
374 if (!strcmp(str, "noun"))
375 return(NOUN);
376 else if (!strcmp(str, "verb"))
377 return(VERB);
378 else if (!strcmp(str, "adj"))
379 return(ADJ);
380 else if (!strcmp(str, "adv"))
381 return(ADV);
382 else {
383 return(-1);
384 }
385 }
386
387 #define MAX_TRIES 5
388
389 /* Find string for 'searchstr' as it is in index file */
390
GetWNStr(char * searchstr,int dbase)391 char *GetWNStr(char *searchstr, int dbase)
392 {
393 register int i, j, k, offset = 0;
394 register char c;
395 char *underscore = NULL, *hyphen = NULL, *period = NULL;
396 static char strings[MAX_TRIES][WORDBUF];
397
398 ToLowerCase(searchstr);
399
400 if (!(underscore = strchr(searchstr, '_')) &&
401 !(hyphen = strchr(searchstr, '-')) &&
402 !(period = strchr(searchstr, '.')))
403 return (strcpy(strings[0],searchstr));
404
405 for(i = 0; i < 3; i++)
406 strcpy(strings[i], searchstr);
407 if (underscore != NULL) strsubst(strings[1], '_', '-');
408 if (hyphen != NULL) strsubst(strings[2], '-', '_');
409 for(i = j = k = 0; (c = searchstr[i]) != '\0'; i++){
410 if(c != '_' && c != '-') strings[3][j++] = c;
411 if(c != '.') strings[4][k++] = c;
412 }
413 strings[3][j] = '\0';
414 strings[4][k] = '\0';
415
416 for(i = 1; i < MAX_TRIES; i++)
417 if(strcmp(strings[0], strings[i]) == 0) strings[i][0] = '\0';
418
419 for (i = (MAX_TRIES - 1); i >= 0; i--)
420 if (strings[i][0] != '\0')
421 if (bin_search(strings[i], indexfps[dbase]) != NULL)
422 offset = i;
423
424 return(strings[offset]);
425 }
426
427 /* Return synset for sense key passed. */
428
429 SynsetPtr
GetSynsetForSense(const char * sensekey)430 GetSynsetForSense(const char *sensekey)
431 {
432 long offset;
433
434 /* Pass in sense key and return parsed sysnet structure */
435
436 if ((offset = GetDataOffset(sensekey)))
437 return(read_synset(GetPOS(sensekey),
438 offset,
439 GetWORD(sensekey)));
440 else
441 return(NULL);
442 }
443
444 /* Find offset of sense key in data file */
445
446 long
GetDataOffset(const char * sensekey)447 GetDataOffset(const char *sensekey)
448 {
449 const char *line;
450
451 /* Pass in encoded sense string, return byte offset of corresponding
452 synset in data file. */
453
454 if (sensefp == NULL) {
455 display_message("WordNet library error: Sense index file not open\n");
456 return(0L);
457 }
458 line = bin_search(sensekey, sensefp);
459 if (line) {
460 while (*line++ != ' ');
461 return(atol(line));
462 } else
463 return(0L);
464 }
465
466 /* Find polysemy count for sense key passed. */
467
468 int
GetPolyCount(const char * sensekey)469 GetPolyCount(const char *sensekey)
470 {
471 IndexPtr idx;
472 int sense_cnt = 0;
473
474 /* Pass in encoded sense string and return polysemy count
475 for word in corresponding POS */
476
477 idx = index_lookup(GetWORD(sensekey), GetPOS(sensekey));
478 if (idx) {
479 sense_cnt = idx->sense_cnt;
480 free_index(idx);
481 }
482 return(sense_cnt);
483 }
484
/* Return the word part of the encoded sense key passed, that is, the
   text before the first '%'.

   The result is held in a static buffer that is overwritten by the
   next call.  Copying stops at the '%', at the end of the key when it
   contains no '%', or when the buffer is full (99 characters), so
   malformed or oversized keys can no longer overrun the buffer or be
   read past their terminator. */
const char *
GetWORD(const char *sensekey)
{
    static char word[100];
    size_t i = 0;

    while (*sensekey != '\0' && *sensekey != '%' && i < sizeof(word) - 1)
        word[i++] = *sensekey++;
    word[i] = '\0';

    return(word);
}
498
499 /* Return POS code for sense key passed. */
500
501 int
GetPOS(const char * sensekey)502 GetPOS(const char *sensekey)
503 {
504 int pos;
505
506 /* Pass in encoded sense string and return POS */
507
508 while (*sensekey++ != '%'); /* skip over WORD */
509 sscanf(sensekey, "%1d", &pos);
510 return(pos == SATELLITE ? ADJ : pos);
511 }
512
513 /* Reconstruct synset from synset pointer and return ptr to buffer */
514
515 const char *
FmtSynset(SynsetPtr synptr,int defn)516 FmtSynset(SynsetPtr synptr, int defn)
517 {
518 int i;
519 static char synset[SMLINEBUF];
520
521 synset[0] = '\0';
522
523 if (fileinfoflag)
524 sprintf(synset, "<%s> ", lexfiles[synptr->fnum]);
525
526 strcat(synset, "{ ");
527 for (i = 0; i < (synptr->wcount - 1); i++)
528 sprintf(synset + strlen(synset), "%s, ", synptr->words[i]);
529
530 strcat(synset, synptr->words[i]);
531
532 if (defn && synptr->defn)
533 sprintf(synset + strlen(synset), " (%s) ", synptr->defn);
534
535 strcat(synset, " }");
536 return(synset);
537 }
538
539 /* Convert WordNet sense number passed of IndexPtr entry to sense key. */
WNSnsToStr(IndexPtr idx,int sense)540 char *WNSnsToStr(IndexPtr idx, int sense)
541 {
542 SynsetPtr sptr, adjss;
543 char sensekey[512], lowerword[256];
544 int j, sstype, pos;
545
546 pos = getpos(idx->pos);
547 sptr = read_synset(pos, idx->offset[sense - 1], "");
548
549 if ((sstype = getsstype(sptr->pos)) == SATELLITE) {
550 for (j = 0; j < sptr->ptrcount; j++) {
551 if (sptr->ptrtyp[j] == SIMPTR) {
552 adjss = read_synset(sptr->ppos[j],sptr->ptroff[j],"");
553 sptr->headword = malloc (strlen(adjss->words[0]) + 1);
554 assert(sptr->headword);
555 strcpy(sptr->headword, adjss->words[0]);
556 strtolower(sptr->headword);
557 sptr->headsense = adjss->lexid[0];
558 free_synset(adjss);
559 break;
560 }
561 }
562 }
563
564 for (j = 0; j < sptr->wcount; j++) {
565 strcpy(lowerword, sptr->words[j]);
566 strtolower(lowerword);
567 if(!strcmp(lowerword, idx->wd))
568 break;
569 }
570
571 if (j == sptr->wcount) {
572 free_synset(sptr);
573 return(NULL);
574 }
575
576 if (sstype == SATELLITE)
577 sprintf(sensekey,"%s%%%-1.1d:%-2.2d:%-2.2d:%s:%-2.2d",
578 idx->wd, SATELLITE, sptr->fnum,
579 sptr->lexid[j], sptr->headword,sptr->headsense);
580 else
581 sprintf(sensekey,"%s%%%-1.1d:%-2.2d:%-2.2d::",
582 idx->wd, pos, sptr->fnum, sptr->lexid[j]);
583
584 free_synset(sptr);
585 return(strdup(sensekey));
586 }
587
588 /* Return sense number in database for word and lexsn passed. */
589
590 int
GetWNSense(const char * word,const char * lexsn)591 GetWNSense(const char *word, const char *lexsn)
592 {
593 SnsIndexPtr snsidx;
594 char buf[256];
595
596 sprintf(buf, "%s%%%s", word, lexsn); /* create sensekey */
597 if ((snsidx = GetSenseIndex(buf)) != NULL)
598 return(snsidx->wnsense);
599 else
600 return(0);
601 }
602
603 /* Return parsed sense index entry for sense key passed. */
604
605 SnsIndexPtr
GetSenseIndex(const char * sensekey)606 GetSenseIndex(const char *sensekey)
607 {
608 const char *line;
609 char buf[256], loc[9];
610 SnsIndexPtr snsidx = NULL;
611
612 if ((line = bin_search(sensekey, sensefp)) != NULL) {
613 snsidx = (SnsIndexPtr)malloc(sizeof(SnsIndex));
614 assert(snsidx);
615 sscanf(line, "%s %s %d %d\n",
616 buf,
617 loc,
618 &snsidx->wnsense,
619 &snsidx->tag_cnt);
620 snsidx->sensekey = malloc(strlen(buf + 1));
621 assert(snsidx->sensekey);
622 strcpy(snsidx->sensekey, buf);
623 snsidx->loc = atol(loc);
624 /* Parse out word from sensekey to make things easier for caller */
625 snsidx->word = strdup(GetWORD(snsidx->sensekey));
626 assert(snsidx->word);
627 snsidx->nextsi = NULL;
628 }
629 return(snsidx);
630 }
631
632 /* Return number of times sense is tagged */
633
GetTagcnt(IndexPtr idx,int sense)634 int GetTagcnt(IndexPtr idx, int sense)
635 {
636 char *sensekey;
637 const char *line;
638 char buf[256];
639 int snum, cnt = 0;
640
641 if (cntlistfp) {
642
643 sensekey = WNSnsToStr(idx, sense);
644 if ((line = bin_search(sensekey, cntlistfp)) != NULL) {
645 sscanf(line, "%s %d %d", buf, &snum, &cnt);
646 }
647 free(sensekey);
648 }
649
650 return(cnt);
651 }
652
/* Release a sense index entry returned by GetSenseIndex().  Passing
   NULL is allowed and does nothing.

   The sensekey string is released together with the word and the
   entry itself; GetSenseIndex() allocates all three, and sensekey was
   previously leaked. */
void FreeSenseIndex(SnsIndexPtr snsidx)
{
    if (snsidx) {
        free(snsidx->sensekey);
        free(snsidx->word);
        free(snsidx);
    }
}
660
661 const char *
GetOffsetForKey(unsigned int key)662 GetOffsetForKey(unsigned int key)
663 {
664 unsigned int rkey;
665 char ckey[7];
666 static char loc[11] = "";
667 const char *line;
668 char searchdir[256], tmpbuf[256];
669
670 /* Try to open file in case wn_init wasn't called */
671
672 if (!keyindexfp) {
673 strcpy(searchdir, SetSearchdir());
674 sprintf(tmpbuf, KEYIDXFILE, searchdir);
675 keyindexfp = fopen(tmpbuf, "r");
676 }
677 if (keyindexfp) {
678 sprintf(ckey, "%6.6d", key);
679 if ((line = bin_search(ckey, keyindexfp)) != NULL) {
680 sscanf(line, "%d %s", &rkey, loc);
681 return(loc);
682 }
683 }
684 return(NULL);
685 }
686
687 unsigned int
GetKeyForOffset(const char * loc)688 GetKeyForOffset(const char *loc)
689 {
690 unsigned int key;
691 char rloc[11] = "";
692 const char *line;
693 char searchdir[256], tmpbuf[256];
694
695 /* Try to open file in case wn_init wasn't called */
696
697 if (!revkeyindexfp) {
698 strcpy(searchdir, SetSearchdir());
699 sprintf(tmpbuf, REVKEYIDXFILE, searchdir);
700 revkeyindexfp = fopen(tmpbuf, "r");
701 }
702 if (revkeyindexfp) {
703 if ((line = bin_search(loc, revkeyindexfp)) != NULL) {
704 sscanf(line, "%s %d", rloc, &key );
705 return(key);
706 }
707 }
708 return(0);
709 }
710
711 const char *
SetSearchdir()712 SetSearchdir()
713 {
714 char *searchdir;
715 const char *env;
716
717 /* Find base directory for database. If set, use WNSEARCHDIR.
718 If not set, check for WNHOME/dict, otherwise use DEFAULTPATH. */
719
720 if ((env = getenv("WNSEARCHDIR")) != NULL)
721 return(env);
722 else if ((env = getenv("WNHOME")) != NULL) {
723 searchdir = malloc(strlen(env) + sizeof(DICTDIR));
724 sprintf(searchdir, "%s%s", env, DICTDIR);
725 return(searchdir);
726 } else
727 return(DEFAULTPATH);
728 }
729
730 #ifndef __GNUC__
731 # define __unused
732 #endif
default_display_message(const char * msg __unused)733 int default_display_message(const char *msg __unused)
734 {
735 return(-1);
736 }
737
738 /*
739 ** Wrapper functions for strstr that allow you to retrieve each
740 ** occurance of a word within a longer string, not just the first.
741 **
742 ** strstr_init is called with the same arguments as normal strstr,
743 ** but does not return any value.
744 **
745 ** strstr_getnext returns the position offset (not a pointer, as does
746 ** normal strstr) of the next occurance, or -1 if none remain.
747 */
748
strstr_init(char * string,char * word)749 void strstr_init (char *string, char *word) {
750 strstr_word = word;
751 strstr_stringstart = string;
752 strstr_stringcurrent = string;
753 }
754
strstr_getnext(void)755 int strstr_getnext (void) {
756 char *loc = strstr (strstr_stringcurrent, strstr_word);
757 if (loc == NULL) return -1;
758 strstr_stringcurrent = loc + 1;
759 return (loc - strstr_stringstart);
760 }
761