1 /*
2
3 search.c - WordNet library of search code
4
5 */
6
7 #ifdef _WINDOWS
8 #include <windows.h>
9 #include <windowsx.h>
10 #endif
11 #include <stdio.h>
12 #include <ctype.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <assert.h>
16
17 #include "wn.h"
18
19 __FBSDID("$Id: search.c,v 1.166 2006/11/14 20:52:45 wn Exp $");
20
/* For adjectives, indicates synset type */

#define DONT_KNOW	0
#define DIRECT_ANT	1	/* direct antonyms (cluster head) */
#define INDIRECT_ANT	2	/* indirect antonyms (similar) */
#define PERTAINYM	3	/* no antonyms or similars (pertainyms) */

/* Flags for printsynset() */

#define ALLWORDS	0	/* print all words */
#define SKIP_ANTS	0	/* skip printing antonyms in printsynset() */
#define PRINT_ANTS	1	/* print antonyms in printsynset() */
#define SKIP_MARKER	0	/* skip printing adjective marker */
#define PRINT_MARKER	1	/* print adjective marker */

/* Trace types used by printspaces() to determine print style */

#define TRACEP		1	/* traceptrs */
#define TRACEC		2	/* tracecoords() */
#define TRACEI		3	/* traceinherit() */

/* Values passed as the fourth argument of printsynset(); presumably
   DEFON prints the synset's definition and DEFOFF omits it - confirm
   against printsynset(). */

#define DEFON		1
#define DEFOFF		0
44
/* Forward declarations of file-local (static) functions, so that they
   can be called before their definitions appear. */

static void WNOverview(const char *, int);
static void findverbgroups(IndexPtr);
static void add_relatives(int, int);
static void free_rellist(void);
static void printsynset(const char *, SynsetPtr, const char *, int, int, int, int);
static void printantsynset(SynsetPtr, const char *, int);
static const char *printant(int, SynsetPtr, int, const char *, const char *);
static void printbuffer(const char *);
static void printsns(SynsetPtr, int);
static void printsense(SynsetPtr, int);
static void catword(char *, SynsetPtr, int, int, int);
static void printspaces(int, int);
static void printrelatives(IndexPtr, int);
static int HasHoloMero(IndexPtr, int);
static int HasPtr(SynsetPtr, int);
static int getsearchsense(SynsetPtr, int);
static int depthcheck(int, SynsetPtr);
static void interface_doevents(void);
static void getexample(const char *, const char *);
static int findexample(SynsetPtr);
67
68 /* Static variables */
69
70 static int prflag, sense, prlexid;
71 static int overflag = 0; /* set when output buffer overflows */
72 static char searchbuffer[SEARCHBUF];
73 static int lastholomero; /* keep track of last holo/meronym printed */
74 #define TMPBUFSIZE 1024*10
75 static char tmpbuf[TMPBUFSIZE]; /* general purpose printing buffer */
76 static char wdbuf[WORDBUF]; /* general purpose word buffer */
77 static char msgbuf[256]; /* buffer for constructing error messages */
78 static int adj_marker;
79
80 extern long last_bin_search_offset;
81
82
83 /* Find word in index file and return parsed entry in data structure.
84 Input word must be exact match of string in database. */
85
86 IndexPtr
index_lookup(const char * word,int dbase)87 index_lookup(const char *word, int dbase)
88 {
89 IndexPtr idx = NULL;
90 FILE *fp;
91 const char *line;
92
93 if ((fp = indexfps[dbase]) == NULL) {
94 sprintf(msgbuf, "WordNet library error: %s indexfile not open\n",
95 partnames[dbase]);
96 display_message(msgbuf);
97 return(NULL);
98 }
99
100 if ((line = bin_search(word, fp)) != NULL) {
101 idx = parse_index( last_bin_search_offset, dbase, line);
102 }
103 return (idx);
104 }
105
106 /* This function parses an entry from an index file into an Index data
107 * structure. It takes the byte offset and file number, and optionally the
108 * line. If the line is NULL, parse_index will get the line from the file.
109 * If the line is non-NULL, parse_index won't look at the file, but it still
110 * needs the dbase and offset parameters to be set, so it can store them in
111 * the Index struct.
112 */
113
114 IndexPtr
parse_index(long offset,int dbase,const char * line)115 parse_index(long offset, int dbase, const char *line) {
116
117 IndexPtr idx = NULL;
118 char *ptrtok;
119 int j;
120 int len;
121
122 if ( !line )
123 line = read_index( offset, indexfps[dbase] );
124
125 idx = malloc(sizeof(Index));
126 assert(idx);
127
128 /* set offset of entry in index file */
129 idx->idxoffset = offset;
130
131 idx->wd=NULL;
132 idx->pos=NULL;
133 idx->off_cnt=0;
134 idx->tagged_cnt = 0;
135 idx->sense_cnt=0;
136 idx->offset=NULL;
137 idx->ptruse_cnt=0;
138 idx->ptruse=NULL;
139
140 /* get the word */
141 ptrtok = strpbrk(line, " \n");
142 len = ptrtok - line;
143
144 idx->wd = malloc(len + 1);
145 assert(idx->wd);
146 memcpy(idx->wd, line, len);
147 idx->wd[len] = '\0';
148 line = ptrtok + 1;
149
150 /* get the part of speech */
151 ptrtok = strpbrk(line, " \n");
152 len = ptrtok - line;
153
154 idx->pos = malloc(len + 1);
155 assert(idx->pos);
156 memcpy(idx->pos, line, len);
157 idx->pos[len] = '\0';
158 line = ptrtok + 1;
159
160 /* get the collins count */
161 idx->sense_cnt = strtoul(line, &ptrtok, 10);
162
163 /* get the number of pointers types */
164 idx->ptruse_cnt = strtoul(ptrtok + 1, &ptrtok, 10);
165
166 if (idx->ptruse_cnt) {
167 idx->ptruse = malloc(idx->ptruse_cnt * (sizeof(short)));
168 assert(idx->ptruse);
169
170 /* get the pointers types */
171 for(j=0;j < idx->ptruse_cnt; j++) {
172 idx->ptruse[j] = getptrtype(ptrtok + 1, &ptrtok);
173 }
174 }
175
176 /* get the number of offsets */
177 idx->off_cnt = strtoul(ptrtok + 1, &ptrtok, 10);
178
179 /* get the number of senses that are tagged */
180 idx->tagged_cnt = strtoul(ptrtok + 1, &ptrtok, 10);
181
182 /* make space for the offsets */
183 idx->offset = malloc(idx->off_cnt * (sizeof(long)));
184 assert(idx->offset);
185
186 /* get the offsets */
187 for(j=0;j<idx->off_cnt;j++) {
188 idx->offset[j] = strtoul(ptrtok + 1, &ptrtok, 10);
189 }
190 return(idx);
191 }
192
193 /* 'smart' search of index file. Find word in index file, trying different
194 techniques - replace hyphens with underscores, replace underscores with
195 hyphens, strip hyphens and underscores, strip periods. */
196
197 IndexPtr
getindex(const char * searchstr,int dbase)198 getindex(const char *searchstr, int dbase)
199 {
200 int i, j, k;
201 char c;
202 char strings[MAX_FORMS][WORDBUF]; /* vector of search strings */
203 static IndexPtr offsets[MAX_FORMS];
204 static int offset;
205
206 /* This works like strrok(): if passed with a non-null string,
207 prepare vector of search strings and offsets. If string
208 is null, look at current list of offsets and return next
209 one, or NULL if no more alternatives for this word. */
210
211 if (searchstr != NULL) {
212
213 offset = 0;
214 strtolower2(searchstr, strings[0]);
215 searchstr = strings[0];
216 offsets[0] = 0;
217 for (i = 1; i < MAX_FORMS; i++) {
218 strcpy(strings[i], searchstr);
219 offsets[i] = 0;
220 }
221
222 strsubst(strings[1], '_', '-');
223 strsubst(strings[2], '-', '_');
224
225 /* remove all spaces and hyphens from last search string, then
226 all periods */
227 for (i = j = k = 0; (c = searchstr[i]) != '\0'; i++) {
228 if (c != '_' && c != '-')
229 strings[3][j++] = c;
230 if (c != '.')
231 strings[4][k++] = c;
232 }
233 strings[3][j] = '\0';
234 strings[4][k] = '\0';
235
236 /* Get offset of first entry. Then eliminate duplicates
237 and get offsets of unique strings. */
238
239 if (strings[0][0] != '\0')
240 offsets[0] = index_lookup(strings[0], dbase);
241
242 for (i = 1; i < MAX_FORMS; i++)
243 if ((strings[i][0]) != '\0' && (strcmp(strings[0], strings[i])))
244 offsets[i] = index_lookup(strings[i], dbase);
245 }
246
247
248 for (i = offset; i < MAX_FORMS; i++)
249 if (offsets[i]) {
250 offset = i + 1;
251 return(offsets[i]);
252 }
253
254 return(NULL);
255 }
256
257 /* Read synset from data file at byte offset passed and return parsed
258 entry in data structure. */
259
260 SynsetPtr
read_synset(int dbase,long boffset,const char * word)261 read_synset(int dbase, long boffset, const char *word)
262 {
263 FILE *fp;
264
265 if((fp = datafps[dbase]) == NULL) {
266 sprintf(msgbuf, "WordNet library error: %s datafile not open\n",
267 partnames[dbase]);
268 display_message(msgbuf);
269 return(NULL);
270 }
271
272 fseek(fp, boffset, 0); /* position file to byte offset requested */
273
274 return(parse_synset(fp, dbase, word)); /* parse synset and return */
275 }
276
277 /* Read synset at current byte offset in file and return parsed entry
278 in data structure. */
279
280 SynsetPtr
parse_synset(FILE * fp,int dbase,const char * word)281 parse_synset(FILE *fp, int dbase, const char *word)
282 {
283 char line[LINEBUF];
284 char *ptrtok;
285 int foundpert = 0;
286 char wdnum[3];
287 int i, len;
288 SynsetPtr synptr;
289 unsigned long loc; /* sanity check on file location */
290
291 loc = ftell(fp);
292
293 if (fgets(line, LINEBUF, fp) == NULL)
294 return(NULL);
295
296 synptr = (SynsetPtr)malloc(sizeof(Synset));
297 assert(synptr);
298
299 synptr->hereiam = 0;
300 synptr->sstype = DONT_KNOW;
301 synptr->fnum = 0;
302 synptr->pos = NULL;
303 synptr->wcount = 0;
304 synptr->words = NULL;
305 synptr->whichword = 0;
306 synptr->ptrcount = 0;
307 synptr->ptrtyp = NULL;
308 synptr->ptroff = NULL;
309 synptr->ppos = NULL;
310 synptr->pto = NULL;
311 synptr->pfrm = NULL;
312 synptr->fcount = 0;
313 synptr->frmid = NULL;
314 synptr->frmto = NULL;
315 synptr->defn = NULL;
316 synptr->key = 0;
317 synptr->nextss = NULL;
318 synptr->nextform = NULL;
319 synptr->searchtype = -1;
320 synptr->ptrlist = NULL;
321 synptr->headword = NULL;
322 synptr->headsense = 0;
323
324 /* looking at offset */
325 synptr->hereiam = strtol(line, &ptrtok, 10);
326
327 /* sanity check - make sure starting file offset matches first field */
328 if (synptr->hereiam != loc) {
329 sprintf(msgbuf, "WordNet library error: no synset at location %lu\n",
330 loc);
331 display_message(msgbuf);
332 free(synptr);
333 return(NULL);
334 }
335
336 /* looking at FNUM */
337 synptr->fnum = strtol(ptrtok + 1, &ptrtok, 10);
338
339 /* looking at POS */
340 ptrtok++;
341 len = strpbrk(ptrtok, " \n") - ptrtok;
342 synptr->pos = malloc(len + 1);
343 assert(synptr->pos);
344 memcpy(synptr->pos, ptrtok, len);
345 synptr->pos[len] = '\0';
346 ptrtok += len;
347 if (getsstype(synptr->pos) == SATELLITE)
348 synptr->sstype = INDIRECT_ANT;
349
350 /* looking at numwords */
351 synptr->wcount = strtol(ptrtok, &ptrtok, 16);
352 ptrtok++;
353
354 synptr->words = (char **)malloc(synptr->wcount * sizeof(char *));
355 assert(synptr->words);
356 synptr->wnsns = (int *)malloc(synptr->wcount * sizeof(int));
357 assert(synptr->wnsns);
358 synptr->lexid = (int *)malloc(synptr->wcount * sizeof(int));
359 assert(synptr->lexid);
360
361 for (i = 0; i < synptr->wcount; i++) {
362 len = strpbrk(ptrtok, " \n") - ptrtok;
363 synptr->words[i] = malloc(len + 1);
364 assert(synptr->words[i]);
365 memcpy(synptr->words[i], ptrtok, len);
366 synptr->words[i][len] = '\0';
367
368 /* is this the word we're looking for? */
369
370 if (word && !strcmp(word, synptr->words[i]))
371 synptr->whichword = i+1;
372
373 ptrtok += len;
374 synptr->lexid[i] = strtol(ptrtok, &ptrtok, 16);
375 ptrtok++;
376 }
377
378 /* get the pointer count */
379 synptr->ptrcount = strtol(ptrtok, &ptrtok, 10);
380 ptrtok++;
381
382 if (synptr->ptrcount) {
383
384 /* alloc storage for the pointers */
385 synptr->ptrtyp = malloc(synptr->ptrcount * sizeof(int));
386 assert(synptr->ptrtyp);
387 synptr->ptroff = malloc(synptr->ptrcount * sizeof(unsigned long));
388 assert(synptr->ptroff);
389 synptr->ppos = malloc(synptr->ptrcount * sizeof(int));
390 assert(synptr->ppos);
391 synptr->pto = malloc(synptr->ptrcount * sizeof(short));
392 assert(synptr->pto);
393 synptr->pfrm = malloc(synptr->ptrcount * sizeof(short));
394 assert(synptr->pfrm);
395
396 for(i = 0; i < synptr->ptrcount; i++) {
397 /* get the pointer type */
398 synptr->ptrtyp[i] = getptrtype(ptrtok, &ptrtok);
399 ptrtok++;
400 /* For adjectives, set the synset type if it has a direct
401 antonym */
402 if (dbase == ADJ && synptr->sstype == DONT_KNOW) {
403 if (synptr->ptrtyp[i] == ANTPTR)
404 synptr->sstype = DIRECT_ANT;
405 else if (synptr->ptrtyp[i] == PERTPTR)
406 foundpert = 1;
407 }
408
409 /* get the pointer offset */
410 synptr->ptroff[i] = strtol(ptrtok, &ptrtok, 10);
411 ptrtok++;
412
413 /* get the pointer part of speech */
414 synptr->ppos[i] = getpos(ptrtok);
415
416 /* get the lexp to/from restrictions */
417
418 ptrtok = strpbrk(ptrtok, " \n") + 1;
419 strncpy(wdnum, ptrtok, 2);
420 wdnum[2] = '\0';
421 synptr->pfrm[i] = strtoul(wdnum, NULL, 16);
422
423 synptr->pto[i] = strtoul(ptrtok + 2, &ptrtok, 16);
424 ptrtok++; /* Something like ``0000 '' */
425 }
426 }
427
428 /* If synset type is still not set, see if it's a pertainym */
429
430 if (dbase == ADJ && synptr->sstype == DONT_KNOW && foundpert == 1)
431 synptr->sstype = PERTAINYM;
432
433 /* retireve optional information from verb synset */
434 if(dbase == VERB) {
435 synptr->fcount = strtol(ptrtok, &ptrtok, 10);
436 ptrtok++;
437 /* allocate frame storage */
438
439 synptr->frmid = malloc(synptr->fcount * sizeof(int));
440 assert(synptr->frmid);
441 synptr->frmto = malloc(synptr->fcount * sizeof(int));
442 assert(synptr->frmto);
443
444 for(i=0;i<synptr->fcount;i++) {
445 /* skip the frame pointer (+) */
446 ptrtok = strpbrk(ptrtok, " \n") + 1;
447
448 synptr->frmid[i] = strtol(ptrtok, &ptrtok, 10);
449 ptrtok++;
450
451 synptr->frmto[i] = strtol(ptrtok, &ptrtok, 16);
452 ptrtok++;
453 }
454 }
455
456 /* get the optional definition */
457
458 ptrtok = strpbrk(ptrtok, " \n") + 1;
459 if (ptrtok && *ptrtok) {
460 char *defn;
461 len = strlen(ptrtok);
462 synptr->defn = malloc(len + 2);
463 assert(synptr->defn);
464 synptr->defn[0] = '(';
465 for (defn = synptr->defn + 1; *ptrtok; ptrtok++) {
466 switch (*ptrtok) {
467 case '\n':
468 case ' ':
469 /* skip adjacent and initial blanks: */
470 if (defn == synptr->defn + 1 || defn[-1] == ' ')
471 continue;
472 do
473 ptrtok++;
474 while (*ptrtok == '\n' || *ptrtok == ' ');
475 if (*ptrtok == '\0')
476 break; /* out of the loop */
477 *defn++ = ' ';
478 /* FALLTHROUGH */;
479 default:
480 *defn++ = *ptrtok;
481 continue;
482 }
483 break;
484 }
485 *defn++ = ')';
486 assert(defn - synptr->defn < len + 2);
487 *defn = '\0';
488 }
489
490 if (keyindexfp) { /* we have unique keys */
491 sprintf(tmpbuf, "%c:%8.8ld", partchars[dbase], synptr->hereiam);
492 synptr->key = GetKeyForOffset(tmpbuf);
493 }
494
495 /* Can't do earlier - calls indexlookup which messes up strtok calls */
496
497 for (i = 0; i < synptr->wcount; i++)
498 synptr->wnsns[i] = getsearchsense(synptr, i + 1);
499
500 return(synptr);
501 }
502
503 /* Free a synset linked list allocated by findtheinfo_ds() */
504
free_syns(SynsetPtr synptr)505 void free_syns(SynsetPtr synptr)
506 {
507 SynsetPtr cursyn, nextsyn;
508
509 if (synptr) {
510 cursyn = synptr;
511 while(cursyn) {
512 if (cursyn->nextform)
513 free_syns(cursyn->nextform);
514 nextsyn = cursyn->nextss;
515 free_synset(cursyn);
516 cursyn = nextsyn;
517 }
518 }
519 }
520
521 /* Free a synset */
522
free_synset(SynsetPtr synptr)523 void free_synset(SynsetPtr synptr)
524 {
525 int i;
526
527 free(synptr->pos);
528 for (i = 0; i < synptr->wcount; i++){
529 free(synptr->words[i]);
530 }
531 free(synptr->words);
532 free(synptr->wnsns);
533 free(synptr->lexid);
534 if (synptr->ptrcount) {
535 free(synptr->ptrtyp);
536 free(synptr->ptroff);
537 free(synptr->ppos);
538 free(synptr->pto);
539 free(synptr->pfrm);
540 }
541 if (synptr->fcount) {
542 free(synptr->frmid);
543 free(synptr->frmto);
544 }
545 if (synptr->defn)
546 free(synptr->defn);
547 if (synptr->headword)
548 free(synptr->headword);
549 if (synptr->ptrlist)
550 free_syns(synptr->ptrlist); /* changed from free_synset() */
551 free(synptr);
552 }
553
554 /* Free an index structure */
555
free_index(IndexPtr idx)556 void free_index(IndexPtr idx)
557 {
558 free(idx->wd);
559 free(idx->pos);
560 if (idx->ptruse)
561 free(idx->ptruse);
562 free(idx->offset);
563 free(idx);
564 }
565
566 /* Recursive search algorithm to trace a pointer tree */
567
traceptrs(SynsetPtr synptr,int ptyp,int dbase,int depth)568 static void traceptrs(SynsetPtr synptr, int ptyp, int dbase, int depth)
569 {
570 int i;
571 int extraindent = 0;
572 SynsetPtr cursyn;
573 char prefix[40], tbuf[20];
574 int realptr;
575
576 interface_doevents();
577 if (abortsearch)
578 return;
579
580 if (ptyp < 0) {
581 ptyp = -ptyp;
582 extraindent = 2;
583 }
584
585 for (i = 0; i < synptr->ptrcount; i++) {
586 if ((ptyp == HYPERPTR && (synptr->ptrtyp[i] == HYPERPTR ||
587 synptr->ptrtyp[i] == INSTANCE)) ||
588 (ptyp == HYPOPTR && (synptr->ptrtyp[i] == HYPOPTR ||
589 synptr->ptrtyp[i] == INSTANCES)) ||
590 ((synptr->ptrtyp[i] == ptyp) &&
591 ((synptr->pfrm[i] == 0) ||
592 (synptr->pfrm[i] == synptr->whichword)))) {
593
594 realptr = synptr->ptrtyp[i]; /* deal with INSTANCE */
595
596 if(!prflag) { /* print sense number and synset */
597 printsns(synptr, sense + 1);
598 prflag = 1;
599 }
600 printspaces(TRACEP, depth + extraindent);
601
602 switch(realptr) {
603 case PERTPTR:
604 if (dbase == ADV)
605 sprintf(prefix, "Derived from %s ",
606 partnames[synptr->ppos[i]]);
607 else
608 sprintf(prefix, "Pertains to %s ",
609 partnames[synptr->ppos[i]]);
610 break;
611 case ANTPTR:
612 if (dbase != ADJ)
613 sprintf(prefix, "Antonym of ");
614 break;
615 case PPLPTR:
616 sprintf(prefix, "Participle of verb ");
617 break;
618 case INSTANCE:
619 sprintf(prefix, "INSTANCE OF=> ");
620 break;
621 case INSTANCES:
622 sprintf(prefix, "HAS INSTANCE=> ");
623 break;
624 case HASMEMBERPTR:
625 sprintf(prefix, " HAS MEMBER: ");
626 break;
627 case HASSTUFFPTR:
628 sprintf(prefix, " HAS SUBSTANCE: ");
629 break;
630 case HASPARTPTR:
631 sprintf(prefix, " HAS PART: ");
632 break;
633 case ISMEMBERPTR:
634 sprintf(prefix, " MEMBER OF: ");
635 break;
636 case ISSTUFFPTR:
637 sprintf(prefix, " SUBSTANCE OF: ");
638 break;
639 case ISPARTPTR:
640 sprintf(prefix, " PART OF: ");
641 break;
642 default:
643 sprintf(prefix, "=> ");
644 break;
645 }
646
647 /* Read synset pointed to */
648 cursyn=read_synset(synptr->ppos[i], synptr->ptroff[i], "");
649
650 /* For Pertainyms and Participles pointing to a specific
651 sense, indicate the sense then retrieve the synset
652 pointed to and other info as determined by type.
653 Otherwise, just print the synset pointed to. */
654
655 if ((ptyp == PERTPTR || ptyp == PPLPTR) &&
656 synptr->pto[i] != 0) {
657 sprintf(tbuf, " (Sense %d)\n",
658 cursyn->wnsns[synptr->pto[i] - 1]);
659 printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
660 SKIP_ANTS, PRINT_MARKER);
661 if (ptyp == PPLPTR) { /* adjective pointing to verb */
662 printsynset(" =>", cursyn, "\n",
663 DEFON, ALLWORDS, PRINT_ANTS, PRINT_MARKER);
664 traceptrs(cursyn, HYPERPTR, getpos(cursyn->pos), 0);
665 } else if (dbase == ADV) { /* adverb pointing to adjective */
666 printsynset(" =>", cursyn, "\n",DEFON, ALLWORDS,
667 ((getsstype(cursyn->pos) == SATELLITE)
668 ? SKIP_ANTS : PRINT_ANTS), PRINT_MARKER);
669 #ifdef FOOP
670 traceptrs(cursyn, HYPERPTR, getpos(cursyn->pos), 0);
671 #endif
672 } else { /* adjective pointing to noun */
673 printsynset(" =>", cursyn, "\n",
674 DEFON, ALLWORDS, PRINT_ANTS, PRINT_MARKER);
675 traceptrs(cursyn, HYPERPTR, getpos(cursyn->pos), 0);
676 }
677 } else if (ptyp == ANTPTR && dbase != ADJ && synptr->pto[i] != 0) {
678 sprintf(tbuf, " (Sense %d)\n",
679 cursyn->wnsns[synptr->pto[i] - 1]);
680 printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
681 SKIP_ANTS, PRINT_MARKER);
682 printsynset(" =>", cursyn, "\n", DEFON, ALLWORDS,
683 PRINT_ANTS, PRINT_MARKER);
684 } else
685 printsynset(prefix, cursyn, "\n", DEFON, ALLWORDS,
686 PRINT_ANTS, PRINT_MARKER);
687
688 /* For HOLONYMS and MERONYMS, keep track of last one
689 printed in buffer so results can be truncated later. */
690
691 if (ptyp >= ISMEMBERPTR && ptyp <= HASPARTPTR)
692 lastholomero = strlen(searchbuffer);
693
694 if(depth) {
695 depth = depthcheck(depth, cursyn);
696 traceptrs(cursyn, ptyp, getpos(cursyn->pos), (depth+1));
697
698 free_synset(cursyn);
699 } else
700 free_synset(cursyn);
701 }
702 }
703 }
704
705 static void
tracecoords(SynsetPtr synptr,int ptyp,int depth)706 tracecoords(SynsetPtr synptr, int ptyp, int depth)
707 {
708 int i;
709 SynsetPtr cursyn;
710
711 interface_doevents();
712 if (abortsearch)
713 return;
714
715 for(i = 0; i < synptr->ptrcount; i++) {
716 if((synptr->ptrtyp[i] == HYPERPTR || synptr->ptrtyp[i] == INSTANCE) &&
717 ((synptr->pfrm[i] == 0) ||
718 (synptr->pfrm[i] == synptr->whichword))) {
719
720 if(!prflag) {
721 printsns(synptr, sense + 1);
722 prflag = 1;
723 }
724 printspaces(TRACEC, depth);
725
726 cursyn = read_synset(synptr->ppos[i], synptr->ptroff[i], "");
727
728 printsynset("-> ", cursyn, "\n", DEFON, ALLWORDS,
729 SKIP_ANTS, PRINT_MARKER);
730
731 traceptrs(cursyn, ptyp, getpos(cursyn->pos), depth);
732
733 if(depth) {
734 depth = depthcheck(depth, cursyn);
735 tracecoords(cursyn, ptyp, (depth+1));
736 free_synset(cursyn);
737 } else
738 free_synset(cursyn);
739 }
740 }
741 }
742
743 static void
traceclassif(SynsetPtr synptr,int search)744 traceclassif(SynsetPtr synptr, int search)
745 {
746 int i, j, idx;
747 SynsetPtr cursyn;
748 unsigned int prlist[1024];
749 char head[60];
750 int svwnsnsflag;
751
752 interface_doevents();
753 if (abortsearch)
754 return;
755
756 idx = 0;
757
758 for (i = 0; i < synptr->ptrcount; i++) {
759 if (((synptr->ptrtyp[i] >= CLASSIF_START) &&
760 (synptr->ptrtyp[i] <= CLASSIF_END) && search == CLASSIFICATION) ||
761
762 ((synptr->ptrtyp[i] >= CLASS_START) &&
763 (synptr->ptrtyp[i] <= CLASS_END) && search == CLASS) ) {
764
765 if (!prflag) {
766 printsns(synptr, sense + 1);
767 prflag = 1;
768 }
769
770 cursyn = read_synset(synptr->ppos[i], synptr->ptroff[i], "");
771
772 for (j = 0; j < idx; j++) {
773 if (synptr->ptroff[i] == prlist[j]) {
774 break;
775 }
776 }
777
778 if (j == idx) {
779 prlist[idx++] = synptr->ptroff[i];
780 printspaces(TRACEP, 0);
781
782 if (synptr->ptrtyp[i] == CLASSIF_CATEGORY)
783 strcpy(head, "TOPIC->(");
784 else if (synptr->ptrtyp[i] == CLASSIF_USAGE)
785 strcpy(head, "USAGE->(");
786 else if (synptr->ptrtyp[i] == CLASSIF_REGIONAL)
787 strcpy(head, "REGION->(");
788 else if (synptr->ptrtyp[i] == CLASS_CATEGORY)
789 strcpy(head, "TOPIC_TERM->(");
790 else if (synptr->ptrtyp[i] == CLASS_USAGE)
791 strcpy(head, "USAGE_TERM->(");
792 else if (synptr->ptrtyp[i] == CLASS_REGIONAL)
793 strcpy(head, "REGION_TERM->(");
794
795 strcat(head, partnames[synptr->ppos[i]]);
796 strcat(head, ") ");
797
798 svwnsnsflag = wnsnsflag;
799 wnsnsflag = 1;
800
801 printsynset(head, cursyn, "\n", DEFOFF, ALLWORDS,
802 SKIP_ANTS, SKIP_MARKER);
803
804 wnsnsflag = svwnsnsflag;
805 }
806
807 free_synset(cursyn);
808 }
809 }
810 }
811
812 static void
tracenomins(SynsetPtr synptr)813 tracenomins(SynsetPtr synptr)
814 {
815 int i, j, idx;
816 SynsetPtr cursyn;
817 #ifdef FOOP
818 long int prlist[1024];
819 #endif
820 char prefix[40], tbuf[20];
821
822 interface_doevents();
823 if (abortsearch)
824 return;
825
826 idx = 0;
827
828 for (i = 0; i < synptr->ptrcount; i++) {
829 if ((synptr->ptrtyp[i] == DERIVATION) &&
830 (synptr->pfrm[i] == synptr->whichword)) {
831
832 if (!prflag) {
833 printsns(synptr, sense + 1);
834 prflag = 1;
835 }
836
837 printspaces(TRACEP, 0);
838
839 sprintf(prefix, "RELATED TO->(%s) ",
840 partnames[synptr->ppos[i]]);
841
842 cursyn = read_synset(synptr->ppos[i], synptr->ptroff[i], "");
843
844 sprintf(tbuf, "#%d\n",
845 cursyn->wnsns[synptr->pto[i] - 1]);
846 printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
847 SKIP_ANTS, SKIP_MARKER);
848
849 #ifdef FOOP
850 /* only print synset once, even if more than one link */
851
852 for (j = 0; j < idx; j++) {
853 if (synptr->ptroff[i] == prlist[j]) {
854 break;
855 }
856 }
857 #else
858 j = idx;
859 #endif
860
861 if (j == idx) {
862 #ifdef FOOP
863 prlist[idx++] = synptr->ptroff[i];
864 #endif
865 printspaces(TRACEP, 2);
866 printsynset("=> ", cursyn, "\n", DEFON, ALLWORDS,
867 SKIP_ANTS, PRINT_MARKER);
868 }
869
870 free_synset(cursyn);
871 }
872 }
873 }
874
875 /* Trace through the hypernym tree and print all MEMBER, STUFF
876 and PART info. */
877
878 static void
traceinherit(SynsetPtr synptr,int ptrbase,int depth)879 traceinherit(SynsetPtr synptr, int ptrbase, int depth)
880 {
881 int i;
882 SynsetPtr cursyn;
883
884 interface_doevents();
885 if (abortsearch)
886 return;
887
888 for(i=0;i<synptr->ptrcount;i++) {
889 if((synptr->ptrtyp[i] == HYPERPTR) &&
890 ((synptr->pfrm[i] == 0) ||
891 (synptr->pfrm[i] == synptr->whichword))) {
892
893 if(!prflag) {
894 printsns(synptr, sense + 1);
895 prflag = 1;
896 }
897 printspaces(TRACEI, depth);
898
899 cursyn = read_synset(synptr->ppos[i], synptr->ptroff[i], "");
900
901 printsynset("=> ", cursyn, "\n", DEFON, ALLWORDS,
902 SKIP_ANTS, PRINT_MARKER);
903
904 traceptrs(cursyn, ptrbase, NOUN, depth);
905 traceptrs(cursyn, ptrbase + 1, NOUN, depth);
906 traceptrs(cursyn, ptrbase + 2, NOUN, depth);
907
908 if(depth) {
909 depth = depthcheck(depth, cursyn);
910 traceinherit(cursyn, ptrbase, depth + 1);
911 free_synset(cursyn);
912 } else
913 free_synset(cursyn);
914 }
915 }
916
917 /* Truncate search buffer after last holo/meronym printed */
918 searchbuffer[lastholomero] = '\0';
919 }
920
/* Print the MEMBER, STUFF and PART relations of synptr.  An HMERONYM
   search uses the three pointer types starting at HASMEMBERPTR and
   additionally walks the hypernyms with traceinherit(); every other
   search type uses the three pointer types starting at ISMEMBERPTR. */

static void partsall(SynsetPtr synptr, int ptyp)
{
    int ptrbase;
    int kind;

    if (ptyp == HMERONYM)
	ptrbase = HASMEMBERPTR;
    else
	ptrbase = ISMEMBERPTR;

    /* the synset's own MEMBER, STUFF and PART pointers come first */
    kind = 0;
    while (kind < 3) {
	if (HasPtr(synptr, ptrbase + kind))
	    traceptrs(synptr, ptrbase + kind, NOUN, 1);

	interface_doevents();
	if (abortsearch)
	    return;

	kind++;
    }

    if (ptyp != HMERONYM)
	return;

    /* inherited relations: remember where the buffer ends now, then
       trace the hypernyms */
    lastholomero = strlen(searchbuffer);
    traceinherit(synptr, ptrbase, 1);
}
949
/* Print the antonyms of an adjective synset.

   This search is only applicable for ADJ synsets which have either
   direct or indirect antonyms (not valid for pertainyms); any other
   synset type prints nothing.  For an indirect antonym the synset
   reached through the first SIMPTR pointer (the cluster head) supplies
   the antonym pointers.  Synsets that cannot be read are skipped. */

static void traceadjant(SynsetPtr synptr)
{
    SynsetPtr newsynptr;
    int i, j;
    int anttype = DIRECT_ANT;
    SynsetPtr simptr, antptr;
    static char similar[] = " => ";

    if (synptr->sstype != DIRECT_ANT && synptr->sstype != INDIRECT_ANT)
	return;

    printsns(synptr, sense + 1);
    printbuffer("\n");

    /* if indirect, get cluster head */

    if (synptr->sstype == INDIRECT_ANT) {
	anttype = INDIRECT_ANT;
	/* bounded scan: never read past the pointer array when the
	   synset has no SIMPTR pointer */
	for (i = 0; i < synptr->ptrcount; i++) {
	    if (synptr->ptrtyp[i] == SIMPTR)
		break;
	}
	if (i == synptr->ptrcount)
	    return;
	newsynptr = read_synset(ADJ, synptr->ptroff[i], "");
	if (newsynptr == NULL)
	    return;
    } else
	newsynptr = synptr;

    /* find antonyms - if direct, make sure that the antonym
       ptr we're looking at is from this word */

    for (i = 0; i < newsynptr->ptrcount; i++) {

	if (newsynptr->ptrtyp[i] == ANTPTR &&
	    ((anttype == DIRECT_ANT &&
	      newsynptr->pfrm[i] == newsynptr->whichword) ||
	     (anttype == INDIRECT_ANT))) {

	    /* read the antonym's synset and print it.  if a
	       direct antonym, print its satellites. */

	    antptr = read_synset(ADJ, newsynptr->ptroff[i], "");
	    if (antptr == NULL)
		continue;

	    if (anttype == DIRECT_ANT) {
		printsynset("", antptr, "\n", DEFON, ALLWORDS,
			    PRINT_ANTS, PRINT_MARKER);
		for (j = 0; j < antptr->ptrcount; j++) {
		    if (antptr->ptrtyp[j] == SIMPTR) {
			simptr = read_synset(ADJ, antptr->ptroff[j], "");
			if (simptr == NULL)
			    continue;
			printsynset(similar, simptr, "\n", DEFON,
				    ALLWORDS, SKIP_ANTS, PRINT_MARKER);
			free_synset(simptr);
		    }
		}
	    } else
		printantsynset(antptr, "\n", DEFON);

	    free_synset(antptr);
	}
    }

    /* the cluster head was read here, so it is released here */
    if (newsynptr != synptr)
	free_synset(newsynptr);
}
1011
1012
1013 /* Fetch the given example sentence from the example file and print it out */
1014
1015 static void
getexample(const char * offset,const char * wd)1016 getexample(const char *offset, const char *wd)
1017 {
1018 const char *line;
1019 char sentbuf[512];
1020
1021 if (vsentfilefp != NULL) {
1022 line = bin_search(offset, vsentfilefp);
1023 if (line) {
1024 while(*line != ' ')
1025 line++;
1026
1027 printbuffer(" EX: ");
1028 sprintf(sentbuf, line, wd);
1029 printbuffer(sentbuf);
1030 }
1031 }
1032 }
1033
1034 /* Find the example sentence references in the example sentence index file */
1035
findexample(SynsetPtr synptr)1036 int findexample(SynsetPtr synptr)
1037 {
1038 char tbuf[256];
1039 const char *temp, *offset;
1040 int wdnum;
1041 int found = 0;
1042
1043 if (vidxfilefp != NULL) {
1044 wdnum = synptr->whichword - 1;
1045
1046 sprintf(tbuf,"%s%%%-1.1d:%-2.2d:%-2.2d::",
1047 synptr->words[wdnum],
1048 getpos(synptr->pos),
1049 synptr->fnum,
1050 synptr->lexid[wdnum]);
1051
1052 if ((temp = bin_search(tbuf, vidxfilefp)) != NULL) {
1053
1054 /* skip over sense key and get sentence numbers */
1055
1056 temp += strlen(synptr->words[wdnum]) + 11;
1057 strcpy(tbuf, temp);
1058
1059 offset = strtok(tbuf, " ,\n");
1060
1061 while (offset) {
1062 getexample(offset, synptr->words[wdnum]);
1063 offset = strtok(NULL, ",\n");
1064 }
1065 found = 1;
1066 }
1067 }
1068 return(found);
1069 }
1070
/* Print example sentences for a verb synset or, when findexample()
   finds none, its sentence frames.  If prsynset is non-zero the sense
   header is printed first.  Only frames that apply to the search word
   (marked "=>") or to every word of the synset (marked "*>") are
   shown. */

static void printframe(SynsetPtr synptr, int prsynset)
{
    int f;

    if (prsynset)
	printsns(synptr, sense + 1);

    if (findexample(synptr))
	return;			/* example sentences were printed */

    for (f = 0; f < synptr->fcount; f++) {
	int target = synptr->frmto[f];

	if (target == synptr->whichword)
	    printbuffer(" => ");
	else if (target == 0)
	    printbuffer(" *> ");
	else
	    continue;		/* frame belongs to another word */

	printbuffer(frametext[synptr->frmid[f]]);
	printbuffer("\n");
    }
}
1092
/* Find all SEEALSO pointers from the searchword and print the word or
   synset pointed to.  The first item is introduced by " Phrasal Verb-> "
   for verbs and " Also See-> " otherwise; later items are separated by
   "; ".  A newline is printed at the end if anything was printed.
   Pointers whose target synset cannot be read are skipped. */

static void printseealso(SynsetPtr synptr)
{
    SynsetPtr cursyn;
    int i, first = 1;
    int svwnsnsflag;
    char firstline_v[] = " Phrasal Verb-> ";
    char firstline_nar[] = " Also See-> ";
    char otherlines[] = "; ";
    char *prefix;

    if (getpos(synptr->pos) == VERB)
	prefix = firstline_v;
    else
	prefix = firstline_nar;

    for (i = 0; i < synptr->ptrcount; i++) {
	if ((synptr->ptrtyp[i] == SEEALSOPTR) &&
	    ((synptr->pfrm[i] == 0) ||
	     (synptr->pfrm[i] == synptr->whichword))) {

	    cursyn = read_synset(synptr->ppos[i], synptr->ptroff[i], "");
	    if (cursyn == NULL)
		continue;

	    /* force wnsnsflag on while this synset is printed */
	    svwnsnsflag = wnsnsflag;
	    wnsnsflag = 1;
	    printsynset(prefix, cursyn, "", DEFOFF,
			synptr->pto[i] == 0 ? ALLWORDS : synptr->pto[i],
			SKIP_ANTS, SKIP_MARKER);
	    wnsnsflag = svwnsnsflag;

	    free_synset(cursyn);

	    if (first) {
		prefix = otherlines;
		first = 0;
	    }
	}
    }
    if (!first)
	printbuffer("\n");
}
1136
/* Print a one-line familiarity rating for an index entry.  The rating
   is derived from the entry's polysemy count (sense_cnt): 0, 1 and 2
   map to the first three categories, then 3-4, 5-8, 9-16, 17-32 and
   more than 32 map to the remaining five.  A NULL idx prints nothing. */

static void freq_word(IndexPtr idx)
{
    static const char *a_an[] = {
	"", "a noun", "a verb", "an adjective", "an adverb" };
    static const char *freqcats[] = {
	"extremely rare","very rare","rare","uncommon","common",
	"familiar","very familiar","extremely familiar"
    };
    int cnt;
    int familiar;

    if (!idx)
	return;

    cnt = idx->sense_cnt;

    if (cnt > 32)
	familiar = 7;
    else if (cnt >= 17)
	familiar = 6;
    else if (cnt >= 9)
	familiar = 5;
    else if (cnt >= 5)
	familiar = 4;
    else if (cnt >= 3)
	familiar = 3;
    else if (cnt == 2)
	familiar = 2;
    else if (cnt == 1)
	familiar = 1;
    else
	familiar = 0;

    sprintf(tmpbuf,
	    "\n%s used as %s is %s (polysemy count = %d)\n",
	    idx->wd, a_an[getpos(idx->pos)], freqcats[familiar], cnt);
    printbuffer(tmpbuf);
}
1165
1166 static void
wngrep(const char * word_passed,int pos)1167 wngrep(const char *word_passed, int pos) {
1168 FILE *inputfile;
1169 char word[256];
1170 int wordlen, linelen, loc;
1171 char line[1024];
1172 int count = 0;
1173
1174 inputfile = indexfps[pos];
1175 if (inputfile == NULL) {
1176 sprintf (msgbuf, "WordNet library error: Can't perform compounds "
1177 "search because %s index file is not open\n", partnames[pos]);
1178 display_message (msgbuf);
1179 return;
1180 }
1181 rewind(inputfile);
1182
1183 strcpy (word, word_passed);
1184 ToLowerCase(word); /* map to lower case for index file search */
1185 strsubst (word, ' ', '_'); /* replace spaces with underscores */
1186 wordlen = strlen (word);
1187
1188 while (fgets (line, 1024, inputfile) != NULL) {
1189 for (linelen = 0; line[linelen] != ' '; linelen++) {}
1190 if (linelen < wordlen)
1191 continue;
1192 line[linelen] = '\0';
1193 strstr_init (line, word);
1194 while ((loc = strstr_getnext ()) != -1) {
1195 if (
1196 /* at the start of the line */
1197 (loc == 0) ||
1198 /* at the end of the line */
1199 ((linelen - wordlen) == loc) ||
1200 /* as a word in the middle of the line */
1201 (((line[loc - 1] == '-') || (line[loc - 1] == '_')) &&
1202 ((line[loc + wordlen] == '-') || (line[loc + wordlen] == '_')))
1203 ) {
1204 strsubst (line, '_', ' ');
1205 sprintf (tmpbuf, "%s\n", line);
1206 printbuffer (tmpbuf);
1207 break;
1208 }
1209 }
1210 if (count++ % 2000 == 0) {
1211 interface_doevents ();
1212 if (abortsearch) break;
1213 }
1214 }
1215 }
1216
/* Structure to keep track of 'relative groups'.  All senses in a relative
   group are displayed together at end of search.  Transitivity is
   supported, so if either of a new set of related senses is already
   in a 'relative group', the other sense is added to that group as well. */

struct relgrp {
    int senses[MAXSENSE];       /* senses[i] is set to 1 when the sense with
                                   (0-based) index i belongs to this group */
    struct relgrp *next;        /* next group in the list, NULL at the end */
};
static struct relgrp *rellist;  /* head of the list of groups built during
                                   the current relatives search */

/* Allocate a new, empty group (defined further down). */
static struct relgrp *mkrellist(void);

/* Simple hash function */
#define HASHTABSIZE 1223        /* Prime number. Must be > 2*MAXTOPS */
#define hash(n) ((n) % HASHTABSIZE)
1233
1234 /* Find relative groups for all senses of target word in given part
1235 of speech. */
1236
/* Group related senses of the index entry `idx` and print them.
   Only the VERB database is handled; for any other `dbase` no output
   is produced.  The group list is reset on entry and released on exit. */
static void relatives(IndexPtr idx, int dbase)
{
    /* Every search starts from an empty group list */
    rellist = NULL;

    if (dbase == VERB) {
        findverbgroups(idx);

        /* Give the interface a chance to request an abort before the
           (potentially long) printing phase. */
        interface_doevents();
        if (!abortsearch)
            printrelatives(idx, VERB);
    }

    free_rellist();
}
1256
/* Build the relative groups for all senses of the verb entry `idx`.
   For every VERBGROUP pointer of a sense whose target offset is another
   sense of the same entry, the two sense indices are recorded with
   add_relatives(). */
static void findverbgroups(IndexPtr idx)
{
    int snum, p, other;
    SynsetPtr syn;

    assert(idx);

    /* Visit each sense of the word */
    for (snum = 0; snum < idx->off_cnt; snum++) {

        syn = read_synset(VERB, idx->offset[snum], idx->wd);

        for (p = 0; p < syn->ptrcount; p++) {
            if (syn->ptrtyp[p] != VERBGROUP)
                continue;

            /* Translate the pointer's target offset into a sense index
               of this entry; the first match wins. */
            other = 0;
            while (other < idx->off_cnt &&
                   syn->ptroff[p] != idx->offset[other])
                other++;

            if (other < idx->off_cnt)
                add_relatives(snum, other);
        }
        free_synset(syn);
    }
}
1287
1288 static void
add_relatives(int rel1,int rel2)1289 add_relatives(int rel1, int rel2)
1290 {
1291 int i;
1292 struct relgrp *rel, *last = NULL, *r;
1293
1294 /* If either of the new relatives are already in a relative group,
1295 then add the other to the existing group (transitivity).
1296 Otherwise create a new group and add these 2 senses to it. */
1297
1298 for (rel = rellist; rel; rel = rel->next) {
1299 if (rel->senses[rel1] == 1 || rel->senses[rel2] == 1) {
1300 rel->senses[rel1] = rel->senses[rel2] = 1;
1301
1302 /* If part of another relative group, merge the groups */
1303 for (r = rellist; r; r = r->next) {
1304 if (r != rel &&
1305 (r->senses[rel1] == 1 || r->senses[rel2] == 1)) {
1306 for (i = 0; i < MAXSENSE; i++)
1307 rel->senses[i] |= r->senses[i];
1308 }
1309 }
1310 return;
1311 }
1312 last = rel;
1313 }
1314 rel = mkrellist();
1315 rel->senses[rel1] = rel->senses[rel2] = 1;
1316 if (rellist == NULL)
1317 rellist = rel;
1318 else
1319 last->next = rel;
1320 }
1321
mkrellist(void)1322 static struct relgrp *mkrellist(void)
1323 {
1324 struct relgrp *rel;
1325 int i;
1326
1327 rel = (struct relgrp *) malloc(sizeof(struct relgrp));
1328 assert(rel);
1329 for (i = 0; i < MAXSENSE; i++)
1330 rel->senses[i] = 0;
1331 rel->next = NULL;
1332 return(rel);
1333 }
1334
free_rellist(void)1335 static void free_rellist(void)
1336 {
1337 struct relgrp *rel, *next;
1338
1339 rel = rellist;
1340 while(rel) {
1341 next = rel->next;
1342 free(rel);
1343 rel = next;
1344 }
1345 }
1346
/* Print all senses of `idx`, grouped by the relative groups on rellist.
   Senses belonging to a group are printed together, each followed by its
   HYPERPTR trace, with a separator line after the group.  Senses that are
   in no group are then printed individually, each followed by the same
   separator.  A sense is printed at most once. */
static void printrelatives(IndexPtr idx, int dbase)
{
    int shown[MAXSENSE];        /* shown[s] != 0 once sense s was printed */
    struct relgrp *grp;
    SynsetPtr syn;
    int s, printed_any;

    for (s = 0; s < idx->off_cnt; s++)
        shown[s] = 0;
    prflag = 1;

    /* Pass 1: grouped senses */
    for (grp = rellist; grp != NULL; grp = grp->next) {
        printed_any = 0;
        for (s = 0; s < idx->off_cnt; s++) {
            if (!grp->senses[s] || shown[s])
                continue;
            printed_any = 1;
            syn = read_synset(dbase, idx->offset[s], "");
            printsns(syn, s + 1);
            traceptrs(syn, HYPERPTR, dbase, 0);
            shown[s] = 1;
            free_synset(syn);
        }
        if (printed_any)
            printbuffer("--------------\n");
    }

    /* Pass 2: everything that was not part of any group */
    for (s = 0; s < idx->off_cnt; s++) {
        if (shown[s])
            continue;
        syn = read_synset(dbase, idx->offset[s], "");
        printsns(syn, s + 1);
        traceptrs(syn, HYPERPTR, dbase, 0);
        printbuffer("--------------\n");
        free_synset(syn);
    }
}
1384
/*
  Search code interfaces to WordNet database

  findtheinfo() - print search results and return ptr to output buffer
  findtheinfo_ds() - return search results in linked list data structure
*/
1391
/* Run search type `ptyp` for `searchstr` in database `dbase` and return
   the formatted text.

   searchstr  - string handed to getindex(); after the first call it is
                replaced by NULL so that further calls yield the remaining
                spellings of the word
   dbase      - part of speech / database passed to getindex() and
                read_synset()
   ptyp       - search type; a negative value is negated and turns on
                recursive tracing (depth is set to 1)
   whichsense - ALLSENSES, or a 1-based sense number to restrict output to

   Output is accumulated in the file-level searchbuffer through
   printbuffer(), and the counters in wnresults are updated.  The return
   value is whatever strsubst() returns after replacing underscores with
   spaces in searchbuffer. */
const char *
findtheinfo(const char *searchstr, int dbase, int ptyp, int whichsense)
{
    SynsetPtr cursyn;
    IndexPtr idx = NULL;
    int depth = 0;
    int i, offsetcnt;
    char *bufstart;             /* where the sense-count header of the
                                   current index entry gets patched in */
    unsigned long offsets[MAXSENSE];    /* synset offsets already handled */
    int skipit = 0;

    /* Initializations -
       clear output buffer, search results structure, flags */

    searchbuffer[0] = '\0';

    wnresults.numforms = wnresults.printcnt = 0;
    wnresults.searchbuf = searchbuffer;
    wnresults.searchds = NULL;

    abortsearch = overflag = 0;
    for (i = 0; i < MAXSENSE; i++)
        offsets[i] = 0;

    switch (ptyp) {
    case OVERVIEW:
        WNOverview(searchstr, dbase);
        break;
    case FREQ:
        /* one familiarity line per spelling found */
        while ((idx = getindex(searchstr, dbase)) != NULL) {
            searchstr = NULL;
            wnresults.SenseCount[wnresults.numforms] = idx->off_cnt;
            freq_word(idx);
            free_index(idx);
            wnresults.numforms++;
        }
        break;
    case WNGREP:
        wngrep(searchstr, dbase);
        break;
    case RELATIVES:
    case VERBGROUP:
        /* grouped senses, once per spelling found */
        while ((idx = getindex(searchstr, dbase)) != NULL) {
            searchstr = NULL;
            wnresults.SenseCount[wnresults.numforms] = idx->off_cnt;
            relatives(idx, dbase);
            free_index(idx);
            wnresults.numforms++;
        }
        break;
    default:

        /* If negative search type, set flag for recursive search */
        if (ptyp < 0) {
            ptyp = -ptyp;
            depth = 1;
        }
        bufstart = searchbuffer;
        offsetcnt = 0;

        /* look at all spellings of word */

        while ((idx = getindex(searchstr, dbase)) != NULL) {

            searchstr = NULL;   /* clear out for next call to getindex() */
            wnresults.SenseCount[wnresults.numforms] = idx->off_cnt;
            wnresults.OutSenseCount[wnresults.numforms] = 0;

            /* Print extra sense msgs if looking at all senses */
            /* The blank line written here is a placeholder: the
               "N senses of ..." text is copied over it further down.
               NOTE(review): the literal must be at least as wide as that
               text - confirm its width against the upstream source. */
            if (whichsense == ALLSENSES)
                printbuffer(
" \n");

            /* Go through all of the searchword's senses in the
               database and perform the search requested. */

            /* `sense` is not declared in this function; it is a variable
               defined elsewhere in the file. */
            for (sense = 0; sense < idx->off_cnt; sense++) {

                if (whichsense == ALLSENSES || whichsense == sense + 1) {
                    prflag = 0;

                    /* Determine if this synset has already been done
                       with a different spelling. If so, skip it. */
                    for (i = 0, skipit = 0; i < offsetcnt && !skipit; i++) {
                        if (offsets[i] == idx->offset[sense])
                            skipit = 1;
                    }
                    if (skipit != 1) {
                        /* remember the offset, then dispatch on search type */
                        offsets[offsetcnt++] = idx->offset[sense];
                        cursyn = read_synset(dbase, idx->offset[sense], idx->wd);
                        switch(ptyp) {
                        case ANTPTR:
                            if(dbase == ADJ)
                                traceadjant(cursyn);
                            else
                                traceptrs(cursyn, ANTPTR, dbase, depth);
                            break;

                        case COORDS:
                            tracecoords(cursyn, HYPOPTR, depth);
                            break;

                        case FRAMES:
                            printframe(cursyn, 1);
                            break;

                        case MERONYM:
                            /* all three "has ..." pointer types */
                            traceptrs(cursyn, HASMEMBERPTR, dbase, depth);
                            traceptrs(cursyn, HASSTUFFPTR, dbase, depth);
                            traceptrs(cursyn, HASPARTPTR, dbase, depth);
                            break;

                        case HOLONYM:
                            /* all three "is ... of" pointer types */
                            traceptrs(cursyn, ISMEMBERPTR, dbase, depth);
                            traceptrs(cursyn, ISSTUFFPTR, dbase, depth);
                            traceptrs(cursyn, ISPARTPTR, dbase, depth);
                            break;

                        case HMERONYM:
                            partsall(cursyn, HMERONYM);
                            break;

                        case HHOLONYM:
                            partsall(cursyn, HHOLONYM);
                            break;

                        case SEEALSOPTR:
                            printseealso(cursyn);
                            break;

#ifdef FOOP
                        case PPLPTR:
                            traceptrs(cursyn, ptyp, dbase, depth);
                            traceptrs(cursyn, PPLPTR, dbase, depth);
                            break;
#endif

                        case SIMPTR:
                        case SYNS:
                        case HYPERPTR:
                            printsns(cursyn, sense + 1);
                            prflag = 1;

                            traceptrs(cursyn, ptyp, dbase, depth);

                            if (dbase == ADJ) {
/*                              traceptrs(cursyn, PERTPTR, dbase, depth); */
                                traceptrs(cursyn, PPLPTR, dbase, depth);
                            } else if (dbase == ADV) {
/*                              traceptrs(cursyn, PERTPTR, dbase, depth);*/
                            }

                            if (saflag) /* print SEE ALSO pointers */
                                printseealso(cursyn);

                            if (dbase == VERB && frflag)
                                printframe(cursyn, 0);
                            break;

                        case PERTPTR:
                            printsns(cursyn, sense + 1);
                            prflag = 1;

                            traceptrs(cursyn, PERTPTR, dbase, depth);
                            break;

                        case DERIVATION:
                            tracenomins(cursyn);
                            break;

                        case CLASSIFICATION:
                        case CLASS:
                            traceclassif(cursyn, ptyp);
                            break;

                        default:
                            traceptrs(cursyn, ptyp, dbase, depth);
                            break;

                        } /* end switch */

                        free_synset(cursyn);

                    } /* end if (skipit) */

                } /* end if (whichsense) */

                /* skipit keeps its value from the last sense that was
                   examined when the current sense was not selected */
                if (skipit != 1) {
                    interface_doevents();
                    if ((whichsense == sense + 1) || abortsearch || overflag)
                        break;  /* break out of loop - we're done */
                }

            } /* end for (sense) */

            /* Done with an index entry - patch in number of senses output */

            if (whichsense == ALLSENSES) {
                i = wnresults.OutSenseCount[wnresults.numforms];
                if (i == idx->off_cnt && i == 1)
                    sprintf(tmpbuf, "\n1 sense of %s", idx->wd);
                else if (i == idx->off_cnt)
                    sprintf(tmpbuf, "\n%d senses of %s", i, idx->wd);
                else if (i > 0) /* printed some senses */
                    sprintf(tmpbuf, "\n%d of %d senses of %s",
                            i, idx->off_cnt, idx->wd);

                /* Find starting offset in searchbuffer for this index
                   entry and patch string in.  Then update bufstart
                   to end of searchbuffer for start of next index entry. */

                if (i > 0) {
                    if (wnresults.numforms > 0) {
                        bufstart[0] = '\n';
                        bufstart++;
                    }
                    /* copy without the terminator so the text that
                       follows the placeholder is preserved */
                    strncpy(bufstart, tmpbuf, strlen(tmpbuf));
                    bufstart = searchbuffer + strlen(searchbuffer);
                }
            }

            free_index(idx);

            interface_doevents();
            if (overflag || abortsearch)
                break;          /* break out of while (idx) loop */

            wnresults.numforms++;

        } /* end while (idx) */

    } /* end switch */

    interface_doevents();
    if (abortsearch)
        printbuffer("\nSearch Interrupted...\n");
    else if (overflag)
        /* the overflow message replaces whatever was collected so far */
        sprintf(searchbuffer,
                "Search too large.  Narrow search and try again...\n");

    /* replace underscores with spaces before returning */

    return(strsubst(searchbuffer, '_', ' '));
}
1636
1637 SynsetPtr
findtheinfo_ds(char * searchstr,int dbase,int ptyp,int whichsense)1638 findtheinfo_ds(char *searchstr, int dbase, int ptyp, int whichsense)
1639 {
1640 IndexPtr idx;
1641 SynsetPtr cursyn;
1642 SynsetPtr synlist = NULL, lastsyn = NULL;
1643 int depth = 0;
1644 int newsense = 0;
1645
1646 wnresults.numforms = 0;
1647 wnresults.printcnt = 0;
1648
1649 while ((idx = getindex(searchstr, dbase)) != NULL) {
1650
1651 searchstr = NULL; /* clear out for next call */
1652 newsense = 1;
1653
1654 if(ptyp < 0) {
1655 ptyp = -ptyp;
1656 depth = 1;
1657 }
1658
1659 wnresults.SenseCount[wnresults.numforms] = idx->off_cnt;
1660 wnresults.OutSenseCount[wnresults.numforms] = 0;
1661 wnresults.searchbuf = NULL;
1662 wnresults.searchds = NULL;
1663
1664 /* Go through all of the searchword's senses in the
1665 database and perform the search requested. */
1666
1667 for(sense = 0; sense < idx->off_cnt; sense++) {
1668 if (whichsense == ALLSENSES || whichsense == sense + 1) {
1669 cursyn = read_synset(dbase, idx->offset[sense], idx->wd);
1670 if (lastsyn) {
1671 if (newsense)
1672 lastsyn->nextform = cursyn;
1673 else
1674 lastsyn->nextss = cursyn;
1675 }
1676 if (!synlist)
1677 synlist = cursyn;
1678 newsense = 0;
1679
1680 cursyn->searchtype = ptyp;
1681 cursyn->ptrlist = traceptrs_ds(cursyn, ptyp, depth);
1682
1683 lastsyn = cursyn;
1684
1685 if (whichsense == sense + 1)
1686 break;
1687 }
1688 }
1689 free_index(idx);
1690 wnresults.numforms++;
1691
1692 if (ptyp == COORDS) { /* clean up by removing hypernym */
1693 lastsyn = synlist->ptrlist;
1694 synlist->ptrlist = lastsyn->ptrlist;
1695 free_synset(lastsyn);
1696 }
1697 }
1698 wnresults.searchds = synlist;
1699 return(synlist);
1700 }
1701
1702 /* Recursive search algorithm to trace a pointer tree and return results
1703 in linked list of data structures. */
1704
1705 SynsetPtr
traceptrs_ds(SynsetPtr synptr,int ptyp,int depth)1706 traceptrs_ds(SynsetPtr synptr, int ptyp, int depth)
1707 {
1708 int i;
1709 SynsetPtr cursyn, synlist = NULL, lastsyn = NULL;
1710 int tstptrtyp, docoords;
1711
1712 /* If synset is a satellite, find the head word of its
1713 head synset and the head word's sense number. */
1714
1715 if (getsstype(synptr->pos) == SATELLITE) {
1716 for (i = 0; i < synptr->ptrcount; i++)
1717 if (synptr->ptrtyp[i] == SIMPTR) {
1718 cursyn = read_synset(synptr->ppos[i],
1719 synptr->ptroff[i],
1720 "");
1721 synptr->headword = malloc(strlen(cursyn->words[0]) + 1);
1722 assert(synptr->headword);
1723 strcpy(synptr->headword, cursyn->words[0]);
1724 synptr->headsense = cursyn->lexid[0];
1725 free_synset(cursyn);
1726 break;
1727 }
1728 }
1729
1730 if (ptyp == COORDS) {
1731 tstptrtyp = HYPERPTR;
1732 docoords = 1;
1733 } else {
1734 tstptrtyp = ptyp;
1735 docoords = 0;
1736 }
1737
1738 for (i = 0; i < synptr->ptrcount; i++) {
1739 if((synptr->ptrtyp[i] == tstptrtyp) &&
1740 ((synptr->pfrm[i] == 0) ||
1741 (synptr->pfrm[i] == synptr->whichword))) {
1742
1743 cursyn=read_synset(synptr->ppos[i], synptr->ptroff[i], "");
1744 cursyn->searchtype = ptyp;
1745
1746 if (lastsyn)
1747 lastsyn->nextss = cursyn;
1748 if (!synlist)
1749 synlist = cursyn;
1750 lastsyn = cursyn;
1751
1752 if(depth) {
1753 depth = depthcheck(depth, cursyn);
1754 cursyn->ptrlist = traceptrs_ds(cursyn, ptyp, (depth+1));
1755 } else if (docoords) {
1756 cursyn->ptrlist = traceptrs_ds(cursyn, HYPOPTR, 0);
1757 }
1758 }
1759 }
1760 return(synlist);
1761 }
1762
1763 static void
WNOverview(const char * searchstr,int pos)1764 WNOverview(const char *searchstr, int pos)
1765 {
1766 SynsetPtr cursyn;
1767 IndexPtr idx = NULL;
1768 const char *cpstring;
1769 char *bufstart;
1770 int sense_, i, offsetcnt;
1771 int svdflag, skipit;
1772 unsigned long offsets[MAXSENSE];
1773
1774 cpstring = searchstr;
1775 bufstart = searchbuffer;
1776 for (i = 0; i < MAXSENSE; i++)
1777 offsets[i] = 0;
1778 offsetcnt = 0;
1779
1780 while ((idx = getindex(cpstring, pos)) != NULL) {
1781
1782 cpstring = NULL; /* clear for next call to getindex() */
1783 wnresults.SenseCount[wnresults.numforms++] = idx->off_cnt;
1784 wnresults.OutSenseCount[wnresults.numforms] = 0;
1785
1786 printbuffer(
1787 " \n");
1788
1789 /* Print synset for each sense. If requested, precede
1790 synset with synset offset and/or lexical file information.*/
1791
1792 for (sense_ = 0; sense_ < idx->off_cnt; sense_++) {
1793
1794 for (i = 0, skipit = 0; i < offsetcnt && !skipit; i++)
1795 if (offsets[i] == idx->offset[sense_])
1796 skipit = 1;
1797
1798 if (!skipit) {
1799 offsets[offsetcnt++] = idx->offset[sense_];
1800 cursyn = read_synset(pos, idx->offset[sense_], idx->wd);
1801 if (idx->tagged_cnt != -1 &&
1802 ((sense_ + 1) <= idx->tagged_cnt)) {
1803 sprintf(tmpbuf, "%d. (%d) ",
1804 sense_ + 1, GetTagcnt(idx, sense_ + 1));
1805 } else {
1806 sprintf(tmpbuf, "%d. ", sense_ + 1);
1807 }
1808
1809 svdflag = dflag;
1810 dflag = 1;
1811 printsynset(tmpbuf, cursyn, "\n", DEFON, ALLWORDS,
1812 SKIP_ANTS, SKIP_MARKER);
1813 dflag = svdflag;
1814 wnresults.OutSenseCount[wnresults.numforms]++;
1815 wnresults.printcnt++;
1816
1817 free_synset(cursyn);
1818 }
1819 }
1820
1821 /* Print sense summary message */
1822
1823 i = wnresults.OutSenseCount[wnresults.numforms];
1824
1825 if (i > 0) {
1826 if (i == 1)
1827 sprintf(tmpbuf, "\nThe %s %s has 1 sense",
1828 partnames[pos], idx->wd);
1829 else
1830 sprintf(tmpbuf, "\nThe %s %s has %d senses",
1831 partnames[pos], idx->wd, i);
1832 if (idx->tagged_cnt > 0)
1833 sprintf(tmpbuf + strlen(tmpbuf),
1834 " (first %d from tagged texts)\n", idx->tagged_cnt);
1835 else if (idx->tagged_cnt == 0)
1836 sprintf(tmpbuf + strlen(tmpbuf),
1837 " (no senses from tagged texts)\n");
1838
1839 strncpy(bufstart, tmpbuf, strlen(tmpbuf));
1840 bufstart = searchbuffer + strlen(searchbuffer);
1841 } else
1842 bufstart[0] = '\0';
1843
1844 wnresults.numforms++;
1845 free_index(idx);
1846 }
1847 }
1848
1849 /* Do requested search on synset passed, returning output in buffer. */
1850
1851 const char *
do_trace(SynsetPtr synptr,int ptyp,int dbase,int depth)1852 do_trace(SynsetPtr synptr, int ptyp, int dbase, int depth)
1853 {
1854 searchbuffer[0] = '\0'; /* clear output buffer */
1855 traceptrs(synptr, ptyp, dbase, depth);
1856 return(searchbuffer);
1857 }
1858
1859 /* Set bit for each search type that is valid for the search word
1860 passed and return bit mask. */
1861
1862 unsigned int
is_defined(const char * searchstr,int dbase)1863 is_defined(const char *searchstr, int dbase)
1864 {
1865 IndexPtr idx;
1866 int i;
1867 unsigned long retval = 0;
1868
1869 wnresults.numforms = wnresults.printcnt = 0;
1870 wnresults.searchbuf = NULL;
1871 wnresults.searchds = NULL;
1872
1873 while ((idx = getindex(searchstr, dbase)) != NULL) {
1874 searchstr = NULL; /* clear out for next getindex() call */
1875
1876 wnresults.SenseCount[wnresults.numforms] = idx->off_cnt;
1877
1878 /* set bits that must be true for all words */
1879
1880 retval |= bit(SIMPTR) | bit(FREQ) | bit(SYNS)|
1881 bit(WNGREP) | bit(OVERVIEW);
1882
1883 /* go through list of pointer characters and set appropriate bits */
1884
1885 for(i = 0; i < idx->ptruse_cnt; i++) {
1886
1887 if (idx->ptruse[i] <= LASTTYPE) {
1888 retval |= bit(idx->ptruse[i]);
1889 } else if (idx->ptruse[i] == INSTANCE) {
1890 retval |= bit(HYPERPTR);
1891 } else if (idx->ptruse[i] == INSTANCES) {
1892 retval |= bit(HYPOPTR);
1893 }
1894
1895 if (idx->ptruse[i] == SIMPTR) {
1896 retval |= bit(ANTPTR);
1897 }
1898 #ifdef FOOP
1899
1900 if (idx->ptruse[i] >= CLASSIF_START &&
1901 idx->ptruse[i] <= CLASSIF_END) {
1902 retval |= bit(CLASSIFICATION);
1903 }
1904
1905
1906 if (idx->ptruse[i] >= CLASS_START &&
1907 idx->ptruse[i] <= CLASS_END) {
1908 retval |= bit(CLASS);
1909 }
1910 #endif
1911
1912 if (idx->ptruse[i] >= ISMEMBERPTR &&
1913 idx->ptruse[i] <= ISPARTPTR)
1914 retval |= bit(HOLONYM);
1915 else if (idx->ptruse[i] >= HASMEMBERPTR &&
1916 idx->ptruse[i] <= HASPARTPTR)
1917 retval |= bit(MERONYM);
1918
1919 }
1920
1921 if (dbase == NOUN) {
1922
1923 /* check for inherited holonyms and meronyms */
1924
1925 if (HasHoloMero(idx, HMERONYM))
1926 retval |= bit(HMERONYM);
1927 if (HasHoloMero(idx, HHOLONYM))
1928 retval |= bit(HHOLONYM);
1929
1930 /* if synset has hypernyms, enable coordinate search */
1931
1932 if (retval & bit(HYPERPTR))
1933 retval |= bit(COORDS);
1934 } else if (dbase == VERB) {
1935
1936 /* if synset has hypernyms, enable coordinate search */
1937 if (retval & bit(HYPERPTR))
1938 retval |= bit(COORDS);
1939
1940 /* enable grouping of related synsets and verb frames */
1941
1942 retval |= bit(RELATIVES) | bit(FRAMES);
1943 }
1944
1945 free_index(idx);
1946 wnresults.numforms++;
1947 }
1948 return(retval);
1949 }
1950
1951 /* Determine if any of the synsets that this word is in have inherited
1952 meronyms or holonyms. */
1953
1954 static int
HasHoloMero(IndexPtr idx,int ptyp)1955 HasHoloMero(IndexPtr idx, int ptyp)
1956 {
1957 int i, j;
1958 SynsetPtr synset, psynset;
1959 int found=0;
1960 int ptrbase;
1961
1962 ptrbase = (ptyp == HMERONYM) ? HASMEMBERPTR : ISMEMBERPTR;
1963
1964 for(i = 0; i < idx->off_cnt; i++) {
1965 synset = read_synset(NOUN, idx->offset[i], "");
1966 for (j = 0; j < synset->ptrcount; j++) {
1967 if (synset->ptrtyp[j] == HYPERPTR) {
1968 psynset = read_synset(NOUN, synset->ptroff[j], "");
1969 found += HasPtr(psynset, ptrbase);
1970 found += HasPtr(psynset, ptrbase + 1);
1971 found += HasPtr(psynset, ptrbase + 2);
1972
1973 free_synset(psynset);
1974 }
1975 }
1976 free_synset(synset);
1977 }
1978 return(found);
1979 }
1980
/* Return 1 if `synptr` has at least one pointer of type `ptyp`,
   otherwise 0. */
static int HasPtr(SynsetPtr synptr, int ptyp)
{
    int i = 0;

    /* advance to the first matching pointer, or past the end */
    while (i < synptr->ptrcount && synptr->ptrtyp[i] != ptyp)
        i++;

    return i < synptr->ptrcount;
}
1992
1993 /* Set bit for each POS that search word is in. 0 returned if
1994 word is not in WordNet. */
1995
1996 unsigned int
in_wn(const char * word,int pos)1997 in_wn(const char *word, int pos)
1998 {
1999 int i;
2000 unsigned int retval = 0;
2001
2002 if (pos == ALL_POS) {
2003 for (i = 1; i < NUMPARTS + 1; i++)
2004 if (indexfps[i] != NULL && bin_search(word, indexfps[i]) != NULL)
2005 retval |= bit(i);
2006 } else if (indexfps[pos] != NULL && bin_search(word,indexfps[pos]) != NULL)
2007 retval |= bit(pos);
2008 return(retval);
2009 }
2010
/* Guard against runaway recursion.  Returns `depth` unchanged while it is
   below MAXDEPTH.  Otherwise a cycle error naming the first word of
   `synptr` is reported through display_message() and -1 is returned. */
static int depthcheck(int depth, SynsetPtr synptr)
{
    if (depth < MAXDEPTH)
        return depth;

    sprintf(msgbuf,
            "WordNet library error: Error Cycle detected\n %s\n",
            synptr->words[0]);
    display_message(msgbuf);

    return -1;                  /* reset to get one more trace then quit */
}
2022
2023 /* Strip off () enclosed comments from a word */
2024
2025 static char *
deadjify(char * word)2026 deadjify(char *word)
2027 {
2028 char *y;
2029
2030 adj_marker = UNKNOWN_MARKER; /* default if not adj or unknown */
2031
2032 y=word;
2033 while(*y) {
2034 if(*y == '(') {
2035 if (!strncmp(y, "(a)", 3))
2036 adj_marker = ATTRIBUTIVE;
2037 else if (!strncmp(y, "(ip)", 4))
2038 adj_marker = IMMED_POSTNOMINAL;
2039 else if (!strncmp(y, "(p)", 3))
2040 adj_marker = PREDICATIVE;
2041 *y='\0';
2042 } else
2043 y++;
2044 }
2045 return(word);
2046 }
2047
/* Return the 1-based sense number that synset `synptr` has for its word
   number `whichword` (1-based), or 0 if the word is not found by
   index_lookup() or none of its offsets equals the synset's own offset.
   Uses the file-level wdbuf as scratch space. */
static int getsearchsense(SynsetPtr synptr, int whichword)
{
    IndexPtr idx;
    int i;
    int result = 0;

    /* normalize the word: blanks to underscores, lower case */
    strcpy(wdbuf, synptr->words[whichword - 1]);
    strsubst(wdbuf, ' ', '_');
    strtolower(wdbuf);

    idx = index_lookup(wdbuf, getpos(synptr->pos));
    if (idx == NULL)
        return 0;

    for (i = 0; i < idx->off_cnt; i++) {
        if (idx->offset[i] == synptr->hereiam) {
            result = i + 1;
            break;
        }
    }
    free_index(idx);

    return result;
}
2067
2068 static void
printsynset(const char * head,SynsetPtr synptr,const char * tail,int definition,int wdnum,int antflag,int markerflag)2069 printsynset(const char *head, SynsetPtr synptr, const char *tail,
2070 int definition, int wdnum, int antflag, int markerflag)
2071 {
2072 int i, wdcnt;
2073 char tbuf[SMLINEBUF];
2074
2075 tbuf[0] = '\0'; /* clear working buffer */
2076
2077 strcat(tbuf, head); /* print head */
2078
2079 /* Precede synset with additional information as indiecated
2080 by flags */
2081
2082 if (offsetflag) /* print synset offset */
2083 sprintf(tbuf + strlen(tbuf),"{%8.8ld} ", synptr->hereiam);
2084 if (fileinfoflag) { /* print lexicographer file information */
2085 sprintf(tbuf + strlen(tbuf), "<%s> ", lexfiles[synptr->fnum]);
2086 prlexid = 1; /* print lexicographer id after word */
2087 } else
2088 prlexid = 0;
2089
2090 if (wdnum) /* print only specific word asked for */
2091 catword(tbuf, synptr, wdnum - 1, markerflag, antflag);
2092 else /* print all words in synset */
2093 for(i = 0, wdcnt = synptr->wcount; i < wdcnt; i++) {
2094 catword(tbuf, synptr, i, markerflag, antflag);
2095 if (i < wdcnt - 1)
2096 strcat(tbuf, ", ");
2097 }
2098
2099 if(definition && dflag && synptr->defn) {
2100 strcat(tbuf," -- ");
2101 strcat(tbuf,synptr->defn);
2102 }
2103
2104 strcat(tbuf,tail);
2105 printbuffer(tbuf);
2106 }
2107
2108 static void
printantsynset(SynsetPtr synptr,const char * tail,int definition)2109 printantsynset(SynsetPtr synptr, const char *tail, int definition)
2110 {
2111 int i, wdcnt;
2112 char tbuf[SMLINEBUF];
2113 const char *str;
2114 int first = 1;
2115
2116 tbuf[0] = '\0';
2117
2118 if (offsetflag)
2119 sprintf(tbuf,"{%8.8ld} ", synptr->hereiam);
2120 if (fileinfoflag) {
2121 sprintf(tbuf + strlen(tbuf),"<%s> ", lexfiles[synptr->fnum]);
2122 prlexid = 1;
2123 } else
2124 prlexid = 0;
2125
2126 /* print anotnyms from cluster head (of indirect ant) */
2127
2128 strcat(tbuf, "INDIRECT (VIA ");
2129 for(i = 0, wdcnt = synptr->wcount; i < wdcnt; i++) {
2130 if (first) {
2131 str = printant(ADJ, synptr, i + 1, "%s", ", ");
2132 first = 0;
2133 } else
2134 str = printant(ADJ, synptr, i + 1, ", %s", ", ");
2135 if (*str)
2136 strcat(tbuf, str);
2137 }
2138 strcat(tbuf, ") -> ");
2139
2140 /* now print synonyms from cluster head (of indirect ant) */
2141
2142 for (i = 0, wdcnt = synptr->wcount; i < wdcnt; i++) {
2143 catword(tbuf, synptr, i, SKIP_MARKER, SKIP_ANTS);
2144 if (i < wdcnt - 1)
2145 strcat(tbuf, ", ");
2146 }
2147
2148 if(dflag && synptr->defn && definition) {
2149 strcat(tbuf," -- ");
2150 strcat(tbuf,synptr->defn);
2151 }
2152
2153 strcat(tbuf,tail);
2154 printbuffer(tbuf);
2155 }
2156
/* Append word number `wdnum` (0-based) of `synptr` to `buf`.

   The word is printed without its parenthesized adjective marker.  It is
   followed by the lexicographer id (when prlexid is set and the id is
   non-zero) and by "#<sense number>" (when wnsnsflag is set).  For
   adjective synsets the marker text is appended when `adjmarker` is
   PRINT_MARKER, and the antonym, formatted as " (vs. ...)", when
   `antflag` is PRINT_ANTS. */
static void catword(char *buf, SynsetPtr synptr, int wdnum, int adjmarker, int antflag)
{
    static const char vs_template[] = " (vs. %s)";
    /* indexed by the adj_marker value that deadjify() leaves behind */
    static const char *marker_text[] = {
        "",                     /* UNKNOWN_MARKER */
        "(predicate)",          /* PREDICATIVE */
        "(prenominal)",         /* ATTRIBUTIVE */
        "(postnominal)",        /* IMMED_POSTNOMINAL */
    };

    /* deadjify() modifies its argument, so work on a copy */
    strcpy(wdbuf, synptr->words[wdnum]);
    strcat(buf, deadjify(wdbuf));

    /* Lexicographer id and WordNet sense number, as requested by flags */

    if (prlexid && (synptr->lexid[wdnum] != 0))
        sprintf(buf + strlen(buf), "%d", synptr->lexid[wdnum]);
    if (wnsnsflag)
        sprintf(buf + strlen(buf), "#%d", synptr->wnsns[wdnum]);

    /* The rest applies to adjectives only */
    if (getpos(synptr->pos) != ADJ)
        return;

    /* the marker must be appended before printant() runs, because
       printant() calls deadjify() and thereby resets adj_marker */
    if (adjmarker == PRINT_MARKER)
        strcat(buf, marker_text[adj_marker]);
    if (antflag == PRINT_ANTS)
        strcat(buf, printant(ADJ, synptr, wdnum + 1, vs_template, ""));
}
2191
2192 static const char *
printant(int dbase,SynsetPtr synptr,int wdnum,const char * template,const char * tail)2193 printant(int dbase, SynsetPtr synptr, int wdnum,
2194 const char *template, const char *tail)
2195 {
2196 int i, j, wdoff;
2197 SynsetPtr psynptr;
2198 char tbuf[WORDBUF];
2199 static char retbuf[SMLINEBUF];
2200 int first = 1;
2201
2202 retbuf[0] = '\0';
2203
2204 /* Go through all the pointers looking for anotnyms from the word
2205 indicated by wdnum. When found, print all the antonym's
2206 antonym pointers which point back to wdnum. */
2207
2208 for (i = 0; i < synptr->ptrcount; i++) {
2209 if (synptr->ptrtyp[i] == ANTPTR && synptr->pfrm[i] == wdnum) {
2210
2211 psynptr = read_synset(dbase, synptr->ptroff[i], "");
2212
2213 for (j = 0; j < psynptr->ptrcount; j++) {
2214 if (psynptr->ptrtyp[j] == ANTPTR &&
2215 psynptr->pto[j] == wdnum &&
2216 psynptr->ptroff[j] == synptr->hereiam) {
2217
2218 wdoff = (psynptr->pfrm[j] ? (psynptr->pfrm[j] - 1) : 0);
2219
2220 /* Construct buffer containing formatted antonym,
2221 then add it onto end of return buffer */
2222
2223 strcpy(wdbuf, psynptr->words[wdoff]);
2224 strcpy(tbuf, deadjify(wdbuf));
2225
2226 /* Print additional lexicographer information and
2227 WordNet sense number as indicated by flags */
2228
2229 if (prlexid && (psynptr->lexid[wdoff] != 0))
2230 sprintf(tbuf + strlen(tbuf), "%d",
2231 psynptr->lexid[wdoff]);
2232 if (wnsnsflag)
2233 sprintf(tbuf + strlen(tbuf), "#%d",
2234 psynptr->wnsns[wdoff]);
2235 if (!first)
2236 strcat(retbuf, tail);
2237 else
2238 first = 0;
2239 sprintf(retbuf + strlen(retbuf), template, tbuf);
2240 }
2241 }
2242 free_synset(psynptr);
2243 }
2244 }
2245 return(retbuf);
2246 }
2247
2248 static void
printbuffer(const char * string)2249 printbuffer(const char *string)
2250 {
2251 if (overflag)
2252 return;
2253 if (strlen(searchbuffer) + strlen(string) >= SEARCHBUF)
2254 overflag = 1;
2255 else
2256 strcat(searchbuffer, string);
2257 }
2258
2259 static void
printsns(SynsetPtr synptr,int sense_)2260 printsns(SynsetPtr synptr, int sense_)
2261 {
2262 printsense(synptr, sense_);
2263 printsynset("", synptr, "\n", DEFON, ALLWORDS, PRINT_ANTS, PRINT_MARKER);
2264 }
2265
2266 static void
printsense(SynsetPtr synptr,int sense_)2267 printsense(SynsetPtr synptr, int sense_)
2268 {
2269 char tbuf[256];
2270
2271 /* Append lexicographer filename after Sense # if flag is set. */
2272
2273 if (fnflag)
2274 sprintf(tbuf,"\nSense %d in file \"%s\"\n",
2275 sense_, lexfiles[synptr->fnum]);
2276 else
2277 sprintf(tbuf,"\nSense %d\n", sense_);
2278
2279 printbuffer(tbuf);
2280
2281 /* update counters */
2282 wnresults.OutSenseCount[wnresults.numforms]++;
2283 wnresults.printcnt++;
2284 }
2285
/* Append the indentation for one output line to the search output.

   trace - which kind of trace is being printed (TRACEP, TRACEC or
           TRACEI); selects the extra lead-in printed after the
           per-level indentation
   depth - nesting level; one indentation unit is printed per level

   NOTE(review): the output consists only of blank string literals, so
   their exact widths determine column alignment - confirm each literal
   against the upstream source before editing. */
static void printspaces(int trace, int depth)
{
    int j;

    /* one indentation unit per nesting level */
    for (j = 0; j < depth; j++)
        printbuffer(" ");

    switch(trace) {
    case TRACEP:                /* traceptrs(), tracenomins() */
        /* nested lines and top-level lines get separate lead-ins */
        if (depth)
            printbuffer(" ");
        else
            printbuffer(" ");
        break;

    case TRACEC:                /* tracecoords() */
        /* lead-in for top-level lines only */
        if (!depth)
            printbuffer(" ");
        break;

    case TRACEI:                /* traceinherit() */
        /* top-level lines start on a new line */
        if (!depth)
            printbuffer("\n ");
        break;
    }
}
2312
/* Dummy function to force Tcl/Tk to look at event queue to see if
   the user wants to stop the search. */
2315
interface_doevents(void)2316 static void interface_doevents (void) {
2317 if (interface_doevents_func != NULL) interface_doevents_func ();
2318 }
2319
2320 /*
2321 Revision log: (since version 1.5)
2322
2323 $Log: search.c,v $
2324 Revision 1.166 2006/11/14 20:52:45 wn
2325 for 2.1
2326
2327 Revision 1.165 2005/02/24 15:36:00 wn
2328 fixed bug - coordinate search was missing INSTANCE pointers
2329
2330 Revision 1.164 2005/01/27 16:32:32 wn
2331 removed 1.6 stuff and cleaned up #ifdefs
2332
2333 Revision 1.163 2004/10/25 15:25:18 wn
2334 added instances code
2335
2336 Revision 1.162 2004/01/12 16:32:52 wn
2337 changed "CATEGORY" to "TOPIC"
2338
2339 Revision 1.161 2003/06/23 15:52:27 wn
2340 cleaned up format of nomin output
2341
2342 Revision 1.160 2003/06/05 15:29:45 wn
2343 added pos and sense number for domains
2344
2345 Revision 1.159 2003/04/15 13:54:16 wn
2346 *** empty log message ***
2347
2348 Revision 1.158 2003/03/20 19:31:36 wn
2349 removed NOMIN_START/NOMIN_END range and replaced with DERIVATION
2350
2351 Revision 1.157 2003/02/06 19:01:36 wn
2352 added code to print out word pointed to in derivational links.
2353
2354 Revision 1.156 2003/02/06 18:03:30 wn
2355 work on classifications
2356
2357 Revision 1.155 2002/10/29 15:46:27 wn
2358 added CLASSIFICATION code
2359
2360 Revision 1.154 2002/09/16 15:43:01 wn
2361 allow "grep" string to be in upper case
2362
2363 Revision 1.153 2002/09/16 15:39:16 wn
2364 *** empty log message ***
2365
2366 Revision 1.152 2002/03/22 19:39:15 wn
2367 fill in key field in SynsetPtr if key file found
2368
2369 Revision 1.151 2002/03/07 18:47:52 wn
2370 updates for 1.7.1
2371
2372 Revision 1.150 2001/12/04 17:48:21 wn
2373 added test to tracenomins to only print nominalizations of search
2374 word and not all words in synset
2375
2376 Revision 1.149 2001/11/27 19:53:24 wn
2377 removed check for version on verb example sentence stuff. only
2378 needed for 1.5
2379
2380 Revision 1.148 2001/11/06 18:51:04 wn
2381 fixed bug in getindex when passed "."
2382 added code to skip classification
2383
2384 Revision 1.147 2001/10/11 18:00:56 wn
2385 fixed bug in free_syns - wasn't freeing synset pointed to by nextform
2386
2387 Revision 1.146 2001/07/27 14:32:41 wn
2388 fixed order of adjective markers
2389
2390 Revision 1.145 2001/06/19 15:01:22 wn
2391 commented out include for setutil.h
2392
2393 Revision 1.144 2001/05/30 16:24:17 wn
2394 changed is_defined to return unsigned int
2395
2396 Revision 1.143 2001/03/30 17:13:00 wn
2397 fixed is_defined - wasn't setting coords for verbs
2398
2399 Revision 1.142 2001/03/29 16:18:03 wn
2400 added newline before output from FREQ search
2401
2402 Revision 1.141 2001/03/29 16:11:39 wn
2403 added code to traceptrs to print direct antonyms nicer
2404
2405 Revision 1.140 2001/03/27 18:47:41 wn
2406 removed tcflag
2407
2408 Revision 1.139 2001/03/27 16:47:44 wn
2409 updated is_defined for holonyms and meronyms
2410
2411 Revision 1.138 2000/08/14 16:04:24 wn
2412 changed 'get_index' to call sub to do work
2413 added code for nominalizations
2414
2415 Revision 1.137 1998/08/11 18:07:11 wn
2416 minor fixes: free synptr space before returning if error; remove
2417 useless statement in free_syns
2418
2419 * Revision 1.136 1998/08/07 17:51:32 wn
2420 * added COORDS to traceptrs_ds and findtheinfo_ds
2421 * fixed getsearchsense code to only happen in parse_synset
2422 *
2423 * Revision 1.135 1998/08/07 13:04:24 wn
2424 * *** empty log message ***
2425 *
2426 * Revision 1.134 1997/11/07 16:27:36 wn
2427 * cleanup calls to traceptrs
2428 *
2429 * Revision 1.133 1997/10/16 17:13:08 wn
2430 * fixed bug in add_topnode when index == 0
2431 *
2432 * Revision 1.132 1997/09/05 15:33:18 wn
2433 * change printframes to only print generic frames if specific example not found
2434 *
2435 * Revision 1.131 1997/09/02 16:31:18 wn
2436 * changed includes
2437 *
2438 * Revision 1.130 1997/09/02 14:43:23 wn
2439 * added code to test wnrelease in parse_synset and WNOverview
2440 *
2441 * Revision 1.129 1997/08/29 20:45:25 wn
2442 * added location sanity check on parse_synset
2443 *
2444 * Revision 1.128 1997/08/29 18:35:03 wn
2445 * a bunch of additional cleanups; added code to traceptrs_ds to
2446 * store wordnet sense number for each word; added wnresults structure;
2447 * terminate holo/mero search at highest level having holo/mero
2448 *
2449 * Revision 1.127 1997/08/28 17:26:46 wn
2450 * Changed "n senses from tagged data" to "n senses from tagged texts"
2451 * in the overview.
2452 *
2453 * Revision 1.126 1997/08/27 13:26:07 wn
2454 * trivial change in wngrep (initialized count to zero)
2455 *
2456 * Revision 1.125 1997/08/26 21:13:14 wn
2457 * Grep now runs quickly because it doesn't call the doevents callback
2458 * after each line of the search.
2459 *
2460 * Revision 1.124 1997/08/26 20:11:23 wn
2461 * massive cleanups to print functions
2462 *
2463 * Revision 1.123 1997/08/26 15:04:18 wn
2464 * I think I got it this time; replaced goto skipit with int skipit flag
2465 * to make compiling easier on the Mac.
2466 *
2467 * Revision 1.122 1997/08/26 14:43:40 wn
2468 * In an effort to avoid compilation errors on the
2469 * Mac caused by the use of a "goto", I had tried to replace it with
2470 * an if block, but had done so improperly. This is the restored version
2471 * from before. Next check-in will have it properly replaced with flags.
2472 *
2473 * Revision 1.121 1997/08/25 15:54:21 wn
2474 * *** empty log message ***
2475 *
2476 * Revision 1.120 1997/08/22 21:06:02 wn
2477 * added code to use wnsnsflag to print wn sense number after each word
2478 *
2479 * Revision 1.119 1997/08/22 20:52:09 wn
2480 * cleaned up findtheinfo and other fns a bit
2481 *
2482 * Revision 1.118 1997/08/21 20:59:20 wn
2483 * grep now uses strstr instead of regexp searches. the old version is
2484 * still there but commented out.
2485 *
2486 * Revision 1.117 1997/08/21 18:41:30 wn
2487 * now eliminates duplicates on search returns, but not yet in overview
2488 *
2489 Revision 1.116 1997/08/13 17:23:45 wn
2490 fixed mac defines
2491
2492 * Revision 1.115 1997/08/08 20:56:33 wn
2493 * now uses built-in grep
2494 *
2495 * Revision 1.114 1997/08/08 19:15:41 wn
2496 * added code to read attest_cnt field in index file.
2497 * made searchbuffer fixed size
2498 * added WNOverview (OVERVIEW) search
2499 * added offsetflag to print synset offset before synset
2500 *
2501 * Revision 1.113 1997/08/05 14:20:29 wn
2502 * changed printbuffer to not realloc space, removed calls to stopsearch()
2503 *
2504 * Revision 1.112 1997/07/25 17:30:03 wn
2505 * various cleanups for release 1.6
2506 *
2507 Revision 1.111 1997/07/11 20:20:04 wn
2508 Added interface_doevents code for making searches interruptible in single-threaded environments.
2509
2510 * Revision 1.110 1997/07/10 19:01:57 wn
2511 * changed evca stuff
2512 *
2513 Revision 1.109 1997/04/22 19:59:08 wn
2514 allow pertainyms to have antonyms
2515
2516 * Revision 1.108 1996/09/17 20:05:01 wn
2517 * cleaned up EVCA code
2518 *
2519 * Revision 1.107 1996/08/16 18:34:13 wn
2520 * fixed minor bug in findcousins
2521 *
2522 * Revision 1.106 1996/07/17 14:02:17 wn
2523 * Added Kohl's verb example sentences. See getexample() and findExample().
2524 *
2525 * Revision 1.105 1996/06/14 18:49:49 wn
2526 * upped size of tmpbuf
2527 *
2528 * Revision 1.104 1996/02/08 16:42:30 wn
2529 * added some newlines to separate output and clear out tmpbuf
2530 * so invalid searches return empty string
2531 *
2532 * Revision 1.103 1995/11/30 14:54:53 wn
2533 * added grouped search for verbs
2534 *
2535 * Revision 1.102 1995/07/19 13:17:38 bagyenda
2536 * *** empty log message ***
2537 *
2538 * Revision 1.101 1995/07/18 19:15:30 wn
2539 * *** empty log message ***
2540 *
2541 * Revision 1.100 1995/07/18 18:56:24 bagyenda
2542 * New implementation of grouped searches --Paul.
2543 *
2544 * Revision 1.99 1995/06/30 19:21:23 wn
2545 * added code to findtheinfo_ds to link additional word forms
2546 * onto synset chain
2547 *
2548 * Revision 1.98 1995/06/12 18:33:51 wn
2549 * Minor change to getindex() -- Paul.
2550 *
2551 * Revision 1.97 1995/06/09 14:46:42 wn
2552 * *** empty log message ***
2553 *
2554 * Revision 1.96 1995/06/09 14:32:49 wn
2555 * changed code for PPLPTR and PERTPTR to print synsets pointed to
2556 *
2557 * Revision 1.95 1995/06/01 15:50:34 wn
2558 * cleanup of code dealing with various hyphenations
2559 *
2560 */
2561