1 /*************************************************************************/
2 /* Copyright (c) 2008, 2009, 2014 Linas Vepstas */
3 /* All rights reserved */
4 /* */
5 /* Use of the link grammar parsing system is subject to the terms of the */
6 /* license set forth in the LICENSE file included with this software. */
7 /* This license allows free redistribution and use in source and binary */
8 /* forms, with or without modification, subject to certain conditions. */
9 /* */
10 /*************************************************************************/
11 /*
12 * lisjuncts.c
13 *
14 * Miscellaneous utilities for returning the list of disjuncts that
15 * were actually used in a given parse of a sentence.
16 */
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include "api-structures.h"
21 #include "connectors.h"
22 #include "disjunct-utils.h"
23 #include "linkage.h"
24 #include "lisjuncts.h"
25 #include "string-set.h"
26
27 #ifdef DEBUG_lisjuncts
28 #include "print/print-util.h"
29 static void assert_same_disjunct(Linkage, WordIdx, const char *);
30 #endif /* DEBUG_lisjuncts */
31
32 /**
33 * lg_compute_disjunct_strings -- Given sentence, compute disjuncts.
34 *
35 * This routine will compute the string representation of the disjunct
36 * used for each word in parsing the given sentence.
37 *
38 * The connectors are extracted from link_array (and not chosen_disjuncts)
39 * so the lexical links remain hidden when HIDE_MORPHO is true (see
40 * compute_chosen_disjuncts()).
41 *
42 * In order that multi-connectors will not be extracted several times
43 * for each disjunct (if they connect to multiple words) their tracon_id
44 * is checked for duplication.
45 */
lg_compute_disjunct_strings(Linkage lkg)46 void lg_compute_disjunct_strings(Linkage lkg)
47 {
48 char djstr[MAX_LINK_NAME_LENGTH*20]; /* no word will have more than 20 links */
49 size_t nwords = lkg->num_words;
50
51 if (lkg->disjunct_list_str) return;
52 lkg->disjunct_list_str = malloc(nwords * sizeof(char *));
53
54 for (WordIdx w = 0; w < nwords; w++)
55 {
56 size_t len = 0;
57
58 for (int dir = 0; dir < 2; dir++)
59 {
60 int last_multi_tracon_id = 0; /* last multi-connector */
61
62 for (LinkIdx i = lkg->num_links-1; i != (WordIdx)-1; i--)
63 {
64 Link *lnk = &lkg->link_array[i];
65 Connector *c;
66
67 if (0 == dir)
68 {
69 if (lnk->rw != w) continue;
70 c = lnk->rc;
71 }
72 else
73 {
74 if (lnk->lw != w) continue;
75 c = lnk->lc;
76 }
77
78 if (c->multi)
79 {
80 if (last_multi_tracon_id == c->tracon_id) continue; /* already included */
81 last_multi_tracon_id = c->tracon_id;
82 djstr[len++] = '@';
83 }
84 len += lg_strlcpy(djstr+len, connector_string(c), sizeof(djstr)-len);
85
86 if (len >= sizeof(djstr) - 3)
87 {
88 len = sizeof(djstr) - 1;
89 break;
90 }
91 djstr[len++] = (dir == 0) ? '-' : '+';
92 djstr[len++] = ' ';
93 }
94 }
95 if ((len > 0) && (djstr[len-1] == ' ')) len--;
96 djstr[len++] = '\0';
97
98 #ifdef DEBUG_lisjuncts
99 assert_same_disjunct(lkg, w, djstr);
100 #endif /* DEBUG_lisjuncts */
101
102 lkg->disjunct_list_str[w] = string_set_add(djstr, lkg->sent->string_set);
103 }
104 }
105
106 #ifdef DEBUG_lisjuncts
107 /* Cannot be used when morphology is not suppressed and lexical links exist. */
assert_same_disjunct(Linkage lkg,WordIdx w,const char * djstr)108 static void assert_same_disjunct(Linkage lkg, WordIdx w, const char *djstr)
109 {
110 char *cs;
111 if (lkg->chosen_disjuncts[w])
112 {
113 cs = print_one_disjunct(lkg->chosen_disjuncts[w]);
114 char *cs_lastchar = &cs[strlen(cs)-1];
115 if (*cs_lastchar == ' ') *cs_lastchar = '\0';
116 }
117 else
118 cs = (char *)"";
119
120 assert(strcmp(cs, djstr) == 0,
121 "Word %zu: Inconsistent disjunct string %s (link_array %s)",
122 w, cs, djstr);
123
124 if (lkg->chosen_disjuncts[w])
125 free(cs);
126 }
127 #endif /* DEBUG_lisjuncts */
128