1 #ifndef __WORD_TAG_HPP__
2 #define __WORD_TAG_HPP__
3 
4 #include <vector>
5 #include <map>
6 #include <set>
7 
8 #include "variables.hpp"
9 
10 #undef assert
11 #include "lg_assert.h"
12 
13 extern "C" {
14 #include "connectors.h"
15 #include "dict-common/dict-common.h"
16 #include "tokenize/tok-structures.h"    // gword_set
17 #include "tokenize/wordgraph.h"         // in_same_alternative()
18 };
19 
20 struct PositionConnector
21 {
PositionConnectorPositionConnector22   PositionConnector(Exp* pe, Exp* e, char d, int w, int p,
23                     double cst, double pcst, bool lr, bool ll,
24                     const std::vector<int>& er, const std::vector<int>& el, const X_node *w_xnode, Parse_Options opts)
25     : exp(pe), dir(d), word(w), position(p),
26       cost(cst), parent_cost(pcst),
27       leading_right(lr), leading_left(ll),
28       eps_right(er), eps_left(el), word_xnode(w_xnode)
29   {
30     if (word_xnode == NULL) {
31        cerr << "Internal error: Word" << w << ": " << "; connector: '" << e->condesc->string << "'; X_node: " << (word_xnode?word_xnode->string: "(null)") << endl;
32     }
33 
34     // Initialize some fields in the connector struct.
35     connector.desc = e->condesc;
36     connector.multi = e->multi;
37     set_connector_length_limit(&connector, opts);
38     connector.originating_gword = &w_xnode->word->gword_set_head;
39 
40     /*
41     cout << c->string << " : ." << w << ". : ." << p << ". ";
42     if (leading_right) {
43       cout << "lr: ";
44       copy(er.begin(), er.end(), ostream_iterator<int>(cout, " "));
45     }
46     if (leading_left) {
47       cout << "ll: ";
48       copy(el.begin(), el.end(), ostream_iterator<int>(cout, " "));
49     }
50     cout << endl;
51     */
52   }
53 
54   // Added only to suppress the warning:
55   // warning: inlining failed in call to ‘PositionConnector::~PositionConnector() noexcept’: call is unlikely and code size would grow [-Winline]
56   // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70328
57   // Can be removed when this GCC problem is fixed.
~PositionConnectorPositionConnector58   ~PositionConnector() {};
59 
60   // Original expression that this connector came from
61   Exp* exp;
62 
63   // Connector itself
64   Connector connector;
65   // Direction
66   char dir;
67   // word in a sentence that this connector belongs to
68   size_t word;
69   // position in the word tag
70   int position;
71   // cost of the connector
72   double cost;
73   // parent cost
74   double parent_cost;
75 
76   bool leading_right;
77   bool leading_left;
78   std::vector<int> eps_right;
79   std::vector<int> eps_left;
80 
81 
82   // The corresponding X_node - chosen-disjuncts[]
83   const X_node *word_xnode;
84 
85   // Matches with other words
86   std::vector<PositionConnector*> matches;
87 
88 };
89 
/*
 * A costly-null expression, remembered as a (SAT variable, cost) pair.
 * The cost is recovered (in sat_extract_links()) if the expression
 * happens to reside on a participating disjunct.
 */
struct EmptyConnector {
  int ec_var;      // SAT variable of the expression
  double ec_cost;  // cost of the expression

  // Store the variable and its cost as given.
  EmptyConnector(int var, double cst) : ec_var(var), ec_cost(cst) {}
};
103 
104 // XXX TODO: Hash connectors for faster matching
105 
106 class WordTag
107 {
108 private:
109   std::vector<PositionConnector> _left_connectors;
110   std::vector<PositionConnector> _right_connectors;
111   std::vector<EmptyConnector> _empty_connectors;
112 
113   std::vector<char> _dir;
114   std::vector<int> _position;
115 
116   int _word;
117   Variables* _variables;
118 
119   Sentence _sent;
120   Parse_Options _opts;
121 
122   // Could this word tag match a connector (wi, pi)?
123   // For each word wi I keep a set of positions pi that can be matched
124   std::vector< std::set<int> > _match_possible;
set_match_possible(int wj,int pj)125   void set_match_possible(int wj, int pj) {
126     _match_possible[wj].insert(pj);
127   }
128 
129 public:
WordTag(int word,Variables * variables,Sentence sent,Parse_Options opts)130   WordTag(int word, Variables* variables, Sentence sent, Parse_Options opts)
131     : _word(word), _variables(variables), _sent(sent), _opts(opts) {
132     _match_possible.resize(_sent->length);
133 
134     verbosity = opts->verbosity;
135     debug = opts->debug;
136     test = opts->test;
137   }
138 
139   // Added only to suppress the warning:
140   // warning: inlining failed in call to ‘WordTag::~WordTag() noexcept’: call is unlikely and code size would grow [-Winline]
141   // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70328
142   // Can be removed when this GCC problem is fixed.
~WordTag()143   ~WordTag() {};
144 
get_left_connectors() const145   const std::vector<PositionConnector>& get_left_connectors() const {
146     return _left_connectors;
147   }
148 
get_right_connectors() const149   const std::vector<PositionConnector>& get_right_connectors() const {
150     return _right_connectors;
151   }
152 
get_empty_connectors() const153   const std::vector<EmptyConnector>& get_empty_connectors() const {
154     return _empty_connectors;
155   }
156 
get(int dfs_position)157   PositionConnector* get(int dfs_position)
158   {
159     switch (_dir[dfs_position - 1]) {
160     case '+':
161       return &_right_connectors[_position[dfs_position - 1]];
162     case '-':
163       return &_left_connectors[_position[dfs_position - 1]];
164     }
165     return NULL;
166   }
167 
168 #define OPTIMIZE_EN
alt_connectivity_possible(Connector & c1,Connector & c2)169   static bool alt_connectivity_possible(Connector& c1, Connector & c2)
170   {
171 #ifdef OPTIMIZE_EN
172   /* Try a shortcut first. */
173   if ((c2.originating_gword->o_gword->hier_depth == 0) ||
174      (c1.originating_gword->o_gword->hier_depth == 0)) return true;
175 #endif // OPTIMIZE_EN
176 
177     return in_same_alternative(c1.originating_gword->o_gword, c2.originating_gword->o_gword);
178   }
179 
match(int w1,Connector & cntr1,char dir,int w2,Connector & cntr2)180   bool match(int w1, Connector& cntr1, char dir, int w2, Connector& cntr2)
181   {
182       int dist = w2 - w1;
183       assert(0 < dist, "match() did not receive words in the natural order.");
184       if (dist > cntr1.length_limit || dist > cntr2.length_limit) return false;
185       if (!alt_connectivity_possible(cntr1, cntr2)) return false;
186       return easy_match_desc(cntr1.desc, cntr2.desc);
187   }
188 
189   void insert_connectors(Exp* exp, int& dfs_position,
190                          bool& leading_right, bool& leading_left,
191                          std::vector<int>& eps_right,
192                          std::vector<int>& eps_left,
193                          char* var, bool root, double parent_cost,
194                          Exp* parent, const X_node *word_xnode);
195 
196   // Caches information about the found matches to the _matches vector, and also
197   // updates the _matches vector of all connectors in the given tag.
198   // In order to have all possible matches correctly cached, the function assumes that it is
199   // iteratively called for all words in the sentence, where the tag is on the right side of
200   // this word
201   void add_matches_with_word(WordTag& tag);
202 
203   // Find matches in this word tag with the connector (name, dir).
204   void find_matches(int w, Connector* C, char dir,  std::vector<PositionConnector*>& matches);
205 
206   // A simpler function: Can any connector in this word match a connector wi, pi?
207   // It is assumed that
match_possible(int wi,int pi)208   bool match_possible(int wi, int pi)
209   {
210     return _match_possible[wi].find(pi) != _match_possible[wi].end();
211   }
212 
213 private:
214   int verbosity;
215   const char *debug;
216   const char *test;
217 };
218 
219 #endif
220