1 #ifndef __WORD_TAG_HPP__ 2 #define __WORD_TAG_HPP__ 3 4 #include <vector> 5 #include <map> 6 #include <set> 7 8 #include "variables.hpp" 9 10 #undef assert 11 #include "lg_assert.h" 12 13 extern "C" { 14 #include "connectors.h" 15 #include "dict-common/dict-common.h" 16 #include "tokenize/tok-structures.h" // gword_set 17 #include "tokenize/wordgraph.h" // in_same_alternative() 18 }; 19 20 struct PositionConnector 21 { PositionConnectorPositionConnector22 PositionConnector(Exp* pe, Exp* e, char d, int w, int p, 23 double cst, double pcst, bool lr, bool ll, 24 const std::vector<int>& er, const std::vector<int>& el, const X_node *w_xnode, Parse_Options opts) 25 : exp(pe), dir(d), word(w), position(p), 26 cost(cst), parent_cost(pcst), 27 leading_right(lr), leading_left(ll), 28 eps_right(er), eps_left(el), word_xnode(w_xnode) 29 { 30 if (word_xnode == NULL) { 31 cerr << "Internal error: Word" << w << ": " << "; connector: '" << e->condesc->string << "'; X_node: " << (word_xnode?word_xnode->string: "(null)") << endl; 32 } 33 34 // Initialize some fields in the connector struct. 35 connector.desc = e->condesc; 36 connector.multi = e->multi; 37 set_connector_length_limit(&connector, opts); 38 connector.originating_gword = &w_xnode->word->gword_set_head; 39 40 /* 41 cout << c->string << " : ." << w << ". : ." << p << ". "; 42 if (leading_right) { 43 cout << "lr: "; 44 copy(er.begin(), er.end(), ostream_iterator<int>(cout, " ")); 45 } 46 if (leading_left) { 47 cout << "ll: "; 48 copy(el.begin(), el.end(), ostream_iterator<int>(cout, " ")); 49 } 50 cout << endl; 51 */ 52 } 53 54 // Added only to suppress the warning: 55 // warning: inlining failed in call to ‘PositionConnector::~PositionConnector() noexcept’: call is unlikely and code size would grow [-Winline] 56 // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70328 57 // Can be removed when this GCC problem is fixed. ~PositionConnectorPositionConnector58 ~PositionConnector() {}; 59 60 // Original expression that this connector came from 61 Exp* exp; 62 63 // Connector itself 64 Connector connector; 65 // Direction 66 char dir; 67 // word in a sentence that this connector belongs to 68 size_t word; 69 // position in the word tag 70 int position; 71 // cost of the connector 72 double cost; 73 // parent cost 74 double parent_cost; 75 76 bool leading_right; 77 bool leading_left; 78 std::vector<int> eps_right; 79 std::vector<int> eps_left; 80 81 82 // The corresponding X_node - chosen-disjuncts[] 83 const X_node *word_xnode; 84 85 // Matches with other words 86 std::vector<PositionConnector*> matches; 87 88 }; 89 90 /* 91 * Record the SAT variable and cost of costly-null expressions. 92 * Their cost is recovered (in sat_extract_links()) if 93 * they happen to reside on a participating disjunct. 94 */ 95 struct EmptyConnector { EmptyConnectorEmptyConnector96 EmptyConnector(int var, double cst) 97 : ec_var(var), ec_cost(cst) 98 { 99 } 100 int ec_var; 101 double ec_cost; 102 }; 103 104 // XXX TODO: Hash connectors for faster matching 105 106 class WordTag 107 { 108 private: 109 std::vector<PositionConnector> _left_connectors; 110 std::vector<PositionConnector> _right_connectors; 111 std::vector<EmptyConnector> _empty_connectors; 112 113 std::vector<char> _dir; 114 std::vector<int> _position; 115 116 int _word; 117 Variables* _variables; 118 119 Sentence _sent; 120 Parse_Options _opts; 121 122 // Could this word tag match a connector (wi, pi)? 123 // For each word wi I keep a set of positions pi that can be matched 124 std::vector< std::set<int> > _match_possible; set_match_possible(int wj,int pj)125 void set_match_possible(int wj, int pj) { 126 _match_possible[wj].insert(pj); 127 } 128 129 public: WordTag(int word,Variables * variables,Sentence sent,Parse_Options opts)130 WordTag(int word, Variables* variables, Sentence sent, Parse_Options opts) 131 : _word(word), _variables(variables), _sent(sent), _opts(opts) { 132 _match_possible.resize(_sent->length); 133 134 verbosity = opts->verbosity; 135 debug = opts->debug; 136 test = opts->test; 137 } 138 139 // Added only to suppress the warning: 140 // warning: inlining failed in call to ‘WordTag::~WordTag() noexcept’: call is unlikely and code size would grow [-Winline] 141 // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70328 142 // Can be removed when this GCC problem is fixed. ~WordTag()143 ~WordTag() {}; 144 get_left_connectors() const145 const std::vector<PositionConnector>& get_left_connectors() const { 146 return _left_connectors; 147 } 148 get_right_connectors() const149 const std::vector<PositionConnector>& get_right_connectors() const { 150 return _right_connectors; 151 } 152 get_empty_connectors() const153 const std::vector<EmptyConnector>& get_empty_connectors() const { 154 return _empty_connectors; 155 } 156 get(int dfs_position)157 PositionConnector* get(int dfs_position) 158 { 159 switch (_dir[dfs_position - 1]) { 160 case '+': 161 return &_right_connectors[_position[dfs_position - 1]]; 162 case '-': 163 return &_left_connectors[_position[dfs_position - 1]]; 164 } 165 return NULL; 166 } 167 168 #define OPTIMIZE_EN alt_connectivity_possible(Connector & c1,Connector & c2)169 static bool alt_connectivity_possible(Connector& c1, Connector & c2) 170 { 171 #ifdef OPTIMIZE_EN 172 /* Try a shortcut first. */ 173 if ((c2.originating_gword->o_gword->hier_depth == 0) || 174 (c1.originating_gword->o_gword->hier_depth == 0)) return true; 175 #endif // OPTIMIZE_EN 176 177 return in_same_alternative(c1.originating_gword->o_gword, c2.originating_gword->o_gword); 178 } 179 match(int w1,Connector & cntr1,char dir,int w2,Connector & cntr2)180 bool match(int w1, Connector& cntr1, char dir, int w2, Connector& cntr2) 181 { 182 int dist = w2 - w1; 183 assert(0 < dist, "match() did not receive words in the natural order."); 184 if (dist > cntr1.length_limit || dist > cntr2.length_limit) return false; 185 if (!alt_connectivity_possible(cntr1, cntr2)) return false; 186 return easy_match_desc(cntr1.desc, cntr2.desc); 187 } 188 189 void insert_connectors(Exp* exp, int& dfs_position, 190 bool& leading_right, bool& leading_left, 191 std::vector<int>& eps_right, 192 std::vector<int>& eps_left, 193 char* var, bool root, double parent_cost, 194 Exp* parent, const X_node *word_xnode); 195 196 // Caches information about the found matches to the _matches vector, and also 197 // updates the _matches vector of all connectors in the given tag. 198 // In order to have all possible matches correctly cached, the function assumes that it is 199 // iteratively called for all words in the sentence, where the tag is on the right side of 200 // this word 201 void add_matches_with_word(WordTag& tag); 202 203 // Find matches in this word tag with the connector (name, dir). 204 void find_matches(int w, Connector* C, char dir, std::vector<PositionConnector*>& matches); 205 206 // A simpler function: Can any connector in this word match a connector wi, pi? 207 // It is assumed that match_possible(int wi,int pi)208 bool match_possible(int wi, int pi) 209 { 210 return _match_possible[wi].find(pi) != _match_possible[wi].end(); 211 } 212 213 private: 214 int verbosity; 215 const char *debug; 216 const char *test; 217 }; 218 219 #endif 220