1 //
2 // Copyright RIME Developers
3 // Distributed under the BSD License
4 //
5 // simplistic sentence-making
6 //
7 // 2011-10-06 GONG Chen <chen.sst@gmail.com>
8 //
9 #include <algorithm>
10 #include <functional>
11 #include <rime/candidate.h>
12 #include <rime/config.h>
13 #include <rime/dict/vocabulary.h>
14 #include <rime/gear/grammar.h>
15 #include <rime/gear/poet.h>
16
17 namespace rime {
18
19 // internal data structure used during the sentence making process.
20 // the output line of the algorithm is transformed to an<Sentence>.
21 struct Line {
22 // be sure the pointer to predecessor Line object is stable. it works since
23 // pointer to values stored in std::map and std::unordered_map are stable.
24 const Line* predecessor;
25 // as long as the word graph lives, pointers to entries are valid.
26 const DictEntry* entry;
27 size_t end_pos;
28 double weight;
29
30 static const Line kEmpty;
31
emptyrime::Line32 bool empty() const {
33 return !predecessor && !entry;
34 }
35
last_wordrime::Line36 string last_word() const {
37 return entry ? entry->text : string();
38 }
39
40 struct Components {
41 vector<const Line*> lines;
42
Componentsrime::Line::Components43 Components(const Line* line) {
44 for (const Line* cursor = line;
45 !cursor->empty();
46 cursor = cursor->predecessor) {
47 lines.push_back(cursor);
48 }
49 }
50
beginrime::Line::Components51 decltype(lines.crbegin()) begin() const { return lines.crbegin(); }
endrime::Line::Components52 decltype(lines.crend()) end() const { return lines.crend(); }
53 };
54
componentsrime::Line55 Components components() const { return Components(this); }
56
contextrime::Line57 string context() const {
58 // look back 2 words
59 return empty() ? string() :
60 !predecessor || predecessor->empty() ? last_word() :
61 predecessor->last_word() + last_word();
62 }
63
word_lengthsrime::Line64 vector<size_t> word_lengths() const {
65 vector<size_t> lengths;
66 size_t last_end_pos = 0;
67 for (const auto* c : components()) {
68 lengths.push_back(c->end_pos - last_end_pos);
69 last_end_pos = c->end_pos;
70 }
71 return lengths;
72 }
73 };
74
75 const Line Line::kEmpty{nullptr, nullptr, 0, 0.0};
76
create_grammar(Config * config)77 inline static Grammar* create_grammar(Config* config) {
78 if (auto* grammar = Grammar::Require("grammar")) {
79 return grammar->Create(config);
80 }
81 return nullptr;
82 }
83
Poet(const Language * language,Config * config,Compare compare)84 Poet::Poet(const Language* language, Config* config, Compare compare)
85 : language_(language),
86 grammar_(create_grammar(config)),
87 compare_(compare) {}
88
~Poet()89 Poet::~Poet() {}
90
CompareWeight(const Line & one,const Line & other)91 bool Poet::CompareWeight(const Line& one, const Line& other) {
92 return one.weight < other.weight;
93 }
94
95 // returns true if one is less than other.
LeftAssociateCompare(const Line & one,const Line & other)96 bool Poet::LeftAssociateCompare(const Line& one, const Line& other) {
97 if (one.weight < other.weight) return true;
98 if (one.weight == other.weight) {
99 auto one_word_lens = one.word_lengths();
100 auto other_word_lens = other.word_lengths();
101 // less words is more favorable
102 if (one_word_lens.size() > other_word_lens.size()) return true;
103 if (one_word_lens.size() == other_word_lens.size()) {
104 return std::lexicographical_compare(
105 one_word_lens.begin(), one_word_lens.end(),
106 other_word_lens.begin(), other_word_lens.end());
107 }
108 }
109 return false;
110 }
111
112 // keep the best line candidate per last phrase
113 using LineCandidates = hash_map<string, Line>;
114
115 template <int N>
find_top_candidates(const LineCandidates & candidates,Poet::Compare compare)116 static vector<const Line*> find_top_candidates(
117 const LineCandidates& candidates, Poet::Compare compare) {
118 vector<const Line*> top;
119 top.reserve(N + 1);
120 for (const auto& candidate : candidates) {
121 auto pos = std::upper_bound(
122 top.begin(), top.end(), &candidate.second,
123 [&](const Line* a, const Line* b) { return compare(*b, *a); }); // desc
124 if (pos - top.begin() >= N) continue;
125 top.insert(pos, &candidate.second);
126 if (top.size() > N) top.pop_back();
127 }
128 return top;
129 }
130
131 using UpdateLineCandidate = function<void (const Line& candidate)>;
132
133 struct BeamSearch {
134 using State = LineCandidates;
135
136 static constexpr int kMaxLineCandidates = 7;
137
Initiaterime::BeamSearch138 static void Initiate(State& initial_state) {
139 initial_state.emplace("", Line::kEmpty);
140 }
141
ForEachCandidaterime::BeamSearch142 static void ForEachCandidate(const State& state,
143 Poet::Compare compare,
144 UpdateLineCandidate update) {
145 auto top_candidates =
146 find_top_candidates<kMaxLineCandidates>(state, compare);
147 for (const auto* candidate : top_candidates) {
148 update(*candidate);
149 }
150 }
151
BestLineToUpdaterime::BeamSearch152 static Line& BestLineToUpdate(State& state, const Line& new_line) {
153 const auto& key = new_line.last_word();
154 return state[key];
155 }
156
BestLineInStaterime::BeamSearch157 static const Line& BestLineInState(const State& final_state,
158 Poet::Compare compare) {
159 const Line* best = nullptr;
160 for (const auto& candidate : final_state) {
161 if (!best || compare(*best, candidate.second)) {
162 best = &candidate.second;
163 }
164 }
165 return best ? *best : Line::kEmpty;
166 }
167 };
168
169 struct DynamicProgramming {
170 using State = Line;
171
Initiaterime::DynamicProgramming172 static void Initiate(State& initial_state) {
173 initial_state = Line::kEmpty;
174 }
175
ForEachCandidaterime::DynamicProgramming176 static void ForEachCandidate(const State& state,
177 Poet::Compare compare,
178 UpdateLineCandidate update) {
179 update(state);
180 }
181
BestLineToUpdaterime::DynamicProgramming182 static Line& BestLineToUpdate(State& state, const Line& new_line) {
183 return state;
184 }
185
BestLineInStaterime::DynamicProgramming186 static const Line& BestLineInState(const State& final_state,
187 Poet::Compare compare) {
188 return final_state;
189 }
190 };
191
192 template <class Strategy>
MakeSentenceWithStrategy(const WordGraph & graph,size_t total_length,const string & preceding_text)193 an<Sentence> Poet::MakeSentenceWithStrategy(const WordGraph& graph,
194 size_t total_length,
195 const string& preceding_text) {
196 map<int, typename Strategy::State> states;
197 Strategy::Initiate(states[0]);
198 for (const auto& sv : graph) {
199 size_t start_pos = sv.first;
200 if (states.find(start_pos) == states.end())
201 continue;
202 DLOG(INFO) << "start pos: " << start_pos;
203 const auto& source_state = states[start_pos];
204 const auto update =
205 [this, &states, &sv, start_pos, total_length, &preceding_text]
206 (const Line& candidate) {
207 for (const auto& ev : sv.second) {
208 size_t end_pos = ev.first;
209 if (start_pos == 0 && end_pos == total_length)
210 continue; // exclude single word from the result
211 DLOG(INFO) << "end pos: " << end_pos;
212 bool is_rear = end_pos == total_length;
213 auto& target_state = states[end_pos];
214 // extend candidates with dict entries on a valid edge.
215 const DictEntryList& entries = ev.second;
216 for (const auto& entry : entries) {
217 const string& context =
218 candidate.empty() ? preceding_text : candidate.context();
219 double weight = candidate.weight +
220 Grammar::Evaluate(context,
221 entry->text,
222 entry->weight,
223 is_rear,
224 grammar_.get());
225 Line new_line{&candidate, entry.get(), end_pos, weight};
226 Line& best = Strategy::BestLineToUpdate(target_state, new_line);
227 if (best.empty() || compare_(best, new_line)) {
228 DLOG(INFO) << "updated line ending at " << end_pos
229 << " with text: ..." << new_line.last_word()
230 << " weight: " << new_line.weight;
231 best = new_line;
232 }
233 }
234 }
235 };
236 Strategy::ForEachCandidate(source_state, compare_, update);
237 }
238 auto found = states.find(total_length);
239 if (found == states.end() || found->second.empty())
240 return nullptr;
241 const Line& best = Strategy::BestLineInState(found->second, compare_);
242 auto sentence = New<Sentence>(language_);
243 for (const auto* c : best.components()) {
244 if (!c->entry) continue;
245 sentence->Extend(*c->entry, c->end_pos, c->weight);
246 }
247 return sentence;
248 }
249
MakeSentence(const WordGraph & graph,size_t total_length,const string & preceding_text)250 an<Sentence> Poet::MakeSentence(const WordGraph& graph,
251 size_t total_length,
252 const string& preceding_text) {
253 return grammar_ ?
254 MakeSentenceWithStrategy<BeamSearch>(
255 graph, total_length, preceding_text) :
256 MakeSentenceWithStrategy<DynamicProgramming>(
257 graph, total_length, preceding_text);
258 }
259
260 } // namespace rime
261