1 /*
2 * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3 *
4 * SPDX-License-Identifier: LGPL-2.1-or-later
5 */
6 #ifndef _FCITX_LIBIME_CORE_LATTICE_H_
7 #define _FCITX_LIBIME_CORE_LATTICE_H_
8
9 #include "libimecore_export.h"
10 #include <algorithm>
11 #include <boost/range/adaptor/transformed.hpp>
12 #include <boost/range/adaptor/type_erased.hpp>
13 #include <fcitx-utils/macros.h>
14 #include <fcitx-utils/stringutils.h>
15 #include <libime/core/languagemodel.h>
16 #include <libime/core/segmentgraph.h>
17 #include <memory>
18 #include <type_traits>
19
20 namespace libime {
21
22 class Decoder;
23 class LatticePrivate;
24 class SegmentGraphNode;
25 class SegmentGraph;
26 class LatticeNode;
27
28 class SentenceResult {
29 public:
30 typedef std::vector<const LatticeNode *> Sentence;
31 SentenceResult(Sentence sentence = {}, float score = 0.0f)
sentence_(std::move (sentence))32 : sentence_(std::move(sentence)), score_(score) {}
FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(SentenceResult)33 FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(SentenceResult)
34
35 const Sentence &sentence() const { return sentence_; }
size()36 size_t size() const { return sentence_.size(); }
37
score()38 float score() const { return score_; }
adjustScore(float adjust)39 void adjustScore(float adjust) { score_ += adjust; }
setScore(float score)40 void setScore(float score) { score_ = score; }
41
42 bool operator<(const SentenceResult &rhs) const {
43 return score_ < rhs.score_;
44 }
45
46 bool operator>(const SentenceResult &rhs) const {
47 return score_ > rhs.score_;
48 }
49
50 std::string toString() const;
51
52 private:
53 Sentence sentence_;
54 float score_;
55 };
56
57 class LIBIMECORE_EXPORT WordNode {
58 public:
WordNode(std::string_view word,WordIndex idx)59 WordNode(std::string_view word, WordIndex idx) : word_(word), idx_(idx) {}
60
61 virtual ~WordNode() = default;
62 WordNode(WordNode &&other) noexcept(
63 std::is_nothrow_move_constructible<std::string>::value);
64 WordNode &operator=(WordNode &&other) noexcept(
65 std::is_nothrow_move_assignable<std::string>::value);
66
word()67 const std::string &word() const { return word_; }
idx()68 WordIndex idx() const { return idx_; }
setIdx(WordIndex idx)69 void setIdx(WordIndex idx) { idx_ = idx; }
70
71 protected:
72 std::string word_;
73 WordIndex idx_;
74 };
75
76 class LIBIMECORE_EXPORT LatticeNodeData {
77 public:
78 virtual ~LatticeNodeData() = default;
79 };
80
81 class LIBIMECORE_EXPORT LatticeNode : public WordNode {
82 public:
83 LatticeNode(std::string_view word, WordIndex idx, SegmentGraphPath path,
84 const State &state, float cost = 0)
WordNode(word,idx)85 : WordNode(word, idx), path_(std::move(path)), cost_(cost),
86 state_(state) {
87 assert(path_.size() >= 2);
88 }
cost()89 float cost() const { return cost_; }
90
score()91 float score() const { return score_; }
setScore(float score)92 void setScore(float score) { score_ = score; }
93
from()94 const SegmentGraphNode *from() const { return path_.front(); }
to()95 const SegmentGraphNode *to() const { return path_.back(); }
path()96 const SegmentGraphPath &path() const { return path_; }
97
prev()98 LatticeNode *prev() const { return prev_; }
setPrev(LatticeNode * prev)99 void setPrev(LatticeNode *prev) { prev_ = prev; }
100
101 /// Return the full word till the begining of the sentence.
fullWord()102 std::string fullWord() const {
103 size_t length = 0;
104 const auto *pivot = this;
105 while (pivot) {
106 length += pivot->word().size();
107 pivot = pivot->prev();
108 }
109
110 std::string result;
111 result.resize(length);
112 pivot = this;
113 while (pivot) {
114 const auto &word = pivot->word();
115 length -= word.size();
116 std::copy(word.begin(), word.end(), result.begin() + length);
117 pivot = pivot->prev();
118 }
119
120 return result;
121 }
122
123 SentenceResult toSentenceResult(float adjust = 0.0f) const {
124 SentenceResult::Sentence result;
125 const auto *pivot = this;
126 // to skip bos
127 while (pivot->prev()) {
128 if (pivot->to()) {
129 result.emplace_back(pivot);
130 }
131 pivot = pivot->prev();
132 }
133
134 std::reverse(result.begin(), result.end());
135 return {std::move(result), score() + adjust};
136 }
137
state()138 State &state() { return state_; }
139
140 protected:
141 SegmentGraphPath path_;
142 float cost_;
143 float score_ = 0.0f;
144 State state_;
145 LatticeNode *prev_ = nullptr;
146 };
147
toString()148 inline std::string SentenceResult::toString() const {
149 return fcitx::stringutils::join(
150 sentence_ | boost::adaptors::transformed(
151 [](const auto &item) { return item->word(); }),
152 "");
153 }
154
155 // Lattice graph is a overlay graph on the SegmentGraph.
156 // Every node in SegmentGraph may have multiple corresponding LatticeNode.
157 // If there is an edge between two lattice nodes, there is a path between their
158 // corresponding SegmentGraphNode.
159 //
160 // For example, pinyin, "xian" has three SegmentGraphNodes.
161 // [0] ---- xian ----- [4]
162 // \- xi -[2] - an -/
163 //
164 // while [0] has only one lattice node, the remaining nodes may has multiple
165 // nodes.
166 // So there is an extra "end" node that links every nodes from [4] to it.
167 class LIBIMECORE_EXPORT Lattice {
168 friend class Decoder;
169 friend class DecoderPrivate;
170 friend class SegmentGraph;
171
172 public:
173 Lattice();
174 FCITX_DECLARE_VIRTUAL_DTOR_MOVE(Lattice)
175
176 size_t sentenceSize() const;
177 const SentenceResult &sentence(size_t idx) const;
178 void clear();
179 void discardNode(const std::unordered_set<const SegmentGraphNode *> &node);
180
181 typedef boost::any_range<LatticeNode, boost::bidirectional_traversal_tag,
182 const LatticeNode &>
183 NodeRange;
184
185 NodeRange nodes(const SegmentGraphNode *node) const;
186
187 private:
188 std::unique_ptr<LatticePrivate> d_ptr;
189 FCITX_DECLARE_PRIVATE(Lattice);
190 };
191 } // namespace libime
192
193 #endif // _FCITX_LIBIME_CORE_LATTICE_H_
194