1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 #ifndef _FCITX_LIBIME_CORE_LATTICE_H_
7 #define _FCITX_LIBIME_CORE_LATTICE_H_
8 
9 #include "libimecore_export.h"
10 #include <algorithm>
11 #include <boost/range/adaptor/transformed.hpp>
12 #include <boost/range/adaptor/type_erased.hpp>
13 #include <fcitx-utils/macros.h>
14 #include <fcitx-utils/stringutils.h>
15 #include <libime/core/languagemodel.h>
16 #include <libime/core/segmentgraph.h>
17 #include <memory>
18 #include <type_traits>
19 
20 namespace libime {
21 
22 class Decoder;
23 class LatticePrivate;
24 class SegmentGraphNode;
25 class SegmentGraph;
26 class LatticeNode;
27 
28 class SentenceResult {
29 public:
30     typedef std::vector<const LatticeNode *> Sentence;
31     SentenceResult(Sentence sentence = {}, float score = 0.0f)
sentence_(std::move (sentence))32         : sentence_(std::move(sentence)), score_(score) {}
FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(SentenceResult)33     FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(SentenceResult)
34 
35     const Sentence &sentence() const { return sentence_; }
size()36     size_t size() const { return sentence_.size(); }
37 
score()38     float score() const { return score_; }
adjustScore(float adjust)39     void adjustScore(float adjust) { score_ += adjust; }
setScore(float score)40     void setScore(float score) { score_ = score; }
41 
42     bool operator<(const SentenceResult &rhs) const {
43         return score_ < rhs.score_;
44     }
45 
46     bool operator>(const SentenceResult &rhs) const {
47         return score_ > rhs.score_;
48     }
49 
50     std::string toString() const;
51 
52 private:
53     Sentence sentence_;
54     float score_;
55 };
56 
57 class LIBIMECORE_EXPORT WordNode {
58 public:
WordNode(std::string_view word,WordIndex idx)59     WordNode(std::string_view word, WordIndex idx) : word_(word), idx_(idx) {}
60 
61     virtual ~WordNode() = default;
62     WordNode(WordNode &&other) noexcept(
63         std::is_nothrow_move_constructible<std::string>::value);
64     WordNode &operator=(WordNode &&other) noexcept(
65         std::is_nothrow_move_assignable<std::string>::value);
66 
word()67     const std::string &word() const { return word_; }
idx()68     WordIndex idx() const { return idx_; }
setIdx(WordIndex idx)69     void setIdx(WordIndex idx) { idx_ = idx; }
70 
71 protected:
72     std::string word_;
73     WordIndex idx_;
74 };
75 
76 class LIBIMECORE_EXPORT LatticeNodeData {
77 public:
78     virtual ~LatticeNodeData() = default;
79 };
80 
81 class LIBIMECORE_EXPORT LatticeNode : public WordNode {
82 public:
83     LatticeNode(std::string_view word, WordIndex idx, SegmentGraphPath path,
84                 const State &state, float cost = 0)
WordNode(word,idx)85         : WordNode(word, idx), path_(std::move(path)), cost_(cost),
86           state_(state) {
87         assert(path_.size() >= 2);
88     }
cost()89     float cost() const { return cost_; }
90 
score()91     float score() const { return score_; }
setScore(float score)92     void setScore(float score) { score_ = score; }
93 
from()94     const SegmentGraphNode *from() const { return path_.front(); }
to()95     const SegmentGraphNode *to() const { return path_.back(); }
path()96     const SegmentGraphPath &path() const { return path_; }
97 
prev()98     LatticeNode *prev() const { return prev_; }
setPrev(LatticeNode * prev)99     void setPrev(LatticeNode *prev) { prev_ = prev; }
100 
101     /// Return the full word till the begining of the sentence.
fullWord()102     std::string fullWord() const {
103         size_t length = 0;
104         const auto *pivot = this;
105         while (pivot) {
106             length += pivot->word().size();
107             pivot = pivot->prev();
108         }
109 
110         std::string result;
111         result.resize(length);
112         pivot = this;
113         while (pivot) {
114             const auto &word = pivot->word();
115             length -= word.size();
116             std::copy(word.begin(), word.end(), result.begin() + length);
117             pivot = pivot->prev();
118         }
119 
120         return result;
121     }
122 
123     SentenceResult toSentenceResult(float adjust = 0.0f) const {
124         SentenceResult::Sentence result;
125         const auto *pivot = this;
126         // to skip bos
127         while (pivot->prev()) {
128             if (pivot->to()) {
129                 result.emplace_back(pivot);
130             }
131             pivot = pivot->prev();
132         }
133 
134         std::reverse(result.begin(), result.end());
135         return {std::move(result), score() + adjust};
136     }
137 
state()138     State &state() { return state_; }
139 
140 protected:
141     SegmentGraphPath path_;
142     float cost_;
143     float score_ = 0.0f;
144     State state_;
145     LatticeNode *prev_ = nullptr;
146 };
147 
toString()148 inline std::string SentenceResult::toString() const {
149     return fcitx::stringutils::join(
150         sentence_ | boost::adaptors::transformed(
151                         [](const auto &item) { return item->word(); }),
152         "");
153 }
154 
155 // Lattice graph is a overlay graph on the SegmentGraph.
156 // Every node in SegmentGraph may have multiple corresponding LatticeNode.
157 // If there is an edge between two lattice nodes, there is a path between their
158 // corresponding SegmentGraphNode.
159 //
160 // For example, pinyin, "xian" has three SegmentGraphNodes.
161 // [0] ---- xian ----- [4]
162 //   \- xi -[2] - an -/
163 //
164 // while [0] has only one lattice node, the remaining nodes may has multiple
165 // nodes.
166 // So there is an extra "end" node that links every nodes from [4] to it.
167 class LIBIMECORE_EXPORT Lattice {
168     friend class Decoder;
169     friend class DecoderPrivate;
170     friend class SegmentGraph;
171 
172 public:
173     Lattice();
174     FCITX_DECLARE_VIRTUAL_DTOR_MOVE(Lattice)
175 
176     size_t sentenceSize() const;
177     const SentenceResult &sentence(size_t idx) const;
178     void clear();
179     void discardNode(const std::unordered_set<const SegmentGraphNode *> &node);
180 
181     typedef boost::any_range<LatticeNode, boost::bidirectional_traversal_tag,
182                              const LatticeNode &>
183         NodeRange;
184 
185     NodeRange nodes(const SegmentGraphNode *node) const;
186 
187 private:
188     std::unique_ptr<LatticePrivate> d_ptr;
189     FCITX_DECLARE_PRIVATE(Lattice);
190 };
191 } // namespace libime
192 
193 #endif // _FCITX_LIBIME_CORE_LATTICE_H_
194