1 /*
2  * SPDX-FileCopyrightText: 2017-2017 CSSlayer <wengxt@gmail.com>
3  *
4  * SPDX-License-Identifier: LGPL-2.1-or-later
5  */
6 #ifndef _FCITX_LIBIME_CORE_SEGMENTGRAPH_H_
7 #define _FCITX_LIBIME_CORE_SEGMENTGRAPH_H_
8 
9 #include "libimecore_export.h"
10 #include <boost/iterator/transform_iterator.hpp>
11 #include <boost/ptr_container/ptr_list.hpp>
12 #include <boost/range/adaptor/type_erased.hpp>
13 #include <boost/range/iterator_range.hpp>
14 #include <fcitx-utils/element.h>
15 #include <functional>
16 #include <list>
17 #include <string_view>
18 #include <unordered_map>
19 #include <unordered_set>
20 
21 namespace libime {
22 
23 class Lattice;
24 class SegmentGraphBase;
25 class SegmentGraph;
26 class SegmentGraphNode;
27 typedef std::function<bool(const SegmentGraphBase &graph,
28                            const std::vector<size_t> &)>
29     SegmentGraphDFSCallback;
30 typedef std::function<bool(const SegmentGraphBase &graph,
31                            const SegmentGraphNode *)>
32     SegmentGraphBFSCallback;
33 
34 using SegmentGraphNodeRange =
35     boost::any_range<SegmentGraphNode, boost::bidirectional_traversal_tag>;
36 using SegmentGraphNodeConstRange =
37     boost::any_range<const SegmentGraphNode,
38                      boost::bidirectional_traversal_tag>;
39 
40 class LIBIMECORE_EXPORT SegmentGraphNode : public fcitx::Element {
41     friend class SegmentGraph;
42 
43 public:
SegmentGraphNode(size_t start)44     SegmentGraphNode(size_t start) : start_(start) {}
45     SegmentGraphNode(const SegmentGraphNode &node) = delete;
~SegmentGraphNode()46     virtual ~SegmentGraphNode() {}
47 
nexts()48     SegmentGraphNodeConstRange nexts() const {
49         const auto &nexts = childs();
50         return boost::make_iterator_range(
51             boost::make_transform_iterator(nexts.begin(),
52                                            &SegmentGraphNode::castConst),
53             boost::make_transform_iterator(nexts.end(),
54                                            &SegmentGraphNode::castConst));
55     }
56 
nextSize()57     size_t nextSize() const { return childs().size(); }
58 
prevs()59     SegmentGraphNodeConstRange prevs() const {
60         const auto &prevs = parents();
61         return boost::make_iterator_range(
62             boost::make_transform_iterator(prevs.begin(),
63                                            &SegmentGraphNode::castConst),
64             boost::make_transform_iterator(prevs.end(),
65                                            &SegmentGraphNode::castConst));
66     }
67 
prevSize()68     size_t prevSize() const { return parents().size(); }
index()69     inline size_t index() const { return start_; }
70 
71     bool operator==(const SegmentGraphNode &other) const {
72         return this == &other;
73     }
74     bool operator!=(const SegmentGraphNode &other) const {
75         return !operator==(other);
76     }
77 
78 protected:
mutablePrevs()79     SegmentGraphNodeRange mutablePrevs() {
80         const auto &prevs = parents();
81         return boost::make_iterator_range(
82             boost::make_transform_iterator(prevs.begin(),
83                                            &SegmentGraphNode::cast),
84             boost::make_transform_iterator(prevs.end(),
85                                            &SegmentGraphNode::cast));
86     }
87 
mutableNexts()88     SegmentGraphNodeRange mutableNexts() {
89         const auto &nexts = childs();
90         return boost::make_iterator_range(
91             boost::make_transform_iterator(nexts.begin(),
92                                            &SegmentGraphNode::cast),
93             boost::make_transform_iterator(nexts.end(),
94                                            &SegmentGraphNode::cast));
95     }
96 
addEdge(SegmentGraphNode & ref)97     void addEdge(SegmentGraphNode &ref) {
98         assert(ref.start_ > start_);
99         addChild(&ref);
100     }
removeEdge(SegmentGraphNode & ref)101     void removeEdge(SegmentGraphNode &ref) { removeChild(&ref); }
102 
103     static auto castConst(fcitx::Element *ele) -> const SegmentGraphNode & {
104         return *static_cast<SegmentGraphNode *>(ele);
105     }
106     static auto cast(fcitx::Element *ele) -> SegmentGraphNode & {
107         return *static_cast<SegmentGraphNode *>(ele);
108     }
109 
110 private:
111     size_t start_;
112 };
113 
114 typedef std::vector<const SegmentGraphNode *> SegmentGraphPath;
115 typedef std::function<void(
116     const std::unordered_set<const SegmentGraphNode *> &node)>
117     DiscardCallback;
118 
119 class LIBIMECORE_EXPORT SegmentGraphBase {
120 public:
SegmentGraphBase(std::string data)121     SegmentGraphBase(std::string data) : data_(std::move(data)) {}
122     FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_MOVE_WITHOUT_SPEC(SegmentGraphBase)
123 
124     virtual const SegmentGraphNode &start() const = 0;
125     virtual const SegmentGraphNode &end() const = 0;
126 
127     virtual SegmentGraphNodeConstRange nodes(size_t idx) const = 0;
node(size_t idx)128     inline const SegmentGraphNode &node(size_t idx) const {
129         return nodes(idx).front();
130     }
131 
132     // Return the string.
data()133     const std::string &data() const { return data_; }
134 
135     // Return the size of string.
size()136     size_t size() const { return data().size(); }
137 
segment(size_t start,size_t end)138     inline std::string_view segment(size_t start, size_t end) const {
139         return std::string_view(data().data() + start, end - start);
140     }
141 
segment(const SegmentGraphNode & start,const SegmentGraphNode & end)142     inline std::string_view segment(const SegmentGraphNode &start,
143                                     const SegmentGraphNode &end) const {
144         return segment(start.index(), end.index());
145     }
146 
147     bool bfs(const SegmentGraphNode *from,
148              const SegmentGraphBFSCallback &callback) const;
149 
dfs(const SegmentGraphDFSCallback & callback)150     bool dfs(const SegmentGraphDFSCallback &callback) const {
151         std::vector<size_t> path;
152         return dfsHelper(path, start(), callback);
153     }
154 
155     // A naive distance, not necessary be the shortest.
distanceToEnd(const SegmentGraphNode & node)156     size_t distanceToEnd(const SegmentGraphNode &node) const {
157         const auto *pNode = &node;
158         const SegmentGraphNode *endNode = &end();
159         size_t distance = 0;
160         while (pNode != endNode) {
161             pNode = &pNode->nexts().front();
162             ++distance;
163         }
164         return distance;
165     }
166 
isList()167     bool isList() const {
168         const SegmentGraphNode *node = &start();
169         const SegmentGraphNode *endNode = &end();
170         while (node != endNode) {
171             if (node->nextSize() != 1) {
172                 return false;
173             }
174             node = &node->nexts().front();
175         }
176         return true;
177     }
178 
checkGraph()179     bool checkGraph() const {
180         std::unordered_set<const SegmentGraphNode *> allNodes;
181         for (size_t i = 0, e = size(); i < e; i++) {
182             for (const auto &n : nodes(i)) {
183                 if (n.nexts().empty() && n != end()) {
184                     return false;
185                 }
186                 allNodes.insert(&n);
187             }
188         }
189 
190         bfs(&start(), [&allNodes](const SegmentGraphBase &,
191                                   const SegmentGraphNode *node) {
192             allNodes.erase(node);
193             return true;
194         });
195 
196         return allNodes.empty();
197     }
198 
checkNodeInGraph(const SegmentGraphNode * node)199     bool checkNodeInGraph(const SegmentGraphNode *node) const {
200         for (size_t i = 0, e = size(); i <= e; i++) {
201             for (const auto &n : nodes(i)) {
202                 if (&n == node) {
203                     return true;
204                 }
205             }
206         }
207         return false;
208     }
209 
210 protected:
mutableData()211     std::string &mutableData() { return data_; }
212 
213 private:
dfsHelper(std::vector<size_t> & path,const SegmentGraphNode & start,const SegmentGraphDFSCallback & callback)214     bool dfsHelper(std::vector<size_t> &path, const SegmentGraphNode &start,
215                    const SegmentGraphDFSCallback &callback) const {
216         if (start == end()) {
217             return callback(*this, path);
218         }
219         auto nexts = start.nexts();
220         for (const auto &next : nexts) {
221             auto idx = next.index();
222             path.push_back(idx);
223             if (!dfsHelper(path, next, callback)) {
224                 return false;
225             }
226             path.pop_back();
227         }
228         return true;
229     }
230 
231     std::string data_;
232 };
233 
234 class LIBIMECORE_EXPORT SegmentGraph : public SegmentGraphBase {
235 public:
SegmentGraphBase(std::move (str))236     SegmentGraph(std::string str = {}) : SegmentGraphBase(std::move(str)) {
237         resize(data().size() + 1);
238         if (!data().empty()) {
239             newNode(data().size());
240         }
241         newNode(0);
242     }
243     SegmentGraph(const SegmentGraph &seg) = delete;
FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_MOVE_WITHOUT_SPEC(SegmentGraph)244     FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_MOVE_WITHOUT_SPEC(SegmentGraph)
245 
246     const SegmentGraphNode &start() const override { return *graph_[0]; }
end()247     const SegmentGraphNode &end() const override {
248         return *graph_[data().size()];
249     }
250     void merge(SegmentGraph &graph,
251                const DiscardCallback &discardCallback = {});
252 
ensureNode(size_t idx)253     SegmentGraphNode &ensureNode(size_t idx) {
254         if (nodes(idx).empty()) {
255             newNode(idx);
256         }
257         return mutableNode(idx);
258     }
259 
nodes(size_t idx)260     SegmentGraphNodeConstRange nodes(size_t idx) const override {
261         if (graph_[idx]) {
262             return {graph_[idx].get(), graph_[idx].get() + 1};
263         }
264         return {};
265     }
266     using SegmentGraphBase::node;
267 
268     // helper for dag style segments
addNext(size_t from,size_t to)269     void addNext(size_t from, size_t to) {
270         assert(from < to);
271         assert(to <= data().size());
272         if (nodes(from).empty()) {
273             newNode(from);
274         }
275         if (nodes(to).empty()) {
276             newNode(to);
277         }
278         graph_[from]->addEdge(*graph_[to]);
279     }
280 
appendNewSegment(std::string_view str)281     void appendNewSegment(std::string_view str) {
282         // append empty string is meaningless.
283         if (str.empty()) {
284             return;
285         }
286 
287         size_t oldSize = data().size();
288         mutableData().append(str.data(), str.size());
289         resize(data().size() + 1);
290         auto &newEnd = newNode(data().size());
291         auto &node = mutableNode(oldSize);
292         node.addEdge(newEnd);
293     }
294 
appendToLastSegment(std::string_view str)295     void appendToLastSegment(std::string_view str) {
296         // append empty string is meaningless.
297         if (str.empty()) {
298             return;
299         }
300 
301         size_t oldSize = data().size();
302         mutableData().append(str.data(), str.size());
303         resize(data().size() + 1);
304 
305         auto &newEnd = newNode(data().size());
306         auto &node = mutableNode(oldSize);
307         // If old size is 0, then just create an edge from begin to end.
308         if (oldSize == 0) {
309             node.addEdge(newEnd);
310             return;
311         }
312 
313         // Otherwise, iterate the prevs of oldEnd, create a edge between all
314         // prevs and newEnd, and delete old end.
315         for (auto &prev : node.mutablePrevs()) {
316             prev.addEdge(newEnd);
317         }
318         // Remove the old node.
319         graph_[oldSize].reset();
320     }
321 
removeSuffixFrom(size_t idx)322     void removeSuffixFrom(size_t idx) {
323         if (idx >= size()) {
324             return;
325         }
326 
327         auto *pNode = &mutableNode(size());
328         const auto *pStart = &start();
329         while (pNode != pStart && pNode->index() > idx) {
330             pNode = &pNode->mutablePrevs().front();
331         }
332 
333         mutableData().erase(idx);
334         resize(data().size() + 1);
335         if (size() == 0) {
336             return;
337         }
338 
339         if (pNode->index() == idx) {
340             return;
341         }
342         auto &newEnd = newNode(size());
343         pNode->addEdge(newEnd);
344     }
345 
346 private:
resize(size_t newSize)347     void resize(size_t newSize) {
348         auto oldSize = graph_.size();
349         graph_.resize(newSize);
350         for (; oldSize < newSize; oldSize++) {
351             graph_[oldSize].reset();
352         }
353     }
354 
newNode(size_t idx)355     SegmentGraphNode &newNode(size_t idx) {
356         graph_[idx] = std::make_unique<SegmentGraphNode>(idx);
357         return *graph_[idx];
358     }
359 
mutableNode(size_t idx)360     inline SegmentGraphNode &mutableNode(size_t idx) {
361         return mutableNodes(idx).front();
362     }
363 
mutableNodes(size_t idx)364     SegmentGraphNodeRange mutableNodes(size_t idx) {
365         if (graph_[idx]) {
366             return {graph_[idx].get(), graph_[idx].get() + 1};
367         }
368         return {};
369     }
370 
371     size_t check(const SegmentGraph &graph) const;
372     // ptr_vector doesn't have move constructor, G-R-E-A-T
373     std::vector<std::unique_ptr<SegmentGraphNode>> graph_;
374 };
375 } // namespace libime
376 
377 #endif // _FCITX_LIBIME_CORE_SEGMENTGRAPH_H_
378