1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #ifndef MOZC_DICTIONARY_SYSTEM_SYSTEM_DICTIONARY_H_
31 #define MOZC_DICTIONARY_SYSTEM_SYSTEM_DICTIONARY_H_
32 
33 #include <memory>
34 #include <set>
35 #include <string>
36 #include <vector>
37 
38 #include "base/port.h"
39 #include "base/string_piece.h"
40 #include "dictionary/dictionary_interface.h"
41 #include "dictionary/file/codec_interface.h"
42 #include "dictionary/system/codec_interface.h"
43 #include "dictionary/system/key_expansion_table.h"
44 #include "dictionary/system/words_info.h"
45 #include "storage/louds/bit_vector_based_array.h"
46 #include "storage/louds/louds_trie.h"
47 
48 namespace mozc {
49 namespace dictionary {
50 
// Forward declarations of types that the SystemDictionary declaration below
// refers to only through pointers or std::unique_ptr.
// NOTE(review): the codec_interface.h headers included above may already
// declare the two codec interfaces -- confirm before removing either.
51 class DictionaryFile;
52 class DictionaryFileCodecInterface;
53 class SystemDictionaryCodecInterface;
54 
55 class SystemDictionary : public DictionaryInterface {
56  public:
57   // System dictionary options represented as bitwise enum.
58   enum Options {
59     NONE = 0,
60     // If ENABLE_REVERSE_LOOKUP_INDEX is set, we will have the index in heap
61     // from the id in value trie to the id in key trie.
62     // That consumes more memory but we can perform reverse lookup more quickly.
63     ENABLE_REVERSE_LOOKUP_INDEX = 1,
64   };
65 
66   // Builder class for system dictionary
67   // Usage:
68   //   SystemDictionary::Builder builder(filename);
69   //   builder.SetOptions(SystemDictionary::NONE);
70   //   builder.SetCodec(NULL);
71   //   SystemDictionary *dictionary = builder.Build();
72   //   ...
73   //   delete dictionary;
74   class Builder {
75    public:
76     // Creates Builder from filename
77     explicit Builder(const string &filename);
78     // Creates Builder from image
79     Builder(const char *ptr, int len);
80     ~Builder();
81 
82     // Sets options (default: NONE)
83     Builder &SetOptions(Options options);
84 
85     // Sets codec (default: NULL)
86     // Uses default codec if this is NULL
87     // Doesn't take the ownership of |codec|.
88     Builder &SetCodec(const SystemDictionaryCodecInterface *codec);
89 
90     // Builds and returns system dictionary.
91     SystemDictionary *Build();
92 
93    private:
94     struct Specification;
95     std::unique_ptr<Specification> spec_;
96     DISALLOW_COPY_AND_ASSIGN(Builder);
97   };
98 
99   virtual ~SystemDictionary();
100 
value_trie()101   const storage::louds::LoudsTrie &value_trie() const { return value_trie_; }
102 
103   // Implementation of DictionaryInterface.
104   virtual bool HasKey(StringPiece key) const;
105   virtual bool HasValue(StringPiece value) const;
106 
107   virtual void LookupPredictive(StringPiece key,
108                                 const ConversionRequest &converter_request,
109                                 Callback *callback) const;
110 
111   virtual void LookupPrefix(StringPiece key,
112                             const ConversionRequest &converter_request,
113                             Callback *callback) const;
114 
115   virtual void LookupExact(StringPiece key,
116                            const ConversionRequest &converter_request,
117                            Callback *callback) const;
118 
119   virtual void LookupReverse(StringPiece str,
120                              const ConversionRequest &converter_request,
121                              Callback *callback) const;
122 
123   virtual void PopulateReverseLookupCache(StringPiece str) const;
124   virtual void ClearReverseLookupCache() const;
125 
126  private:
127   class ReverseLookupCache;
128   class ReverseLookupIndex;
129   struct PredictiveLookupSearchState;
130 
131   explicit SystemDictionary(const SystemDictionaryCodecInterface *codec,
132                             const DictionaryFileCodecInterface *file_codec);
133   bool OpenDictionaryFile(bool enable_reverse_lookup_index);
134 
135   void RegisterReverseLookupTokensForT13N(StringPiece value,
136                                           Callback *callback) const;
137   void RegisterReverseLookupTokensForValue(StringPiece value,
138                                            Callback *callback) const;
139   void ScanTokens(const std::set<int> &id_set, ReverseLookupCache *cache) const;
140   void RegisterReverseLookupResults(const std::set<int> &id_set,
141                                     const ReverseLookupCache &cache,
142                                     Callback *callback) const;
143   void InitReverseLookupIndex();
144 
145   Callback::ResultType LookupPrefixWithKeyExpansionImpl(
146       const char *key,
147       StringPiece encoded_key,
148       const KeyExpansionTable &table,
149       Callback *callback,
150       storage::louds::LoudsTrie::Node node,
151       StringPiece::size_type key_pos,
152       bool is_expanded,
153       char *actual_key_buffer,
154       string *actual_prefix) const;
155 
156   void CollectPredictiveNodesInBfsOrder(
157       StringPiece encoded_key,
158       const KeyExpansionTable &table,
159       size_t limit,
160       std::vector<PredictiveLookupSearchState> *result) const;
161 
162   storage::louds::LoudsTrie key_trie_;
163   storage::louds::LoudsTrie value_trie_;
164   storage::louds::BitVectorBasedArray token_array_;
165   const uint32 *frequent_pos_;
166   const SystemDictionaryCodecInterface *codec_;
167   KeyExpansionTable hiragana_expansion_table_;
168   std::unique_ptr<DictionaryFile> dictionary_file_;
169   mutable std::unique_ptr<ReverseLookupCache> reverse_lookup_cache_;
170   std::unique_ptr<ReverseLookupIndex> reverse_lookup_index_;
171 
172   DISALLOW_COPY_AND_ASSIGN(SystemDictionary);
173 };
174 
175 }  // namespace dictionary
176 }  // namespace mozc
177 
178 #endif  // MOZC_DICTIONARY_SYSTEM_SYSTEM_DICTIONARY_H_
179