1 //
2 // Copyright RIME Developers
3 // Distributed under the BSD License
4 //
5 // 2012-01-05 GONG Chen <chen.sst@gmail.com>
6 // 2014-07-06 GONG Chen <chen.sst@gmail.com> redesigned binary file format.
7 //
8 #include <sstream>
9 #include <boost/algorithm/string.hpp>
10 #include <boost/filesystem.hpp>
11 #include <boost/lexical_cast.hpp>
12 #include <rime/resource.h>
13 #include <rime/schema.h>
14 #include <rime/service.h>
15 #include <rime/ticket.h>
16 #include <rime/dict/dict_settings.h>
17 #include <rime/dict/reverse_lookup_dictionary.h>
18
19 namespace rime {
20
// Format tag copied into the metadata of a freshly built reverse db.
const char kReverseFormat[] = "Rime::Reverse/3.0";

// Leading part of the format tag; ReverseDb::Load() compares only this much
// of the stored tag.
const char kReverseFormatPrefix[] = "Rime::Reverse/";
// Length of the prefix without its terminating NUL.
const size_t kReverseFormatPrefixLen = sizeof(kReverseFormatPrefix) - 1;

// Appended to a text to form the key under which its stems are stored.
static const char kStemKeySuffix[] = "\x1fstem";
27
ReverseDb(const string & file_name)28 ReverseDb::ReverseDb(const string& file_name)
29 : MappedFile(file_name) {
30 }
31
Load()32 bool ReverseDb::Load() {
33 LOG(INFO) << "loading reversedb: " << file_name();
34
35 if (IsOpen())
36 Close();
37
38 if (!OpenReadOnly()) {
39 LOG(ERROR) << "Error opening reversedb '" << file_name() << "'.";
40 return false;
41 }
42
43 metadata_ = Find<reverse::Metadata>(0);
44 if (!metadata_) {
45 LOG(ERROR) << "metadata not found.";
46 Close();
47 return false;
48 }
49 if (strncmp(metadata_->format,
50 kReverseFormatPrefix, kReverseFormatPrefixLen)) {
51 LOG(ERROR) << "invalid metadata.";
52 Close();
53 return false;
54 }
55 //double format = atof(&metadata_->format[kReverseFormatPrefixLen]);
56
57 key_trie_.reset(new StringTable(metadata_->key_trie.get(),
58 metadata_->key_trie_size));
59 value_trie_.reset(new StringTable(metadata_->value_trie.get(),
60 metadata_->value_trie_size));
61
62 return true;
63 }
64
Lookup(const string & text,string * result)65 bool ReverseDb::Lookup(const string& text, string* result) {
66 if (!key_trie_ || !value_trie_ || !metadata_->index.size) {
67 return false;
68 }
69 StringId key_id = key_trie_->Lookup(text);
70 if (key_id == kInvalidStringId) {
71 return false;
72 }
73 StringId value_id = metadata_->index.at[key_id];
74 *result = value_trie_->GetString(value_id);
75 return !result->empty();
76 }
77
Build(DictSettings * settings,const Syllabary & syllabary,const Vocabulary & vocabulary,const ReverseLookupTable & stems,uint32_t dict_file_checksum)78 bool ReverseDb::Build(DictSettings* settings,
79 const Syllabary& syllabary,
80 const Vocabulary& vocabulary,
81 const ReverseLookupTable& stems,
82 uint32_t dict_file_checksum) {
83 LOG(INFO) << "building reversedb...";
84 ReverseLookupTable rev_table;
85 int syllable_id = 0;
86 for (const string& syllable : syllabary) {
87 auto it = vocabulary.find(syllable_id++);
88 if (it == vocabulary.end())
89 continue;
90 const auto& entries(it->second.entries);
91 for (const auto& e : entries) {
92 rev_table[e->text].insert(syllable);
93 }
94 }
95 StringTableBuilder key_trie_builder;
96 StringTableBuilder value_trie_builder;
97 size_t entry_count = rev_table.size() + stems.size();
98 vector<StringId> key_ids(entry_count);
99 vector<StringId> value_ids(entry_count);
100 int i = 0;
101 // save reverse lookup entries
102 for (const auto& v : rev_table) {
103 const string& key(v.first);
104 string value(boost::algorithm::join(v.second, " "));
105 key_trie_builder.Add(key, 0.0, &key_ids[i]);
106 value_trie_builder.Add(value, 0.0, &value_ids[i]);
107 ++i;
108 }
109 // save stems
110 for (const auto& v : stems) {
111 string key(v.first + kStemKeySuffix);
112 string value(boost::algorithm::join(v.second, " "));
113 key_trie_builder.Add(key, 0.0, &key_ids[i]);
114 value_trie_builder.Add(value, 0.0, &value_ids[i]);
115 ++i;
116 }
117 key_trie_builder.Build();
118 value_trie_builder.Build();
119
120 // dict settings required by UniTE
121 string dict_settings;
122 if (settings && settings->use_rule_based_encoder()) {
123 std::ostringstream yaml;
124 settings->SaveToStream(yaml);
125 dict_settings = yaml.str();
126 }
127
128 // creating reversedb file
129 const size_t kReservedSize = 1024;
130 size_t key_trie_image_size = key_trie_builder.BinarySize();
131 size_t value_trie_image_size = value_trie_builder.BinarySize();
132 size_t estimated_data_size = kReservedSize +
133 dict_settings.length() +
134 entry_count * sizeof(StringId) +
135 key_trie_image_size + value_trie_image_size;
136 if (!Create(estimated_data_size)) {
137 LOG(ERROR) << "Error creating prism file '" << file_name() << "'.";
138 return false;
139 }
140
141 // create metadata
142 metadata_ = Allocate<reverse::Metadata>();
143 if (!metadata_) {
144 LOG(ERROR) << "Error creating metadata in file '" << file_name() << "'.";
145 return false;
146 }
147 metadata_->dict_file_checksum = dict_file_checksum;
148 if (!dict_settings.empty()) {
149 if(!CopyString(dict_settings, &metadata_->dict_settings)) {
150 LOG(ERROR) << "Error saving dict settings.";
151 return false;
152 }
153 }
154
155 auto entries = Allocate<StringId>(entry_count);
156 if (!entries) {
157 return false;
158 }
159 for (size_t i = 0; i < entry_count; ++i) {
160 entries[key_ids[i]] = value_ids[i];
161 }
162 metadata_->index.size = entry_count;
163 metadata_->index.at = entries;
164
165 // save key trie image
166 char* key_trie_image = Allocate<char>(key_trie_image_size);
167 if (!key_trie_image) {
168 LOG(ERROR) << "Error creating key trie image.";
169 return false;
170 }
171 key_trie_builder.Dump(key_trie_image, key_trie_image_size);
172 metadata_->key_trie = key_trie_image;
173 metadata_->key_trie_size = key_trie_image_size;
174
175 // save value trie image
176 char* value_trie_image = Allocate<char>();
177 if (!value_trie_image) {
178 LOG(ERROR) << "Error creating value trie image.";
179 return false;
180 }
181 value_trie_builder.Dump(value_trie_image, value_trie_image_size);
182 metadata_->value_trie = value_trie_image;
183 metadata_->value_trie_size = value_trie_image_size;
184
185 // at last, complete the metadata
186 std::strncpy(metadata_->format, kReverseFormat,
187 reverse::Metadata::kFormatMaxLength);
188 return true;
189 }
190
dict_file_checksum() const191 uint32_t ReverseDb::dict_file_checksum() const {
192 return metadata_ ? metadata_->dict_file_checksum : 0;
193 }
194
ReverseLookupDictionary(an<ReverseDb> db)195 ReverseLookupDictionary::ReverseLookupDictionary(an<ReverseDb> db)
196 : db_(db) {
197 }
198
Load()199 bool ReverseLookupDictionary::Load() {
200 return db_ && (db_->IsOpen() || db_->Load());
201 }
202
ReverseLookup(const string & text,string * result)203 bool ReverseLookupDictionary::ReverseLookup(const string& text,
204 string* result) {
205 return db_->Lookup(text, result);
206
207 }
208
LookupStems(const string & text,string * result)209 bool ReverseLookupDictionary::LookupStems(const string& text,
210 string* result) {
211 return db_->Lookup(text + kStemKeySuffix, result);
212 }
213
GetDictSettings()214 an<DictSettings> ReverseLookupDictionary::GetDictSettings() {
215 an<DictSettings> settings;
216 reverse::Metadata* metadata = db_->metadata();
217 if (metadata && !metadata->dict_settings.empty()) {
218 string yaml(metadata->dict_settings.c_str());
219 std::istringstream iss(yaml);
220 settings = New<DictSettings>();
221 if (!settings->LoadFromStream(iss)) {
222 settings.reset();
223 }
224 }
225 return settings;
226 }
227
228 static const ResourceType kReverseDbResourceType = {
229 "reverse_db", "", ".reverse.bin"
230 };
231
ReverseLookupDictionaryComponent()232 ReverseLookupDictionaryComponent::ReverseLookupDictionaryComponent()
233 : resource_resolver_(
234 Service::instance().CreateDeployedResourceResolver(
235 kReverseDbResourceType)) {
236 }
237
238 ReverseLookupDictionary*
Create(const Ticket & ticket)239 ReverseLookupDictionaryComponent::Create(const Ticket& ticket) {
240 if (!ticket.schema) return NULL;
241 Config* config = ticket.schema->config();
242 string dict_name;
243 if (!config->GetString(ticket.name_space + "/dictionary",
244 &dict_name)) {
245 // missing!
246 return NULL;
247 }
248 auto db = db_pool_[dict_name].lock();
249 if (!db) {
250 auto file_path = resource_resolver_->ResolvePath(dict_name).string();
251 db = New<ReverseDb>(file_path);
252 db_pool_[dict_name] = db;
253 }
254 return new ReverseLookupDictionary(db);
255 }
256
257 } // namespace rime
258