1 //
2 // Copyright RIME Developers
3 // Distributed under the BSD License
4 //
5 // 2012-01-05 GONG Chen <chen.sst@gmail.com>
6 // 2014-07-06 GONG Chen <chen.sst@gmail.com> redesigned binary file format.
7 //
#include <cstring>
#include <sstream>
#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>
#include <boost/lexical_cast.hpp>
#include <rime/resource.h>
#include <rime/schema.h>
#include <rime/service.h>
#include <rime/ticket.h>
#include <rime/dict/dict_settings.h>
#include <rime/dict/reverse_lookup_dictionary.h>
18 
19 namespace rime {
20 
// Format tag written into the metadata of every reverse db built by this file.
const char kReverseFormat[] = "Rime::Reverse/3.0";

// Load() accepts any file whose format tag starts with this prefix.
const char kReverseFormatPrefix[] = "Rime::Reverse/";
// Prefix length, excluding the terminating NUL.
const size_t kReverseFormatPrefixLen = sizeof(kReverseFormatPrefix) - 1;

// Appended to a text to form the key under which its stems are stored, so
// that stem entries share the key trie with ordinary reverse lookup entries.
// Declared as an array (not a reassignable pointer) so it is truly constant.
static const char kStemKeySuffix[] = "\x1fstem";
27 
ReverseDb(const string & file_name)28 ReverseDb::ReverseDb(const string& file_name)
29     : MappedFile(file_name) {
30 }
31 
Load()32 bool ReverseDb::Load() {
33   LOG(INFO) << "loading reversedb: " << file_name();
34 
35   if (IsOpen())
36     Close();
37 
38   if (!OpenReadOnly()) {
39     LOG(ERROR) << "Error opening reversedb '" << file_name() << "'.";
40     return false;
41   }
42 
43   metadata_ = Find<reverse::Metadata>(0);
44   if (!metadata_) {
45     LOG(ERROR) << "metadata not found.";
46     Close();
47     return false;
48   }
49   if (strncmp(metadata_->format,
50               kReverseFormatPrefix, kReverseFormatPrefixLen)) {
51     LOG(ERROR) << "invalid metadata.";
52     Close();
53     return false;
54   }
55   //double format = atof(&metadata_->format[kReverseFormatPrefixLen]);
56 
57   key_trie_.reset(new StringTable(metadata_->key_trie.get(),
58                                   metadata_->key_trie_size));
59   value_trie_.reset(new StringTable(metadata_->value_trie.get(),
60                                     metadata_->value_trie_size));
61 
62   return true;
63 }
64 
Lookup(const string & text,string * result)65 bool ReverseDb::Lookup(const string& text, string* result) {
66   if (!key_trie_ || !value_trie_ || !metadata_->index.size) {
67     return false;
68   }
69   StringId key_id = key_trie_->Lookup(text);
70   if (key_id == kInvalidStringId) {
71     return false;
72   }
73   StringId value_id = metadata_->index.at[key_id];
74   *result = value_trie_->GetString(value_id);
75   return !result->empty();
76 }
77 
Build(DictSettings * settings,const Syllabary & syllabary,const Vocabulary & vocabulary,const ReverseLookupTable & stems,uint32_t dict_file_checksum)78 bool ReverseDb::Build(DictSettings* settings,
79                       const Syllabary& syllabary,
80                       const Vocabulary& vocabulary,
81                       const ReverseLookupTable& stems,
82                       uint32_t dict_file_checksum) {
83   LOG(INFO) << "building reversedb...";
84   ReverseLookupTable rev_table;
85   int syllable_id = 0;
86   for (const string& syllable : syllabary) {
87     auto it = vocabulary.find(syllable_id++);
88     if (it == vocabulary.end())
89       continue;
90     const auto& entries(it->second.entries);
91     for (const auto& e : entries) {
92       rev_table[e->text].insert(syllable);
93     }
94   }
95   StringTableBuilder key_trie_builder;
96   StringTableBuilder value_trie_builder;
97   size_t entry_count = rev_table.size() + stems.size();
98   vector<StringId> key_ids(entry_count);
99   vector<StringId> value_ids(entry_count);
100   int i = 0;
101   // save reverse lookup entries
102   for (const auto& v : rev_table) {
103     const string& key(v.first);
104     string value(boost::algorithm::join(v.second, " "));
105     key_trie_builder.Add(key, 0.0, &key_ids[i]);
106     value_trie_builder.Add(value, 0.0, &value_ids[i]);
107     ++i;
108   }
109   // save stems
110   for (const auto& v : stems) {
111     string key(v.first + kStemKeySuffix);
112     string value(boost::algorithm::join(v.second, " "));
113     key_trie_builder.Add(key, 0.0, &key_ids[i]);
114     value_trie_builder.Add(value, 0.0, &value_ids[i]);
115     ++i;
116   }
117   key_trie_builder.Build();
118   value_trie_builder.Build();
119 
120   // dict settings required by UniTE
121   string dict_settings;
122   if (settings && settings->use_rule_based_encoder()) {
123     std::ostringstream yaml;
124     settings->SaveToStream(yaml);
125     dict_settings = yaml.str();
126   }
127 
128   // creating reversedb file
129   const size_t kReservedSize = 1024;
130   size_t key_trie_image_size = key_trie_builder.BinarySize();
131   size_t value_trie_image_size = value_trie_builder.BinarySize();
132   size_t estimated_data_size = kReservedSize +
133       dict_settings.length() +
134       entry_count * sizeof(StringId) +
135       key_trie_image_size + value_trie_image_size;
136   if (!Create(estimated_data_size)) {
137     LOG(ERROR) << "Error creating prism file '" << file_name() << "'.";
138     return false;
139   }
140 
141   // create metadata
142   metadata_ = Allocate<reverse::Metadata>();
143   if (!metadata_) {
144     LOG(ERROR) << "Error creating metadata in file '" << file_name() << "'.";
145     return false;
146   }
147   metadata_->dict_file_checksum = dict_file_checksum;
148   if (!dict_settings.empty()) {
149     if(!CopyString(dict_settings, &metadata_->dict_settings)) {
150       LOG(ERROR) << "Error saving dict settings.";
151       return false;
152     }
153   }
154 
155   auto entries = Allocate<StringId>(entry_count);
156   if (!entries) {
157     return false;
158   }
159   for (size_t i = 0; i < entry_count; ++i) {
160     entries[key_ids[i]] = value_ids[i];
161   }
162   metadata_->index.size = entry_count;
163   metadata_->index.at = entries;
164 
165   // save key trie image
166   char* key_trie_image = Allocate<char>(key_trie_image_size);
167   if (!key_trie_image) {
168     LOG(ERROR) << "Error creating key trie image.";
169     return false;
170   }
171   key_trie_builder.Dump(key_trie_image, key_trie_image_size);
172   metadata_->key_trie = key_trie_image;
173   metadata_->key_trie_size = key_trie_image_size;
174 
175   // save value trie image
176   char* value_trie_image = Allocate<char>();
177   if (!value_trie_image) {
178     LOG(ERROR) << "Error creating value trie image.";
179     return false;
180   }
181   value_trie_builder.Dump(value_trie_image, value_trie_image_size);
182   metadata_->value_trie = value_trie_image;
183   metadata_->value_trie_size = value_trie_image_size;
184 
185   // at last, complete the metadata
186   std::strncpy(metadata_->format, kReverseFormat,
187                reverse::Metadata::kFormatMaxLength);
188   return true;
189 }
190 
dict_file_checksum() const191 uint32_t ReverseDb::dict_file_checksum() const {
192   return metadata_ ? metadata_->dict_file_checksum : 0;
193 }
194 
ReverseLookupDictionary(an<ReverseDb> db)195 ReverseLookupDictionary::ReverseLookupDictionary(an<ReverseDb> db)
196     : db_(db) {
197 }
198 
Load()199 bool ReverseLookupDictionary::Load() {
200   return db_ && (db_->IsOpen() || db_->Load());
201 }
202 
ReverseLookup(const string & text,string * result)203 bool ReverseLookupDictionary::ReverseLookup(const string& text,
204                                             string* result) {
205   return db_->Lookup(text, result);
206 
207 }
208 
LookupStems(const string & text,string * result)209 bool ReverseLookupDictionary::LookupStems(const string& text,
210                                           string* result) {
211   return db_->Lookup(text + kStemKeySuffix, result);
212 }
213 
GetDictSettings()214 an<DictSettings> ReverseLookupDictionary::GetDictSettings() {
215   an<DictSettings> settings;
216   reverse::Metadata* metadata = db_->metadata();
217   if (metadata && !metadata->dict_settings.empty()) {
218     string yaml(metadata->dict_settings.c_str());
219     std::istringstream iss(yaml);
220     settings = New<DictSettings>();
221     if (!settings->LoadFromStream(iss)) {
222       settings.reset();
223     }
224   }
225   return settings;
226 }
227 
228 static const ResourceType kReverseDbResourceType = {
229   "reverse_db", "", ".reverse.bin"
230 };
231 
ReverseLookupDictionaryComponent()232 ReverseLookupDictionaryComponent::ReverseLookupDictionaryComponent()
233     : resource_resolver_(
234           Service::instance().CreateDeployedResourceResolver(
235               kReverseDbResourceType)) {
236 }
237 
238 ReverseLookupDictionary*
Create(const Ticket & ticket)239 ReverseLookupDictionaryComponent::Create(const Ticket& ticket) {
240   if (!ticket.schema) return NULL;
241   Config* config = ticket.schema->config();
242   string dict_name;
243   if (!config->GetString(ticket.name_space + "/dictionary",
244                          &dict_name)) {
245     // missing!
246     return NULL;
247   }
248   auto db = db_pool_[dict_name].lock();
249   if (!db) {
250     auto file_path = resource_resolver_->ResolvePath(dict_name).string();
251     db = New<ReverseDb>(file_path);
252     db_pool_[dict_name] = db;
253   }
254   return new ReverseLookupDictionary(db);
255 }
256 
257 }  // namespace rime
258