1 // Copyright 2010-2018, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "storage/tiny_storage.h"
31 
32 #ifdef OS_WIN
33 #include <Windows.h>
34 #endif  // OS_WIN
35 
36 #include <cstring>
37 #include <map>
38 #include <memory>
39 #include <string>
40 
41 #include "base/file_stream.h"
42 #include "base/file_util.h"
43 #include "base/logging.h"
44 #include "base/mmap.h"
45 #include "base/port.h"
46 
47 namespace mozc {
48 namespace storage {
49 namespace {
50 
51 const uint32 kStorageVersion = 0;
52 const uint32 kStorageMagicId = 0x431fe241;  // random seed
53 const size_t kMaxElementSize = 1024;        // max map size
54 const size_t kMaxKeySize     = 4096;        // 4k for key/value
55 const size_t kMaxValueSize   = 4096;        // 4k for key/value
56 // 1024 * (4096 + 4096) =~ 8MByte
57 // so 10Mbyte data is reasonable upper bound for file size
58 const size_t kMaxFileSize     = 1024 * 1024 * 10;  // 10Mbyte
59 
60 template<typename T>
ReadData(char ** begin,const char * end,T * value)61 bool ReadData(char **begin, const char *end, T *value) {
62   if (*begin + sizeof(*value) > end) {
63     LOG(WARNING) << "accessing invalid pointer";
64     return false;
65   }
66   memcpy(value, *begin, sizeof(*value));
67   *begin += sizeof(*value);
68   return true;
69 }
70 
IsInvalid(const string & key,const string & value,size_t size)71 bool IsInvalid(const string &key, const string &value, size_t size) {
72   if (size >= kMaxElementSize) {
73     LOG(ERROR) << "too many elements";
74     return true;
75   }
76   if (key.size() >= kMaxKeySize) {
77     LOG(ERROR) << "too long key";
78     return true;
79   }
80   if (value.size() >= kMaxValueSize) {
81     LOG(ERROR) << "too long value";
82     return true;
83   }
84   return false;
85 }
86 
87 class TinyStorageImpl : public StorageInterface {
88  public:
89   TinyStorageImpl();
90   virtual ~TinyStorageImpl();
91 
92   virtual bool Open(const string &filename);
93   virtual bool Sync();
94   virtual bool Lookup(const string &key, string *value) const;
95   virtual bool Insert(const string &key, const string &value);
96   virtual bool Erase(const string &key);
97   virtual bool Clear();
Size() const98   virtual size_t Size() const {
99     return dic_.size();
100   }
101 
102  private:
103   string filename_;
104   bool should_sync_;
105   std::map<string, string> dic_;
106 
107   DISALLOW_COPY_AND_ASSIGN(TinyStorageImpl);
108 };
109 
TinyStorageImpl()110 TinyStorageImpl::TinyStorageImpl() : should_sync_(true) {
111   // the each entry consumes at most
112   // sizeof(uint32) * 2 (key/value length) +
113   // kMaxKeySize + kMaxValueSize
114   DCHECK_GT(
115       kMaxFileSize,
116       kMaxElementSize * (kMaxKeySize + kMaxValueSize + sizeof(uint32) * 2));
117 }
118 
~TinyStorageImpl()119 TinyStorageImpl::~TinyStorageImpl() {
120   if (should_sync_) {
121     Sync();
122   }
123 }
124 
Open(const string & filename)125 bool TinyStorageImpl::Open(const string &filename) {
126   Mmap mmap;
127   dic_.clear();
128   filename_ = filename;
129   if (!mmap.Open(filename.c_str(), "r")) {
130     LOG(WARNING) << "cannot open:" << filename;
131     // here we return true if we cannot open the file.
132     // it happens mostly when file doesn't exist.
133     // we just make an empty file from scratch here.
134     return true;
135   }
136 
137   if (mmap.size() > kMaxFileSize) {
138     LOG(ERROR) << "tring to open too big file";
139     return false;
140   }
141 
142   char *begin = mmap.begin();
143   const char *end = mmap.end();
144 
145   uint32 version = 0;
146   uint32 magic = 0;
147   uint32 size = 0;
148   // magic is used for checking whether given file is correct or not.
149   // magic = (file_size ^ kStorageMagicId)
150   if (!ReadData<uint32>(&begin, end, &magic)) {
151     LOG(ERROR) << "cannot read magic";
152     return false;
153   }
154 
155   if ((magic ^ kStorageMagicId) != mmap.size()) {
156     LOG(ERROR) << "file magic is broken";
157     return false;
158   }
159 
160   if (!ReadData<uint32>(&begin, end, &version)) {
161     LOG(ERROR) << "cannot read version";
162     return false;
163   }
164 
165   if (version != kStorageVersion) {
166     LOG(ERROR) << "Incompatible version";
167     return false;
168   }
169 
170   if (!ReadData<uint32>(&begin, end, &size)) {
171     LOG(ERROR) << "cannot read size";
172     return false;
173   }
174 
175   for (size_t i = 0; i < size; ++i) {
176     uint32 key_size = 0;
177     uint32 value_size = 0;
178     if (!ReadData<uint32>(&begin, end, &key_size)) {
179       LOG(ERROR) << "key_size is invalid";
180       return false;
181     }
182 
183     if (begin + key_size > end) {
184       LOG(ERROR) << "Too long key is passed";
185       return false;
186     }
187 
188     const string key(begin, key_size);
189     begin += key_size;
190 
191     if (!ReadData<uint32>(&begin, end, &value_size)) {
192       LOG(ERROR) << "value_size is invalid";
193       return false;
194     }
195 
196     if (begin + value_size > end) {
197       LOG(ERROR) << "Too long value is passed";
198       return false;
199     }
200 
201     const string value(begin, value_size);
202     begin += value_size;
203 
204     if (IsInvalid(key, value, dic_.size())) {
205       return false;
206     }
207 
208     dic_.insert(std::make_pair(key, value));
209   }
210 
211   if (static_cast<size_t>(begin - mmap.begin()) != mmap.size()) {
212     LOG(ERROR) << "file is broken: " << filename_;
213     dic_.clear();
214     return false;
215   }
216 
217   return true;
218 }
219 
220 // Format of storage:
221 // |magic(uint32 file_size ^ kStorageVersion)|version(uint32)|size(uint32)|
222 // |key_size(uint32)|key(variable length)|
223 // |value_size(uint32)|value(variable length)| ...
Sync()224 bool TinyStorageImpl::Sync() {
225   if (!should_sync_) {
226     VLOG(2) << "Already synced";
227     return true;
228   }
229 
230   const string output_filename = filename_ + ".tmp";
231 
232   OutputFileStream ofs(output_filename.c_str(),
233                        std::ios::binary | std::ios::out);
234   if (!ofs) {
235     LOG(ERROR) << "cannot open " << output_filename;
236     return false;
237   }
238 
239   uint32 magic = 0;
240   uint32 size = 0;
241   ofs.write(reinterpret_cast<const char *>(&magic),
242             sizeof(magic));
243   ofs.write(reinterpret_cast<const char *>(&kStorageVersion),
244             sizeof(kStorageVersion));
245   ofs.write(reinterpret_cast<const char *>(&size),
246             sizeof(size));
247 
248   for (std::map<string, string>::const_iterator it = dic_.begin();
249        it != dic_.end(); ++it) {
250     if (it->first.empty()) {
251       continue;
252     }
253     const string &key = it->first;
254     const string &value = it->second;
255     const uint32 key_size = static_cast<uint32>(key.size());
256     const uint32 value_size = static_cast<uint32>(value.size());
257     ofs.write(reinterpret_cast<const char *>(&key_size),
258               sizeof(key_size));
259     ofs.write(key.data(), key_size);
260     ofs.write(reinterpret_cast<const char *>(&value_size),
261               sizeof(value_size));
262     ofs.write(value.data(), value_size);
263     ++size;
264   }
265 
266   magic = static_cast<uint32>(ofs.tellp());
267   ofs.seekp(0);
268   magic ^= kStorageMagicId;
269 
270   ofs.write(reinterpret_cast<const char *>(&magic),
271             sizeof(magic));
272   ofs.write(reinterpret_cast<const char *>(&kStorageVersion),
273             sizeof(kStorageVersion));
274   ofs.write(reinterpret_cast<const char *>(&size),
275             sizeof(size));
276 
277   // should call close(). Othrwise AtomicRename will be failed.
278   ofs.close();
279 
280   if (!FileUtil::AtomicRename(output_filename, filename_)) {
281     LOG(ERROR) << "AtomicRename failed";
282     return false;
283   }
284 
285 #ifdef OS_WIN
286   if (!FileUtil::HideFile(filename_)) {
287     LOG(ERROR) << "Cannot make hidden: " << filename_
288                << " " << ::GetLastError();
289   }
290 #endif
291 
292   should_sync_ = false;
293 
294   return true;
295 }
296 
Insert(const string & key,const string & value)297 bool TinyStorageImpl::Insert(const string &key, const string &value) {
298   if (IsInvalid(key, value, dic_.size())) {
299     LOG(WARNING) << "invalid key/value is passed";
300     return false;
301   }
302   dic_[key] = value;
303   should_sync_ = true;
304   return true;
305 }
306 
Erase(const string & key)307 bool TinyStorageImpl::Erase(const string &key) {
308   std::map<string, string>::iterator it = dic_.find(key);
309   if (it == dic_.end()) {
310     VLOG(2) << "cannot erase key: " << key;
311     return false;
312   }
313   dic_.erase(it);
314   should_sync_ = true;
315   return true;
316 }
317 
Lookup(const string & key,string * value) const318 bool TinyStorageImpl::Lookup(const string &key, string *value) const {
319   std::map<string, string>::const_iterator it = dic_.find(key);
320   if (it == dic_.end()) {
321     VLOG(3) << "cannot find key: " << key;
322     return false;
323   }
324   *value = it->second;
325   return true;
326 }
327 
Clear()328 bool TinyStorageImpl::Clear() {
329   dic_.clear();
330   should_sync_ = true;
331   return Sync();
332 }
333 
334 }  // namespace
335 
Create(const char * filename)336 StorageInterface *TinyStorage::Create(const char *filename) {
337   std::unique_ptr<TinyStorageImpl> storage(new TinyStorageImpl);
338   if (!storage->Open(filename)) {
339     LOG(ERROR) << "cannot open " << filename;
340     return NULL;
341   }
342   return storage.release();
343 }
344 
New()345 StorageInterface *TinyStorage::New() {
346   return new TinyStorageImpl;
347 }
348 
349 }  // namespace storage
350 }  // namespace mozc
351