1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/performance_manager/persistence/site_data/leveldb_site_data_store.h"
6 
7 #include <limits>
8 #include <string>
9 
10 #include "base/auto_reset.h"
11 #include "base/bind.h"
12 #include "base/callback.h"
13 #include "base/files/file_util.h"
14 #include "base/hash/md5.h"
15 #include "base/logging.h"
16 #include "base/memory/ptr_util.h"
17 #include "base/metrics/histogram_functions.h"
18 #include "base/metrics/histogram_macros.h"
19 #include "base/strings/string_number_conversions.h"
20 #include "base/task/thread_pool.h"
21 #include "base/task_runner_util.h"
22 #include "base/threading/scoped_blocking_call.h"
23 #include "build/build_config.h"
24 #include "third_party/leveldatabase/env_chromium.h"
25 #include "third_party/leveldatabase/leveldb_chrome.h"
26 #include "third_party/leveldatabase/src/include/leveldb/env.h"
27 #include "third_party/leveldatabase/src/include/leveldb/write_batch.h"
28 
29 namespace performance_manager {
30 
31 namespace {
32 
// When true, OpenOrCreateDatabaseImpl() creates an in-memory LevelDB
// environment and opens the database with it. Only set through
// LevelDBSiteDataStore::UseInMemoryDBForTesting().
bool g_use_in_memory_db_for_testing = false;

// The names of the following histograms are the same as the ones used in the
// //c/b/resource_coordinator version of this file. It's fine to keep the same
// names as these two code paths will never be enabled at the same time. These
// histograms should be removed once it has been confirmed that the data is
// similar to the data from the other implementation.
//
// TODO(sebmarchand): Remove these histograms.
const char kInitStatusHistogramLabel[] =
    "ResourceCoordinator.LocalDB.DatabaseInit";
const char kInitStatusAfterRepairHistogramLabel[] =
    "ResourceCoordinator.LocalDB.DatabaseInitAfterRepair";
const char kInitStatusAfterDeleteHistogramLabel[] =
    "ResourceCoordinator.LocalDB.DatabaseInitAfterDelete";
48 
// Outcome of an attempt to open the database, as recorded by
// ReportInitStatus() in the initialization histograms.
// NOTE(review): these values end up in UMA histograms, so presumably they must
// not be reordered or renumbered - confirm before editing.
enum class InitStatus {
  // The database has been opened successfully.
  kInitStatusOk,
  // Opening failed with a status for which IsCorruption() is true.
  kInitStatusCorruption,
  // Opening failed with a status for which IsIOError() is true.
  kInitStatusIOError,
  // Opening failed with any other status.
  kInitStatusUnknownError,
  // Not a status: passed as the boundary value when recording the histogram.
  kInitStatusMax
};
56 
57 // Report the database's initialization status metrics.
ReportInitStatus(const char * histogram_name,const leveldb::Status & status)58 void ReportInitStatus(const char* histogram_name,
59                       const leveldb::Status& status) {
60   if (status.ok()) {
61     base::UmaHistogramEnumeration(histogram_name, InitStatus::kInitStatusOk,
62                                   InitStatus::kInitStatusMax);
63   } else if (status.IsCorruption()) {
64     base::UmaHistogramEnumeration(histogram_name,
65                                   InitStatus::kInitStatusCorruption,
66                                   InitStatus::kInitStatusMax);
67   } else if (status.IsIOError()) {
68     base::UmaHistogramEnumeration(histogram_name,
69                                   InitStatus::kInitStatusIOError,
70                                   InitStatus::kInitStatusMax);
71   } else {
72     base::UmaHistogramEnumeration(histogram_name,
73                                   InitStatus::kInitStatusUnknownError,
74                                   InitStatus::kInitStatusMax);
75   }
76 }
77 
78 // Attempt to repair the database stored in |db_path|.
RepairDatabase(const std::string & db_path)79 bool RepairDatabase(const std::string& db_path) {
80   leveldb_env::Options options;
81   options.reuse_logs = false;
82   options.max_open_files = 0;
83   bool repair_succeeded = leveldb::RepairDB(db_path, options).ok();
84   UMA_HISTOGRAM_BOOLEAN("ResourceCoordinator.LocalDB.DatabaseRepair",
85                         repair_succeeded);
86   return repair_succeeded;
87 }
88 
ShouldAttemptDbRepair(const leveldb::Status & status)89 bool ShouldAttemptDbRepair(const leveldb::Status& status) {
90   // A corrupt database might be repaired (some data might be loss but it's
91   // better than losing everything).
92   if (status.IsCorruption())
93     return true;
94   // An I/O error might be caused by a missing manifest, it's sometime possible
95   // to repair this (some data might be loss).
96   if (status.IsIOError())
97     return true;
98 
99   return false;
100 }
101 
// Value returned by AsyncHelper::GetDatabaseSize(). A default-constructed
// instance (both fields unset) is what gets returned on failure.
struct DatabaseSizeResult {
  // Number of entries found while iterating over the whole database.
  base::Optional<int64_t> num_rows;
  // Size of the database directory, in bytes divided by 1024.
  base::Optional<int64_t> on_disk_size_kb;
};
106 
SerializeOriginIntoDatabaseKey(const url::Origin & origin)107 std::string SerializeOriginIntoDatabaseKey(const url::Origin& origin) {
108   return base::MD5String(origin.host());
109 }
110 
111 }  // namespace
112 
// Version history:
//
// - {no version}:
//     - Initial launch of the Database.
// - 1:
//     - Ignore the title/favicon events happening during the first few seconds
//       after a tab being loaded.
//     - Ignore the audio events happening during the first few seconds after a
//       tab being backgrounded.
//
// Transform logic:
//     - From {no version} to v1: The database is erased entirely.
const size_t LevelDBSiteDataStore::kDbVersion = 1U;

// Key of the database entry holding the metadata, which for now is only the
// version number written by AsyncHelper::OpenOrCreateDatabase().
const char LevelDBSiteDataStore::kDbMetadataKey[] = "database_metadata";
128 
129 // Helper class used to run all the blocking operations posted by
130 // LocalSiteCharacteristicDatabase on a ThreadPool sequence with the
131 // |MayBlock()| trait.
132 //
133 // Instances of this class should only be destructed once all the posted tasks
134 // have been run, in practice it means that they should ideally be stored in a
135 // std::unique_ptr<AsyncHelper, base::OnTaskRunnerDeleter>.
136 class LevelDBSiteDataStore::AsyncHelper {
137  public:
AsyncHelper(const base::FilePath & db_path)138   explicit AsyncHelper(const base::FilePath& db_path) : db_path_(db_path) {
139     DETACH_FROM_SEQUENCE(sequence_checker_);
140     // Setting |sync| to false might cause some data loss if the system crashes
141     // but it'll make the write operations faster (no data will be lost if only
142     // the process crashes).
143     write_options_.sync = false;
144   }
145   ~AsyncHelper() = default;
146 
147   // Open the database from |db_path_| after creating it if it didn't exist,
148   // this reset the database if it's not at the expected version.
149   void OpenOrCreateDatabase();
150 
151   // Implementations of the DB manipulation functions of
152   // LevelDBSiteDataStore that run on a blocking sequence.
153   base::Optional<SiteDataProto> ReadSiteDataFromDB(const url::Origin& origin);
154   void WriteSiteDataIntoDB(const url::Origin& origin,
155                            const SiteDataProto& site_characteristic_proto);
156   void RemoveSiteDataFromDB(const std::vector<url::Origin>& site_origin);
157   void ClearDatabase();
158   // Returns a struct with unset fields on failure.
159   DatabaseSizeResult GetDatabaseSize();
160 
DBIsInitialized()161   bool DBIsInitialized() { return db_ != nullptr; }
162 
GetDBForTesting()163   leveldb::DB* GetDBForTesting() {
164     DCHECK(DBIsInitialized());
165     return db_.get();
166   }
167 
SetInitializationCallbackForTesting(base::OnceClosure callback)168   void SetInitializationCallbackForTesting(base::OnceClosure callback) {
169     init_callback_for_testing_ = std::move(callback);
170     if (DBIsInitialized())
171       std::move(init_callback_for_testing_).Run();
172   }
173 
174  private:
175   enum class OpeningType {
176     // A new database has been created.
177     kNewDb,
178     // An existing database has been used.
179     kExistingDb,
180   };
181 
182   // Implementation for the OpenOrCreateDatabase function.
183   OpeningType OpenOrCreateDatabaseImpl();
184 
185   // A levelDB environment that gets used for testing. This allows using an
186   // in-memory database when needed.
187   std::unique_ptr<leveldb::Env> env_for_testing_;
188 
189   // The on disk location of the database.
190   const base::FilePath db_path_;
191   // The connection to the LevelDB database.
192   std::unique_ptr<leveldb::DB> db_;
193   // The options to be used for all database read operations.
194   leveldb::ReadOptions read_options_;
195   // The options to be used for all database write operations.
196   leveldb::WriteOptions write_options_;
197 
198   base::OnceClosure init_callback_for_testing_;
199 
200   SEQUENCE_CHECKER(sequence_checker_);
201   DISALLOW_COPY_AND_ASSIGN(AsyncHelper);
202 };
203 
OpenOrCreateDatabase()204 void LevelDBSiteDataStore::AsyncHelper::OpenOrCreateDatabase() {
205   OpeningType opening_type = OpenOrCreateDatabaseImpl();
206 
207   if (init_callback_for_testing_)
208     std::move(init_callback_for_testing_).Run();
209 
210   if (!db_)
211     return;
212   std::string db_metadata;
213   leveldb::Status s = db_->Get(
214       read_options_, LevelDBSiteDataStore::kDbMetadataKey, &db_metadata);
215   bool is_expected_version = false;
216   if (s.ok()) {
217     // The metadata only contains the version of the database as a size_t value
218     // for now.
219     size_t version = std::numeric_limits<size_t>::max();
220     CHECK(base::StringToSizeT(db_metadata, &version));
221     if (version == LevelDBSiteDataStore::kDbVersion)
222       is_expected_version = true;
223   }
224   // TODO(sebmarchand): Add a migration engine rather than flushing the database
225   // for every version change, https://crbug.com/866540.
226   if ((opening_type == OpeningType::kExistingDb) && !is_expected_version) {
227     DLOG(ERROR) << "Invalid DB version, recreating it.";
228     ClearDatabase();
229     // The database might fail to open.
230     if (!db_)
231       return;
232     opening_type = OpeningType::kNewDb;
233   }
234   if (opening_type == OpeningType::kNewDb) {
235     std::string metadata =
236         base::NumberToString(LevelDBSiteDataStore::kDbVersion);
237     s = db_->Put(write_options_, LevelDBSiteDataStore::kDbMetadataKey,
238                  metadata);
239     if (!s.ok()) {
240       DLOG(ERROR) << "Error while inserting the metadata in the site "
241                   << "characteristics database: " << s.ToString();
242     }
243   }
244 }
245 
246 base::Optional<SiteDataProto>
ReadSiteDataFromDB(const url::Origin & origin)247 LevelDBSiteDataStore::AsyncHelper::ReadSiteDataFromDB(
248     const url::Origin& origin) {
249   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
250 
251   if (!db_)
252     return base::nullopt;
253 
254   leveldb::Status s;
255   std::string protobuf_value;
256   {
257     base::ScopedBlockingCall scoped_blocking_call(
258         FROM_HERE, base::BlockingType::MAY_BLOCK);
259     s = db_->Get(read_options_, SerializeOriginIntoDatabaseKey(origin),
260                  &protobuf_value);
261   }
262   base::Optional<SiteDataProto> site_characteristic_proto;
263   if (s.ok()) {
264     site_characteristic_proto = SiteDataProto();
265     if (!site_characteristic_proto->ParseFromString(protobuf_value)) {
266       site_characteristic_proto = base::nullopt;
267       DLOG(ERROR) << "Error while trying to parse a SiteDataProto "
268                   << "protobuf.";
269     }
270   }
271   return site_characteristic_proto;
272 }
273 
WriteSiteDataIntoDB(const url::Origin & origin,const SiteDataProto & site_characteristic_proto)274 void LevelDBSiteDataStore::AsyncHelper::WriteSiteDataIntoDB(
275     const url::Origin& origin,
276     const SiteDataProto& site_characteristic_proto) {
277   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
278 
279   if (!db_)
280     return;
281 
282   leveldb::Status s;
283   {
284     base::ScopedBlockingCall scoped_blocking_call(
285         FROM_HERE, base::BlockingType::MAY_BLOCK);
286     s = db_->Put(write_options_, SerializeOriginIntoDatabaseKey(origin),
287                  site_characteristic_proto.SerializeAsString());
288   }
289 
290   if (!s.ok()) {
291     DLOG(ERROR)
292         << "Error while inserting an element in the site characteristics "
293         << "database: " << s.ToString();
294   }
295 }
296 
RemoveSiteDataFromDB(const std::vector<url::Origin> & site_origins)297 void LevelDBSiteDataStore::AsyncHelper::RemoveSiteDataFromDB(
298     const std::vector<url::Origin>& site_origins) {
299   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
300 
301   if (!db_)
302     return;
303 
304   base::ScopedBlockingCall scoped_blocking_call(FROM_HERE,
305                                                 base::BlockingType::MAY_BLOCK);
306   leveldb::WriteBatch batch;
307   for (const auto& iter : site_origins)
308     batch.Delete(SerializeOriginIntoDatabaseKey(iter));
309   leveldb::Status status = db_->Write(write_options_, &batch);
310   if (!status.ok()) {
311     LOG(WARNING) << "Failed to remove some entries from the site "
312                  << "characteristics database: " << status.ToString();
313   }
314 }
315 
ClearDatabase()316 void LevelDBSiteDataStore::AsyncHelper::ClearDatabase() {
317   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
318   if (!db_)
319     return;
320 
321   base::ScopedBlockingCall scoped_blocking_call(FROM_HERE,
322                                                 base::BlockingType::MAY_BLOCK);
323   db_.reset();
324   leveldb_env::Options options;
325   leveldb::Status status = leveldb::DestroyDB(db_path_.AsUTF8Unsafe(), options);
326   if (status.ok()) {
327     OpenOrCreateDatabaseImpl();
328   } else {
329     LOG(WARNING) << "Failed to destroy the site characteristics database: "
330                  << status.ToString();
331   }
332 }
333 
GetDatabaseSize()334 DatabaseSizeResult LevelDBSiteDataStore::AsyncHelper::GetDatabaseSize() {
335   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
336   if (!db_)
337     return DatabaseSizeResult();
338 
339   base::ScopedBlockingCall scoped_blocking_call(FROM_HERE,
340                                                 base::BlockingType::MAY_BLOCK);
341   DatabaseSizeResult ret;
342 #if defined(OS_WIN)
343   // Windows has an annoying mis-feature that the size of an open file is not
344   // written to the parent directory until the file is closed. Since this is a
345   // diagnostic interface that should be rarely called, go to the trouble of
346   // closing and re-opening the database in order to get an up-to date size to
347   // report.
348   db_.reset();
349 #endif
350   ret.on_disk_size_kb = base::ComputeDirectorySize(db_path_) / 1024;
351 #if defined(OS_WIN)
352   OpenOrCreateDatabase();
353   if (!db_)
354     return DatabaseSizeResult();
355 #endif
356 
357   // Default read options will fill the cache as we go.
358   std::unique_ptr<leveldb::Iterator> iterator(
359       db_->NewIterator(leveldb::ReadOptions()));
360   int64_t num_rows = 0;
361   for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next())
362     ++num_rows;
363 
364   ret.num_rows = num_rows;
365   return ret;
366 }
367 
368 LevelDBSiteDataStore::AsyncHelper::OpeningType
OpenOrCreateDatabaseImpl()369 LevelDBSiteDataStore::AsyncHelper::OpenOrCreateDatabaseImpl() {
370   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
371   DCHECK(!db_) << "Database already open";
372   base::ScopedBlockingCall scoped_blocking_call(FROM_HERE,
373                                                 base::BlockingType::MAY_BLOCK);
374 
375   OpeningType opening_type = OpeningType::kNewDb;
376 
377   // Report the on disk size of the database if it already exists.
378   if (base::DirectoryExists(db_path_)) {
379     opening_type = OpeningType::kExistingDb;
380     int64_t db_ondisk_size_in_bytes = base::ComputeDirectorySize(db_path_);
381     UMA_HISTOGRAM_MEMORY_KB("ResourceCoordinator.LocalDB.OnDiskSize",
382                             db_ondisk_size_in_bytes / 1024);
383   }
384 
385   leveldb_env::Options options;
386   options.create_if_missing = true;
387 
388   if (g_use_in_memory_db_for_testing) {
389     env_for_testing_ = leveldb_chrome::NewMemEnv("LevelDBSiteDataStore");
390     options.env = env_for_testing_.get();
391   }
392 
393   leveldb::Status status =
394       leveldb_env::OpenDB(options, db_path_.AsUTF8Unsafe(), &db_);
395 
396   ReportInitStatus(kInitStatusHistogramLabel, status);
397 
398   if (status.ok())
399     return opening_type;
400 
401   if (!ShouldAttemptDbRepair(status))
402     return opening_type;
403 
404   if (RepairDatabase(db_path_.AsUTF8Unsafe())) {
405     status = leveldb_env::OpenDB(options, db_path_.AsUTF8Unsafe(), &db_);
406     ReportInitStatus(kInitStatusAfterRepairHistogramLabel, status);
407     if (status.ok())
408       return opening_type;
409   }
410 
411   // Delete the database and try to open it one last time.
412   if (leveldb_chrome::DeleteDB(db_path_, options).ok()) {
413     status = leveldb_env::OpenDB(options, db_path_.AsUTF8Unsafe(), &db_);
414     ReportInitStatus(kInitStatusAfterDeleteHistogramLabel, status);
415     if (!status.ok())
416       db_.reset();
417   }
418 
419   return opening_type;
420 }
421 
LevelDBSiteDataStore(const base::FilePath & db_path)422 LevelDBSiteDataStore::LevelDBSiteDataStore(const base::FilePath& db_path)
423     : blocking_task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
424           // The |BLOCK_SHUTDOWN| trait is required to ensure that a clearing of
425           // the database won't be skipped.
426           {base::MayBlock(), base::TaskShutdownBehavior::BLOCK_SHUTDOWN})),
427       async_helper_(new AsyncHelper(db_path),
428                     base::OnTaskRunnerDeleter(blocking_task_runner_)) {
429   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
430 
431   blocking_task_runner_->PostTask(
432       FROM_HERE,
433       base::BindOnce(&LevelDBSiteDataStore::AsyncHelper::OpenOrCreateDatabase,
434                      base::Unretained(async_helper_.get())));
435 }
436 
// Nothing to do explicitly: |async_helper_| has been created with a
// base::OnTaskRunnerDeleter bound to |blocking_task_runner_|, which takes care
// of its deletion.
LevelDBSiteDataStore::~LevelDBSiteDataStore() = default;
438 
ReadSiteDataFromStore(const url::Origin & origin,SiteDataStore::ReadSiteDataFromStoreCallback callback)439 void LevelDBSiteDataStore::ReadSiteDataFromStore(
440     const url::Origin& origin,
441     SiteDataStore::ReadSiteDataFromStoreCallback callback) {
442   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
443 
444   // Trigger the asynchronous task and make it run the callback on this thread
445   // once it returns.
446   base::PostTaskAndReplyWithResult(
447       blocking_task_runner_.get(), FROM_HERE,
448       base::BindOnce(&LevelDBSiteDataStore::AsyncHelper::ReadSiteDataFromDB,
449                      base::Unretained(async_helper_.get()), origin),
450       base::BindOnce(std::move(callback)));
451 }
452 
WriteSiteDataIntoStore(const url::Origin & origin,const SiteDataProto & site_characteristic_proto)453 void LevelDBSiteDataStore::WriteSiteDataIntoStore(
454     const url::Origin& origin,
455     const SiteDataProto& site_characteristic_proto) {
456   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
457   blocking_task_runner_->PostTask(
458       FROM_HERE,
459       base::BindOnce(&LevelDBSiteDataStore::AsyncHelper::WriteSiteDataIntoDB,
460                      base::Unretained(async_helper_.get()), origin,
461                      std::move(site_characteristic_proto)));
462 }
463 
RemoveSiteDataFromStore(const std::vector<url::Origin> & site_origins)464 void LevelDBSiteDataStore::RemoveSiteDataFromStore(
465     const std::vector<url::Origin>& site_origins) {
466   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
467   blocking_task_runner_->PostTask(
468       FROM_HERE,
469       base::BindOnce(&LevelDBSiteDataStore::AsyncHelper::RemoveSiteDataFromDB,
470                      base::Unretained(async_helper_.get()),
471                      std::move(site_origins)));
472 }
473 
ClearStore()474 void LevelDBSiteDataStore::ClearStore() {
475   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
476   blocking_task_runner_->PostTask(
477       FROM_HERE,
478       base::BindOnce(&LevelDBSiteDataStore::AsyncHelper::ClearDatabase,
479                      base::Unretained(async_helper_.get())));
480 }
481 
GetStoreSize(GetStoreSizeCallback callback)482 void LevelDBSiteDataStore::GetStoreSize(GetStoreSizeCallback callback) {
483   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
484 
485   // Adapt the callback with a lambda to allow using PostTaskAndReplyWithResult.
486   auto reply_callback = base::BindOnce(
487       [](GetStoreSizeCallback callback, const DatabaseSizeResult& result) {
488         std::move(callback).Run(result.num_rows, result.on_disk_size_kb);
489       },
490       std::move(callback));
491 
492   base::PostTaskAndReplyWithResult(
493       blocking_task_runner_.get(), FROM_HERE,
494       base::BindOnce(&LevelDBSiteDataStore::AsyncHelper::GetDatabaseSize,
495                      base::Unretained(async_helper_.get())),
496       std::move(reply_callback));
497 }
498 
SetInitializationCallbackForTesting(base::OnceClosure callback)499 void LevelDBSiteDataStore::SetInitializationCallbackForTesting(
500     base::OnceClosure callback) {
501   DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
502   blocking_task_runner_->PostTask(
503       FROM_HERE, base::BindOnce(&LevelDBSiteDataStore::AsyncHelper::
504                                     SetInitializationCallbackForTesting,
505                                 base::Unretained(async_helper_.get()),
506                                 std::move(callback)));
507 }
508 
DatabaseIsInitializedForTesting()509 bool LevelDBSiteDataStore::DatabaseIsInitializedForTesting() {
510   return async_helper_->DBIsInitialized();
511 }
512 
GetDBForTesting()513 leveldb::DB* LevelDBSiteDataStore::GetDBForTesting() {
514   return async_helper_->GetDBForTesting();
515 }
516 
517 // static
518 std::unique_ptr<base::AutoReset<bool>>
UseInMemoryDBForTesting()519 LevelDBSiteDataStore::UseInMemoryDBForTesting() {
520   return std::make_unique<base::AutoReset<bool>>(
521       &g_use_in_memory_db_for_testing, true);
522 }
523 
524 }  // namespace performance_manager
525