1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/history/core/browser/top_sites_database.h"
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <utility>
10 
11 #include "base/bind.h"
12 #include "base/files/file_util.h"
13 #include "base/metrics/histogram_macros.h"
14 #include "base/strings/string_piece.h"
15 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h"
18 #include "components/history/core/browser/history_types.h"
19 #include "components/history/core/browser/top_sites.h"
20 #include "sql/database.h"
21 #include "sql/recovery.h"
22 #include "sql/statement.h"
23 #include "third_party/sqlite/sqlite3.h"
24 
25 namespace history {
26 
27 // Description of database table:
28 //
29 // top_sites
30 //   url              URL of the top site.
31 //   url_rank         Index of the site, 0-based. The site with the highest rank
32 //                    will be the next one evicted.
33 //   title            The title to display under that site.
34 //   redirects        A space separated list of URLs that are known to redirect
35 //                    to this url. As of 9/2019 this column is not used. It will
36 //                    be removed shortly.
37 
38 namespace {
39 
40 // For this database, schema migrations are deprecated after two
41 // years.  This means that the oldest non-deprecated version should be
42 // two years old or greater (thus the migrations to get there are
43 // older).  Databases containing deprecated versions will be cleared
44 // at startup.  Since this database is a cache, losing old data is not
45 // fatal (in fact, very old data may be expired immediately at startup
46 // anyhow).
47 
48 // Version 4: 95af34ec/r618360 kristipark@chromium.org on 2018-12-20
49 // Version 3: b6d6a783/r231648 by beaudoin@chromium.org on 2013-10-29
50 // Version 2: eb0b24e6/r87284 by satorux@chromium.org on 2011-05-31 (deprecated)
51 // Version 1: 809cc4d8/r64072 by sky@chromium.org on 2010-10-27 (deprecated)
52 
53 // NOTE(shess): When changing the version, add a new golden file for
54 // the new version and a test to verify that Init() works with it.
55 static const int kVersionNumber = 4;
56 static const int kDeprecatedVersionNumber = 2;  // and earlier.
57 
58 // Rank used to indicate that this is a newly added URL.
59 static const int kRankOfNewURL = -1;
60 
InitTables(sql::Database * db)61 bool InitTables(sql::Database* db) {
62   static const char kTopSitesSql[] =
63       "CREATE TABLE IF NOT EXISTS top_sites ("
64       "url LONGVARCHAR PRIMARY KEY,"
65       "url_rank INTEGER,"
66       "title LONGVARCHAR,"
67       "redirects LONGVARCHAR)";
68   return db->Execute(kTopSitesSql);
69 }
70 
71 // Track various failure (and success) cases in recovery code.
72 //
73 // TODO(shess): The recovery code is complete, but by nature runs in challenging
74 // circumstances, so errors will happen.  This histogram is intended to expose
75 // the failures seen in the fleet.  Frequent failure cases can be explored more
76 // deeply to see if the complexity to fix them is warranted.  Infrequent failure
77 // cases can be resolved by marking the database unrecoverable (which will
78 // delete the data).
79 //
80 // Based on the thumbnail_database.cc recovery code, FAILED_SCOPER should
81 // dominate, followed distantly by FAILED_META, with few or no other failures.
82 enum RecoveryEventType {
83   // Database successfully recovered.
84   RECOVERY_EVENT_RECOVERED = 0,
85 
86   // Database successfully deprecated.
87   RECOVERY_EVENT_DEPRECATED,
88 
89   // Sqlite.RecoveryEvent can usually be used to get more detail about the
90   // specific failure (see sql/recovery.cc).
91   OBSOLETE_RECOVERY_EVENT_FAILED_SCOPER,
92   RECOVERY_EVENT_FAILED_META_VERSION,
93   RECOVERY_EVENT_FAILED_META_WRONG_VERSION,
94   OBSOLETE_RECOVERY_EVENT_FAILED_META_INIT,
95   OBSOLETE_RECOVERY_EVENT_FAILED_SCHEMA_INIT,
96   OBSOLETE_RECOVERY_EVENT_FAILED_AUTORECOVER_THUMBNAILS,
97   RECOVERY_EVENT_FAILED_COMMIT,
98 
99   // Track invariants resolved by FixTopSitesTable().
100   RECOVERY_EVENT_INVARIANT_RANK,
101   RECOVERY_EVENT_INVARIANT_REDIRECT,
102   RECOVERY_EVENT_INVARIANT_CONTIGUOUS,
103 
104   // Track automated full-database recovery.
105   RECOVERY_EVENT_FAILED_AUTORECOVER,
106 
107   // Always keep this at the end.
108   RECOVERY_EVENT_MAX,
109 };
110 
RecordRecoveryEvent(RecoveryEventType recovery_event)111 void RecordRecoveryEvent(RecoveryEventType recovery_event) {
112   UMA_HISTOGRAM_ENUMERATION("History.TopSitesRecovery", recovery_event,
113                             RECOVERY_EVENT_MAX);
114 }
115 
116 // Most corruption comes down to atomic updates between pages being broken
117 // somehow.  This can result in either missing data, or overlapping data,
118 // depending on the operation broken.  This table has large rows, which will use
119 // overflow pages, so it is possible (though unlikely) that a chain could fit
120 // together and yield a row with errors.
FixTopSitesTable(sql::Database * db,int version)121 void FixTopSitesTable(sql::Database* db, int version) {
122   // Forced sites are only present in version 3.
123   if (version == 3) {
124     // Enforce invariant separating forced and non-forced thumbnails.
125     static const char kFixRankSql[] =
126         "DELETE FROM thumbnails "
127         "WHERE (url_rank = -1 AND last_forced = 0) "
128         "OR (url_rank <> -1 AND last_forced <> 0)";
129     ignore_result(db->Execute(kFixRankSql));
130     if (db->GetLastChangeCount() > 0)
131       RecordRecoveryEvent(RECOVERY_EVENT_INVARIANT_RANK);
132   }
133 
134   // The table was renamed to "top_sites" in version 4.
135   const char* kTableName = (version == 3 ? "thumbnails" : "top_sites");
136 
137   // Enforce invariant that url is in its own redirects.
138   static const char kFixRedirectsSql[] =
139       "DELETE FROM %s "
140       "WHERE url <> substr(redirects, -length(url), length(url))";
141   ignore_result(
142       db->Execute(base::StringPrintf(kFixRedirectsSql, kTableName).c_str()));
143   if (db->GetLastChangeCount() > 0)
144     RecordRecoveryEvent(RECOVERY_EVENT_INVARIANT_REDIRECT);
145 
146   // Enforce invariant that url_rank>=0 forms a contiguous series.
147   // TODO(shess): I have not found an UPDATE+SUBSELECT method of managing this.
148   // It can be done with a temporary table and a subselect, but doing it
149   // manually is easier to follow.  Another option would be to somehow integrate
150   // the renumbering into the table recovery code.
151   static const char kByRankSql[] =
152       "SELECT url_rank, rowid FROM %s WHERE url_rank <> -1 "
153       "ORDER BY url_rank";
154   sql::Statement select_statement(db->GetUniqueStatement(
155       base::StringPrintf(kByRankSql, kTableName).c_str()));
156 
157   static const char kAdjustRankSql[] =
158       "UPDATE %s SET url_rank = ? WHERE rowid = ?";
159   sql::Statement update_statement(db->GetUniqueStatement(
160       base::StringPrintf(kAdjustRankSql, kTableName).c_str()));
161 
162   // Update any rows where |next_rank| doesn't match |url_rank|.
163   int next_rank = 0;
164   bool adjusted = false;
165   while (select_statement.Step()) {
166     const int url_rank = select_statement.ColumnInt(0);
167     if (url_rank != next_rank) {
168       adjusted = true;
169       update_statement.Reset(true);
170       update_statement.BindInt(0, next_rank);
171       update_statement.BindInt64(1, select_statement.ColumnInt64(1));
172       update_statement.Run();
173     }
174     ++next_rank;
175   }
176   if (adjusted)
177     RecordRecoveryEvent(RECOVERY_EVENT_INVARIANT_CONTIGUOUS);
178 }
179 
180 // Recover the database to the extent possible, then fixup any broken
181 // constraints.
RecoverAndFixup(sql::Database * db,const base::FilePath & db_path)182 void RecoverAndFixup(sql::Database* db, const base::FilePath& db_path) {
183   // NOTE(shess): If the version changes, review this code.
184   DCHECK_EQ(4, kVersionNumber);
185 
186   std::unique_ptr<sql::Recovery> recovery =
187       sql::Recovery::BeginRecoverDatabase(db, db_path);
188   if (!recovery) {
189     RecordRecoveryEvent(RECOVERY_EVENT_FAILED_AUTORECOVER);
190     return;
191   }
192 
193   // If the [meta] table does not exist, or the [version] key cannot be found,
194   // then the schema is indeterminate.  The only plausible approach would be to
195   // validate that the schema contains all of the tables and indices and columns
196   // expected, but that complexity may not be warranted, this case has only been
197   // seen for a few thousand database files.
198   int version = 0;
199   if (!recovery->SetupMeta() || !recovery->GetMetaVersionNumber(&version)) {
200     sql::Recovery::Unrecoverable(std::move(recovery));
201     RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_VERSION);
202     return;
203   }
204 
205   // In this case the next open will clear the database anyhow.
206   if (version <= kDeprecatedVersionNumber) {
207     sql::Recovery::Unrecoverable(std::move(recovery));
208     RecordRecoveryEvent(RECOVERY_EVENT_DEPRECATED);
209     return;
210   }
211 
212   // TODO(shess): Consider marking corrupt databases from the future
213   // Unrecoverable(), since this histogram value has never been seen.  OTOH,
214   // this may be too risky, because if future code was correlated with
215   // corruption then rollback would be a sensible response.
216   if (version > kVersionNumber) {
217     RecordRecoveryEvent(RECOVERY_EVENT_FAILED_META_WRONG_VERSION);
218     sql::Recovery::Rollback(std::move(recovery));
219     return;
220   }
221 
222   // TODO(shess): Inline this?
223   FixTopSitesTable(recovery->db(), version);
224 
225   if (!sql::Recovery::Recovered(std::move(recovery))) {
226     // TODO(shess): Very unclear what this failure would actually mean, and what
227     // should be done.  Add histograms to Recovered() implementation to get some
228     // insight.
229     RecordRecoveryEvent(RECOVERY_EVENT_FAILED_COMMIT);
230     return;
231   }
232 
233   RecordRecoveryEvent(RECOVERY_EVENT_RECOVERED);
234 }
235 
DatabaseErrorCallback(sql::Database * db,const base::FilePath & db_path,int extended_error,sql::Statement * stmt)236 void DatabaseErrorCallback(sql::Database* db,
237                            const base::FilePath& db_path,
238                            int extended_error,
239                            sql::Statement* stmt) {
240   // TODO(shess): Assert that this is running on a safe thread.  AFAICT, should
241   // be the history thread, but at this level I can't see how to reach that.
242 
243   // Attempt to recover corrupt databases.
244   if (sql::Recovery::ShouldRecover(extended_error)) {
245     // Prevent reentrant calls.
246     db->reset_error_callback();
247 
248     // After this call, the |db| handle is poisoned so that future calls will
249     // return errors until the handle is re-opened.
250     RecoverAndFixup(db, db_path);
251 
252     // The DLOG(FATAL) below is intended to draw immediate attention to errors
253     // in newly-written code.  Database corruption is generally a result of OS
254     // or hardware issues, not coding errors at the client level, so displaying
255     // the error would probably lead to confusion.  The ignored call signals the
256     // test-expectation framework that the error was handled.
257     ignore_result(sql::Database::IsExpectedSqliteError(extended_error));
258     return;
259   }
260 
261   // TODO(shess): This database's error histograms look like:
262   // 84% SQLITE_CORRUPT, SQLITE_CANTOPEN, SQLITE_NOTADB
263   //  7% SQLITE_ERROR
264   //  6% SQLITE_IOERR variants
265   //  2% SQLITE_READONLY
266   // .4% SQLITE_FULL
267   // nominal SQLITE_TOBIG, SQLITE_AUTH, and SQLITE_BUSY.  In the case of
268   // thumbnail_database.cc, as soon as the recovery code landed, SQLITE_IOERR
269   // shot to leadership.  If the I/O error is system-level, there is probably no
270   // hope, but if it is restricted to something about the database file, it is
271   // possible that the recovery code could be brought to bear.  In fact, it is
272   // possible that running recovery would be a reasonable default when errors
273   // are seen.
274 
275   // The default handling is to assert on debug and to ignore on release.
276   if (!sql::Database::IsExpectedSqliteError(extended_error))
277     DLOG(FATAL) << db->GetErrorMessage();
278 }
279 
280 }  // namespace
281 
282 // static
283 const int TopSitesDatabase::kRankOfNonExistingURL = -2;
284 
TopSitesDatabase()285 TopSitesDatabase::TopSitesDatabase() {
286 }
287 
~TopSitesDatabase()288 TopSitesDatabase::~TopSitesDatabase() {
289 }
290 
Init(const base::FilePath & db_name)291 bool TopSitesDatabase::Init(const base::FilePath& db_name) {
292   // Retry failed InitImpl() in case the recovery system fixed things.
293   // TODO(shess): Instrument to figure out if there are any persistent failure
294   // cases which do not resolve themselves.
295   const size_t kAttempts = 2;
296 
297   for (size_t i = 0; i < kAttempts; ++i) {
298     if (InitImpl(db_name))
299       return true;
300 
301     meta_table_.Reset();
302     db_.reset();
303   }
304   return false;
305 }
306 
InitImpl(const base::FilePath & db_name)307 bool TopSitesDatabase::InitImpl(const base::FilePath& db_name) {
308   const bool file_existed = base::PathExists(db_name);
309 
310   db_.reset(CreateDB(db_name));
311   if (!db_)
312     return false;
313 
314   // An older version had data with no meta table.  Deprecate by razing.
315   // TODO(shess): Just have RazeIfDeprecated() handle this case.
316   const bool does_meta_exist = sql::MetaTable::DoesTableExist(db_.get());
317   if (!does_meta_exist && file_existed) {
318     if (!db_->Raze())
319       return false;
320   }
321 
322   // Clear databases which are too old to process.
323   DCHECK_LT(kDeprecatedVersionNumber, kVersionNumber);
324   sql::MetaTable::RazeIfDeprecated(db_.get(), kDeprecatedVersionNumber);
325 
326   // Scope initialization in a transaction so we can't be partially
327   // initialized.
328   sql::Transaction transaction(db_.get());
329   // TODO(shess): Failure to open transaction is bad, address it.
330   if (!transaction.Begin())
331     return false;
332 
333   if (!meta_table_.Init(db_.get(), kVersionNumber, kVersionNumber))
334     return false;
335 
336   if (!InitTables(db_.get()))
337     return false;
338 
339   if (meta_table_.GetVersionNumber() == 2) {
340     if (!UpgradeToVersion3()) {
341       LOG(WARNING) << "Unable to upgrade top sites database to version 3.";
342       return false;
343     }
344   }
345 
346   if (meta_table_.GetVersionNumber() == 3) {
347     if (!UpgradeToVersion4()) {
348       LOG(WARNING) << "Unable to upgrade top sites database to version 4.";
349       return false;
350     }
351   }
352 
353   // Version check.
354   if (meta_table_.GetVersionNumber() != kVersionNumber)
355     return false;
356 
357   // Initialization is complete.
358   if (!transaction.Commit())
359     return false;
360 
361   return true;
362 }
363 
ApplyDelta(const TopSitesDelta & delta)364 void TopSitesDatabase::ApplyDelta(const TopSitesDelta& delta) {
365   sql::Transaction transaction(db_.get());
366   transaction.Begin();
367 
368   for (size_t i = 0; i < delta.deleted.size(); ++i) {
369     if (!RemoveURLNoTransaction(delta.deleted[i]))
370       return;
371   }
372 
373   for (size_t i = 0; i < delta.added.size(); ++i)
374     SetSiteNoTransaction(delta.added[i].url, delta.added[i].rank);
375 
376   for (size_t i = 0; i < delta.moved.size(); ++i)
377     UpdateSiteRankNoTransaction(delta.moved[i].url, delta.moved[i].rank);
378 
379   transaction.Commit();
380 }
381 
UpgradeToVersion3()382 bool TopSitesDatabase::UpgradeToVersion3() {
383   // Add 'last_forced' column.
384   if (!db_->Execute(
385           "ALTER TABLE thumbnails ADD last_forced INTEGER DEFAULT 0")) {
386     NOTREACHED();
387     return false;
388   }
389   meta_table_.SetVersionNumber(3);
390   return true;
391 }
392 
UpgradeToVersion4()393 bool TopSitesDatabase::UpgradeToVersion4() {
394   // Rename table to "top_sites" and retain only the url, url_rank, title, and
395   // redirects columns. Also, remove any remaining forced sites.
396   const char* statement =
397       // The top_sites table is created before the version upgrade.
398       "INSERT INTO top_sites SELECT "
399       "url,url_rank,title,redirects FROM thumbnails;"
400       "DROP TABLE thumbnails;"
401       // Remove any forced sites.
402       "DELETE FROM top_sites WHERE (url_rank = -1);";
403   if (!db_->Execute(statement)) {
404     NOTREACHED();
405     return false;
406   }
407 
408   meta_table_.SetVersionNumber(4);
409   return true;
410 }
411 
GetSites(MostVisitedURLList * urls)412 void TopSitesDatabase::GetSites(MostVisitedURLList* urls) {
413   sql::Statement statement(
414       db_->GetCachedStatement(SQL_FROM_HERE,
415                               "SELECT url, url_rank, title "
416                               "FROM top_sites ORDER BY url_rank"));
417 
418   if (!statement.is_valid()) {
419     LOG(WARNING) << db_->GetErrorMessage();
420     return;
421   }
422 
423   urls->clear();
424 
425   while (statement.Step()) {
426     // Results are sorted by url_rank.
427     MostVisitedURL url;
428     GURL gurl(statement.ColumnString(0));
429     url.url = gurl;
430     url.title = statement.ColumnString16(2);
431     urls->push_back(url);
432   }
433 }
434 
SetSiteNoTransaction(const MostVisitedURL & url,int new_rank)435 void TopSitesDatabase::SetSiteNoTransaction(const MostVisitedURL& url,
436                                             int new_rank) {
437   int rank = GetURLRank(url);
438   if (rank == kRankOfNonExistingURL) {
439     AddSite(url, new_rank);
440   } else {
441     UpdateSiteRankNoTransaction(url, new_rank);
442     UpdateSite(url);
443   }
444 }
445 
AddSite(const MostVisitedURL & url,int new_rank)446 void TopSitesDatabase::AddSite(const MostVisitedURL& url, int new_rank) {
447   sql::Statement statement(
448       db_->GetCachedStatement(SQL_FROM_HERE,
449                               "INSERT OR REPLACE INTO top_sites "
450                               "(url, url_rank, title) "
451                               "VALUES (?, ?, ?)"));
452   statement.BindString(0, url.url.spec());
453   statement.BindInt(1, kRankOfNewURL);
454   statement.BindString16(2, url.title);
455   if (!statement.Run())
456     return;
457 
458   // Update the new site's rank.
459   UpdateSiteRankNoTransaction(url, new_rank);
460 }
461 
UpdateSite(const MostVisitedURL & url)462 bool TopSitesDatabase::UpdateSite(const MostVisitedURL& url) {
463   sql::Statement statement(db_->GetCachedStatement(SQL_FROM_HERE,
464                                                    "UPDATE top_sites SET "
465                                                    "title = ? "
466                                                    "WHERE url = ?"));
467   statement.BindString16(0, url.title);
468 
469   return statement.Run();
470 }
471 
GetURLRank(const MostVisitedURL & url)472 int TopSitesDatabase::GetURLRank(const MostVisitedURL& url) {
473   sql::Statement select_statement(
474       db_->GetCachedStatement(SQL_FROM_HERE,
475                               "SELECT url_rank "
476                               "FROM top_sites WHERE url = ?"));
477   select_statement.BindString(0, url.url.spec());
478   if (select_statement.Step())
479     return select_statement.ColumnInt(0);
480 
481   return kRankOfNonExistingURL;
482 }
483 
UpdateSiteRankNoTransaction(const MostVisitedURL & url,int new_rank)484 void TopSitesDatabase::UpdateSiteRankNoTransaction(const MostVisitedURL& url,
485                                                    int new_rank) {
486   DCHECK_GT(db_->transaction_nesting(), 0);
487 
488   int prev_rank = GetURLRank(url);
489   if (prev_rank == kRankOfNonExistingURL) {
490     LOG(WARNING) << "Updating rank of an unknown URL: " << url.url.spec();
491     return;
492   }
493 
494   // Shift the ranks.
495   if (prev_rank == kRankOfNewURL) {
496     // Starting from new_rank, shift up.
497     // Example: -1 -> 2
498     // [-1 -> 2], 0, 1, [2 -> 3], [3 -> 4], [4 -> 5]
499     sql::Statement shift_statement(
500         db_->GetCachedStatement(SQL_FROM_HERE,
501                                 "UPDATE top_sites "
502                                 "SET url_rank = url_rank + 1 "
503                                 "WHERE url_rank >= ?"));
504     shift_statement.BindInt(0, new_rank);
505     shift_statement.Run();
506   } else if (prev_rank > new_rank) {
507     // From [new_rank, prev_rank), shift up.
508     // Example: 3 -> 1
509     // 0, [1 -> 2], [2 -> 3], [3 -> 1], 4
510     sql::Statement shift_statement(
511         db_->GetCachedStatement(SQL_FROM_HERE,
512                                 "UPDATE top_sites "
513                                 "SET url_rank = url_rank + 1 "
514                                 "WHERE url_rank >= ? AND url_rank < ?"));
515     shift_statement.BindInt(0, new_rank);
516     shift_statement.BindInt(1, prev_rank);
517     shift_statement.Run();
518   } else if (prev_rank < new_rank) {
519     // From (prev_rank, new_rank], shift down.
520     // Example: 1 -> 3.
521     // 0, [1 -> 3], [2 -> 1], [3 -> 2], 4
522     sql::Statement shift_statement(
523         db_->GetCachedStatement(SQL_FROM_HERE,
524                                 "UPDATE top_sites "
525                                 "SET url_rank = url_rank - 1 "
526                                 "WHERE url_rank > ? AND url_rank <= ?"));
527     shift_statement.BindInt(0, prev_rank);
528     shift_statement.BindInt(1, new_rank);
529     shift_statement.Run();
530   }
531 
532   // Set the url's new_rank.
533   sql::Statement set_statement(db_->GetCachedStatement(SQL_FROM_HERE,
534                                                        "UPDATE top_sites "
535                                                        "SET url_rank = ? "
536                                                        "WHERE url == ?"));
537   set_statement.BindInt(0, new_rank);
538   set_statement.BindString(1, url.url.spec());
539   set_statement.Run();
540 }
541 
RemoveURLNoTransaction(const MostVisitedURL & url)542 bool TopSitesDatabase::RemoveURLNoTransaction(const MostVisitedURL& url) {
543   int old_rank = GetURLRank(url);
544   if (old_rank == kRankOfNonExistingURL)
545     return true;
546 
547   // Decrement all following ranks.
548   sql::Statement shift_statement(
549       db_->GetCachedStatement(SQL_FROM_HERE,
550                               "UPDATE top_sites "
551                               "SET url_rank = url_rank - 1 "
552                               "WHERE url_rank > ?"));
553   shift_statement.BindInt(0, old_rank);
554 
555   if (!shift_statement.Run())
556     return false;
557 
558   sql::Statement delete_statement(db_->GetCachedStatement(
559       SQL_FROM_HERE, "DELETE FROM top_sites WHERE url = ?"));
560   delete_statement.BindString(0, url.url.spec());
561 
562   return delete_statement.Run();
563 }
564 
CreateDB(const base::FilePath & db_name)565 sql::Database* TopSitesDatabase::CreateDB(const base::FilePath& db_name) {
566   std::unique_ptr<sql::Database> db(new sql::Database());
567   // Settings copied from ThumbnailDatabase.
568   db->set_histogram_tag("TopSites");
569   db->set_error_callback(
570       base::BindRepeating(&DatabaseErrorCallback, db.get(), db_name));
571   db->set_page_size(4096);
572   db->set_cache_size(32);
573 
574   if (!db->Open(db_name))
575     return nullptr;
576   return db.release();
577 }
578 
579 }  // namespace history
580