1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/omnibox/browser/in_memory_url_index.h"
6 
7 #include <cinttypes>
8 #include <memory>
9 
10 #include "base/bind.h"
11 #include "base/feature_list.h"
12 #include "base/files/file_util.h"
13 #include "base/no_destructor.h"
14 #include "base/strings/stringprintf.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/task/post_task.h"
17 #include "base/task/thread_pool.h"
18 #include "base/task_runner_util.h"
19 #include "base/threading/thread_task_runner_handle.h"
20 #include "base/trace_event/memory_dump_manager.h"
21 #include "base/trace_event/memory_usage_estimator.h"
22 #include "base/trace_event/trace_event.h"
23 #include "components/history/core/browser/history_service.h"
24 #include "components/history/core/browser/url_database.h"
25 #include "components/omnibox/browser/url_index_private_data.h"
26 #include "components/omnibox/common/omnibox_features.h"
27 
28 using in_memory_url_index::InMemoryURLIndexCacheItem;
29 
30 // Initializes a whitelist of URL schemes.
InitializeSchemeWhitelist(SchemeSet * whitelist,const SchemeSet & client_schemes_to_whitelist)31 void InitializeSchemeWhitelist(
32     SchemeSet* whitelist,
33     const SchemeSet& client_schemes_to_whitelist) {
34   DCHECK(whitelist);
35   if (!whitelist->empty())
36     return;  // Nothing to do, already initialized.
37 
38   whitelist->insert(client_schemes_to_whitelist.begin(),
39                     client_schemes_to_whitelist.end());
40 
41   whitelist->insert(std::string(url::kAboutScheme));
42   whitelist->insert(std::string(url::kFileScheme));
43   whitelist->insert(std::string(url::kFtpScheme));
44   whitelist->insert(std::string(url::kHttpScheme));
45   whitelist->insert(std::string(url::kHttpsScheme));
46   whitelist->insert(std::string(url::kMailToScheme));
47 }
48 
49 // Restore/SaveCacheObserver ---------------------------------------------------
50 
~RestoreCacheObserver()51 InMemoryURLIndex::RestoreCacheObserver::~RestoreCacheObserver() {
52 }
53 
~SaveCacheObserver()54 InMemoryURLIndex::SaveCacheObserver::~SaveCacheObserver() {
55 }
56 
57 // RebuildPrivateDataFromHistoryDBTask -----------------------------------------
58 
// Records |index| and |scheme_whitelist| for later use by the task and starts
// with |succeeded_| set to false. No work is performed here.
InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask::
    RebuildPrivateDataFromHistoryDBTask(
        InMemoryURLIndex* index,
        const SchemeSet& scheme_whitelist)
    : index_(index),
      scheme_whitelist_(scheme_whitelist),
      succeeded_(false) {
}
67 
RunOnDBThread(history::HistoryBackend * backend,history::HistoryDatabase * db)68 bool InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask::RunOnDBThread(
69     history::HistoryBackend* backend,
70     history::HistoryDatabase* db) {
71   data_ = URLIndexPrivateData::RebuildFromHistory(db, scheme_whitelist_);
72   succeeded_ = data_.get() && !data_->Empty();
73   if (!succeeded_ && data_.get())
74     data_->Clear();
75   return true;
76 }
77 
// Reports the outcome recorded by RunOnDBThread() (|succeeded_| and |data_|)
// to the index this task was constructed with.
void InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask::
    DoneRunOnMainThread() {
  index_->DoneRebuidingPrivateDataFromHistoryDB(succeeded_, data_);
}
82 
83 InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask::
~RebuildPrivateDataFromHistoryDBTask()84     ~RebuildPrivateDataFromHistoryDBTask() {
85 }
86 
87 // InMemoryURLIndex ------------------------------------------------------------
88 
// Constructs an index with fresh, empty private data. Stores the supplied
// services and |history_dir|, creates the sequenced task runner used for the
// cache-file tasks posted elsewhere in this file, builds the scheme whitelist
// from |client_schemes_to_whitelist|, starts observing |history_service| (when
// one is supplied), and registers this object as a memory dump provider.
InMemoryURLIndex::InMemoryURLIndex(bookmarks::BookmarkModel* bookmark_model,
                                   history::HistoryService* history_service,
                                   TemplateURLService* template_url_service,
                                   const base::FilePath& history_dir,
                                   const SchemeSet& client_schemes_to_whitelist)
    : bookmark_model_(bookmark_model),
      history_service_(history_service),
      template_url_service_(template_url_service),
      history_dir_(history_dir),
      private_data_(new URLIndexPrivateData),
      restore_cache_observer_(nullptr),
      save_cache_observer_(nullptr),
      // Tasks on this runner may block and run at best-effort priority.
      task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
          {base::MayBlock(), base::TaskPriority::BEST_EFFORT})),
      shutdown_(false),
      restored_(false),
      needs_to_be_cached_(false),
      listen_to_history_service_loaded_(false) {
  InitializeSchemeWhitelist(&scheme_whitelist_, client_schemes_to_whitelist);
  // TODO(mrossetti): Register for language change notifications.
  if (history_service_)
    history_service_->AddObserver(this);

  // Memory dumps are requested on the task runner of the constructing thread.
  base::trace_event::MemoryDumpManager::GetInstance()->RegisterDumpProvider(
      this, "InMemoryURLIndex", base::ThreadTaskRunnerHandle::Get());
}
115 
// Unregisters the memory dump provider and verifies (debug builds only) that
// Shutdown() has already run: |history_service_| must have been released and
// |shutdown_| set.
InMemoryURLIndex::~InMemoryURLIndex() {
  base::trace_event::MemoryDumpManager::GetInstance()->UnregisterDumpProvider(
      this);

  // If there was a history directory (which there won't be for some unit tests)
  // then ensure that the cache has already been saved.
  DCHECK(history_dir_.empty() || !needs_to_be_cached_);
  DCHECK(!history_service_);
  DCHECK(shutdown_);
}
126 
// Starts populating the index by kicking off a restore from the cache file.
void InMemoryURLIndex::Init() {
  PostRestoreFromCacheFileTask();
}
130 
// Empties the current private data in place; the object itself is kept.
void InMemoryURLIndex::ClearPrivateData() {
  private_data_->Clear();
}
134 
GetCacheFilePath(base::FilePath * file_path)135 bool InMemoryURLIndex::GetCacheFilePath(base::FilePath* file_path) {
136   if (history_dir_.empty())
137     return false;
138   *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));
139   return true;
140 }
141 
142 // Querying --------------------------------------------------------------------
143 
// Forwards the query to the current private data, supplying the bookmark
// model and template URL service this index was constructed with, and returns
// its result unchanged.
ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
    const base::string16& term_string,
    size_t cursor_position,
    size_t max_matches) {
  return private_data_->HistoryItemsForTerms(
      term_string, cursor_position, max_matches, bookmark_model_,
      template_url_service_);
}
152 
153 // Updating --------------------------------------------------------------------
154 
// Removes |url| from the private data. Unlike OnURLsDeleted(), the result of
// the removal is discarded and |needs_to_be_cached_| is not updated here.
void InMemoryURLIndex::DeleteURL(const GURL& url) {
  private_data_->DeleteURL(url);
}
158 
OnURLVisited(history::HistoryService * history_service,ui::PageTransition transition,const history::URLRow & row,const history::RedirectList & redirects,base::Time visit_time)159 void InMemoryURLIndex::OnURLVisited(history::HistoryService* history_service,
160                                     ui::PageTransition transition,
161                                     const history::URLRow& row,
162                                     const history::RedirectList& redirects,
163                                     base::Time visit_time) {
164   DCHECK_EQ(history_service_, history_service);
165   // If |row| is not known to URLIndexPrivateData and the row is significant,
166   // URLIndexPrivateData will index it. When excluding visits from cct, the row
167   // may be significant, but not indexed. UpdateURL() does not have the full
168   // context to know it should not index the row (it lacks visits). If |row|
169   // has not been indexed, and the visit is from cct, we know it should not be
170   // indexed and should not call to UpdateURL().
171   if (!private_data_->IsUrlRowIndexed(row) &&
172       URLIndexPrivateData::ShouldExcludeBecauseOfCctTransition(transition)) {
173     return;
174   }
175   needs_to_be_cached_ |= private_data_->UpdateURL(
176       history_service_, row, scheme_whitelist_, &private_data_tracker_);
177 }
178 
OnURLsModified(history::HistoryService * history_service,const history::URLRows & changed_urls,history::UrlsModifiedReason reason)179 void InMemoryURLIndex::OnURLsModified(history::HistoryService* history_service,
180                                       const history::URLRows& changed_urls,
181                                       history::UrlsModifiedReason reason) {
182   DCHECK_EQ(history_service_, history_service);
183   for (const auto& row : changed_urls) {
184     // When hiding visits from cct, don't add the entry just because the title
185     // changed. In other words, |row| may qualify (RowQualifiesAsSignificant),
186     // but not be indexed because all visits where excluded. In this case, the
187     // row won't be indexed and we shouldn't add just because the title
188     // changed.
189     if (base::FeatureList::IsEnabled(omnibox::kHideVisitsFromCct) &&
190         !private_data_->IsUrlRowIndexed(row) &&
191         reason == history::UrlsModifiedReason::kTitleChanged) {
192       continue;
193     }
194     needs_to_be_cached_ |= private_data_->UpdateURL(
195         history_service_, row, scheme_whitelist_, &private_data_tracker_);
196   }
197 }
198 
OnURLsDeleted(history::HistoryService * history_service,const history::DeletionInfo & deletion_info)199 void InMemoryURLIndex::OnURLsDeleted(
200     history::HistoryService* history_service,
201     const history::DeletionInfo& deletion_info) {
202   if (deletion_info.IsAllHistory()) {
203     ClearPrivateData();
204     needs_to_be_cached_ = true;
205   } else {
206     for (const auto& row : deletion_info.deleted_rows())
207       needs_to_be_cached_ |= private_data_->DeleteURL(row.url());
208   }
209   // If we made changes, destroy the previous cache.  Otherwise, if we go
210   // through an unclean shutdown (and therefore fail to write a new cache file),
211   // when Chrome restarts and we restore from the previous cache, we'll end up
212   // searching over URLs that may be deleted.  This would be wrong, and
213   // surprising to the user who bothered to delete some URLs from their
214   // history.  In this situation, deleting the cache is a better solution than
215   // writing a new cache (after deleting the URLs from the in-memory structure)
216   // because deleting the cache forces it to be rebuilt from history upon
217   // startup.  If we instead write a new, updated cache then at the time of next
218   // startup (after an unclean shutdown) we will not rebuild the in-memory data
219   // structures from history but rather use the cache.  This solution is
220   // mediocre because this cache may not have the most-recently-visited URLs
221   // in it (URLs visited after user deleted some URLs from history), which
222   // would be odd and confusing.  It's better to force a rebuild.
223   base::FilePath path;
224   if (needs_to_be_cached_ && GetCacheFilePath(&path))
225     task_runner_->PostTask(FROM_HERE,
226                            base::BindOnce(base::GetDeleteFileCallback(), path));
227 }
228 
OnHistoryServiceLoaded(history::HistoryService * history_service)229 void InMemoryURLIndex::OnHistoryServiceLoaded(
230     history::HistoryService* history_service) {
231   if (listen_to_history_service_loaded_)
232     ScheduleRebuildFromHistory();
233   listen_to_history_service_loaded_ = false;
234 }
235 
OnMemoryDump(const base::trace_event::MemoryDumpArgs & args,base::trace_event::ProcessMemoryDump * process_memory_dump)236 bool InMemoryURLIndex::OnMemoryDump(
237     const base::trace_event::MemoryDumpArgs& args,
238     base::trace_event::ProcessMemoryDump* process_memory_dump) {
239   size_t res = 0;
240 
241   res += base::trace_event::EstimateMemoryUsage(scheme_whitelist_);
242 
243   // TODO(dyaroshev): Add support for scoped_refptr in
244   //                  base::trace_event::EstimateMemoryUsage.
245   res += sizeof(URLIndexPrivateData) + private_data_->EstimateMemoryUsage();
246 
247   const std::string dump_name =
248       base::StringPrintf("omnibox/in_memory_url_index/0x%" PRIXPTR,
249                          reinterpret_cast<uintptr_t>(this));
250   auto* dump = process_memory_dump->CreateAllocatorDump(dump_name);
251   dump->AddScalar(base::trace_event::MemoryAllocatorDump::kNameSize,
252                   base::trace_event::MemoryAllocatorDump::kUnitsBytes, res);
253 
254   // TODO(https://crbug.com/1068883): Remove this code when the bug is fixed.
255   private_data_->OnMemoryAllocatorDump(dump);
256 
257   return true;
258 }
259 
260 // Restoring from Cache --------------------------------------------------------
261 
PostRestoreFromCacheFileTask()262 void InMemoryURLIndex::PostRestoreFromCacheFileTask() {
263   DCHECK(thread_checker_.CalledOnValidThread());
264   TRACE_EVENT0("browser", "InMemoryURLIndex::PostRestoreFromCacheFileTask");
265 
266   if (base::FeatureList::IsEnabled(
267           omnibox::kHistoryQuickProviderAblateInMemoryURLIndexCacheFile)) {
268     // To short circuit the cache, pretend we've failed to load it.
269     OnCacheLoadDone(nullptr);
270     return;
271   }
272 
273   base::FilePath path;
274   if (!GetCacheFilePath(&path) || shutdown_) {
275     restored_ = true;
276     if (restore_cache_observer_)
277       restore_cache_observer_->OnCacheRestoreFinished(false);
278     return;
279   }
280 
281   base::PostTaskAndReplyWithResult(
282       task_runner_.get(), FROM_HERE,
283       base::BindOnce(&URLIndexPrivateData::RestoreFromFile, path),
284       base::BindOnce(&InMemoryURLIndex::OnCacheLoadDone, AsWeakPtr()));
285 }
286 
OnCacheLoadDone(scoped_refptr<URLIndexPrivateData> private_data)287 void InMemoryURLIndex::OnCacheLoadDone(
288     scoped_refptr<URLIndexPrivateData> private_data) {
289   if (private_data.get() && !private_data->Empty()) {
290     private_data_tracker_.TryCancelAll();
291     private_data_ = private_data;
292     restored_ = true;
293     if (restore_cache_observer_)
294       restore_cache_observer_->OnCacheRestoreFinished(true);
295   } else if (history_service_) {
296     // When unable to restore from the cache file delete the cache file, if
297     // it exists, and then rebuild from the history database if it's available,
298     // otherwise wait until the history database loaded and then rebuild.
299     base::FilePath path;
300     if (!GetCacheFilePath(&path) || shutdown_)
301       return;
302     task_runner_->PostTask(FROM_HERE,
303                            base::BindOnce(base::GetDeleteFileCallback(), path));
304     if (history_service_->backend_loaded()) {
305       ScheduleRebuildFromHistory();
306     } else {
307       listen_to_history_service_loaded_ = true;
308     }
309   }
310 }
311 
312 // Cleanup ---------------------------------------------------------------------
313 
// Detaches from the history service, cancels tasks tracked by
// |cache_reader_tracker_| and marks the index as shut down. When a cache path
// is available it additionally cancels tasks tracked by
// |private_data_tracker_|, posts a final write of the private data to the
// cache file (unless the cache file is ablated by feature flag), and clears
// |needs_to_be_cached_|.
void InMemoryURLIndex::Shutdown() {
  if (history_service_) {
    history_service_->RemoveObserver(this);
    history_service_ = nullptr;
  }
  cache_reader_tracker_.TryCancelAll();
  shutdown_ = true;
  base::FilePath path;
  // Without a cache path there is nothing to save; everything below is
  // skipped in that case.
  if (!GetCacheFilePath(&path))
    return;
  private_data_tracker_.TryCancelAll();

  if (!base::FeatureList::IsEnabled(
          omnibox::kHistoryQuickProviderAblateInMemoryURLIndexCacheFile)) {
    // The write result is ignored; there is no reply callback at shutdown.
    task_runner_->PostTask(
        FROM_HERE,
        base::BindOnce(
            base::IgnoreResult(
                &URLIndexPrivateData::WritePrivateDataToCacheFileTask),
            private_data_, path));
  }
#ifndef LEAK_SANITIZER
  // Intentionally create and then leak a scoped_refptr to private_data_. This
  // permanently raises the reference count so that the URLIndexPrivateData
  // destructor won't run during browser shutdown. This saves having to walk the
  // maps to free their memory, which saves time and avoids shutdown hangs,
  // especially if some of the memory has been paged out.
  base::NoDestructor<scoped_refptr<URLIndexPrivateData>> leak_reference(
      private_data_);
#endif
  needs_to_be_cached_ = false;
}
346 
347 // Restoring from the History DB -----------------------------------------------
348 
ScheduleRebuildFromHistory()349 void InMemoryURLIndex::ScheduleRebuildFromHistory() {
350   DCHECK(history_service_);
351   history_service_->ScheduleDBTask(
352       FROM_HERE,
353       std::unique_ptr<history::HistoryDBTask>(
354           new InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask(
355               this, scheme_whitelist_)),
356       &cache_reader_tracker_);
357 }
358 
DoneRebuidingPrivateDataFromHistoryDB(bool succeeded,scoped_refptr<URLIndexPrivateData> private_data)359 void InMemoryURLIndex::DoneRebuidingPrivateDataFromHistoryDB(
360     bool succeeded,
361     scoped_refptr<URLIndexPrivateData> private_data) {
362   DCHECK(thread_checker_.CalledOnValidThread());
363   if (succeeded) {
364     private_data_tracker_.TryCancelAll();
365     private_data_ = private_data;
366     PostSaveToCacheFileTask();  // Cache the newly rebuilt index.
367   } else {
368     private_data_->Clear();  // Dump the old private data.
369     // There is no need to do anything with the cache file as it was deleted
370     // when the rebuild from the history operation was kicked off.
371   }
372   restored_ = true;
373   if (restore_cache_observer_)
374     restore_cache_observer_->OnCacheRestoreFinished(succeeded);
375 }
376 
// Rebuilds the private data from |history_db| in the calling context, after
// cancelling tasks tracked by |private_data_tracker_|. Unlike
// ScheduleRebuildFromHistory(), no DB task is scheduled, and neither the
// restore observer nor the cache file is touched.
void InMemoryURLIndex::RebuildFromHistory(
    history::HistoryDatabase* history_db) {
  private_data_tracker_.TryCancelAll();
  private_data_ = URLIndexPrivateData::RebuildFromHistory(history_db,
                                                          scheme_whitelist_);
}
383 
384 // Saving to Cache -------------------------------------------------------------
385 
PostSaveToCacheFileTask()386 void InMemoryURLIndex::PostSaveToCacheFileTask() {
387   if (base::FeatureList::IsEnabled(
388           omnibox::kHistoryQuickProviderAblateInMemoryURLIndexCacheFile)) {
389     return;
390   }
391 
392   base::FilePath path;
393   if (!GetCacheFilePath(&path))
394     return;
395   // If there is anything in our private data then make a copy of it and tell
396   // it to save itself to a file.
397   if (private_data_.get() && !private_data_->Empty()) {
398     // Note that ownership of the copy of our private data is passed to the
399     // completion closure below.
400     scoped_refptr<URLIndexPrivateData> private_data_copy =
401         private_data_->Duplicate();
402     base::PostTaskAndReplyWithResult(
403         task_runner_.get(), FROM_HERE,
404         base::BindOnce(&URLIndexPrivateData::WritePrivateDataToCacheFileTask,
405                        private_data_copy, path),
406         base::BindOnce(&InMemoryURLIndex::OnCacheSaveDone, AsWeakPtr()));
407   } else {
408     // If there is no data in our index then delete any existing cache file.
409     task_runner_->PostTask(FROM_HERE,
410                            base::BindOnce(base::GetDeleteFileCallback(), path));
411   }
412 }
413 
OnCacheSaveDone(bool succeeded)414 void InMemoryURLIndex::OnCacheSaveDone(bool succeeded) {
415   if (save_cache_observer_)
416     save_cache_observer_->OnCacheSaveFinished(succeeded);
417 }
418