1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/omnibox/browser/in_memory_url_index.h"
6
7 #include <cinttypes>
8 #include <memory>
9
10 #include "base/bind.h"
11 #include "base/feature_list.h"
12 #include "base/files/file_util.h"
13 #include "base/no_destructor.h"
14 #include "base/strings/stringprintf.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/task/post_task.h"
17 #include "base/task/thread_pool.h"
18 #include "base/task_runner_util.h"
19 #include "base/threading/thread_task_runner_handle.h"
20 #include "base/trace_event/memory_dump_manager.h"
21 #include "base/trace_event/memory_usage_estimator.h"
22 #include "base/trace_event/trace_event.h"
23 #include "components/history/core/browser/history_service.h"
24 #include "components/history/core/browser/url_database.h"
25 #include "components/omnibox/browser/url_index_private_data.h"
26 #include "components/omnibox/common/omnibox_features.h"
27
28 using in_memory_url_index::InMemoryURLIndexCacheItem;
29
30 // Initializes a whitelist of URL schemes.
InitializeSchemeWhitelist(SchemeSet * whitelist,const SchemeSet & client_schemes_to_whitelist)31 void InitializeSchemeWhitelist(
32 SchemeSet* whitelist,
33 const SchemeSet& client_schemes_to_whitelist) {
34 DCHECK(whitelist);
35 if (!whitelist->empty())
36 return; // Nothing to do, already initialized.
37
38 whitelist->insert(client_schemes_to_whitelist.begin(),
39 client_schemes_to_whitelist.end());
40
41 whitelist->insert(std::string(url::kAboutScheme));
42 whitelist->insert(std::string(url::kFileScheme));
43 whitelist->insert(std::string(url::kFtpScheme));
44 whitelist->insert(std::string(url::kHttpScheme));
45 whitelist->insert(std::string(url::kHttpsScheme));
46 whitelist->insert(std::string(url::kMailToScheme));
47 }
48
49 // Restore/SaveCacheObserver ---------------------------------------------------
50
~RestoreCacheObserver()51 InMemoryURLIndex::RestoreCacheObserver::~RestoreCacheObserver() {
52 }
53
~SaveCacheObserver()54 InMemoryURLIndex::SaveCacheObserver::~SaveCacheObserver() {
55 }
56
57 // RebuildPrivateDataFromHistoryDBTask -----------------------------------------
58
59 InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask::
RebuildPrivateDataFromHistoryDBTask(InMemoryURLIndex * index,const SchemeSet & scheme_whitelist)60 RebuildPrivateDataFromHistoryDBTask(
61 InMemoryURLIndex* index,
62 const SchemeSet& scheme_whitelist)
63 : index_(index),
64 scheme_whitelist_(scheme_whitelist),
65 succeeded_(false) {
66 }
67
RunOnDBThread(history::HistoryBackend * backend,history::HistoryDatabase * db)68 bool InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask::RunOnDBThread(
69 history::HistoryBackend* backend,
70 history::HistoryDatabase* db) {
71 data_ = URLIndexPrivateData::RebuildFromHistory(db, scheme_whitelist_);
72 succeeded_ = data_.get() && !data_->Empty();
73 if (!succeeded_ && data_.get())
74 data_->Clear();
75 return true;
76 }
77
78 void InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask::
DoneRunOnMainThread()79 DoneRunOnMainThread() {
80 index_->DoneRebuidingPrivateDataFromHistoryDB(succeeded_, data_);
81 }
82
83 InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask::
~RebuildPrivateDataFromHistoryDBTask()84 ~RebuildPrivateDataFromHistoryDBTask() {
85 }
86
87 // InMemoryURLIndex ------------------------------------------------------------
88
InMemoryURLIndex(bookmarks::BookmarkModel * bookmark_model,history::HistoryService * history_service,TemplateURLService * template_url_service,const base::FilePath & history_dir,const SchemeSet & client_schemes_to_whitelist)89 InMemoryURLIndex::InMemoryURLIndex(bookmarks::BookmarkModel* bookmark_model,
90 history::HistoryService* history_service,
91 TemplateURLService* template_url_service,
92 const base::FilePath& history_dir,
93 const SchemeSet& client_schemes_to_whitelist)
94 : bookmark_model_(bookmark_model),
95 history_service_(history_service),
96 template_url_service_(template_url_service),
97 history_dir_(history_dir),
98 private_data_(new URLIndexPrivateData),
99 restore_cache_observer_(nullptr),
100 save_cache_observer_(nullptr),
101 task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
102 {base::MayBlock(), base::TaskPriority::BEST_EFFORT})),
103 shutdown_(false),
104 restored_(false),
105 needs_to_be_cached_(false),
106 listen_to_history_service_loaded_(false) {
107 InitializeSchemeWhitelist(&scheme_whitelist_, client_schemes_to_whitelist);
108 // TODO(mrossetti): Register for language change notifications.
109 if (history_service_)
110 history_service_->AddObserver(this);
111
112 base::trace_event::MemoryDumpManager::GetInstance()->RegisterDumpProvider(
113 this, "InMemoryURLIndex", base::ThreadTaskRunnerHandle::Get());
114 }
115
~InMemoryURLIndex()116 InMemoryURLIndex::~InMemoryURLIndex() {
117 base::trace_event::MemoryDumpManager::GetInstance()->UnregisterDumpProvider(
118 this);
119
120 // If there was a history directory (which there won't be for some unit tests)
121 // then insure that the cache has already been saved.
122 DCHECK(history_dir_.empty() || !needs_to_be_cached_);
123 DCHECK(!history_service_);
124 DCHECK(shutdown_);
125 }
126
Init()127 void InMemoryURLIndex::Init() {
128 PostRestoreFromCacheFileTask();
129 }
130
ClearPrivateData()131 void InMemoryURLIndex::ClearPrivateData() {
132 private_data_->Clear();
133 }
134
GetCacheFilePath(base::FilePath * file_path)135 bool InMemoryURLIndex::GetCacheFilePath(base::FilePath* file_path) {
136 if (history_dir_.empty())
137 return false;
138 *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));
139 return true;
140 }
141
142 // Querying --------------------------------------------------------------------
143
HistoryItemsForTerms(const base::string16 & term_string,size_t cursor_position,size_t max_matches)144 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
145 const base::string16& term_string,
146 size_t cursor_position,
147 size_t max_matches) {
148 return private_data_->HistoryItemsForTerms(
149 term_string, cursor_position, max_matches, bookmark_model_,
150 template_url_service_);
151 }
152
153 // Updating --------------------------------------------------------------------
154
DeleteURL(const GURL & url)155 void InMemoryURLIndex::DeleteURL(const GURL& url) {
156 private_data_->DeleteURL(url);
157 }
158
OnURLVisited(history::HistoryService * history_service,ui::PageTransition transition,const history::URLRow & row,const history::RedirectList & redirects,base::Time visit_time)159 void InMemoryURLIndex::OnURLVisited(history::HistoryService* history_service,
160 ui::PageTransition transition,
161 const history::URLRow& row,
162 const history::RedirectList& redirects,
163 base::Time visit_time) {
164 DCHECK_EQ(history_service_, history_service);
165 // If |row| is not known to URLIndexPrivateData and the row is significant,
166 // URLIndexPrivateData will index it. When excluding visits from cct, the row
167 // may be significant, but not indexed. UpdateURL() does not have the full
168 // context to know it should not index the row (it lacks visits). If |row|
169 // has not been indexed, and the visit is from cct, we know it should not be
170 // indexed and should not call to UpdateURL().
171 if (!private_data_->IsUrlRowIndexed(row) &&
172 URLIndexPrivateData::ShouldExcludeBecauseOfCctTransition(transition)) {
173 return;
174 }
175 needs_to_be_cached_ |= private_data_->UpdateURL(
176 history_service_, row, scheme_whitelist_, &private_data_tracker_);
177 }
178
OnURLsModified(history::HistoryService * history_service,const history::URLRows & changed_urls,history::UrlsModifiedReason reason)179 void InMemoryURLIndex::OnURLsModified(history::HistoryService* history_service,
180 const history::URLRows& changed_urls,
181 history::UrlsModifiedReason reason) {
182 DCHECK_EQ(history_service_, history_service);
183 for (const auto& row : changed_urls) {
184 // When hiding visits from cct, don't add the entry just because the title
185 // changed. In other words, |row| may qualify (RowQualifiesAsSignificant),
186 // but not be indexed because all visits where excluded. In this case, the
187 // row won't be indexed and we shouldn't add just because the title
188 // changed.
189 if (base::FeatureList::IsEnabled(omnibox::kHideVisitsFromCct) &&
190 !private_data_->IsUrlRowIndexed(row) &&
191 reason == history::UrlsModifiedReason::kTitleChanged) {
192 continue;
193 }
194 needs_to_be_cached_ |= private_data_->UpdateURL(
195 history_service_, row, scheme_whitelist_, &private_data_tracker_);
196 }
197 }
198
OnURLsDeleted(history::HistoryService * history_service,const history::DeletionInfo & deletion_info)199 void InMemoryURLIndex::OnURLsDeleted(
200 history::HistoryService* history_service,
201 const history::DeletionInfo& deletion_info) {
202 if (deletion_info.IsAllHistory()) {
203 ClearPrivateData();
204 needs_to_be_cached_ = true;
205 } else {
206 for (const auto& row : deletion_info.deleted_rows())
207 needs_to_be_cached_ |= private_data_->DeleteURL(row.url());
208 }
209 // If we made changes, destroy the previous cache. Otherwise, if we go
210 // through an unclean shutdown (and therefore fail to write a new cache file),
211 // when Chrome restarts and we restore from the previous cache, we'll end up
212 // searching over URLs that may be deleted. This would be wrong, and
213 // surprising to the user who bothered to delete some URLs from their
214 // history. In this situation, deleting the cache is a better solution than
215 // writing a new cache (after deleting the URLs from the in-memory structure)
216 // because deleting the cache forces it to be rebuilt from history upon
217 // startup. If we instead write a new, updated cache then at the time of next
218 // startup (after an unclean shutdown) we will not rebuild the in-memory data
219 // structures from history but rather use the cache. This solution is
220 // mediocre because this cache may not have the most-recently-visited URLs
221 // in it (URLs visited after user deleted some URLs from history), which
222 // would be odd and confusing. It's better to force a rebuild.
223 base::FilePath path;
224 if (needs_to_be_cached_ && GetCacheFilePath(&path))
225 task_runner_->PostTask(FROM_HERE,
226 base::BindOnce(base::GetDeleteFileCallback(), path));
227 }
228
OnHistoryServiceLoaded(history::HistoryService * history_service)229 void InMemoryURLIndex::OnHistoryServiceLoaded(
230 history::HistoryService* history_service) {
231 if (listen_to_history_service_loaded_)
232 ScheduleRebuildFromHistory();
233 listen_to_history_service_loaded_ = false;
234 }
235
OnMemoryDump(const base::trace_event::MemoryDumpArgs & args,base::trace_event::ProcessMemoryDump * process_memory_dump)236 bool InMemoryURLIndex::OnMemoryDump(
237 const base::trace_event::MemoryDumpArgs& args,
238 base::trace_event::ProcessMemoryDump* process_memory_dump) {
239 size_t res = 0;
240
241 res += base::trace_event::EstimateMemoryUsage(scheme_whitelist_);
242
243 // TODO(dyaroshev): Add support for scoped_refptr in
244 // base::trace_event::EstimateMemoryUsage.
245 res += sizeof(URLIndexPrivateData) + private_data_->EstimateMemoryUsage();
246
247 const std::string dump_name =
248 base::StringPrintf("omnibox/in_memory_url_index/0x%" PRIXPTR,
249 reinterpret_cast<uintptr_t>(this));
250 auto* dump = process_memory_dump->CreateAllocatorDump(dump_name);
251 dump->AddScalar(base::trace_event::MemoryAllocatorDump::kNameSize,
252 base::trace_event::MemoryAllocatorDump::kUnitsBytes, res);
253
254 // TODO(https://crbug.com/1068883): Remove this code when the bug is fixed.
255 private_data_->OnMemoryAllocatorDump(dump);
256
257 return true;
258 }
259
260 // Restoring from Cache --------------------------------------------------------
261
PostRestoreFromCacheFileTask()262 void InMemoryURLIndex::PostRestoreFromCacheFileTask() {
263 DCHECK(thread_checker_.CalledOnValidThread());
264 TRACE_EVENT0("browser", "InMemoryURLIndex::PostRestoreFromCacheFileTask");
265
266 if (base::FeatureList::IsEnabled(
267 omnibox::kHistoryQuickProviderAblateInMemoryURLIndexCacheFile)) {
268 // To short circuit the cache, pretend we've failed to load it.
269 OnCacheLoadDone(nullptr);
270 return;
271 }
272
273 base::FilePath path;
274 if (!GetCacheFilePath(&path) || shutdown_) {
275 restored_ = true;
276 if (restore_cache_observer_)
277 restore_cache_observer_->OnCacheRestoreFinished(false);
278 return;
279 }
280
281 base::PostTaskAndReplyWithResult(
282 task_runner_.get(), FROM_HERE,
283 base::BindOnce(&URLIndexPrivateData::RestoreFromFile, path),
284 base::BindOnce(&InMemoryURLIndex::OnCacheLoadDone, AsWeakPtr()));
285 }
286
OnCacheLoadDone(scoped_refptr<URLIndexPrivateData> private_data)287 void InMemoryURLIndex::OnCacheLoadDone(
288 scoped_refptr<URLIndexPrivateData> private_data) {
289 if (private_data.get() && !private_data->Empty()) {
290 private_data_tracker_.TryCancelAll();
291 private_data_ = private_data;
292 restored_ = true;
293 if (restore_cache_observer_)
294 restore_cache_observer_->OnCacheRestoreFinished(true);
295 } else if (history_service_) {
296 // When unable to restore from the cache file delete the cache file, if
297 // it exists, and then rebuild from the history database if it's available,
298 // otherwise wait until the history database loaded and then rebuild.
299 base::FilePath path;
300 if (!GetCacheFilePath(&path) || shutdown_)
301 return;
302 task_runner_->PostTask(FROM_HERE,
303 base::BindOnce(base::GetDeleteFileCallback(), path));
304 if (history_service_->backend_loaded()) {
305 ScheduleRebuildFromHistory();
306 } else {
307 listen_to_history_service_loaded_ = true;
308 }
309 }
310 }
311
312 // Cleanup ---------------------------------------------------------------------
313
Shutdown()314 void InMemoryURLIndex::Shutdown() {
315 if (history_service_) {
316 history_service_->RemoveObserver(this);
317 history_service_ = nullptr;
318 }
319 cache_reader_tracker_.TryCancelAll();
320 shutdown_ = true;
321 base::FilePath path;
322 if (!GetCacheFilePath(&path))
323 return;
324 private_data_tracker_.TryCancelAll();
325
326 if (!base::FeatureList::IsEnabled(
327 omnibox::kHistoryQuickProviderAblateInMemoryURLIndexCacheFile)) {
328 task_runner_->PostTask(
329 FROM_HERE,
330 base::BindOnce(
331 base::IgnoreResult(
332 &URLIndexPrivateData::WritePrivateDataToCacheFileTask),
333 private_data_, path));
334 }
335 #ifndef LEAK_SANITIZER
336 // Intentionally create and then leak a scoped_refptr to private_data_. This
337 // permanently raises the reference count so that the URLIndexPrivateData
338 // destructor won't run during browser shutdown. This saves having to walk the
339 // maps to free their memory, which saves time and avoids shutdown hangs,
340 // especially if some of the memory has been paged out.
341 base::NoDestructor<scoped_refptr<URLIndexPrivateData>> leak_reference(
342 private_data_);
343 #endif
344 needs_to_be_cached_ = false;
345 }
346
347 // Restoring from the History DB -----------------------------------------------
348
ScheduleRebuildFromHistory()349 void InMemoryURLIndex::ScheduleRebuildFromHistory() {
350 DCHECK(history_service_);
351 history_service_->ScheduleDBTask(
352 FROM_HERE,
353 std::unique_ptr<history::HistoryDBTask>(
354 new InMemoryURLIndex::RebuildPrivateDataFromHistoryDBTask(
355 this, scheme_whitelist_)),
356 &cache_reader_tracker_);
357 }
358
DoneRebuidingPrivateDataFromHistoryDB(bool succeeded,scoped_refptr<URLIndexPrivateData> private_data)359 void InMemoryURLIndex::DoneRebuidingPrivateDataFromHistoryDB(
360 bool succeeded,
361 scoped_refptr<URLIndexPrivateData> private_data) {
362 DCHECK(thread_checker_.CalledOnValidThread());
363 if (succeeded) {
364 private_data_tracker_.TryCancelAll();
365 private_data_ = private_data;
366 PostSaveToCacheFileTask(); // Cache the newly rebuilt index.
367 } else {
368 private_data_->Clear(); // Dump the old private data.
369 // There is no need to do anything with the cache file as it was deleted
370 // when the rebuild from the history operation was kicked off.
371 }
372 restored_ = true;
373 if (restore_cache_observer_)
374 restore_cache_observer_->OnCacheRestoreFinished(succeeded);
375 }
376
RebuildFromHistory(history::HistoryDatabase * history_db)377 void InMemoryURLIndex::RebuildFromHistory(
378 history::HistoryDatabase* history_db) {
379 private_data_tracker_.TryCancelAll();
380 private_data_ = URLIndexPrivateData::RebuildFromHistory(history_db,
381 scheme_whitelist_);
382 }
383
384 // Saving to Cache -------------------------------------------------------------
385
PostSaveToCacheFileTask()386 void InMemoryURLIndex::PostSaveToCacheFileTask() {
387 if (base::FeatureList::IsEnabled(
388 omnibox::kHistoryQuickProviderAblateInMemoryURLIndexCacheFile)) {
389 return;
390 }
391
392 base::FilePath path;
393 if (!GetCacheFilePath(&path))
394 return;
395 // If there is anything in our private data then make a copy of it and tell
396 // it to save itself to a file.
397 if (private_data_.get() && !private_data_->Empty()) {
398 // Note that ownership of the copy of our private data is passed to the
399 // completion closure below.
400 scoped_refptr<URLIndexPrivateData> private_data_copy =
401 private_data_->Duplicate();
402 base::PostTaskAndReplyWithResult(
403 task_runner_.get(), FROM_HERE,
404 base::BindOnce(&URLIndexPrivateData::WritePrivateDataToCacheFileTask,
405 private_data_copy, path),
406 base::BindOnce(&InMemoryURLIndex::OnCacheSaveDone, AsWeakPtr()));
407 } else {
408 // If there is no data in our index then delete any existing cache file.
409 task_runner_->PostTask(FROM_HERE,
410 base::BindOnce(base::GetDeleteFileCallback(), path));
411 }
412 }
413
OnCacheSaveDone(bool succeeded)414 void InMemoryURLIndex::OnCacheSaveDone(bool succeeded) {
415 if (save_cache_observer_)
416 save_cache_observer_->OnCacheSaveFinished(succeeded);
417 }
418