1 //* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 #include "LookupCache.h"
7 #include "HashStore.h"
8 #include "nsISeekableStream.h"
9 #include "mozilla/Telemetry.h"
10 #include "mozilla/Logging.h"
11 #include "nsNetUtil.h"
12 #include "prprf.h"
13 #include "Classifier.h"
14
15 // We act as the main entry point for all the real lookups,
16 // so note that those are not done to the actual HashStore.
17 // The latter solely exists to store the data needed to handle
18 // the updates from the protocol.
19
20 // This module provides a front for PrefixSet, mUpdateCompletions,
21 // and mGetHashCache, which together contain everything needed to
22 // provide a classification as long as the data is up to date.
23
24 // PrefixSet stores and provides lookups for 4-byte prefixes.
25 // mUpdateCompletions contains 32-byte completions which were
// contained in updates. They are retrieved from HashStore/.sbstore
27 // on startup.
28 // mGetHashCache contains 32-byte completions which were
29 // returned from the gethash server. They are not serialized,
30 // only cached until the next update.
31
// Name of the persistent PrefixSet storage.
// This suffix is appended to the table name to form the file name that is
// looked up inside the store directory.
#define PREFIXSET_SUFFIX ".pset"

// Logging helpers for this file. The log module is only declared here (its
// definition lives elsewhere). Enable the output with:
// MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
// LOG takes a parenthesized printf-style argument list, e.g. LOG(("n=%d", n)),
// and emits it at Debug level.
#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
// True when Debug-level logging is enabled; used to skip work that only
// exists to build log output.
#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
39
40 namespace mozilla {
41 namespace safebrowsing {
42
// Out-of-line definition of the static LookupCacheV2::VER constant
// (presumably the version of the cache format/protocol this class
// implements -- confirm against the header).
const int LookupCacheV2::VER = 2;
44
LookupCache(const nsACString & aTableName,const nsACString & aProvider,nsIFile * aRootStoreDir)45 LookupCache::LookupCache(const nsACString& aTableName,
46 const nsACString& aProvider,
47 nsIFile* aRootStoreDir)
48 : mPrimed(false)
49 , mTableName(aTableName)
50 , mProvider(aProvider)
51 , mRootStoreDirectory(aRootStoreDir)
52 {
53 UpdateRootDirHandle(mRootStoreDirectory);
54 }
55
56 nsresult
Open()57 LookupCache::Open()
58 {
59 LOG(("Loading PrefixSet"));
60 nsresult rv = LoadPrefixSet();
61 NS_ENSURE_SUCCESS(rv, rv);
62
63 return NS_OK;
64 }
65
66 nsresult
UpdateRootDirHandle(nsIFile * aNewRootStoreDirectory)67 LookupCache::UpdateRootDirHandle(nsIFile* aNewRootStoreDirectory)
68 {
69 nsresult rv;
70
71 if (aNewRootStoreDirectory != mRootStoreDirectory) {
72 rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
73 NS_ENSURE_SUCCESS(rv, rv);
74 }
75
76 rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory,
77 mTableName,
78 mProvider,
79 getter_AddRefs(mStoreDirectory));
80
81 if (NS_FAILED(rv)) {
82 LOG(("Failed to get private store directory for %s", mTableName.get()));
83 mStoreDirectory = mRootStoreDirectory;
84 }
85
86 if (LOG_ENABLED()) {
87 nsString path;
88 mStoreDirectory->GetPath(path);
89 LOG(("Private store directory for %s is %s", mTableName.get(),
90 NS_ConvertUTF16toUTF8(path).get()));
91 }
92
93 return rv;
94 }
95
96 nsresult
Reset()97 LookupCache::Reset()
98 {
99 LOG(("LookupCache resetting"));
100
101 nsCOMPtr<nsIFile> prefixsetFile;
102 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(prefixsetFile));
103 NS_ENSURE_SUCCESS(rv, rv);
104
105 rv = prefixsetFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
106 NS_ENSURE_SUCCESS(rv, rv);
107
108 rv = prefixsetFile->Remove(false);
109 NS_ENSURE_SUCCESS(rv, rv);
110
111 ClearAll();
112
113 return NS_OK;
114 }
115
116 nsresult
AddCompletionsToCache(AddCompleteArray & aAddCompletes)117 LookupCache::AddCompletionsToCache(AddCompleteArray& aAddCompletes)
118 {
119 for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
120 if (mGetHashCache.BinaryIndexOf(aAddCompletes[i].CompleteHash()) == mGetHashCache.NoIndex) {
121 mGetHashCache.AppendElement(aAddCompletes[i].CompleteHash());
122 }
123 }
124 mGetHashCache.Sort();
125
126 return NS_OK;
127 }
128
#if defined(DEBUG)
// Debug-only helper: logs every cached gethash completion as a hex string.
// Does nothing when Debug-level logging is disabled.
void
LookupCache::DumpCache()
{
  if (LOG_ENABLED()) {
    const uint32_t count = mGetHashCache.Length();
    for (uint32_t idx = 0; idx < count; ++idx) {
      nsAutoCString str;
      mGetHashCache[idx].ToHexString(str);
      LOG(("Caches: %s", str.get()));
    }
  }
}
#endif
143
144 nsresult
WriteFile()145 LookupCache::WriteFile()
146 {
147 if (nsUrlClassifierDBService::ShutdownHasStarted()) {
148 return NS_ERROR_ABORT;
149 }
150
151 nsCOMPtr<nsIFile> psFile;
152 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
153 NS_ENSURE_SUCCESS(rv, rv);
154
155 rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
156 NS_ENSURE_SUCCESS(rv, rv);
157
158 rv = StoreToFile(psFile);
159 NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset");
160
161 return NS_OK;
162 }
163
164 void
ClearAll()165 LookupCache::ClearAll()
166 {
167 ClearCache();
168 ClearPrefixes();
169 mPrimed = false;
170 }
171
172 void
ClearCache()173 LookupCache::ClearCache()
174 {
175 mGetHashCache.Clear();
176 }
177
178 /* static */ bool
IsCanonicalizedIP(const nsACString & aHost)179 LookupCache::IsCanonicalizedIP(const nsACString& aHost)
180 {
181 // The canonicalization process will have left IP addresses in dotted
182 // decimal with no surprises.
183 uint32_t i1, i2, i3, i4;
184 char c;
185 if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
186 &i1, &i2, &i3, &i4, &c) == 4) {
187 return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
188 }
189
190 return false;
191 }
192
193 /* static */ nsresult
GetLookupFragments(const nsACString & aSpec,nsTArray<nsCString> * aFragments)194 LookupCache::GetLookupFragments(const nsACString& aSpec,
195 nsTArray<nsCString>* aFragments)
196
197 {
198 aFragments->Clear();
199
200 nsACString::const_iterator begin, end, iter;
201 aSpec.BeginReading(begin);
202 aSpec.EndReading(end);
203
204 iter = begin;
205 if (!FindCharInReadable('/', iter, end)) {
206 return NS_OK;
207 }
208
209 const nsCSubstring& host = Substring(begin, iter++);
210 nsAutoCString path;
211 path.Assign(Substring(iter, end));
212
213 /**
214 * From the protocol doc:
215 * For the hostname, the client will try at most 5 different strings. They
216 * are:
217 * a) The exact hostname of the url
218 * b) The 4 hostnames formed by starting with the last 5 components and
219 * successivly removing the leading component. The top-level component
220 * can be skipped. This is not done if the hostname is a numerical IP.
221 */
222 nsTArray<nsCString> hosts;
223 hosts.AppendElement(host);
224
225 if (!IsCanonicalizedIP(host)) {
226 host.BeginReading(begin);
227 host.EndReading(end);
228 int numHostComponents = 0;
229 while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
230 numHostComponents < MAX_HOST_COMPONENTS) {
231 // don't bother checking toplevel domains
232 if (++numHostComponents >= 2) {
233 host.EndReading(iter);
234 hosts.AppendElement(Substring(end, iter));
235 }
236 end = begin;
237 host.BeginReading(begin);
238 }
239 }
240
241 /**
242 * From the protocol doc:
243 * For the path, the client will also try at most 6 different strings.
244 * They are:
245 * a) the exact path of the url, including query parameters
246 * b) the exact path of the url, without query parameters
247 * c) the 4 paths formed by starting at the root (/) and
248 * successively appending path components, including a trailing
249 * slash. This behavior should only extend up to the next-to-last
250 * path component, that is, a trailing slash should never be
251 * appended that was not present in the original url.
252 */
253 nsTArray<nsCString> paths;
254 nsAutoCString pathToAdd;
255
256 path.BeginReading(begin);
257 path.EndReading(end);
258 iter = begin;
259 if (FindCharInReadable('?', iter, end)) {
260 pathToAdd = Substring(begin, iter);
261 paths.AppendElement(pathToAdd);
262 end = iter;
263 }
264
265 int numPathComponents = 1;
266 iter = begin;
267 while (FindCharInReadable('/', iter, end) &&
268 numPathComponents < MAX_PATH_COMPONENTS) {
269 iter++;
270 pathToAdd.Assign(Substring(begin, iter));
271 paths.AppendElement(pathToAdd);
272 numPathComponents++;
273 }
274
275 // If we haven't already done so, add the full path
276 if (!pathToAdd.Equals(path)) {
277 paths.AppendElement(path);
278 }
279 // Check an empty path (for whole-domain blacklist entries)
280 paths.AppendElement(EmptyCString());
281
282 for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
283 for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
284 nsCString key;
285 key.Assign(hosts[hostIndex]);
286 key.Append('/');
287 key.Append(paths[pathIndex]);
288 LOG(("Checking fragment %s", key.get()));
289
290 aFragments->AppendElement(key);
291 }
292 }
293
294 return NS_OK;
295 }
296
297 /* static */ nsresult
GetHostKeys(const nsACString & aSpec,nsTArray<nsCString> * aHostKeys)298 LookupCache::GetHostKeys(const nsACString& aSpec,
299 nsTArray<nsCString>* aHostKeys)
300 {
301 nsACString::const_iterator begin, end, iter;
302 aSpec.BeginReading(begin);
303 aSpec.EndReading(end);
304
305 iter = begin;
306 if (!FindCharInReadable('/', iter, end)) {
307 return NS_OK;
308 }
309
310 const nsCSubstring& host = Substring(begin, iter);
311
312 if (IsCanonicalizedIP(host)) {
313 nsCString *key = aHostKeys->AppendElement();
314 if (!key)
315 return NS_ERROR_OUT_OF_MEMORY;
316
317 key->Assign(host);
318 key->Append("/");
319 return NS_OK;
320 }
321
322 nsTArray<nsCString> hostComponents;
323 ParseString(PromiseFlatCString(host), '.', hostComponents);
324
325 if (hostComponents.Length() < 2) {
326 // no host or toplevel host, this won't match anything in the db
327 return NS_OK;
328 }
329
330 // First check with two domain components
331 int32_t last = int32_t(hostComponents.Length()) - 1;
332 nsCString *lookupHost = aHostKeys->AppendElement();
333 if (!lookupHost)
334 return NS_ERROR_OUT_OF_MEMORY;
335
336 lookupHost->Assign(hostComponents[last - 1]);
337 lookupHost->Append(".");
338 lookupHost->Append(hostComponents[last]);
339 lookupHost->Append("/");
340
341 // Now check with three domain components
342 if (hostComponents.Length() > 2) {
343 nsCString *lookupHost2 = aHostKeys->AppendElement();
344 if (!lookupHost2)
345 return NS_ERROR_OUT_OF_MEMORY;
346 lookupHost2->Assign(hostComponents[last - 2]);
347 lookupHost2->Append(".");
348 lookupHost2->Append(*lookupHost);
349 }
350
351 return NS_OK;
352 }
353
354 nsresult
LoadPrefixSet()355 LookupCache::LoadPrefixSet()
356 {
357 nsCOMPtr<nsIFile> psFile;
358 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
359 NS_ENSURE_SUCCESS(rv, rv);
360
361 rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
362 NS_ENSURE_SUCCESS(rv, rv);
363
364 bool exists;
365 rv = psFile->Exists(&exists);
366 NS_ENSURE_SUCCESS(rv, rv);
367
368 if (exists) {
369 LOG(("stored PrefixSet exists, loading from disk"));
370 rv = LoadFromFile(psFile);
371 if (NS_FAILED(rv)) {
372 if (rv == NS_ERROR_FILE_CORRUPTED) {
373 Reset();
374 }
375 return rv;
376 }
377 mPrimed = true;
378 } else {
379 LOG(("no (usable) stored PrefixSet found"));
380 }
381
382 #ifdef DEBUG
383 if (mPrimed) {
384 uint32_t size = SizeOfPrefixSet();
385 LOG(("SB tree done, size = %d bytes\n", size));
386 }
387 #endif
388
389 return NS_OK;
390 }
391
392 nsresult
Init()393 LookupCacheV2::Init()
394 {
395 mPrefixSet = new nsUrlClassifierPrefixSet();
396 nsresult rv = mPrefixSet->Init(mTableName);
397 NS_ENSURE_SUCCESS(rv, rv);
398
399 return NS_OK;
400 }
401
402 nsresult
Open()403 LookupCacheV2::Open()
404 {
405 nsresult rv = LookupCache::Open();
406 NS_ENSURE_SUCCESS(rv, rv);
407
408 LOG(("Reading Completions"));
409 rv = ReadCompletions();
410 NS_ENSURE_SUCCESS(rv, rv);
411
412 return NS_OK;
413 }
414
415 void
ClearAll()416 LookupCacheV2::ClearAll()
417 {
418 LookupCache::ClearAll();
419 mUpdateCompletions.Clear();
420 }
421
422 nsresult
Has(const Completion & aCompletion,bool * aHas,bool * aComplete)423 LookupCacheV2::Has(const Completion& aCompletion,
424 bool* aHas, bool* aComplete)
425 {
426 *aHas = *aComplete = false;
427
428 uint32_t prefix = aCompletion.ToUint32();
429
430 bool found;
431 nsresult rv = mPrefixSet->Contains(prefix, &found);
432 NS_ENSURE_SUCCESS(rv, rv);
433
434 LOG(("Probe in %s: %X, found %d", mTableName.get(), prefix, found));
435
436 if (found) {
437 *aHas = true;
438 }
439
440 if ((mGetHashCache.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex) ||
441 (mUpdateCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex)) {
442 LOG(("Complete in %s", mTableName.get()));
443 *aComplete = true;
444 *aHas = true;
445 }
446
447 return NS_OK;
448 }
449
450 nsresult
Build(AddPrefixArray & aAddPrefixes,AddCompleteArray & aAddCompletes)451 LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
452 AddCompleteArray& aAddCompletes)
453 {
454 Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS,
455 static_cast<uint32_t>(aAddCompletes.Length()));
456
457 mUpdateCompletions.Clear();
458 mUpdateCompletions.SetCapacity(aAddCompletes.Length());
459 for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
460 mUpdateCompletions.AppendElement(aAddCompletes[i].CompleteHash());
461 }
462 aAddCompletes.Clear();
463 mUpdateCompletions.Sort();
464
465 Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES,
466 static_cast<uint32_t>(aAddPrefixes.Length()));
467
468 nsresult rv = ConstructPrefixSet(aAddPrefixes);
469 NS_ENSURE_SUCCESS(rv, rv);
470 mPrimed = true;
471
472 return NS_OK;
473 }
474
475 nsresult
GetPrefixes(FallibleTArray<uint32_t> & aAddPrefixes)476 LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes)
477 {
478 if (!mPrimed) {
479 // This can happen if its a new table, so no error.
480 LOG(("GetPrefixes from empty LookupCache"));
481 return NS_OK;
482 }
483 return mPrefixSet->GetPrefixesNative(aAddPrefixes);
484 }
485
486 nsresult
ReadCompletions()487 LookupCacheV2::ReadCompletions()
488 {
489 HashStore store(mTableName, mProvider, mRootStoreDirectory);
490
491 nsresult rv = store.Open();
492 NS_ENSURE_SUCCESS(rv, rv);
493
494 mUpdateCompletions.Clear();
495
496 const AddCompleteArray& addComplete = store.AddCompletes();
497 for (uint32_t i = 0; i < addComplete.Length(); i++) {
498 mUpdateCompletions.AppendElement(addComplete[i].complete);
499 }
500
501 return NS_OK;
502 }
503
504 nsresult
ClearPrefixes()505 LookupCacheV2::ClearPrefixes()
506 {
507 return mPrefixSet->SetPrefixes(nullptr, 0);
508 }
509
510 nsresult
StoreToFile(nsIFile * aFile)511 LookupCacheV2::StoreToFile(nsIFile* aFile)
512 {
513 return mPrefixSet->StoreToFile(aFile);
514 }
515
516 nsresult
LoadFromFile(nsIFile * aFile)517 LookupCacheV2::LoadFromFile(nsIFile* aFile)
518 {
519 return mPrefixSet->LoadFromFile(aFile);
520 }
521
522 size_t
SizeOfPrefixSet()523 LookupCacheV2::SizeOfPrefixSet()
524 {
525 return mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
526 }
527
#ifdef DEBUG
// Debug-only sanity check: asserts that the elements of aArray are in
// non-decreasing order by comparing every pair of neighbours. Empty and
// single-element arrays pass trivially.
template <class T>
static void EnsureSorted(T* aArray)
{
  typename T::elem_type* const end = aArray->Elements() + aArray->Length();
  typename T::elem_type* previous = aArray->Elements();
  if (previous == end) {
    return;
  }

  for (typename T::elem_type* iter = previous + 1; iter != end;
       previous = iter, ++iter) {
    MOZ_ASSERT(*previous <= *iter);
  }
}
#endif
547
548 nsresult
ConstructPrefixSet(AddPrefixArray & aAddPrefixes)549 LookupCacheV2::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
550 {
551 Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;
552
553 nsTArray<uint32_t> array;
554 if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) {
555 return NS_ERROR_OUT_OF_MEMORY;
556 }
557
558 for (uint32_t i = 0; i < aAddPrefixes.Length(); i++) {
559 array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32());
560 }
561 aAddPrefixes.Clear();
562
563 #ifdef DEBUG
564 // PrefixSet requires sorted order
565 EnsureSorted(&array);
566 #endif
567
568 // construct new one, replace old entries
569 nsresult rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
570 NS_ENSURE_SUCCESS(rv, rv);
571
572 #ifdef DEBUG
573 uint32_t size;
574 size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
575 LOG(("SB tree done, size = %d bytes\n", size));
576 #endif
577
578 mPrimed = true;
579
580 return NS_OK;
581 }
582
#if defined(DEBUG)
// Debug-only helper: logs every completion received through updates as a hex
// string. Does nothing when Debug-level logging is disabled.
void
LookupCacheV2::DumpCompletions()
{
  if (LOG_ENABLED()) {
    const uint32_t count = mUpdateCompletions.Length();
    for (uint32_t idx = 0; idx < count; ++idx) {
      nsAutoCString str;
      mUpdateCompletions[idx].ToHexString(str);
      LOG(("Update: %s", str.get()));
    }
  }
}
#endif
597
598 } // namespace safebrowsing
599 } // namespace mozilla
600