1 //* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 #include "LookupCache.h"
7 #include "HashStore.h"
8 #include "nsIFileStreams.h"
9 #include "nsISeekableStream.h"
10 #include "mozilla/ArrayUtils.h"
11 #include "mozilla/Telemetry.h"
12 #include "mozilla/Logging.h"
13 #include "nsNetUtil.h"
14 #include "nsCheckSummedOutputStream.h"
15 #include "crc32c.h"
16 #include "prprf.h"
17 #include "Classifier.h"
18 #include "nsUrlClassifierInfo.h"
19
20 // We act as the main entry point for all the real lookups,
21 // so note that those are not done to the actual HashStore.
22 // The latter solely exists to store the data needed to handle
23 // the updates from the protocol.
24
25 // This module provides a front for PrefixSet, mUpdateCompletions,
26 // and mGetHashCache, which together contain everything needed to
27 // provide a classification as long as the data is up to date.
28
29 // PrefixSet stores and provides lookups for 4-byte prefixes.
// mUpdateCompletions contains 32-byte completions which were
// contained in updates. They are retrieved from HashStore/.sbstore
// on startup.
33 // mGetHashCache contains 32-byte completions which were
34 // returned from the gethash server. They are not serialized,
35 // only cached until the next update.
36
// Logging in this file goes through the UrlClassifierDbService log module,
// which is only declared here (its definition is not in this file).
// Enable it at runtime with:
// MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
// LOG(("fmt", ...)) emits a Debug-level message. The double parentheses are
// required because the whole printf-style argument list is forwarded as a
// single macro argument.
#define LOG(args) \
  MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
// Evaluates to true when Debug-level logging is enabled for the module; used
// to skip work that only exists to build log messages.
#define LOG_ENABLED() \
  MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
43
44 namespace mozilla {
45 namespace safebrowsing {
46
// Upper bound (64 KiB) on the stream buffer size used when storing the
// prefix set to, and loading it from, a file.
const uint32_t LookupCache::MAX_BUFFER_SIZE = 64 * 1024;

// Version tags of the two cache result flavours, taken from CacheResult.
const int CacheResultV2::VER = CacheResult::V2;
const int CacheResultV4::VER = CacheResult::V4;

// Version tag of the V2 lookup cache.
const int LookupCacheV2::VER = 2;
// Magic number and format version written into the file header by
// LookupCacheV2::GetHeader() and validated by LookupCacheV2::SanityCheck().
const uint32_t LookupCacheV2::VLPSET_MAGIC = 0xe5b862e7;
const uint32_t LookupCacheV2::VLPSET_VERSION = 1;
55
56 namespace {
57
58 //////////////////////////////////////////////////////////////////////////
59 // A set of lightweight functions for reading/writing value from/to file.
60 template <typename T>
61 struct ValueTraits {
62 static_assert(sizeof(T) <= LookupCacheV4::MAX_METADATA_VALUE_LENGTH,
63 "LookupCacheV4::MAX_METADATA_VALUE_LENGTH is too small.");
Lengthmozilla::safebrowsing::__anon56b1ea280111::ValueTraits64 static uint32_t Length(const T& aValue) { return sizeof(T); }
WritePtrmozilla::safebrowsing::__anon56b1ea280111::ValueTraits65 static char* WritePtr(T& aValue, uint32_t aLength) { return (char*)&aValue; }
ReadPtrmozilla::safebrowsing::__anon56b1ea280111::ValueTraits66 static const char* ReadPtr(const T& aValue) { return (char*)&aValue; }
IsFixedLengthmozilla::safebrowsing::__anon56b1ea280111::ValueTraits67 static bool IsFixedLength() { return true; }
68 };
69
70 template <>
71 struct ValueTraits<nsACString> {
IsFixedLengthmozilla::safebrowsing::__anon56b1ea280111::ValueTraits72 static bool IsFixedLength() { return false; }
73
Lengthmozilla::safebrowsing::__anon56b1ea280111::ValueTraits74 static uint32_t Length(const nsACString& aValue) { return aValue.Length(); }
75
WritePtrmozilla::safebrowsing::__anon56b1ea280111::ValueTraits76 static char* WritePtr(nsACString& aValue, uint32_t aLength) {
77 aValue.SetLength(aLength);
78 return aValue.BeginWriting();
79 }
80
ReadPtrmozilla::safebrowsing::__anon56b1ea280111::ValueTraits81 static const char* ReadPtr(const nsACString& aValue) {
82 return aValue.BeginReading();
83 }
84 };
85
86 template <typename T>
WriteValue(nsIOutputStream * aOutputStream,const T & aValue)87 static nsresult WriteValue(nsIOutputStream* aOutputStream, const T& aValue) {
88 uint32_t writeLength = ValueTraits<T>::Length(aValue);
89 MOZ_ASSERT(writeLength <= LookupCacheV4::MAX_METADATA_VALUE_LENGTH,
90 "LookupCacheV4::MAX_METADATA_VALUE_LENGTH is too small.");
91 if (!ValueTraits<T>::IsFixedLength()) {
92 // We need to write out the variable value length.
93 nsresult rv = WriteValue(aOutputStream, writeLength);
94 NS_ENSURE_SUCCESS(rv, rv);
95 }
96
97 // Write out the value.
98 auto valueReadPtr = ValueTraits<T>::ReadPtr(aValue);
99 uint32_t written;
100 nsresult rv = aOutputStream->Write(valueReadPtr, writeLength, &written);
101 NS_ENSURE_SUCCESS(rv, rv);
102 if (NS_WARN_IF(written != writeLength)) {
103 return NS_ERROR_FAILURE;
104 }
105
106 return rv;
107 }
108
109 template <typename T>
ReadValue(nsIInputStream * aInputStream,T & aValue)110 static nsresult ReadValue(nsIInputStream* aInputStream, T& aValue) {
111 nsresult rv;
112
113 uint32_t readLength;
114 if (ValueTraits<T>::IsFixedLength()) {
115 readLength = ValueTraits<T>::Length(aValue);
116 } else {
117 // Read the variable value length from file.
118 nsresult rv = ReadValue(aInputStream, readLength);
119 NS_ENSURE_SUCCESS(rv, rv);
120 }
121
122 // Sanity-check the readLength in case of disk corruption
123 // (see bug 1433636).
124 if (readLength > LookupCacheV4::MAX_METADATA_VALUE_LENGTH) {
125 return NS_ERROR_FILE_CORRUPTED;
126 }
127
128 // Read the value.
129 uint32_t read;
130 auto valueWritePtr = ValueTraits<T>::WritePtr(aValue, readLength);
131 rv = aInputStream->Read(valueWritePtr, readLength, &read);
132 if (NS_FAILED(rv) || read != readLength) {
133 LOG(("Failed to read the value."));
134 return NS_FAILED(rv) ? rv : NS_ERROR_FAILURE;
135 }
136
137 return rv;
138 }
139
CStringToHexString(const nsACString & aIn,nsACString & aOut)140 void CStringToHexString(const nsACString& aIn, nsACString& aOut) {
141 static const char* const lut = "0123456789ABCDEF";
142
143 size_t len = aIn.Length();
144 MOZ_ASSERT(len <= COMPLETE_SIZE);
145
146 aOut.SetCapacity(2 * len);
147 for (size_t i = 0; i < aIn.Length(); ++i) {
148 const char c = static_cast<char>(aIn[i]);
149 aOut.Append(lut[(c >> 4) & 0x0F]);
150 aOut.Append(lut[c & 15]);
151 }
152 }
153
#ifdef DEBUG
// Formats |aCurTimeSec| (seconds since the epoch) as
// "YYYY-MM-DD hh:mm:ss UTC". Only compiled into debug builds.
nsCString GetFormattedTimeString(int64_t aCurTimeSec) {
  PRExplodedTime exploded;
  // PR_ExplodeTime() takes microseconds; PR_GMTParameters selects UTC.
  PR_ExplodeTime(aCurTimeSec * PR_USEC_PER_SEC, PR_GMTParameters, &exploded);

  // tm_month is zero-based, hence the +1 for display.
  return nsPrintfCString("%04d-%02d-%02d %02d:%02d:%02d UTC",
                         exploded.tm_year, exploded.tm_month + 1,
                         exploded.tm_mday, exploded.tm_hour, exploded.tm_min,
                         exploded.tm_sec);
}
#endif
164
165 } // end of unnamed namespace.
166 ////////////////////////////////////////////////////////////////////////
167
LookupCache(const nsACString & aTableName,const nsACString & aProvider,nsCOMPtr<nsIFile> & aRootStoreDir)168 LookupCache::LookupCache(const nsACString& aTableName,
169 const nsACString& aProvider,
170 nsCOMPtr<nsIFile>& aRootStoreDir)
171 : mPrimed(false),
172 mTableName(aTableName),
173 mProvider(aProvider),
174 mRootStoreDirectory(aRootStoreDir) {
175 UpdateRootDirHandle(mRootStoreDirectory);
176 }
177
Open()178 nsresult LookupCache::Open() {
179 LOG(("Loading PrefixSet for %s", mTableName.get()));
180 nsresult rv;
181 if (nsUrlClassifierUtils::IsMozTestTable(mTableName)) {
182 // For built-in test table, we don't load it from disk,
183 // test entries are directly added in memory.
184 rv = LoadMozEntries();
185 } else {
186 rv = LoadPrefixSet();
187 }
188
189 Unused << NS_WARN_IF(NS_FAILED(rv));
190
191 return rv;
192 }
193
Init()194 nsresult LookupCache::Init() {
195 mVLPrefixSet = new VariableLengthPrefixSet();
196 nsresult rv = mVLPrefixSet->Init(mTableName);
197 NS_ENSURE_SUCCESS(rv, rv);
198
199 return NS_OK;
200 }
201
UpdateRootDirHandle(nsCOMPtr<nsIFile> & aNewRootStoreDirectory)202 nsresult LookupCache::UpdateRootDirHandle(
203 nsCOMPtr<nsIFile>& aNewRootStoreDirectory) {
204 nsresult rv;
205
206 if (aNewRootStoreDirectory != mRootStoreDirectory) {
207 rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
208 NS_ENSURE_SUCCESS(rv, rv);
209 }
210
211 rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory, mTableName,
212 mProvider,
213 getter_AddRefs(mStoreDirectory));
214
215 if (NS_FAILED(rv)) {
216 LOG(("Failed to get private store directory for %s", mTableName.get()));
217 mStoreDirectory = mRootStoreDirectory;
218 }
219
220 if (LOG_ENABLED()) {
221 nsString path;
222 mStoreDirectory->GetPath(path);
223 LOG(("Private store directory for %s is %s", mTableName.get(),
224 NS_ConvertUTF16toUTF8(path).get()));
225 }
226
227 return rv;
228 }
229
WriteFile()230 nsresult LookupCache::WriteFile() {
231 if (nsUrlClassifierDBService::ShutdownHasStarted()) {
232 return NS_ERROR_ABORT;
233 }
234
235 nsCOMPtr<nsIFile> psFile;
236 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
237 if (NS_WARN_IF(NS_FAILED(rv))) {
238 return rv;
239 }
240
241 rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix());
242 if (NS_WARN_IF(NS_FAILED(rv))) {
243 return rv;
244 }
245
246 rv = StoreToFile(psFile);
247 if (NS_WARN_IF(NS_FAILED(rv))) {
248 LOG(("Failed to store the prefixset for table %s", mTableName.get()));
249 return rv;
250 }
251
252 return NS_OK;
253 }
254
CheckCache(const Completion & aCompletion,bool * aHas,bool * aConfirmed)255 nsresult LookupCache::CheckCache(const Completion& aCompletion, bool* aHas,
256 bool* aConfirmed) {
257 // Shouldn't call this function if prefix is not in the database.
258 MOZ_ASSERT(*aHas);
259
260 *aConfirmed = false;
261
262 uint32_t prefix = aCompletion.ToUint32();
263
264 CachedFullHashResponse* fullHashResponse = mFullHashCache.Get(prefix);
265 if (!fullHashResponse) {
266 return NS_OK;
267 }
268
269 int64_t nowSec = PR_Now() / PR_USEC_PER_SEC;
270 int64_t expiryTimeSec;
271
272 FullHashExpiryCache& fullHashes = fullHashResponse->fullHashes;
273 nsDependentCSubstring completion(
274 reinterpret_cast<const char*>(aCompletion.buf), COMPLETE_SIZE);
275
276 // Check if we can find the fullhash in positive cache
277 if (fullHashes.Get(completion, &expiryTimeSec)) {
278 if (nowSec <= expiryTimeSec) {
279 // Url is NOT safe.
280 *aConfirmed = true;
281 LOG(("Found a valid fullhash in the positive cache"));
282 } else {
283 // Trigger a gethash request in this case(aConfirmed is false).
284 LOG(("Found an expired fullhash in the positive cache"));
285
286 // Remove fullhash entry from the cache when the negative cache
287 // is also expired because whether or not the fullhash is cached
288 // locally, we will need to consult the server next time we
289 // lookup this hash. We may as well remove it from our cache.
290 if (fullHashResponse->negativeCacheExpirySec < expiryTimeSec) {
291 fullHashes.Remove(completion);
292 if (fullHashes.Count() == 0 &&
293 fullHashResponse->negativeCacheExpirySec < nowSec) {
294 mFullHashCache.Remove(prefix);
295 }
296 }
297 }
298 return NS_OK;
299 }
300
301 // Check negative cache.
302 if (fullHashResponse->negativeCacheExpirySec >= nowSec) {
303 // Url is safe.
304 LOG(("Found a valid prefix in the negative cache"));
305 *aHas = false;
306 } else {
307 LOG(("Found an expired prefix in the negative cache"));
308 if (fullHashes.Count() == 0) {
309 mFullHashCache.Remove(prefix);
310 }
311 }
312
313 return NS_OK;
314 }
315
316 // This function remove cache entries whose negative cache time is expired.
317 // It is possible that a cache entry whose positive cache time is not yet
318 // expired but still being removed after calling this API. Right now we call
319 // this on every update.
InvalidateExpiredCacheEntries()320 void LookupCache::InvalidateExpiredCacheEntries() {
321 int64_t nowSec = PR_Now() / PR_USEC_PER_SEC;
322
323 for (auto iter = mFullHashCache.Iter(); !iter.Done(); iter.Next()) {
324 CachedFullHashResponse* response = iter.UserData();
325 if (response->negativeCacheExpirySec < nowSec) {
326 iter.Remove();
327 }
328 }
329 }
330
CopyFullHashCache(const LookupCache * aSource)331 void LookupCache::CopyFullHashCache(const LookupCache* aSource) {
332 if (!aSource) {
333 return;
334 }
335
336 CopyClassHashTable<FullHashResponseMap>(aSource->mFullHashCache,
337 mFullHashCache);
338 }
339
ClearCache()340 void LookupCache::ClearCache() { mFullHashCache.Clear(); }
341
ClearAll()342 void LookupCache::ClearAll() {
343 ClearCache();
344 ClearPrefixes();
345 mPrimed = false;
346 }
347
ClearPrefixes()348 nsresult LookupCache::ClearPrefixes() {
349 // Clear by seting a empty map
350 PrefixStringMap map;
351 return mVLPrefixSet->SetPrefixes(map);
352 }
353
IsEmpty() const354 bool LookupCache::IsEmpty() const {
355 bool isEmpty;
356 mVLPrefixSet->IsEmpty(&isEmpty);
357 return isEmpty;
358 }
359
GetCacheInfo(nsIUrlClassifierCacheInfo ** aCache) const360 void LookupCache::GetCacheInfo(nsIUrlClassifierCacheInfo** aCache) const {
361 MOZ_ASSERT(aCache);
362
363 RefPtr<nsUrlClassifierCacheInfo> info = new nsUrlClassifierCacheInfo;
364 info->table = mTableName;
365
366 for (const auto& cacheEntry : mFullHashCache) {
367 RefPtr<nsUrlClassifierCacheEntry> entry = new nsUrlClassifierCacheEntry;
368
369 // Set prefix of the cache entry.
370 nsAutoCString prefix(reinterpret_cast<const char*>(&cacheEntry.GetKey()),
371 PREFIX_SIZE);
372 CStringToHexString(prefix, entry->prefix);
373
374 // Set expiry of the cache entry.
375 CachedFullHashResponse* response = cacheEntry.GetWeak();
376 entry->expirySec = response->negativeCacheExpirySec;
377
378 // Set positive cache.
379 FullHashExpiryCache& fullHashes = response->fullHashes;
380 for (const auto& fullHashEntry : fullHashes) {
381 RefPtr<nsUrlClassifierPositiveCacheEntry> match =
382 new nsUrlClassifierPositiveCacheEntry;
383
384 // Set fullhash of positive cache entry.
385 CStringToHexString(fullHashEntry.GetKey(), match->fullhash);
386
387 // Set expiry of positive cache entry.
388 match->expirySec = fullHashEntry.GetData();
389
390 entry->matches.AppendElement(
391 static_cast<nsIUrlClassifierPositiveCacheEntry*>(match));
392 }
393
394 info->entries.AppendElement(
395 static_cast<nsIUrlClassifierCacheEntry*>(entry));
396 }
397
398 info.forget(aCache);
399 }
400
401 /* static */
IsCanonicalizedIP(const nsACString & aHost)402 bool LookupCache::IsCanonicalizedIP(const nsACString& aHost) {
403 // The canonicalization process will have left IP addresses in dotted
404 // decimal with no surprises.
405 uint32_t i1, i2, i3, i4;
406 char c;
407 if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c", &i1, &i2, &i3,
408 &i4, &c) == 4) {
409 return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
410 }
411
412 return false;
413 }
414
415 // This is used when the URL is created by CreatePairwiseEntityListURI(),
416 // which returns an URI like "toplevel.page/?resource=third.party.domain"
417 // The fragment rule for the hostname(toplevel.page) is still the same
418 // as Safe Browsing protocol.
419 // The difference is that we always keep the path and query string and
420 // generate an additional fragment by removing the leading component of
421 // third.party.domain. This is to make sure we can find a match when a
422 // exceptionlisted domain is eTLD.
423 /* static */
GetLookupEntitylistFragments(const nsACString & aSpec,nsTArray<nsCString> * aFragments)424 nsresult LookupCache::GetLookupEntitylistFragments(
425 const nsACString& aSpec, nsTArray<nsCString>* aFragments) {
426 aFragments->Clear();
427
428 nsACString::const_iterator begin, end, iter, iter_end;
429 aSpec.BeginReading(begin);
430 aSpec.EndReading(end);
431
432 iter = begin;
433 iter_end = end;
434
435 // Fallback to use default fragment rule when the URL doesn't contain
436 // "/?resoruce=" because this means the URL is not generated in
437 // CreatePairwiseEntityListURI()
438 if (!FindInReadable("/?resource="_ns, iter, iter_end)) {
439 return GetLookupFragments(aSpec, aFragments);
440 }
441
442 const nsACString& topLevelURL = Substring(begin, iter++);
443 const nsACString& thirdPartyURL = Substring(iter_end, end);
444
445 /**
446 * For the top-level URL, we follow the host fragment rule defined
447 * in the Safe Browsing protocol.
448 */
449 nsTArray<nsCString> topLevelURLs;
450 topLevelURLs.AppendElement(topLevelURL);
451
452 if (!IsCanonicalizedIP(topLevelURL)) {
453 topLevelURL.BeginReading(begin);
454 topLevelURL.EndReading(end);
455 int numTopLevelURLComponents = 0;
456 while (RFindInReadable("."_ns, begin, end) &&
457 numTopLevelURLComponents < MAX_HOST_COMPONENTS) {
458 // don't bother checking toplevel domains
459 if (++numTopLevelURLComponents >= 2) {
460 topLevelURL.EndReading(iter);
461 topLevelURLs.AppendElement(Substring(end, iter));
462 }
463 end = begin;
464 topLevelURL.BeginReading(begin);
465 }
466 }
467
468 /**
469 * The whiltelisted domain in the entity list may be eTLD or eTLD+1.
470 * Since the number of the domain name part in the third-party URL searching
471 * is always less than or equal to eTLD+1, we remove the leading
472 * component from the third-party domain to make sure we can find a match
473 * if the exceptionlisted domain stoed in the entity list is eTLD.
474 */
475 nsTArray<nsCString> thirdPartyURLs;
476 thirdPartyURLs.AppendElement(thirdPartyURL);
477
478 if (!IsCanonicalizedIP(thirdPartyURL)) {
479 thirdPartyURL.BeginReading(iter);
480 thirdPartyURL.EndReading(end);
481 if (FindCharInReadable('.', iter, end)) {
482 iter++;
483 nsAutoCString thirdPartyURLToAdd;
484 thirdPartyURLToAdd.Assign(Substring(iter++, end));
485
486 // don't bother checking toplevel domains
487 if (FindCharInReadable('.', iter, end)) {
488 thirdPartyURLs.AppendElement(thirdPartyURLToAdd);
489 }
490 }
491 }
492
493 for (size_t i = 0; i < topLevelURLs.Length(); i++) {
494 for (size_t j = 0; j < thirdPartyURLs.Length(); j++) {
495 nsAutoCString key;
496 key.Assign(topLevelURLs[i]);
497 key.Append("/?resource=");
498 key.Append(thirdPartyURLs[j]);
499
500 aFragments->AppendElement(key);
501 }
502 }
503
504 return NS_OK;
505 }
506
// Computes the host/path lookup fragments for |aSpec| ("host/path?query")
// following the Safe Browsing protocol rules quoted below.
//
// |aFragments| is cleared first. If |aSpec| contains no '/', it is left
// empty and NS_OK is returned. Otherwise it receives "<host>/<path>" for
// every combination of the host and path variants computed here.
/* static */
nsresult LookupCache::GetLookupFragments(const nsACString& aSpec,
                                         nsTArray<nsCString>* aFragments)

{
  aFragments->Clear();

  nsACString::const_iterator begin, end, iter;
  aSpec.BeginReading(begin);
  aSpec.EndReading(end);

  // The first '/' separates the host from the path.
  iter = begin;
  if (!FindCharInReadable('/', iter, end)) {
    return NS_OK;
  }

  // |iter++| steps over the '/', so |path| does not start with a slash.
  const nsACString& host = Substring(begin, iter++);
  nsAutoCString path;
  path.Assign(Substring(iter, end));

  /**
   * From the protocol doc:
   * For the hostname, the client will try at most 5 different strings.  They
   * are:
   * a) The exact hostname of the url
   * b) The 4 hostnames formed by starting with the last 5 components and
   *    successively removing the leading component.  The top-level component
   *    can be skipped. This is not done if the hostname is a numerical IP.
   */
  nsTArray<nsCString> hosts;
  hosts.AppendElement(host);

  if (!IsCanonicalizedIP(host)) {
    host.BeginReading(begin);
    host.EndReading(end);
    int numHostComponents = 0;
    // Walk the dots of the host from right to left. After a successful
    // search, |end| is used as the position right after the found dot, i.e.
    // the start of the suffix to add.
    while (RFindInReadable("."_ns, begin, end) &&
           numHostComponents < MAX_HOST_COMPONENTS) {
      // don't bother checking toplevel domains
      if (++numHostComponents >= 2) {
        host.EndReading(iter);
        hosts.AppendElement(Substring(end, iter));
      }
      // Restrict the next search to the part left of the dot just found.
      end = begin;
      host.BeginReading(begin);
    }
  }

  /**
   * From the protocol doc:
   * For the path, the client will also try at most 6 different strings.
   * They are:
   * a) the exact path of the url, including query parameters
   * b) the exact path of the url, without query parameters
   * c) the 4 paths formed by starting at the root (/) and
   *    successively appending path components, including a trailing
   *    slash.  This behavior should only extend up to the next-to-last
   *    path component, that is, a trailing slash should never be
   *    appended that was not present in the original url.
   */
  nsTArray<nsCString> paths;
  nsAutoCString pathToAdd;

  path.BeginReading(begin);
  path.EndReading(end);
  iter = begin;
  // If there is a query string, add the path without it and limit the
  // component scan below to the part before the '?'.
  if (FindCharInReadable('?', iter, end)) {
    pathToAdd = Substring(begin, iter);
    paths.AppendElement(pathToAdd);
    end = iter;
  }

  // Add every leading directory of the path (each ending in '/'), bounded by
  // MAX_PATH_COMPONENTS.
  int numPathComponents = 1;
  iter = begin;
  while (FindCharInReadable('/', iter, end) &&
         numPathComponents < MAX_PATH_COMPONENTS) {
    iter++;
    pathToAdd.Assign(Substring(begin, iter));
    paths.AppendElement(pathToAdd);
    numPathComponents++;
  }

  // If we haven't already done so, add the full path
  if (!pathToAdd.Equals(path)) {
    paths.AppendElement(path);
  }
  // Check an empty path (for whole-domain blocklist entries)
  if (!paths.Contains(""_ns)) {
    paths.AppendElement(""_ns);
  }

  // Emit "<host>/<path>" for every host/path combination.
  for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
    for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
      nsCString key;
      key.Assign(hosts[hostIndex]);
      key.Append('/');
      key.Append(paths[pathIndex]);

      aFragments->AppendElement(key);
    }
  }

  return NS_OK;
}
611
LoadPrefixSet()612 nsresult LookupCache::LoadPrefixSet() {
613 nsCOMPtr<nsIFile> psFile;
614 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
615 NS_ENSURE_SUCCESS(rv, rv);
616
617 rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix());
618 NS_ENSURE_SUCCESS(rv, rv);
619
620 bool exists;
621 rv = psFile->Exists(&exists);
622 NS_ENSURE_SUCCESS(rv, rv);
623
624 if (exists) {
625 LOG(("stored PrefixSet exists, loading from disk"));
626 rv = LoadFromFile(psFile);
627 if (NS_FAILED(rv)) {
628 return rv;
629 }
630 mPrimed = true;
631 } else {
632 // The only scenario we load the old .pset file is when we haven't received
633 // a SafeBrowsng update before. After receiving an update, new .vlpset will
634 // be stored while old .pset will be removed.
635 if (NS_SUCCEEDED(LoadLegacyFile())) {
636 mPrimed = true;
637 } else {
638 LOG(("no (usable) stored PrefixSet found"));
639 }
640 }
641
642 #ifdef DEBUG
643 if (mPrimed) {
644 uint32_t size = SizeOfPrefixSet();
645 LOG(("SB tree done, size = %d bytes\n", size));
646 }
647 #endif
648
649 return NS_OK;
650 }
651
SizeOfPrefixSet() const652 size_t LookupCache::SizeOfPrefixSet() const {
653 return mVLPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
654 }
655
#if defined(DEBUG)
// Debug-only helper: logs every full-hash cache entry (hex prefix, negative
// cache expiry, and each cached full hash with its expiry). Does nothing
// unless debug logging is enabled.
void LookupCache::DumpCache() const {
  if (!LOG_ENABLED()) {
    return;
  }

  for (const auto& responseEntry : mFullHashCache) {
    CachedFullHashResponse* cached = responseEntry.GetWeak();

    // Render the raw bytes of the key as hex.
    nsCString rawPrefix(
        reinterpret_cast<const char*>(&responseEntry.GetKey()), PREFIX_SIZE);
    nsAutoCString hexPrefix;
    CStringToHexString(rawPrefix, hexPrefix);
    LOG(("Cache prefix(%s): %s, Expiry: %s", mTableName.get(),
         hexPrefix.get(),
         GetFormattedTimeString(cached->negativeCacheExpirySec).get()));

    FullHashExpiryCache& positiveCache = cached->fullHashes;
    for (const auto& positiveEntry : positiveCache) {
      nsAutoCString hexFullHash;
      CStringToHexString(positiveEntry.GetKey(), hexFullHash);
      LOG(("  - %s, Expiry: %s", hexFullHash.get(),
           GetFormattedTimeString(positiveEntry.GetData()).get()));
    }
  }
}
#endif
683
StoreToFile(nsCOMPtr<nsIFile> & aFile)684 nsresult LookupCache::StoreToFile(nsCOMPtr<nsIFile>& aFile) {
685 NS_ENSURE_ARG_POINTER(aFile);
686
687 uint32_t fileSize = sizeof(Header) +
688 mVLPrefixSet->CalculatePreallocateSize() +
689 nsCrc32CheckSumedOutputStream::CHECKSUM_SIZE;
690
691 nsCOMPtr<nsIOutputStream> localOutFile;
692 nsresult rv =
693 NS_NewSafeLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
694 PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
695 if (NS_WARN_IF(NS_FAILED(rv))) {
696 return rv;
697 }
698
699 // Preallocate the file storage
700 {
701 nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
702 Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FALLOCATE_TIME> timer;
703
704 Unused << fos->Preallocate(fileSize);
705 }
706
707 nsCOMPtr<nsIOutputStream> out;
708 rv = NS_NewCrc32OutputStream(getter_AddRefs(out), localOutFile.forget(),
709 std::min(fileSize, MAX_BUFFER_SIZE));
710
711 // Write header
712 Header header;
713 GetHeader(header);
714
715 rv = WriteValue(out, header);
716 if (NS_WARN_IF(NS_FAILED(rv))) {
717 return rv;
718 }
719
720 // Write prefixes
721 rv = mVLPrefixSet->WritePrefixes(out);
722 if (NS_WARN_IF(NS_FAILED(rv))) {
723 return rv;
724 }
725
726 // Write checksum
727 nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv);
728 if (NS_WARN_IF(NS_FAILED(rv))) {
729 return rv;
730 }
731
732 rv = safeOut->Finish();
733 if (NS_WARN_IF(NS_FAILED(rv))) {
734 return rv;
735 }
736
737 LOG(("[%s] Storing PrefixSet successful", mTableName.get()));
738
739 // This is to remove old ".pset" files if exist
740 Unused << ClearLegacyFile();
741 return NS_OK;
742 }
743
LoadFromFile(nsCOMPtr<nsIFile> & aFile)744 nsresult LookupCache::LoadFromFile(nsCOMPtr<nsIFile>& aFile) {
745 NS_ENSURE_ARG_POINTER(aFile);
746
747 Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FILELOAD_TIME> timer;
748
749 nsCOMPtr<nsIInputStream> localInFile;
750 nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
751 PR_RDONLY | nsIFile::OS_READAHEAD);
752 if (NS_WARN_IF(NS_FAILED(rv))) {
753 return rv;
754 }
755
756 // Calculate how big the file is, make sure our read buffer isn't bigger
757 // than the file itself which is just wasting memory.
758 int64_t fileSize;
759 rv = aFile->GetFileSize(&fileSize);
760 if (NS_WARN_IF(NS_FAILED(rv))) {
761 return rv;
762 }
763
764 if (fileSize < 0 || fileSize > UINT32_MAX) {
765 return NS_ERROR_FAILURE;
766 }
767
768 uint32_t bufferSize =
769 std::min<uint32_t>(static_cast<uint32_t>(fileSize), MAX_BUFFER_SIZE);
770
771 // Convert to buffered stream
772 nsCOMPtr<nsIInputStream> in;
773 rv = NS_NewBufferedInputStream(getter_AddRefs(in), localInFile.forget(),
774 bufferSize);
775 if (NS_WARN_IF(NS_FAILED(rv))) {
776 return rv;
777 }
778
779 // Load header
780 Header header;
781 rv = ReadValue(in, header);
782 if (NS_WARN_IF(NS_FAILED(rv))) {
783 LOG(("Failed to read header for %s", mTableName.get()));
784 return NS_ERROR_FILE_CORRUPTED;
785 }
786
787 rv = SanityCheck(header);
788 if (NS_WARN_IF(NS_FAILED(rv))) {
789 return rv;
790 }
791
792 // Load data
793 rv = mVLPrefixSet->LoadPrefixes(in);
794 if (NS_WARN_IF(NS_FAILED(rv))) {
795 return rv;
796 }
797
798 // Load crc32 checksum and verify
799 rv = VerifyCRC32(in);
800 if (NS_WARN_IF(NS_FAILED(rv))) {
801 return rv;
802 }
803
804 mPrimed = true;
805
806 LOG(("[%s] Loading PrefixSet successful", mTableName.get()));
807 return NS_OK;
808 }
809
810 // This function assumes CRC32 checksum is in the end of the input stream
VerifyCRC32(nsCOMPtr<nsIInputStream> & aIn)811 nsresult LookupCache::VerifyCRC32(nsCOMPtr<nsIInputStream>& aIn) {
812 nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(aIn);
813 nsresult rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, 0);
814 if (NS_WARN_IF(NS_FAILED(rv))) {
815 return rv;
816 }
817
818 uint64_t len;
819 rv = aIn->Available(&len);
820 if (NS_WARN_IF(NS_FAILED(rv))) {
821 return rv;
822 }
823
824 uint32_t calculateCrc32 = ~0;
825
826 // We don't want to include the checksum itself
827 len = len - nsCrc32CheckSumedOutputStream::CHECKSUM_SIZE;
828
829 static const uint64_t STREAM_BUFFER_SIZE = 4096;
830 char buffer[STREAM_BUFFER_SIZE];
831 while (len) {
832 uint32_t read;
833 uint64_t readLimit = std::min<uint64_t>(STREAM_BUFFER_SIZE, len);
834
835 rv = aIn->Read(buffer, readLimit, &read);
836 if (NS_WARN_IF(NS_FAILED(rv))) {
837 return rv;
838 }
839
840 calculateCrc32 = ComputeCrc32c(
841 calculateCrc32, reinterpret_cast<const uint8_t*>(buffer), read);
842
843 len -= read;
844 }
845
846 // Now read the CRC32
847 uint32_t crc32;
848 ReadValue(aIn, crc32);
849 if (NS_WARN_IF(NS_FAILED(rv))) {
850 return rv;
851 }
852
853 if (crc32 != calculateCrc32) {
854 return NS_ERROR_FILE_CORRUPTED;
855 }
856
857 return NS_OK;
858 }
859
Has(const Completion & aCompletion,bool * aHas,uint32_t * aMatchLength,bool * aConfirmed)860 nsresult LookupCacheV2::Has(const Completion& aCompletion, bool* aHas,
861 uint32_t* aMatchLength, bool* aConfirmed) {
862 *aHas = *aConfirmed = false;
863 *aMatchLength = 0;
864
865 uint32_t length = 0;
866 nsDependentCSubstring fullhash;
867 fullhash.Rebind((const char*)aCompletion.buf, COMPLETE_SIZE);
868
869 uint32_t prefix = aCompletion.ToUint32();
870
871 nsresult rv = mVLPrefixSet->Matches(prefix, fullhash, &length);
872 NS_ENSURE_SUCCESS(rv, rv);
873
874 if (length == 0) {
875 return NS_OK;
876 }
877
878 MOZ_ASSERT(length == PREFIX_SIZE || length == COMPLETE_SIZE);
879
880 *aHas = true;
881 *aMatchLength = length;
882 *aConfirmed = length == COMPLETE_SIZE;
883
884 if (!(*aConfirmed)) {
885 rv = CheckCache(aCompletion, aHas, aConfirmed);
886 }
887
888 return rv;
889 }
890
Build(AddPrefixArray & aAddPrefixes,AddCompleteArray & aAddCompletes)891 nsresult LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
892 AddCompleteArray& aAddCompletes) {
893 nsresult rv = mVLPrefixSet->SetPrefixes(aAddPrefixes, aAddCompletes);
894 if (NS_WARN_IF(NS_FAILED(rv))) {
895 return rv;
896 }
897 mPrimed = true;
898
899 return NS_OK;
900 }
901
GetPrefixes(FallibleTArray<uint32_t> & aAddPrefixes)902 nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes) {
903 if (!mPrimed) {
904 // This can happen if its a new table, so no error.
905 LOG(("GetPrefixes from empty LookupCache"));
906 return NS_OK;
907 }
908
909 return mVLPrefixSet->GetFixedLengthPrefixes(&aAddPrefixes, nullptr);
910 }
911
GetPrefixes(FallibleTArray<uint32_t> & aAddPrefixes,FallibleTArray<nsCString> & aAddCompletes)912 nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes,
913 FallibleTArray<nsCString>& aAddCompletes) {
914 if (!mPrimed) {
915 // This can happen if its a new table, so no error.
916 LOG(("GetHashes from empty LookupCache"));
917 return NS_OK;
918 }
919
920 return mVLPrefixSet->GetFixedLengthPrefixes(&aAddPrefixes, &aAddCompletes);
921 }
922
AddGethashResultToCache(const AddCompleteArray & aAddCompletes,const MissPrefixArray & aMissPrefixes,int64_t aExpirySec)923 void LookupCacheV2::AddGethashResultToCache(
924 const AddCompleteArray& aAddCompletes, const MissPrefixArray& aMissPrefixes,
925 int64_t aExpirySec) {
926 static const int64_t CACHE_DURATION_SEC = 15 * 60;
927 int64_t defaultExpirySec = PR_Now() / PR_USEC_PER_SEC + CACHE_DURATION_SEC;
928 if (aExpirySec != 0) {
929 defaultExpirySec = aExpirySec;
930 }
931
932 for (const AddComplete& add : aAddCompletes) {
933 nsDependentCSubstring fullhash(
934 reinterpret_cast<const char*>(add.CompleteHash().buf), COMPLETE_SIZE);
935
936 CachedFullHashResponse* response =
937 mFullHashCache.GetOrInsertNew(add.ToUint32());
938 response->negativeCacheExpirySec = defaultExpirySec;
939
940 FullHashExpiryCache& fullHashes = response->fullHashes;
941 fullHashes.InsertOrUpdate(fullhash, defaultExpirySec);
942 }
943
944 for (const Prefix& prefix : aMissPrefixes) {
945 CachedFullHashResponse* response =
946 mFullHashCache.GetOrInsertNew(prefix.ToUint32());
947
948 response->negativeCacheExpirySec = defaultExpirySec;
949 }
950 }
951
GetHeader(Header & aHeader)952 void LookupCacheV2::GetHeader(Header& aHeader) {
953 aHeader.magic = LookupCacheV2::VLPSET_MAGIC;
954 aHeader.version = LookupCacheV2::VLPSET_VERSION;
955 }
956
SanityCheck(const Header & aHeader)957 nsresult LookupCacheV2::SanityCheck(const Header& aHeader) {
958 if (aHeader.magic != LookupCacheV2::VLPSET_MAGIC) {
959 return NS_ERROR_FILE_CORRUPTED;
960 }
961
962 if (aHeader.version != LookupCacheV2::VLPSET_VERSION) {
963 return NS_ERROR_FAILURE;
964 }
965
966 return NS_OK;
967 }
968
LoadLegacyFile()969 nsresult LookupCacheV2::LoadLegacyFile() {
970 // Because mozilla Safe Browsing v2 server only includes completions
971 // in the update, we can simplify this function by only loading .sbtore
972 if (!mProvider.EqualsLiteral("mozilla")) {
973 return NS_OK;
974 }
975
976 HashStore store(mTableName, mProvider, mRootStoreDirectory);
977
978 // Support loading version 3 HashStore.
979 nsresult rv = store.Open(3);
980 NS_ENSURE_SUCCESS(rv, rv);
981
982 if (store.AddChunks().Length() == 0 && store.SubChunks().Length() == 0) {
983 // Return when file doesn't exist
984 return NS_OK;
985 }
986
987 AddPrefixArray prefix;
988 AddCompleteArray addComplete;
989
990 rv = store.ReadCompletionsLegacyV3(addComplete);
991 NS_ENSURE_SUCCESS(rv, rv);
992
993 return Build(prefix, addComplete);
994 }
995
ClearLegacyFile()996 nsresult LookupCacheV2::ClearLegacyFile() {
997 nsCOMPtr<nsIFile> file;
998 nsresult rv = mStoreDirectory->Clone(getter_AddRefs(file));
999 if (NS_WARN_IF(NS_FAILED(rv))) {
1000 return rv;
1001 }
1002
1003 rv = file->AppendNative(mTableName + ".pset"_ns);
1004 if (NS_WARN_IF(NS_FAILED(rv))) {
1005 return rv;
1006 }
1007
1008 bool exists;
1009 rv = file->Exists(&exists);
1010 if (NS_WARN_IF(NS_FAILED(rv))) {
1011 return rv;
1012 }
1013
1014 if (exists) {
1015 rv = file->Remove(false);
1016 if (NS_WARN_IF(NS_FAILED(rv))) {
1017 return rv;
1018 }
1019
1020 LOG(("[%s]Old PrefixSet is successfully removed!", mTableName.get()));
1021 }
1022
1023 return NS_OK;
1024 }
1025
GetPrefixSetSuffix() const1026 nsCString LookupCacheV2::GetPrefixSetSuffix() const { return ".vlpset"_ns; }
1027
1028 // Support creating built-in entries for phsihing, malware, unwanted, harmful,
1029 // tracking/tracking exceptionlist and flash block tables.
1030 //
LoadMozEntries()1031 nsresult LookupCacheV2::LoadMozEntries() {
1032 // We already have the entries, return
1033 if (!IsEmpty() || IsPrimed()) {
1034 return NS_OK;
1035 }
1036
1037 nsTArray<nsLiteralCString> entries;
1038
1039 if (mTableName.EqualsLiteral("moztest-phish-simple")) {
1040 // Entries for phishing table
1041 entries.AppendElement("itisatrap.org/firefox/its-a-trap.html"_ns);
1042 } else if (mTableName.EqualsLiteral("moztest-malware-simple")) {
1043 // Entries for malware table
1044 entries.AppendElement("itisatrap.org/firefox/its-an-attack.html"_ns);
1045 } else if (mTableName.EqualsLiteral("moztest-unwanted-simple")) {
1046 // Entries for unwanted table
1047 entries.AppendElement("itisatrap.org/firefox/unwanted.html"_ns);
1048 } else if (mTableName.EqualsLiteral("moztest-harmful-simple")) {
1049 // Entries for harmfule tables
1050 entries.AppendElement("itisatrap.org/firefox/harmful.html"_ns);
1051 } else if (mTableName.EqualsLiteral("moztest-track-simple")) {
1052 // Entries for tracking table
1053 entries.AppendElement("trackertest.org/"_ns);
1054 entries.AppendElement("itisatracker.org/"_ns);
1055 } else if (mTableName.EqualsLiteral("moztest-trackwhite-simple")) {
1056 // Entries for tracking entitylist table
1057 entries.AppendElement("itisatrap.org/?resource=itisatracker.org"_ns);
1058 } else if (mTableName.EqualsLiteral("moztest-block-simple")) {
1059 // Entries for flash block table
1060 entries.AppendElement("itisatrap.org/firefox/blocked.html"_ns);
1061 } else {
1062 MOZ_ASSERT_UNREACHABLE();
1063 }
1064
1065 AddPrefixArray prefix;
1066 AddCompleteArray completes;
1067 for (const auto& entry : entries) {
1068 AddComplete add;
1069 if (NS_FAILED(add.complete.FromPlaintext(entry))) {
1070 continue;
1071 }
1072 if (!completes.AppendElement(add, fallible)) {
1073 return NS_ERROR_OUT_OF_MEMORY;
1074 }
1075 }
1076
1077 return Build(prefix, completes);
1078 }
1079
1080 } // namespace safebrowsing
1081 } // namespace mozilla
1082