1 //* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #include "LookupCache.h"
7 #include "HashStore.h"
8 #include "nsIFileStreams.h"
9 #include "nsISeekableStream.h"
10 #include "mozilla/ArrayUtils.h"
11 #include "mozilla/Telemetry.h"
12 #include "mozilla/Logging.h"
13 #include "nsNetUtil.h"
14 #include "nsCheckSummedOutputStream.h"
15 #include "crc32c.h"
16 #include "prprf.h"
17 #include "Classifier.h"
18 #include "nsUrlClassifierInfo.h"
19 
20 // We act as the main entry point for all the real lookups,
21 // so note that those are not done to the actual HashStore.
22 // The latter solely exists to store the data needed to handle
23 // the updates from the protocol.
24 
25 // This module provides a front for PrefixSet, mUpdateCompletions,
26 // and mGetHashCache, which together contain everything needed to
27 // provide a classification as long as the data is up to date.
28 
29 // PrefixSet stores and provides lookups for 4-byte prefixes.
// mUpdateCompletions contains 32-byte completions which were
// contained in updates. They are retrieved from HashStore/.sbstore
// on startup.
33 // mGetHashCache contains 32-byte completions which were
34 // returned from the gethash server. They are not serialized,
35 // only cached until the next update.
36 
// Log module used by this file; it is only declared here (extern) and is
// defined elsewhere. Enable it with:
// MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
// Emits a Debug-level message to the module above. |args| is a parenthesized
// printf-style argument list, e.g. LOG(("value = %d", value)).
#define LOG(args) \
  MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
// Evaluates to true when Debug-level logging is enabled for the module, so
// callers can skip work that only feeds log output.
#define LOG_ENABLED() \
  MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
43 
44 namespace mozilla {
45 namespace safebrowsing {
46 
// Upper bound (64 KiB) on the stream buffer size used when storing and
// loading the prefix set file (see StoreToFile / LoadFromFile).
const uint32_t LookupCache::MAX_BUFFER_SIZE = 64 * 1024;

// Version tags of the two cache result flavours.
const int CacheResultV2::VER = CacheResult::V2;
const int CacheResultV4::VER = CacheResult::V4;

// Version and file-format constants of the V2 lookup cache.
// NOTE(review): VLPSET_MAGIC / VLPSET_VERSION are presumably what the V2
// header is filled with and checked against; their users are not in this
// part of the file.
const int LookupCacheV2::VER = 2;
const uint32_t LookupCacheV2::VLPSET_MAGIC = 0xe5b862e7;
const uint32_t LookupCacheV2::VLPSET_VERSION = 1;
55 
56 namespace {
57 
58 //////////////////////////////////////////////////////////////////////////
59 // A set of lightweight functions for reading/writing value from/to file.
60 template <typename T>
61 struct ValueTraits {
62   static_assert(sizeof(T) <= LookupCacheV4::MAX_METADATA_VALUE_LENGTH,
63                 "LookupCacheV4::MAX_METADATA_VALUE_LENGTH is too small.");
Lengthmozilla::safebrowsing::__anon56b1ea280111::ValueTraits64   static uint32_t Length(const T& aValue) { return sizeof(T); }
WritePtrmozilla::safebrowsing::__anon56b1ea280111::ValueTraits65   static char* WritePtr(T& aValue, uint32_t aLength) { return (char*)&aValue; }
ReadPtrmozilla::safebrowsing::__anon56b1ea280111::ValueTraits66   static const char* ReadPtr(const T& aValue) { return (char*)&aValue; }
IsFixedLengthmozilla::safebrowsing::__anon56b1ea280111::ValueTraits67   static bool IsFixedLength() { return true; }
68 };
69 
70 template <>
71 struct ValueTraits<nsACString> {
IsFixedLengthmozilla::safebrowsing::__anon56b1ea280111::ValueTraits72   static bool IsFixedLength() { return false; }
73 
Lengthmozilla::safebrowsing::__anon56b1ea280111::ValueTraits74   static uint32_t Length(const nsACString& aValue) { return aValue.Length(); }
75 
WritePtrmozilla::safebrowsing::__anon56b1ea280111::ValueTraits76   static char* WritePtr(nsACString& aValue, uint32_t aLength) {
77     aValue.SetLength(aLength);
78     return aValue.BeginWriting();
79   }
80 
ReadPtrmozilla::safebrowsing::__anon56b1ea280111::ValueTraits81   static const char* ReadPtr(const nsACString& aValue) {
82     return aValue.BeginReading();
83   }
84 };
85 
86 template <typename T>
WriteValue(nsIOutputStream * aOutputStream,const T & aValue)87 static nsresult WriteValue(nsIOutputStream* aOutputStream, const T& aValue) {
88   uint32_t writeLength = ValueTraits<T>::Length(aValue);
89   MOZ_ASSERT(writeLength <= LookupCacheV4::MAX_METADATA_VALUE_LENGTH,
90              "LookupCacheV4::MAX_METADATA_VALUE_LENGTH is too small.");
91   if (!ValueTraits<T>::IsFixedLength()) {
92     // We need to write out the variable value length.
93     nsresult rv = WriteValue(aOutputStream, writeLength);
94     NS_ENSURE_SUCCESS(rv, rv);
95   }
96 
97   // Write out the value.
98   auto valueReadPtr = ValueTraits<T>::ReadPtr(aValue);
99   uint32_t written;
100   nsresult rv = aOutputStream->Write(valueReadPtr, writeLength, &written);
101   NS_ENSURE_SUCCESS(rv, rv);
102   if (NS_WARN_IF(written != writeLength)) {
103     return NS_ERROR_FAILURE;
104   }
105 
106   return rv;
107 }
108 
109 template <typename T>
ReadValue(nsIInputStream * aInputStream,T & aValue)110 static nsresult ReadValue(nsIInputStream* aInputStream, T& aValue) {
111   nsresult rv;
112 
113   uint32_t readLength;
114   if (ValueTraits<T>::IsFixedLength()) {
115     readLength = ValueTraits<T>::Length(aValue);
116   } else {
117     // Read the variable value length from file.
118     nsresult rv = ReadValue(aInputStream, readLength);
119     NS_ENSURE_SUCCESS(rv, rv);
120   }
121 
122   // Sanity-check the readLength in case of disk corruption
123   // (see bug 1433636).
124   if (readLength > LookupCacheV4::MAX_METADATA_VALUE_LENGTH) {
125     return NS_ERROR_FILE_CORRUPTED;
126   }
127 
128   // Read the value.
129   uint32_t read;
130   auto valueWritePtr = ValueTraits<T>::WritePtr(aValue, readLength);
131   rv = aInputStream->Read(valueWritePtr, readLength, &read);
132   if (NS_FAILED(rv) || read != readLength) {
133     LOG(("Failed to read the value."));
134     return NS_FAILED(rv) ? rv : NS_ERROR_FAILURE;
135   }
136 
137   return rv;
138 }
139 
CStringToHexString(const nsACString & aIn,nsACString & aOut)140 void CStringToHexString(const nsACString& aIn, nsACString& aOut) {
141   static const char* const lut = "0123456789ABCDEF";
142 
143   size_t len = aIn.Length();
144   MOZ_ASSERT(len <= COMPLETE_SIZE);
145 
146   aOut.SetCapacity(2 * len);
147   for (size_t i = 0; i < aIn.Length(); ++i) {
148     const char c = static_cast<char>(aIn[i]);
149     aOut.Append(lut[(c >> 4) & 0x0F]);
150     aOut.Append(lut[c & 15]);
151   }
152 }
153 
#ifdef DEBUG
// Debug-only helper: renders |aCurTimeSec| (seconds) as
// "YYYY-MM-DD HH:MM:SS UTC" using GMT time parameters.
nsCString GetFormattedTimeString(int64_t aCurTimeSec) {
  // PR_ExplodeTime takes microseconds, hence the scaling.
  PRExplodedTime exploded;
  PR_ExplodeTime(aCurTimeSec * PR_USEC_PER_SEC, PR_GMTParameters, &exploded);

  // The month is printed one higher than the value PR_ExplodeTime stores.
  return nsPrintfCString("%04d-%02d-%02d %02d:%02d:%02d UTC",
                         exploded.tm_year, exploded.tm_month + 1,
                         exploded.tm_mday, exploded.tm_hour, exploded.tm_min,
                         exploded.tm_sec);
}
#endif
164 
165 }  // end of unnamed namespace.
166 ////////////////////////////////////////////////////////////////////////
167 
// Creates the cache for table |aTableName| of provider |aProvider|, rooted
// at |aRootStoreDir|. The cache starts out unprimed (mPrimed == false).
LookupCache::LookupCache(const nsACString& aTableName,
                         const nsACString& aProvider,
                         nsCOMPtr<nsIFile>& aRootStoreDir)
    : mPrimed(false),
      mTableName(aTableName),
      mProvider(aProvider),
      mRootStoreDirectory(aRootStoreDir) {
  // Derive mStoreDirectory from the root directory. The member itself is
  // passed in, so the "directory changed" clone in UpdateRootDirHandle() is
  // skipped. The returned nsresult is not checked here.
  UpdateRootDirHandle(mRootStoreDirectory);
}
177 
Open()178 nsresult LookupCache::Open() {
179   LOG(("Loading PrefixSet for %s", mTableName.get()));
180   nsresult rv;
181   if (nsUrlClassifierUtils::IsMozTestTable(mTableName)) {
182     // For built-in test table, we don't load it from disk,
183     // test entries are directly added in memory.
184     rv = LoadMozEntries();
185   } else {
186     rv = LoadPrefixSet();
187   }
188 
189   Unused << NS_WARN_IF(NS_FAILED(rv));
190 
191   return rv;
192 }
193 
// Allocates a fresh VariableLengthPrefixSet for this cache and initializes
// it with the table name. Returns the prefix set's Init() error on failure,
// NS_OK otherwise.
nsresult LookupCache::Init() {
  mVLPrefixSet = new VariableLengthPrefixSet();
  nsresult rv = mVLPrefixSet->Init(mTableName);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}
201 
UpdateRootDirHandle(nsCOMPtr<nsIFile> & aNewRootStoreDirectory)202 nsresult LookupCache::UpdateRootDirHandle(
203     nsCOMPtr<nsIFile>& aNewRootStoreDirectory) {
204   nsresult rv;
205 
206   if (aNewRootStoreDirectory != mRootStoreDirectory) {
207     rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
208     NS_ENSURE_SUCCESS(rv, rv);
209   }
210 
211   rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory, mTableName,
212                                             mProvider,
213                                             getter_AddRefs(mStoreDirectory));
214 
215   if (NS_FAILED(rv)) {
216     LOG(("Failed to get private store directory for %s", mTableName.get()));
217     mStoreDirectory = mRootStoreDirectory;
218   }
219 
220   if (LOG_ENABLED()) {
221     nsString path;
222     mStoreDirectory->GetPath(path);
223     LOG(("Private store directory for %s is %s", mTableName.get(),
224          NS_ConvertUTF16toUTF8(path).get()));
225   }
226 
227   return rv;
228 }
229 
WriteFile()230 nsresult LookupCache::WriteFile() {
231   if (nsUrlClassifierDBService::ShutdownHasStarted()) {
232     return NS_ERROR_ABORT;
233   }
234 
235   nsCOMPtr<nsIFile> psFile;
236   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
237   if (NS_WARN_IF(NS_FAILED(rv))) {
238     return rv;
239   }
240 
241   rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix());
242   if (NS_WARN_IF(NS_FAILED(rv))) {
243     return rv;
244   }
245 
246   rv = StoreToFile(psFile);
247   if (NS_WARN_IF(NS_FAILED(rv))) {
248     LOG(("Failed to store the prefixset for table %s", mTableName.get()));
249     return rv;
250   }
251 
252   return NS_OK;
253 }
254 
CheckCache(const Completion & aCompletion,bool * aHas,bool * aConfirmed)255 nsresult LookupCache::CheckCache(const Completion& aCompletion, bool* aHas,
256                                  bool* aConfirmed) {
257   // Shouldn't call this function if prefix is not in the database.
258   MOZ_ASSERT(*aHas);
259 
260   *aConfirmed = false;
261 
262   uint32_t prefix = aCompletion.ToUint32();
263 
264   CachedFullHashResponse* fullHashResponse = mFullHashCache.Get(prefix);
265   if (!fullHashResponse) {
266     return NS_OK;
267   }
268 
269   int64_t nowSec = PR_Now() / PR_USEC_PER_SEC;
270   int64_t expiryTimeSec;
271 
272   FullHashExpiryCache& fullHashes = fullHashResponse->fullHashes;
273   nsDependentCSubstring completion(
274       reinterpret_cast<const char*>(aCompletion.buf), COMPLETE_SIZE);
275 
276   // Check if we can find the fullhash in positive cache
277   if (fullHashes.Get(completion, &expiryTimeSec)) {
278     if (nowSec <= expiryTimeSec) {
279       // Url is NOT safe.
280       *aConfirmed = true;
281       LOG(("Found a valid fullhash in the positive cache"));
282     } else {
283       // Trigger a gethash request in this case(aConfirmed is false).
284       LOG(("Found an expired fullhash in the positive cache"));
285 
286       // Remove fullhash entry from the cache when the negative cache
287       // is also expired because whether or not the fullhash is cached
288       // locally, we will need to consult the server next time we
289       // lookup this hash. We may as well remove it from our cache.
290       if (fullHashResponse->negativeCacheExpirySec < expiryTimeSec) {
291         fullHashes.Remove(completion);
292         if (fullHashes.Count() == 0 &&
293             fullHashResponse->negativeCacheExpirySec < nowSec) {
294           mFullHashCache.Remove(prefix);
295         }
296       }
297     }
298     return NS_OK;
299   }
300 
301   // Check negative cache.
302   if (fullHashResponse->negativeCacheExpirySec >= nowSec) {
303     // Url is safe.
304     LOG(("Found a valid prefix in the negative cache"));
305     *aHas = false;
306   } else {
307     LOG(("Found an expired prefix in the negative cache"));
308     if (fullHashes.Count() == 0) {
309       mFullHashCache.Remove(prefix);
310     }
311   }
312 
313   return NS_OK;
314 }
315 
// This function removes cache entries whose negative cache time has expired.
// It is possible for a cache entry whose positive cache time has not yet
// expired to still be removed by this call. Right now we call
// this on every update.
InvalidateExpiredCacheEntries()320 void LookupCache::InvalidateExpiredCacheEntries() {
321   int64_t nowSec = PR_Now() / PR_USEC_PER_SEC;
322 
323   for (auto iter = mFullHashCache.Iter(); !iter.Done(); iter.Next()) {
324     CachedFullHashResponse* response = iter.UserData();
325     if (response->negativeCacheExpirySec < nowSec) {
326       iter.Remove();
327     }
328   }
329 }
330 
CopyFullHashCache(const LookupCache * aSource)331 void LookupCache::CopyFullHashCache(const LookupCache* aSource) {
332   if (!aSource) {
333     return;
334   }
335 
336   CopyClassHashTable<FullHashResponseMap>(aSource->mFullHashCache,
337                                           mFullHashCache);
338 }
339 
ClearCache()340 void LookupCache::ClearCache() { mFullHashCache.Clear(); }
341 
// Resets the cache to an unprimed state: clears the full-hash cache and the
// prefix set, then marks the cache as not primed. The nsresult returned by
// ClearPrefixes() is not checked.
void LookupCache::ClearAll() {
  ClearCache();
  ClearPrefixes();
  mPrimed = false;
}
347 
// Empties the prefix set. Returns whatever SetPrefixes() reports.
nsresult LookupCache::ClearPrefixes() {
  // Clear by setting an empty map.
  PrefixStringMap map;
  return mVLPrefixSet->SetPrefixes(map);
}
353 
IsEmpty() const354 bool LookupCache::IsEmpty() const {
355   bool isEmpty;
356   mVLPrefixSet->IsEmpty(&isEmpty);
357   return isEmpty;
358 }
359 
GetCacheInfo(nsIUrlClassifierCacheInfo ** aCache) const360 void LookupCache::GetCacheInfo(nsIUrlClassifierCacheInfo** aCache) const {
361   MOZ_ASSERT(aCache);
362 
363   RefPtr<nsUrlClassifierCacheInfo> info = new nsUrlClassifierCacheInfo;
364   info->table = mTableName;
365 
366   for (const auto& cacheEntry : mFullHashCache) {
367     RefPtr<nsUrlClassifierCacheEntry> entry = new nsUrlClassifierCacheEntry;
368 
369     // Set prefix of the cache entry.
370     nsAutoCString prefix(reinterpret_cast<const char*>(&cacheEntry.GetKey()),
371                          PREFIX_SIZE);
372     CStringToHexString(prefix, entry->prefix);
373 
374     // Set expiry of the cache entry.
375     CachedFullHashResponse* response = cacheEntry.GetWeak();
376     entry->expirySec = response->negativeCacheExpirySec;
377 
378     // Set positive cache.
379     FullHashExpiryCache& fullHashes = response->fullHashes;
380     for (const auto& fullHashEntry : fullHashes) {
381       RefPtr<nsUrlClassifierPositiveCacheEntry> match =
382           new nsUrlClassifierPositiveCacheEntry;
383 
384       // Set fullhash of positive cache entry.
385       CStringToHexString(fullHashEntry.GetKey(), match->fullhash);
386 
387       // Set expiry of positive cache entry.
388       match->expirySec = fullHashEntry.GetData();
389 
390       entry->matches.AppendElement(
391           static_cast<nsIUrlClassifierPositiveCacheEntry*>(match));
392     }
393 
394     info->entries.AppendElement(
395         static_cast<nsIUrlClassifierCacheEntry*>(entry));
396   }
397 
398   info.forget(aCache);
399 }
400 
401 /* static */
IsCanonicalizedIP(const nsACString & aHost)402 bool LookupCache::IsCanonicalizedIP(const nsACString& aHost) {
403   // The canonicalization process will have left IP addresses in dotted
404   // decimal with no surprises.
405   uint32_t i1, i2, i3, i4;
406   char c;
407   if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c", &i1, &i2, &i3,
408                 &i4, &c) == 4) {
409     return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
410   }
411 
412   return false;
413 }
414 
// This is used when the URL is created by CreatePairwiseEntityListURI(),
// which returns a URI like "toplevel.page/?resource=third.party.domain".
// The fragment rule for the hostname (toplevel.page) is still the same
// as in the Safe Browsing protocol.
// The difference is that we always keep the path and query string and
// generate an additional fragment by removing the leading component of
// third.party.domain. This is to make sure we can find a match when an
// exceptionlisted domain is an eTLD.
423 /* static */
GetLookupEntitylistFragments(const nsACString & aSpec,nsTArray<nsCString> * aFragments)424 nsresult LookupCache::GetLookupEntitylistFragments(
425     const nsACString& aSpec, nsTArray<nsCString>* aFragments) {
426   aFragments->Clear();
427 
428   nsACString::const_iterator begin, end, iter, iter_end;
429   aSpec.BeginReading(begin);
430   aSpec.EndReading(end);
431 
432   iter = begin;
433   iter_end = end;
434 
435   // Fallback to use default fragment rule when the URL doesn't contain
436   // "/?resoruce=" because this means the URL is not generated in
437   // CreatePairwiseEntityListURI()
438   if (!FindInReadable("/?resource="_ns, iter, iter_end)) {
439     return GetLookupFragments(aSpec, aFragments);
440   }
441 
442   const nsACString& topLevelURL = Substring(begin, iter++);
443   const nsACString& thirdPartyURL = Substring(iter_end, end);
444 
445   /**
446    * For the top-level URL, we follow the host fragment rule defined
447    * in the Safe Browsing protocol.
448    */
449   nsTArray<nsCString> topLevelURLs;
450   topLevelURLs.AppendElement(topLevelURL);
451 
452   if (!IsCanonicalizedIP(topLevelURL)) {
453     topLevelURL.BeginReading(begin);
454     topLevelURL.EndReading(end);
455     int numTopLevelURLComponents = 0;
456     while (RFindInReadable("."_ns, begin, end) &&
457            numTopLevelURLComponents < MAX_HOST_COMPONENTS) {
458       // don't bother checking toplevel domains
459       if (++numTopLevelURLComponents >= 2) {
460         topLevelURL.EndReading(iter);
461         topLevelURLs.AppendElement(Substring(end, iter));
462       }
463       end = begin;
464       topLevelURL.BeginReading(begin);
465     }
466   }
467 
468   /**
469    * The whiltelisted domain in the entity list may be eTLD or eTLD+1.
470    * Since the number of the domain name part in the third-party URL searching
471    * is always less than or equal to eTLD+1, we remove the leading
472    * component from the third-party domain to make sure we can find a match
473    * if the exceptionlisted domain stoed in the entity list is eTLD.
474    */
475   nsTArray<nsCString> thirdPartyURLs;
476   thirdPartyURLs.AppendElement(thirdPartyURL);
477 
478   if (!IsCanonicalizedIP(thirdPartyURL)) {
479     thirdPartyURL.BeginReading(iter);
480     thirdPartyURL.EndReading(end);
481     if (FindCharInReadable('.', iter, end)) {
482       iter++;
483       nsAutoCString thirdPartyURLToAdd;
484       thirdPartyURLToAdd.Assign(Substring(iter++, end));
485 
486       // don't bother checking toplevel domains
487       if (FindCharInReadable('.', iter, end)) {
488         thirdPartyURLs.AppendElement(thirdPartyURLToAdd);
489       }
490     }
491   }
492 
493   for (size_t i = 0; i < topLevelURLs.Length(); i++) {
494     for (size_t j = 0; j < thirdPartyURLs.Length(); j++) {
495       nsAutoCString key;
496       key.Assign(topLevelURLs[i]);
497       key.Append("/?resource=");
498       key.Append(thirdPartyURLs[j]);
499 
500       aFragments->AppendElement(key);
501     }
502   }
503 
504   return NS_OK;
505 }
506 
507 /* static */
GetLookupFragments(const nsACString & aSpec,nsTArray<nsCString> * aFragments)508 nsresult LookupCache::GetLookupFragments(const nsACString& aSpec,
509                                          nsTArray<nsCString>* aFragments)
510 
511 {
512   aFragments->Clear();
513 
514   nsACString::const_iterator begin, end, iter;
515   aSpec.BeginReading(begin);
516   aSpec.EndReading(end);
517 
518   iter = begin;
519   if (!FindCharInReadable('/', iter, end)) {
520     return NS_OK;
521   }
522 
523   const nsACString& host = Substring(begin, iter++);
524   nsAutoCString path;
525   path.Assign(Substring(iter, end));
526 
527   /**
528    * From the protocol doc:
529    * For the hostname, the client will try at most 5 different strings.  They
530    * are:
531    * a) The exact hostname of the url
532    * b) The 4 hostnames formed by starting with the last 5 components and
533    *    successivly removing the leading component.  The top-level component
534    *    can be skipped. This is not done if the hostname is a numerical IP.
535    */
536   nsTArray<nsCString> hosts;
537   hosts.AppendElement(host);
538 
539   if (!IsCanonicalizedIP(host)) {
540     host.BeginReading(begin);
541     host.EndReading(end);
542     int numHostComponents = 0;
543     while (RFindInReadable("."_ns, begin, end) &&
544            numHostComponents < MAX_HOST_COMPONENTS) {
545       // don't bother checking toplevel domains
546       if (++numHostComponents >= 2) {
547         host.EndReading(iter);
548         hosts.AppendElement(Substring(end, iter));
549       }
550       end = begin;
551       host.BeginReading(begin);
552     }
553   }
554 
555   /**
556    * From the protocol doc:
557    * For the path, the client will also try at most 6 different strings.
558    * They are:
559    * a) the exact path of the url, including query parameters
560    * b) the exact path of the url, without query parameters
561    * c) the 4 paths formed by starting at the root (/) and
562    *    successively appending path components, including a trailing
563    *    slash.  This behavior should only extend up to the next-to-last
564    *    path component, that is, a trailing slash should never be
565    *    appended that was not present in the original url.
566    */
567   nsTArray<nsCString> paths;
568   nsAutoCString pathToAdd;
569 
570   path.BeginReading(begin);
571   path.EndReading(end);
572   iter = begin;
573   if (FindCharInReadable('?', iter, end)) {
574     pathToAdd = Substring(begin, iter);
575     paths.AppendElement(pathToAdd);
576     end = iter;
577   }
578 
579   int numPathComponents = 1;
580   iter = begin;
581   while (FindCharInReadable('/', iter, end) &&
582          numPathComponents < MAX_PATH_COMPONENTS) {
583     iter++;
584     pathToAdd.Assign(Substring(begin, iter));
585     paths.AppendElement(pathToAdd);
586     numPathComponents++;
587   }
588 
589   // If we haven't already done so, add the full path
590   if (!pathToAdd.Equals(path)) {
591     paths.AppendElement(path);
592   }
593   // Check an empty path (for whole-domain blocklist entries)
594   if (!paths.Contains(""_ns)) {
595     paths.AppendElement(""_ns);
596   }
597 
598   for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
599     for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
600       nsCString key;
601       key.Assign(hosts[hostIndex]);
602       key.Append('/');
603       key.Append(paths[pathIndex]);
604 
605       aFragments->AppendElement(key);
606     }
607   }
608 
609   return NS_OK;
610 }
611 
LoadPrefixSet()612 nsresult LookupCache::LoadPrefixSet() {
613   nsCOMPtr<nsIFile> psFile;
614   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
615   NS_ENSURE_SUCCESS(rv, rv);
616 
617   rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix());
618   NS_ENSURE_SUCCESS(rv, rv);
619 
620   bool exists;
621   rv = psFile->Exists(&exists);
622   NS_ENSURE_SUCCESS(rv, rv);
623 
624   if (exists) {
625     LOG(("stored PrefixSet exists, loading from disk"));
626     rv = LoadFromFile(psFile);
627     if (NS_FAILED(rv)) {
628       return rv;
629     }
630     mPrimed = true;
631   } else {
632     // The only scenario we load the old .pset file is when we haven't received
633     // a SafeBrowsng update before. After receiving an update, new .vlpset will
634     // be stored while old .pset will be removed.
635     if (NS_SUCCEEDED(LoadLegacyFile())) {
636       mPrimed = true;
637     } else {
638       LOG(("no (usable) stored PrefixSet found"));
639     }
640   }
641 
642 #ifdef DEBUG
643   if (mPrimed) {
644     uint32_t size = SizeOfPrefixSet();
645     LOG(("SB tree done, size = %d bytes\n", size));
646   }
647 #endif
648 
649   return NS_OK;
650 }
651 
// Returns the size of the prefix set as reported by its
// SizeOfIncludingThis(), measured with moz_malloc_size_of.
size_t LookupCache::SizeOfPrefixSet() const {
  return mVLPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
}
655 
#if defined(DEBUG)
// Debug-only: logs every cached prefix with its negative cache expiry,
// followed by the cached fullhashes for that prefix and their expiry
// times. Does nothing unless Debug logging is enabled.
void LookupCache::DumpCache() const {
  if (!LOG_ENABLED()) {
    return;
  }

  for (const auto& prefixEntry : mFullHashCache) {
    CachedFullHashResponse* response = prefixEntry.GetWeak();

    // Hex rendering of the raw key bytes.
    nsAutoCString prefixHex;
    CStringToHexString(
        nsCString(reinterpret_cast<const char*>(&prefixEntry.GetKey()),
                  PREFIX_SIZE),
        prefixHex);
    LOG(("Cache prefix(%s): %s, Expiry: %s", mTableName.get(),
         prefixHex.get(),
         GetFormattedTimeString(response->negativeCacheExpirySec).get()));

    for (const auto& positive : response->fullHashes) {
      nsAutoCString fullhashHex;
      CStringToHexString(positive.GetKey(), fullhashHex);
      LOG(("  - %s, Expiry: %s", fullhashHex.get(),
           GetFormattedTimeString(positive.GetData()).get()));
    }
  }
}
#endif
683 
StoreToFile(nsCOMPtr<nsIFile> & aFile)684 nsresult LookupCache::StoreToFile(nsCOMPtr<nsIFile>& aFile) {
685   NS_ENSURE_ARG_POINTER(aFile);
686 
687   uint32_t fileSize = sizeof(Header) +
688                       mVLPrefixSet->CalculatePreallocateSize() +
689                       nsCrc32CheckSumedOutputStream::CHECKSUM_SIZE;
690 
691   nsCOMPtr<nsIOutputStream> localOutFile;
692   nsresult rv =
693       NS_NewSafeLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
694                                       PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
695   if (NS_WARN_IF(NS_FAILED(rv))) {
696     return rv;
697   }
698 
699   // Preallocate the file storage
700   {
701     nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
702     Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FALLOCATE_TIME> timer;
703 
704     Unused << fos->Preallocate(fileSize);
705   }
706 
707   nsCOMPtr<nsIOutputStream> out;
708   rv = NS_NewCrc32OutputStream(getter_AddRefs(out), localOutFile.forget(),
709                                std::min(fileSize, MAX_BUFFER_SIZE));
710 
711   // Write header
712   Header header;
713   GetHeader(header);
714 
715   rv = WriteValue(out, header);
716   if (NS_WARN_IF(NS_FAILED(rv))) {
717     return rv;
718   }
719 
720   // Write prefixes
721   rv = mVLPrefixSet->WritePrefixes(out);
722   if (NS_WARN_IF(NS_FAILED(rv))) {
723     return rv;
724   }
725 
726   // Write checksum
727   nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv);
728   if (NS_WARN_IF(NS_FAILED(rv))) {
729     return rv;
730   }
731 
732   rv = safeOut->Finish();
733   if (NS_WARN_IF(NS_FAILED(rv))) {
734     return rv;
735   }
736 
737   LOG(("[%s] Storing PrefixSet successful", mTableName.get()));
738 
739   // This is to remove old ".pset" files if exist
740   Unused << ClearLegacyFile();
741   return NS_OK;
742 }
743 
LoadFromFile(nsCOMPtr<nsIFile> & aFile)744 nsresult LookupCache::LoadFromFile(nsCOMPtr<nsIFile>& aFile) {
745   NS_ENSURE_ARG_POINTER(aFile);
746 
747   Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FILELOAD_TIME> timer;
748 
749   nsCOMPtr<nsIInputStream> localInFile;
750   nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
751                                            PR_RDONLY | nsIFile::OS_READAHEAD);
752   if (NS_WARN_IF(NS_FAILED(rv))) {
753     return rv;
754   }
755 
756   // Calculate how big the file is, make sure our read buffer isn't bigger
757   // than the file itself which is just wasting memory.
758   int64_t fileSize;
759   rv = aFile->GetFileSize(&fileSize);
760   if (NS_WARN_IF(NS_FAILED(rv))) {
761     return rv;
762   }
763 
764   if (fileSize < 0 || fileSize > UINT32_MAX) {
765     return NS_ERROR_FAILURE;
766   }
767 
768   uint32_t bufferSize =
769       std::min<uint32_t>(static_cast<uint32_t>(fileSize), MAX_BUFFER_SIZE);
770 
771   // Convert to buffered stream
772   nsCOMPtr<nsIInputStream> in;
773   rv = NS_NewBufferedInputStream(getter_AddRefs(in), localInFile.forget(),
774                                  bufferSize);
775   if (NS_WARN_IF(NS_FAILED(rv))) {
776     return rv;
777   }
778 
779   // Load header
780   Header header;
781   rv = ReadValue(in, header);
782   if (NS_WARN_IF(NS_FAILED(rv))) {
783     LOG(("Failed to read header for %s", mTableName.get()));
784     return NS_ERROR_FILE_CORRUPTED;
785   }
786 
787   rv = SanityCheck(header);
788   if (NS_WARN_IF(NS_FAILED(rv))) {
789     return rv;
790   }
791 
792   // Load data
793   rv = mVLPrefixSet->LoadPrefixes(in);
794   if (NS_WARN_IF(NS_FAILED(rv))) {
795     return rv;
796   }
797 
798   // Load crc32 checksum and verify
799   rv = VerifyCRC32(in);
800   if (NS_WARN_IF(NS_FAILED(rv))) {
801     return rv;
802   }
803 
804   mPrimed = true;
805 
806   LOG(("[%s] Loading PrefixSet successful", mTableName.get()));
807   return NS_OK;
808 }
809 
810 // This function assumes CRC32 checksum is in the end of the input stream
VerifyCRC32(nsCOMPtr<nsIInputStream> & aIn)811 nsresult LookupCache::VerifyCRC32(nsCOMPtr<nsIInputStream>& aIn) {
812   nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(aIn);
813   nsresult rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, 0);
814   if (NS_WARN_IF(NS_FAILED(rv))) {
815     return rv;
816   }
817 
818   uint64_t len;
819   rv = aIn->Available(&len);
820   if (NS_WARN_IF(NS_FAILED(rv))) {
821     return rv;
822   }
823 
824   uint32_t calculateCrc32 = ~0;
825 
826   // We don't want to include the checksum itself
827   len = len - nsCrc32CheckSumedOutputStream::CHECKSUM_SIZE;
828 
829   static const uint64_t STREAM_BUFFER_SIZE = 4096;
830   char buffer[STREAM_BUFFER_SIZE];
831   while (len) {
832     uint32_t read;
833     uint64_t readLimit = std::min<uint64_t>(STREAM_BUFFER_SIZE, len);
834 
835     rv = aIn->Read(buffer, readLimit, &read);
836     if (NS_WARN_IF(NS_FAILED(rv))) {
837       return rv;
838     }
839 
840     calculateCrc32 = ComputeCrc32c(
841         calculateCrc32, reinterpret_cast<const uint8_t*>(buffer), read);
842 
843     len -= read;
844   }
845 
846   // Now read the CRC32
847   uint32_t crc32;
848   ReadValue(aIn, crc32);
849   if (NS_WARN_IF(NS_FAILED(rv))) {
850     return rv;
851   }
852 
853   if (crc32 != calculateCrc32) {
854     return NS_ERROR_FILE_CORRUPTED;
855   }
856 
857   return NS_OK;
858 }
859 
Has(const Completion & aCompletion,bool * aHas,uint32_t * aMatchLength,bool * aConfirmed)860 nsresult LookupCacheV2::Has(const Completion& aCompletion, bool* aHas,
861                             uint32_t* aMatchLength, bool* aConfirmed) {
862   *aHas = *aConfirmed = false;
863   *aMatchLength = 0;
864 
865   uint32_t length = 0;
866   nsDependentCSubstring fullhash;
867   fullhash.Rebind((const char*)aCompletion.buf, COMPLETE_SIZE);
868 
869   uint32_t prefix = aCompletion.ToUint32();
870 
871   nsresult rv = mVLPrefixSet->Matches(prefix, fullhash, &length);
872   NS_ENSURE_SUCCESS(rv, rv);
873 
874   if (length == 0) {
875     return NS_OK;
876   }
877 
878   MOZ_ASSERT(length == PREFIX_SIZE || length == COMPLETE_SIZE);
879 
880   *aHas = true;
881   *aMatchLength = length;
882   *aConfirmed = length == COMPLETE_SIZE;
883 
884   if (!(*aConfirmed)) {
885     rv = CheckCache(aCompletion, aHas, aConfirmed);
886   }
887 
888   return rv;
889 }
890 
// Replaces the content of the prefix set with |aAddPrefixes| and
// |aAddCompletes| and marks the cache as primed. On failure the error from
// SetPrefixes() is returned and mPrimed is left unchanged.
nsresult LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
                              AddCompleteArray& aAddCompletes) {
  nsresult rv = mVLPrefixSet->SetPrefixes(aAddPrefixes, aAddCompletes);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }
  mPrimed = true;

  return NS_OK;
}
901 
GetPrefixes(FallibleTArray<uint32_t> & aAddPrefixes)902 nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes) {
903   if (!mPrimed) {
904     // This can happen if its a new table, so no error.
905     LOG(("GetPrefixes from empty LookupCache"));
906     return NS_OK;
907   }
908 
909   return mVLPrefixSet->GetFixedLengthPrefixes(&aAddPrefixes, nullptr);
910 }
911 
GetPrefixes(FallibleTArray<uint32_t> & aAddPrefixes,FallibleTArray<nsCString> & aAddCompletes)912 nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes,
913                                     FallibleTArray<nsCString>& aAddCompletes) {
914   if (!mPrimed) {
915     // This can happen if its a new table, so no error.
916     LOG(("GetHashes from empty LookupCache"));
917     return NS_OK;
918   }
919 
920   return mVLPrefixSet->GetFixedLengthPrefixes(&aAddPrefixes, &aAddCompletes);
921 }
922 
AddGethashResultToCache(const AddCompleteArray & aAddCompletes,const MissPrefixArray & aMissPrefixes,int64_t aExpirySec)923 void LookupCacheV2::AddGethashResultToCache(
924     const AddCompleteArray& aAddCompletes, const MissPrefixArray& aMissPrefixes,
925     int64_t aExpirySec) {
926   static const int64_t CACHE_DURATION_SEC = 15 * 60;
927   int64_t defaultExpirySec = PR_Now() / PR_USEC_PER_SEC + CACHE_DURATION_SEC;
928   if (aExpirySec != 0) {
929     defaultExpirySec = aExpirySec;
930   }
931 
932   for (const AddComplete& add : aAddCompletes) {
933     nsDependentCSubstring fullhash(
934         reinterpret_cast<const char*>(add.CompleteHash().buf), COMPLETE_SIZE);
935 
936     CachedFullHashResponse* response =
937         mFullHashCache.GetOrInsertNew(add.ToUint32());
938     response->negativeCacheExpirySec = defaultExpirySec;
939 
940     FullHashExpiryCache& fullHashes = response->fullHashes;
941     fullHashes.InsertOrUpdate(fullhash, defaultExpirySec);
942   }
943 
944   for (const Prefix& prefix : aMissPrefixes) {
945     CachedFullHashResponse* response =
946         mFullHashCache.GetOrInsertNew(prefix.ToUint32());
947 
948     response->negativeCacheExpirySec = defaultExpirySec;
949   }
950 }
951 
GetHeader(Header & aHeader)952 void LookupCacheV2::GetHeader(Header& aHeader) {
953   aHeader.magic = LookupCacheV2::VLPSET_MAGIC;
954   aHeader.version = LookupCacheV2::VLPSET_VERSION;
955 }
956 
SanityCheck(const Header & aHeader)957 nsresult LookupCacheV2::SanityCheck(const Header& aHeader) {
958   if (aHeader.magic != LookupCacheV2::VLPSET_MAGIC) {
959     return NS_ERROR_FILE_CORRUPTED;
960   }
961 
962   if (aHeader.version != LookupCacheV2::VLPSET_VERSION) {
963     return NS_ERROR_FAILURE;
964   }
965 
966   return NS_OK;
967 }
968 
LoadLegacyFile()969 nsresult LookupCacheV2::LoadLegacyFile() {
970   // Because mozilla Safe Browsing v2 server only includes completions
971   // in the update, we can simplify this function by only loading .sbtore
972   if (!mProvider.EqualsLiteral("mozilla")) {
973     return NS_OK;
974   }
975 
976   HashStore store(mTableName, mProvider, mRootStoreDirectory);
977 
978   // Support loading version 3 HashStore.
979   nsresult rv = store.Open(3);
980   NS_ENSURE_SUCCESS(rv, rv);
981 
982   if (store.AddChunks().Length() == 0 && store.SubChunks().Length() == 0) {
983     // Return when file doesn't exist
984     return NS_OK;
985   }
986 
987   AddPrefixArray prefix;
988   AddCompleteArray addComplete;
989 
990   rv = store.ReadCompletionsLegacyV3(addComplete);
991   NS_ENSURE_SUCCESS(rv, rv);
992 
993   return Build(prefix, addComplete);
994 }
995 
ClearLegacyFile()996 nsresult LookupCacheV2::ClearLegacyFile() {
997   nsCOMPtr<nsIFile> file;
998   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(file));
999   if (NS_WARN_IF(NS_FAILED(rv))) {
1000     return rv;
1001   }
1002 
1003   rv = file->AppendNative(mTableName + ".pset"_ns);
1004   if (NS_WARN_IF(NS_FAILED(rv))) {
1005     return rv;
1006   }
1007 
1008   bool exists;
1009   rv = file->Exists(&exists);
1010   if (NS_WARN_IF(NS_FAILED(rv))) {
1011     return rv;
1012   }
1013 
1014   if (exists) {
1015     rv = file->Remove(false);
1016     if (NS_WARN_IF(NS_FAILED(rv))) {
1017       return rv;
1018     }
1019 
1020     LOG(("[%s]Old PrefixSet is successfully removed!", mTableName.get()));
1021   }
1022 
1023   return NS_OK;
1024 }
1025 
GetPrefixSetSuffix() const1026 nsCString LookupCacheV2::GetPrefixSetSuffix() const { return ".vlpset"_ns; }
1027 
1028 // Support creating built-in entries for phsihing, malware, unwanted, harmful,
1029 // tracking/tracking exceptionlist and flash block tables.
1030 //
LoadMozEntries()1031 nsresult LookupCacheV2::LoadMozEntries() {
1032   // We already have the entries, return
1033   if (!IsEmpty() || IsPrimed()) {
1034     return NS_OK;
1035   }
1036 
1037   nsTArray<nsLiteralCString> entries;
1038 
1039   if (mTableName.EqualsLiteral("moztest-phish-simple")) {
1040     // Entries for phishing table
1041     entries.AppendElement("itisatrap.org/firefox/its-a-trap.html"_ns);
1042   } else if (mTableName.EqualsLiteral("moztest-malware-simple")) {
1043     // Entries for malware table
1044     entries.AppendElement("itisatrap.org/firefox/its-an-attack.html"_ns);
1045   } else if (mTableName.EqualsLiteral("moztest-unwanted-simple")) {
1046     // Entries for unwanted table
1047     entries.AppendElement("itisatrap.org/firefox/unwanted.html"_ns);
1048   } else if (mTableName.EqualsLiteral("moztest-harmful-simple")) {
1049     // Entries for harmfule tables
1050     entries.AppendElement("itisatrap.org/firefox/harmful.html"_ns);
1051   } else if (mTableName.EqualsLiteral("moztest-track-simple")) {
1052     // Entries for tracking table
1053     entries.AppendElement("trackertest.org/"_ns);
1054     entries.AppendElement("itisatracker.org/"_ns);
1055   } else if (mTableName.EqualsLiteral("moztest-trackwhite-simple")) {
1056     // Entries for tracking entitylist table
1057     entries.AppendElement("itisatrap.org/?resource=itisatracker.org"_ns);
1058   } else if (mTableName.EqualsLiteral("moztest-block-simple")) {
1059     // Entries for flash block table
1060     entries.AppendElement("itisatrap.org/firefox/blocked.html"_ns);
1061   } else {
1062     MOZ_ASSERT_UNREACHABLE();
1063   }
1064 
1065   AddPrefixArray prefix;
1066   AddCompleteArray completes;
1067   for (const auto& entry : entries) {
1068     AddComplete add;
1069     if (NS_FAILED(add.complete.FromPlaintext(entry))) {
1070       continue;
1071     }
1072     if (!completes.AppendElement(add, fallible)) {
1073       return NS_ERROR_OUT_OF_MEMORY;
1074     }
1075   }
1076 
1077   return Build(prefix, completes);
1078 }
1079 
1080 }  // namespace safebrowsing
1081 }  // namespace mozilla
1082