1 //* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #include "LookupCache.h"
7 #include "HashStore.h"
8 #include "nsISeekableStream.h"
9 #include "mozilla/Telemetry.h"
10 #include "mozilla/Logging.h"
11 #include "nsNetUtil.h"
12 #include "prprf.h"
13 #include "Classifier.h"
14 
15 // We act as the main entry point for all the real lookups,
16 // so note that those are not done to the actual HashStore.
17 // The latter solely exists to store the data needed to handle
18 // the updates from the protocol.
19 
20 // This module provides a front for PrefixSet, mUpdateCompletions,
21 // and mGetHashCache, which together contain everything needed to
22 // provide a classification as long as the data is up to date.
23 
24 // PrefixSet stores and provides lookups for 4-byte prefixes.
25 // mUpdateCompletions contains 32-byte completions which were
// contained in updates. They are retrieved from HashStore/.sbstore
27 // on startup.
28 // mGetHashCache contains 32-byte completions which were
29 // returned from the gethash server. They are not serialized,
30 // only cached until the next update.
31 
32 // Name of the persistent PrefixSet storage
33 #define PREFIXSET_SUFFIX  ".pset"
34 
35 // MOZ_LOG=UrlClassifierDbService:5
36 extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
37 #define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
38 #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
39 
40 namespace mozilla {
41 namespace safebrowsing {
42 
43 const int LookupCacheV2::VER = 2;
44 
LookupCache(const nsACString & aTableName,const nsACString & aProvider,nsIFile * aRootStoreDir)45 LookupCache::LookupCache(const nsACString& aTableName,
46                          const nsACString& aProvider,
47                          nsIFile* aRootStoreDir)
48   : mPrimed(false)
49   , mTableName(aTableName)
50   , mProvider(aProvider)
51   , mRootStoreDirectory(aRootStoreDir)
52 {
53   UpdateRootDirHandle(mRootStoreDirectory);
54 }
55 
56 nsresult
Open()57 LookupCache::Open()
58 {
59   LOG(("Loading PrefixSet"));
60   nsresult rv = LoadPrefixSet();
61   NS_ENSURE_SUCCESS(rv, rv);
62 
63   return NS_OK;
64 }
65 
66 nsresult
UpdateRootDirHandle(nsIFile * aNewRootStoreDirectory)67 LookupCache::UpdateRootDirHandle(nsIFile* aNewRootStoreDirectory)
68 {
69   nsresult rv;
70 
71   if (aNewRootStoreDirectory != mRootStoreDirectory) {
72     rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
73     NS_ENSURE_SUCCESS(rv, rv);
74   }
75 
76   rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory,
77                                             mTableName,
78                                             mProvider,
79                                             getter_AddRefs(mStoreDirectory));
80 
81   if (NS_FAILED(rv)) {
82     LOG(("Failed to get private store directory for %s", mTableName.get()));
83     mStoreDirectory = mRootStoreDirectory;
84   }
85 
86   if (LOG_ENABLED()) {
87     nsString path;
88     mStoreDirectory->GetPath(path);
89     LOG(("Private store directory for %s is %s", mTableName.get(),
90                                                  NS_ConvertUTF16toUTF8(path).get()));
91   }
92 
93   return rv;
94 }
95 
96 nsresult
Reset()97 LookupCache::Reset()
98 {
99   LOG(("LookupCache resetting"));
100 
101   nsCOMPtr<nsIFile> prefixsetFile;
102   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(prefixsetFile));
103   NS_ENSURE_SUCCESS(rv, rv);
104 
105   rv = prefixsetFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
106   NS_ENSURE_SUCCESS(rv, rv);
107 
108   rv = prefixsetFile->Remove(false);
109   NS_ENSURE_SUCCESS(rv, rv);
110 
111   ClearAll();
112 
113   return NS_OK;
114 }
115 
116 nsresult
AddCompletionsToCache(AddCompleteArray & aAddCompletes)117 LookupCache::AddCompletionsToCache(AddCompleteArray& aAddCompletes)
118 {
119   for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
120     if (mGetHashCache.BinaryIndexOf(aAddCompletes[i].CompleteHash()) == mGetHashCache.NoIndex) {
121       mGetHashCache.AppendElement(aAddCompletes[i].CompleteHash());
122     }
123   }
124   mGetHashCache.Sort();
125 
126   return NS_OK;
127 }
128 
#if defined(DEBUG)
// Debug helper: log every cached gethash completion as a hex string.
// Does nothing unless debug logging is enabled.
void
LookupCache::DumpCache()
{
  if (LOG_ENABLED()) {
    for (uint32_t idx = 0; idx < mGetHashCache.Length(); ++idx) {
      nsAutoCString hex;
      mGetHashCache[idx].ToHexString(hex);
      LOG(("Caches: %s", hex.get()));
    }
  }
}
#endif
143 
144 nsresult
WriteFile()145 LookupCache::WriteFile()
146 {
147   if (nsUrlClassifierDBService::ShutdownHasStarted()) {
148     return NS_ERROR_ABORT;
149   }
150 
151   nsCOMPtr<nsIFile> psFile;
152   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
153   NS_ENSURE_SUCCESS(rv, rv);
154 
155   rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
156   NS_ENSURE_SUCCESS(rv, rv);
157 
158   rv = StoreToFile(psFile);
159   NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset");
160 
161   return NS_OK;
162 }
163 
164 void
ClearAll()165 LookupCache::ClearAll()
166 {
167   ClearCache();
168   ClearPrefixes();
169   mPrimed = false;
170 }
171 
172 void
ClearCache()173 LookupCache::ClearCache()
174 {
175   mGetHashCache.Clear();
176 }
177 
178 /* static */ bool
IsCanonicalizedIP(const nsACString & aHost)179 LookupCache::IsCanonicalizedIP(const nsACString& aHost)
180 {
181   // The canonicalization process will have left IP addresses in dotted
182   // decimal with no surprises.
183   uint32_t i1, i2, i3, i4;
184   char c;
185   if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
186                 &i1, &i2, &i3, &i4, &c) == 4) {
187     return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
188   }
189 
190   return false;
191 }
192 
193 /* static */ nsresult
GetLookupFragments(const nsACString & aSpec,nsTArray<nsCString> * aFragments)194 LookupCache::GetLookupFragments(const nsACString& aSpec,
195                                 nsTArray<nsCString>* aFragments)
196 
197 {
198   aFragments->Clear();
199 
200   nsACString::const_iterator begin, end, iter;
201   aSpec.BeginReading(begin);
202   aSpec.EndReading(end);
203 
204   iter = begin;
205   if (!FindCharInReadable('/', iter, end)) {
206     return NS_OK;
207   }
208 
209   const nsCSubstring& host = Substring(begin, iter++);
210   nsAutoCString path;
211   path.Assign(Substring(iter, end));
212 
213   /**
214    * From the protocol doc:
215    * For the hostname, the client will try at most 5 different strings.  They
216    * are:
217    * a) The exact hostname of the url
218    * b) The 4 hostnames formed by starting with the last 5 components and
219    *    successivly removing the leading component.  The top-level component
220    *    can be skipped. This is not done if the hostname is a numerical IP.
221    */
222   nsTArray<nsCString> hosts;
223   hosts.AppendElement(host);
224 
225   if (!IsCanonicalizedIP(host)) {
226     host.BeginReading(begin);
227     host.EndReading(end);
228     int numHostComponents = 0;
229     while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
230            numHostComponents < MAX_HOST_COMPONENTS) {
231       // don't bother checking toplevel domains
232       if (++numHostComponents >= 2) {
233         host.EndReading(iter);
234         hosts.AppendElement(Substring(end, iter));
235       }
236       end = begin;
237       host.BeginReading(begin);
238     }
239   }
240 
241   /**
242    * From the protocol doc:
243    * For the path, the client will also try at most 6 different strings.
244    * They are:
245    * a) the exact path of the url, including query parameters
246    * b) the exact path of the url, without query parameters
247    * c) the 4 paths formed by starting at the root (/) and
248    *    successively appending path components, including a trailing
249    *    slash.  This behavior should only extend up to the next-to-last
250    *    path component, that is, a trailing slash should never be
251    *    appended that was not present in the original url.
252    */
253   nsTArray<nsCString> paths;
254   nsAutoCString pathToAdd;
255 
256   path.BeginReading(begin);
257   path.EndReading(end);
258   iter = begin;
259   if (FindCharInReadable('?', iter, end)) {
260     pathToAdd = Substring(begin, iter);
261     paths.AppendElement(pathToAdd);
262     end = iter;
263   }
264 
265   int numPathComponents = 1;
266   iter = begin;
267   while (FindCharInReadable('/', iter, end) &&
268          numPathComponents < MAX_PATH_COMPONENTS) {
269     iter++;
270     pathToAdd.Assign(Substring(begin, iter));
271     paths.AppendElement(pathToAdd);
272     numPathComponents++;
273   }
274 
275   // If we haven't already done so, add the full path
276   if (!pathToAdd.Equals(path)) {
277     paths.AppendElement(path);
278   }
279   // Check an empty path (for whole-domain blacklist entries)
280   paths.AppendElement(EmptyCString());
281 
282   for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
283     for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
284       nsCString key;
285       key.Assign(hosts[hostIndex]);
286       key.Append('/');
287       key.Append(paths[pathIndex]);
288       LOG(("Checking fragment %s", key.get()));
289 
290       aFragments->AppendElement(key);
291     }
292   }
293 
294   return NS_OK;
295 }
296 
297 /* static */ nsresult
GetHostKeys(const nsACString & aSpec,nsTArray<nsCString> * aHostKeys)298 LookupCache::GetHostKeys(const nsACString& aSpec,
299                          nsTArray<nsCString>* aHostKeys)
300 {
301   nsACString::const_iterator begin, end, iter;
302   aSpec.BeginReading(begin);
303   aSpec.EndReading(end);
304 
305   iter = begin;
306   if (!FindCharInReadable('/', iter, end)) {
307     return NS_OK;
308   }
309 
310   const nsCSubstring& host = Substring(begin, iter);
311 
312   if (IsCanonicalizedIP(host)) {
313     nsCString *key = aHostKeys->AppendElement();
314     if (!key)
315       return NS_ERROR_OUT_OF_MEMORY;
316 
317     key->Assign(host);
318     key->Append("/");
319     return NS_OK;
320   }
321 
322   nsTArray<nsCString> hostComponents;
323   ParseString(PromiseFlatCString(host), '.', hostComponents);
324 
325   if (hostComponents.Length() < 2) {
326     // no host or toplevel host, this won't match anything in the db
327     return NS_OK;
328   }
329 
330   // First check with two domain components
331   int32_t last = int32_t(hostComponents.Length()) - 1;
332   nsCString *lookupHost = aHostKeys->AppendElement();
333   if (!lookupHost)
334     return NS_ERROR_OUT_OF_MEMORY;
335 
336   lookupHost->Assign(hostComponents[last - 1]);
337   lookupHost->Append(".");
338   lookupHost->Append(hostComponents[last]);
339   lookupHost->Append("/");
340 
341   // Now check with three domain components
342   if (hostComponents.Length() > 2) {
343     nsCString *lookupHost2 = aHostKeys->AppendElement();
344     if (!lookupHost2)
345       return NS_ERROR_OUT_OF_MEMORY;
346     lookupHost2->Assign(hostComponents[last - 2]);
347     lookupHost2->Append(".");
348     lookupHost2->Append(*lookupHost);
349   }
350 
351   return NS_OK;
352 }
353 
354 nsresult
LoadPrefixSet()355 LookupCache::LoadPrefixSet()
356 {
357   nsCOMPtr<nsIFile> psFile;
358   nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
359   NS_ENSURE_SUCCESS(rv, rv);
360 
361   rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
362   NS_ENSURE_SUCCESS(rv, rv);
363 
364   bool exists;
365   rv = psFile->Exists(&exists);
366   NS_ENSURE_SUCCESS(rv, rv);
367 
368   if (exists) {
369     LOG(("stored PrefixSet exists, loading from disk"));
370     rv = LoadFromFile(psFile);
371     if (NS_FAILED(rv)) {
372       if (rv == NS_ERROR_FILE_CORRUPTED) {
373         Reset();
374       }
375       return rv;
376     }
377     mPrimed = true;
378   } else {
379     LOG(("no (usable) stored PrefixSet found"));
380   }
381 
382 #ifdef DEBUG
383   if (mPrimed) {
384     uint32_t size = SizeOfPrefixSet();
385     LOG(("SB tree done, size = %d bytes\n", size));
386   }
387 #endif
388 
389   return NS_OK;
390 }
391 
392 nsresult
Init()393 LookupCacheV2::Init()
394 {
395   mPrefixSet = new nsUrlClassifierPrefixSet();
396   nsresult rv = mPrefixSet->Init(mTableName);
397   NS_ENSURE_SUCCESS(rv, rv);
398 
399   return NS_OK;
400 }
401 
402 nsresult
Open()403 LookupCacheV2::Open()
404 {
405   nsresult rv = LookupCache::Open();
406   NS_ENSURE_SUCCESS(rv, rv);
407 
408   LOG(("Reading Completions"));
409   rv = ReadCompletions();
410   NS_ENSURE_SUCCESS(rv, rv);
411 
412   return NS_OK;
413 }
414 
415 void
ClearAll()416 LookupCacheV2::ClearAll()
417 {
418   LookupCache::ClearAll();
419   mUpdateCompletions.Clear();
420 }
421 
422 nsresult
Has(const Completion & aCompletion,bool * aHas,bool * aComplete)423 LookupCacheV2::Has(const Completion& aCompletion,
424                    bool* aHas, bool* aComplete)
425 {
426   *aHas = *aComplete = false;
427 
428   uint32_t prefix = aCompletion.ToUint32();
429 
430   bool found;
431   nsresult rv = mPrefixSet->Contains(prefix, &found);
432   NS_ENSURE_SUCCESS(rv, rv);
433 
434   LOG(("Probe in %s: %X, found %d", mTableName.get(), prefix, found));
435 
436   if (found) {
437     *aHas = true;
438   }
439 
440   if ((mGetHashCache.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex) ||
441       (mUpdateCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex)) {
442     LOG(("Complete in %s", mTableName.get()));
443     *aComplete = true;
444     *aHas = true;
445   }
446 
447   return NS_OK;
448 }
449 
450 nsresult
Build(AddPrefixArray & aAddPrefixes,AddCompleteArray & aAddCompletes)451 LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
452                      AddCompleteArray& aAddCompletes)
453 {
454   Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS,
455                         static_cast<uint32_t>(aAddCompletes.Length()));
456 
457   mUpdateCompletions.Clear();
458   mUpdateCompletions.SetCapacity(aAddCompletes.Length());
459   for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
460     mUpdateCompletions.AppendElement(aAddCompletes[i].CompleteHash());
461   }
462   aAddCompletes.Clear();
463   mUpdateCompletions.Sort();
464 
465   Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES,
466                         static_cast<uint32_t>(aAddPrefixes.Length()));
467 
468   nsresult rv = ConstructPrefixSet(aAddPrefixes);
469   NS_ENSURE_SUCCESS(rv, rv);
470   mPrimed = true;
471 
472   return NS_OK;
473 }
474 
475 nsresult
GetPrefixes(FallibleTArray<uint32_t> & aAddPrefixes)476 LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes)
477 {
478   if (!mPrimed) {
479     // This can happen if its a new table, so no error.
480     LOG(("GetPrefixes from empty LookupCache"));
481     return NS_OK;
482   }
483   return mPrefixSet->GetPrefixesNative(aAddPrefixes);
484 }
485 
486 nsresult
ReadCompletions()487 LookupCacheV2::ReadCompletions()
488 {
489   HashStore store(mTableName, mProvider, mRootStoreDirectory);
490 
491   nsresult rv = store.Open();
492   NS_ENSURE_SUCCESS(rv, rv);
493 
494   mUpdateCompletions.Clear();
495 
496   const AddCompleteArray& addComplete = store.AddCompletes();
497   for (uint32_t i = 0; i < addComplete.Length(); i++) {
498     mUpdateCompletions.AppendElement(addComplete[i].complete);
499   }
500 
501   return NS_OK;
502 }
503 
504 nsresult
ClearPrefixes()505 LookupCacheV2::ClearPrefixes()
506 {
507   return mPrefixSet->SetPrefixes(nullptr, 0);
508 }
509 
510 nsresult
StoreToFile(nsIFile * aFile)511 LookupCacheV2::StoreToFile(nsIFile* aFile)
512 {
513   return mPrefixSet->StoreToFile(aFile);
514 }
515 
516 nsresult
LoadFromFile(nsIFile * aFile)517 LookupCacheV2::LoadFromFile(nsIFile* aFile)
518 {
519   return mPrefixSet->LoadFromFile(aFile);
520 }
521 
522 size_t
SizeOfPrefixSet()523 LookupCacheV2::SizeOfPrefixSet()
524 {
525   return mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
526 }
527 
#ifdef DEBUG
// Debug-only check that |aArray| is in non-decreasing order: asserts
// *previous <= *iter for every pair of adjacent elements. Arrays with
// fewer than two elements pass trivially.
template <class T>
static void EnsureSorted(T* aArray)
{
  typename T::elem_type* previous = aArray->Elements();
  typename T::elem_type* const end = aArray->Elements() + aArray->Length();
  if (previous == end) {
    return;
  }

  for (typename T::elem_type* iter = previous + 1; iter != end;
       previous = iter, ++iter) {
    MOZ_ASSERT(*previous <= *iter);
  }
}
#endif
547 
548 nsresult
ConstructPrefixSet(AddPrefixArray & aAddPrefixes)549 LookupCacheV2::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
550 {
551   Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;
552 
553   nsTArray<uint32_t> array;
554   if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) {
555     return NS_ERROR_OUT_OF_MEMORY;
556   }
557 
558   for (uint32_t i = 0; i < aAddPrefixes.Length(); i++) {
559     array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32());
560   }
561   aAddPrefixes.Clear();
562 
563 #ifdef DEBUG
564   // PrefixSet requires sorted order
565   EnsureSorted(&array);
566 #endif
567 
568   // construct new one, replace old entries
569   nsresult rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
570   NS_ENSURE_SUCCESS(rv, rv);
571 
572 #ifdef DEBUG
573   uint32_t size;
574   size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
575   LOG(("SB tree done, size = %d bytes\n", size));
576 #endif
577 
578   mPrimed = true;
579 
580   return NS_OK;
581 }
582 
#if defined(DEBUG)
// Debug helper: log every update completion as a hex string. Does nothing
// unless debug logging is enabled.
void
LookupCacheV2::DumpCompletions()
{
  if (LOG_ENABLED()) {
    for (uint32_t idx = 0; idx < mUpdateCompletions.Length(); ++idx) {
      nsAutoCString hex;
      mUpdateCompletions[idx].ToHexString(hex);
      LOG(("Update: %s", hex.get()));
    }
  }
}
#endif
597 
598 } // namespace safebrowsing
599 } // namespace mozilla
600