1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsUrlClassifierPrefixSet.h"
8 #include "nsIUrlClassifierPrefixSet.h"
9 #include "nsCOMPtr.h"
10 #include "nsDebug.h"
11 #include "nsPrintfCString.h"
12 #include "nsTArray.h"
13 #include "nsString.h"
14 #include "nsIFile.h"
15 #include "nsToolkitCompsCID.h"
16 #include "nsTArray.h"
17 #include "nsThreadUtils.h"
18 #include "nsNetUtil.h"
19 #include "nsISeekableStream.h"
20 #include "nsIBufferedStreams.h"
21 #include "nsIFileStreams.h"
22 #include "mozilla/MemoryReporting.h"
23 #include "mozilla/Telemetry.h"
24 #include "mozilla/FileUtils.h"
25 #include "mozilla/Logging.h"
26 #include "mozilla/Unused.h"
27 #include <algorithm>
28 
29 using namespace mozilla;
30 
31 // MOZ_LOG=UrlClassifierPrefixSet:5
32 static LazyLogModule gUrlClassifierPrefixSetLog("UrlClassifierPrefixSet");
33 #define LOG(args) MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args)
34 #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug)
35 
36 NS_IMPL_ISUPPORTS(
37   nsUrlClassifierPrefixSet, nsIUrlClassifierPrefixSet, nsIMemoryReporter)
38 
39 // Definition required due to std::max<>()
40 const uint32_t nsUrlClassifierPrefixSet::MAX_BUFFER_SIZE;
41 
nsUrlClassifierPrefixSet()42 nsUrlClassifierPrefixSet::nsUrlClassifierPrefixSet()
43   : mLock("nsUrlClassifierPrefixSet.mLock")
44   , mTotalPrefixes(0)
45   , mMemoryReportPath()
46 {
47 }
48 
49 NS_IMETHODIMP
Init(const nsACString & aName)50 nsUrlClassifierPrefixSet::Init(const nsACString& aName)
51 {
52   mMemoryReportPath =
53     nsPrintfCString(
54       "explicit/storage/prefix-set/%s",
55       (!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!")
56     );
57 
58   RegisterWeakMemoryReporter(this);
59 
60   return NS_OK;
61 }
62 
~nsUrlClassifierPrefixSet()63 nsUrlClassifierPrefixSet::~nsUrlClassifierPrefixSet()
64 {
65   UnregisterWeakMemoryReporter(this);
66 }
67 
68 NS_IMETHODIMP
SetPrefixes(const uint32_t * aArray,uint32_t aLength)69 nsUrlClassifierPrefixSet::SetPrefixes(const uint32_t* aArray, uint32_t aLength)
70 {
71   MutexAutoLock lock(mLock);
72 
73   nsresult rv = NS_OK;
74 
75   if (aLength <= 0) {
76     if (mIndexPrefixes.Length() > 0) {
77       LOG(("Clearing PrefixSet"));
78       mIndexDeltas.Clear();
79       mIndexPrefixes.Clear();
80       mTotalPrefixes = 0;
81     }
82   } else {
83     rv = MakePrefixSet(aArray, aLength);
84   }
85 
86   return rv;
87 }
88 
89 nsresult
MakePrefixSet(const uint32_t * aPrefixes,uint32_t aLength)90 nsUrlClassifierPrefixSet::MakePrefixSet(const uint32_t* aPrefixes, uint32_t aLength)
91 {
92   mLock.AssertCurrentThreadOwns();
93 
94   if (aLength == 0) {
95     return NS_OK;
96   }
97 
98 #ifdef DEBUG
99   for (uint32_t i = 1; i < aLength; i++) {
100     MOZ_ASSERT(aPrefixes[i] >= aPrefixes[i-1]);
101   }
102 #endif
103 
104   mIndexPrefixes.Clear();
105   mIndexDeltas.Clear();
106   mTotalPrefixes = aLength;
107 
108   mIndexPrefixes.AppendElement(aPrefixes[0]);
109   mIndexDeltas.AppendElement();
110 
111   uint32_t numOfDeltas = 0;
112   uint32_t totalDeltas = 0;
113   uint32_t previousItem = aPrefixes[0];
114   for (uint32_t i = 1; i < aLength; i++) {
115     if ((numOfDeltas >= DELTAS_LIMIT) ||
116           (aPrefixes[i] - previousItem >= MAX_INDEX_DIFF)) {
117       // Compact the previous element.
118       // Note there is always at least one element when we get here,
119       // because we created the first element before the loop.
120       mIndexDeltas.LastElement().Compact();
121       mIndexDeltas.AppendElement();
122       mIndexPrefixes.AppendElement(aPrefixes[i]);
123       numOfDeltas = 0;
124     } else {
125       uint16_t delta = aPrefixes[i] - previousItem;
126       mIndexDeltas.LastElement().AppendElement(delta);
127       numOfDeltas++;
128       totalDeltas++;
129     }
130     previousItem = aPrefixes[i];
131   }
132 
133   mIndexDeltas.LastElement().Compact();
134   mIndexDeltas.Compact();
135   mIndexPrefixes.Compact();
136 
137   LOG(("Total number of indices: %d", aLength));
138   LOG(("Total number of deltas: %d", totalDeltas));
139   LOG(("Total number of delta chunks: %d", mIndexDeltas.Length()));
140 
141   return NS_OK;
142 }
143 
144 nsresult
GetPrefixesNative(FallibleTArray<uint32_t> & outArray)145 nsUrlClassifierPrefixSet::GetPrefixesNative(FallibleTArray<uint32_t>& outArray)
146 {
147   MutexAutoLock lock(mLock);
148 
149   if (!outArray.SetLength(mTotalPrefixes, fallible)) {
150     return NS_ERROR_OUT_OF_MEMORY;
151   }
152 
153   uint32_t prefixIdxLength = mIndexPrefixes.Length();
154   uint32_t prefixCnt = 0;
155 
156   for (uint32_t i = 0; i < prefixIdxLength; i++) {
157     uint32_t prefix = mIndexPrefixes[i];
158 
159     outArray[prefixCnt++] = prefix;
160     for (uint32_t j = 0; j < mIndexDeltas[i].Length(); j++) {
161       prefix += mIndexDeltas[i][j];
162       outArray[prefixCnt++] = prefix;
163     }
164   }
165 
166   NS_ASSERTION(mTotalPrefixes == prefixCnt, "Lengths are inconsistent");
167   return NS_OK;
168 }
169 
170 NS_IMETHODIMP
GetPrefixes(uint32_t * aCount,uint32_t ** aPrefixes)171 nsUrlClassifierPrefixSet::GetPrefixes(uint32_t* aCount,
172                                       uint32_t** aPrefixes)
173 {
174   // No need to get mLock here because this function does not directly touch
175   // the class's data members. (GetPrefixesNative() will get mLock, however.)
176 
177   NS_ENSURE_ARG_POINTER(aCount);
178   *aCount = 0;
179   NS_ENSURE_ARG_POINTER(aPrefixes);
180   *aPrefixes = nullptr;
181 
182   FallibleTArray<uint32_t> prefixes;
183   nsresult rv = GetPrefixesNative(prefixes);
184   if (NS_FAILED(rv)) {
185     return rv;
186   }
187 
188   uint64_t itemCount = prefixes.Length();
189   uint32_t* prefixArray = static_cast<uint32_t*>(moz_xmalloc(itemCount * sizeof(uint32_t)));
190   NS_ENSURE_TRUE(prefixArray, NS_ERROR_OUT_OF_MEMORY);
191 
192   memcpy(prefixArray, prefixes.Elements(), sizeof(uint32_t) * itemCount);
193 
194   *aCount = itemCount;
195   *aPrefixes = prefixArray;
196 
197   return NS_OK;
198 }
199 
BinSearch(uint32_t start,uint32_t end,uint32_t target)200 uint32_t nsUrlClassifierPrefixSet::BinSearch(uint32_t start,
201                                              uint32_t end,
202                                              uint32_t target)
203 {
204   mLock.AssertCurrentThreadOwns();
205 
206   while (start != end && end >= start) {
207     uint32_t i = start + ((end - start) >> 1);
208     uint32_t value = mIndexPrefixes[i];
209     if (value < target) {
210       start = i + 1;
211     } else if (value > target) {
212       end = i - 1;
213     } else {
214       return i;
215     }
216   }
217   return end;
218 }
219 
220 NS_IMETHODIMP
Contains(uint32_t aPrefix,bool * aFound)221 nsUrlClassifierPrefixSet::Contains(uint32_t aPrefix, bool* aFound)
222 {
223   MutexAutoLock lock(mLock);
224 
225   *aFound = false;
226 
227   if (mIndexPrefixes.Length() == 0) {
228     return NS_OK;
229   }
230 
231   uint32_t target = aPrefix;
232 
233   // We want to do a "Price is Right" binary search, that is, we want to find
234   // the index of the value either equal to the target or the closest value
235   // that is less than the target.
236   //
237   if (target < mIndexPrefixes[0]) {
238     return NS_OK;
239   }
240 
241   // |binsearch| does not necessarily return the correct index (when the
242   // target is not found) but rather it returns an index at least one away
243   // from the correct index.
244   // Because of this, we need to check if the target lies before the beginning
245   // of the indices.
246 
247   uint32_t i = BinSearch(0, mIndexPrefixes.Length() - 1, target);
248   if (mIndexPrefixes[i] > target && i > 0) {
249     i--;
250   }
251 
252   // Now search through the deltas for the target.
253   uint32_t diff = target - mIndexPrefixes[i];
254   uint32_t deltaSize  = mIndexDeltas[i].Length();
255   uint32_t deltaIndex = 0;
256 
257   while (diff > 0 && deltaIndex < deltaSize) {
258     diff -= mIndexDeltas[i][deltaIndex];
259     deltaIndex++;
260   }
261 
262   if (diff == 0) {
263     *aFound = true;
264   }
265 
266   return NS_OK;
267 }
268 
MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)269 MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)
270 
271 NS_IMETHODIMP
272 nsUrlClassifierPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport,
273                                          nsISupports* aData, bool aAnonymize)
274 {
275   MOZ_ASSERT(NS_IsMainThread());
276 
277   // No need to get mLock here because this function does not directly touch
278   // the class's data members. (SizeOfIncludingThis() will get mLock, however.)
279 
280   aHandleReport->Callback(
281     EmptyCString(), mMemoryReportPath, KIND_HEAP, UNITS_BYTES,
282     SizeOfIncludingThis(UrlClassifierMallocSizeOf),
283     NS_LITERAL_CSTRING("Memory used by the prefix set for a URL classifier."),
284     aData);
285 
286   return NS_OK;
287 }
288 
289 size_t
SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)290 nsUrlClassifierPrefixSet::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
291 {
292   MutexAutoLock lock(mLock);
293 
294   size_t n = 0;
295   n += aMallocSizeOf(this);
296   n += mIndexDeltas.ShallowSizeOfExcludingThis(aMallocSizeOf);
297   for (uint32_t i = 0; i < mIndexDeltas.Length(); i++) {
298     n += mIndexDeltas[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
299   }
300   n += mIndexPrefixes.ShallowSizeOfExcludingThis(aMallocSizeOf);
301   return n;
302 }
303 
304 NS_IMETHODIMP
IsEmpty(bool * aEmpty)305 nsUrlClassifierPrefixSet::IsEmpty(bool * aEmpty)
306 {
307   MutexAutoLock lock(mLock);
308 
309   *aEmpty = (mIndexPrefixes.Length() == 0);
310   return NS_OK;
311 }
312 
313 NS_IMETHODIMP
LoadFromFile(nsIFile * aFile)314 nsUrlClassifierPrefixSet::LoadFromFile(nsIFile* aFile)
315 {
316   MutexAutoLock lock(mLock);
317 
318   Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FILELOAD_TIME> timer;
319 
320   nsCOMPtr<nsIInputStream> localInFile;
321   nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
322                                            PR_RDONLY | nsIFile::OS_READAHEAD);
323   NS_ENSURE_SUCCESS(rv, rv);
324 
325   // Calculate how big the file is, make sure our read buffer isn't bigger
326   // than the file itself which is just wasting memory.
327   int64_t fileSize;
328   rv = aFile->GetFileSize(&fileSize);
329   NS_ENSURE_SUCCESS(rv, rv);
330 
331   if (fileSize < 0 || fileSize > UINT32_MAX) {
332     return NS_ERROR_FAILURE;
333   }
334 
335   uint32_t bufferSize = std::min<uint32_t>(static_cast<uint32_t>(fileSize),
336                                            MAX_BUFFER_SIZE);
337 
338   // Convert to buffered stream
339   nsCOMPtr<nsIInputStream> in = NS_BufferInputStream(localInFile, bufferSize);
340 
341   rv = LoadPrefixes(in);
342   NS_ENSURE_SUCCESS(rv, rv);
343 
344   return NS_OK;
345 }
346 
347 NS_IMETHODIMP
StoreToFile(nsIFile * aFile)348 nsUrlClassifierPrefixSet::StoreToFile(nsIFile* aFile)
349 {
350   MutexAutoLock lock(mLock);
351 
352   nsCOMPtr<nsIOutputStream> localOutFile;
353   nsresult rv = NS_NewLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
354                                             PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
355   NS_ENSURE_SUCCESS(rv, rv);
356 
357   uint32_t fileSize;
358 
359   // Preallocate the file storage
360   {
361     nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
362     Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FALLOCATE_TIME> timer;
363 
364     fileSize = CalculatePreallocateSize();
365 
366     // Ignore failure, the preallocation is a hint and we write out the entire
367     // file later on
368     Unused << fos->Preallocate(fileSize);
369   }
370 
371   // Convert to buffered stream
372   nsCOMPtr<nsIOutputStream> out =
373     NS_BufferOutputStream(localOutFile, std::min(fileSize, MAX_BUFFER_SIZE));
374 
375   rv = WritePrefixes(out);
376   NS_ENSURE_SUCCESS(rv, rv);
377 
378   LOG(("Saving PrefixSet successful\n"));
379 
380   return NS_OK;
381 }
382 
383 nsresult
LoadPrefixes(nsIInputStream * in)384 nsUrlClassifierPrefixSet::LoadPrefixes(nsIInputStream* in)
385 {
386   uint32_t magic;
387   uint32_t read;
388 
389   nsresult rv = in->Read(reinterpret_cast<char*>(&magic), sizeof(uint32_t), &read);
390   NS_ENSURE_SUCCESS(rv, rv);
391   NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
392 
393   if (magic == PREFIXSET_VERSION_MAGIC) {
394     uint32_t indexSize;
395     uint32_t deltaSize;
396 
397     rv = in->Read(reinterpret_cast<char*>(&indexSize), sizeof(uint32_t), &read);
398     NS_ENSURE_SUCCESS(rv, rv);
399     NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
400 
401     rv = in->Read(reinterpret_cast<char*>(&deltaSize), sizeof(uint32_t), &read);
402     NS_ENSURE_SUCCESS(rv, rv);
403     NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
404 
405     if (indexSize == 0) {
406       LOG(("stored PrefixSet is empty!"));
407       return NS_OK;
408     }
409 
410     if (deltaSize > (indexSize * DELTAS_LIMIT)) {
411       return NS_ERROR_FILE_CORRUPTED;
412     }
413 
414     nsTArray<uint32_t> indexStarts;
415     indexStarts.SetLength(indexSize);
416     mIndexPrefixes.SetLength(indexSize);
417     mIndexDeltas.SetLength(indexSize);
418 
419     mTotalPrefixes = indexSize;
420 
421     uint32_t toRead = indexSize*sizeof(uint32_t);
422     rv = in->Read(reinterpret_cast<char*>(mIndexPrefixes.Elements()), toRead, &read);
423     NS_ENSURE_SUCCESS(rv, rv);
424     NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
425 
426     rv = in->Read(reinterpret_cast<char*>(indexStarts.Elements()), toRead, &read);
427     NS_ENSURE_SUCCESS(rv, rv);
428     NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
429 
430     if (indexSize != 0 && indexStarts[0] != 0) {
431       return NS_ERROR_FILE_CORRUPTED;
432     }
433     for (uint32_t i = 0; i < indexSize; i++) {
434       uint32_t numInDelta = i == indexSize - 1 ? deltaSize - indexStarts[i]
435                                : indexStarts[i + 1] - indexStarts[i];
436       if (numInDelta > DELTAS_LIMIT) {
437         return NS_ERROR_FILE_CORRUPTED;
438       }
439       if (numInDelta > 0) {
440         mIndexDeltas[i].SetLength(numInDelta);
441         mTotalPrefixes += numInDelta;
442         toRead = numInDelta * sizeof(uint16_t);
443         rv = in->Read(reinterpret_cast<char*>(mIndexDeltas[i].Elements()), toRead, &read);
444         NS_ENSURE_SUCCESS(rv, rv);
445         NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
446       }
447     }
448   } else {
449     LOG(("Version magic mismatch, not loading"));
450     return NS_ERROR_FILE_CORRUPTED;
451   }
452 
453   MOZ_ASSERT(mIndexPrefixes.Length() == mIndexDeltas.Length());
454   LOG(("Loading PrefixSet successful"));
455 
456   return NS_OK;
457 }
458 
459 uint32_t
CalculatePreallocateSize()460 nsUrlClassifierPrefixSet::CalculatePreallocateSize()
461 {
462   uint32_t fileSize = 4 * sizeof(uint32_t);
463   uint32_t deltas = mTotalPrefixes - mIndexPrefixes.Length();
464   fileSize += 2 * mIndexPrefixes.Length() * sizeof(uint32_t);
465   fileSize += deltas * sizeof(uint16_t);
466   return fileSize;
467 }
468 
469 nsresult
WritePrefixes(nsIOutputStream * out)470 nsUrlClassifierPrefixSet::WritePrefixes(nsIOutputStream* out)
471 {
472   uint32_t written;
473   uint32_t writelen = sizeof(uint32_t);
474   uint32_t magic = PREFIXSET_VERSION_MAGIC;
475   nsresult rv = out->Write(reinterpret_cast<char*>(&magic), writelen, &written);
476   NS_ENSURE_SUCCESS(rv, rv);
477   NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
478 
479   uint32_t indexSize = mIndexPrefixes.Length();
480   uint32_t indexDeltaSize = mIndexDeltas.Length();
481   uint32_t totalDeltas = 0;
482 
483   // Store the shape of mIndexDeltas by noting at which "count" of total
484   // indexes a new subarray starts. This is slightly cumbersome but keeps
485   // file format compatibility.
486   // If we ever update the format, we can gain space by storing the delta
487   // subarray sizes, which fit in bytes.
488   nsTArray<uint32_t> indexStarts;
489   indexStarts.AppendElement(0);
490 
491   for (uint32_t i = 0; i < indexDeltaSize; i++) {
492     uint32_t deltaLength = mIndexDeltas[i].Length();
493     totalDeltas += deltaLength;
494     indexStarts.AppendElement(totalDeltas);
495   }
496 
497   rv = out->Write(reinterpret_cast<char*>(&indexSize), writelen, &written);
498   NS_ENSURE_SUCCESS(rv, rv);
499   NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
500 
501   rv = out->Write(reinterpret_cast<char*>(&totalDeltas), writelen, &written);
502   NS_ENSURE_SUCCESS(rv, rv);
503   NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
504 
505   writelen = indexSize * sizeof(uint32_t);
506   rv = out->Write(reinterpret_cast<char*>(mIndexPrefixes.Elements()), writelen, &written);
507   NS_ENSURE_SUCCESS(rv, rv);
508   NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
509 
510   rv = out->Write(reinterpret_cast<char*>(indexStarts.Elements()), writelen, &written);
511   NS_ENSURE_SUCCESS(rv, rv);
512   NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
513 
514   if (totalDeltas > 0) {
515     for (uint32_t i = 0; i < indexDeltaSize; i++) {
516       writelen = mIndexDeltas[i].Length() * sizeof(uint16_t);
517       rv = out->Write(reinterpret_cast<char*>(mIndexDeltas[i].Elements()), writelen, &written);
518       NS_ENSURE_SUCCESS(rv, rv);
519       NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
520     }
521   }
522 
523   LOG(("Saving PrefixSet successful\n"));
524 
525   return NS_OK;
526 }
527