1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "nsUrlClassifierPrefixSet.h"
8 #include "nsIUrlClassifierPrefixSet.h"
9 #include "nsCOMPtr.h"
10 #include "nsDebug.h"
11 #include "nsPrintfCString.h"
12 #include "nsTArray.h"
13 #include "nsString.h"
14 #include "nsIFile.h"
15 #include "nsToolkitCompsCID.h"
16 #include "nsTArray.h"
17 #include "nsThreadUtils.h"
18 #include "nsNetUtil.h"
19 #include "nsISeekableStream.h"
20 #include "nsIBufferedStreams.h"
21 #include "nsIFileStreams.h"
22 #include "mozilla/MemoryReporting.h"
23 #include "mozilla/Telemetry.h"
24 #include "mozilla/FileUtils.h"
25 #include "mozilla/Logging.h"
26 #include "mozilla/Unused.h"
27 #include <algorithm>
28
29 using namespace mozilla;
30
31 // MOZ_LOG=UrlClassifierPrefixSet:5
32 static LazyLogModule gUrlClassifierPrefixSetLog("UrlClassifierPrefixSet");
33 #define LOG(args) MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args)
34 #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug)
35
36 NS_IMPL_ISUPPORTS(
37 nsUrlClassifierPrefixSet, nsIUrlClassifierPrefixSet, nsIMemoryReporter)
38
39 // Definition required due to std::max<>()
40 const uint32_t nsUrlClassifierPrefixSet::MAX_BUFFER_SIZE;
41
nsUrlClassifierPrefixSet()42 nsUrlClassifierPrefixSet::nsUrlClassifierPrefixSet()
43 : mLock("nsUrlClassifierPrefixSet.mLock")
44 , mTotalPrefixes(0)
45 , mMemoryReportPath()
46 {
47 }
48
49 NS_IMETHODIMP
Init(const nsACString & aName)50 nsUrlClassifierPrefixSet::Init(const nsACString& aName)
51 {
52 mMemoryReportPath =
53 nsPrintfCString(
54 "explicit/storage/prefix-set/%s",
55 (!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!")
56 );
57
58 RegisterWeakMemoryReporter(this);
59
60 return NS_OK;
61 }
62
~nsUrlClassifierPrefixSet()63 nsUrlClassifierPrefixSet::~nsUrlClassifierPrefixSet()
64 {
65 UnregisterWeakMemoryReporter(this);
66 }
67
68 NS_IMETHODIMP
SetPrefixes(const uint32_t * aArray,uint32_t aLength)69 nsUrlClassifierPrefixSet::SetPrefixes(const uint32_t* aArray, uint32_t aLength)
70 {
71 MutexAutoLock lock(mLock);
72
73 nsresult rv = NS_OK;
74
75 if (aLength <= 0) {
76 if (mIndexPrefixes.Length() > 0) {
77 LOG(("Clearing PrefixSet"));
78 mIndexDeltas.Clear();
79 mIndexPrefixes.Clear();
80 mTotalPrefixes = 0;
81 }
82 } else {
83 rv = MakePrefixSet(aArray, aLength);
84 }
85
86 return rv;
87 }
88
89 nsresult
MakePrefixSet(const uint32_t * aPrefixes,uint32_t aLength)90 nsUrlClassifierPrefixSet::MakePrefixSet(const uint32_t* aPrefixes, uint32_t aLength)
91 {
92 mLock.AssertCurrentThreadOwns();
93
94 if (aLength == 0) {
95 return NS_OK;
96 }
97
98 #ifdef DEBUG
99 for (uint32_t i = 1; i < aLength; i++) {
100 MOZ_ASSERT(aPrefixes[i] >= aPrefixes[i-1]);
101 }
102 #endif
103
104 mIndexPrefixes.Clear();
105 mIndexDeltas.Clear();
106 mTotalPrefixes = aLength;
107
108 mIndexPrefixes.AppendElement(aPrefixes[0]);
109 mIndexDeltas.AppendElement();
110
111 uint32_t numOfDeltas = 0;
112 uint32_t totalDeltas = 0;
113 uint32_t previousItem = aPrefixes[0];
114 for (uint32_t i = 1; i < aLength; i++) {
115 if ((numOfDeltas >= DELTAS_LIMIT) ||
116 (aPrefixes[i] - previousItem >= MAX_INDEX_DIFF)) {
117 // Compact the previous element.
118 // Note there is always at least one element when we get here,
119 // because we created the first element before the loop.
120 mIndexDeltas.LastElement().Compact();
121 mIndexDeltas.AppendElement();
122 mIndexPrefixes.AppendElement(aPrefixes[i]);
123 numOfDeltas = 0;
124 } else {
125 uint16_t delta = aPrefixes[i] - previousItem;
126 mIndexDeltas.LastElement().AppendElement(delta);
127 numOfDeltas++;
128 totalDeltas++;
129 }
130 previousItem = aPrefixes[i];
131 }
132
133 mIndexDeltas.LastElement().Compact();
134 mIndexDeltas.Compact();
135 mIndexPrefixes.Compact();
136
137 LOG(("Total number of indices: %d", aLength));
138 LOG(("Total number of deltas: %d", totalDeltas));
139 LOG(("Total number of delta chunks: %d", mIndexDeltas.Length()));
140
141 return NS_OK;
142 }
143
144 nsresult
GetPrefixesNative(FallibleTArray<uint32_t> & outArray)145 nsUrlClassifierPrefixSet::GetPrefixesNative(FallibleTArray<uint32_t>& outArray)
146 {
147 MutexAutoLock lock(mLock);
148
149 if (!outArray.SetLength(mTotalPrefixes, fallible)) {
150 return NS_ERROR_OUT_OF_MEMORY;
151 }
152
153 uint32_t prefixIdxLength = mIndexPrefixes.Length();
154 uint32_t prefixCnt = 0;
155
156 for (uint32_t i = 0; i < prefixIdxLength; i++) {
157 uint32_t prefix = mIndexPrefixes[i];
158
159 outArray[prefixCnt++] = prefix;
160 for (uint32_t j = 0; j < mIndexDeltas[i].Length(); j++) {
161 prefix += mIndexDeltas[i][j];
162 outArray[prefixCnt++] = prefix;
163 }
164 }
165
166 NS_ASSERTION(mTotalPrefixes == prefixCnt, "Lengths are inconsistent");
167 return NS_OK;
168 }
169
170 NS_IMETHODIMP
GetPrefixes(uint32_t * aCount,uint32_t ** aPrefixes)171 nsUrlClassifierPrefixSet::GetPrefixes(uint32_t* aCount,
172 uint32_t** aPrefixes)
173 {
174 // No need to get mLock here because this function does not directly touch
175 // the class's data members. (GetPrefixesNative() will get mLock, however.)
176
177 NS_ENSURE_ARG_POINTER(aCount);
178 *aCount = 0;
179 NS_ENSURE_ARG_POINTER(aPrefixes);
180 *aPrefixes = nullptr;
181
182 FallibleTArray<uint32_t> prefixes;
183 nsresult rv = GetPrefixesNative(prefixes);
184 if (NS_FAILED(rv)) {
185 return rv;
186 }
187
188 uint64_t itemCount = prefixes.Length();
189 uint32_t* prefixArray = static_cast<uint32_t*>(moz_xmalloc(itemCount * sizeof(uint32_t)));
190 NS_ENSURE_TRUE(prefixArray, NS_ERROR_OUT_OF_MEMORY);
191
192 memcpy(prefixArray, prefixes.Elements(), sizeof(uint32_t) * itemCount);
193
194 *aCount = itemCount;
195 *aPrefixes = prefixArray;
196
197 return NS_OK;
198 }
199
BinSearch(uint32_t start,uint32_t end,uint32_t target)200 uint32_t nsUrlClassifierPrefixSet::BinSearch(uint32_t start,
201 uint32_t end,
202 uint32_t target)
203 {
204 mLock.AssertCurrentThreadOwns();
205
206 while (start != end && end >= start) {
207 uint32_t i = start + ((end - start) >> 1);
208 uint32_t value = mIndexPrefixes[i];
209 if (value < target) {
210 start = i + 1;
211 } else if (value > target) {
212 end = i - 1;
213 } else {
214 return i;
215 }
216 }
217 return end;
218 }
219
220 NS_IMETHODIMP
Contains(uint32_t aPrefix,bool * aFound)221 nsUrlClassifierPrefixSet::Contains(uint32_t aPrefix, bool* aFound)
222 {
223 MutexAutoLock lock(mLock);
224
225 *aFound = false;
226
227 if (mIndexPrefixes.Length() == 0) {
228 return NS_OK;
229 }
230
231 uint32_t target = aPrefix;
232
233 // We want to do a "Price is Right" binary search, that is, we want to find
234 // the index of the value either equal to the target or the closest value
235 // that is less than the target.
236 //
237 if (target < mIndexPrefixes[0]) {
238 return NS_OK;
239 }
240
241 // |binsearch| does not necessarily return the correct index (when the
242 // target is not found) but rather it returns an index at least one away
243 // from the correct index.
244 // Because of this, we need to check if the target lies before the beginning
245 // of the indices.
246
247 uint32_t i = BinSearch(0, mIndexPrefixes.Length() - 1, target);
248 if (mIndexPrefixes[i] > target && i > 0) {
249 i--;
250 }
251
252 // Now search through the deltas for the target.
253 uint32_t diff = target - mIndexPrefixes[i];
254 uint32_t deltaSize = mIndexDeltas[i].Length();
255 uint32_t deltaIndex = 0;
256
257 while (diff > 0 && deltaIndex < deltaSize) {
258 diff -= mIndexDeltas[i][deltaIndex];
259 deltaIndex++;
260 }
261
262 if (diff == 0) {
263 *aFound = true;
264 }
265
266 return NS_OK;
267 }
268
MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)269 MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)
270
271 NS_IMETHODIMP
272 nsUrlClassifierPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport,
273 nsISupports* aData, bool aAnonymize)
274 {
275 MOZ_ASSERT(NS_IsMainThread());
276
277 // No need to get mLock here because this function does not directly touch
278 // the class's data members. (SizeOfIncludingThis() will get mLock, however.)
279
280 aHandleReport->Callback(
281 EmptyCString(), mMemoryReportPath, KIND_HEAP, UNITS_BYTES,
282 SizeOfIncludingThis(UrlClassifierMallocSizeOf),
283 NS_LITERAL_CSTRING("Memory used by the prefix set for a URL classifier."),
284 aData);
285
286 return NS_OK;
287 }
288
289 size_t
SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)290 nsUrlClassifierPrefixSet::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
291 {
292 MutexAutoLock lock(mLock);
293
294 size_t n = 0;
295 n += aMallocSizeOf(this);
296 n += mIndexDeltas.ShallowSizeOfExcludingThis(aMallocSizeOf);
297 for (uint32_t i = 0; i < mIndexDeltas.Length(); i++) {
298 n += mIndexDeltas[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
299 }
300 n += mIndexPrefixes.ShallowSizeOfExcludingThis(aMallocSizeOf);
301 return n;
302 }
303
304 NS_IMETHODIMP
IsEmpty(bool * aEmpty)305 nsUrlClassifierPrefixSet::IsEmpty(bool * aEmpty)
306 {
307 MutexAutoLock lock(mLock);
308
309 *aEmpty = (mIndexPrefixes.Length() == 0);
310 return NS_OK;
311 }
312
313 NS_IMETHODIMP
LoadFromFile(nsIFile * aFile)314 nsUrlClassifierPrefixSet::LoadFromFile(nsIFile* aFile)
315 {
316 MutexAutoLock lock(mLock);
317
318 Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FILELOAD_TIME> timer;
319
320 nsCOMPtr<nsIInputStream> localInFile;
321 nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
322 PR_RDONLY | nsIFile::OS_READAHEAD);
323 NS_ENSURE_SUCCESS(rv, rv);
324
325 // Calculate how big the file is, make sure our read buffer isn't bigger
326 // than the file itself which is just wasting memory.
327 int64_t fileSize;
328 rv = aFile->GetFileSize(&fileSize);
329 NS_ENSURE_SUCCESS(rv, rv);
330
331 if (fileSize < 0 || fileSize > UINT32_MAX) {
332 return NS_ERROR_FAILURE;
333 }
334
335 uint32_t bufferSize = std::min<uint32_t>(static_cast<uint32_t>(fileSize),
336 MAX_BUFFER_SIZE);
337
338 // Convert to buffered stream
339 nsCOMPtr<nsIInputStream> in = NS_BufferInputStream(localInFile, bufferSize);
340
341 rv = LoadPrefixes(in);
342 NS_ENSURE_SUCCESS(rv, rv);
343
344 return NS_OK;
345 }
346
347 NS_IMETHODIMP
StoreToFile(nsIFile * aFile)348 nsUrlClassifierPrefixSet::StoreToFile(nsIFile* aFile)
349 {
350 MutexAutoLock lock(mLock);
351
352 nsCOMPtr<nsIOutputStream> localOutFile;
353 nsresult rv = NS_NewLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
354 PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
355 NS_ENSURE_SUCCESS(rv, rv);
356
357 uint32_t fileSize;
358
359 // Preallocate the file storage
360 {
361 nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
362 Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_FALLOCATE_TIME> timer;
363
364 fileSize = CalculatePreallocateSize();
365
366 // Ignore failure, the preallocation is a hint and we write out the entire
367 // file later on
368 Unused << fos->Preallocate(fileSize);
369 }
370
371 // Convert to buffered stream
372 nsCOMPtr<nsIOutputStream> out =
373 NS_BufferOutputStream(localOutFile, std::min(fileSize, MAX_BUFFER_SIZE));
374
375 rv = WritePrefixes(out);
376 NS_ENSURE_SUCCESS(rv, rv);
377
378 LOG(("Saving PrefixSet successful\n"));
379
380 return NS_OK;
381 }
382
383 nsresult
LoadPrefixes(nsIInputStream * in)384 nsUrlClassifierPrefixSet::LoadPrefixes(nsIInputStream* in)
385 {
386 uint32_t magic;
387 uint32_t read;
388
389 nsresult rv = in->Read(reinterpret_cast<char*>(&magic), sizeof(uint32_t), &read);
390 NS_ENSURE_SUCCESS(rv, rv);
391 NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
392
393 if (magic == PREFIXSET_VERSION_MAGIC) {
394 uint32_t indexSize;
395 uint32_t deltaSize;
396
397 rv = in->Read(reinterpret_cast<char*>(&indexSize), sizeof(uint32_t), &read);
398 NS_ENSURE_SUCCESS(rv, rv);
399 NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
400
401 rv = in->Read(reinterpret_cast<char*>(&deltaSize), sizeof(uint32_t), &read);
402 NS_ENSURE_SUCCESS(rv, rv);
403 NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
404
405 if (indexSize == 0) {
406 LOG(("stored PrefixSet is empty!"));
407 return NS_OK;
408 }
409
410 if (deltaSize > (indexSize * DELTAS_LIMIT)) {
411 return NS_ERROR_FILE_CORRUPTED;
412 }
413
414 nsTArray<uint32_t> indexStarts;
415 indexStarts.SetLength(indexSize);
416 mIndexPrefixes.SetLength(indexSize);
417 mIndexDeltas.SetLength(indexSize);
418
419 mTotalPrefixes = indexSize;
420
421 uint32_t toRead = indexSize*sizeof(uint32_t);
422 rv = in->Read(reinterpret_cast<char*>(mIndexPrefixes.Elements()), toRead, &read);
423 NS_ENSURE_SUCCESS(rv, rv);
424 NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
425
426 rv = in->Read(reinterpret_cast<char*>(indexStarts.Elements()), toRead, &read);
427 NS_ENSURE_SUCCESS(rv, rv);
428 NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
429
430 if (indexSize != 0 && indexStarts[0] != 0) {
431 return NS_ERROR_FILE_CORRUPTED;
432 }
433 for (uint32_t i = 0; i < indexSize; i++) {
434 uint32_t numInDelta = i == indexSize - 1 ? deltaSize - indexStarts[i]
435 : indexStarts[i + 1] - indexStarts[i];
436 if (numInDelta > DELTAS_LIMIT) {
437 return NS_ERROR_FILE_CORRUPTED;
438 }
439 if (numInDelta > 0) {
440 mIndexDeltas[i].SetLength(numInDelta);
441 mTotalPrefixes += numInDelta;
442 toRead = numInDelta * sizeof(uint16_t);
443 rv = in->Read(reinterpret_cast<char*>(mIndexDeltas[i].Elements()), toRead, &read);
444 NS_ENSURE_SUCCESS(rv, rv);
445 NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
446 }
447 }
448 } else {
449 LOG(("Version magic mismatch, not loading"));
450 return NS_ERROR_FILE_CORRUPTED;
451 }
452
453 MOZ_ASSERT(mIndexPrefixes.Length() == mIndexDeltas.Length());
454 LOG(("Loading PrefixSet successful"));
455
456 return NS_OK;
457 }
458
459 uint32_t
CalculatePreallocateSize()460 nsUrlClassifierPrefixSet::CalculatePreallocateSize()
461 {
462 uint32_t fileSize = 4 * sizeof(uint32_t);
463 uint32_t deltas = mTotalPrefixes - mIndexPrefixes.Length();
464 fileSize += 2 * mIndexPrefixes.Length() * sizeof(uint32_t);
465 fileSize += deltas * sizeof(uint16_t);
466 return fileSize;
467 }
468
469 nsresult
WritePrefixes(nsIOutputStream * out)470 nsUrlClassifierPrefixSet::WritePrefixes(nsIOutputStream* out)
471 {
472 uint32_t written;
473 uint32_t writelen = sizeof(uint32_t);
474 uint32_t magic = PREFIXSET_VERSION_MAGIC;
475 nsresult rv = out->Write(reinterpret_cast<char*>(&magic), writelen, &written);
476 NS_ENSURE_SUCCESS(rv, rv);
477 NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
478
479 uint32_t indexSize = mIndexPrefixes.Length();
480 uint32_t indexDeltaSize = mIndexDeltas.Length();
481 uint32_t totalDeltas = 0;
482
483 // Store the shape of mIndexDeltas by noting at which "count" of total
484 // indexes a new subarray starts. This is slightly cumbersome but keeps
485 // file format compatibility.
486 // If we ever update the format, we can gain space by storing the delta
487 // subarray sizes, which fit in bytes.
488 nsTArray<uint32_t> indexStarts;
489 indexStarts.AppendElement(0);
490
491 for (uint32_t i = 0; i < indexDeltaSize; i++) {
492 uint32_t deltaLength = mIndexDeltas[i].Length();
493 totalDeltas += deltaLength;
494 indexStarts.AppendElement(totalDeltas);
495 }
496
497 rv = out->Write(reinterpret_cast<char*>(&indexSize), writelen, &written);
498 NS_ENSURE_SUCCESS(rv, rv);
499 NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
500
501 rv = out->Write(reinterpret_cast<char*>(&totalDeltas), writelen, &written);
502 NS_ENSURE_SUCCESS(rv, rv);
503 NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
504
505 writelen = indexSize * sizeof(uint32_t);
506 rv = out->Write(reinterpret_cast<char*>(mIndexPrefixes.Elements()), writelen, &written);
507 NS_ENSURE_SUCCESS(rv, rv);
508 NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
509
510 rv = out->Write(reinterpret_cast<char*>(indexStarts.Elements()), writelen, &written);
511 NS_ENSURE_SUCCESS(rv, rv);
512 NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
513
514 if (totalDeltas > 0) {
515 for (uint32_t i = 0; i < indexDeltaSize; i++) {
516 writelen = mIndexDeltas[i].Length() * sizeof(uint16_t);
517 rv = out->Write(reinterpret_cast<char*>(mIndexDeltas[i].Elements()), writelen, &written);
518 NS_ENSURE_SUCCESS(rv, rv);
519 NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
520 }
521 }
522
523 LOG(("Saving PrefixSet successful\n"));
524
525 return NS_OK;
526 }
527