// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/allocator/partition_allocator/partition_bucket.h"

#include "base/allocator/partition_allocator/address_pool_manager.h"
#include "base/allocator/partition_allocator/object_bitmap.h"
#include "base/allocator/partition_allocator/oom.h"
#include "base/allocator/partition_allocator/page_allocator.h"
#include "base/allocator/partition_allocator/page_allocator_constants.h"
#include "base/allocator/partition_allocator/partition_address_space.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "base/allocator/partition_allocator/partition_alloc_constants.h"
#include "base/allocator/partition_allocator/partition_alloc_features.h"
#include "base/allocator/partition_allocator/partition_direct_map_extent.h"
#include "base/allocator/partition_allocator/partition_oom.h"
#include "base/allocator/partition_allocator/partition_page.h"
#include "base/allocator/partition_allocator/partition_tag.h"
#include "base/allocator/partition_allocator/partition_tag_bitmap.h"
#include "base/bits.h"
#include "base/check.h"
#include "build/build_config.h"

namespace base {
namespace internal {

namespace {

template <bool thread_safe>
ALWAYS_INLINE SlotSpanMetadata<thread_safe>*
PartitionDirectMap(PartitionRoot<thread_safe>* root, int flags, size_t raw_size)
    EXCLUSIVE_LOCKS_REQUIRED(root->lock_) {
  size_t slot_size =
      PartitionBucket<thread_safe>::get_direct_map_size(raw_size);

  // Because we need to fake looking like a super page, we need to allocate
  // a bunch of system pages more than |slot_size|:
  // - The first few system pages are the partition page in which the super
  // page metadata is stored. We commit just one system page out of a partition
  // page sized clump.
  // - We add a trailing guard page on 32-bit (on 64-bit we rely on the
  // massive address space plus randomization instead; additionally GigaCage
  // guarantees that the region is in the company of regions that have leading
  // guard pages).
  size_t reserved_size = slot_size + PartitionPageSize();
#if !defined(ARCH_CPU_64_BITS)
  reserved_size += SystemPageSize();
#endif
  // Round up to the allocation granularity.
  reserved_size = bits::Align(reserved_size, PageAllocationGranularity());
  size_t map_size = reserved_size - PartitionPageSize();
#if !defined(ARCH_CPU_64_BITS)
  map_size -= SystemPageSize();
#endif
  PA_DCHECK(slot_size <= map_size);

  char* ptr = nullptr;
  // Allocate from GigaCage, if enabled. However, the exception to this is when
  // tags aren't allowed, as CheckedPtr assumes that everything inside GigaCage
  // uses tags (specifically, inside the GigaCage's normal bucket pool).
  if (root->UsesGigaCage()) {
    ptr = internal::AddressPoolManager::GetInstance()->Alloc(
        GetDirectMapPool(), nullptr, reserved_size);
  } else {
    ptr = reinterpret_cast<char*>(AllocPages(nullptr, reserved_size,
                                             kSuperPageAlignment, PageReadWrite,
                                             PageTag::kPartitionAlloc));
  }
  if (UNLIKELY(!ptr))
    return nullptr;

  size_t committed_page_size = slot_size + SystemPageSize();
  root->total_size_of_direct_mapped_pages.fetch_add(committed_page_size,
                                                    std::memory_order_relaxed);
  root->IncreaseCommittedPages(committed_page_size);

  char* slot = ptr + PartitionPageSize();
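  // Note the access pattern below: the first system page of the leading
  // partition page becomes a guard page, the second one stays read-write (it
  // holds the super page / direct map metadata written further down), and the
  // rest of that partition page is made inaccessible as well.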
  SetSystemPagesAccess(ptr, SystemPageSize(), PageInaccessible);
  SetSystemPagesAccess(ptr + (SystemPageSize() * 2),
                       PartitionPageSize() - (SystemPageSize() * 2),
                       PageInaccessible);
#if !defined(ARCH_CPU_64_BITS)
  // TODO(bartekn): Uncommit all the way up to reserved_size, or in case of
  // GigaCage, all the way up to 2MB boundary.
  PA_DCHECK(slot + slot_size + SystemPageSize() <= ptr + reserved_size);
  SetSystemPagesAccess(slot + slot_size, SystemPageSize(), PageInaccessible);
#endif

  auto* metadata = reinterpret_cast<PartitionDirectMapMetadata<thread_safe>*>(
      PartitionSuperPageToMetadataArea(ptr));
  metadata->extent.root = root;
  // The new structures are all located inside a fresh system page so they
  // will all be zeroed out. These DCHECKs are for documentation.
  PA_DCHECK(!metadata->extent.super_page_base);
  PA_DCHECK(!metadata->extent.super_pages_end);
  PA_DCHECK(!metadata->extent.next);
  PA_DCHECK(PartitionPage<thread_safe>::FromPointerNoAlignmentCheck(slot) ==
            &metadata->page);

  auto* page = &metadata->page;
  PA_DCHECK(!page->slot_span_metadata_offset);
  PA_DCHECK(!page->slot_span_metadata.next_slot_span);
  PA_DCHECK(!page->slot_span_metadata.num_allocated_slots);
  PA_DCHECK(!page->slot_span_metadata.num_unprovisioned_slots);
  PA_DCHECK(!page->slot_span_metadata.empty_cache_index);
  page->slot_span_metadata.bucket = &metadata->bucket;
  page->slot_span_metadata.SetFreelistHead(
      reinterpret_cast<PartitionFreelistEntry*>(slot));

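  // A direct-mapped slot span contains exactly one slot, so the freelist is a
  // single entry terminated right away.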
  auto* next_entry = reinterpret_cast<PartitionFreelistEntry*>(slot);
  next_entry->SetNext(nullptr);

  PA_DCHECK(!metadata->bucket.active_slot_spans_head);
  PA_DCHECK(!metadata->bucket.empty_slot_spans_head);
  PA_DCHECK(!metadata->bucket.decommitted_slot_spans_head);
  PA_DCHECK(!metadata->bucket.num_system_pages_per_slot_span);
  PA_DCHECK(!metadata->bucket.num_full_slot_spans);
  metadata->bucket.slot_size = slot_size;

  auto* map_extent = &metadata->direct_map_extent;
  map_extent->map_size = map_size;
  map_extent->bucket = &metadata->bucket;

  // Maintain the doubly-linked list of all direct mappings.
  map_extent->next_extent = root->direct_map_list;
  if (map_extent->next_extent)
    map_extent->next_extent->prev_extent = map_extent;
  map_extent->prev_extent = nullptr;
  root->direct_map_list = map_extent;

  return &page->slot_span_metadata;
}

}  // namespace

// TODO(ajwong): This seems to interact badly with
// get_pages_per_slot_span() which rounds the value from this up to a
// multiple of NumSystemPagesPerPartitionPage() (aka 4) anyway.
// http://crbug.com/776537
//
// TODO(ajwong): The waste calculation seems wrong. The PTE usage should cover
// both used and unused pages.
// http://crbug.com/776537
template <bool thread_safe>
uint8_t PartitionBucket<thread_safe>::get_system_pages_per_slot_span() {
  // This works out reasonably for the current bucket sizes of the generic
  // allocator, and the current values of partition page size and constants.
  // Specifically, we have enough room to always pack the slots perfectly into
  // some number of system pages. The only waste is the waste associated with
  // unfaulted pages (i.e. wasted address space).
  // TODO: we end up using a lot of system pages for very small sizes. For
  // example, we'll use 12 system pages for slot size 24. The slot size is
  // so small that the waste would be tiny with just 4, or 1, system pages.
  // Later, we can investigate whether there are anti-fragmentation benefits
  // to using fewer system pages.
  double best_waste_ratio = 1.0f;
  uint16_t best_pages = 0;
  if (slot_size > MaxSystemPagesPerSlotSpan() * SystemPageSize()) {
    // TODO(ajwong): Why is there a DCHECK here for this?
    // http://crbug.com/776537
    PA_DCHECK(!(slot_size % SystemPageSize()));
    best_pages = static_cast<uint16_t>(slot_size / SystemPageSize());
    // TODO(ajwong): Should this be checking against
    // MaxSystemPagesPerSlotSpan() or numeric_limits<uint8_t>::max?
    // http://crbug.com/776537
    PA_CHECK(best_pages < (1 << 8));
    return static_cast<uint8_t>(best_pages);
  }
  PA_DCHECK(slot_size <= MaxSystemPagesPerSlotSpan() * SystemPageSize());
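  // Try every candidate span length, from one system page short of a
  // partition page up to MaxSystemPagesPerSlotSpan(), and keep the one with
  // the lowest waste ratio. Waste here is the unused tail of the span plus a
  // rough estimate of the page-table cost of the unfaulted pages needed to
  // round the span up to a whole number of partition pages.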
  for (uint16_t i = NumSystemPagesPerPartitionPage() - 1;
       i <= MaxSystemPagesPerSlotSpan(); ++i) {
    size_t page_size = SystemPageSize() * i;
    size_t num_slots = page_size / slot_size;
    size_t waste = page_size - (num_slots * slot_size);
    // Leaving a page unfaulted is not free; the page will occupy an empty page
    // table entry.  Make a simple attempt to account for that.
    //
    // TODO(ajwong): This looks wrong. PTEs are allocated for all pages
    // regardless of whether or not they are wasted. Should it just
    // be waste += i * sizeof(void*)?
    // http://crbug.com/776537
    size_t num_remainder_pages = i & (NumSystemPagesPerPartitionPage() - 1);
    size_t num_unfaulted_pages =
        num_remainder_pages
            ? (NumSystemPagesPerPartitionPage() - num_remainder_pages)
            : 0;
    waste += sizeof(void*) * num_unfaulted_pages;
    double waste_ratio =
        static_cast<double>(waste) / static_cast<double>(page_size);
    if (waste_ratio < best_waste_ratio) {
      best_waste_ratio = waste_ratio;
      best_pages = i;
    }
  }
  PA_DCHECK(best_pages > 0);
  PA_CHECK(best_pages <= MaxSystemPagesPerSlotSpan());
  return static_cast<uint8_t>(best_pages);
}

template <bool thread_safe>
void PartitionBucket<thread_safe>::Init(uint32_t new_slot_size) {
  slot_size = new_slot_size;
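  // Precompute a fixed-point reciprocal of the slot size, so that later
  // offset-to-slot-index math can be done with a multiply and a shift instead
  // of an integer division (see kReciprocalMask).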
  slot_size_reciprocal = kReciprocalMask / new_slot_size + 1;
  active_slot_spans_head =
      SlotSpanMetadata<thread_safe>::get_sentinel_slot_span();
  empty_slot_spans_head = nullptr;
  decommitted_slot_spans_head = nullptr;
  num_full_slot_spans = 0;
  num_system_pages_per_slot_span = get_system_pages_per_slot_span();
}

template <bool thread_safe>
NOINLINE void PartitionBucket<thread_safe>::OnFull() {
  OOM_CRASH(0);
}

template <bool thread_safe>
ALWAYS_INLINE void* PartitionBucket<thread_safe>::AllocNewSlotSpan(
    PartitionRoot<thread_safe>* root,
    int flags,
    uint16_t num_partition_pages,
    size_t slot_span_committed_size) {
  PA_DCHECK(!(reinterpret_cast<uintptr_t>(root->next_partition_page) %
              PartitionPageSize()));
  PA_DCHECK(!(reinterpret_cast<uintptr_t>(root->next_partition_page_end) %
              PartitionPageSize()));
  PA_DCHECK(num_partition_pages <= NumPartitionPagesPerSuperPage());
  PA_DCHECK(slot_span_committed_size % SystemPageSize() == 0);
  size_t slot_span_reserved_size = PartitionPageSize() * num_partition_pages;
  PA_DCHECK(slot_span_committed_size <= slot_span_reserved_size);
  size_t num_partition_pages_left =
      (root->next_partition_page_end - root->next_partition_page) >>
      PartitionPageShift();
  if (LIKELY(num_partition_pages_left >= num_partition_pages)) {
    // In this case, we can still hand out pages from the current super page
    // allocation.
    char* ret = root->next_partition_page;

    // Fresh system pages in the super page are decommitted. Commit them
    // before vending them back.
    SetSystemPagesAccess(ret, slot_span_committed_size, PageReadWrite);

    root->next_partition_page += slot_span_reserved_size;
    root->IncreaseCommittedPages(slot_span_committed_size);

#if ENABLE_TAG_FOR_MTE_CHECKED_PTR
    PA_DCHECK(root->next_tag_bitmap_page);
    char* next_tag_bitmap_page = reinterpret_cast<char*>(
        bits::Align(reinterpret_cast<uintptr_t>(
                        PartitionTagPointer(root->next_partition_page)),
                    SystemPageSize()));
    if (root->next_tag_bitmap_page < next_tag_bitmap_page) {
#if DCHECK_IS_ON()
      char* super_page = reinterpret_cast<char*>(
          reinterpret_cast<uintptr_t>(ret) & kSuperPageBaseMask);
      char* tag_bitmap = super_page + PartitionPageSize();
      PA_DCHECK(next_tag_bitmap_page <= tag_bitmap + ActualTagBitmapSize());
      PA_DCHECK(next_tag_bitmap_page > tag_bitmap);
#endif
      SetSystemPagesAccess(root->next_tag_bitmap_page,
                           next_tag_bitmap_page - root->next_tag_bitmap_page,
                           PageReadWrite);
      root->next_tag_bitmap_page = next_tag_bitmap_page;
    }
#if MTE_CHECKED_PTR_SET_TAG_AT_FREE
    // TODO(tasak): Consider initializing each slot with a different tag.
    PartitionTagSetValue(ret, slot_span_reserved_size,
                         root->GetNewPartitionTag());
#endif
#endif
    return ret;
  }

  // Need a new super page. We want to allocate super pages in a contiguous
  // address region as much as possible. This is important for not causing
  // page table bloat and not fragmenting address spaces on 32-bit
  // architectures.
  char* requested_address = root->next_super_page;
  char* super_page = nullptr;
  // Allocate from GigaCage, if enabled. However, the exception to this is when
  // tags aren't allowed, as CheckedPtr assumes that everything inside GigaCage
  // uses tags (specifically, inside the GigaCage's normal bucket pool).
  if (root->UsesGigaCage()) {
    super_page = AddressPoolManager::GetInstance()->Alloc(
        GetNormalBucketPool(), requested_address, kSuperPageSize);
  } else {
    super_page = reinterpret_cast<char*>(
        AllocPages(requested_address, kSuperPageSize, kSuperPageAlignment,
                   PageReadWrite, PageTag::kPartitionAlloc));
  }
  if (UNLIKELY(!super_page))
    return nullptr;

  root->total_size_of_super_pages.fetch_add(kSuperPageSize,
                                            std::memory_order_relaxed);

  // |slot_span_reserved_size| MUST be less than kSuperPageSize -
  // (PartitionPageSize()*2). This is a trustworthy value because
  // num_partition_pages is not user-controlled.
  //
  // TODO(ajwong): Introduce a DCHECK.
  root->next_super_page = super_page + kSuperPageSize;
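  // Lay out the rest of the super page: the first partition page is a guard
  // region that holds the metadata system page, followed by the (reserved)
  // tag bitmap, then the (optional) quarantine bitmaps, then the payload
  // partition pages, and finally a trailing guard partition page.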
  // TODO(tasak): Consider starting the bitmap right after metadata to save
  // space.
  char* tag_bitmap = super_page + PartitionPageSize();
  char* quarantine_bitmaps = tag_bitmap + ReservedTagBitmapSize();
  size_t quarantine_bitmaps_reserved_size = 0;
  size_t quarantine_bitmaps_size_to_commit = 0;
  if (root->scannable) {
    quarantine_bitmaps_reserved_size = ReservedQuarantineBitmapsSize();
    quarantine_bitmaps_size_to_commit = CommittedQuarantineBitmapsSize();
  }
  PA_DCHECK(quarantine_bitmaps_reserved_size % PartitionPageSize() == 0);
  PA_DCHECK(quarantine_bitmaps_size_to_commit % SystemPageSize() == 0);
  PA_DCHECK(quarantine_bitmaps_size_to_commit <=
            quarantine_bitmaps_reserved_size);
  char* ret = quarantine_bitmaps + quarantine_bitmaps_reserved_size;
  root->next_partition_page = ret + slot_span_reserved_size;
  root->next_partition_page_end = root->next_super_page - PartitionPageSize();
  PA_DCHECK(ret == SuperPagePayloadBegin(super_page, root->scannable));
  PA_DCHECK(root->next_partition_page_end == SuperPagePayloadEnd(super_page));

  // The first slot span is accessible. The given slot_span_committed_size is
  // equal to the system-page-aligned size of the slot span.
  //
  // The remainder of the slot span past slot_span_committed_size, as well as
  // all future slot spans inside the super page, are decommitted.
  //
  // TODO(ajwong): Refactor Page Allocator API so the super page comes in
  // decommitted initially.
  SetSystemPagesAccess(
      ret + slot_span_committed_size,
      (super_page + kSuperPageSize) - (ret + slot_span_committed_size),
      PageInaccessible);
  root->IncreaseCommittedPages(slot_span_committed_size);

  // Make the first partition page in the super page a guard page, but leave a
  // hole in the middle.
  // This is where we put page metadata and also a tiny amount of extent
  // metadata.
  SetSystemPagesAccess(super_page, SystemPageSize(), PageInaccessible);
  SetSystemPagesAccess(super_page + (SystemPageSize() * 2),
                       PartitionPageSize() - (SystemPageSize() * 2),
                       PageInaccessible);
#if ENABLE_TAG_FOR_MTE_CHECKED_PTR
  // Make the first |slot_span_reserved_size| region of the tag bitmap
  // accessible. The rest of the region is set to inaccessible.
  char* next_tag_bitmap_page = reinterpret_cast<char*>(
      bits::Align(reinterpret_cast<uintptr_t>(
                      PartitionTagPointer(root->next_partition_page)),
                  SystemPageSize()));
  PA_DCHECK(next_tag_bitmap_page <= tag_bitmap + ActualTagBitmapSize());
  PA_DCHECK(next_tag_bitmap_page > tag_bitmap);
  // |ret| points at the end of the tag bitmap.
  PA_DCHECK(next_tag_bitmap_page <= ret);
  SetSystemPagesAccess(next_tag_bitmap_page, ret - next_tag_bitmap_page,
                       PageInaccessible);
#if MTE_CHECKED_PTR_SET_TAG_AT_FREE
  // TODO(tasak): Consider initializing each slot with a different tag.
  PartitionTagSetValue(ret, slot_span_reserved_size,
                       root->GetNewPartitionTag());
#endif
  root->next_tag_bitmap_page = next_tag_bitmap_page;
#endif

  // If PCScan is used, keep the quarantine bitmap committed, just release the
  // unused part of the partition page, if any. If PCScan isn't used, release
  // the entire reserved region (PartitionRoot::EnablePCScan will be
  // responsible for committing it when enabling PCScan).
  if (root->pcscan.has_value()) {
    PA_DCHECK(root->scannable);
    if (quarantine_bitmaps_reserved_size > quarantine_bitmaps_size_to_commit) {
      SetSystemPagesAccess(
          quarantine_bitmaps + quarantine_bitmaps_size_to_commit,
          quarantine_bitmaps_reserved_size - quarantine_bitmaps_size_to_commit,
          PageInaccessible);
    }
  } else {
    // If the partition isn't scannable, no quarantine bitmaps were reserved,
    // hence there is nothing to decommit.
    if (root->scannable) {
      PA_DCHECK(quarantine_bitmaps_reserved_size > 0);
      SetSystemPagesAccess(quarantine_bitmaps, quarantine_bitmaps_reserved_size,
                           PageInaccessible);
    } else {
      PA_DCHECK(quarantine_bitmaps_reserved_size == 0);
    }
  }

  // If we were after a specific address, but didn't get it, assume that
  // the system chose a lousy address. Here most OSes have a default
  // algorithm that isn't randomized. For example, most Linux
  // distributions will allocate the mapping directly before the last
  // successful mapping, which is far from random. So we just get fresh
  // randomness for the next mapping attempt.
  if (requested_address && requested_address != super_page)
    root->next_super_page = nullptr;

  // We allocated a new super page so update super page metadata.
  // First check if this is a new extent or not.
  auto* latest_extent =
      reinterpret_cast<PartitionSuperPageExtentEntry<thread_safe>*>(
          PartitionSuperPageToMetadataArea(super_page));
  // By storing the root in every extent metadata object, we have a fast way
  // to go from a pointer within the partition to the root object.
  latest_extent->root = root;
  // Most new extents will be part of a larger extent, and these three fields
  // are unused, but we initialize them to 0 so that we get a clear signal
  // in case they are accidentally used.
  latest_extent->super_page_base = nullptr;
  latest_extent->super_pages_end = nullptr;
  latest_extent->next = nullptr;

  PartitionSuperPageExtentEntry<thread_safe>* current_extent =
      root->current_extent;
  const bool is_new_extent = super_page != requested_address;
  if (UNLIKELY(is_new_extent)) {
    if (UNLIKELY(!current_extent)) {
      PA_DCHECK(!root->first_extent);
      root->first_extent = latest_extent;
    } else {
      PA_DCHECK(current_extent->super_page_base);
      current_extent->next = latest_extent;
    }
    root->current_extent = latest_extent;
    latest_extent->super_page_base = super_page;
    latest_extent->super_pages_end = super_page + kSuperPageSize;
  } else {
    // We allocated next to an existing extent so just nudge the size up a
    // little.
    PA_DCHECK(current_extent->super_pages_end);
    current_extent->super_pages_end += kSuperPageSize;
    PA_DCHECK(ret >= current_extent->super_page_base &&
              ret < current_extent->super_pages_end);
  }
  return ret;
}

template <bool thread_safe>
ALWAYS_INLINE void PartitionBucket<thread_safe>::InitializeSlotSpan(
    SlotSpanMetadata<thread_safe>* slot_span) {
  // The bucket never changes. We set it up once.
  slot_span->bucket = this;
  slot_span->empty_cache_index = -1;

  slot_span->Reset();

  uint16_t num_partition_pages = get_pages_per_slot_span();
  auto* page = reinterpret_cast<PartitionPage<thread_safe>*>(slot_span);
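  // Each subsequent PartitionPage of a multi-page slot span records its
  // offset from the first page, so that metadata lookups (e.g.
  // FromPointerNoAlignmentCheck()) can map any address in the span back to
  // this slot span's metadata.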
  for (uint16_t i = 1; i < num_partition_pages; ++i) {
    auto* secondary_page = page + i;
    secondary_page->slot_span_metadata_offset = i;
  }
}

template <bool thread_safe>
ALWAYS_INLINE char* PartitionBucket<thread_safe>::AllocAndFillFreelist(
    SlotSpanMetadata<thread_safe>* slot_span) {
  PA_DCHECK(slot_span !=
            SlotSpanMetadata<thread_safe>::get_sentinel_slot_span());
  uint16_t num_slots = slot_span->num_unprovisioned_slots;
  PA_DCHECK(num_slots);
  // We should only get here when _every_ slot is either used or unprovisioned.
  // (The third state is "on the freelist". If we have a non-empty freelist, we
  // should not get here.)
  PA_DCHECK(num_slots + slot_span->num_allocated_slots == get_slots_per_span());
  // Similarly, explicitly make sure that the freelist is empty.
  PA_DCHECK(!slot_span->freelist_head);
  PA_DCHECK(slot_span->num_allocated_slots >= 0);

  size_t size = slot_size;
  char* base = reinterpret_cast<char*>(
      SlotSpanMetadata<thread_safe>::ToPointer(slot_span));
  char* return_object = base + (size * slot_span->num_allocated_slots);
  char* first_freelist_pointer = return_object + size;
  char* first_freelist_pointer_extent =
      first_freelist_pointer + sizeof(PartitionFreelistEntry*);
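  // |first_freelist_pointer| is the start of the first slot that would go on
  // the freelist (right after the object being returned), and
  // |first_freelist_pointer_extent| is the first byte past the space needed
  // to store that slot's freelist pointer.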
  // Our goal is to fault as few system pages as possible. We calculate the
  // page containing the "end" of the returned slot, and then allow freelist
  // pointers to be written up to the end of that page.
  char* sub_page_limit = reinterpret_cast<char*>(
      RoundUpToSystemPage(reinterpret_cast<size_t>(first_freelist_pointer)));
  char* slots_limit = return_object + (size * num_slots);
  char* freelist_limit = sub_page_limit;
  if (UNLIKELY(slots_limit < freelist_limit))
    freelist_limit = slots_limit;

  uint16_t num_new_freelist_entries = 0;
  if (LIKELY(first_freelist_pointer_extent <= freelist_limit)) {
    // Only consider used space in the slot span. If we consider wasted
    // space, we may get an off-by-one when a freelist pointer fits in the
    // wasted space, but a slot does not.
    // We know we can fit at least one freelist pointer.
    num_new_freelist_entries = 1;
    // Any further entries require space for the whole slot span.
    num_new_freelist_entries += static_cast<uint16_t>(
        (freelist_limit - first_freelist_pointer_extent) / size);
  }

  // We always return an object slot -- that's the +1 below.
  // We do not necessarily create any new freelist entries, because we cross
  // sub-page boundaries frequently for large bucket sizes.
  PA_DCHECK(num_new_freelist_entries + 1 <= num_slots);
  num_slots -= (num_new_freelist_entries + 1);
  slot_span->num_unprovisioned_slots = num_slots;
  slot_span->num_allocated_slots++;

  if (LIKELY(num_new_freelist_entries)) {
    char* freelist_pointer = first_freelist_pointer;
    auto* entry = reinterpret_cast<PartitionFreelistEntry*>(freelist_pointer);
    slot_span->SetFreelistHead(entry);
    while (--num_new_freelist_entries) {
      freelist_pointer += size;
      auto* next_entry =
          reinterpret_cast<PartitionFreelistEntry*>(freelist_pointer);
      entry->SetNext(next_entry);
      entry = next_entry;
    }
    entry->SetNext(nullptr);
  } else {
    slot_span->SetFreelistHead(nullptr);
  }
  return return_object;
}

template <bool thread_safe>
bool PartitionBucket<thread_safe>::SetNewActiveSlotSpan() {
  SlotSpanMetadata<thread_safe>* slot_span = active_slot_spans_head;
  if (slot_span == SlotSpanMetadata<thread_safe>::get_sentinel_slot_span())
    return false;

  SlotSpanMetadata<thread_safe>* next_slot_span;

  for (; slot_span; slot_span = next_slot_span) {
    next_slot_span = slot_span->next_slot_span;
    PA_DCHECK(slot_span->bucket == this);
    PA_DCHECK(slot_span != empty_slot_spans_head);
    PA_DCHECK(slot_span != decommitted_slot_spans_head);

    if (LIKELY(slot_span->is_active())) {
      // This slot span is usable because it has freelist entries, or has
      // unprovisioned slots we can create freelist entries from.
      active_slot_spans_head = slot_span;
      return true;
    }

    // Deal with empty and decommitted slot spans.
    if (LIKELY(slot_span->is_empty())) {
      slot_span->next_slot_span = empty_slot_spans_head;
      empty_slot_spans_head = slot_span;
    } else if (LIKELY(slot_span->is_decommitted())) {
      slot_span->next_slot_span = decommitted_slot_spans_head;
      decommitted_slot_spans_head = slot_span;
    } else {
      PA_DCHECK(slot_span->is_full());
      // If we get here, we found a full slot span. Skip over it too, and also
      // tag it as full (via a negative value). We need it tagged so that
      // freeing can tell, and move it back into the active list.
      slot_span->num_allocated_slots = -slot_span->num_allocated_slots;
      ++num_full_slot_spans;
      // num_full_slot_spans is a uint16_t for efficient packing so guard
      // against overflow to be safe.
      if (UNLIKELY(!num_full_slot_spans))
        OnFull();
      // Not necessary but might help stop accidents.
      slot_span->next_slot_span = nullptr;
    }
  }

  active_slot_spans_head =
      SlotSpanMetadata<thread_safe>::get_sentinel_slot_span();
  return false;
}

template <bool thread_safe>
void* PartitionBucket<thread_safe>::SlowPathAlloc(
    PartitionRoot<thread_safe>* root,
    int flags,
    size_t raw_size,
    bool* is_already_zeroed) {
  // The slow path is called when the freelist is empty.
  PA_DCHECK(!active_slot_spans_head->freelist_head);

  SlotSpanMetadata<thread_safe>* new_slot_span = nullptr;
  // |new_slot_span->bucket| will always be |this|, except when |this| is the
  // sentinel bucket, which is used to signal a direct-mapped allocation. In
  // this case |new_bucket| will be set properly later. This avoids a read for
  // most allocations.
  PartitionBucket* new_bucket = this;
  *is_already_zeroed = false;

  // For the PartitionRoot::Alloc() API, we have a bunch of buckets
  // marked as special cases. We bounce them through to the slow path so that
  // we can still have a blazing fast hot path due to lack of corner-case
  // branches.
  //
  // Note: The ordering of the conditionals matters! In particular,
  // SetNewActiveSlotSpan() has a side effect even when returning false: it
  // sweeps the active list and may move things into the empty or decommitted
  // lists, which affects the subsequent conditionals.
  bool return_null = flags & PartitionAllocReturnNull;
  if (UNLIKELY(is_direct_mapped())) {
    PA_DCHECK(raw_size > kMaxBucketed);
    PA_DCHECK(this == &root->sentinel_bucket);
    PA_DCHECK(active_slot_spans_head ==
              SlotSpanMetadata<thread_safe>::get_sentinel_slot_span());
    if (raw_size > MaxDirectMapped()) {
      if (return_null)
        return nullptr;
      // The lock is here to protect PA from:
      // 1. Concurrent calls
      // 2. Reentrant calls
      //
      // This is fine here, however, as:
      // 1. Concurrency: |PartitionRoot::OutOfMemory()| never returns, so the
      //    lock will not be re-acquired, which would lead to acting on
      //    inconsistent data that could have been modified in-between releasing
      //    and acquiring it.
      // 2. Reentrancy: This is why we release the lock. On some platforms,
      //    terminating the process may free() memory, or even possibly try to
      //    allocate some. Calling free() is fine, but will deadlock since
      //    |PartitionRoot::lock_| is not recursive.
      //
      // Supporting reentrant calls properly is hard, and not a requirement for
      // PA. However, up to that point, we've only *read* data, not *written*
      // to any state. Reentrant calls are then fine, especially as we don't
      // continue on this path. The only downside is possibly endless recursion
      // if the OOM handler allocates and fails to use UncheckedMalloc() or
      // equivalent, but that's violating the contract of
      // base::OnNoMemoryInternal().
      ScopedUnlockGuard<thread_safe> unlock{root->lock_};
      PartitionExcessiveAllocationSize(raw_size);
      IMMEDIATE_CRASH();  // Not required, kept as documentation.
    }
    new_slot_span = PartitionDirectMap(root, flags, raw_size);
    if (new_slot_span)
      new_bucket = new_slot_span->bucket;
    // Memory from PageAllocator is always zeroed.
    *is_already_zeroed = true;
  } else if (LIKELY(SetNewActiveSlotSpan())) {
    // First, did we find an active slot span in the active list?
    new_slot_span = active_slot_spans_head;
    PA_DCHECK(new_slot_span->is_active());
  } else if (LIKELY(empty_slot_spans_head != nullptr) ||
             LIKELY(decommitted_slot_spans_head != nullptr)) {
    // Second, look in our lists of empty and decommitted slot spans.
    // Check empty slot spans first, which are preferred, but beware that an
    // empty slot span might have been decommitted.
    while (LIKELY((new_slot_span = empty_slot_spans_head) != nullptr)) {
      PA_DCHECK(new_slot_span->bucket == this);
      PA_DCHECK(new_slot_span->is_empty() || new_slot_span->is_decommitted());
      empty_slot_spans_head = new_slot_span->next_slot_span;
      // Accept the empty slot span unless it got decommitted.
      if (new_slot_span->freelist_head) {
        new_slot_span->next_slot_span = nullptr;
        break;
      }
      PA_DCHECK(new_slot_span->is_decommitted());
      new_slot_span->next_slot_span = decommitted_slot_spans_head;
      decommitted_slot_spans_head = new_slot_span;
    }
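    // No usable empty slot span was found, so fall back to the decommitted
    // list. Those spans' pages must be recommitted before they can be reused.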
    if (UNLIKELY(!new_slot_span) &&
        LIKELY(decommitted_slot_spans_head != nullptr)) {
      new_slot_span = decommitted_slot_spans_head;
      PA_DCHECK(new_slot_span->bucket == this);
      PA_DCHECK(new_slot_span->is_decommitted());
      decommitted_slot_spans_head = new_slot_span->next_slot_span;
      void* addr = SlotSpanMetadata<thread_safe>::ToPointer(new_slot_span);
      root->RecommitSystemPages(addr,
                                new_slot_span->bucket->get_bytes_per_span());
      new_slot_span->Reset();
      *is_already_zeroed = kDecommittedPagesAreAlwaysZeroed;
    }
    PA_DCHECK(new_slot_span);
  } else {
    // Third. If we get here, we need a brand new slot span.
    uint16_t num_partition_pages = get_pages_per_slot_span();
    void* raw_memory = AllocNewSlotSpan(root, flags, num_partition_pages,
                                        get_bytes_per_span());
    if (LIKELY(raw_memory != nullptr)) {
      new_slot_span =
          SlotSpanMetadata<thread_safe>::FromPointerNoAlignmentCheck(
              raw_memory);
      InitializeSlotSpan(new_slot_span);
      // New memory from PageAllocator is always zeroed.
      *is_already_zeroed = true;
    }
  }

  // Bail if we had a memory allocation failure.
  if (UNLIKELY(!new_slot_span)) {
    PA_DCHECK(active_slot_spans_head ==
              SlotSpanMetadata<thread_safe>::get_sentinel_slot_span());
    if (return_null)
      return nullptr;
    // See comment above.
    ScopedUnlockGuard<thread_safe> unlock{root->lock_};
    root->OutOfMemory(raw_size);
    IMMEDIATE_CRASH();  // Not required, kept as documentation.
  }

  PA_DCHECK(new_bucket != &root->sentinel_bucket);
  new_bucket->active_slot_spans_head = new_slot_span;
  if (new_slot_span->CanStoreRawSize())
    new_slot_span->SetRawSize(raw_size);

  // If we found an active slot span with free slots, or an empty slot span, we
  // have a usable freelist head.
  if (LIKELY(new_slot_span->freelist_head != nullptr)) {
    PartitionFreelistEntry* entry = new_slot_span->freelist_head;
    PartitionFreelistEntry* new_head = entry->GetNext();
    new_slot_span->SetFreelistHead(new_head);
    new_slot_span->num_allocated_slots++;
    return entry;
  }
  // Otherwise, we need to build the freelist.
  PA_DCHECK(new_slot_span->num_unprovisioned_slots);
  return AllocAndFillFreelist(new_slot_span);
}

template struct PartitionBucket<ThreadSafe>;
template struct PartitionBucket<NotThreadSafe>;

}  // namespace internal
}  // namespace base