// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/allocator/partition_allocator/partition_bucket.h"

#include "base/allocator/partition_allocator/address_pool_manager.h"
#include "base/allocator/partition_allocator/object_bitmap.h"
#include "base/allocator/partition_allocator/oom.h"
#include "base/allocator/partition_allocator/page_allocator.h"
#include "base/allocator/partition_allocator/page_allocator_constants.h"
#include "base/allocator/partition_allocator/partition_address_space.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "base/allocator/partition_allocator/partition_alloc_constants.h"
#include "base/allocator/partition_allocator/partition_alloc_features.h"
#include "base/allocator/partition_allocator/partition_direct_map_extent.h"
#include "base/allocator/partition_allocator/partition_oom.h"
#include "base/allocator/partition_allocator/partition_page.h"
#include "base/allocator/partition_allocator/partition_tag.h"
#include "base/allocator/partition_allocator/partition_tag_bitmap.h"
#include "base/bits.h"
#include "base/check.h"
#include "build/build_config.h"

namespace base {
namespace internal {

namespace {

template <bool thread_safe>
ALWAYS_INLINE SlotSpanMetadata<thread_safe>*
PartitionDirectMap(PartitionRoot<thread_safe>* root, int flags, size_t raw_size)
    EXCLUSIVE_LOCKS_REQUIRED(root->lock_) {
  size_t slot_size =
      PartitionBucket<thread_safe>::get_direct_map_size(raw_size);

  // Because we need to fake looking like a super page, we need to allocate
  // a bunch of system pages more than |slot_size|:
  // - The first few system pages are the partition page in which the super
  // page metadata is stored. We commit just one system page out of a partition
  // page sized clump.
  // - We add a trailing guard page on 32-bit (on 64-bit we rely on the
  // massive address space plus randomization instead; additionally GigaCage
  // guarantees that the region is in the company of regions that have leading
  // guard pages).
  size_t reserved_size = slot_size + PartitionPageSize();
#if !defined(ARCH_CPU_64_BITS)
  reserved_size += SystemPageSize();
#endif
  // Round up to the allocation granularity.
  reserved_size = bits::Align(reserved_size, PageAllocationGranularity());
  size_t map_size = reserved_size - PartitionPageSize();
#if !defined(ARCH_CPU_64_BITS)
  map_size -= SystemPageSize();
#endif
  PA_DCHECK(slot_size <= map_size);

  char* ptr = nullptr;
  // Allocate from GigaCage, if enabled. However, the exception to this is when
  // tags aren't allowed, as CheckedPtr assumes that everything inside GigaCage
  // uses tags (specifically, inside the GigaCage's normal bucket pool).
  if (root->UsesGigaCage()) {
    ptr = internal::AddressPoolManager::GetInstance()->Alloc(
        GetDirectMapPool(), nullptr, reserved_size);
  } else {
    ptr = reinterpret_cast<char*>(AllocPages(nullptr, reserved_size,
                                             kSuperPageAlignment, PageReadWrite,
                                             PageTag::kPartitionAlloc));
  }
  if (UNLIKELY(!ptr))
    return nullptr;

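  // Only |slot_size| plus a single system page is committed: the one system
  // page of metadata committed out of the first partition page, per the
  // layout comment above.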
  size_t committed_page_size = slot_size + SystemPageSize();
  root->total_size_of_direct_mapped_pages.fetch_add(committed_page_size,
                                                    std::memory_order_relaxed);
  root->IncreaseCommittedPages(committed_page_size);

  char* slot = ptr + PartitionPageSize();
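  // Carve up the first partition page: the first system page becomes a guard
  // page, the second (holding the metadata) stays committed, and the rest up
  // to the slot is made inaccessible.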
  SetSystemPagesAccess(ptr, SystemPageSize(), PageInaccessible);
  SetSystemPagesAccess(ptr + (SystemPageSize() * 2),
                       PartitionPageSize() - (SystemPageSize() * 2),
                       PageInaccessible);
#if !defined(ARCH_CPU_64_BITS)
  // TODO(bartekn): Uncommit all the way up to reserved_size, or in case of
  // GigaCage, all the way up to 2MB boundary.
  PA_DCHECK(slot + slot_size + SystemPageSize() <= ptr + reserved_size);
  SetSystemPagesAccess(slot + slot_size, SystemPageSize(), PageInaccessible);
#endif

  auto* metadata = reinterpret_cast<PartitionDirectMapMetadata<thread_safe>*>(
      PartitionSuperPageToMetadataArea(ptr));
  metadata->extent.root = root;
  // The new structures are all located inside a fresh system page so they
  // will all be zeroed out. These DCHECKs are for documentation.
  PA_DCHECK(!metadata->extent.super_page_base);
  PA_DCHECK(!metadata->extent.super_pages_end);
  PA_DCHECK(!metadata->extent.next);
  PA_DCHECK(PartitionPage<thread_safe>::FromPointerNoAlignmentCheck(slot) ==
            &metadata->page);

  auto* page = &metadata->page;
  PA_DCHECK(!page->slot_span_metadata_offset);
  PA_DCHECK(!page->slot_span_metadata.next_slot_span);
  PA_DCHECK(!page->slot_span_metadata.num_allocated_slots);
  PA_DCHECK(!page->slot_span_metadata.num_unprovisioned_slots);
  PA_DCHECK(!page->slot_span_metadata.empty_cache_index);
  page->slot_span_metadata.bucket = &metadata->bucket;
  page->slot_span_metadata.SetFreelistHead(
      reinterpret_cast<PartitionFreelistEntry*>(slot));

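  // A direct-mapped span holds exactly one slot, so the freelist is a single
  // entry with no successor.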
  auto* next_entry = reinterpret_cast<PartitionFreelistEntry*>(slot);
  next_entry->SetNext(nullptr);

  PA_DCHECK(!metadata->bucket.active_slot_spans_head);
  PA_DCHECK(!metadata->bucket.empty_slot_spans_head);
  PA_DCHECK(!metadata->bucket.decommitted_slot_spans_head);
  PA_DCHECK(!metadata->bucket.num_system_pages_per_slot_span);
  PA_DCHECK(!metadata->bucket.num_full_slot_spans);
  metadata->bucket.slot_size = slot_size;

  auto* map_extent = &metadata->direct_map_extent;
  map_extent->map_size = map_size;
  map_extent->bucket = &metadata->bucket;

  // Maintain the doubly-linked list of all direct mappings.
  map_extent->next_extent = root->direct_map_list;
  if (map_extent->next_extent)
    map_extent->next_extent->prev_extent = map_extent;
  map_extent->prev_extent = nullptr;
  root->direct_map_list = map_extent;

  return &page->slot_span_metadata;
}

}  // namespace

// TODO(ajwong): This seems to interact badly with
// get_pages_per_slot_span() which rounds the value from this up to a
// multiple of NumSystemPagesPerPartitionPage() (aka 4) anyways.
// http://crbug.com/776537
//
// TODO(ajwong): The waste calculation seems wrong. The PTE usage should cover
// both used and unused pages.
// http://crbug.com/776537
template <bool thread_safe>
uint8_t PartitionBucket<thread_safe>::get_system_pages_per_slot_span() {
  // This works out reasonably for the current bucket sizes of the generic
  // allocator, and the current values of partition page size and constants.
  // Specifically, we have enough room to always pack the slots perfectly into
  // some number of system pages. The only waste is the waste associated with
  // unfaulted pages (i.e. wasted address space).
  // TODO: we end up using a lot of system pages for very small sizes. For
  // example, we'll use 12 system pages for slot size 24. The slot size is
  // so small that the waste would be tiny with just 4, or 1, system pages.
  // Later, we can investigate whether there are anti-fragmentation benefits
  // to using fewer system pages.
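  //
  // Worked example (hypothetical 4416-byte slots, 4 KiB system pages): for
  // the i = 12 iteration below, page_size = 49152, num_slots = 11, and
  // waste = 49152 - 48576 = 576 bytes, a waste ratio of ~1.2%; since 12 is a
  // multiple of NumSystemPagesPerPartitionPage(), no unfaulted-page penalty
  // is added.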
  double best_waste_ratio = 1.0;
  uint16_t best_pages = 0;
  if (slot_size > MaxSystemPagesPerSlotSpan() * SystemPageSize()) {
    // TODO(ajwong): Why is there a DCHECK here for this?
    // http://crbug.com/776537
    PA_DCHECK(!(slot_size % SystemPageSize()));
    best_pages = static_cast<uint16_t>(slot_size / SystemPageSize());
    // TODO(ajwong): Should this be checking against
    // MaxSystemPagesPerSlotSpan() or numeric_limits<uint8_t>::max?
    // http://crbug.com/776537
    PA_CHECK(best_pages < (1 << 8));
    return static_cast<uint8_t>(best_pages);
  }
  PA_DCHECK(slot_size <= MaxSystemPagesPerSlotSpan() * SystemPageSize());
  for (uint16_t i = NumSystemPagesPerPartitionPage() - 1;
       i <= MaxSystemPagesPerSlotSpan(); ++i) {
    size_t page_size = SystemPageSize() * i;
    size_t num_slots = page_size / slot_size;
    size_t waste = page_size - (num_slots * slot_size);
    // Leaving a page unfaulted is not free; the page will occupy an empty page
    // table entry. Make a simple attempt to account for that.
    //
    // TODO(ajwong): This looks wrong. PTEs are allocated for all pages
    // regardless of whether or not they are wasted. Should it just
    // be waste += i * sizeof(void*)?
    // http://crbug.com/776537
    size_t num_remainder_pages = i & (NumSystemPagesPerPartitionPage() - 1);
    size_t num_unfaulted_pages =
        num_remainder_pages
            ? (NumSystemPagesPerPartitionPage() - num_remainder_pages)
            : 0;
    waste += sizeof(void*) * num_unfaulted_pages;
    double waste_ratio =
        static_cast<double>(waste) / static_cast<double>(page_size);
    if (waste_ratio < best_waste_ratio) {
      best_waste_ratio = waste_ratio;
      best_pages = i;
    }
  }
  PA_DCHECK(best_pages > 0);
  PA_CHECK(best_pages <= MaxSystemPagesPerSlotSpan());
  return static_cast<uint8_t>(best_pages);
}

template <bool thread_safe>
void PartitionBucket<thread_safe>::Init(uint32_t new_slot_size) {
  slot_size = new_slot_size;
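  // Precompute a fixed-point reciprocal of the slot size (rounded up via the
  // +1), so hot paths can replace a division by |slot_size| with a multiply
  // and a shift.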
  slot_size_reciprocal = kReciprocalMask / new_slot_size + 1;
  active_slot_spans_head =
      SlotSpanMetadata<thread_safe>::get_sentinel_slot_span();
  empty_slot_spans_head = nullptr;
  decommitted_slot_spans_head = nullptr;
  num_full_slot_spans = 0;
  num_system_pages_per_slot_span = get_system_pages_per_slot_span();
}

template <bool thread_safe>
NOINLINE void PartitionBucket<thread_safe>::OnFull() {
  OOM_CRASH(0);
}

template <bool thread_safe>
ALWAYS_INLINE void* PartitionBucket<thread_safe>::AllocNewSlotSpan(
    PartitionRoot<thread_safe>* root,
    int flags,
    uint16_t num_partition_pages,
    size_t slot_span_committed_size) {
  PA_DCHECK(!(reinterpret_cast<uintptr_t>(root->next_partition_page) %
              PartitionPageSize()));
  PA_DCHECK(!(reinterpret_cast<uintptr_t>(root->next_partition_page_end) %
              PartitionPageSize()));
  PA_DCHECK(num_partition_pages <= NumPartitionPagesPerSuperPage());
  PA_DCHECK(slot_span_committed_size % SystemPageSize() == 0);
  size_t slot_span_reserved_size = PartitionPageSize() * num_partition_pages;
  PA_DCHECK(slot_span_committed_size <= slot_span_reserved_size);
  size_t num_partition_pages_left =
      (root->next_partition_page_end - root->next_partition_page) >>
      PartitionPageShift();
  if (LIKELY(num_partition_pages_left >= num_partition_pages)) {
    // In this case, we can still hand out pages from the current super page
    // allocation.
    char* ret = root->next_partition_page;

    // Fresh system pages in the super page are decommitted. Commit them
    // before vending them back.
    SetSystemPagesAccess(ret, slot_span_committed_size, PageReadWrite);

    root->next_partition_page += slot_span_reserved_size;
    root->IncreaseCommittedPages(slot_span_committed_size);

#if ENABLE_TAG_FOR_MTE_CHECKED_PTR
    PA_DCHECK(root->next_tag_bitmap_page);
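    // Commit the portion of the tag bitmap that covers the newly vended slot
    // span: pages up to the bitmap page for |root->next_partition_page|,
    // which already points past the new span.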
    char* next_tag_bitmap_page = reinterpret_cast<char*>(
        bits::Align(reinterpret_cast<uintptr_t>(
                        PartitionTagPointer(root->next_partition_page)),
                    SystemPageSize()));
    if (root->next_tag_bitmap_page < next_tag_bitmap_page) {
#if DCHECK_IS_ON()
      char* super_page = reinterpret_cast<char*>(
          reinterpret_cast<uintptr_t>(ret) & kSuperPageBaseMask);
      char* tag_bitmap = super_page + PartitionPageSize();
      PA_DCHECK(next_tag_bitmap_page <= tag_bitmap + ActualTagBitmapSize());
      PA_DCHECK(next_tag_bitmap_page > tag_bitmap);
#endif
      SetSystemPagesAccess(root->next_tag_bitmap_page,
                           next_tag_bitmap_page - root->next_tag_bitmap_page,
                           PageReadWrite);
      root->next_tag_bitmap_page = next_tag_bitmap_page;
    }
#if MTE_CHECKED_PTR_SET_TAG_AT_FREE
    // TODO(tasak): Consider initializing each slot with a different tag.
    PartitionTagSetValue(ret, slot_span_reserved_size,
                         root->GetNewPartitionTag());
#endif
#endif
    return ret;
  }

  // Need a new super page. We want to allocate super pages in a contiguous
  // address region as much as possible. This is important for not causing
  // page table bloat and not fragmenting address spaces in 32-bit
  // architectures.
  char* requested_address = root->next_super_page;
  char* super_page = nullptr;
  // Allocate from GigaCage, if enabled. However, the exception to this is when
  // tags aren't allowed, as CheckedPtr assumes that everything inside GigaCage
  // uses tags (specifically, inside the GigaCage's normal bucket pool).
  if (root->UsesGigaCage()) {
    super_page = AddressPoolManager::GetInstance()->Alloc(
        GetNormalBucketPool(), requested_address, kSuperPageSize);
  } else {
    super_page = reinterpret_cast<char*>(
        AllocPages(requested_address, kSuperPageSize, kSuperPageAlignment,
                   PageReadWrite, PageTag::kPartitionAlloc));
  }
  if (UNLIKELY(!super_page))
    return nullptr;

  root->total_size_of_super_pages.fetch_add(kSuperPageSize,
                                            std::memory_order_relaxed);

  // |slot_span_reserved_size| MUST be less than kSuperPageSize -
  // (PartitionPageSize()*2). This is a trustworthy value because
  // num_partition_pages is not user controlled.
  //
  // TODO(ajwong): Introduce a DCHECK.
  root->next_super_page = super_page + kSuperPageSize;
  // TODO(tasak): Consider starting the bitmap right after metadata to save
  // space.
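  // Super page layout, from low to high addresses: one partition page of
  // guard pages and metadata, the tag bitmap, the quarantine bitmaps (if the
  // partition is scannable), the payload (slot spans), and a trailing guard
  // partition page.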
  char* tag_bitmap = super_page + PartitionPageSize();
  char* quarantine_bitmaps = tag_bitmap + ReservedTagBitmapSize();
  size_t quarantine_bitmaps_reserved_size = 0;
  size_t quarantine_bitmaps_size_to_commit = 0;
  if (root->scannable) {
    quarantine_bitmaps_reserved_size = ReservedQuarantineBitmapsSize();
    quarantine_bitmaps_size_to_commit = CommittedQuarantineBitmapsSize();
  }
  PA_DCHECK(quarantine_bitmaps_reserved_size % PartitionPageSize() == 0);
  PA_DCHECK(quarantine_bitmaps_size_to_commit % SystemPageSize() == 0);
  PA_DCHECK(quarantine_bitmaps_size_to_commit <=
            quarantine_bitmaps_reserved_size);
  char* ret = quarantine_bitmaps + quarantine_bitmaps_reserved_size;
  root->next_partition_page = ret + slot_span_reserved_size;
  root->next_partition_page_end = root->next_super_page - PartitionPageSize();
  PA_DCHECK(ret == SuperPagePayloadBegin(super_page, root->scannable));
  PA_DCHECK(root->next_partition_page_end == SuperPagePayloadEnd(super_page));

  // The first slot span is accessible. The given slot_span_committed_size is
  // equal to the system-page-aligned size of the slot span.
  //
  // The remainder of the slot span past slot_span_committed_size, as well as
  // all future slot spans inside the super page, are decommitted.
  //
  // TODO(ajwong): Refactor the Page Allocator API so the super page comes in
  // decommitted initially.
  SetSystemPagesAccess(
      ret + slot_span_committed_size,
      (super_page + kSuperPageSize) - (ret + slot_span_committed_size),
      PageInaccessible);
  root->IncreaseCommittedPages(slot_span_committed_size);

  // Make the first partition page in the super page a guard page, but leave a
  // hole in the middle.
  // This is where we put page metadata and also a tiny amount of extent
  // metadata.
  SetSystemPagesAccess(super_page, SystemPageSize(), PageInaccessible);
  SetSystemPagesAccess(super_page + (SystemPageSize() * 2),
                       PartitionPageSize() - (SystemPageSize() * 2),
                       PageInaccessible);
#if ENABLE_TAG_FOR_MTE_CHECKED_PTR
  // Make the first |slot_span_reserved_size| region of the tag bitmap
  // accessible. The rest of the region is set to inaccessible.
  char* next_tag_bitmap_page = reinterpret_cast<char*>(
      bits::Align(reinterpret_cast<uintptr_t>(
                      PartitionTagPointer(root->next_partition_page)),
                  SystemPageSize()));
  PA_DCHECK(next_tag_bitmap_page <= tag_bitmap + ActualTagBitmapSize());
  PA_DCHECK(next_tag_bitmap_page > tag_bitmap);
  // |ret| points at the end of the tag bitmap.
  PA_DCHECK(next_tag_bitmap_page <= ret);
  SetSystemPagesAccess(next_tag_bitmap_page, ret - next_tag_bitmap_page,
                       PageInaccessible);
#if MTE_CHECKED_PTR_SET_TAG_AT_FREE
  // TODO(tasak): Consider initializing each slot with a different tag.
  PartitionTagSetValue(ret, slot_span_reserved_size,
                       root->GetNewPartitionTag());
#endif
  root->next_tag_bitmap_page = next_tag_bitmap_page;
#endif

  // If PCScan is used, keep the quarantine bitmap committed, just release the
  // unused part of partition page, if any. If PCScan isn't used, release the
  // entire reserved region (PartitionRoot::EnablePCScan will be responsible
  // for committing it when enabling PCScan).
  if (root->pcscan.has_value()) {
    PA_DCHECK(root->scannable);
    if (quarantine_bitmaps_reserved_size > quarantine_bitmaps_size_to_commit) {
      SetSystemPagesAccess(
          quarantine_bitmaps + quarantine_bitmaps_size_to_commit,
          quarantine_bitmaps_reserved_size - quarantine_bitmaps_size_to_commit,
          PageInaccessible);
    }
  } else {
    // If partition isn't scannable, no quarantine bitmaps were reserved, hence
    // nothing to decommit.
    if (root->scannable) {
      PA_DCHECK(quarantine_bitmaps_reserved_size > 0);
      SetSystemPagesAccess(quarantine_bitmaps, quarantine_bitmaps_reserved_size,
                           PageInaccessible);
    } else {
      PA_DCHECK(quarantine_bitmaps_reserved_size == 0);
    }
  }

  // If we were after a specific address but didn't get it, assume that the
  // system chose a lousy address. Most OSes have a default placement
  // algorithm that isn't randomized; for example, most Linux distributions
  // will allocate the mapping directly before the last successful mapping,
  // which is far from random. So we just get fresh randomness for the next
  // mapping attempt.
  if (requested_address && requested_address != super_page)
    root->next_super_page = nullptr;

  // We allocated a new super page so update super page metadata.
  // First check if this is a new extent or not.
  auto* latest_extent =
      reinterpret_cast<PartitionSuperPageExtentEntry<thread_safe>*>(
          PartitionSuperPageToMetadataArea(super_page));
  // By storing the root in every extent metadata object, we have a fast way
  // to go from a pointer within the partition to the root object.
  latest_extent->root = root;
  // Most new extents will be part of a larger extent, and these three fields
  // are unused, but we initialize them to 0 so that we get a clear signal
  // in case they are accidentally used.
  latest_extent->super_page_base = nullptr;
  latest_extent->super_pages_end = nullptr;
  latest_extent->next = nullptr;

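  // If the OS handed us exactly the address we requested, the new super page
  // is contiguous with the current extent and can simply extend it; otherwise
  // a new extent is started.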
  PartitionSuperPageExtentEntry<thread_safe>* current_extent =
      root->current_extent;
  const bool is_new_extent = super_page != requested_address;
  if (UNLIKELY(is_new_extent)) {
    if (UNLIKELY(!current_extent)) {
      PA_DCHECK(!root->first_extent);
      root->first_extent = latest_extent;
    } else {
      PA_DCHECK(current_extent->super_page_base);
      current_extent->next = latest_extent;
    }
    root->current_extent = latest_extent;
    latest_extent->super_page_base = super_page;
    latest_extent->super_pages_end = super_page + kSuperPageSize;
  } else {
    // We allocated next to an existing extent so just nudge the size up a
    // little.
    PA_DCHECK(current_extent->super_pages_end);
    current_extent->super_pages_end += kSuperPageSize;
    PA_DCHECK(ret >= current_extent->super_page_base &&
              ret < current_extent->super_pages_end);
  }
  return ret;
}

template <bool thread_safe>
ALWAYS_INLINE void PartitionBucket<thread_safe>::InitializeSlotSpan(
    SlotSpanMetadata<thread_safe>* slot_span) {
  // The bucket never changes. We set it up once.
  slot_span->bucket = this;
  slot_span->empty_cache_index = -1;

  slot_span->Reset();

  uint16_t num_partition_pages = get_pages_per_slot_span();
  auto* page = reinterpret_cast<PartitionPage<thread_safe>*>(slot_span);
  for (uint16_t i = 1; i < num_partition_pages; ++i) {
    auto* secondary_page = page + i;
    secondary_page->slot_span_metadata_offset = i;
  }
}

template <bool thread_safe>
ALWAYS_INLINE char* PartitionBucket<thread_safe>::AllocAndFillFreelist(
    SlotSpanMetadata<thread_safe>* slot_span) {
  PA_DCHECK(slot_span !=
            SlotSpanMetadata<thread_safe>::get_sentinel_slot_span());
  uint16_t num_slots = slot_span->num_unprovisioned_slots;
  PA_DCHECK(num_slots);
  // We should only get here when _every_ slot is either used or unprovisioned.
  // (The third state is "on the freelist". If we have a non-empty freelist, we
  // should not get here.)
  PA_DCHECK(num_slots + slot_span->num_allocated_slots == get_slots_per_span());
  // Similarly, make explicitly sure that the freelist is empty.
  PA_DCHECK(!slot_span->freelist_head);
  PA_DCHECK(slot_span->num_allocated_slots >= 0);

  size_t size = slot_size;
  char* base = reinterpret_cast<char*>(
      SlotSpanMetadata<thread_safe>::ToPointer(slot_span));
  char* return_object = base + (size * slot_span->num_allocated_slots);
  char* first_freelist_pointer = return_object + size;
  char* first_freelist_pointer_extent =
      first_freelist_pointer + sizeof(PartitionFreelistEntry*);
  // Our goal is to fault as few system pages as possible. We calculate the
  // page containing the "end" of the returned slot, and then allow freelist
  // pointers to be written up to the end of that page.
  char* sub_page_limit = reinterpret_cast<char*>(
      RoundUpToSystemPage(reinterpret_cast<size_t>(first_freelist_pointer)));
  char* slots_limit = return_object + (size * num_slots);
  char* freelist_limit = sub_page_limit;
  if (UNLIKELY(slots_limit < freelist_limit))
    freelist_limit = slots_limit;

  uint16_t num_new_freelist_entries = 0;
  if (LIKELY(first_freelist_pointer_extent <= freelist_limit)) {
    // Only consider used space in the slot span. If we consider wasted
    // space, we may get an off-by-one when a freelist pointer fits in the
    // wasted space, but a slot does not.
    // We know we can fit at least one freelist pointer.
    num_new_freelist_entries = 1;
    // Any further entries require space for a whole slot.
    num_new_freelist_entries += static_cast<uint16_t>(
        (freelist_limit - first_freelist_pointer_extent) / size);
  }

  // We always return an object slot -- that's the +1 below.
  // We do not necessarily create any new freelist entries, because we cross
  // sub-page boundaries frequently for large bucket sizes.
  PA_DCHECK(num_new_freelist_entries + 1 <= num_slots);
  num_slots -= (num_new_freelist_entries + 1);
  slot_span->num_unprovisioned_slots = num_slots;
  slot_span->num_allocated_slots++;

  if (LIKELY(num_new_freelist_entries)) {
    char* freelist_pointer = first_freelist_pointer;
    auto* entry = reinterpret_cast<PartitionFreelistEntry*>(freelist_pointer);
    slot_span->SetFreelistHead(entry);
    while (--num_new_freelist_entries) {
      freelist_pointer += size;
      auto* next_entry =
          reinterpret_cast<PartitionFreelistEntry*>(freelist_pointer);
      entry->SetNext(next_entry);
      entry = next_entry;
    }
    entry->SetNext(nullptr);
  } else {
    slot_span->SetFreelistHead(nullptr);
  }
  return return_object;
}

template <bool thread_safe>
bool PartitionBucket<thread_safe>::SetNewActiveSlotSpan() {
  SlotSpanMetadata<thread_safe>* slot_span = active_slot_spans_head;
  if (slot_span == SlotSpanMetadata<thread_safe>::get_sentinel_slot_span())
    return false;

  SlotSpanMetadata<thread_safe>* next_slot_span;

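  // Walk the active list. As a side effect, slot spans that are no longer
  // usable are moved onto the empty or decommitted lists (or counted as
  // full); the walk stops at the first span that can satisfy an allocation.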
  for (; slot_span; slot_span = next_slot_span) {
    next_slot_span = slot_span->next_slot_span;
    PA_DCHECK(slot_span->bucket == this);
    PA_DCHECK(slot_span != empty_slot_spans_head);
    PA_DCHECK(slot_span != decommitted_slot_spans_head);

    if (LIKELY(slot_span->is_active())) {
      // This slot span is usable because it has freelist entries, or has
      // unprovisioned slots we can create freelist entries from.
      active_slot_spans_head = slot_span;
      return true;
    }

    // Deal with empty and decommitted slot spans.
    if (LIKELY(slot_span->is_empty())) {
      slot_span->next_slot_span = empty_slot_spans_head;
      empty_slot_spans_head = slot_span;
    } else if (LIKELY(slot_span->is_decommitted())) {
      slot_span->next_slot_span = decommitted_slot_spans_head;
      decommitted_slot_spans_head = slot_span;
    } else {
      PA_DCHECK(slot_span->is_full());
      // If we get here, we found a full slot span. Skip over it too, and also
      // tag it as full (via a negative value). We need it tagged so that
      // free'ing can tell, and move it back into the active list.
      slot_span->num_allocated_slots = -slot_span->num_allocated_slots;
      ++num_full_slot_spans;
      // num_full_slot_spans is a uint16_t for efficient packing so guard
      // against overflow to be safe.
      if (UNLIKELY(!num_full_slot_spans))
        OnFull();
      // Not necessary but might help stop accidents.
      slot_span->next_slot_span = nullptr;
    }
  }

  active_slot_spans_head =
      SlotSpanMetadata<thread_safe>::get_sentinel_slot_span();
  return false;
}

template <bool thread_safe>
void* PartitionBucket<thread_safe>::SlowPathAlloc(
    PartitionRoot<thread_safe>* root,
    int flags,
    size_t raw_size,
    bool* is_already_zeroed) {
  // The slow path is called when the freelist is empty.
  PA_DCHECK(!active_slot_spans_head->freelist_head);

  SlotSpanMetadata<thread_safe>* new_slot_span = nullptr;
  // |new_slot_span->bucket| will always be |this|, except when |this| is the
  // sentinel bucket, which is used to signal a direct mapped allocation. In
  // this case |new_bucket| will be set properly later. This avoids a read for
  // most allocations.
  PartitionBucket* new_bucket = this;
  *is_already_zeroed = false;

  // For the PartitionRoot::Alloc() API, we have a bunch of buckets
  // marked as special cases. We bounce them through to the slow path so that
  // we can still have a blazing fast hot path due to lack of corner-case
  // branches.
  //
  // Note: The ordering of the conditionals matters! In particular,
  // SetNewActiveSlotSpan() has a side effect even when it returns false: it
  // sweeps the active list and may move slot spans into the empty or
  // decommitted lists, which affects the subsequent conditionals.
  bool return_null = flags & PartitionAllocReturnNull;
  if (UNLIKELY(is_direct_mapped())) {
    PA_DCHECK(raw_size > kMaxBucketed);
    PA_DCHECK(this == &root->sentinel_bucket);
    PA_DCHECK(active_slot_spans_head ==
              SlotSpanMetadata<thread_safe>::get_sentinel_slot_span());
    if (raw_size > MaxDirectMapped()) {
      if (return_null)
        return nullptr;
      // The lock is here to protect PA from:
      // 1. Concurrent calls
      // 2. Reentrant calls
      //
      // This is fine here however, as:
      // 1. Concurrency: |PartitionRoot::OutOfMemory()| never returns, so the
      //    lock will not be re-acquired, which would lead to acting on
      //    inconsistent data that could have been modified in-between
      //    releasing and acquiring it.
      // 2. Reentrancy: This is why we release the lock. On some platforms,
      //    terminating the process may free() memory, or even possibly try to
      //    allocate some. Calling free() is fine, but will deadlock since
      //    |PartitionRoot::lock_| is not recursive.
      //
      // Supporting reentrant calls properly is hard, and not a requirement for
      // PA. However up to that point, we've only *read* data, not *written* to
      // any state. Reentrant calls are then fine, especially as we don't
      // continue on this path. The only downside is possibly endless recursion
      // if the OOM handler allocates and fails to use UncheckedMalloc() or
      // equivalent, but that's violating the contract of
      // base::OnNoMemoryInternal().
      ScopedUnlockGuard<thread_safe> unlock{root->lock_};
      PartitionExcessiveAllocationSize(raw_size);
      IMMEDIATE_CRASH();  // Not required, kept as documentation.
    }
    new_slot_span = PartitionDirectMap(root, flags, raw_size);
    if (new_slot_span)
      new_bucket = new_slot_span->bucket;
    // Memory from PageAllocator is always zeroed.
    *is_already_zeroed = true;
  } else if (LIKELY(SetNewActiveSlotSpan())) {
    // First, did we find an active slot span in the active list?
    new_slot_span = active_slot_spans_head;
    PA_DCHECK(new_slot_span->is_active());
  } else if (LIKELY(empty_slot_spans_head != nullptr) ||
             LIKELY(decommitted_slot_spans_head != nullptr)) {
    // Second, look in our lists of empty and decommitted slot spans.
    // Check empty slot spans first, which are preferred, but beware that an
    // empty slot span might have been decommitted.
    while (LIKELY((new_slot_span = empty_slot_spans_head) != nullptr)) {
      PA_DCHECK(new_slot_span->bucket == this);
      PA_DCHECK(new_slot_span->is_empty() || new_slot_span->is_decommitted());
      empty_slot_spans_head = new_slot_span->next_slot_span;
      // Accept the empty slot span unless it got decommitted.
      if (new_slot_span->freelist_head) {
        new_slot_span->next_slot_span = nullptr;
        break;
      }
      PA_DCHECK(new_slot_span->is_decommitted());
      new_slot_span->next_slot_span = decommitted_slot_spans_head;
      decommitted_slot_spans_head = new_slot_span;
    }
    if (UNLIKELY(!new_slot_span) &&
        LIKELY(decommitted_slot_spans_head != nullptr)) {
      new_slot_span = decommitted_slot_spans_head;
      PA_DCHECK(new_slot_span->bucket == this);
      PA_DCHECK(new_slot_span->is_decommitted());
      decommitted_slot_spans_head = new_slot_span->next_slot_span;
      void* addr = SlotSpanMetadata<thread_safe>::ToPointer(new_slot_span);
      root->RecommitSystemPages(addr,
                                new_slot_span->bucket->get_bytes_per_span());
      new_slot_span->Reset();
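      // Whether decommitted-then-recommitted pages come back zeroed is
      // platform-dependent; kDecommittedPagesAreAlwaysZeroed records that
      // property for the current platform.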
      *is_already_zeroed = kDecommittedPagesAreAlwaysZeroed;
    }
    PA_DCHECK(new_slot_span);
  } else {
    // Third. If we get here, we need a brand new slot span.
    uint16_t num_partition_pages = get_pages_per_slot_span();
    void* raw_memory = AllocNewSlotSpan(root, flags, num_partition_pages,
                                        get_bytes_per_span());
    if (LIKELY(raw_memory != nullptr)) {
      new_slot_span =
          SlotSpanMetadata<thread_safe>::FromPointerNoAlignmentCheck(
              raw_memory);
      InitializeSlotSpan(new_slot_span);
      // New memory from PageAllocator is always zeroed.
      *is_already_zeroed = true;
    }
  }

  // Bail if we had a memory allocation failure.
  if (UNLIKELY(!new_slot_span)) {
    PA_DCHECK(active_slot_spans_head ==
              SlotSpanMetadata<thread_safe>::get_sentinel_slot_span());
    if (return_null)
      return nullptr;
    // See comment above.
    ScopedUnlockGuard<thread_safe> unlock{root->lock_};
    root->OutOfMemory(raw_size);
    IMMEDIATE_CRASH();  // Not required, kept as documentation.
  }

  PA_DCHECK(new_bucket != &root->sentinel_bucket);
  new_bucket->active_slot_spans_head = new_slot_span;
  if (new_slot_span->CanStoreRawSize())
    new_slot_span->SetRawSize(raw_size);

  // If we found an active slot span with free slots, or an empty slot span, we
  // have a usable freelist head.
  if (LIKELY(new_slot_span->freelist_head != nullptr)) {
    PartitionFreelistEntry* entry = new_slot_span->freelist_head;
    PartitionFreelistEntry* new_head = entry->GetNext();
    new_slot_span->SetFreelistHead(new_head);
    new_slot_span->num_allocated_slots++;
    return entry;
  }
  // Otherwise, we need to build the freelist.
  PA_DCHECK(new_slot_span->num_unprovisioned_slots);
  return AllocAndFillFreelist(new_slot_span);
}

template struct PartitionBucket<ThreadSafe>;
template struct PartitionBucket<NotThreadSafe>;

}  // namespace internal
}  // namespace base