1 // Copyright (c) 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
6 #define BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
7
8 #include <errno.h>
9 #include <sys/mman.h>
10
11 #include "base/logging.h"
12 #include "build/build_config.h"
13
14 #if defined(OS_MACOSX)
15 #include "base/mac/foundation_util.h"
16 #include "base/mac/mac_util.h"
17 #include "base/mac/scoped_cftyperef.h"
18
19 #include <Security/Security.h>
20 #include <mach/mach.h>
21 #endif
22 #if defined(OS_ANDROID)
23 #include <sys/prctl.h>
24 #endif
25 #if defined(OS_BSD) || defined(OS_LINUX)
26 #include <sys/resource.h>
27
28 #include <algorithm>
29 #endif
30
31 #include "base/allocator/partition_allocator/page_allocator.h"
32
33 #ifndef MAP_ANONYMOUS
34 #define MAP_ANONYMOUS MAP_ANON
35 #endif
36
37 namespace base {
38
39 namespace {
40
41 #if defined(OS_ANDROID)
PageTagToName(PageTag tag)42 const char* PageTagToName(PageTag tag) {
43 // Important: All the names should be string literals. As per prctl.h in
44 // //third_party/android_ndk the kernel keeps a pointer to the name instead
45 // of copying it.
46 //
47 // Having the name in .rodata ensures that the pointer remains valid as
48 // long as the mapping is alive.
49 switch (tag) {
50 case PageTag::kBlinkGC:
51 return "blink_gc";
52 case PageTag::kPartitionAlloc:
53 return "partition_alloc";
54 case PageTag::kChromium:
55 return "chromium";
56 case PageTag::kV8:
57 return "v8";
58 default:
59 DCHECK(false);
60 return "";
61 }
62 }
63 #endif // defined(OS_ANDROID)
64
65 #if defined(OS_MACOSX)
66 // Tests whether the version of macOS supports the MAP_JIT flag and if the
67 // current process is signed with the allow-jit entitlement.
UseMapJit()68 bool UseMapJit() {
69 if (!mac::IsAtLeastOS10_14())
70 return false;
71
72 ScopedCFTypeRef<SecTaskRef> task(SecTaskCreateFromSelf(kCFAllocatorDefault));
73 ScopedCFTypeRef<CFErrorRef> error;
74 ScopedCFTypeRef<CFTypeRef> value(SecTaskCopyValueForEntitlement(
75 task.get(), CFSTR("com.apple.security.cs.allow-jit"),
76 error.InitializeInto()));
77 if (error)
78 return false;
79 return mac::CFCast<CFBooleanRef>(value.get()) == kCFBooleanTrue;
80 }
81 #endif // defined(OS_MACOSX)
82
83 } // namespace
84
// The address hint given to |mmap| is advisory only: when the hinted address
// is blocked, |mmap| falls back to a nearby address.
constexpr bool kHintIsAdvisory = true;

// Receives the |errno| value whenever |mmap| fails in
// SystemAllocPagesInternal(). Starts out as zero.
std::atomic<int32_t> s_allocPageErrorCode(0);
88
GetAccessFlags(PageAccessibilityConfiguration accessibility)89 int GetAccessFlags(PageAccessibilityConfiguration accessibility) {
90 switch (accessibility) {
91 case PageRead:
92 return PROT_READ;
93 case PageReadWrite:
94 return PROT_READ | PROT_WRITE;
95 case PageReadExecute:
96 return PROT_READ | PROT_EXEC;
97 case PageReadWriteExecute:
98 return PROT_READ | PROT_WRITE | PROT_EXEC;
99 default:
100 NOTREACHED();
101 FALLTHROUGH;
102 case PageInaccessible:
103 return PROT_NONE;
104 }
105 }
106
SystemAllocPagesInternal(void * hint,size_t length,PageAccessibilityConfiguration accessibility,PageTag page_tag,bool commit)107 void* SystemAllocPagesInternal(void* hint,
108 size_t length,
109 PageAccessibilityConfiguration accessibility,
110 PageTag page_tag,
111 bool commit) {
112 #if defined(OS_MACOSX)
113 // Use a custom tag to make it easier to distinguish Partition Alloc regions
114 // in vmmap(1). Tags between 240-255 are supported.
115 DCHECK_LE(PageTag::kFirst, page_tag);
116 DCHECK_GE(PageTag::kLast, page_tag);
117 int fd = VM_MAKE_TAG(static_cast<int>(page_tag));
118 #else
119 int fd = -1;
120 #endif
121
122 int access_flag = GetAccessFlags(accessibility);
123 int map_flags = MAP_ANONYMOUS | MAP_PRIVATE;
124
125 #if defined(OS_MACOSX)
126 // On macOS 10.14 and higher, executables that are code signed with the
127 // "runtime" option cannot execute writable memory by default. They can opt
128 // into this capability by specifying the "com.apple.security.cs.allow-jit"
129 // code signing entitlement and allocating the region with the MAP_JIT flag.
130 static const bool kUseMapJit = UseMapJit();
131 if (page_tag == PageTag::kV8 && kUseMapJit) {
132 map_flags |= MAP_JIT;
133 }
134 #endif
135
136 void* ret =
137 mmap(hint, length, access_flag, map_flags, fd, 0);
138 if (ret == MAP_FAILED) {
139 s_allocPageErrorCode = errno;
140 ret = nullptr;
141 }
142
143 #if defined(OS_ANDROID)
144 // On Android, anonymous mappings can have a name attached to them. This is
145 // useful for debugging, and double-checking memory attribution.
146 if (ret) {
147 // No error checking on purpose, testing only.
148 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ret, length,
149 PageTagToName(page_tag));
150 }
151 #endif
152
153 return ret;
154 }
155
TrimMappingInternal(void * base,size_t base_length,size_t trim_length,PageAccessibilityConfiguration accessibility,bool commit,size_t pre_slack,size_t post_slack)156 void* TrimMappingInternal(void* base,
157 size_t base_length,
158 size_t trim_length,
159 PageAccessibilityConfiguration accessibility,
160 bool commit,
161 size_t pre_slack,
162 size_t post_slack) {
163 void* ret = base;
164 // We can resize the allocation run. Release unneeded memory before and after
165 // the aligned range.
166 if (pre_slack) {
167 int res = munmap(base, pre_slack);
168 CHECK(!res);
169 ret = reinterpret_cast<char*>(base) + pre_slack;
170 }
171 if (post_slack) {
172 int res = munmap(reinterpret_cast<char*>(ret) + trim_length, post_slack);
173 CHECK(!res);
174 }
175 return ret;
176 }
177
TrySetSystemPagesAccessInternal(void * address,size_t length,PageAccessibilityConfiguration accessibility)178 bool TrySetSystemPagesAccessInternal(
179 void* address,
180 size_t length,
181 PageAccessibilityConfiguration accessibility) {
182 return 0 == mprotect(address, length, GetAccessFlags(accessibility));
183 }
184
SetSystemPagesAccessInternal(void * address,size_t length,PageAccessibilityConfiguration accessibility)185 void SetSystemPagesAccessInternal(
186 void* address,
187 size_t length,
188 PageAccessibilityConfiguration accessibility) {
189 CHECK_EQ(0, mprotect(address, length, GetAccessFlags(accessibility)));
190 }
191
FreePagesInternal(void * address,size_t length)192 void FreePagesInternal(void* address, size_t length) {
193 CHECK(!munmap(address, length));
194 }
195
DecommitSystemPagesInternal(void * address,size_t length)196 void DecommitSystemPagesInternal(void* address, size_t length) {
197 // In POSIX, there is no decommit concept. Discarding is an effective way of
198 // implementing the Windows semantics where the OS is allowed to not swap the
199 // pages in the region.
200 //
201 // TODO(ajwong): Also explore setting PageInaccessible to make the protection
202 // semantics consistent between Windows and POSIX. This might have a perf cost
203 // though as both decommit and recommit would incur an extra syscall.
204 // http://crbug.com/766882
205 DiscardSystemPages(address, length);
206 }
207
// Recommits the |length| bytes at |address|. Always returns true;
// |accessibility| is not consulted by this implementation.
bool RecommitSystemPagesInternal(void* address,
                                 size_t length,
                                 PageAccessibilityConfiguration accessibility) {
#if defined(OS_MACOSX)
  // macOS needs one more syscall to bring its accounting up to date; see
  // https://crbug.com/823915 for details. The result is intentionally ignored.
  static_cast<void>(madvise(address, length, MADV_FREE_REUSE));
#endif

  // Nothing else to do on POSIX: the caller recommits the memory simply by
  // reading it. That is correct because the API requires the permissions to
  // match those in place before decommitting, and all configurations can read.
  return true;
}
222
DiscardSystemPagesInternal(void * address,size_t length)223 void DiscardSystemPagesInternal(void* address, size_t length) {
224 #if defined(OS_MACOSX)
225 int ret = madvise(address, length, MADV_FREE_REUSABLE);
226 if (ret) {
227 // MADV_FREE_REUSABLE sometimes fails, so fall back to MADV_DONTNEED.
228 ret = madvise(address, length, MADV_DONTNEED);
229 }
230 CHECK(0 == ret);
231 #else
232 // We have experimented with other flags, but with suboptimal results.
233 //
234 // MADV_FREE (Linux): Makes our memory measurements less predictable;
235 // performance benefits unclear.
236 //
237 // Therefore, we just do the simple thing: MADV_DONTNEED.
238 CHECK(!madvise(address, length, MADV_DONTNEED));
239 #endif
240 }
241
242 } // namespace base
243
244 #endif // BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
245