1 // Copyright (c) 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
6 #define BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
7 
8 #include <errno.h>
9 #include <sys/mman.h>
10 
11 #include "base/logging.h"
12 #include "build/build_config.h"
13 
14 #if defined(OS_MACOSX)
15 #include "base/mac/foundation_util.h"
16 #include "base/mac/mac_util.h"
17 #include "base/mac/scoped_cftyperef.h"
18 
19 #include <Security/Security.h>
20 #include <mach/mach.h>
21 #endif
22 #if defined(OS_ANDROID)
23 #include <sys/prctl.h>
24 #endif
25 #if defined(OS_BSD) || defined(OS_LINUX)
26 #include <sys/resource.h>
27 
28 #include <algorithm>
29 #endif
30 
31 #include "base/allocator/partition_allocator/page_allocator.h"
32 
33 #ifndef MAP_ANONYMOUS
34 #define MAP_ANONYMOUS MAP_ANON
35 #endif
36 
37 namespace base {
38 
39 namespace {
40 
41 #if defined(OS_ANDROID)
PageTagToName(PageTag tag)42 const char* PageTagToName(PageTag tag) {
43   // Important: All the names should be string literals. As per prctl.h in
44   // //third_party/android_ndk the kernel keeps a pointer to the name instead
45   // of copying it.
46   //
47   // Having the name in .rodata ensures that the pointer remains valid as
48   // long as the mapping is alive.
49   switch (tag) {
50     case PageTag::kBlinkGC:
51       return "blink_gc";
52     case PageTag::kPartitionAlloc:
53       return "partition_alloc";
54     case PageTag::kChromium:
55       return "chromium";
56     case PageTag::kV8:
57       return "v8";
58     default:
59       DCHECK(false);
60       return "";
61   }
62 }
63 #endif  // defined(OS_ANDROID)
64 
65 #if defined(OS_MACOSX)
66 // Tests whether the version of macOS supports the MAP_JIT flag and if the
67 // current process is signed with the allow-jit entitlement.
UseMapJit()68 bool UseMapJit() {
69   if (!mac::IsAtLeastOS10_14())
70     return false;
71 
72   ScopedCFTypeRef<SecTaskRef> task(SecTaskCreateFromSelf(kCFAllocatorDefault));
73   ScopedCFTypeRef<CFErrorRef> error;
74   ScopedCFTypeRef<CFTypeRef> value(SecTaskCopyValueForEntitlement(
75       task.get(), CFSTR("com.apple.security.cs.allow-jit"),
76       error.InitializeInto()));
77   if (error)
78     return false;
79   return mac::CFCast<CFBooleanRef>(value.get()) == kCFBooleanTrue;
80 }
81 #endif  // defined(OS_MACOSX)
82 
83 }  // namespace
84 
// |mmap| uses a nearby address if the hint address is blocked, i.e. the hint
// passed to SystemAllocPagesInternal() is a suggestion rather than a
// requirement.
constexpr bool kHintIsAdvisory = true;
// Holds the errno value captured by SystemAllocPagesInternal() the last time
// its mmap() call returned MAP_FAILED; stays 0 until such a failure occurs.
std::atomic<int32_t> s_allocPageErrorCode{0};
88 
GetAccessFlags(PageAccessibilityConfiguration accessibility)89 int GetAccessFlags(PageAccessibilityConfiguration accessibility) {
90   switch (accessibility) {
91     case PageRead:
92       return PROT_READ;
93     case PageReadWrite:
94       return PROT_READ | PROT_WRITE;
95     case PageReadExecute:
96       return PROT_READ | PROT_EXEC;
97     case PageReadWriteExecute:
98       return PROT_READ | PROT_WRITE | PROT_EXEC;
99     default:
100       NOTREACHED();
101       FALLTHROUGH;
102     case PageInaccessible:
103       return PROT_NONE;
104   }
105 }
106 
SystemAllocPagesInternal(void * hint,size_t length,PageAccessibilityConfiguration accessibility,PageTag page_tag,bool commit)107 void* SystemAllocPagesInternal(void* hint,
108                                size_t length,
109                                PageAccessibilityConfiguration accessibility,
110                                PageTag page_tag,
111                                bool commit) {
112 #if defined(OS_MACOSX)
113   // Use a custom tag to make it easier to distinguish Partition Alloc regions
114   // in vmmap(1). Tags between 240-255 are supported.
115   DCHECK_LE(PageTag::kFirst, page_tag);
116   DCHECK_GE(PageTag::kLast, page_tag);
117   int fd = VM_MAKE_TAG(static_cast<int>(page_tag));
118 #else
119   int fd = -1;
120 #endif
121 
122   int access_flag = GetAccessFlags(accessibility);
123   int map_flags = MAP_ANONYMOUS | MAP_PRIVATE;
124 
125 #if defined(OS_MACOSX)
126   // On macOS 10.14 and higher, executables that are code signed with the
127   // "runtime" option cannot execute writable memory by default. They can opt
128   // into this capability by specifying the "com.apple.security.cs.allow-jit"
129   // code signing entitlement and allocating the region with the MAP_JIT flag.
130   static const bool kUseMapJit = UseMapJit();
131   if (page_tag == PageTag::kV8 && kUseMapJit) {
132     map_flags |= MAP_JIT;
133   }
134 #endif
135 
136   void* ret =
137       mmap(hint, length, access_flag, map_flags, fd, 0);
138   if (ret == MAP_FAILED) {
139     s_allocPageErrorCode = errno;
140     ret = nullptr;
141   }
142 
143 #if defined(OS_ANDROID)
144   // On Android, anonymous mappings can have a name attached to them. This is
145   // useful for debugging, and double-checking memory attribution.
146   if (ret) {
147     // No error checking on purpose, testing only.
148     prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ret, length,
149           PageTagToName(page_tag));
150   }
151 #endif
152 
153   return ret;
154 }
155 
TrimMappingInternal(void * base,size_t base_length,size_t trim_length,PageAccessibilityConfiguration accessibility,bool commit,size_t pre_slack,size_t post_slack)156 void* TrimMappingInternal(void* base,
157                           size_t base_length,
158                           size_t trim_length,
159                           PageAccessibilityConfiguration accessibility,
160                           bool commit,
161                           size_t pre_slack,
162                           size_t post_slack) {
163   void* ret = base;
164   // We can resize the allocation run. Release unneeded memory before and after
165   // the aligned range.
166   if (pre_slack) {
167     int res = munmap(base, pre_slack);
168     CHECK(!res);
169     ret = reinterpret_cast<char*>(base) + pre_slack;
170   }
171   if (post_slack) {
172     int res = munmap(reinterpret_cast<char*>(ret) + trim_length, post_slack);
173     CHECK(!res);
174   }
175   return ret;
176 }
177 
TrySetSystemPagesAccessInternal(void * address,size_t length,PageAccessibilityConfiguration accessibility)178 bool TrySetSystemPagesAccessInternal(
179     void* address,
180     size_t length,
181     PageAccessibilityConfiguration accessibility) {
182   return 0 == mprotect(address, length, GetAccessFlags(accessibility));
183 }
184 
SetSystemPagesAccessInternal(void * address,size_t length,PageAccessibilityConfiguration accessibility)185 void SetSystemPagesAccessInternal(
186     void* address,
187     size_t length,
188     PageAccessibilityConfiguration accessibility) {
189   CHECK_EQ(0, mprotect(address, length, GetAccessFlags(accessibility)));
190 }
191 
FreePagesInternal(void * address,size_t length)192 void FreePagesInternal(void* address, size_t length) {
193   CHECK(!munmap(address, length));
194 }
195 
// Decommits |length| bytes starting at |address|. This implementation simply
// forwards both arguments to DiscardSystemPages(); the protection of the range
// is not changed here.
void DecommitSystemPagesInternal(void* address, size_t length) {
  // In POSIX, there is no decommit concept. Discarding is an effective way of
  // implementing the Windows semantics where the OS is allowed to not swap the
  // pages in the region.
  //
  // TODO(ajwong): Also explore setting PageInaccessible to make the protection
  // semantics consistent between Windows and POSIX. This might have a perf cost
  // though as both decommit and recommit would incur an extra syscall.
  // http://crbug.com/766882
  DiscardSystemPages(address, length);
}
207 
// Recommits |length| bytes starting at |address| after a decommit.
//
// Always returns true. |accessibility| is not read by this implementation, and
// outside of macOS the function performs no system call at all.
bool RecommitSystemPagesInternal(void* address,
                                 size_t length,
                                 PageAccessibilityConfiguration accessibility) {
#if defined(OS_MACOSX)
  // On macOS, to update accounting, we need to make another syscall. For more
  // details, see https://crbug.com/823915.
  // The return value of madvise() is deliberately not inspected here.
  madvise(address, length, MADV_FREE_REUSE);
#endif

  // On POSIX systems, the caller need simply read the memory to recommit it.
  // This has the correct behavior because the API requires the permissions to
  // be the same as before decommitting and all configurations can read.
  return true;
}
222 
DiscardSystemPagesInternal(void * address,size_t length)223 void DiscardSystemPagesInternal(void* address, size_t length) {
224 #if defined(OS_MACOSX)
225   int ret = madvise(address, length, MADV_FREE_REUSABLE);
226   if (ret) {
227     // MADV_FREE_REUSABLE sometimes fails, so fall back to MADV_DONTNEED.
228     ret = madvise(address, length, MADV_DONTNEED);
229   }
230   CHECK(0 == ret);
231 #else
232   // We have experimented with other flags, but with suboptimal results.
233   //
234   // MADV_FREE (Linux): Makes our memory measurements less predictable;
235   // performance benefits unclear.
236   //
237   // Therefore, we just do the simple thing: MADV_DONTNEED.
238   CHECK(!madvise(address, length, MADV_DONTNEED));
239 #endif
240 }
241 
242 }  // namespace base
243 
244 #endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
245