1 /*
2 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 #include "precompiled.hpp"
25 #include "gc/z/zArray.inline.hpp"
26 #include "gc/z/zBackingFile_linux_x86.hpp"
27 #include "gc/z/zBackingPath_linux_x86.hpp"
28 #include "gc/z/zErrno.hpp"
29 #include "gc/z/zLargePages.inline.hpp"
30 #include "logging/log.hpp"
31 #include "runtime/os.hpp"
32 #include "utilities/align.hpp"
33 #include "utilities/debug.hpp"
34
35 #include <fcntl.h>
36 #include <sys/mman.h>
37 #include <sys/stat.h>
38 #include <sys/statfs.h>
39 #include <sys/types.h>
40 #include <unistd.h>
41
// Names of the filesystems that can host the backing file. Used when
// searching for a mount point and in log messages.
#define ZFILESYSTEM_TMPFS                "tmpfs"
#define ZFILESYSTEM_HUGETLBFS            "hugetlbfs"

// Sysfs file consulted to detect transparent huge page support on tmpfs
#define ZFILENAME_SHMEM_ENABLED          "/sys/kernel/mm/transparent_hugepage/shmem_enabled"

// Base name of the file backing the Java heap
#define ZFILENAME_HEAP                   "java_heap"

// Fallback definitions, so that this file also builds against older
// Linux headers that lack these constants
#if !defined(__NR_memfd_create)
#define __NR_memfd_create                319
#endif
#if !defined(MFD_CLOEXEC)
#define MFD_CLOEXEC                      0x0001U
#endif
#if !defined(MFD_HUGETLB)
#define MFD_HUGETLB                      0x0004U
#endif
#if !defined(O_CLOEXEC)
#define O_CLOEXEC                        02000000
#endif
#if !defined(O_TMPFILE)
#define O_TMPFILE                        (020000000 | O_DIRECTORY)
#endif

// Filesystem magic numbers as reported in statfs::f_type, see statfs(2)
#if !defined(TMPFS_MAGIC)
#define TMPFS_MAGIC                      0x01021994
#endif
#if !defined(HUGETLBFS_MAGIC)
#define HUGETLBFS_MAGIC                  0x958458f6
#endif
76
// NULL-terminated list of tmpfs mount points to prefer when looking for
// a place to create the backing file. Earlier entries have higher priority.
static const char* z_preferred_tmpfs_mountpoints[] = {
  "/dev/shm",
  "/run/shm",
  NULL
};
83
// NULL-terminated list of hugetlbfs mount points to prefer when looking for
// a place to create the backing file. Earlier entries have higher priority.
static const char* z_preferred_hugetlbfs_mountpoints[] = {
  "/dev/hugepages",
  "/hugepages",
  NULL
};
90
// Thin wrapper that issues the memfd_create system call directly through
// syscall(), passing the given name and flags. Callers treat a result
// of -1 as failure.
static int z_memfd_create(const char *name, unsigned int flags) {
  const long result = syscall(__NR_memfd_create, name, flags);
  return static_cast<int>(result);
}
94
95 bool ZBackingFile::_hugetlbfs_mmap_retry = true;
96
ZBackingFile()97 ZBackingFile::ZBackingFile() :
98 _fd(-1),
99 _filesystem(0),
100 _available(0),
101 _initialized(false) {
102
103 // Create backing file
104 _fd = create_fd(ZFILENAME_HEAP);
105 if (_fd == -1) {
106 return;
107 }
108
109 // Get filesystem statistics
110 struct statfs statfs_buf;
111 if (fstatfs(_fd, &statfs_buf) == -1) {
112 ZErrno err;
113 log_error(gc, init)("Failed to determine filesystem type for backing file (%s)",
114 err.to_string());
115 return;
116 }
117
118 _filesystem = statfs_buf.f_type;
119 _available = statfs_buf.f_bavail * statfs_buf.f_bsize;
120
121 // Make sure we're on a supported filesystem
122 if (!is_tmpfs() && !is_hugetlbfs()) {
123 log_error(gc, init)("Backing file must be located on a %s or a %s filesystem",
124 ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS);
125 return;
126 }
127
128 // Make sure the filesystem type matches requested large page type
129 if (ZLargePages::is_transparent() && !is_tmpfs()) {
130 log_error(gc, init)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem",
131 ZFILESYSTEM_TMPFS);
132 return;
133 }
134
135 if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
136 log_error(gc, init)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
137 ZFILESYSTEM_TMPFS);
138 return;
139 }
140
141 if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
142 log_error(gc, init)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled when using a %s filesystem",
143 ZFILESYSTEM_HUGETLBFS);
144 return;
145 }
146
147 if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
148 log_error(gc, init)("-XX:+UseLargePages must be enabled when using a %s filesystem",
149 ZFILESYSTEM_HUGETLBFS);
150 return;
151 }
152
153 // Successfully initialized
154 _initialized = true;
155 }
156
create_mem_fd(const char * name) const157 int ZBackingFile::create_mem_fd(const char* name) const {
158 // Create file name
159 char filename[PATH_MAX];
160 snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : "");
161
162 // Create file
163 const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0;
164 const int fd = z_memfd_create(filename, MFD_CLOEXEC | extra_flags);
165 if (fd == -1) {
166 ZErrno err;
167 log_debug(gc, init)("Failed to create memfd file (%s)",
168 ((UseLargePages && err == EINVAL) ? "Hugepages not supported" : err.to_string()));
169 return -1;
170 }
171
172 log_info(gc, init)("Heap backed by file: /memfd:%s", filename);
173
174 return fd;
175 }
176
create_file_fd(const char * name) const177 int ZBackingFile::create_file_fd(const char* name) const {
178 const char* const filesystem = ZLargePages::is_explicit()
179 ? ZFILESYSTEM_HUGETLBFS
180 : ZFILESYSTEM_TMPFS;
181 const char** const preferred_mountpoints = ZLargePages::is_explicit()
182 ? z_preferred_hugetlbfs_mountpoints
183 : z_preferred_tmpfs_mountpoints;
184
185 // Find mountpoint
186 ZBackingPath path(filesystem, preferred_mountpoints);
187 if (path.get() == NULL) {
188 log_error(gc, init)("Use -XX:ZPath to specify the path to a %s filesystem", filesystem);
189 return -1;
190 }
191
192 // Try to create an anonymous file using the O_TMPFILE flag. Note that this
193 // flag requires kernel >= 3.11. If this fails we fall back to open/unlink.
194 const int fd_anon = open(path.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
195 if (fd_anon == -1) {
196 ZErrno err;
197 log_debug(gc, init)("Failed to create anonymous file in %s (%s)", path.get(),
198 (err == EINVAL ? "Not supported" : err.to_string()));
199 } else {
200 // Get inode number for anonymous file
201 struct stat stat_buf;
202 if (fstat(fd_anon, &stat_buf) == -1) {
203 ZErrno err;
204 log_error(gc, init)("Failed to determine inode number for anonymous file (%s)", err.to_string());
205 return -1;
206 }
207
208 log_info(gc, init)("Heap backed by file: %s/#" UINT64_FORMAT, path.get(), (uint64_t)stat_buf.st_ino);
209
210 return fd_anon;
211 }
212
213 log_debug(gc, init)("Falling back to open/unlink");
214
215 // Create file name
216 char filename[PATH_MAX];
217 snprintf(filename, sizeof(filename), "%s/%s.%d", path.get(), name, os::current_process_id());
218
219 // Create file
220 const int fd = open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
221 if (fd == -1) {
222 ZErrno err;
223 log_error(gc, init)("Failed to create file %s (%s)", filename, err.to_string());
224 return -1;
225 }
226
227 // Unlink file
228 if (unlink(filename) == -1) {
229 ZErrno err;
230 log_error(gc, init)("Failed to unlink file %s (%s)", filename, err.to_string());
231 return -1;
232 }
233
234 log_info(gc, init)("Heap backed by file: %s", filename);
235
236 return fd;
237 }
238
create_fd(const char * name) const239 int ZBackingFile::create_fd(const char* name) const {
240 if (ZPath == NULL) {
241 // If the path is not explicitly specified, then we first try to create a memfd file
242 // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might
243 // not be supported at all (requires kernel >= 3.17), or it might not support large
244 // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a
245 // file on an accessible tmpfs or hugetlbfs mount point.
246 const int fd = create_mem_fd(name);
247 if (fd != -1) {
248 return fd;
249 }
250
251 log_debug(gc, init)("Falling back to searching for an accessible mount point");
252 }
253
254 return create_file_fd(name);
255 }
256
is_initialized() const257 bool ZBackingFile::is_initialized() const {
258 return _initialized;
259 }
260
fd() const261 int ZBackingFile::fd() const {
262 return _fd;
263 }
264
available() const265 size_t ZBackingFile::available() const {
266 return _available;
267 }
268
is_tmpfs() const269 bool ZBackingFile::is_tmpfs() const {
270 return _filesystem == TMPFS_MAGIC;
271 }
272
is_hugetlbfs() const273 bool ZBackingFile::is_hugetlbfs() const {
274 return _filesystem == HUGETLBFS_MAGIC;
275 }
276
tmpfs_supports_transparent_huge_pages() const277 bool ZBackingFile::tmpfs_supports_transparent_huge_pages() const {
278 // If the shmem_enabled file exists and is readable then we
279 // know the kernel supports transparent huge pages for tmpfs.
280 return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
281 }
282
try_split_and_expand_tmpfs(size_t offset,size_t length,size_t alignment) const283 bool ZBackingFile::try_split_and_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
284 // Try first smaller part.
285 const size_t offset0 = offset;
286 const size_t length0 = align_up(length / 2, alignment);
287 if (!try_expand_tmpfs(offset0, length0, alignment)) {
288 return false;
289 }
290
291 // Try second smaller part.
292 const size_t offset1 = offset0 + length0;
293 const size_t length1 = length - length0;
294 if (!try_expand_tmpfs(offset1, length1, alignment)) {
295 return false;
296 }
297
298 return true;
299 }
300
try_expand_tmpfs(size_t offset,size_t length,size_t alignment) const301 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
302 assert(length > 0, "Invalid length");
303 assert(is_aligned(length, alignment), "Invalid length");
304
305 ZErrno err = posix_fallocate(_fd, offset, length);
306
307 if (err == EINTR && length > alignment) {
308 // Calling posix_fallocate() with a large length can take a long
309 // time to complete. When running profilers, such as VTune, this
310 // syscall will be constantly interrupted by signals. Expanding
311 // the file in smaller steps avoids this problem.
312 return try_split_and_expand_tmpfs(offset, length, alignment);
313 }
314
315 if (err) {
316 log_error(gc)("Failed to allocate backing file (%s)", err.to_string());
317 return false;
318 }
319
320 return true;
321 }
322
try_expand_tmpfs(size_t offset,size_t length) const323 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length) const {
324 assert(is_tmpfs(), "Wrong filesystem");
325 return try_expand_tmpfs(offset, length, os::vm_page_size());
326 }
327
try_expand_hugetlbfs(size_t offset,size_t length) const328 bool ZBackingFile::try_expand_hugetlbfs(size_t offset, size_t length) const {
329 assert(is_hugetlbfs(), "Wrong filesystem");
330
331 // Prior to kernel 4.3, hugetlbfs did not support posix_fallocate().
332 // Instead of posix_fallocate() we can use a well-known workaround,
333 // which involves truncating the file to requested size and then try
334 // to map it to verify that there are enough huge pages available to
335 // back it.
336 while (ftruncate(_fd, offset + length) == -1) {
337 ZErrno err;
338 if (err != EINTR) {
339 log_error(gc)("Failed to truncate backing file (%s)", err.to_string());
340 return false;
341 }
342 }
343
344 // If we fail mapping during initialization, i.e. when we are pre-mapping
345 // the heap, then we wait and retry a few times before giving up. Otherwise
346 // there is a risk that running JVMs back-to-back will fail, since there
347 // is a delay between process termination and the huge pages owned by that
348 // process being returned to the huge page pool and made available for new
349 // allocations.
350 void* addr = MAP_FAILED;
351 const int max_attempts = 5;
352 for (int attempt = 1; attempt <= max_attempts; attempt++) {
353 addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
354 if (addr != MAP_FAILED || !_hugetlbfs_mmap_retry) {
355 // Mapping was successful or mmap retry is disabled
356 break;
357 }
358
359 ZErrno err;
360 log_debug(gc)("Failed to map backing file (%s), attempt %d of %d",
361 err.to_string(), attempt, max_attempts);
362
363 // Wait and retry in one second, in the hope that
364 // huge pages will be available by then.
365 sleep(1);
366 }
367
368 // Disable mmap retry from now on
369 if (_hugetlbfs_mmap_retry) {
370 _hugetlbfs_mmap_retry = false;
371 }
372
373 if (addr == MAP_FAILED) {
374 // Not enough huge pages left
375 ZErrno err;
376 log_error(gc)("Failed to map backing file (%s)", err.to_string());
377 return false;
378 }
379
380 // Successful mapping, unmap again. From now on the pages we mapped
381 // will be reserved for this file.
382 if (munmap(addr, length) == -1) {
383 ZErrno err;
384 log_error(gc)("Failed to unmap backing file (%s)", err.to_string());
385 return false;
386 }
387
388 return true;
389 }
390
try_expand_tmpfs_or_hugetlbfs(size_t offset,size_t length,size_t alignment) const391 bool ZBackingFile::try_expand_tmpfs_or_hugetlbfs(size_t offset, size_t length, size_t alignment) const {
392 assert(is_aligned(offset, alignment), "Invalid offset");
393 assert(is_aligned(length, alignment), "Invalid length");
394
395 log_debug(gc)("Expanding heap from " SIZE_FORMAT "M to " SIZE_FORMAT "M", offset / M, (offset + length) / M);
396
397 return is_hugetlbfs() ? try_expand_hugetlbfs(offset, length) : try_expand_tmpfs(offset, length);
398 }
399
try_expand(size_t offset,size_t length,size_t alignment) const400 size_t ZBackingFile::try_expand(size_t offset, size_t length, size_t alignment) const {
401 size_t start = offset;
402 size_t end = offset + length;
403
404 // Try to expand
405 if (try_expand_tmpfs_or_hugetlbfs(start, length, alignment)) {
406 // Success
407 return end;
408 }
409
410 // Failed, try to expand as much as possible
411 for (;;) {
412 length = align_down((end - start) / 2, alignment);
413 if (length < alignment) {
414 // Done, don't expand more
415 return start;
416 }
417
418 if (try_expand_tmpfs_or_hugetlbfs(start, length, alignment)) {
419 // Success, try expand more
420 start += length;
421 } else {
422 // Failed, try expand less
423 end -= length;
424 }
425 }
426 }
427