1 /*
2  * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 #include "precompiled.hpp"
25 #include "gc/z/zArray.inline.hpp"
26 #include "gc/z/zBackingFile_linux_x86.hpp"
27 #include "gc/z/zBackingPath_linux_x86.hpp"
28 #include "gc/z/zErrno.hpp"
29 #include "gc/z/zLargePages.inline.hpp"
30 #include "logging/log.hpp"
31 #include "runtime/os.hpp"
32 #include "utilities/align.hpp"
33 #include "utilities/debug.hpp"
34 
35 #include <fcntl.h>
36 #include <sys/mman.h>
37 #include <sys/stat.h>
38 #include <sys/statfs.h>
39 #include <sys/types.h>
40 #include <unistd.h>
41 
// Names of the filesystems the heap backing file may be placed on
#define ZFILESYSTEM_TMPFS                "tmpfs"
#define ZFILESYSTEM_HUGETLBFS            "hugetlbfs"

// Sysfs file used to detect transparent huge page support on tmpfs
#define ZFILENAME_SHMEM_ENABLED          "/sys/kernel/mm/transparent_hugepage/shmem_enabled"

// Name of the file backing the Java heap
#define ZFILENAME_HEAP                   "java_heap"

// Fallback definitions, so that the file still builds against
// system headers from older Linux systems.
#if !defined(__NR_memfd_create)
#define __NR_memfd_create                319
#endif
#if !defined(MFD_CLOEXEC)
#define MFD_CLOEXEC                      0x0001U
#endif
#if !defined(MFD_HUGETLB)
#define MFD_HUGETLB                      0x0004U
#endif
#if !defined(O_CLOEXEC)
#define O_CLOEXEC                        02000000
#endif
#if !defined(O_TMPFILE)
#define O_TMPFILE                        (020000000 | O_DIRECTORY)
#endif

// Filesystem type magic numbers, as reported in statfs.f_type, see statfs(2)
#if !defined(TMPFS_MAGIC)
#define TMPFS_MAGIC                      0x01021994
#endif
#if !defined(HUGETLBFS_MAGIC)
#define HUGETLBFS_MAGIC                  0x958458f6
#endif
76 
// NULL-terminated list of tmpfs mount points to prefer when looking
// for a place to put the backing file. Earlier entries take priority.
static const char* z_preferred_tmpfs_mountpoints[] = { "/dev/shm", "/run/shm", NULL };
83 
// NULL-terminated list of hugetlbfs mount points to prefer when looking
// for a place to put the backing file. Earlier entries take priority.
static const char* z_preferred_hugetlbfs_mountpoints[] = { "/dev/hugepages", "/hugepages", NULL };
90 
// Thin wrapper that issues the memfd_create system call directly through
// syscall(), passing name and flags unchanged. Returns whatever syscall()
// returns, narrowed to int.
static int z_memfd_create(const char *name, unsigned int flags) {
  const long result = syscall(__NR_memfd_create, name, flags);
  return (int)result;
}
94 
95 bool ZBackingFile::_hugetlbfs_mmap_retry = true;
96 
ZBackingFile()97 ZBackingFile::ZBackingFile() :
98     _fd(-1),
99     _filesystem(0),
100     _available(0),
101     _initialized(false) {
102 
103   // Create backing file
104   _fd = create_fd(ZFILENAME_HEAP);
105   if (_fd == -1) {
106     return;
107   }
108 
109   // Get filesystem statistics
110   struct statfs statfs_buf;
111   if (fstatfs(_fd, &statfs_buf) == -1) {
112     ZErrno err;
113     log_error(gc, init)("Failed to determine filesystem type for backing file (%s)",
114                         err.to_string());
115     return;
116   }
117 
118   _filesystem = statfs_buf.f_type;
119   _available = statfs_buf.f_bavail * statfs_buf.f_bsize;
120 
121   // Make sure we're on a supported filesystem
122   if (!is_tmpfs() && !is_hugetlbfs()) {
123     log_error(gc, init)("Backing file must be located on a %s or a %s filesystem",
124                         ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS);
125     return;
126   }
127 
128   // Make sure the filesystem type matches requested large page type
129   if (ZLargePages::is_transparent() && !is_tmpfs()) {
130     log_error(gc, init)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem",
131                         ZFILESYSTEM_TMPFS);
132     return;
133   }
134 
135   if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
136     log_error(gc, init)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
137                         ZFILESYSTEM_TMPFS);
138     return;
139   }
140 
141   if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
142     log_error(gc, init)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled when using a %s filesystem",
143                         ZFILESYSTEM_HUGETLBFS);
144     return;
145   }
146 
147   if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
148     log_error(gc, init)("-XX:+UseLargePages must be enabled when using a %s filesystem",
149                         ZFILESYSTEM_HUGETLBFS);
150     return;
151   }
152 
153   // Successfully initialized
154   _initialized = true;
155 }
156 
create_mem_fd(const char * name) const157 int ZBackingFile::create_mem_fd(const char* name) const {
158   // Create file name
159   char filename[PATH_MAX];
160   snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : "");
161 
162   // Create file
163   const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0;
164   const int fd = z_memfd_create(filename, MFD_CLOEXEC | extra_flags);
165   if (fd == -1) {
166     ZErrno err;
167     log_debug(gc, init)("Failed to create memfd file (%s)",
168                         ((UseLargePages && err == EINVAL) ? "Hugepages not supported" : err.to_string()));
169     return -1;
170   }
171 
172   log_info(gc, init)("Heap backed by file: /memfd:%s", filename);
173 
174   return fd;
175 }
176 
create_file_fd(const char * name) const177 int ZBackingFile::create_file_fd(const char* name) const {
178   const char* const filesystem = ZLargePages::is_explicit()
179                                  ? ZFILESYSTEM_HUGETLBFS
180                                  : ZFILESYSTEM_TMPFS;
181   const char** const preferred_mountpoints = ZLargePages::is_explicit()
182                                              ? z_preferred_hugetlbfs_mountpoints
183                                              : z_preferred_tmpfs_mountpoints;
184 
185   // Find mountpoint
186   ZBackingPath path(filesystem, preferred_mountpoints);
187   if (path.get() == NULL) {
188     log_error(gc, init)("Use -XX:ZPath to specify the path to a %s filesystem", filesystem);
189     return -1;
190   }
191 
192   // Try to create an anonymous file using the O_TMPFILE flag. Note that this
193   // flag requires kernel >= 3.11. If this fails we fall back to open/unlink.
194   const int fd_anon = open(path.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
195   if (fd_anon == -1) {
196     ZErrno err;
197     log_debug(gc, init)("Failed to create anonymous file in %s (%s)", path.get(),
198                         (err == EINVAL ? "Not supported" : err.to_string()));
199   } else {
200     // Get inode number for anonymous file
201     struct stat stat_buf;
202     if (fstat(fd_anon, &stat_buf) == -1) {
203       ZErrno err;
204       log_error(gc, init)("Failed to determine inode number for anonymous file (%s)", err.to_string());
205       return -1;
206     }
207 
208     log_info(gc, init)("Heap backed by file: %s/#" UINT64_FORMAT, path.get(), (uint64_t)stat_buf.st_ino);
209 
210     return fd_anon;
211   }
212 
213   log_debug(gc, init)("Falling back to open/unlink");
214 
215   // Create file name
216   char filename[PATH_MAX];
217   snprintf(filename, sizeof(filename), "%s/%s.%d", path.get(), name, os::current_process_id());
218 
219   // Create file
220   const int fd = open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
221   if (fd == -1) {
222     ZErrno err;
223     log_error(gc, init)("Failed to create file %s (%s)", filename, err.to_string());
224     return -1;
225   }
226 
227   // Unlink file
228   if (unlink(filename) == -1) {
229     ZErrno err;
230     log_error(gc, init)("Failed to unlink file %s (%s)", filename, err.to_string());
231     return -1;
232   }
233 
234   log_info(gc, init)("Heap backed by file: %s", filename);
235 
236   return fd;
237 }
238 
create_fd(const char * name) const239 int ZBackingFile::create_fd(const char* name) const {
240   if (ZPath == NULL) {
241     // If the path is not explicitly specified, then we first try to create a memfd file
242     // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might
243     // not be supported at all (requires kernel >= 3.17), or it might not support large
244     // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a
245     // file on an accessible tmpfs or hugetlbfs mount point.
246     const int fd = create_mem_fd(name);
247     if (fd != -1) {
248       return fd;
249     }
250 
251     log_debug(gc, init)("Falling back to searching for an accessible mount point");
252   }
253 
254   return create_file_fd(name);
255 }
256 
is_initialized() const257 bool ZBackingFile::is_initialized() const {
258   return _initialized;
259 }
260 
fd() const261 int ZBackingFile::fd() const {
262   return _fd;
263 }
264 
available() const265 size_t ZBackingFile::available() const {
266   return _available;
267 }
268 
is_tmpfs() const269 bool ZBackingFile::is_tmpfs() const {
270   return _filesystem == TMPFS_MAGIC;
271 }
272 
is_hugetlbfs() const273 bool ZBackingFile::is_hugetlbfs() const {
274   return _filesystem == HUGETLBFS_MAGIC;
275 }
276 
tmpfs_supports_transparent_huge_pages() const277 bool ZBackingFile::tmpfs_supports_transparent_huge_pages() const {
278   // If the shmem_enabled file exists and is readable then we
279   // know the kernel supports transparent huge pages for tmpfs.
280   return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
281 }
282 
try_split_and_expand_tmpfs(size_t offset,size_t length,size_t alignment) const283 bool ZBackingFile::try_split_and_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
284   // Try first smaller part.
285   const size_t offset0 = offset;
286   const size_t length0 = align_up(length / 2, alignment);
287   if (!try_expand_tmpfs(offset0, length0, alignment)) {
288     return false;
289   }
290 
291   // Try second smaller part.
292   const size_t offset1 = offset0 + length0;
293   const size_t length1 = length - length0;
294   if (!try_expand_tmpfs(offset1, length1, alignment)) {
295     return false;
296   }
297 
298   return true;
299 }
300 
try_expand_tmpfs(size_t offset,size_t length,size_t alignment) const301 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
302   assert(length > 0, "Invalid length");
303   assert(is_aligned(length, alignment), "Invalid length");
304 
305   ZErrno err = posix_fallocate(_fd, offset, length);
306 
307   if (err == EINTR && length > alignment) {
308     // Calling posix_fallocate() with a large length can take a long
309     // time to complete. When running profilers, such as VTune, this
310     // syscall will be constantly interrupted by signals. Expanding
311     // the file in smaller steps avoids this problem.
312     return try_split_and_expand_tmpfs(offset, length, alignment);
313   }
314 
315   if (err) {
316     log_error(gc)("Failed to allocate backing file (%s)", err.to_string());
317     return false;
318   }
319 
320   return true;
321 }
322 
try_expand_tmpfs(size_t offset,size_t length) const323 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length) const {
324   assert(is_tmpfs(), "Wrong filesystem");
325   return try_expand_tmpfs(offset, length, os::vm_page_size());
326 }
327 
try_expand_hugetlbfs(size_t offset,size_t length) const328 bool ZBackingFile::try_expand_hugetlbfs(size_t offset, size_t length) const {
329   assert(is_hugetlbfs(), "Wrong filesystem");
330 
331   // Prior to kernel 4.3, hugetlbfs did not support posix_fallocate().
332   // Instead of posix_fallocate() we can use a well-known workaround,
333   // which involves truncating the file to requested size and then try
334   // to map it to verify that there are enough huge pages available to
335   // back it.
336   while (ftruncate(_fd, offset + length) == -1) {
337     ZErrno err;
338     if (err != EINTR) {
339       log_error(gc)("Failed to truncate backing file (%s)", err.to_string());
340       return false;
341     }
342   }
343 
344   // If we fail mapping during initialization, i.e. when we are pre-mapping
345   // the heap, then we wait and retry a few times before giving up. Otherwise
346   // there is a risk that running JVMs back-to-back will fail, since there
347   // is a delay between process termination and the huge pages owned by that
348   // process being returned to the huge page pool and made available for new
349   // allocations.
350   void* addr = MAP_FAILED;
351   const int max_attempts = 5;
352   for (int attempt = 1; attempt <= max_attempts; attempt++) {
353     addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
354     if (addr != MAP_FAILED || !_hugetlbfs_mmap_retry) {
355       // Mapping was successful or mmap retry is disabled
356       break;
357     }
358 
359     ZErrno err;
360     log_debug(gc)("Failed to map backing file (%s), attempt %d of %d",
361                   err.to_string(), attempt, max_attempts);
362 
363     // Wait and retry in one second, in the hope that
364     // huge pages will be available by then.
365     sleep(1);
366   }
367 
368   // Disable mmap retry from now on
369   if (_hugetlbfs_mmap_retry) {
370     _hugetlbfs_mmap_retry = false;
371   }
372 
373   if (addr == MAP_FAILED) {
374     // Not enough huge pages left
375     ZErrno err;
376     log_error(gc)("Failed to map backing file (%s)", err.to_string());
377     return false;
378   }
379 
380   // Successful mapping, unmap again. From now on the pages we mapped
381   // will be reserved for this file.
382   if (munmap(addr, length) == -1) {
383     ZErrno err;
384     log_error(gc)("Failed to unmap backing file (%s)", err.to_string());
385     return false;
386   }
387 
388   return true;
389 }
390 
try_expand_tmpfs_or_hugetlbfs(size_t offset,size_t length,size_t alignment) const391 bool ZBackingFile::try_expand_tmpfs_or_hugetlbfs(size_t offset, size_t length, size_t alignment) const {
392   assert(is_aligned(offset, alignment), "Invalid offset");
393   assert(is_aligned(length, alignment), "Invalid length");
394 
395   log_debug(gc)("Expanding heap from " SIZE_FORMAT "M to " SIZE_FORMAT "M", offset / M, (offset + length) / M);
396 
397   return is_hugetlbfs() ? try_expand_hugetlbfs(offset, length) : try_expand_tmpfs(offset, length);
398 }
399 
try_expand(size_t offset,size_t length,size_t alignment) const400 size_t ZBackingFile::try_expand(size_t offset, size_t length, size_t alignment) const {
401   size_t start = offset;
402   size_t end = offset + length;
403 
404   // Try to expand
405   if (try_expand_tmpfs_or_hugetlbfs(start, length, alignment)) {
406     // Success
407     return end;
408   }
409 
410   // Failed, try to expand as much as possible
411   for (;;) {
412     length = align_down((end - start) / 2, alignment);
413     if (length < alignment) {
414       // Done, don't expand more
415       return start;
416     }
417 
418     if (try_expand_tmpfs_or_hugetlbfs(start, length, alignment)) {
419       // Success, try expand more
420       start += length;
421     } else {
422       // Failed, try expand less
423       end -= length;
424     }
425   }
426 }
427