/*
 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
24
#ifndef OS_LINUX_OS_LINUX_HPP
#define OS_LINUX_OS_LINUX_HPP

// Linux_OS defines the interface to Linux operating systems

// Information about the protection of the page at address '0' on this os.
// Returns true: on Linux the page at address '0' is read-protected, so a
// read through a NULL pointer faults rather than returning data.
static bool zero_page_read_protected() { return true; }
32
33 class Linux {
34 friend class CgroupSubsystem;
35 friend class os;
36 friend class OSContainer;
37 friend class TestReserveMemorySpecial;
38
39 static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
40 static int (*_pthread_setname_np)(pthread_t, const char*);
41
42 static address _initial_thread_stack_bottom;
43 static uintptr_t _initial_thread_stack_size;
44
45 static const char *_libc_version;
46 static const char *_libpthread_version;
47
48 static bool _supports_fast_thread_cpu_time;
49
50 static GrowableArray<int>* _cpu_to_node;
51 static GrowableArray<int>* _nindex_to_node;
52
53 static size_t _default_large_page_size;
54
55 protected:
56
57 static julong _physical_memory;
58 static pthread_t _main_thread;
59 static int _page_size;
60
61 static julong available_memory();
physical_memory()62 static julong physical_memory() { return _physical_memory; }
set_physical_memory(julong phys_mem)63 static void set_physical_memory(julong phys_mem) { _physical_memory = phys_mem; }
64 static int active_processor_count();
65
66 static void initialize_system_info();
67
68 static int commit_memory_impl(char* addr, size_t bytes, bool exec);
69 static int commit_memory_impl(char* addr, size_t bytes,
70 size_t alignment_hint, bool exec);
71
set_libc_version(const char * s)72 static void set_libc_version(const char *s) { _libc_version = s; }
set_libpthread_version(const char * s)73 static void set_libpthread_version(const char *s) { _libpthread_version = s; }
74
75 static void rebuild_cpu_to_node_map();
76 static void rebuild_nindex_to_node_map();
cpu_to_node()77 static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
nindex_to_node()78 static GrowableArray<int>* nindex_to_node() { return _nindex_to_node; }
79
80 static size_t default_large_page_size();
81 static size_t find_default_large_page_size();
82 static size_t find_large_page_size(size_t page_size);
83 static size_t setup_large_page_size();
84
85 static bool setup_large_page_type(size_t page_size);
86 static bool transparent_huge_pages_sanity_check(bool warn, size_t pages_size);
87 static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
88
89 static char* reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec);
90 static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec);
91 static char* reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec);
92 static char* reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec);
93
94 static bool release_memory_special_impl(char* base, size_t bytes);
95 static bool release_memory_special_shm(char* base, size_t bytes);
96 static bool release_memory_special_huge_tlbfs(char* base, size_t bytes);
97
98 static void print_process_memory_info(outputStream* st);
99 static void print_system_memory_info(outputStream* st);
100 static bool print_container_info(outputStream* st);
101 static void print_steal_info(outputStream* st);
102 static void print_distro_info(outputStream* st);
103 static void print_libversion_info(outputStream* st);
104 static void print_proc_sys_info(outputStream* st);
105 static bool print_ld_preload_file(outputStream* st);
106 static void print_uptime_info(outputStream* st);
107
108 public:
109 struct CPUPerfTicks {
110 uint64_t used;
111 uint64_t usedKernel;
112 uint64_t total;
113 uint64_t steal;
114 bool has_steal_ticks;
115 };
116
117 // which_logical_cpu=-1 returns accumulated ticks for all cpus.
118 static bool get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu);
119 static bool _stack_is_executable;
120 static void *dlopen_helper(const char *name, char *ebuf, int ebuflen);
121 static void *dll_load_in_vmthread(const char *name, char *ebuf, int ebuflen);
122
123 static void init_thread_fpu_state();
124 static int get_fpu_control_word();
125 static void set_fpu_control_word(int fpu_control);
main_thread(void)126 static pthread_t main_thread(void) { return _main_thread; }
127 // returns kernel thread id (similar to LWP id on Solaris), which can be
128 // used to access /proc
129 static pid_t gettid();
130
initial_thread_stack_bottom(void)131 static address initial_thread_stack_bottom(void) { return _initial_thread_stack_bottom; }
initial_thread_stack_size(void)132 static uintptr_t initial_thread_stack_size(void) { return _initial_thread_stack_size; }
133
page_size(void)134 static int page_size(void) { return _page_size; }
set_page_size(int val)135 static void set_page_size(int val) { _page_size = val; }
136
137 static intptr_t* ucontext_get_sp(const ucontext_t* uc);
138 static intptr_t* ucontext_get_fp(const ucontext_t* uc);
139
140 // GNU libc and libpthread version strings
libc_version()141 static const char *libc_version() { return _libc_version; }
libpthread_version()142 static const char *libpthread_version() { return _libpthread_version; }
143
144 static void libpthread_init();
145 static void sched_getcpu_init();
146 static bool libnuma_init();
147 static void* libnuma_dlsym(void* handle, const char* name);
148 // libnuma v2 (libnuma_1.2) symbols
149 static void* libnuma_v2_dlsym(void* handle, const char* name);
150
151 // Return default guard size for the specified thread type
152 static size_t default_guard_size(os::ThreadType thr_type);
153
154 static void capture_initial_stack(size_t max_size);
155
156 // Stack overflow handling
157 static bool manually_expand_stack(JavaThread * t, address addr);
158
159 // fast POSIX clocks support
160 static void fast_thread_clock_init(void);
161
pthread_getcpuclockid(pthread_t tid,clockid_t * clock_id)162 static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
163 return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
164 }
165
supports_fast_thread_cpu_time()166 static bool supports_fast_thread_cpu_time() {
167 return _supports_fast_thread_cpu_time;
168 }
169
170 static jlong fast_thread_cpu_time(clockid_t clockid);
171
172 // Stack repair handling
173
174 // none present
175
176 private:
177 static void numa_init();
178 static void expand_stack_to(address bottom);
179
180 typedef int (*sched_getcpu_func_t)(void);
181 typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
182 typedef int (*numa_node_to_cpus_v2_func_t)(int node, void *mask);
183 typedef int (*numa_max_node_func_t)(void);
184 typedef int (*numa_num_configured_nodes_func_t)(void);
185 typedef int (*numa_available_func_t)(void);
186 typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
187 typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
188 typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask);
189 typedef struct bitmask* (*numa_get_membind_func_t)(void);
190 typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void);
191 typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags);
192 typedef void (*numa_set_preferred_func_t)(int node);
193 typedef void (*numa_set_bind_policy_func_t)(int policy);
194 typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
195 typedef int (*numa_distance_func_t)(int node1, int node2);
196
197 static sched_getcpu_func_t _sched_getcpu;
198 static numa_node_to_cpus_func_t _numa_node_to_cpus;
199 static numa_node_to_cpus_v2_func_t _numa_node_to_cpus_v2;
200 static numa_max_node_func_t _numa_max_node;
201 static numa_num_configured_nodes_func_t _numa_num_configured_nodes;
202 static numa_available_func_t _numa_available;
203 static numa_tonode_memory_func_t _numa_tonode_memory;
204 static numa_interleave_memory_func_t _numa_interleave_memory;
205 static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2;
206 static numa_set_bind_policy_func_t _numa_set_bind_policy;
207 static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
208 static numa_distance_func_t _numa_distance;
209 static numa_get_membind_func_t _numa_get_membind;
210 static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
211 static numa_move_pages_func_t _numa_move_pages;
212 static numa_set_preferred_func_t _numa_set_preferred;
213 static unsigned long* _numa_all_nodes;
214 static struct bitmask* _numa_all_nodes_ptr;
215 static struct bitmask* _numa_nodes_ptr;
216 static struct bitmask* _numa_interleave_bitmask;
217 static struct bitmask* _numa_membind_bitmask;
218
set_sched_getcpu(sched_getcpu_func_t func)219 static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
set_numa_node_to_cpus(numa_node_to_cpus_func_t func)220 static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
set_numa_node_to_cpus_v2(numa_node_to_cpus_v2_func_t func)221 static void set_numa_node_to_cpus_v2(numa_node_to_cpus_v2_func_t func) { _numa_node_to_cpus_v2 = func; }
set_numa_max_node(numa_max_node_func_t func)222 static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func)223 static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; }
set_numa_available(numa_available_func_t func)224 static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
set_numa_tonode_memory(numa_tonode_memory_func_t func)225 static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
set_numa_interleave_memory(numa_interleave_memory_func_t func)226 static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func)227 static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; }
set_numa_set_bind_policy(numa_set_bind_policy_func_t func)228 static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func)229 static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
set_numa_distance(numa_distance_func_t func)230 static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
set_numa_get_membind(numa_get_membind_func_t func)231 static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func)232 static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; }
set_numa_move_pages(numa_move_pages_func_t func)233 static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; }
set_numa_set_preferred(numa_set_preferred_func_t func)234 static void set_numa_set_preferred(numa_set_preferred_func_t func) { _numa_set_preferred = func; }
set_numa_all_nodes(unsigned long * ptr)235 static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
set_numa_all_nodes_ptr(struct bitmask ** ptr)236 static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
set_numa_nodes_ptr(struct bitmask ** ptr)237 static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
set_numa_interleave_bitmask(struct bitmask * ptr)238 static void set_numa_interleave_bitmask(struct bitmask* ptr) { _numa_interleave_bitmask = ptr ; }
set_numa_membind_bitmask(struct bitmask * ptr)239 static void set_numa_membind_bitmask(struct bitmask* ptr) { _numa_membind_bitmask = ptr ; }
240 static int sched_getcpu_syscall(void);
241
242 enum NumaAllocationPolicy{
243 NotInitialized,
244 Membind,
245 Interleave
246 };
247 static NumaAllocationPolicy _current_numa_policy;
248
249 public:
sched_getcpu()250 static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
251 static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen);
numa_max_node()252 static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
numa_num_configured_nodes()253 static int numa_num_configured_nodes() {
254 return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1;
255 }
numa_available()256 static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
numa_tonode_memory(void * start,size_t size,int node)257 static int numa_tonode_memory(void *start, size_t size, int node) {
258 return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
259 }
260
is_running_in_interleave_mode()261 static bool is_running_in_interleave_mode() {
262 return _current_numa_policy == Interleave;
263 }
264
set_configured_numa_policy(NumaAllocationPolicy numa_policy)265 static void set_configured_numa_policy(NumaAllocationPolicy numa_policy) {
266 _current_numa_policy = numa_policy;
267 }
268
identify_numa_policy()269 static NumaAllocationPolicy identify_numa_policy() {
270 for (int node = 0; node <= Linux::numa_max_node(); node++) {
271 if (Linux::_numa_bitmask_isbitset(Linux::_numa_interleave_bitmask, node)) {
272 return Interleave;
273 }
274 }
275 return Membind;
276 }
277
numa_interleave_memory(void * start,size_t size)278 static void numa_interleave_memory(void *start, size_t size) {
279 // Prefer v2 API
280 if (_numa_interleave_memory_v2 != NULL) {
281 if (is_running_in_interleave_mode()) {
282 _numa_interleave_memory_v2(start, size, _numa_interleave_bitmask);
283 } else if (_numa_membind_bitmask != NULL) {
284 _numa_interleave_memory_v2(start, size, _numa_membind_bitmask);
285 }
286 } else if (_numa_interleave_memory != NULL) {
287 _numa_interleave_memory(start, size, _numa_all_nodes);
288 }
289 }
numa_set_preferred(int node)290 static void numa_set_preferred(int node) {
291 if (_numa_set_preferred != NULL) {
292 _numa_set_preferred(node);
293 }
294 }
numa_set_bind_policy(int policy)295 static void numa_set_bind_policy(int policy) {
296 if (_numa_set_bind_policy != NULL) {
297 _numa_set_bind_policy(policy);
298 }
299 }
numa_distance(int node1,int node2)300 static int numa_distance(int node1, int node2) {
301 return _numa_distance != NULL ? _numa_distance(node1, node2) : -1;
302 }
numa_move_pages(int pid,unsigned long count,void ** pages,const int * nodes,int * status,int flags)303 static long numa_move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags) {
304 return _numa_move_pages != NULL ? _numa_move_pages(pid, count, pages, nodes, status, flags) : -1;
305 }
306 static int get_node_by_cpu(int cpu_id);
307 static int get_existing_num_nodes();
308 // Check if numa node is configured (non-zero memory node).
is_node_in_configured_nodes(unsigned int n)309 static bool is_node_in_configured_nodes(unsigned int n) {
310 if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
311 return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
312 } else
313 return false;
314 }
315 // Check if numa node exists in the system (including zero memory nodes).
is_node_in_existing_nodes(unsigned int n)316 static bool is_node_in_existing_nodes(unsigned int n) {
317 if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) {
318 return _numa_bitmask_isbitset(_numa_nodes_ptr, n);
319 } else if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
320 // Not all libnuma API v2 implement numa_nodes_ptr, so it's not possible
321 // to trust the API version for checking its absence. On the other hand,
322 // numa_nodes_ptr found in libnuma 2.0.9 and above is the only way to get
323 // a complete view of all numa nodes in the system, hence numa_nodes_ptr
324 // is used to handle CPU and nodes on architectures (like PowerPC) where
325 // there can exist nodes with CPUs but no memory or vice-versa and the
326 // nodes may be non-contiguous. For most of the architectures, like
327 // x86_64, numa_node_ptr presents the same node set as found in
328 // numa_all_nodes_ptr so it's possible to use numa_all_nodes_ptr as a
329 // substitute.
330 return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
331 } else
332 return false;
333 }
334 // Check if node is in bound node set.
is_node_in_bound_nodes(int node)335 static bool is_node_in_bound_nodes(int node) {
336 if (_numa_bitmask_isbitset != NULL) {
337 if (is_running_in_interleave_mode()) {
338 return _numa_bitmask_isbitset(_numa_interleave_bitmask, node);
339 } else {
340 return _numa_membind_bitmask != NULL ? _numa_bitmask_isbitset(_numa_membind_bitmask, node) : false;
341 }
342 }
343 return false;
344 }
345 // Check if bound to only one numa node.
346 // Returns true if bound to a single numa node, otherwise returns false.
is_bound_to_single_node()347 static bool is_bound_to_single_node() {
348 int nodes = 0;
349 struct bitmask* bmp = NULL;
350 unsigned int node = 0;
351 unsigned int highest_node_number = 0;
352
353 if (_numa_get_membind != NULL && _numa_max_node != NULL && _numa_bitmask_isbitset != NULL) {
354 bmp = _numa_get_membind();
355 highest_node_number = _numa_max_node();
356 } else {
357 return false;
358 }
359
360 for (node = 0; node <= highest_node_number; node++) {
361 if (_numa_bitmask_isbitset(bmp, node)) {
362 nodes++;
363 }
364 }
365
366 if (nodes == 1) {
367 return true;
368 } else {
369 return false;
370 }
371 }
372
numa_nindex_to_node()373 static const GrowableArray<int>* numa_nindex_to_node() {
374 return _nindex_to_node;
375 }
376 };

#endif // OS_LINUX_OS_LINUX_HPP