1 /*
2  * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #ifndef OS_LINUX_OS_LINUX_HPP
26 #define OS_LINUX_OS_LINUX_HPP
27 
28 // Linux_OS defines the interface to Linux operating systems
29 
// Information about the protection of the page at address '0' on this os.
// On Linux the zero page is not mapped readable, so reads of it fault.
static bool zero_page_read_protected() {
  return true;
}
32 
// Static-only collection of Linux-specific OS support routines and cached
// system properties used by HotSpot. The class is never instantiated; all
// members are static.
class Linux {
  friend class CgroupSubsystem;
  friend class os;
  friend class OSContainer;
  friend class TestReserveMemorySpecial;

  // Optional libpthread entry points resolved at runtime; NULL when the
  // running libpthread does not provide them (see pthread_getcpuclockid()
  // below for the NULL-guarded call).
  static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
  static int (*_pthread_setname_np)(pthread_t, const char*);

  // Stack extent of the initial (primordial) process thread, captured by
  // capture_initial_stack().
  static address   _initial_thread_stack_bottom;
  static uintptr_t _initial_thread_stack_size;

  // Version strings of the C library and libpthread in use; set via the
  // set_libc_version()/set_libpthread_version() protected setters.
  static const char *_libc_version;
  static const char *_libpthread_version;

  // True when the fast per-thread CPU-time clock path is usable
  // (see fast_thread_clock_init() / fast_thread_cpu_time()).
  static bool _supports_fast_thread_cpu_time;

  // Cached CPU <-> NUMA node mappings; rebuilt by the rebuild_* helpers.
  static GrowableArray<int>* _cpu_to_node;
  static GrowableArray<int>* _nindex_to_node;

  // Default large (huge) page size, discovered by
  // find_default_large_page_size().
  static size_t _default_large_page_size;

 protected:

  static julong _physical_memory;  // total physical memory, cached by initialize_system_info()
  static pthread_t _main_thread;   // pthread id of the VM's main thread
  static int _page_size;           // base (small) page size

  static julong available_memory();
  static julong physical_memory() { return _physical_memory; }
  static void set_physical_memory(julong phys_mem) { _physical_memory = phys_mem; }
  static int active_processor_count();

  static void initialize_system_info();

  // Commit memory at addr; the overload taking an alignment hint may use
  // it to select large-page backing.
  static int commit_memory_impl(char* addr, size_t bytes, bool exec);
  static int commit_memory_impl(char* addr, size_t bytes,
                                size_t alignment_hint, bool exec);

  static void set_libc_version(const char *s)       { _libc_version = s; }
  static void set_libpthread_version(const char *s) { _libpthread_version = s; }

  static void rebuild_cpu_to_node_map();
  static void rebuild_nindex_to_node_map();
  static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
  static GrowableArray<int>* nindex_to_node()  { return _nindex_to_node; }

  // Large-page size discovery and configuration.
  static size_t default_large_page_size();
  static size_t find_default_large_page_size();
  static size_t find_large_page_size(size_t page_size);
  static size_t setup_large_page_size();

  static bool setup_large_page_type(size_t page_size);
  static bool transparent_huge_pages_sanity_check(bool warn, size_t pages_size);
  static bool hugetlbfs_sanity_check(bool warn, size_t page_size);

  // Large-page reservation backends: SysV shared memory (shm) and
  // hugetlbfs, with "only" / "mixed" variants of the latter.
  static char* reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec);
  static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec);
  static char* reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec);
  static char* reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec);

  // Release counterparts for the reservation backends above.
  static bool release_memory_special_impl(char* base, size_t bytes);
  static bool release_memory_special_shm(char* base, size_t bytes);
  static bool release_memory_special_huge_tlbfs(char* base, size_t bytes);

  // Diagnostic printers (error reporting / VM info). The bool-returning
  // variants presumably report whether anything was printed — confirm
  // against the .cpp implementations.
  static void print_process_memory_info(outputStream* st);
  static void print_system_memory_info(outputStream* st);
  static bool print_container_info(outputStream* st);
  static void print_steal_info(outputStream* st);
  static void print_distro_info(outputStream* st);
  static void print_libversion_info(outputStream* st);
  static void print_proc_sys_info(outputStream* st);
  static bool print_ld_preload_file(outputStream* st);
  static void print_uptime_info(outputStream* st);

 public:
  // Per-CPU tick counters used for CPU usage / steal-time reporting.
  struct CPUPerfTicks {
    uint64_t used;
    uint64_t usedKernel;
    uint64_t total;
    uint64_t steal;
    bool     has_steal_ticks;  // whether 'steal' was actually available
  };

  // which_logical_cpu=-1 returns accumulated ticks for all cpus.
  static bool get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu);
  static bool _stack_is_executable;
  static void *dlopen_helper(const char *name, char *ebuf, int ebuflen);
  static void *dll_load_in_vmthread(const char *name, char *ebuf, int ebuflen);

  static void init_thread_fpu_state();
  static int  get_fpu_control_word();
  static void set_fpu_control_word(int fpu_control);
  static pthread_t main_thread(void)                                { return _main_thread; }
  // returns kernel thread id (similar to LWP id on Solaris), which can be
  // used to access /proc
  static pid_t gettid();

  static address   initial_thread_stack_bottom(void)                { return _initial_thread_stack_bottom; }
  static uintptr_t initial_thread_stack_size(void)                  { return _initial_thread_stack_size; }

  static int page_size(void)                                        { return _page_size; }
  static void set_page_size(int val)                                { _page_size = val; }

  // Extract the stack/frame pointer from a signal-handler ucontext.
  static intptr_t* ucontext_get_sp(const ucontext_t* uc);
  static intptr_t* ucontext_get_fp(const ucontext_t* uc);

  // GNU libc and libpthread version strings
  static const char *libc_version()           { return _libc_version; }
  static const char *libpthread_version()     { return _libpthread_version; }

  static void libpthread_init();
  static void sched_getcpu_init();
  static bool libnuma_init();
  static void* libnuma_dlsym(void* handle, const char* name);
  // libnuma v2 (libnuma_1.2) symbols
  static void* libnuma_v2_dlsym(void* handle, const char* name);

  // Return default guard size for the specified thread type
  static size_t default_guard_size(os::ThreadType thr_type);

  static void capture_initial_stack(size_t max_size);

  // Stack overflow handling
  static bool manually_expand_stack(JavaThread * t, address addr);

  // fast POSIX clocks support
  static void fast_thread_clock_init(void);

  // Map a thread to its CPU-time clock id; returns -1 when the
  // pthread_getcpuclockid symbol was not resolved at startup.
  static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
    return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
  }

  static bool supports_fast_thread_cpu_time() {
    return _supports_fast_thread_cpu_time;
  }

  static jlong fast_thread_cpu_time(clockid_t clockid);

  // Stack repair handling

  // none present

 private:
  static void numa_init();
  static void expand_stack_to(address bottom);

  // Signatures of the sched/libnuma entry points that are resolved
  // dynamically (stored in the _sched_getcpu/_numa_* pointers below).
  typedef int (*sched_getcpu_func_t)(void);
  typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
  typedef int (*numa_node_to_cpus_v2_func_t)(int node, void *mask);
  typedef int (*numa_max_node_func_t)(void);
  typedef int (*numa_num_configured_nodes_func_t)(void);
  typedef int (*numa_available_func_t)(void);
  typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
  typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
  typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask);
  typedef struct bitmask* (*numa_get_membind_func_t)(void);
  typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void);
  typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags);
  typedef void (*numa_set_preferred_func_t)(int node);
  typedef void (*numa_set_bind_policy_func_t)(int policy);
  typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
  typedef int (*numa_distance_func_t)(int node1, int node2);

  // Resolved libnuma function pointers; NULL when a symbol is unavailable,
  // which the public wrappers below guard against.
  static sched_getcpu_func_t _sched_getcpu;
  static numa_node_to_cpus_func_t _numa_node_to_cpus;
  static numa_node_to_cpus_v2_func_t _numa_node_to_cpus_v2;
  static numa_max_node_func_t _numa_max_node;
  static numa_num_configured_nodes_func_t _numa_num_configured_nodes;
  static numa_available_func_t _numa_available;
  static numa_tonode_memory_func_t _numa_tonode_memory;
  static numa_interleave_memory_func_t _numa_interleave_memory;
  static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2;
  static numa_set_bind_policy_func_t _numa_set_bind_policy;
  static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
  static numa_distance_func_t _numa_distance;
  static numa_get_membind_func_t _numa_get_membind;
  static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
  static numa_move_pages_func_t _numa_move_pages;
  static numa_set_preferred_func_t _numa_set_preferred;
  // Node sets exported by libnuma: the v1 nodemask plus v2 bitmask
  // pointers (all-nodes, existing-nodes, interleave and membind sets).
  static unsigned long* _numa_all_nodes;
  static struct bitmask* _numa_all_nodes_ptr;
  static struct bitmask* _numa_nodes_ptr;
  static struct bitmask* _numa_interleave_bitmask;
  static struct bitmask* _numa_membind_bitmask;

  static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
  static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
  static void set_numa_node_to_cpus_v2(numa_node_to_cpus_v2_func_t func) { _numa_node_to_cpus_v2 = func; }
  static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
  static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; }
  static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
  static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
  static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
  static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; }
  static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
  static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
  static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
  static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
  static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; }
  static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; }
  static void set_numa_set_preferred(numa_set_preferred_func_t func) { _numa_set_preferred = func; }
  static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
  // The two setters below take the address of libnuma's exported bitmask
  // pointer and dereference it, storing the pointed-to bitmask.
  static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
  static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
  static void set_numa_interleave_bitmask(struct bitmask* ptr)     { _numa_interleave_bitmask = ptr ;   }
  static void set_numa_membind_bitmask(struct bitmask* ptr)        { _numa_membind_bitmask = ptr ;      }
  static int sched_getcpu_syscall(void);

  // NUMA memory policy detected for this process (see
  // identify_numa_policy()).
  enum NumaAllocationPolicy{
    NotInitialized,
    Membind,
    Interleave
  };
  static NumaAllocationPolicy _current_numa_policy;

 public:
  // The wrappers below return -1 (or do nothing, for void functions) when
  // the corresponding libnuma symbol was not resolved.
  static int sched_getcpu()  { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
  static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen);
  static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
  static int numa_num_configured_nodes() {
    return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1;
  }
  static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
  static int numa_tonode_memory(void *start, size_t size, int node) {
    return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
  }

  static bool is_running_in_interleave_mode() {
    return _current_numa_policy == Interleave;
  }

  static void set_configured_numa_policy(NumaAllocationPolicy numa_policy) {
    _current_numa_policy = numa_policy;
  }

  // Classify the process's NUMA policy: Interleave if any node is set in
  // the interleave bitmask, otherwise Membind.
  // NOTE(review): callers presumably guarantee _numa_bitmask_isbitset and
  // _numa_interleave_bitmask are non-NULL here (no guard, unlike the
  // wrappers above) — confirm at the call sites.
  static NumaAllocationPolicy identify_numa_policy() {
    for (int node = 0; node <= Linux::numa_max_node(); node++) {
      if (Linux::_numa_bitmask_isbitset(Linux::_numa_interleave_bitmask, node)) {
        return Interleave;
      }
    }
    return Membind;
  }

  // Interleave the given range across nodes: use the interleave bitmask
  // when running interleaved, else the membind bitmask; fall back to the
  // v1 API with the all-nodes mask.
  static void numa_interleave_memory(void *start, size_t size) {
    // Prefer v2 API
    if (_numa_interleave_memory_v2 != NULL) {
      if (is_running_in_interleave_mode()) {
        _numa_interleave_memory_v2(start, size, _numa_interleave_bitmask);
      } else if (_numa_membind_bitmask != NULL) {
        _numa_interleave_memory_v2(start, size, _numa_membind_bitmask);
      }
    } else if (_numa_interleave_memory != NULL) {
      _numa_interleave_memory(start, size, _numa_all_nodes);
    }
  }
  static void numa_set_preferred(int node) {
    if (_numa_set_preferred != NULL) {
      _numa_set_preferred(node);
    }
  }
  static void numa_set_bind_policy(int policy) {
    if (_numa_set_bind_policy != NULL) {
      _numa_set_bind_policy(policy);
    }
  }
  static int numa_distance(int node1, int node2) {
    return _numa_distance != NULL ? _numa_distance(node1, node2) : -1;
  }
  static long numa_move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags) {
    return _numa_move_pages != NULL ? _numa_move_pages(pid, count, pages, nodes, status, flags) : -1;
  }
  static int get_node_by_cpu(int cpu_id);
  static int get_existing_num_nodes();
  // Check if numa node is configured (non-zero memory node).
  static bool is_node_in_configured_nodes(unsigned int n) {
    if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
      return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
    } else
      return false;
  }
  // Check if numa node exists in the system (including zero memory nodes).
  static bool is_node_in_existing_nodes(unsigned int n) {
    if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) {
      return _numa_bitmask_isbitset(_numa_nodes_ptr, n);
    } else if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
      // Not all libnuma API v2 implement numa_nodes_ptr, so it's not possible
      // to trust the API version for checking its absence. On the other hand,
      // numa_nodes_ptr found in libnuma 2.0.9 and above is the only way to get
      // a complete view of all numa nodes in the system, hence numa_nodes_ptr
      // is used to handle CPU and nodes on architectures (like PowerPC) where
      // there can exist nodes with CPUs but no memory or vice-versa and the
      // nodes may be non-contiguous. For most of the architectures, like
      // x86_64, numa_node_ptr presents the same node set as found in
      // numa_all_nodes_ptr so it's possible to use numa_all_nodes_ptr as a
      // substitute.
      return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
    } else
      return false;
  }
  // Check if node is in bound node set.
  static bool is_node_in_bound_nodes(int node) {
    if (_numa_bitmask_isbitset != NULL) {
      if (is_running_in_interleave_mode()) {
        return _numa_bitmask_isbitset(_numa_interleave_bitmask, node);
      } else {
        return _numa_membind_bitmask != NULL ? _numa_bitmask_isbitset(_numa_membind_bitmask, node) : false;
      }
    }
    return false;
  }
  // Check if bound to only one numa node.
  // Returns true if bound to a single numa node, otherwise returns false.
  static bool is_bound_to_single_node() {
    int nodes = 0;
    struct bitmask* bmp = NULL;
    unsigned int node = 0;
    unsigned int highest_node_number = 0;

    // All three symbols are needed; without them we cannot tell, so
    // conservatively report "not bound to a single node".
    if (_numa_get_membind != NULL && _numa_max_node != NULL && _numa_bitmask_isbitset != NULL) {
      bmp = _numa_get_membind();
      highest_node_number = _numa_max_node();
    } else {
      return false;
    }

    // Count the nodes present in the membind set.
    for (node = 0; node <= highest_node_number; node++) {
      if (_numa_bitmask_isbitset(bmp, node)) {
        nodes++;
      }
    }

    if (nodes == 1) {
      return true;
    } else {
      return false;
    }
  }

  static const GrowableArray<int>* numa_nindex_to_node() {
    return _nindex_to_node;
  }
};
377 
378 #endif // OS_LINUX_OS_LINUX_HPP
379