1 /*
2 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef CGROUP_SUBSYSTEM_LINUX_HPP
26 #define CGROUP_SUBSYSTEM_LINUX_HPP
27
28 #include "memory/allocation.hpp"
29 #include "runtime/os.hpp"
30 #include "logging/log.hpp"
31 #include "utilities/globalDefinitions.hpp"
32 #include "utilities/macros.hpp"
33 #include "osContainer_linux.hpp"
34
35 // Shared cgroups code (used by cgroup version 1 and version 2)
36
37 /*
38 * PER_CPU_SHARES has been set to 1024 because CPU shares' quota
39 * is commonly used in cloud frameworks like Kubernetes[1],
40 * AWS[2] and Mesos[3] in a similar way. They spawn containers with
41 * --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
42 * the inverse for determining the number of possible available
43 * CPUs to the JVM inside a container. See JDK-8216366.
44 *
45 * [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
46 * In particular:
47 * When using Docker:
48 * The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
49 * fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
50 * --cpu-shares flag in the docker run command.
51 * [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
52 * [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
53 * https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
54 */
#define PER_CPU_SHARES 1024

// Cgroup type codes. CGROUPS_V1 and CGROUPS_V2 are the values that
// CgroupSubsystemFactory compares '*flags' against; the INVALID_* codes
// are presumably the failure states reported through the same value
// (TODO confirm against the factory implementation).
#define CGROUPS_V1               1
#define CGROUPS_V2               2
#define INVALID_CGROUPS_V2       3
#define INVALID_CGROUPS_V1       4
#define INVALID_CGROUPS_NO_MOUNT 5
#define INVALID_CGROUPS_GENERIC  6

// Four controllers are tracked. Their slots, in index order, are:
// cpuset, cpu, cpuacct, memory.
#define CG_INFO_LENGTH 4
#define CPUSET_IDX     0
#define CPU_IDX        1
#define CPUACCT_IDX    2
#define MEMORY_IDX     3

// Shorthand for a mutable C string pointer.
typedef char* cptr;
72
73 class CgroupController: public CHeapObj<mtInternal> {
74 public:
75 virtual char *subsystem_path() = 0;
76 };
77
78 PRAGMA_DIAG_PUSH
79 PRAGMA_FORMAT_NONLITERAL_IGNORED
subsystem_file_line_contents(CgroupController * c,const char * filename,const char * matchline,const char * scan_fmt,T returnval)80 template <typename T> int subsystem_file_line_contents(CgroupController* c,
81 const char *filename,
82 const char *matchline,
83 const char *scan_fmt,
84 T returnval) {
85 FILE *fp = NULL;
86 char *p;
87 char file[MAXPATHLEN+1];
88 char buf[MAXPATHLEN+1];
89 char discard[MAXPATHLEN+1];
90 bool found_match = false;
91
92 if (c == NULL) {
93 log_debug(os, container)("subsystem_file_line_contents: CgroupController* is NULL");
94 return OSCONTAINER_ERROR;
95 }
96 if (c->subsystem_path() == NULL) {
97 log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");
98 return OSCONTAINER_ERROR;
99 }
100
101 strncpy(file, c->subsystem_path(), MAXPATHLEN);
102 file[MAXPATHLEN-1] = '\0';
103 int filelen = strlen(file);
104 if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
105 log_debug(os, container)("File path too long %s, %s", file, filename);
106 return OSCONTAINER_ERROR;
107 }
108 strncat(file, filename, MAXPATHLEN-filelen);
109 log_trace(os, container)("Path to %s is %s", filename, file);
110 fp = fopen(file, "r");
111 if (fp != NULL) {
112 int err = 0;
113 while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) {
114 found_match = false;
115 if (matchline == NULL) {
116 // single-line file case
117 int matched = sscanf(p, scan_fmt, returnval);
118 found_match = (matched == 1);
119 } else {
120 // multi-line file case
121 if (strstr(p, matchline) != NULL) {
122 // discard matchline string prefix
123 int matched = sscanf(p, scan_fmt, discard, returnval);
124 found_match = (matched == 2);
125 } else {
126 continue; // substring not found
127 }
128 }
129 if (found_match) {
130 fclose(fp);
131 return 0;
132 } else {
133 err = 1;
134 log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
135 }
136 }
137 if (err == 0) {
138 log_debug(os, container)("Empty file %s", file);
139 }
140 } else {
141 log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));
142 }
143 if (fp != NULL)
144 fclose(fp);
145 return OSCONTAINER_ERROR;
146 }
147 PRAGMA_DIAG_POP
148
// Declares 'variable' of type 'return_type' in the enclosing scope and fills
// it from a single-value file via subsystem_file_line_contents() (no
// matchline). If the read fails, the ENCLOSING function returns
// (return_type) OSCONTAINER_ERROR; otherwise the value is trace-logged using
// 'logstring'. Expands to statements, so it can only be used inside a
// function body.
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
  {                                                                       \
    if (subsystem_file_line_contents(subsystem,                           \
                                     filename,                            \
                                     NULL,                                \
                                     scan_fmt,                            \
                                     &variable) != 0) {                   \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
164
// String flavour of GET_CONTAINER_INFO: declares 'char variable[bufsize]' in
// the enclosing scope and lets subsystem_file_line_contents() scan into it
// (no matchline). If the read fails, the ENCLOSING function returns
// (return_type) NULL; otherwise the string is trace-logged using 'logstring'.
// 'scan_fmt' has to keep the conversion within 'bufsize' bytes; nothing here
// enforces that.
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                               logstring, scan_fmt, variable, bufsize)    \
  char variable[bufsize];                                                 \
  {                                                                       \
    if (subsystem_file_line_contents(subsystem,                           \
                                     filename,                            \
                                     NULL,                                \
                                     scan_fmt,                            \
                                     variable) != 0) {                    \
      return (return_type) NULL;                                          \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
180
// Multi-line flavour of GET_CONTAINER_INFO: declares 'variable' of type
// 'return_type' in the enclosing scope and fills it from the first line of
// the file that contains 'matchline' and scans successfully with 'scan_fmt'.
// If the read fails, the ENCLOSING function returns
// (return_type) OSCONTAINER_ERROR; otherwise the value is trace-logged using
// 'logstring'.
#define GET_CONTAINER_INFO_LINE(return_type, controller, filename,        \
                                matchline, logstring, scan_fmt, variable) \
  return_type variable;                                                   \
  {                                                                       \
    if (subsystem_file_line_contents(controller,                          \
                                     filename,                            \
                                     matchline,                           \
                                     scan_fmt,                            \
                                     &variable) != 0) {                   \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
196
197
198 class CachedMetric : public CHeapObj<mtInternal>{
199 private:
200 volatile jlong _metric;
201 volatile jlong _next_check_counter;
202 public:
CachedMetric()203 CachedMetric() {
204 _metric = -1;
205 _next_check_counter = min_jlong;
206 }
should_check_metric()207 bool should_check_metric() {
208 return os::elapsed_counter() > _next_check_counter;
209 }
value()210 jlong value() { return _metric; }
set_value(jlong value,jlong timeout)211 void set_value(jlong value, jlong timeout) {
212 _metric = value;
213 // Metric is unlikely to change, but we want to remain
214 // responsive to configuration changes. A very short grace time
215 // between re-read avoids excessive overhead during startup without
216 // significantly reducing the VMs ability to promptly react to changed
217 // metric config
218 _next_check_counter = os::elapsed_counter() + timeout;
219 }
220 };
221
222 class CachingCgroupController : public CHeapObj<mtInternal> {
223 private:
224 CgroupController* _controller;
225 CachedMetric* _metrics_cache;
226
227 public:
CachingCgroupController(CgroupController * cont)228 CachingCgroupController(CgroupController* cont) {
229 _controller = cont;
230 _metrics_cache = new CachedMetric();
231 }
232
metrics_cache()233 CachedMetric* metrics_cache() { return _metrics_cache; }
controller()234 CgroupController* controller() { return _controller; }
235 };
236
237 class CgroupSubsystem: public CHeapObj<mtInternal> {
238 public:
239 jlong memory_limit_in_bytes();
240 int active_processor_count();
241
242 virtual int cpu_quota() = 0;
243 virtual int cpu_period() = 0;
244 virtual int cpu_shares() = 0;
245 virtual jlong memory_usage_in_bytes() = 0;
246 virtual jlong memory_and_swap_limit_in_bytes() = 0;
247 virtual jlong memory_soft_limit_in_bytes() = 0;
248 virtual jlong memory_max_usage_in_bytes() = 0;
249 virtual char * cpu_cpuset_cpus() = 0;
250 virtual char * cpu_cpuset_memory_nodes() = 0;
251 virtual jlong read_memory_limit_in_bytes() = 0;
252 virtual const char * container_type() = 0;
253 virtual CachingCgroupController* memory_controller() = 0;
254 virtual CachingCgroupController* cpu_controller() = 0;
255 };
256
257 // Utility class for storing info retrieved from /proc/cgroups,
258 // /proc/self/cgroup and /proc/self/mountinfo
259 // For reference see man 7 cgroups and CgroupSubsystemFactory
260 class CgroupInfo : public StackObj {
261 friend class CgroupSubsystemFactory;
262 friend class WhiteBox;
263
264 private:
265 char* _name;
266 int _hierarchy_id;
267 bool _enabled;
268 bool _data_complete; // indicating cgroup v1 data is complete for this controller
269 char* _cgroup_path; // cgroup controller path from /proc/self/cgroup
270 char* _root_mount_path; // root mount path from /proc/self/mountinfo. Unused for cgroup v2
271 char* _mount_path; // mount path from /proc/self/mountinfo.
272
273 public:
CgroupInfo()274 CgroupInfo() {
275 _name = NULL;
276 _hierarchy_id = -1;
277 _enabled = false;
278 _data_complete = false;
279 _cgroup_path = NULL;
280 _root_mount_path = NULL;
281 _mount_path = NULL;
282 }
283
284 };
285
286 class CgroupSubsystemFactory: AllStatic {
287 friend class WhiteBox;
288
289 public:
290 static CgroupSubsystem* create();
291 private:
is_cgroup_v2(u1 * flags)292 static inline bool is_cgroup_v2(u1* flags) {
293 return *flags == CGROUPS_V2;
294 }
295
296 #ifdef ASSERT
is_valid_cgroup(u1 * flags)297 static inline bool is_valid_cgroup(u1* flags) {
298 return *flags == CGROUPS_V1 || *flags == CGROUPS_V2;
299 }
is_cgroup_v1(u1 * flags)300 static inline bool is_cgroup_v1(u1* flags) {
301 return *flags == CGROUPS_V1;
302 }
303 #endif
304
305 // Determine the cgroup type (version 1 or version 2), given
306 // relevant paths to files. Sets 'flags' accordingly.
307 static bool determine_type(CgroupInfo* cg_infos,
308 const char* proc_cgroups,
309 const char* proc_self_cgroup,
310 const char* proc_self_mountinfo,
311 u1* flags);
312 static void cleanup(CgroupInfo* cg_infos);
313 };
314
315 #endif // CGROUP_SUBSYSTEM_LINUX_HPP
316