1 /*
2  * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #ifndef CGROUP_SUBSYSTEM_LINUX_HPP
26 #define CGROUP_SUBSYSTEM_LINUX_HPP
27 
28 #include "memory/allocation.hpp"
29 #include "runtime/os.hpp"
30 #include "logging/log.hpp"
31 #include "utilities/globalDefinitions.hpp"
32 #include "utilities/macros.hpp"
33 #include "osContainer_linux.hpp"
34 
35 // Shared cgroups code (used by cgroup version 1 and version 2)
36 
37 /*
38  * PER_CPU_SHARES has been set to 1024 because CPU shares' quota
39  * is commonly used in cloud frameworks like Kubernetes[1],
40  * AWS[2] and Mesos[3] in a similar way. They spawn containers with
41  * --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
42  * the inverse for determining the number of possible available
43  * CPUs to the JVM inside a container. See JDK-8216366.
44  *
45  * [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
46  *     In particular:
47  *        When using Docker:
48  *          The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
49  *          fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
50  *          --cpu-shares flag in the docker run command.
51  * [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
52  * [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
53  *     https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
54  */
#define PER_CPU_SHARES 1024

// Values stored in the 'flags' byte of CgroupSubsystemFactory.
// CGROUPS_V1 and CGROUPS_V2 are the two values the factory's
// is_cgroup_v1()/is_cgroup_v2() predicates compare against.
#define CGROUPS_V1               1
#define CGROUPS_V2               2
// NOTE(review): the INVALID_* values presumably describe why cgroup
// detection failed - confirm against CgroupSubsystemFactory::determine_type().
#define INVALID_CGROUPS_V2       3
#define INVALID_CGROUPS_V1       4
#define INVALID_CGROUPS_NO_MOUNT 5
#define INVALID_CGROUPS_GENERIC  6

// Four controllers: cpu, cpuset, cpuacct, memory.
// CG_INFO_LENGTH is the number of controllers; the *_IDX values are the
// distinct slots 0..3, one per controller.
#define CG_INFO_LENGTH 4
#define CPUSET_IDX     0
#define CPU_IDX        1
#define CPUACCT_IDX    2
#define MEMORY_IDX     3

// Shorthand for a mutable C string pointer.
typedef char* cptr;
72 
73 class CgroupController: public CHeapObj<mtInternal> {
74   public:
75     virtual char *subsystem_path() = 0;
76 };
77 
78 PRAGMA_DIAG_PUSH
79 PRAGMA_FORMAT_NONLITERAL_IGNORED
subsystem_file_line_contents(CgroupController * c,const char * filename,const char * matchline,const char * scan_fmt,T returnval)80 template <typename T> int subsystem_file_line_contents(CgroupController* c,
81                                               const char *filename,
82                                               const char *matchline,
83                                               const char *scan_fmt,
84                                               T returnval) {
85   FILE *fp = NULL;
86   char *p;
87   char file[MAXPATHLEN+1];
88   char buf[MAXPATHLEN+1];
89   char discard[MAXPATHLEN+1];
90   bool found_match = false;
91 
92   if (c == NULL) {
93     log_debug(os, container)("subsystem_file_line_contents: CgroupController* is NULL");
94     return OSCONTAINER_ERROR;
95   }
96   if (c->subsystem_path() == NULL) {
97     log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");
98     return OSCONTAINER_ERROR;
99   }
100 
101   strncpy(file, c->subsystem_path(), MAXPATHLEN);
102   file[MAXPATHLEN-1] = '\0';
103   int filelen = strlen(file);
104   if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
105     log_debug(os, container)("File path too long %s, %s", file, filename);
106     return OSCONTAINER_ERROR;
107   }
108   strncat(file, filename, MAXPATHLEN-filelen);
109   log_trace(os, container)("Path to %s is %s", filename, file);
110   fp = fopen(file, "r");
111   if (fp != NULL) {
112     int err = 0;
113     while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) {
114       found_match = false;
115       if (matchline == NULL) {
116         // single-line file case
117         int matched = sscanf(p, scan_fmt, returnval);
118         found_match = (matched == 1);
119       } else {
120         // multi-line file case
121         if (strstr(p, matchline) != NULL) {
122           // discard matchline string prefix
123           int matched = sscanf(p, scan_fmt, discard, returnval);
124           found_match = (matched == 2);
125         } else {
126           continue; // substring not found
127         }
128       }
129       if (found_match) {
130         fclose(fp);
131         return 0;
132       } else {
133         err = 1;
134         log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
135       }
136     }
137     if (err == 0) {
138       log_debug(os, container)("Empty file %s", file);
139     }
140   } else {
141     log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));
142   }
143   if (fp != NULL)
144     fclose(fp);
145   return OSCONTAINER_ERROR;
146 }
147 PRAGMA_DIAG_POP
148 
// Declares 'variable' (of type return_type) in the enclosing scope and fills
// it from the single-line file 'filename' of 'subsystem' using scan_fmt.
// If the read fails, the ENCLOSING function returns
// (return_type) OSCONTAINER_ERROR; otherwise the value is trace-logged with
// 'logstring' and execution continues after the macro.
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
{                                                                         \
  const int cg_read_status =                                              \
      subsystem_file_line_contents(subsystem, filename, NULL,             \
                                   scan_fmt, &variable);                  \
  if (cg_read_status != 0) {                                              \
    return (return_type) OSCONTAINER_ERROR;                               \
  }                                                                       \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}
164 
// String flavour of GET_CONTAINER_INFO: declares 'char variable[bufsize]' in
// the enclosing scope and lets scan_fmt write into it from the single-line
// file 'filename' of 'subsystem'.
// If the read fails, the ENCLOSING function returns (return_type) NULL;
// otherwise the buffer is trace-logged with 'logstring'.
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                               logstring, scan_fmt, variable, bufsize)    \
  char variable[bufsize];                                                 \
{                                                                         \
  const int cg_read_status =                                              \
      subsystem_file_line_contents(subsystem, filename, NULL,             \
                                   scan_fmt, variable);                   \
  if (cg_read_status != 0) {                                              \
    return (return_type) NULL;                                            \
  }                                                                       \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}
180 
// Multi-line flavour of GET_CONTAINER_INFO: declares 'variable' (of type
// return_type) in the enclosing scope and fills it from the line of
// 'filename' that contains 'matchline'. scan_fmt must consume the key token
// first and the value second.
// If the read fails, the ENCLOSING function returns
// (return_type) OSCONTAINER_ERROR; otherwise the value is trace-logged with
// 'logstring'.
#define GET_CONTAINER_INFO_LINE(return_type, controller, filename,        \
                           matchline, logstring, scan_fmt, variable)      \
  return_type variable;                                                   \
{                                                                         \
  const int cg_read_status =                                              \
      subsystem_file_line_contents(controller, filename, matchline,       \
                                   scan_fmt, &variable);                  \
  if (cg_read_status != 0) {                                              \
    return (return_type) OSCONTAINER_ERROR;                               \
  }                                                                       \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}
196 
197 
198 class CachedMetric : public CHeapObj<mtInternal>{
199   private:
200     volatile jlong _metric;
201     volatile jlong _next_check_counter;
202   public:
CachedMetric()203     CachedMetric() {
204       _metric = -1;
205       _next_check_counter = min_jlong;
206     }
should_check_metric()207     bool should_check_metric() {
208       return os::elapsed_counter() > _next_check_counter;
209     }
value()210     jlong value() { return _metric; }
set_value(jlong value,jlong timeout)211     void set_value(jlong value, jlong timeout) {
212       _metric = value;
213       // Metric is unlikely to change, but we want to remain
214       // responsive to configuration changes. A very short grace time
215       // between re-read avoids excessive overhead during startup without
216       // significantly reducing the VMs ability to promptly react to changed
217       // metric config
218       _next_check_counter = os::elapsed_counter() + timeout;
219     }
220 };
221 
222 class CachingCgroupController : public CHeapObj<mtInternal> {
223   private:
224     CgroupController* _controller;
225     CachedMetric* _metrics_cache;
226 
227   public:
CachingCgroupController(CgroupController * cont)228     CachingCgroupController(CgroupController* cont) {
229       _controller = cont;
230       _metrics_cache = new CachedMetric();
231     }
232 
metrics_cache()233     CachedMetric* metrics_cache() { return _metrics_cache; }
controller()234     CgroupController* controller() { return _controller; }
235 };
236 
237 class CgroupSubsystem: public CHeapObj<mtInternal> {
238   public:
239     jlong memory_limit_in_bytes();
240     int active_processor_count();
241 
242     virtual int cpu_quota() = 0;
243     virtual int cpu_period() = 0;
244     virtual int cpu_shares() = 0;
245     virtual jlong memory_usage_in_bytes() = 0;
246     virtual jlong memory_and_swap_limit_in_bytes() = 0;
247     virtual jlong memory_soft_limit_in_bytes() = 0;
248     virtual jlong memory_max_usage_in_bytes() = 0;
249     virtual char * cpu_cpuset_cpus() = 0;
250     virtual char * cpu_cpuset_memory_nodes() = 0;
251     virtual jlong read_memory_limit_in_bytes() = 0;
252     virtual const char * container_type() = 0;
253     virtual CachingCgroupController* memory_controller() = 0;
254     virtual CachingCgroupController* cpu_controller() = 0;
255 };
256 
257 // Utility class for storing info retrieved from /proc/cgroups,
258 // /proc/self/cgroup and /proc/self/mountinfo
259 // For reference see man 7 cgroups and CgroupSubsystemFactory
260 class CgroupInfo : public StackObj {
261   friend class CgroupSubsystemFactory;
262   friend class WhiteBox;
263 
264   private:
265     char* _name;
266     int _hierarchy_id;
267     bool _enabled;
268     bool _data_complete;    // indicating cgroup v1 data is complete for this controller
269     char* _cgroup_path;     // cgroup controller path from /proc/self/cgroup
270     char* _root_mount_path; // root mount path from /proc/self/mountinfo. Unused for cgroup v2
271     char* _mount_path;      // mount path from /proc/self/mountinfo.
272 
273   public:
CgroupInfo()274     CgroupInfo() {
275       _name = NULL;
276       _hierarchy_id = -1;
277       _enabled = false;
278       _data_complete = false;
279       _cgroup_path = NULL;
280       _root_mount_path = NULL;
281       _mount_path = NULL;
282     }
283 
284 };
285 
286 class CgroupSubsystemFactory: AllStatic {
287   friend class WhiteBox;
288 
289   public:
290     static CgroupSubsystem* create();
291   private:
is_cgroup_v2(u1 * flags)292     static inline bool is_cgroup_v2(u1* flags) {
293        return *flags == CGROUPS_V2;
294     }
295 
296 #ifdef ASSERT
is_valid_cgroup(u1 * flags)297     static inline bool is_valid_cgroup(u1* flags) {
298        return *flags == CGROUPS_V1 || *flags == CGROUPS_V2;
299     }
is_cgroup_v1(u1 * flags)300     static inline bool is_cgroup_v1(u1* flags) {
301        return *flags == CGROUPS_V1;
302     }
303 #endif
304 
305     // Determine the cgroup type (version 1 or version 2), given
306     // relevant paths to files. Sets 'flags' accordingly.
307     static bool determine_type(CgroupInfo* cg_infos,
308                                const char* proc_cgroups,
309                                const char* proc_self_cgroup,
310                                const char* proc_self_mountinfo,
311                                u1* flags);
312     static void cleanup(CgroupInfo* cg_infos);
313 };
314 
315 #endif // CGROUP_SUBSYSTEM_LINUX_HPP
316