1 /*
2  * Copyright (c) 2020, Red Hat Inc.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package jdk.internal.platform;
27 
28 import java.io.IOException;
29 import java.io.UncheckedIOException;
30 import java.lang.System.Logger;
31 import java.lang.System.Logger.Level;
32 import java.nio.file.Path;
33 import java.nio.file.Paths;
34 import java.util.Collections;
35 import java.util.HashMap;
36 import java.util.List;
37 import java.util.Map;
38 import java.util.Optional;
39 import java.util.function.Consumer;
40 import java.util.regex.Matcher;
41 import java.util.regex.Pattern;
42 import java.util.stream.Stream;
43 
44 import jdk.internal.platform.cgroupv1.CgroupV1Subsystem;
45 import jdk.internal.platform.cgroupv2.CgroupV2Subsystem;
46 
47 public class CgroupSubsystemFactory {
48 
49     private static final String CPU_CTRL = "cpu";
50     private static final String CPUACCT_CTRL = "cpuacct";
51     private static final String CPUSET_CTRL = "cpuset";
52     private static final String BLKIO_CTRL = "blkio";
53     private static final String MEMORY_CTRL = "memory";
54 
55     /*
56      * From https://www.kernel.org/doc/Documentation/filesystems/proc.txt
57      *
58      *  36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
59      *  (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
60      *
61      *  (1) mount ID:  unique identifier of the mount (may be reused after umount)
62      *  (2) parent ID:  ID of parent (or of self for the top of the mount tree)
63      *  (3) major:minor:  value of st_dev for files on filesystem
64      *  (4) root:  root of the mount within the filesystem
65      *  (5) mount point:  mount point relative to the process's root
66      *  (6) mount options:  per mount options
67      *  (7) optional fields:  zero or more fields of the form "tag[:value]"
68      *  (8) separator:  marks the end of the optional fields
69      *  (9) filesystem type:  name of filesystem of the form "type[.subtype]"
70      *  (10) mount source:  filesystem specific information or "none"
71      *  (11) super options:  per super block options
72      */
73     private static final Pattern MOUNTINFO_PATTERN = Pattern.compile(
74         "^[^\\s]+\\s+[^\\s]+\\s+[^\\s]+\\s+" + // (1), (2), (3)
75         "([^\\s]+)\\s+([^\\s]+)\\s+" +         // (4), (5)     - group 1, 2: root, mount point
76         "[^-]+-\\s+" +                         // (6), (7), (8)
77         "([^\\s]+)\\s+" +                      // (9)          - group 3: filesystem type
78         ".*$");                                // (10), (11)
79 
create()80     static CgroupMetrics create() {
81         Optional<CgroupTypeResult> optResult = null;
82         try {
83             optResult = determineType("/proc/self/mountinfo", "/proc/cgroups", "/proc/self/cgroup");
84         } catch (IOException e) {
85             return null;
86         } catch (UncheckedIOException e) {
87             return null;
88         }
89 
90         if (optResult.isEmpty()) {
91             return null;
92         }
93         CgroupTypeResult result = optResult.get();
94 
95         // If no controller is enabled, return no metrics.
96         if (!result.isAnyControllersEnabled()) {
97             return null;
98         }
99 
100         // Warn about mixed cgroups v1 and cgroups v2 controllers. The code is
101         // not ready to deal with that on a per-controller basis. Return no metrics
102         // in that case
103         if (result.isAnyCgroupV1Controllers() && result.isAnyCgroupV2Controllers()) {
104             Logger logger = System.getLogger("jdk.internal.platform");
105             logger.log(Level.DEBUG, "Mixed cgroupv1 and cgroupv2 not supported. Metrics disabled.");
106             return null;
107         }
108 
109         Map<String, CgroupInfo> infos = result.getInfos();
110         if (result.isCgroupV2()) {
111             // For unified it doesn't matter which controller we pick.
112             CgroupInfo anyController = infos.get(MEMORY_CTRL);
113             CgroupSubsystem subsystem = CgroupV2Subsystem.getInstance(anyController);
114             return subsystem != null ? new CgroupMetrics(subsystem) : null;
115         } else {
116             CgroupV1Subsystem subsystem = CgroupV1Subsystem.getInstance(infos);
117             return subsystem != null ? new CgroupV1MetricsImpl(subsystem) : null;
118         }
119     }
120 
121     /*
122      * Determine the type of the cgroup system (v1 - legacy or hybrid - or, v2 - unified)
123      * based on three files:
124      *
125      *  (1) mountInfo  (i.e. /proc/self/mountinfo)
126      *  (2) cgroups    (i.e. /proc/cgroups)
127      *  (3) selfCgroup (i.e. /proc/self/cgroup)
128      *
129      * File 'cgroups' is inspected for the hierarchy ID of the mounted cgroup pseudo
130      * filesystem. The hierarchy ID, in turn, helps us distinguish cgroups v2 and
131      * cgroup v1. For a system with zero hierarchy ID, but with >= 1 relevant cgroup
132      * controllers mounted in 'mountInfo' we can infer it's cgroups v2. Anything else
133      * will be cgroup v1 (hybrid or legacy). File 'selfCgroup' is being used for
134      * figuring out the mount path of the controller in the cgroup hierarchy.
135      */
determineType(String mountInfo, String cgroups, String selfCgroup)136     public static Optional<CgroupTypeResult> determineType(String mountInfo,
137                                                            String cgroups,
138                                                            String selfCgroup) throws IOException {
139         final Map<String, CgroupInfo> infos = new HashMap<>();
140         List<String> lines = CgroupUtil.readAllLinesPrivileged(Paths.get(cgroups));
141         for (String line : lines) {
142             if (line.startsWith("#")) {
143                 continue;
144             }
145             CgroupInfo info = CgroupInfo.fromCgroupsLine(line);
146             switch (info.getName()) {
147             case CPU_CTRL:      infos.put(CPU_CTRL, info); break;
148             case CPUACCT_CTRL:  infos.put(CPUACCT_CTRL, info); break;
149             case CPUSET_CTRL:   infos.put(CPUSET_CTRL, info); break;
150             case MEMORY_CTRL:   infos.put(MEMORY_CTRL, info); break;
151             case BLKIO_CTRL:    infos.put(BLKIO_CTRL, info); break;
152             }
153         }
154 
155         // For cgroups v2 all controllers need to have zero hierarchy id
156         // and /proc/self/mountinfo needs to have at least one cgroup filesystem
157         // mounted. Note that hybrid hierarchy has controllers mounted via
158         // cgroup v1. In that case hierarchy id's will be non-zero.
159         boolean isCgroupsV2 = true;
160         boolean anyControllersEnabled = false;
161         boolean anyCgroupsV2Controller = false;
162         boolean anyCgroupsV1Controller = false;
163         for (CgroupInfo info: infos.values()) {
164             anyCgroupsV1Controller = anyCgroupsV1Controller || info.getHierarchyId() != 0;
165             anyCgroupsV2Controller = anyCgroupsV2Controller || info.getHierarchyId() == 0;
166             isCgroupsV2 = isCgroupsV2 && info.getHierarchyId() == 0;
167             anyControllersEnabled = anyControllersEnabled || info.isEnabled();
168         }
169 
170         // If there are no mounted, relevant cgroup controllers in 'mountinfo' and only
171         // 0 hierarchy IDs in file 'cgroups' have been seen, we are on a cgroups v1 system.
172         // However, continuing in that case does not make sense as we'd need
173         // information from mountinfo for the mounted controller paths which we wouldn't
174         // find anyway in that case.
175         lines = CgroupUtil.readAllLinesPrivileged(Paths.get(mountInfo));
176         boolean anyCgroupMounted = false;
177         for (String line: lines) {
178             boolean cgroupsControllerFound = amendCgroupInfos(line, infos, isCgroupsV2);
179             anyCgroupMounted = anyCgroupMounted || cgroupsControllerFound;
180         }
181         if (!anyCgroupMounted) {
182             return Optional.empty();
183         }
184 
185         // Map a cgroup version specific 'action' to a line in 'selfCgroup' (i.e.
186         // /proc/self/cgroups) , split on the ':' token, so as to set the appropriate
187         // path to the cgroup controller in cgroup data structures 'infos'.
188         // See:
189         //   setCgroupV1Path() for the action run for cgroups v1 systems
190         //   setCgroupV2Path() for the action run for cgroups v2 systems
191         try (Stream<String> selfCgroupLines =
192              CgroupUtil.readFilePrivileged(Paths.get(selfCgroup))) {
193             Consumer<String[]> action = (tokens -> setCgroupV1Path(infos, tokens));
194             if (isCgroupsV2) {
195                 action = (tokens -> setCgroupV2Path(infos, tokens));
196             }
197             // The limit value of 3 is because /proc/self/cgroup contains three
198             // colon-separated tokens per line. The last token, cgroup path, might
199             // contain a ':'.
200             selfCgroupLines.map(line -> line.split(":", 3)).forEach(action);
201         }
202 
203         CgroupTypeResult result = new CgroupTypeResult(isCgroupsV2,
204                                                        anyControllersEnabled,
205                                                        anyCgroupsV2Controller,
206                                                        anyCgroupsV1Controller,
207                                                        Collections.unmodifiableMap(infos));
208         return Optional.of(result);
209     }
210 
211     /*
212      * Sets the path to the cgroup controller for cgroups v2 based on a line
213      * in /proc/self/cgroup file (represented as the 'tokens' array).
214      *
215      * Example:
216      *
217      * 0::/
218      *
219      * => tokens = [ "0", "", "/" ]
220      */
setCgroupV2Path(Map<String, CgroupInfo> infos, String[] tokens)221     private static void setCgroupV2Path(Map<String, CgroupInfo> infos,
222                                         String[] tokens) {
223         int hierarchyId = Integer.parseInt(tokens[0]);
224         String cgroupPath = tokens[2];
225         for (CgroupInfo info: infos.values()) {
226             assert hierarchyId == info.getHierarchyId() && hierarchyId == 0;
227             info.setCgroupPath(cgroupPath);
228         }
229     }
230 
231     /*
232      * Sets the path to the cgroup controller for cgroups v1 based on a line
233      * in /proc/self/cgroup file (represented as the 'tokens' array).
234      *
235      * Note that multiple controllers might be joined at a single path.
236      *
237      * Example:
238      *
239      * 7:cpu,cpuacct:/system.slice/docker-74ad896fb40bbefe0f181069e4417505fffa19052098f27edf7133f31423bc0b.scope
240      *
241      * => tokens = [ "7", "cpu,cpuacct", "/system.slice/docker-74ad896fb40bbefe0f181069e4417505fffa19052098f27edf7133f31423bc0b.scope" ]
242      */
setCgroupV1Path(Map<String, CgroupInfo> infos, String[] tokens)243     private static void setCgroupV1Path(Map<String, CgroupInfo> infos,
244                                         String[] tokens) {
245         String controllerName = tokens[1];
246         String cgroupPath = tokens[2];
247         if (controllerName != null && cgroupPath != null) {
248             for (String cName: controllerName.split(",")) {
249                 switch (cName) {
250                     case MEMORY_CTRL: // fall through
251                     case CPUSET_CTRL:
252                     case CPUACCT_CTRL:
253                     case CPU_CTRL:
254                     case BLKIO_CTRL:
255                         CgroupInfo info = infos.get(cName);
256                         info.setCgroupPath(cgroupPath);
257                         break;
258                     // Ignore not recognized controllers
259                     default:
260                         break;
261                 }
262             }
263         }
264     }
265 
266     /**
267      * Amends cgroup infos with mount path and mount root. The passed in
268      * 'mntInfoLine' represents a single line in, for example,
269      * /proc/self/mountinfo. Each line is matched with MOUNTINFO_PATTERN
270      * (see above), so as to extract the relevant tokens from the line.
271      *
272      * Host example cgroups v1:
273      *
274      * 44 30 0:41 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,seclabel,devices
275      *
276      * Container example cgroups v1:
277      *
278      * 1901 1894 0:37 /system.slice/docker-2291eeb92093f9d761aaf971782b575e9be56bd5930d4b5759b51017df3c1387.scope /sys/fs/cgroup/cpu,cpuacct ro,nosuid,nodev,noexec,relatime master:12 - cgroup cgroup rw,seclabel,cpu,cpuacct
279      *
280      * Container example cgroups v2:
281      *
282      * 1043 1034 0:27 / /sys/fs/cgroup ro,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw,seclabel,nsdelegate
283      *
284      *
285      * @return {@code true} iff a relevant controller has been found at the
286      * given line
287      */
amendCgroupInfos(String mntInfoLine, Map<String, CgroupInfo> infos, boolean isCgroupsV2)288     private static boolean amendCgroupInfos(String mntInfoLine,
289                                             Map<String, CgroupInfo> infos,
290                                             boolean isCgroupsV2) {
291         Matcher lineMatcher = MOUNTINFO_PATTERN.matcher(mntInfoLine.trim());
292         boolean cgroupv1ControllerFound = false;
293         boolean cgroupv2ControllerFound = false;
294         if (lineMatcher.matches()) {
295             String mountRoot = lineMatcher.group(1);
296             String mountPath = lineMatcher.group(2);
297             String fsType = lineMatcher.group(3);
298             if (fsType.equals("cgroup")) {
299                 Path p = Paths.get(mountPath);
300                 String[] controllerNames = p.getFileName().toString().split(",");
301                 for (String controllerName: controllerNames) {
302                     switch (controllerName) {
303                         case MEMORY_CTRL: // fall-through
304                         case CPU_CTRL:
305                         case CPUACCT_CTRL:
306                         case BLKIO_CTRL: {
307                             CgroupInfo info = infos.get(controllerName);
308                             assert info.getMountPoint() == null;
309                             assert info.getMountRoot() == null;
310                             info.setMountPoint(mountPath);
311                             info.setMountRoot(mountRoot);
312                             cgroupv1ControllerFound = true;
313                             break;
314                         }
315                         case CPUSET_CTRL: {
316                             CgroupInfo info = infos.get(controllerName);
317                             if (info.getMountPoint() != null) {
318                                 // On some systems duplicate cpuset controllers get mounted in addition to
319                                 // the main cgroup controllers most likely under /sys/fs/cgroup. In that
320                                 // case pick the one under /sys/fs/cgroup and discard others.
321                                 if (!info.getMountPoint().startsWith("/sys/fs/cgroup")) {
322                                     info.setMountPoint(mountPath);
323                                     info.setMountRoot(mountRoot);
324                                 }
325                             } else {
326                                 info.setMountPoint(mountPath);
327                                 info.setMountRoot(mountRoot);
328                             }
329                             cgroupv1ControllerFound = true;
330                             break;
331                         }
332                         default:
333                             // Ignore controllers which we don't recognize
334                             break;
335                     }
336                 }
337             } else if (fsType.equals("cgroup2")) {
338                 if (isCgroupsV2) { // will be false for hybrid
339                     // All controllers have the same mount point and root mount
340                     // for unified hierarchy.
341                     for (CgroupInfo info: infos.values()) {
342                         assert info.getMountPoint() == null;
343                         assert info.getMountRoot() == null;
344                         info.setMountPoint(mountPath);
345                         info.setMountRoot(mountRoot);
346                     }
347                 }
348                 cgroupv2ControllerFound = true;
349             }
350         }
351         return cgroupv1ControllerFound || cgroupv2ControllerFound;
352     }
353 
354     public static final class CgroupTypeResult {
355         private final boolean isCgroupV2;
356         private final boolean anyControllersEnabled;
357         private final boolean anyCgroupV2Controllers;
358         private final boolean anyCgroupV1Controllers;
359         private final Map<String, CgroupInfo> infos;
360 
CgroupTypeResult(boolean isCgroupV2, boolean anyControllersEnabled, boolean anyCgroupV2Controllers, boolean anyCgroupV1Controllers, Map<String, CgroupInfo> infos)361         private CgroupTypeResult(boolean isCgroupV2,
362                                  boolean anyControllersEnabled,
363                                  boolean anyCgroupV2Controllers,
364                                  boolean anyCgroupV1Controllers,
365                                  Map<String, CgroupInfo> infos) {
366             this.isCgroupV2 = isCgroupV2;
367             this.anyControllersEnabled = anyControllersEnabled;
368             this.anyCgroupV1Controllers = anyCgroupV1Controllers;
369             this.anyCgroupV2Controllers = anyCgroupV2Controllers;
370             this.infos = infos;
371         }
372 
isCgroupV2()373         public boolean isCgroupV2() {
374             return isCgroupV2;
375         }
376 
isAnyControllersEnabled()377         public boolean isAnyControllersEnabled() {
378             return anyControllersEnabled;
379         }
380 
isAnyCgroupV2Controllers()381         public boolean isAnyCgroupV2Controllers() {
382             return anyCgroupV2Controllers;
383         }
384 
isAnyCgroupV1Controllers()385         public boolean isAnyCgroupV1Controllers() {
386             return anyCgroupV1Controllers;
387         }
388 
getInfos()389         public Map<String, CgroupInfo> getInfos() {
390             return infos;
391         }
392     }
393 }
394