1 /*
2 * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include <string.h>
26 #include <math.h>
27 #include <errno.h>
28 #include "utilities/globalDefinitions.hpp"
29 #include "memory/allocation.hpp"
30 #include "runtime/os.hpp"
31 #include "logging/log.hpp"
32 #include "osContainer_linux.hpp"
33
// Divisor used by OSContainer::active_processor_count() to turn a cpu.shares
// value into a CPU count (shares / PER_CPU_SHARES, rounded up).
#define PER_CPU_SHARES 1024

// Set to true at the start of OSContainer::init(); init() asserts that it
// has not been set before.
bool OSContainer::_is_initialized = false;
// Set to true at the end of OSContainer::init() once the cgroup subsystems
// have been located.
bool OSContainer::_is_containerized = false;
// Page-aligned value derived from LONG_MAX in OSContainer::init(). Memory
// limits read from the cgroup files that are >= this value are reported as
// unlimited.
julong _unlimited_memory;
39
40 class CgroupSubsystem: CHeapObj<mtInternal> {
41 friend class OSContainer;
42
43 private:
44 /* mountinfo contents */
45 char *_root;
46 char *_mount_point;
47
48 /* Constructed subsystem directory */
49 char *_path;
50
51 public:
CgroupSubsystem(char * root,char * mountpoint)52 CgroupSubsystem(char *root, char *mountpoint) {
53 _root = os::strdup(root);
54 _mount_point = os::strdup(mountpoint);
55 _path = NULL;
56 }
57
58 /*
59 * Set directory to subsystem specific files based
60 * on the contents of the mountinfo and cgroup files.
61 */
set_subsystem_path(char * cgroup_path)62 void set_subsystem_path(char *cgroup_path) {
63 char buf[MAXPATHLEN+1];
64 if (_root != NULL && cgroup_path != NULL) {
65 if (strcmp(_root, "/") == 0) {
66 int buflen;
67 strncpy(buf, _mount_point, MAXPATHLEN);
68 buf[MAXPATHLEN-1] = '\0';
69 if (strcmp(cgroup_path,"/") != 0) {
70 buflen = strlen(buf);
71 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
72 return;
73 }
74 strncat(buf, cgroup_path, MAXPATHLEN-buflen);
75 buf[MAXPATHLEN-1] = '\0';
76 }
77 _path = os::strdup(buf);
78 } else {
79 if (strcmp(_root, cgroup_path) == 0) {
80 strncpy(buf, _mount_point, MAXPATHLEN);
81 buf[MAXPATHLEN-1] = '\0';
82 _path = os::strdup(buf);
83 } else {
84 char *p = strstr(_root, cgroup_path);
85 if (p != NULL && p == _root) {
86 if (strlen(cgroup_path) > strlen(_root)) {
87 int buflen;
88 strncpy(buf, _mount_point, MAXPATHLEN);
89 buf[MAXPATHLEN-1] = '\0';
90 buflen = strlen(buf);
91 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
92 return;
93 }
94 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen);
95 buf[MAXPATHLEN-1] = '\0';
96 _path = os::strdup(buf);
97 }
98 }
99 }
100 }
101 }
102 }
103
subsystem_path()104 char *subsystem_path() { return _path; }
105 };
106
// One CgroupSubsystem per cgroup controller read by this file. All start as
// NULL and are allocated by OSContainer::init() from the matching
// /proc/self/mountinfo lines.
CgroupSubsystem* memory = NULL;
CgroupSubsystem* cpuset = NULL;
CgroupSubsystem* cpu = NULL;
CgroupSubsystem* cpuacct = NULL;

// Type name passed as the return_type argument of GET_CONTAINER_INFO_CPTR.
typedef char * cptr;
113
114 PRAGMA_DIAG_PUSH
115 PRAGMA_FORMAT_NONLITERAL_IGNORED
subsystem_file_contents(CgroupSubsystem * c,const char * filename,const char * scan_fmt,T returnval)116 template <typename T> int subsystem_file_contents(CgroupSubsystem* c,
117 const char *filename,
118 const char *scan_fmt,
119 T returnval) {
120 FILE *fp = NULL;
121 char *p;
122 char file[MAXPATHLEN+1];
123 char buf[MAXPATHLEN+1];
124
125 if (c == NULL) {
126 log_debug(os, container)("subsystem_file_contents: CgroupSubsytem* is NULL");
127 return OSCONTAINER_ERROR;
128 }
129 if (c->subsystem_path() == NULL) {
130 log_debug(os, container)("subsystem_file_contents: subsystem path is NULL");
131 return OSCONTAINER_ERROR;
132 }
133
134 strncpy(file, c->subsystem_path(), MAXPATHLEN);
135 file[MAXPATHLEN-1] = '\0';
136 int filelen = strlen(file);
137 if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
138 log_debug(os, container)("File path too long %s, %s", file, filename);
139 return OSCONTAINER_ERROR;
140 }
141 strncat(file, filename, MAXPATHLEN-filelen);
142 log_trace(os, container)("Path to %s is %s", filename, file);
143 fp = fopen(file, "r");
144 if (fp != NULL) {
145 p = fgets(buf, MAXPATHLEN, fp);
146 if (p != NULL) {
147 int matched = sscanf(p, scan_fmt, returnval);
148 if (matched == 1) {
149 fclose(fp);
150 return 0;
151 } else {
152 log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
153 }
154 } else {
155 log_debug(os, container)("Empty file %s", file);
156 }
157 } else {
158 log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));
159 }
160 if (fp != NULL)
161 fclose(fp);
162 return OSCONTAINER_ERROR;
163 }
164 PRAGMA_DIAG_POP
165
/*
 * Declare `variable` of type return_type in the enclosing scope and fill it
 * from `filename` in `subsystem` via subsystem_file_contents(). If the read
 * fails the ENCLOSING function returns (return_type) OSCONTAINER_ERROR;
 * otherwise the value is traced using `logstring`.
 */
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
{                                                                         \
  const int read_status = subsystem_file_contents(subsystem,              \
                                                  filename,               \
                                                  scan_fmt,               \
                                                  &variable);             \
  if (read_status != 0) {                                                 \
    return (return_type) OSCONTAINER_ERROR;                               \
  }                                                                       \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}
180
/*
 * String flavour of GET_CONTAINER_INFO: declare a char array `variable` of
 * `bufsize` bytes in the enclosing scope and fill it from `filename` in
 * `subsystem` via subsystem_file_contents(). If the read fails the
 * ENCLOSING function returns (return_type) NULL; otherwise the string is
 * traced using `logstring`.
 */
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                                logstring, scan_fmt, variable, bufsize)   \
  char variable[bufsize];                                                 \
{                                                                         \
  const int read_status = subsystem_file_contents(subsystem,              \
                                                  filename,               \
                                                  scan_fmt,               \
                                                  variable);              \
  if (read_status != 0) {                                                 \
    return (return_type) NULL;                                            \
  }                                                                       \
                                                                          \
  log_trace(os, container)(logstring, variable);                          \
}
195
196 /* init
197 *
198 * Initialize the container support and determine if
199 * we are running under cgroup control.
200 */
init()201 void OSContainer::init() {
202 int mountid;
203 int parentid;
204 int major;
205 int minor;
206 FILE *mntinfo = NULL;
207 FILE *cgroup = NULL;
208 char buf[MAXPATHLEN+1];
209 char tmproot[MAXPATHLEN+1];
210 char tmpmount[MAXPATHLEN+1];
211 char tmpbase[MAXPATHLEN+1];
212 char *p;
213 jlong mem_limit;
214
215 assert(!_is_initialized, "Initializing OSContainer more than once");
216
217 _is_initialized = true;
218 _is_containerized = false;
219
220 _unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
221
222 log_trace(os, container)("OSContainer::init: Initializing Container Support");
223 if (!UseContainerSupport) {
224 log_trace(os, container)("Container Support not enabled");
225 return;
226 }
227
228 /*
229 * Find the cgroup mount point for memory and cpuset
230 * by reading /proc/self/mountinfo
231 *
232 * Example for docker:
233 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
234 *
235 * Example for host:
236 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
237 */
238 mntinfo = fopen("/proc/self/mountinfo", "r");
239 if (mntinfo == NULL) {
240 log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
241 os::strerror(errno));
242 return;
243 }
244
245 while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
246 // Look for the filesystem type and see if it's cgroup
247 char fstype[MAXPATHLEN+1];
248 fstype[0] = '\0';
249 char *s = strstr(p, " - ");
250 if (s != NULL &&
251 sscanf(s, " - %s", fstype) == 1 &&
252 strcmp(fstype, "cgroup") == 0) {
253
254 if (strstr(p, "memory") != NULL) {
255 int matched = sscanf(p, "%d %d %d:%d %s %s",
256 &mountid,
257 &parentid,
258 &major,
259 &minor,
260 tmproot,
261 tmpmount);
262 if (matched == 6) {
263 memory = new CgroupSubsystem(tmproot, tmpmount);
264 }
265 else
266 log_debug(os, container)("Incompatible str containing cgroup and memory: %s", p);
267 } else if (strstr(p, "cpuset") != NULL) {
268 int matched = sscanf(p, "%d %d %d:%d %s %s",
269 &mountid,
270 &parentid,
271 &major,
272 &minor,
273 tmproot,
274 tmpmount);
275 if (matched == 6) {
276 cpuset = new CgroupSubsystem(tmproot, tmpmount);
277 }
278 else {
279 log_debug(os, container)("Incompatible str containing cgroup and cpuset: %s", p);
280 }
281 } else if (strstr(p, "cpu,cpuacct") != NULL || strstr(p, "cpuacct,cpu") != NULL) {
282 int matched = sscanf(p, "%d %d %d:%d %s %s",
283 &mountid,
284 &parentid,
285 &major,
286 &minor,
287 tmproot,
288 tmpmount);
289 if (matched == 6) {
290 cpu = new CgroupSubsystem(tmproot, tmpmount);
291 cpuacct = new CgroupSubsystem(tmproot, tmpmount);
292 }
293 else {
294 log_debug(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s", p);
295 }
296 } else if (strstr(p, "cpuacct") != NULL) {
297 int matched = sscanf(p, "%d %d %d:%d %s %s",
298 &mountid,
299 &parentid,
300 &major,
301 &minor,
302 tmproot,
303 tmpmount);
304 if (matched == 6) {
305 cpuacct = new CgroupSubsystem(tmproot, tmpmount);
306 }
307 else {
308 log_debug(os, container)("Incompatible str containing cgroup and cpuacct: %s", p);
309 }
310 } else if (strstr(p, "cpu") != NULL) {
311 int matched = sscanf(p, "%d %d %d:%d %s %s",
312 &mountid,
313 &parentid,
314 &major,
315 &minor,
316 tmproot,
317 tmpmount);
318 if (matched == 6) {
319 cpu = new CgroupSubsystem(tmproot, tmpmount);
320 }
321 else {
322 log_debug(os, container)("Incompatible str containing cgroup and cpu: %s", p);
323 }
324 }
325 }
326 }
327
328 fclose(mntinfo);
329
330 if (memory == NULL) {
331 log_debug(os, container)("Required cgroup memory subsystem not found");
332 return;
333 }
334 if (cpuset == NULL) {
335 log_debug(os, container)("Required cgroup cpuset subsystem not found");
336 return;
337 }
338 if (cpu == NULL) {
339 log_debug(os, container)("Required cgroup cpu subsystem not found");
340 return;
341 }
342 if (cpuacct == NULL) {
343 log_debug(os, container)("Required cgroup cpuacct subsystem not found");
344 return;
345 }
346
347 /*
348 * Read /proc/self/cgroup and map host mount point to
349 * local one via /proc/self/mountinfo content above
350 *
351 * Docker example:
352 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
353 *
354 * Host example:
355 * 5:memory:/user.slice
356 *
357 * Construct a path to the process specific memory and cpuset
358 * cgroup directory.
359 *
360 * For a container running under Docker from memory example above
361 * the paths would be:
362 *
363 * /sys/fs/cgroup/memory
364 *
365 * For a Host from memory example above the path would be:
366 *
367 * /sys/fs/cgroup/memory/user.slice
368 *
369 */
370 cgroup = fopen("/proc/self/cgroup", "r");
371 if (cgroup == NULL) {
372 log_debug(os, container)("Can't open /proc/self/cgroup, %s",
373 os::strerror(errno));
374 return;
375 }
376
377 while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
378 int cgno;
379 int matched;
380 char *controller;
381 char *base;
382
383 /* Skip cgroup number */
384 strsep(&p, ":");
385 /* Get controller and base */
386 controller = strsep(&p, ":");
387 base = strsep(&p, "\n");
388
389 if (controller != NULL) {
390 if (strstr(controller, "memory") != NULL) {
391 memory->set_subsystem_path(base);
392 } else if (strstr(controller, "cpuset") != NULL) {
393 cpuset->set_subsystem_path(base);
394 } else if (strstr(controller, "cpu,cpuacct") != NULL || strstr(controller, "cpuacct,cpu") != NULL) {
395 cpu->set_subsystem_path(base);
396 cpuacct->set_subsystem_path(base);
397 } else if (strstr(controller, "cpuacct") != NULL) {
398 cpuacct->set_subsystem_path(base);
399 } else if (strstr(controller, "cpu") != NULL) {
400 cpu->set_subsystem_path(base);
401 }
402 }
403 }
404
405 fclose(cgroup);
406
407 // We need to update the amount of physical memory now that
408 // command line arguments have been processed.
409 if ((mem_limit = memory_limit_in_bytes()) > 0) {
410 os::Linux::set_physical_memory(mem_limit);
411 }
412
413 _is_containerized = true;
414
415 }
416
container_type()417 const char * OSContainer::container_type() {
418 if (is_containerized()) {
419 return "cgroupv1";
420 } else {
421 return NULL;
422 }
423 }
424
425
426 /* memory_limit_in_bytes
427 *
428 * Return the limit of available memory for this process.
429 *
430 * return:
431 * memory limit in bytes or
432 * -1 for unlimited
433 * OSCONTAINER_ERROR for not supported
434 */
memory_limit_in_bytes()435 jlong OSContainer::memory_limit_in_bytes() {
436 GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes",
437 "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);
438
439 if (memlimit >= _unlimited_memory) {
440 log_trace(os, container)("Memory Limit is: Unlimited");
441 return (jlong)-1;
442 }
443 else {
444 return (jlong)memlimit;
445 }
446 }
447
memory_and_swap_limit_in_bytes()448 jlong OSContainer::memory_and_swap_limit_in_bytes() {
449 GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes",
450 "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);
451 if (memswlimit >= _unlimited_memory) {
452 log_trace(os, container)("Memory and Swap Limit is: Unlimited");
453 return (jlong)-1;
454 } else {
455 return (jlong)memswlimit;
456 }
457 }
458
memory_soft_limit_in_bytes()459 jlong OSContainer::memory_soft_limit_in_bytes() {
460 GET_CONTAINER_INFO(julong, memory, "/memory.soft_limit_in_bytes",
461 "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit);
462 if (memsoftlimit >= _unlimited_memory) {
463 log_trace(os, container)("Memory Soft Limit is: Unlimited");
464 return (jlong)-1;
465 } else {
466 return (jlong)memsoftlimit;
467 }
468 }
469
470 /* memory_usage_in_bytes
471 *
472 * Return the amount of used memory for this process.
473 *
474 * return:
475 * memory usage in bytes or
476 * -1 for unlimited
477 * OSCONTAINER_ERROR for not supported
478 */
memory_usage_in_bytes()479 jlong OSContainer::memory_usage_in_bytes() {
480 GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes",
481 "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);
482 return memusage;
483 }
484
485 /* memory_max_usage_in_bytes
486 *
487 * Return the maximum amount of used memory for this process.
488 *
489 * return:
490 * max memory usage in bytes or
491 * OSCONTAINER_ERROR for not supported
492 */
memory_max_usage_in_bytes()493 jlong OSContainer::memory_max_usage_in_bytes() {
494 GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes",
495 "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage);
496 return memmaxusage;
497 }
498
499 /* active_processor_count
500 *
501 * Calculate an appropriate number of active processors for the
502 * VM to use based on these three inputs.
503 *
504 * cpu affinity
505 * cgroup cpu quota & cpu period
506 * cgroup cpu shares
507 *
508 * Algorithm:
509 *
510 * Determine the number of available CPUs from sched_getaffinity
511 *
512 * If user specified a quota (quota != -1), calculate the number of
513 * required CPUs by dividing quota by period.
514 *
515 * If shares are in effect (shares != -1), calculate the number
516 * of CPUs required for the shares by dividing the share value
517 * by PER_CPU_SHARES.
518 *
519 * All results of division are rounded up to the next whole number.
520 *
521 * If neither shares or quotas have been specified, return the
522 * number of active processors in the system.
523 *
524 * If both shares and quotas have been specified, the results are
525 * based on the flag PreferContainerQuotaForCPUCount. If true,
526 * return the quota value. If false return the smallest value
527 * between shares or quotas.
528 *
529 * If shares and/or quotas have been specified, the resulting number
530 * returned will never exceed the number of active processors.
531 *
532 * return:
533 * number of CPUs
534 */
active_processor_count()535 int OSContainer::active_processor_count() {
536 int quota_count = 0, share_count = 0;
537 int cpu_count, limit_count;
538 int result;
539
540 cpu_count = limit_count = os::Linux::active_processor_count();
541 int quota = cpu_quota();
542 int period = cpu_period();
543 int share = cpu_shares();
544
545 if (quota > -1 && period > 0) {
546 quota_count = ceilf((float)quota / (float)period);
547 log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count);
548 }
549 if (share > -1) {
550 share_count = ceilf((float)share / (float)PER_CPU_SHARES);
551 log_trace(os, container)("CPU Share count based on shares: %d", share_count);
552 }
553
554 // If both shares and quotas are setup results depend
555 // on flag PreferContainerQuotaForCPUCount.
556 // If true, limit CPU count to quota
557 // If false, use minimum of shares and quotas
558 if (quota_count !=0 && share_count != 0) {
559 if (PreferContainerQuotaForCPUCount) {
560 limit_count = quota_count;
561 } else {
562 limit_count = MIN2(quota_count, share_count);
563 }
564 } else if (quota_count != 0) {
565 limit_count = quota_count;
566 } else if (share_count != 0) {
567 limit_count = share_count;
568 }
569
570 result = MIN2(cpu_count, limit_count);
571 log_trace(os, container)("OSContainer::active_processor_count: %d", result);
572 return result;
573 }
574
cpu_cpuset_cpus()575 char * OSContainer::cpu_cpuset_cpus() {
576 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus",
577 "cpuset.cpus is: %s", "%1023s", cpus, 1024);
578 return os::strdup(cpus);
579 }
580
cpu_cpuset_memory_nodes()581 char * OSContainer::cpu_cpuset_memory_nodes() {
582 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems",
583 "cpuset.mems is: %s", "%1023s", mems, 1024);
584 return os::strdup(mems);
585 }
586
587 /* cpu_quota
588 *
589 * Return the number of milliseconds per period
590 * process is guaranteed to run.
591 *
592 * return:
593 * quota time in milliseconds
594 * -1 for no quota
595 * OSCONTAINER_ERROR for not supported
596 */
cpu_quota()597 int OSContainer::cpu_quota() {
598 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us",
599 "CPU Quota is: %d", "%d", quota);
600 return quota;
601 }
602
cpu_period()603 int OSContainer::cpu_period() {
604 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us",
605 "CPU Period is: %d", "%d", period);
606 return period;
607 }
608
609 /* cpu_shares
610 *
611 * Return the amount of cpu shares available to the process
612 *
613 * return:
614 * Share number (typically a number relative to 1024)
615 * (2048 typically expresses 2 CPUs worth of processing)
616 * -1 for no share setup
617 * OSCONTAINER_ERROR for not supported
618 */
cpu_shares()619 int OSContainer::cpu_shares() {
620 GET_CONTAINER_INFO(int, cpu, "/cpu.shares",
621 "CPU Shares is: %d", "%d", shares);
622 // Convert 1024 to no shares setup
623 if (shares == 1024) return -1;
624
625 return shares;
626 }
627
628