1 /*
2 * virresctrl.c:
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library. If not, see
16 * <http://www.gnu.org/licenses/>.
17 */
18
19 #include <config.h>
20
21 #include <sys/file.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26
27 #define LIBVIRT_VIRRESCTRLPRIV_H_ALLOW
28 #include "virresctrlpriv.h"
29 #include "viralloc.h"
30 #include "virbuffer.h"
31 #include "virfile.h"
32 #include "virlog.h"
33 #include "virobject.h"
34 #include "virstring.h"
35
36 #define VIR_FROM_THIS VIR_FROM_RESCTRL
37
38 VIR_LOG_INIT("util.virresctrl");
39
40
41 /* Resctrl is short for Resource Control. It might be implemented for various
42 * resources. Currently this supports cache allocation technology (aka CAT),
43 * memory bandwidth allocation (aka MBA) and cache monitoring technology (aka
44 * CMT). More resources technologies may be added in the future.
45 */
46
47
48 /* Common definitions */
49 #define SYSFS_RESCTRL_PATH "/sys/fs/resctrl"
50
51
52 /* Following are three different enum implementations for the same enum. Each
53 * one of them helps translating to/from strings for different interfaces. The
54 * delimiter must be VIR_CACHE_TYPE_LAST for all of them in order to stay
55 * consistent in between all of them. */
56
57 /* Cache name mapping for Linux kernel naming. */
58 VIR_ENUM_IMPL(virCacheKernel,
59 VIR_CACHE_TYPE_LAST,
60 "Unified",
61 "Instruction",
62 "Data",
63 );
64
65 /* Cache name mapping for our XML naming. */
66 VIR_ENUM_IMPL(virCache,
67 VIR_CACHE_TYPE_LAST,
68 "both",
69 "code",
70 "data",
71 );
72
73 /* Cache name mapping for resctrl interface naming. */
74 VIR_ENUM_DECL(virResctrl);
75 VIR_ENUM_IMPL(virResctrl,
76 VIR_CACHE_TYPE_LAST,
77 "",
78 "CODE",
79 "DATA",
80 );
81
82 /* Monitor feature name prefix mapping for monitor naming */
83 VIR_ENUM_IMPL(virResctrlMonitorPrefix,
84 VIR_RESCTRL_MONITOR_TYPE_LAST,
85 "__unsupported__",
86 "llc_",
87 "mbm_",
88 );
89
90
91 /* All private typedefs so that they exist for all later definitions. This way
92 * structs can be included in one or another without reorganizing the code every
93 * time. */
94 typedef struct _virResctrlInfoPerType virResctrlInfoPerType;
95
96 typedef struct _virResctrlInfoPerLevel virResctrlInfoPerLevel;
97
98 typedef struct _virResctrlInfoMemBW virResctrlInfoMemBW;
99
100 typedef struct _virResctrlInfoMongrp virResctrlInfoMongrp;
101
102 typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
103
104 typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
105
106 typedef struct _virResctrlAllocMemBW virResctrlAllocMemBW;
107
108
109 /* Class definitions and initializations */
110 static virClass *virResctrlInfoClass;
111 static virClass *virResctrlAllocClass;
112 static virClass *virResctrlMonitorClass;
113
114
115 /* virResctrlInfo */
struct _virResctrlInfoPerType {
    /* Kernel-provided information */
    /* Minimum number of bits the kernel accepts in a capacity bitmask
     * (read from the per-type "min_cbm_bits" resctrl info file). */
    unsigned int min_cbm_bits;

    /* Our computed information from the above */
    /* Population count of the full "cbm_mask" bitmask, i.e. how many bits
     * are usable for allocations of this level/type. */
    unsigned int bits;
    /* Highest host cache id seen so far for this level/type; bumped in
     * virResctrlInfoGetCache() when another same-sized cache is reported. */
    unsigned int max_cache_id;

    /* In order to be self-sufficient we need size information per cache.
     * Funnily enough, one of the outcomes of the resctrl design is that it
     * does not account for different sizes per cache on the same level. So
     * for the sake of easiness, let's copy that, for now. */
    unsigned long long size;

    /* Information that we will return upon request (this is public struct) as
     * until now all the above is internal to this module */
    virResctrlInfoPerCache control;
};
134
struct _virResctrlInfoPerLevel {
    /* Sparse array with VIR_CACHE_TYPE_LAST entries, indexed by enum
     * virCacheType; a NULL entry means that type is absent on this level. */
    virResctrlInfoPerType **types;
};
138
139 /* Information about memory bandwidth allocation */
struct _virResctrlInfoMemBW {
    /* minimum memory bandwidth allowed */
    unsigned int min_bandwidth;
    /* bandwidth granularity */
    unsigned int bandwidth_granularity;
    /* Maximum number of simultaneous allocations */
    unsigned int max_allocation;
    /* level number of last level cache */
    unsigned int last_level_cache;
    /* max id of last level cache; this is used to track how many last
     * level caches are available in the host system, as the number of
     * memory bandwidth allocation controllers is identical with the
     * number of last level caches. */
    unsigned int max_id;
};
155
struct _virResctrlInfoMongrp {
    /* Maximum number of simultaneous monitors */
    unsigned int max_monitor;
    /* NULL-terminated string list of monitor feature names */
    char **features;
    /* Number of monitor features */
    size_t nfeatures;

    /* Last level cache related information */

    /* This adjustable value affects the final reuse of resources used by
     * monitor. After the action of removing a monitor, the kernel may not
     * release all hardware resources that monitor used immediately if the
     * cache occupancy value associated with 'removed' monitor is above this
     * threshold. Once the cache occupancy is below this threshold, the
     * underlying hardware resource will be reclaimed and be put into the
     * resource pool for next reusing. */
    unsigned int cache_reuse_threshold;
    /* The cache 'level' that has the monitor capability */
    unsigned int cache_level;
};
177
struct _virResctrlInfo {
    virObject parent;

    /* Sparse array of per-level capability info, indexed by cache level. */
    virResctrlInfoPerLevel **levels;
    size_t nlevels;

    /* NULL when memory bandwidth allocation is unsupported by the host. */
    virResctrlInfoMemBW *membw_info;

    /* NULL when resource monitoring is unsupported by the host. */
    virResctrlInfoMongrp *monitor_info;
};
188
189
190 static void
virResctrlInfoDispose(void * obj)191 virResctrlInfoDispose(void *obj)
192 {
193 size_t i = 0;
194 size_t j = 0;
195
196 virResctrlInfo *resctrl = obj;
197
198 for (i = 0; i < resctrl->nlevels; i++) {
199 virResctrlInfoPerLevel *level = resctrl->levels[i];
200
201 if (!level)
202 continue;
203
204 if (level->types) {
205 for (j = 0; j < VIR_CACHE_TYPE_LAST; j++)
206 g_free(level->types[j]);
207 }
208 g_free(level->types);
209 g_free(level);
210 }
211
212 if (resctrl->monitor_info)
213 g_strfreev(resctrl->monitor_info->features);
214
215 g_free(resctrl->membw_info);
216 g_free(resctrl->levels);
217 g_free(resctrl->monitor_info);
218 }
219
220
221 void
virResctrlInfoMonFree(virResctrlInfoMon * mon)222 virResctrlInfoMonFree(virResctrlInfoMon *mon)
223 {
224 if (!mon)
225 return;
226
227 g_strfreev(mon->features);
228 g_free(mon);
229 }
230
231
232 /* virResctrlAlloc and virResctrlMonitor */
233
234 /*
235 * virResctrlAlloc and virResctrlMonitor are representing a resource control
236 * group (in XML under cputune/cachetune and consequently a directory under
237 * /sys/fs/resctrl). virResctrlAlloc is the data structure for resource
238 * allocation, while the virResctrlMonitor represents the resource monitoring
239 * part.
240 *
241 * virResctrlAlloc represents one allocation. Since it can have multiple
242 * parts of multiple caches allocated it is represented as bunch of nested
243 * sparse arrays (by sparse I mean array of pointers so that each might be NULL
244 * in case there is no allocation for that particular cache allocation (level,
245 * cache, ...) or memory allocation for particular node).
246 *
 * Allocation corresponding to root directory, /sys/fs/resctrl/, defines the
248 * default resource allocating policy, which is created immediately after
249 * mounting, and owns all the tasks and cpus in the system. Cache or memory
250 * bandwidth resource will be shared for tasks in this allocation.
251 *
252 * =====Cache allocation technology (CAT)=====
253 *
254 * Since one allocation can be made for caches on different levels, the first
255 * nested sparse array is of types virResctrlAllocPerLevel. For example if you
256 * have allocation for level 3 cache, there will be three NULL pointers and then
257 * allocated pointer to virResctrlAllocPerLevel. That way you can access it by
258 * `alloc[level]` as O(1) is desired instead of crawling through normal arrays
259 * or lists in three nested loops. The code uses a lot of direct accesses.
260 *
261 * Each virResctrlAllocPerLevel can have allocations for different cache
262 * allocation types. You can allocate instruction cache (VIR_CACHE_TYPE_CODE),
263 * data cache (VIR_CACHE_TYPE_DATA) or unified cache (VIR_CACHE_TYPE_BOTH).
264 * Those allocations are kept in sparse array of virResctrlAllocPerType pointers.
265 *
266 * For each virResctrlAllocPerType users can request some size of the cache to
267 * be allocated. That's what the sparse array `sizes` is for. Non-NULL
268 * pointers represent requested size allocations. The array is indexed by host
269 * cache id (gotten from `/sys/devices/system/cpu/cpuX/cache/indexY/id`). Users
270 * can see this information e.g. in the output of `virsh capabilities` (for that
271 * information there's the other struct, namely `virResctrlInfo`).
272 *
273 * When allocation is being created we need to find unused part of the cache for
274 * all of them. While doing that we store the bitmask in a sparse array of
275 * virBitmaps named `masks` indexed the same way as `sizes`. The upper bounds
276 * of the sparse arrays are stored in nmasks or nsizes, respectively.
277 *
278 * =====Memory Bandwidth allocation technology (MBA)=====
279 *
280 * The memory bandwidth allocation support in virResctrlAlloc works in the
281 * same fashion as CAT. However, memory bandwidth controller doesn't have a
282 * hierarchy organization as cache, each node have one memory bandwidth
283 * controller to memory bandwidth distribution. The number of memory bandwidth
284 * controller is identical with number of last level cache. So MBA also employs
285 * a sparse array to represent whether a memory bandwidth allocation happens
286 * on corresponding node. The available memory controller number is collected
287 * in 'virResctrlInfo'.
288 *
289 * =====Cache monitoring technology (CMT)=====
290 *
291 * Cache monitoring technology is used to perceive how many cache the process
292 * is using actually. virResctrlMonitor represents the resource control
293 * monitoring group, it is supported to monitor resource utilization
294 * information on granularity of vcpu.
295 *
296 * From a hardware perspective, cache monitoring technology (CMT), memory
297 * bandwidth technology (MBM), as well as the CAT and MBA, are all orthogonal
298 * features. The monitor will be created under the scope of default resctrl
 * group if no specific CAT or MBA entries are provided for the guest.
300 */
struct _virResctrlAllocPerType {
    /* There could be bool saying whether this is set or not, but since everything
     * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
     * have to have one more level of allocation anyway, so this stays faithful to
     * the concept */
    /* Requested sizes, indexed by host cache id; NULL means no request. */
    unsigned long long **sizes;
    size_t nsizes;

    /* Mask for each cache */
    virBitmap **masks;
    size_t nmasks;
};
313
/* Allocations for one cache level; see the big comment above for how the
 * nesting of sparse arrays works. */
struct _virResctrlAllocPerLevel {
    virResctrlAllocPerType **types; /* Indexed with enum virCacheType */
    /* There is no `ntypes` member variable as it is always allocated for
     * VIR_CACHE_TYPE_LAST number of items */
};
319
/*
 * virResctrlAllocMemBW represents one memory bandwidth allocation.
 * Since it can have several last level caches in a NUMA system, it is
 * also represented as nested sparse arrays, the same way as
 * virResctrlAllocPerLevel.
 */
struct _virResctrlAllocMemBW {
    /* Requested bandwidths, indexed like the sparse arrays above;
     * NULL means no request for that entry. */
    unsigned int **bandwidths;
    size_t nbandwidths;
};
329
struct _virResctrlAlloc {
    virObject parent;

    /* Sparse array of per-level allocations, indexed by cache level. */
    virResctrlAllocPerLevel **levels;
    size_t nlevels;

    /* Memory bandwidth allocations; NULL when none were requested. */
    virResctrlAllocMemBW *mem_bw;

    /* The identifier (any unique string for now) */
    char *id;
    /* libvirt-generated path in /sys/fs/resctrl for this particular
     * allocation */
    char *path;
};
344
345 /*
346 * virResctrlMonitor is the data structure for resctrl monitor. Resctrl
347 * monitor represents a resctrl monitoring group, which can be used to
348 * monitor the resource utilization information for either cache or
349 * memory bandwidth.
350 */
struct _virResctrlMonitor {
    virObject parent;

    /* Each virResctrlMonitor is associated with one specific allocation,
     * either the root directory allocation under /sys/fs/resctrl or a
     * specific allocation defined under the root directory.
     * This pointer points to the allocation this monitor is associated with.
     */
    virResctrlAlloc *alloc;
    /* The monitor identifier. For a monitor that has the same @path name as
     * its @alloc, the @id will be set to the same value as @alloc->id.
     */
    char *id;
    /* libvirt-generated path in /sys/fs/resctrl for this particular
     * monitor */
    char *path;
};
368
369
370 static void
virResctrlAllocDispose(void * obj)371 virResctrlAllocDispose(void *obj)
372 {
373 size_t i = 0;
374 size_t j = 0;
375 size_t k = 0;
376
377 virResctrlAlloc *alloc = obj;
378
379 for (i = 0; i < alloc->nlevels; i++) {
380 virResctrlAllocPerLevel *level = alloc->levels[i];
381
382 if (!level)
383 continue;
384
385 for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
386 virResctrlAllocPerType *type = level->types[j];
387
388 if (!type)
389 continue;
390
391 for (k = 0; k < type->nsizes; k++)
392 g_free(type->sizes[k]);
393
394 for (k = 0; k < type->nmasks; k++)
395 virBitmapFree(type->masks[k]);
396
397 g_free(type->sizes);
398 g_free(type->masks);
399 g_free(type);
400 }
401 g_free(level->types);
402 g_free(level);
403 }
404
405 if (alloc->mem_bw) {
406 virResctrlAllocMemBW *mem_bw = alloc->mem_bw;
407 for (i = 0; i < mem_bw->nbandwidths; i++)
408 g_free(mem_bw->bandwidths[i]);
409 g_free(alloc->mem_bw->bandwidths);
410 g_free(alloc->mem_bw);
411 }
412
413 g_free(alloc->id);
414 g_free(alloc->path);
415 g_free(alloc->levels);
416 }
417
418
419 static void
virResctrlMonitorDispose(void * obj)420 virResctrlMonitorDispose(void *obj)
421 {
422 virResctrlMonitor *monitor = obj;
423
424 virObjectUnref(monitor->alloc);
425 g_free(monitor->id);
426 g_free(monitor->path);
427 }
428
429
430 /* Global initialization for classes */
static int
virResctrlOnceInit(void)
{
    /* Register the three classes; VIR_CLASS_NEW fills in the file-scope
     * virResctrl{Info,Alloc,Monitor}Class variables declared above and
     * evaluates to NULL on failure. */
    if (!VIR_CLASS_NEW(virResctrlInfo, virClassForObject()))
        return -1;

    if (!VIR_CLASS_NEW(virResctrlAlloc, virClassForObject()))
        return -1;

    if (!VIR_CLASS_NEW(virResctrlMonitor, virClassForObject()))
        return -1;

    return 0;
}
445
446 VIR_ONCE_GLOBAL_INIT(virResctrl);
447
448
449 /* Common functions */
450 #ifndef WIN32
451
452 static int
virResctrlLock(void)453 virResctrlLock(void)
454 {
455 int fd = open(SYSFS_RESCTRL_PATH, O_RDONLY | O_CLOEXEC);
456
457 if (fd < 0) {
458 virReportSystemError(errno, "%s", _("Cannot open resctrl"));
459 return -1;
460 }
461
462 if (flock(fd, LOCK_EX) < 0) {
463 virReportSystemError(errno, "%s", _("Cannot lock resctrl"));
464 VIR_FORCE_CLOSE(fd);
465 return -1;
466 }
467
468 return fd;
469 }
470
471
/* Release the lock taken by virResctrlLock() by closing @fd.
 *
 * Returns 0 on success (or when @fd is -1), -1 with an error reported if
 * the close failed. */
static int
virResctrlUnlock(int fd)
{
    /* VIR_CLOSE() below overwrites @fd with -1 even when close() fails, so
     * remember the original descriptor for the best-effort unlock attempt
     * on the error path. */
    int saved_fd = fd;

    if (fd == -1)
        return 0;

    /* The lock gets unlocked by closing the fd, which we need to do anyway in
     * order to clean up properly */
    if (VIR_CLOSE(fd) < 0) {
        virReportSystemError(errno, "%s", _("Cannot close resctrl"));

        /* Trying to save the already broken */
        if (flock(saved_fd, LOCK_UN) < 0)
            virReportSystemError(errno, "%s", _("Cannot unlock resctrl"));

        return -1;
    }

    return 0;
}
492
493 #else /* WIN32 */
494
/* WIN32 stub: resctrl is a Linux kernel interface and flock() is
 * unavailable, so locking always fails with ENOSYS. */
static int
virResctrlLock(void)
{
    virReportSystemError(ENOSYS, "%s",
                         _("resctrl locking is not supported "
                           "on this platform"));
    return -1;
}
503
504
/* WIN32 stub: see virResctrlLock() above; always fails with ENOSYS. */
static int
virResctrlUnlock(int fd G_GNUC_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("resctrl locking is not supported "
                           "on this platform"));
    return -1;
}
513
514 #endif /* WIN32 */
515
516
517 /* virResctrlInfo-related definitions */
/* Scan the entries of SYSFS_RESCTRL_PATH "/info" from @dirp and fill
 * @resctrl->levels with per-level, per-type cache allocation capabilities.
 *
 * Only directories named "L<level>[CODE|DATA]" are cache info; anything
 * else ("MB", or suffixes like "_MON" that don't map to a cache type) is
 * skipped.  Returns 0 on success, -1 on fatal error. */
static int
virResctrlGetCacheInfo(virResctrlInfo *resctrl,
                       DIR *dirp)
{
    int rv = -1;
    struct dirent *ent = NULL;

    while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH "/info")) > 0) {
        g_autofree char *cbm_mask_str = NULL;
        g_autoptr(virBitmap) cbm_mask_map = NULL;
        char *endptr = NULL;
        int type = 0;
        unsigned int level = 0;
        virResctrlInfoPerLevel *i_level = NULL;
        g_autofree virResctrlInfoPerType *i_type = NULL;

        VIR_DEBUG("Parsing info type '%s'", ent->d_name);
        /* Cache entries all start with 'L' (e.g. "L3", "L2CODE"). */
        if (ent->d_name[0] != 'L')
            continue;

        /* Parse the numeric level; @endptr is left pointing at the suffix. */
        if (virStrToLong_uip(ent->d_name + 1, &endptr, 10, &level) < 0) {
            VIR_DEBUG("Cannot parse resctrl cache info level '%s'", ent->d_name + 1);
            continue;
        }

        /* Suffix must be "", "CODE" or "DATA" (e.g. "_MON" is rejected). */
        type = virResctrlTypeFromString(endptr);
        if (type < 0) {
            VIR_DEBUG("Ignoring resctrl cache info with suffix '%s'", endptr);
            continue;
        }

        i_type = g_new0(virResctrlInfoPerType, 1);
        i_type->control.scope = type;

        rv = virFileReadValueUint(&i_type->control.max_allocation,
                                  SYSFS_RESCTRL_PATH "/info/%s/num_closids",
                                  ent->d_name);
        if (rv == -2) {
            /* The file doesn't exist, so it's unusable for us,
             * but we can scan further */
            VIR_WARN("The path '" SYSFS_RESCTRL_PATH "/info/%s/num_closids' "
                     "does not exist",
                     ent->d_name);
        } else if (rv < 0) {
            /* Other failures are fatal, so just quit */
            return -1;
        }

        rv = virFileReadValueString(&cbm_mask_str,
                                    SYSFS_RESCTRL_PATH
                                    "/info/%s/cbm_mask",
                                    ent->d_name);
        if (rv == -2) {
            /* If the previous file exists, so should this one. Hence -2 is
             * fatal in this case as well (errors out in next condition) - the
             * kernel interface might've changed too much or something else is
             * wrong. */
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Cannot get cbm_mask from resctrl cache info"));
        }
        if (rv < 0)
            return -1;

        virStringTrimOptionalNewline(cbm_mask_str);

        if (!(cbm_mask_map = virBitmapNewString(cbm_mask_str))) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Cannot parse cbm_mask from resctrl cache info"));
            return -1;
        }

        /* Number of usable bits in the capacity bitmask. */
        i_type->bits = virBitmapCountBits(cbm_mask_map);

        rv = virFileReadValueUint(&i_type->min_cbm_bits,
                                  SYSFS_RESCTRL_PATH "/info/%s/min_cbm_bits",
                                  ent->d_name);
        if (rv == -2)
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Cannot get min_cbm_bits from resctrl cache info"));
        if (rv < 0)
            return -1;

        /* Grow the sparse levels array so resctrl->levels[level] exists. */
        if (resctrl->nlevels <= level)
            VIR_EXPAND_N(resctrl->levels, resctrl->nlevels,
                         level - resctrl->nlevels + 1);

        if (!resctrl->levels[level]) {
            virResctrlInfoPerType **types = NULL;

            types = g_new0(virResctrlInfoPerType *, VIR_CACHE_TYPE_LAST);

            resctrl->levels[level] = g_new0(virResctrlInfoPerLevel, 1);
            resctrl->levels[level]->types = types;
        }

        i_level = resctrl->levels[level];

        if (i_level->types[type]) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Duplicate cache type in resctrl for level %u"),
                           level);
            return -1;
        }

        i_level->types[type] = g_steal_pointer(&i_type);
    }

    return 0;
}
627
628
/* Read memory bandwidth allocation (MBA) capabilities from
 * SYSFS_RESCTRL_PATH "/info/MB" into @resctrl->membw_info.
 *
 * Returns 0 on success -- including when MBA is unsupported, in which case
 * @resctrl->membw_info is left NULL -- and -1 on fatal error. */
static int
virResctrlGetMemoryBandwidthInfo(virResctrlInfo *resctrl)
{
    int rv = -1;
    g_autofree virResctrlInfoMemBW *i_membw = NULL;

    /* query memory bandwidth allocation info */
    i_membw = g_new0(virResctrlInfoMemBW, 1);
    rv = virFileReadValueUint(&i_membw->bandwidth_granularity,
                              SYSFS_RESCTRL_PATH "/info/MB/bandwidth_gran");
    if (rv == -2) {
        /* The file doesn't exist, so it's unusable for us,
         * probably memory bandwidth allocation unsupported */
        VIR_INFO("The path '" SYSFS_RESCTRL_PATH "/info/MB/bandwidth_gran'"
                 "does not exist");
        return 0;
    } else if (rv < 0) {
        /* Other failures are fatal, so just quit */
        return -1;
    }

    rv = virFileReadValueUint(&i_membw->min_bandwidth,
                              SYSFS_RESCTRL_PATH "/info/MB/min_bandwidth");
    if (rv == -2) {
        /* If the previous file exists, so should this one. Hence -2 is
         * fatal in this case (errors out in next condition) - the kernel
         * interface might've changed too much or something else is wrong. */
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot get min bandwidth from resctrl memory info"));
    }
    if (rv < 0)
        return -1;

    rv = virFileReadValueUint(&i_membw->max_allocation,
                              SYSFS_RESCTRL_PATH "/info/MB/num_closids");
    if (rv == -2) {
        /* Similar reasoning to min_bandwidth above. */
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot get max allocation from resctrl memory info"));
    }
    if (rv < 0)
        return -1;

    /* Only publish the struct once all mandatory files were read. */
    resctrl->membw_info = g_steal_pointer(&i_membw);
    return 0;
}
675
676
677 /*
678 * Retrieve monitor capability from the resource control file system.
679 *
680 * The monitor capability is exposed through "SYSFS_RESCTRL_PATH/info/L3_MON"
681 * directory under the resource control file system. The monitor capability is
682 * parsed by reading the interface files and stored in the structure
683 * 'virResctrlInfoMongrp'.
684 *
685 * Not all host supports the resource monitor, leave the pointer
686 * @resctrl->monitor_info empty if not supported.
687 */
688 static int
virResctrlGetMonitorInfo(virResctrlInfo * resctrl)689 virResctrlGetMonitorInfo(virResctrlInfo *resctrl)
690 {
691 int rv = -1;
692 g_autofree char *featurestr = NULL;
693 g_autofree virResctrlInfoMongrp *info_monitor = NULL;
694
695 info_monitor = g_new0(virResctrlInfoMongrp, 1);
696
697 /* For now, monitor only exists in level 3 cache */
698 info_monitor->cache_level = 3;
699
700 rv = virFileReadValueUint(&info_monitor->max_monitor,
701 SYSFS_RESCTRL_PATH "/info/L3_MON/num_rmids");
702 if (rv == -2) {
703 /* The file doesn't exist, so it's unusable for us, probably resource
704 * monitor unsupported */
705 VIR_INFO("The file '" SYSFS_RESCTRL_PATH "/info/L3_MON/num_rmids' "
706 "does not exist");
707 return 0;
708 } else if (rv < 0) {
709 /* Other failures are fatal, so just quit */
710 return -1;
711 }
712
713 rv = virFileReadValueUint(&info_monitor->cache_reuse_threshold,
714 SYSFS_RESCTRL_PATH
715 "/info/L3_MON/max_threshold_occupancy");
716 if (rv == -2) {
717 /* If CMT is not supported, then 'max_threshold_occupancy' file
718 * will not exist. */
719 VIR_DEBUG("File '" SYSFS_RESCTRL_PATH
720 "/info/L3_MON/max_threshold_occupancy' does not exist");
721 } else if (rv < 0) {
722 return -1;
723 }
724
725 rv = virFileReadValueString(&featurestr,
726 SYSFS_RESCTRL_PATH
727 "/info/L3_MON/mon_features");
728 if (rv == -2)
729 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
730 _("Cannot get mon_features from resctrl"));
731 if (rv < 0)
732 return -1;
733
734 if (!*featurestr) {
735 /* If no feature found in "/info/L3_MON/mon_features",
736 * some error happens */
737 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
738 _("Got empty feature list from resctrl"));
739 return -1;
740 }
741
742 info_monitor->features = g_strsplit(featurestr, "\n", 0);
743 info_monitor->nfeatures = g_strv_length(info_monitor->features);
744 VIR_DEBUG("Resctrl supported %zd monitoring features", info_monitor->nfeatures);
745
746 resctrl->monitor_info = g_steal_pointer(&info_monitor);
747
748 return 0;
749 }
750
751
752 static int
virResctrlGetInfo(virResctrlInfo * resctrl)753 virResctrlGetInfo(virResctrlInfo *resctrl)
754 {
755 g_autoptr(DIR) dirp = NULL;
756 int ret = -1;
757
758 ret = virDirOpenIfExists(&dirp, SYSFS_RESCTRL_PATH "/info");
759 if (ret <= 0)
760 return ret;
761
762 if ((ret = virResctrlGetMemoryBandwidthInfo(resctrl)) < 0)
763 return -1;
764
765 if ((ret = virResctrlGetCacheInfo(resctrl, dirp)) < 0)
766 return -1;
767
768 if ((ret = virResctrlGetMonitorInfo(resctrl)) < 0)
769 return -1;
770
771 return 0;
772 }
773
774
775 virResctrlInfo *
virResctrlInfoNew(void)776 virResctrlInfoNew(void)
777 {
778 virResctrlInfo *ret = NULL;
779
780 if (virResctrlInitialize() < 0)
781 return NULL;
782
783 ret = virObjectNew(virResctrlInfoClass);
784 if (!ret)
785 return NULL;
786
787 if (virResctrlGetInfo(ret) < 0) {
788 virObjectUnref(ret);
789 return NULL;
790 }
791
792 return ret;
793 }
794
795
796 static bool
virResctrlInfoIsEmpty(virResctrlInfo * resctrl)797 virResctrlInfoIsEmpty(virResctrlInfo *resctrl)
798 {
799 size_t i = 0;
800 size_t j = 0;
801
802 if (!resctrl)
803 return true;
804
805 if (resctrl->membw_info)
806 return false;
807
808 if (resctrl->monitor_info)
809 return false;
810
811 for (i = 0; i < resctrl->nlevels; i++) {
812 virResctrlInfoPerLevel *i_level = resctrl->levels[i];
813
814 if (!i_level)
815 continue;
816
817 for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
818 if (i_level->types[j])
819 return false;
820 }
821 }
822
823 return true;
824 }
825
826
827 int
virResctrlInfoGetMemoryBandwidth(virResctrlInfo * resctrl,unsigned int level,virResctrlInfoMemBWPerNode * control)828 virResctrlInfoGetMemoryBandwidth(virResctrlInfo *resctrl,
829 unsigned int level,
830 virResctrlInfoMemBWPerNode *control)
831 {
832 virResctrlInfoMemBW *membw_info = resctrl->membw_info;
833
834 if (!membw_info)
835 return 0;
836
837 if (membw_info->last_level_cache != level)
838 return 0;
839
840 control->granularity = membw_info->bandwidth_granularity;
841 control->min = membw_info->min_bandwidth;
842 control->max_allocation = membw_info->max_allocation;
843 return 1;
844 }
845
846
/* virResctrlInfoGetCache:
 * @resctrl: resctrl info object
 * @level: cache level the caller is describing
 * @size: size of one cache on @level (units are whatever the caller uses
 *        consistently -- only equality across caches is checked here)
 * @ncontrols: in/out count of entries in @controls
 * @controls: in/out array that new per-cache control entries are appended to
 *
 * For each cache type known on @level, appends a copy of the control info
 * to @controls.  Also updates internal bookkeeping as a side effect: cache
 * size/granularity/min, max_cache_id, and the MBA last-level-cache tracking.
 *
 * Returns 0 on success (including when nothing is known about @level),
 * -1 on failure; on failure the appended @controls entries are freed and
 * *ncontrols is reset. */
int
virResctrlInfoGetCache(virResctrlInfo *resctrl,
                       unsigned int level,
                       unsigned long long size,
                       size_t *ncontrols,
                       virResctrlInfoPerCache ***controls)
{
    virResctrlInfoPerLevel *i_level = NULL;
    virResctrlInfoPerType *i_type = NULL;
    size_t i = 0;

    if (virResctrlInfoIsEmpty(resctrl))
        return 0;

    /* Let's take the opportunity to update the number of last level
     * cache. This number of memory bandwidth controller is same with
     * last level cache */
    if (resctrl->membw_info) {
        virResctrlInfoMemBW *membw_info = resctrl->membw_info;

        if (level > membw_info->last_level_cache) {
            /* A deeper level was seen: it becomes the new last level. */
            membw_info->last_level_cache = level;
            membw_info->max_id = 0;
        } else if (membw_info->last_level_cache == level) {
            /* Another cache on the last level: one more MBA controller. */
            membw_info->max_id++;
        }
    }

    if (level >= resctrl->nlevels)
        return 0;

    i_level = resctrl->levels[level];
    if (!i_level)
        return 0;

    for (i = 0; i < VIR_CACHE_TYPE_LAST; i++) {
        i_type = i_level->types[i];
        if (!i_type)
            continue;

        /* Let's take the opportunity to update our internal information about
         * the cache size */
        if (!i_type->size) {
            /* First cache seen on this level/type: record its geometry. */
            i_type->size = size;
            i_type->control.granularity = size / i_type->bits;
            if (i_type->min_cbm_bits != 1)
                i_type->control.min = i_type->min_cbm_bits * i_type->control.granularity;
        } else {
            /* Subsequent caches must match the recorded size exactly. */
            if (i_type->size != size) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("level %u cache size %llu does not match "
                                 "expected size %llu"),
                               level, i_type->size, size);
                goto error;
            }
            i_type->max_cache_id++;
        }

        VIR_EXPAND_N(*controls, *ncontrols, 1);
        (*controls)[*ncontrols - 1] = g_new0(virResctrlInfoPerCache, 1);
        memcpy((*controls)[*ncontrols - 1], &i_type->control, sizeof(i_type->control));
    }

    return 0;
 error:
    /* Undo all appends made by this call's callers too -- the whole array
     * is torn down, matching the original behavior. */
    while (*ncontrols)
        VIR_FREE((*controls)[--*ncontrols]);
    VIR_FREE(*controls);
    return -1;
}
917
918
919 /* virResctrlInfoGetMonitorPrefix
920 *
921 * @resctrl: Pointer to virResctrlInfo
922 * @prefix: Monitor prefix name for monitor looking for.
923 * @monitor: Returns the capability information for target monitor if the
 *           monitor with @prefix is supported by host.
925 *
926 * Return monitor capability information for @prefix through @monitor.
927 * If monitor with @prefix is not supported in system, @monitor will be
928 * cleared to NULL.
929 *
930 * Returns 0 if @monitor is created or monitor type with @prefix is not
931 * supported by host, -1 on failure with error message set.
932 */
int
virResctrlInfoGetMonitorPrefix(virResctrlInfo *resctrl,
                               const char *prefix,
                               virResctrlInfoMon **monitor)
{
    size_t i = 0;
    virResctrlInfoMongrp *mongrp_info = NULL;
    virResctrlInfoMon *mon = NULL;
    int ret = -1;

    if (!prefix) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Empty prefix name for resctrl monitor"));
        return -1;
    }

    if (virResctrlInfoIsEmpty(resctrl))
        return 0;

    mongrp_info = resctrl->monitor_info;

    if (!mongrp_info) {
        VIR_INFO("Monitor is not supported in host");
        return 0;
    }

    /* Map @prefix to a monitor type via the virResctrlMonitorPrefix enum. */
    for (i = 0; i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
        if (STREQ(prefix, virResctrlMonitorPrefixTypeToString(i))) {
            mon = g_new0(virResctrlInfoMon, 1);
            mon->type = i;
            break;
        }
    }

    if (!mon) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Bad prefix name '%s' for resctrl monitor"),
                       prefix);
        return -1;
    }

    mon->max_monitor = mongrp_info->max_monitor;

    /* Cache-occupancy specific attributes only apply to the cache monitor. */
    if (mon->type == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
        mon->cache_reuse_threshold = mongrp_info->cache_reuse_threshold;
        mon->cache_level = mongrp_info->cache_level;
    }

    /* Copy the subset of host features matching @prefix; allocate for the
     * worst case (all features) plus the NULL terminator. */
    mon->features = g_new0(char *, mongrp_info->nfeatures + 1);

    for (i = 0; i < mongrp_info->nfeatures; i++) {
        if (STRPREFIX(mongrp_info->features[i], prefix))
            mon->features[mon->nfeatures++] = g_strdup(mongrp_info->features[i]);
    }

    /* Shrink to the actual count; the terminating NULL is preserved. */
    mon->features = g_renew(char *, mon->features, mon->nfeatures + 1);

    ret = 0;

    /* In case *monitor is pointed to some monitor, clean it. */
    virResctrlInfoMonFree(*monitor);

    if (mon->nfeatures == 0) {
        /* No feature found for current monitor, means host does not support
         * monitor type with @prefix name.
         * Telling caller this monitor is supported by hardware specification,
         * but not supported by this host. */
        VIR_INFO("No resctrl monitor features using prefix '%s' found", prefix);
        goto cleanup;
    }

    *monitor = g_steal_pointer(&mon);
 cleanup:
    /* NULL after g_steal_pointer() on success -> no-op there. */
    virResctrlInfoMonFree(mon);
    return ret;
}
1009
1010
1011 /* virResctrlAlloc-related definitions */
/* Allocate a new, empty virResctrlAlloc object; returns NULL when global
 * class initialization fails. */
virResctrlAlloc *
virResctrlAllocNew(void)
{
    if (virResctrlInitialize() < 0)
        return NULL;

    return virObjectNew(virResctrlAllocClass);
}
1020
1021
1022 bool
virResctrlAllocIsEmpty(virResctrlAlloc * alloc)1023 virResctrlAllocIsEmpty(virResctrlAlloc *alloc)
1024 {
1025 size_t i = 0;
1026 size_t j = 0;
1027 size_t k = 0;
1028
1029 if (!alloc)
1030 return true;
1031
1032 if (alloc->mem_bw)
1033 return false;
1034
1035 for (i = 0; i < alloc->nlevels; i++) {
1036 virResctrlAllocPerLevel *a_level = alloc->levels[i];
1037
1038 if (!a_level)
1039 continue;
1040
1041 for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
1042 virResctrlAllocPerType *a_type = a_level->types[j];
1043
1044 if (!a_type)
1045 continue;
1046
1047 for (k = 0; k < a_type->nsizes; k++) {
1048 if (a_type->sizes[k])
1049 return false;
1050 }
1051
1052 for (k = 0; k < a_type->nmasks; k++) {
1053 if (a_type->masks[k])
1054 return false;
1055 }
1056 }
1057 }
1058
1059 return true;
1060 }
1061
1062
1063 static virResctrlAllocPerType *
virResctrlAllocGetType(virResctrlAlloc * alloc,unsigned int level,virCacheType type)1064 virResctrlAllocGetType(virResctrlAlloc *alloc,
1065 unsigned int level,
1066 virCacheType type)
1067 {
1068 virResctrlAllocPerLevel *a_level = NULL;
1069
1070 if (alloc->nlevels <= level)
1071 VIR_EXPAND_N(alloc->levels, alloc->nlevels, level - alloc->nlevels + 1);
1072
1073 if (!alloc->levels[level]) {
1074 virResctrlAllocPerType **types = NULL;
1075
1076 types = g_new0(virResctrlAllocPerType *, VIR_CACHE_TYPE_LAST);
1077
1078 alloc->levels[level] = g_new0(virResctrlAllocPerLevel, 1);
1079 alloc->levels[level]->types = types;
1080 }
1081
1082 a_level = alloc->levels[level];
1083
1084 if (!a_level->types[type])
1085 a_level->types[type] = g_new0(virResctrlAllocPerType, 1);
1086
1087 return a_level->types[type];
1088 }
1089
1090
1091 static int
virResctrlAllocUpdateMask(virResctrlAlloc * alloc,unsigned int level,virCacheType type,unsigned int cache,virBitmap * mask)1092 virResctrlAllocUpdateMask(virResctrlAlloc *alloc,
1093 unsigned int level,
1094 virCacheType type,
1095 unsigned int cache,
1096 virBitmap *mask)
1097 {
1098 virResctrlAllocPerType *a_type = virResctrlAllocGetType(alloc, level, type);
1099
1100 if (!a_type)
1101 return -1;
1102
1103 if (a_type->nmasks <= cache)
1104 VIR_EXPAND_N(a_type->masks, a_type->nmasks,
1105 cache - a_type->nmasks + 1);
1106
1107 if (a_type->masks[cache])
1108 virBitmapFree(a_type->masks[cache]);
1109
1110 a_type->masks[cache] = virBitmapNewCopy(mask);
1111
1112 return 0;
1113 }
1114
1115
1116 static int
virResctrlAllocUpdateSize(virResctrlAlloc * alloc,unsigned int level,virCacheType type,unsigned int cache,unsigned long long size)1117 virResctrlAllocUpdateSize(virResctrlAlloc *alloc,
1118 unsigned int level,
1119 virCacheType type,
1120 unsigned int cache,
1121 unsigned long long size)
1122 {
1123 virResctrlAllocPerType *a_type = virResctrlAllocGetType(alloc, level, type);
1124
1125 if (!a_type)
1126 return -1;
1127
1128 if (a_type->nsizes <= cache)
1129 VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
1130 cache - a_type->nsizes + 1);
1131
1132 if (!a_type->sizes[cache])
1133 a_type->sizes[cache] = g_new0(unsigned long long, 1);
1134
1135 *(a_type->sizes[cache]) = size;
1136
1137 return 0;
1138 }
1139
1140
1141 /*
1142 * Check if there is an allocation for this level/type/cache already. Called
1143 * before updating the structure. VIR_CACHE_TYPE_BOTH collides with any type,
1144 * the other types collide with itself. This code basically checks if either:
1145 * `alloc[level]->types[type]->sizes[cache]`
1146 * or
1147 * `alloc[level]->types[VIR_CACHE_TYPE_BOTH]->sizes[cache]`
1148 * is non-NULL. All the fuzz around it is checking for NULL pointers along
1149 * the way.
1150 */
1151 static bool
virResctrlAllocCheckCollision(virResctrlAlloc * alloc,unsigned int level,virCacheType type,unsigned int cache)1152 virResctrlAllocCheckCollision(virResctrlAlloc *alloc,
1153 unsigned int level,
1154 virCacheType type,
1155 unsigned int cache)
1156 {
1157 virResctrlAllocPerLevel *a_level = NULL;
1158 virResctrlAllocPerType *a_type = NULL;
1159
1160 if (!alloc)
1161 return false;
1162
1163 if (alloc->nlevels <= level)
1164 return false;
1165
1166 a_level = alloc->levels[level];
1167
1168 if (!a_level)
1169 return false;
1170
1171 a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
1172
1173 /* If there is an allocation for type 'both', there can be no other
1174 * allocation for the same cache */
1175 if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
1176 return true;
1177
1178 if (type == VIR_CACHE_TYPE_BOTH) {
1179 a_type = a_level->types[VIR_CACHE_TYPE_CODE];
1180
1181 if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
1182 return true;
1183
1184 a_type = a_level->types[VIR_CACHE_TYPE_DATA];
1185
1186 if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
1187 return true;
1188 } else {
1189 a_type = a_level->types[type];
1190
1191 if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
1192 return true;
1193 }
1194
1195 return false;
1196 }
1197
1198
1199 int
virResctrlAllocSetCacheSize(virResctrlAlloc * alloc,unsigned int level,virCacheType type,unsigned int cache,unsigned long long size)1200 virResctrlAllocSetCacheSize(virResctrlAlloc *alloc,
1201 unsigned int level,
1202 virCacheType type,
1203 unsigned int cache,
1204 unsigned long long size)
1205 {
1206 if (virResctrlAllocCheckCollision(alloc, level, type, cache)) {
1207 virReportError(VIR_ERR_XML_ERROR,
1208 _("Colliding cache allocations for cache "
1209 "level '%u' id '%u', type '%s'"),
1210 level, cache, virCacheTypeToString(type));
1211 return -1;
1212 }
1213
1214 return virResctrlAllocUpdateSize(alloc, level, type, cache, size);
1215 }
1216
1217
1218 int
virResctrlAllocForeachCache(virResctrlAlloc * alloc,virResctrlAllocForeachCacheCallback cb,void * opaque)1219 virResctrlAllocForeachCache(virResctrlAlloc *alloc,
1220 virResctrlAllocForeachCacheCallback cb,
1221 void *opaque)
1222 {
1223 int ret = 0;
1224 unsigned int level = 0;
1225 unsigned int type = 0;
1226 unsigned int cache = 0;
1227
1228 if (!alloc)
1229 return 0;
1230
1231 for (level = 0; level < alloc->nlevels; level++) {
1232 virResctrlAllocPerLevel *a_level = alloc->levels[level];
1233
1234 if (!a_level)
1235 continue;
1236
1237 for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
1238 virResctrlAllocPerType *a_type = a_level->types[type];
1239
1240 if (!a_type)
1241 continue;
1242
1243 for (cache = 0; cache < a_type->nsizes; cache++) {
1244 unsigned long long *size = a_type->sizes[cache];
1245
1246 if (!size)
1247 continue;
1248
1249 ret = cb(level, type, cache, *size, opaque);
1250 if (ret < 0)
1251 return ret;
1252 }
1253 }
1254 }
1255
1256 return 0;
1257 }
1258
1259
1260 /* virResctrlAllocSetMemoryBandwidth
1261 * @alloc: Pointer to an active allocation
1262 * @id: node id of MBA to be set
1263 * @memory_bandwidth: new memory bandwidth value
1264 *
1265 * Set the @memory_bandwidth for the node @id entry in the @alloc.
1266 *
1267 * Returns 0 on success, -1 on failure with error message set.
1268 */
1269 int
virResctrlAllocSetMemoryBandwidth(virResctrlAlloc * alloc,unsigned int id,unsigned int memory_bandwidth)1270 virResctrlAllocSetMemoryBandwidth(virResctrlAlloc *alloc,
1271 unsigned int id,
1272 unsigned int memory_bandwidth)
1273 {
1274 virResctrlAllocMemBW *mem_bw = alloc->mem_bw;
1275
1276 if (memory_bandwidth > 100) {
1277 virReportError(VIR_ERR_XML_ERROR, "%s",
1278 _("Memory Bandwidth value exceeding 100 is invalid."));
1279 return -1;
1280 }
1281
1282 if (!mem_bw) {
1283 mem_bw = g_new0(virResctrlAllocMemBW, 1);
1284 alloc->mem_bw = mem_bw;
1285 }
1286
1287 if (mem_bw->nbandwidths <= id)
1288 VIR_EXPAND_N(mem_bw->bandwidths, mem_bw->nbandwidths,
1289 id - mem_bw->nbandwidths + 1);
1290
1291 if (mem_bw->bandwidths[id]) {
1292 virReportError(VIR_ERR_XML_ERROR,
1293 _("Memory Bandwidth already defined for node %u"),
1294 id);
1295 return -1;
1296 }
1297
1298 mem_bw->bandwidths[id] = g_new0(unsigned int, 1);
1299 *(mem_bw->bandwidths[id]) = memory_bandwidth;
1300 return 0;
1301 }
1302
1303
1304 /* virResctrlAllocForeachMemory
1305 * @alloc: Pointer to an active allocation
1306 * @cb: Callback function
1307 * @opaque: Opaque data to be passed to @cb
1308 *
1309 * If available, traverse the defined memory bandwidth allocations and
1310 * call the @cb function.
1311 *
1312 * Returns 0 on success, -1 and immediate failure if the @cb has any failure.
1313 */
1314 int
virResctrlAllocForeachMemory(virResctrlAlloc * alloc,virResctrlAllocForeachMemoryCallback cb,void * opaque)1315 virResctrlAllocForeachMemory(virResctrlAlloc *alloc,
1316 virResctrlAllocForeachMemoryCallback cb,
1317 void *opaque)
1318 {
1319 size_t i = 0;
1320 virResctrlAllocMemBW *mem_bw;
1321
1322 if (!alloc || !alloc->mem_bw)
1323 return 0;
1324
1325 mem_bw = alloc->mem_bw;
1326 for (i = 0; i < mem_bw->nbandwidths; i++) {
1327 if (mem_bw->bandwidths[i]) {
1328 if (cb(i, *mem_bw->bandwidths[i], opaque) < 0)
1329 return -1;
1330 }
1331 }
1332
1333 return 0;
1334 }
1335
1336
1337 static int
virResctrlSetID(char ** resctrlid,const char * id)1338 virResctrlSetID(char **resctrlid,
1339 const char *id)
1340 {
1341 if (!id) {
1342 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1343 _("New resctrl 'id' cannot be NULL"));
1344 return -1;
1345 }
1346
1347 if (*resctrlid) {
1348 virReportError(VIR_ERR_INTERNAL_ERROR,
1349 _("Attempt to overwrite resctrlid='%s' with id='%s'"),
1350 *resctrlid, id);
1351 return -1;
1352 }
1353
1354 *resctrlid = g_strdup(id);
1355 return 0;
1356 }
1357
1358
1359 int
virResctrlAllocSetID(virResctrlAlloc * alloc,const char * id)1360 virResctrlAllocSetID(virResctrlAlloc *alloc,
1361 const char *id)
1362 {
1363 return virResctrlSetID(&alloc->id, id);
1364 }
1365
1366
1367 const char *
virResctrlAllocGetID(virResctrlAlloc * alloc)1368 virResctrlAllocGetID(virResctrlAlloc *alloc)
1369 {
1370 return alloc->id;
1371 }
1372
1373
1374 /* Format the Memory Bandwidth Allocation line that will be found in
1375 * the schemata files. The line should be start with "MB:" and be
1376 * followed by "id=value" pairs separated by a semi-colon such as:
1377 *
1378 * MB:0=100;1=100
1379 *
1380 * which indicates node id 0 has 100 percent bandwidth and node id 1
1381 * has 100 percent bandwidth. A trailing semi-colon is not formatted.
1382 */
1383 static int
virResctrlAllocMemoryBandwidthFormat(virResctrlAlloc * alloc,virBuffer * buf)1384 virResctrlAllocMemoryBandwidthFormat(virResctrlAlloc *alloc,
1385 virBuffer *buf)
1386 {
1387 size_t i;
1388
1389 if (!alloc->mem_bw)
1390 return 0;
1391
1392 virBufferAddLit(buf, "MB:");
1393
1394 for (i = 0; i < alloc->mem_bw->nbandwidths; i++) {
1395 if (alloc->mem_bw->bandwidths[i]) {
1396 virBufferAsprintf(buf, "%zd=%u;", i,
1397 *(alloc->mem_bw->bandwidths[i]));
1398 }
1399 }
1400
1401 virBufferTrim(buf, ";");
1402 virBufferAddChar(buf, '\n');
1403 return 0;
1404 }
1405
1406
1407 static int
virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfo * resctrl,virResctrlAlloc * alloc,char * mem_bw)1408 virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfo *resctrl,
1409 virResctrlAlloc *alloc,
1410 char *mem_bw)
1411 {
1412 unsigned int bandwidth;
1413 unsigned int id;
1414 char *tmp = NULL;
1415
1416 tmp = strchr(mem_bw, '=');
1417 if (!tmp)
1418 return 0;
1419 *tmp = '\0';
1420 tmp++;
1421
1422 if (virStrToLong_uip(mem_bw, NULL, 10, &id) < 0) {
1423 virReportError(VIR_ERR_INTERNAL_ERROR,
1424 _("Invalid node id %u "), id);
1425 return -1;
1426 }
1427 if (virStrToLong_uip(tmp, NULL, 10, &bandwidth) < 0) {
1428 virReportError(VIR_ERR_INTERNAL_ERROR,
1429 _("Invalid bandwidth %u"), bandwidth);
1430 return -1;
1431 }
1432 if (bandwidth < resctrl->membw_info->min_bandwidth ||
1433 id > resctrl->membw_info->max_id) {
1434 virReportError(VIR_ERR_INTERNAL_ERROR,
1435 _("Missing or inconsistent resctrl info for "
1436 "memory bandwidth node '%u'"), id);
1437 return -1;
1438 }
1439 if (alloc->mem_bw->nbandwidths <= id) {
1440 VIR_EXPAND_N(alloc->mem_bw->bandwidths, alloc->mem_bw->nbandwidths,
1441 id - alloc->mem_bw->nbandwidths + 1);
1442 }
1443 if (!alloc->mem_bw->bandwidths[id])
1444 alloc->mem_bw->bandwidths[id] = g_new0(unsigned int, 1);
1445
1446 *(alloc->mem_bw->bandwidths[id]) = bandwidth;
1447 return 0;
1448 }
1449
1450
1451 /* Parse a schemata formatted MB: entry. Format details are described in
1452 * virResctrlAllocMemoryBandwidthFormat.
1453 */
1454 static int
virResctrlAllocParseMemoryBandwidthLine(virResctrlInfo * resctrl,virResctrlAlloc * alloc,char * line)1455 virResctrlAllocParseMemoryBandwidthLine(virResctrlInfo *resctrl,
1456 virResctrlAlloc *alloc,
1457 char *line)
1458 {
1459 g_auto(GStrv) mbs = NULL;
1460 GStrv next;
1461 char *tmp = NULL;
1462
1463 /* For no reason there can be spaces */
1464 virSkipSpaces((const char **) &line);
1465
1466 if (STRNEQLEN(line, "MB", 2))
1467 return 0;
1468
1469 if (!resctrl || !resctrl->membw_info ||
1470 !resctrl->membw_info->min_bandwidth ||
1471 !resctrl->membw_info->bandwidth_granularity) {
1472 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1473 _("Missing or inconsistent resctrl info for "
1474 "memory bandwidth allocation"));
1475 return -1;
1476 }
1477
1478 if (!alloc->mem_bw)
1479 alloc->mem_bw = g_new0(virResctrlAllocMemBW, 1);
1480
1481 tmp = strchr(line, ':');
1482 if (!tmp)
1483 return 0;
1484 tmp++;
1485
1486 mbs = g_strsplit(tmp, ";", 0);
1487 for (next = mbs; *next; next++) {
1488 if (virResctrlAllocParseProcessMemoryBandwidth(resctrl, alloc, *next) < 0)
1489 return -1;
1490 }
1491
1492 return 0;
1493 }
1494
1495
1496 static int
virResctrlAllocFormatCache(virResctrlAlloc * alloc,virBuffer * buf)1497 virResctrlAllocFormatCache(virResctrlAlloc *alloc,
1498 virBuffer *buf)
1499 {
1500 unsigned int level = 0;
1501 unsigned int type = 0;
1502 unsigned int cache = 0;
1503
1504 for (level = 0; level < alloc->nlevels; level++) {
1505 virResctrlAllocPerLevel *a_level = alloc->levels[level];
1506
1507 if (!a_level)
1508 continue;
1509
1510 for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
1511 virResctrlAllocPerType *a_type = a_level->types[type];
1512
1513 if (!a_type)
1514 continue;
1515
1516 virBufferAsprintf(buf, "L%u%s:", level, virResctrlTypeToString(type));
1517
1518 for (cache = 0; cache < a_type->nmasks; cache++) {
1519 virBitmap *mask = a_type->masks[cache];
1520 char *mask_str = NULL;
1521
1522 if (!mask)
1523 continue;
1524
1525 mask_str = virBitmapToString(mask);
1526 if (!mask_str)
1527 return -1;
1528
1529 virBufferAsprintf(buf, "%u=%s;", cache, mask_str);
1530 VIR_FREE(mask_str);
1531 }
1532
1533 virBufferTrim(buf, ";");
1534 virBufferAddChar(buf, '\n');
1535 }
1536 }
1537
1538 return 0;
1539 }
1540
1541
1542 char *
virResctrlAllocFormat(virResctrlAlloc * alloc)1543 virResctrlAllocFormat(virResctrlAlloc *alloc)
1544 {
1545 g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
1546
1547 if (!alloc)
1548 return NULL;
1549
1550 if (virResctrlAllocFormatCache(alloc, &buf) < 0)
1551 return NULL;
1552
1553 if (virResctrlAllocMemoryBandwidthFormat(alloc, &buf) < 0)
1554 return NULL;
1555
1556 return virBufferContentAndReset(&buf);
1557 }
1558
1559
/*
 * Parse one "cache_id=mask" element of a schemata cache line and store
 * the mask into @alloc for the given @level and @type.  Elements
 * without '=' are silently skipped.
 *
 * Returns 0 on success (including skips), -1 on error with an error
 * reported.
 */
static int
virResctrlAllocParseProcessCache(virResctrlInfo *resctrl,
                                 virResctrlAlloc *alloc,
                                 unsigned int level,
                                 virCacheType type,
                                 char *cache)
{
    char *tmp = strchr(cache, '=');
    unsigned int cache_id = 0;
    g_autoptr(virBitmap) mask = NULL;

    if (!tmp)
        return 0;

    /* Split "id=mask" in place; @cache keeps the id, @tmp the mask */
    *tmp = '\0';
    tmp++;

    if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid cache id '%s'"), cache);
        return -1;
    }

    /* Parse the textual mask into a bitmap */
    mask = virBitmapNewString(tmp);
    if (!mask)
        return -1;

    /* The host info must know about this level/type, otherwise we
     * cannot tell how many bits the mask is supposed to have */
    if (!resctrl ||
        level >= resctrl->nlevels ||
        !resctrl->levels[level] ||
        !resctrl->levels[level]->types[type]) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Missing or inconsistent resctrl info for "
                         "level '%u' type '%s'"),
                       level, virCacheTypeToString(type));
        return -1;
    }

    /* Drop any bits beyond what the host's cache control supports */
    virBitmapShrink(mask, resctrl->levels[level]->types[type]->bits);

    if (virResctrlAllocUpdateMask(alloc, level, type, cache_id, mask) < 0)
        return -1;

    return 0;
}
1605
1606
1607 static int
virResctrlAllocParseCacheLine(virResctrlInfo * resctrl,virResctrlAlloc * alloc,char * line)1608 virResctrlAllocParseCacheLine(virResctrlInfo *resctrl,
1609 virResctrlAlloc *alloc,
1610 char *line)
1611 {
1612 g_auto(GStrv) caches = NULL;
1613 GStrv next;
1614 char *tmp = NULL;
1615 unsigned int level = 0;
1616 int type = -1;
1617
1618 /* For no reason there can be spaces */
1619 virSkipSpaces((const char **) &line);
1620
1621 /* Skip lines that don't concern caches, e.g. MB: etc. */
1622 if (line[0] != 'L')
1623 return 0;
1624
1625 /* And lines that we can't parse too */
1626 tmp = strchr(line, ':');
1627 if (!tmp)
1628 return 0;
1629
1630 *tmp = '\0';
1631 tmp++;
1632
1633 if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
1634 virReportError(VIR_ERR_INTERNAL_ERROR,
1635 _("Cannot parse resctrl schema level '%s'"),
1636 line + 1);
1637 return -1;
1638 }
1639
1640 type = virResctrlTypeFromString(line);
1641 if (type < 0) {
1642 virReportError(VIR_ERR_INTERNAL_ERROR,
1643 _("Cannot parse resctrl schema level '%s'"),
1644 line + 1);
1645 return -1;
1646 }
1647
1648 caches = g_strsplit(tmp, ";", 0);
1649 if (!caches)
1650 return 0;
1651
1652 for (next = caches; *next; next++) {
1653 if (virResctrlAllocParseProcessCache(resctrl, alloc, level, type, *next) < 0)
1654 return -1;
1655 }
1656
1657 return 0;
1658 }
1659
1660
1661 static int
virResctrlAllocParse(virResctrlInfo * resctrl,virResctrlAlloc * alloc,const char * schemata)1662 virResctrlAllocParse(virResctrlInfo *resctrl,
1663 virResctrlAlloc *alloc,
1664 const char *schemata)
1665 {
1666 g_auto(GStrv) lines = NULL;
1667 GStrv next;
1668
1669 lines = g_strsplit(schemata, "\n", 0);
1670 for (next = lines; *next; next++) {
1671 if (virResctrlAllocParseCacheLine(resctrl, alloc, *next) < 0)
1672 return -1;
1673 if (virResctrlAllocParseMemoryBandwidthLine(resctrl, alloc, *next) < 0)
1674 return -1;
1675 }
1676
1677 return 0;
1678 }
1679
1680
1681 static int
virResctrlAllocGetGroup(virResctrlInfo * resctrl,const char * groupname,virResctrlAlloc ** alloc)1682 virResctrlAllocGetGroup(virResctrlInfo *resctrl,
1683 const char *groupname,
1684 virResctrlAlloc **alloc)
1685 {
1686 char *schemata = NULL;
1687 int rv = virFileReadValueString(&schemata,
1688 SYSFS_RESCTRL_PATH "/%s/schemata",
1689 groupname);
1690
1691 *alloc = NULL;
1692
1693 if (rv < 0)
1694 return rv;
1695
1696 *alloc = virResctrlAllocNew();
1697 if (!*alloc)
1698 goto error;
1699
1700 if (virResctrlAllocParse(resctrl, *alloc, schemata) < 0)
1701 goto error;
1702
1703 VIR_FREE(schemata);
1704 return 0;
1705
1706 error:
1707 VIR_FREE(schemata);
1708 virObjectUnref(*alloc);
1709 *alloc = NULL;
1710 return -1;
1711 }
1712
1713
1714 static virResctrlAlloc *
virResctrlAllocGetDefault(virResctrlInfo * resctrl)1715 virResctrlAllocGetDefault(virResctrlInfo *resctrl)
1716 {
1717 virResctrlAlloc *ret = NULL;
1718 int rv = virResctrlAllocGetGroup(resctrl, ".", &ret);
1719
1720 if (rv == -2) {
1721 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1722 _("Could not read schemata file for the default group"));
1723 }
1724
1725 return ret;
1726 }
1727
1728
1729 static void
virResctrlAllocSubtractPerType(virResctrlAllocPerType * dst,virResctrlAllocPerType * src)1730 virResctrlAllocSubtractPerType(virResctrlAllocPerType *dst,
1731 virResctrlAllocPerType *src)
1732 {
1733 size_t i = 0;
1734
1735 if (!dst || !src)
1736 return;
1737
1738 for (i = 0; i < dst->nmasks && i < src->nmasks; i++) {
1739 if (dst->masks[i] && src->masks[i])
1740 virBitmapSubtract(dst->masks[i], src->masks[i]);
1741 }
1742 }
1743
1744
1745 static void
virResctrlAllocSubtract(virResctrlAlloc * dst,virResctrlAlloc * src)1746 virResctrlAllocSubtract(virResctrlAlloc *dst,
1747 virResctrlAlloc *src)
1748 {
1749 size_t i = 0;
1750 size_t j = 0;
1751
1752 if (!src)
1753 return;
1754
1755 for (i = 0; i < dst->nlevels && i < src->nlevels; i++) {
1756 if (dst->levels[i] && src->levels[i]) {
1757 for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
1758 virResctrlAllocSubtractPerType(dst->levels[i]->types[j],
1759 src->levels[i]->types[j]);
1760 }
1761 }
1762 }
1763 }
1764
1765
/*
 * Build a new allocation with every controllable resource fully set:
 * for each known cache level/type, a mask with all bits set is stored
 * for every cache id, and (if MBA is supported) every node gets 100%
 * memory bandwidth.  Used as the starting point when computing the
 * unused resources of the host.
 *
 * Returns the new allocation or NULL on failure.
 */
static virResctrlAlloc *
virResctrlAllocNewFromInfo(virResctrlInfo *info)
{
    size_t i = 0;
    g_autoptr(virResctrlAlloc) ret = virResctrlAllocNew();

    if (!ret)
        return NULL;

    for (i = 0; i < info->nlevels; i++) {
        virResctrlInfoPerLevel *i_level = info->levels[i];
        size_t j = 0;

        if (!i_level)
            continue;

        for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
            virResctrlInfoPerType *i_type = i_level->types[j];
            g_autoptr(virBitmap) mask = NULL;
            size_t k = 0;

            if (!i_type)
                continue;

            /* One all-set mask, copied into every cache id below */
            mask = virBitmapNew(i_type->bits);
            virBitmapSetAll(mask);

            for (k = 0; k <= i_type->max_cache_id; k++) {
                if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
                    return NULL;
            }
        }
    }

    /* set default free memory bandwidth to 100% */
    if (info->membw_info) {
        ret->mem_bw = g_new0(virResctrlAllocMemBW, 1);

        VIR_EXPAND_N(ret->mem_bw->bandwidths, ret->mem_bw->nbandwidths,
                     info->membw_info->max_id + 1);

        for (i = 0; i < ret->mem_bw->nbandwidths; i++) {
            ret->mem_bw->bandwidths[i] = g_new0(unsigned int, 1);
            *(ret->mem_bw->bandwidths[i]) = 100;
        }
    }

    return g_steal_pointer(&ret);
}
1815
1816 /*
1817 * This function creates an allocation that represents all unused parts of all
1818 * caches in the system. It uses virResctrlInfo for creating a new full
1819 * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
1820 * scans for all allocations under /sys/fs/resctrl and subtracts each one of
1821 * them from it. That way it can then return an allocation with only bit set
1822 * being those that are not mentioned in any other allocation. It is used for
1823 * two things, a) calculating the masks when creating allocations and b) from
1824 * tests.
1825 *
1826 * MBA (Memory Bandwidth Allocation) is not taken into account as it is a
1827 * limiting setting, not an allocating one. The way it works is also vastly
1828 * different from CAT.
1829 */
1830 virResctrlAlloc *
virResctrlAllocGetUnused(virResctrlInfo * resctrl)1831 virResctrlAllocGetUnused(virResctrlInfo *resctrl)
1832 {
1833 g_autoptr(virResctrlAlloc) ret = NULL;
1834 g_autoptr(virResctrlAlloc) alloc_default = NULL;
1835 struct dirent *ent = NULL;
1836 g_autoptr(DIR) dirp = NULL;
1837 int rv = -1;
1838
1839 if (virResctrlInfoIsEmpty(resctrl)) {
1840 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
1841 _("Resource control is not supported on this host"));
1842 return NULL;
1843 }
1844
1845 ret = virResctrlAllocNewFromInfo(resctrl);
1846 if (!ret)
1847 return NULL;
1848
1849 alloc_default = virResctrlAllocGetDefault(resctrl);
1850 if (!alloc_default)
1851 return NULL;
1852
1853 virResctrlAllocSubtract(ret, alloc_default);
1854
1855 if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
1856 return NULL;
1857
1858 while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
1859 g_autoptr(virResctrlAlloc) alloc = NULL;
1860
1861 if (STREQ(ent->d_name, "info"))
1862 continue;
1863
1864 rv = virResctrlAllocGetGroup(resctrl, ent->d_name, &alloc);
1865 if (rv == -2)
1866 continue;
1867
1868 if (rv < 0) {
1869 virReportError(VIR_ERR_INTERNAL_ERROR,
1870 _("Could not read schemata file for group %s"),
1871 ent->d_name);
1872 return NULL;
1873 }
1874
1875 virResctrlAllocSubtract(ret, alloc);
1876 }
1877 if (rv < 0)
1878 return NULL;
1879
1880 return g_steal_pointer(&ret);
1881 }
1882
1883
1884 /*
1885 * Given the information about requested allocation type `a_type`, the host
1886 * cache for a particular type `i_type` and unused bits in the system `f_type`
1887 * this function tries to find the smallest free space in which the allocation
1888 * for cache id `cache` would fit. We're looking for the smallest place in
1889 * order to minimize fragmentation and maximize the possibility of succeeding.
1890 *
1891 * Per-cache allocation for the @level, @type and @cache must already be
1892 * allocated for @alloc (does not have to exist though).
1893 */
1894 static int
virResctrlAllocFindUnused(virResctrlAlloc * alloc,virResctrlInfoPerType * i_type,virResctrlAllocPerType * f_type,unsigned int level,unsigned int type,unsigned int cache)1895 virResctrlAllocFindUnused(virResctrlAlloc *alloc,
1896 virResctrlInfoPerType *i_type,
1897 virResctrlAllocPerType *f_type,
1898 unsigned int level,
1899 unsigned int type,
1900 unsigned int cache)
1901 {
1902 unsigned long long *size = alloc->levels[level]->types[type]->sizes[cache];
1903 g_autoptr(virBitmap) a_mask = NULL;
1904 virBitmap *f_mask = NULL;
1905 unsigned long long need_bits;
1906 size_t i = 0;
1907 ssize_t pos = -1;
1908 ssize_t last_bits = 0;
1909 ssize_t last_pos = -1;
1910
1911 if (!size)
1912 return 0;
1913
1914 if (cache >= f_type->nmasks) {
1915 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1916 _("Cache with id %u does not exists for level %d"),
1917 cache, level);
1918 return -1;
1919 }
1920
1921 f_mask = f_type->masks[cache];
1922 if (!f_mask) {
1923 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1924 _("Cache level %d id %u does not support tuning for "
1925 "scope type '%s'"),
1926 level, cache, virCacheTypeToString(type));
1927 return -1;
1928 }
1929
1930 if (*size == i_type->size) {
1931 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1932 _("Cache allocation for the whole cache is not "
1933 "possible, specify size smaller than %llu"),
1934 i_type->size);
1935 return -1;
1936 }
1937
1938 need_bits = *size / i_type->control.granularity;
1939
1940 if (*size % i_type->control.granularity) {
1941 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1942 _("Cache allocation of size %llu is not "
1943 "divisible by granularity %llu"),
1944 *size, i_type->control.granularity);
1945 return -1;
1946 }
1947
1948 if (need_bits < i_type->min_cbm_bits) {
1949 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1950 _("Cache allocation of size %llu is smaller "
1951 "than the minimum allowed allocation %llu"),
1952 *size,
1953 i_type->control.granularity * i_type->min_cbm_bits);
1954 return -1;
1955 }
1956
1957 while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
1958 ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
1959 ssize_t bits;
1960
1961 if (pos_clear < 0)
1962 pos_clear = virBitmapSize(f_mask);
1963
1964 bits = pos_clear - pos;
1965
1966 /* Not enough bits, move on and skip all of them */
1967 if (bits < need_bits) {
1968 pos = pos_clear;
1969 continue;
1970 }
1971
1972 /* This fits perfectly */
1973 if (bits == need_bits) {
1974 last_pos = pos;
1975 break;
1976 }
1977
1978 /* Remember the smaller region if we already found on before */
1979 if (last_pos < 0 || (last_bits && bits < last_bits)) {
1980 last_bits = bits;
1981 last_pos = pos;
1982 }
1983
1984 pos = pos_clear;
1985 }
1986
1987 if (last_pos < 0) {
1988 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1989 _("Not enough room for allocation of "
1990 "%llu bytes for level %u cache %u "
1991 "scope type '%s'"),
1992 *size, level, cache,
1993 virCacheTypeToString(type));
1994 return -1;
1995 }
1996
1997 a_mask = virBitmapNew(i_type->bits);
1998
1999 for (i = last_pos; i < last_pos + need_bits; i++)
2000 ignore_value(virBitmapSetBit(a_mask, i));
2001
2002 if (virResctrlAllocUpdateMask(alloc, level, type, cache, a_mask) < 0)
2003 return -1;
2004
2005 return 0;
2006 }
2007
2008
2009 static int
virResctrlAllocMemoryBandwidth(virResctrlInfo * resctrl,virResctrlAlloc * alloc)2010 virResctrlAllocMemoryBandwidth(virResctrlInfo *resctrl,
2011 virResctrlAlloc *alloc)
2012 {
2013 size_t i;
2014 virResctrlAllocMemBW *mem_bw_alloc = alloc->mem_bw;
2015 virResctrlInfoMemBW *mem_bw_info = resctrl->membw_info;
2016
2017 if (!mem_bw_alloc)
2018 return 0;
2019
2020 if (!mem_bw_info) {
2021 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
2022 _("RDT Memory Bandwidth allocation unsupported"));
2023 return -1;
2024 }
2025
2026 for (i = 0; i < mem_bw_alloc->nbandwidths; i++) {
2027 if (!mem_bw_alloc->bandwidths[i])
2028 continue;
2029
2030 if (*(mem_bw_alloc->bandwidths[i]) % mem_bw_info->bandwidth_granularity) {
2031 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2032 _("Memory Bandwidth allocation of size "
2033 "%u is not divisible by granularity %u"),
2034 *(mem_bw_alloc->bandwidths[i]),
2035 mem_bw_info->bandwidth_granularity);
2036 return -1;
2037 }
2038 if (*(mem_bw_alloc->bandwidths[i]) < mem_bw_info->min_bandwidth) {
2039 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2040 _("Memory Bandwidth allocation of size "
2041 "%u is smaller than the minimum "
2042 "allowed allocation %u"),
2043 *(mem_bw_alloc->bandwidths[i]),
2044 mem_bw_info->min_bandwidth);
2045 return -1;
2046 }
2047 if (i > mem_bw_info->max_id) {
2048 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2049 _("bandwidth controller id %zd does not "
2050 "exist, max controller id %u"),
2051 i, mem_bw_info->max_id);
2052 return -1;
2053 }
2054 }
2055 return 0;
2056 }
2057
2058
2059 static int
virResctrlAllocCopyMemBW(virResctrlAlloc * dst,virResctrlAlloc * src)2060 virResctrlAllocCopyMemBW(virResctrlAlloc *dst,
2061 virResctrlAlloc *src)
2062 {
2063 size_t i = 0;
2064 virResctrlAllocMemBW *dst_bw = NULL;
2065 virResctrlAllocMemBW *src_bw = src->mem_bw;
2066
2067 if (!src->mem_bw)
2068 return 0;
2069
2070 if (!dst->mem_bw)
2071 dst->mem_bw = g_new0(virResctrlAllocMemBW, 1);
2072
2073 dst_bw = dst->mem_bw;
2074
2075 if (src_bw->nbandwidths > dst_bw->nbandwidths)
2076 VIR_EXPAND_N(dst_bw->bandwidths, dst_bw->nbandwidths,
2077 src_bw->nbandwidths - dst_bw->nbandwidths);
2078
2079 for (i = 0; i < src_bw->nbandwidths; i++) {
2080 if (dst_bw->bandwidths[i])
2081 continue;
2082 dst_bw->bandwidths[i] = g_new0(unsigned int, 1);
2083 *dst_bw->bandwidths[i] = *src_bw->bandwidths[i];
2084 }
2085
2086 return 0;
2087 }
2088
2089
2090 static int
virResctrlAllocCopyMasks(virResctrlAlloc * dst,virResctrlAlloc * src)2091 virResctrlAllocCopyMasks(virResctrlAlloc *dst,
2092 virResctrlAlloc *src)
2093 {
2094 unsigned int level = 0;
2095
2096 for (level = 0; level < src->nlevels; level++) {
2097 virResctrlAllocPerLevel *s_level = src->levels[level];
2098 unsigned int type = 0;
2099
2100 if (!s_level)
2101 continue;
2102
2103 for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
2104 virResctrlAllocPerType *s_type = s_level->types[type];
2105 virResctrlAllocPerType *d_type = NULL;
2106 unsigned int cache = 0;
2107
2108 if (!s_type)
2109 continue;
2110
2111 d_type = virResctrlAllocGetType(dst, level, type);
2112 if (!d_type)
2113 return -1;
2114
2115 for (cache = 0; cache < s_type->nmasks; cache++) {
2116 virBitmap *mask = s_type->masks[cache];
2117
2118 if (mask && virResctrlAllocUpdateMask(dst, level, type, cache, mask) < 0)
2119 return -1;
2120 }
2121 }
2122 }
2123
2124 return 0;
2125 }
2126
2127
2128 /*
2129 * This function is called when creating an allocation in the system.
2130 * What it does is that it gets all the unused resources using
2131 * virResctrlAllocGetUnused and then tries to find a proper space for
2132 * every requested allocation effectively transforming `sizes` into `masks`.
2133 */
2134 static int
virResctrlAllocAssign(virResctrlInfo * resctrl,virResctrlAlloc * alloc)2135 virResctrlAllocAssign(virResctrlInfo *resctrl,
2136 virResctrlAlloc *alloc)
2137 {
2138 unsigned int level = 0;
2139 g_autoptr(virResctrlAlloc) alloc_free = NULL;
2140 g_autoptr(virResctrlAlloc) alloc_default = NULL;
2141
2142 alloc_free = virResctrlAllocGetUnused(resctrl);
2143 if (!alloc_free)
2144 return -1;
2145
2146 alloc_default = virResctrlAllocGetDefault(resctrl);
2147 if (!alloc_default)
2148 return -1;
2149
2150 if (virResctrlAllocMemoryBandwidth(resctrl, alloc) < 0)
2151 return -1;
2152
2153 if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
2154 return -1;
2155
2156 if (virResctrlAllocCopyMemBW(alloc, alloc_default) < 0)
2157 return -1;
2158
2159 for (level = 0; level < alloc->nlevels; level++) {
2160 virResctrlAllocPerLevel *a_level = alloc->levels[level];
2161 virResctrlAllocPerLevel *f_level = NULL;
2162 unsigned int type = 0;
2163
2164 if (!a_level)
2165 continue;
2166
2167 if (level < alloc_free->nlevels)
2168 f_level = alloc_free->levels[level];
2169
2170 if (!f_level) {
2171 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2172 _("Cache level %d does not support tuning"),
2173 level);
2174 return -1;
2175 }
2176
2177 for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
2178 virResctrlAllocPerType *a_type = a_level->types[type];
2179 virResctrlAllocPerType *f_type = f_level->types[type];
2180 unsigned int cache = 0;
2181
2182 if (!a_type)
2183 continue;
2184
2185 if (!f_type) {
2186 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2187 _("Cache level %d does not support tuning for "
2188 "scope type '%s'"),
2189 level, virCacheTypeToString(type));
2190 return -1;
2191 }
2192
2193 for (cache = 0; cache < a_type->nsizes; cache++) {
2194 virResctrlInfoPerLevel *i_level = resctrl->levels[level];
2195 virResctrlInfoPerType *i_type = i_level->types[type];
2196
2197 if (virResctrlAllocFindUnused(alloc, i_type, f_type, level, type, cache) < 0)
2198 return -1;
2199 }
2200 }
2201 }
2202
2203 return 0;
2204 }
2205
2206
2207 static char *
virResctrlDeterminePath(const char * parentpath,const char * prefix,const char * id)2208 virResctrlDeterminePath(const char *parentpath,
2209 const char *prefix,
2210 const char *id)
2211 {
2212 if (!id) {
2213 virReportError(VIR_ERR_INTERNAL_ERROR,
2214 _("Resctrl ID must be set before determining resctrl "
2215 "parentpath='%s' prefix='%s'"), parentpath, prefix);
2216 return NULL;
2217 }
2218
2219 return g_strdup_printf("%s/%s-%s", parentpath, prefix, id);
2220 }
2221
2222
2223 int
virResctrlAllocDeterminePath(virResctrlAlloc * alloc,const char * machinename)2224 virResctrlAllocDeterminePath(virResctrlAlloc *alloc,
2225 const char *machinename)
2226 {
2227 if (alloc->path) {
2228 virReportError(VIR_ERR_INTERNAL_ERROR,
2229 _("Resctrl allocation path is already set to '%s'"),
2230 alloc->path);
2231 return -1;
2232 }
2233
2234 /* If the allocation is empty, then the path will be SYSFS_RESCTRL_PATH */
2235 if (virResctrlAllocIsEmpty(alloc)) {
2236 alloc->path = g_strdup(SYSFS_RESCTRL_PATH);
2237
2238 return 0;
2239 }
2240
2241 alloc->path = virResctrlDeterminePath(SYSFS_RESCTRL_PATH,
2242 machinename, alloc->id);
2243
2244 if (!alloc->path)
2245 return -1;
2246
2247 return 0;
2248 }
2249
2250
/* Ensure the resctrl group directory @path exists in the resource control
 * file system, creating it (and any missing parents) if necessary.
 * Returns 0 on success or if it already exists, -1 on error. */
static int
virResctrlCreateGroupPath(const char *path)
{
    if (virFileExists(path))
        return 0;

    if (g_mkdir_with_parents(path, 0777) >= 0)
        return 0;

    virReportSystemError(errno,
                         _("Cannot create resctrl directory '%s'"),
                         path);
    return -1;
}
2269
2270
2271 /* This checks if the directory for the alloc exists. If not it tries to create
2272 * it and apply appropriate alloc settings. */
2273 int
virResctrlAllocCreate(virResctrlInfo * resctrl,virResctrlAlloc * alloc,const char * machinename)2274 virResctrlAllocCreate(virResctrlInfo *resctrl,
2275 virResctrlAlloc *alloc,
2276 const char *machinename)
2277 {
2278 g_autofree char *schemata_path = NULL;
2279 g_autofree char *alloc_str = NULL;
2280 int ret = -1;
2281 int lockfd = -1;
2282
2283 if (!alloc)
2284 return 0;
2285
2286 if (virResctrlInfoIsEmpty(resctrl)) {
2287 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
2288 _("Resource control is not supported on this host"));
2289 return -1;
2290 }
2291
2292 if (virResctrlAllocDeterminePath(alloc, machinename) < 0)
2293 return -1;
2294
2295 /* If using the system/default path for the allocation, then we're done */
2296 if (STREQ(alloc->path, SYSFS_RESCTRL_PATH))
2297 return 0;
2298
2299 lockfd = virResctrlLock();
2300 if (lockfd < 0)
2301 goto cleanup;
2302
2303 if (virResctrlAllocAssign(resctrl, alloc) < 0)
2304 goto cleanup;
2305
2306 if (virResctrlCreateGroupPath(alloc->path) < 0)
2307 goto cleanup;
2308
2309 alloc_str = virResctrlAllocFormat(alloc);
2310 if (!alloc_str)
2311 goto cleanup;
2312
2313 schemata_path = g_strdup_printf("%s/schemata", alloc->path);
2314
2315 VIR_DEBUG("Writing resctrl schemata '%s' into '%s'", alloc_str, schemata_path);
2316 if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
2317 rmdir(alloc->path);
2318 virReportSystemError(errno,
2319 _("Cannot write into schemata file '%s'"),
2320 schemata_path);
2321 goto cleanup;
2322 }
2323
2324 ret = 0;
2325 cleanup:
2326 virResctrlUnlock(lockfd);
2327 return ret;
2328 }
2329
2330
2331 static int
virResctrlAddPID(const char * path,pid_t pid)2332 virResctrlAddPID(const char *path,
2333 pid_t pid)
2334 {
2335 g_autofree char *tasks = NULL;
2336 g_autofree char *pidstr = NULL;
2337
2338 if (!path) {
2339 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2340 _("Cannot add pid to non-existing resctrl group"));
2341 return -1;
2342 }
2343
2344 tasks = g_strdup_printf("%s/tasks", path);
2345
2346 pidstr = g_strdup_printf("%lld", (long long int)pid);
2347
2348 if (virFileWriteStr(tasks, pidstr, 0) < 0) {
2349 virReportSystemError(errno,
2350 _("Cannot write pid in tasks file '%s'"),
2351 tasks);
2352 return -1;
2353 }
2354
2355 return 0;
2356 }
2357
2358
2359 int
virResctrlAllocAddPID(virResctrlAlloc * alloc,pid_t pid)2360 virResctrlAllocAddPID(virResctrlAlloc *alloc,
2361 pid_t pid)
2362 {
2363 /* If the allocation is empty, then it is impossible to add a PID to
2364 * allocation due to lacking of its 'tasks' file so just return */
2365 if (virResctrlAllocIsEmpty(alloc))
2366 return 0;
2367
2368 return virResctrlAddPID(alloc->path, pid);
2369 }
2370
2371
2372 int
virResctrlAllocRemove(virResctrlAlloc * alloc)2373 virResctrlAllocRemove(virResctrlAlloc *alloc)
2374 {
2375 int ret = 0;
2376
2377 if (!alloc->path)
2378 return 0;
2379
2380 /* Do not destroy if path is the system/default path for the allocation */
2381 if (STREQ(alloc->path, SYSFS_RESCTRL_PATH))
2382 return 0;
2383
2384 VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
2385 if (rmdir(alloc->path) != 0 && errno != ENOENT) {
2386 ret = -errno;
2387 VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
2388 }
2389
2390 return ret;
2391 }
2392
2393
2394 /* virResctrlMonitor-related definitions */
2395
2396 virResctrlMonitor *
virResctrlMonitorNew(void)2397 virResctrlMonitorNew(void)
2398 {
2399 if (virResctrlInitialize() < 0)
2400 return NULL;
2401
2402 return virObjectNew(virResctrlMonitorClass);
2403 }
2404
2405
2406 /*
2407 * virResctrlMonitorDeterminePath
2408 *
2409 * @monitor: Pointer to a resctrl monitor
2410 * @machinename: Name string of the VM
2411 *
2412 * Determines the directory path that the underlying resctrl group will be
2413 * created with.
2414 *
2415 * A monitor represents a directory under resource control file system,
2416 * its directory path could be the same path as @monitor->alloc, could be a
2417 * path of directory under 'mon_groups' of @monitor->alloc, or a path of
2418 * directory under '/sys/fs/resctrl/mon_groups' if @monitor->alloc is NULL.
2419 *
2420 * Returns 0 on success, -1 on error.
2421 */
2422 int
virResctrlMonitorDeterminePath(virResctrlMonitor * monitor,const char * machinename)2423 virResctrlMonitorDeterminePath(virResctrlMonitor *monitor,
2424 const char *machinename)
2425 {
2426 g_autofree char *parentpath = NULL;
2427
2428 if (!monitor) {
2429 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2430 _("Invalid resctrl monitor"));
2431 return -1;
2432 }
2433
2434 if (!monitor->alloc) {
2435 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2436 _("Missing resctrl monitor alloc"));
2437 return -1;
2438 }
2439
2440 if (monitor->path) {
2441 virReportError(VIR_ERR_INTERNAL_ERROR,
2442 _("Resctrl monitor path is already set to '%s'"),
2443 monitor->path);
2444 return -1;
2445 }
2446
2447 if (!virResctrlAllocIsEmpty(monitor->alloc) &&
2448 STREQ_NULLABLE(monitor->id, monitor->alloc->id)) {
2449 monitor->path = g_strdup(monitor->alloc->path);
2450 return 0;
2451 }
2452
2453 parentpath = g_strdup_printf("%s/mon_groups", monitor->alloc->path);
2454
2455 monitor->path = virResctrlDeterminePath(parentpath, machinename,
2456 monitor->id);
2457 if (!monitor->path)
2458 return -1;
2459
2460 return 0;
2461 }
2462
2463
int
virResctrlMonitorAddPID(virResctrlMonitor *monitor,
                        pid_t pid)
{
    /* Bind @pid to @monitor by writing it into the group's 'tasks' file;
     * reports an error if the monitor path has not been determined yet. */
    return virResctrlAddPID(monitor->path, pid);
}
2470
2471
2472 int
virResctrlMonitorCreate(virResctrlMonitor * monitor,const char * machinename)2473 virResctrlMonitorCreate(virResctrlMonitor *monitor,
2474 const char *machinename)
2475 {
2476 int lockfd = -1;
2477 int ret = -1;
2478
2479 if (!monitor)
2480 return 0;
2481
2482 if (virResctrlMonitorDeterminePath(monitor, machinename) < 0)
2483 return -1;
2484
2485 lockfd = virResctrlLock();
2486 if (lockfd < 0)
2487 return -1;
2488
2489 ret = virResctrlCreateGroupPath(monitor->path);
2490
2491 virResctrlUnlock(lockfd);
2492 return ret;
2493 }
2494
2495
int
virResctrlMonitorSetID(virResctrlMonitor *monitor,
                       const char *id)

{
    /* Store a copy of @id as the monitor's identifier. */
    return virResctrlSetID(&monitor->id, id);
}
2503
2504
const char *
virResctrlMonitorGetID(virResctrlMonitor *monitor)
{
    /* Borrowed pointer: the string remains owned by @monitor. */
    return monitor->id;
}
2510
2511
void
virResctrlMonitorSetAlloc(virResctrlMonitor *monitor,
                          virResctrlAlloc *alloc)
{
    /* Take a new reference on @alloc and attach it to the monitor.
     * NOTE(review): a previously set monitor->alloc would be overwritten
     * here without being unref'd — presumably callers set this exactly
     * once per monitor; verify against call sites. */
    monitor->alloc = virObjectRef(alloc);
}
2518
2519
2520 int
virResctrlMonitorRemove(virResctrlMonitor * monitor)2521 virResctrlMonitorRemove(virResctrlMonitor *monitor)
2522 {
2523 int ret = 0;
2524
2525 if (!monitor->path)
2526 return 0;
2527
2528 if (STREQ(monitor->path, monitor->alloc->path))
2529 return 0;
2530
2531 VIR_DEBUG("Removing resctrl monitor path=%s", monitor->path);
2532 if (rmdir(monitor->path) != 0 && errno != ENOENT) {
2533 ret = -errno;
2534 VIR_ERROR(_("Unable to remove %s (%d)"), monitor->path, errno);
2535 }
2536
2537 return ret;
2538 }
2539
2540
2541 static int
virResctrlMonitorStatsSorter(const void * a,const void * b)2542 virResctrlMonitorStatsSorter(const void *a,
2543 const void *b)
2544 {
2545 return (*(virResctrlMonitorStats **)a)->id
2546 - (*(virResctrlMonitorStats **)b)->id;
2547 }
2548
2549
/*
 * virResctrlMonitorGetStats
 *
 * @monitor: The monitor that the statistic data will be retrieved from.
 * @resources: A string list for the monitor feature names.
 * @stats: Pointer to a virResctrlMonitorStats * array for holding cache or
 * memory bandwidth usage data.
 * @nstats: A size_t pointer to hold the returned array length of @stats
 *
 * Get cache or memory bandwidth utilization information.
 *
 * Returns 0 on success, -1 on error.
 */
int
virResctrlMonitorGetStats(virResctrlMonitor *monitor,
                          const char **resources,
                          virResctrlMonitorStats ***stats,
                          size_t *nstats)
{
    int rv = -1;
    int ret = -1;
    size_t i = 0;
    unsigned long long val = 0;
    g_autoptr(DIR) dirp = NULL;
    g_autofree char *datapath = NULL;
    struct dirent *ent = NULL;
    virResctrlMonitorStats *stat = NULL;
    size_t nresources = g_strv_length((char **) resources);

    if (!monitor) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Invalid resctrl monitor"));
        return -1;
    }

    datapath = g_strdup_printf("%s/mon_data", monitor->path);

    if (virDirOpen(&dirp, datapath) < 0)
        goto cleanup;

    *nstats = 0;
    while (virDirRead(dirp, &ent, datapath) > 0) {
        g_autofree char *filepath = NULL;
        char *node_id = NULL;

        /* Looking for directory that contains resource utilization
         * information file. The directory name is arranged in format
         * "mon_<node_name>_<node_id>". For example, "mon_L3_00" and
         * "mon_L3_01" are two target directories for a two nodes system
         * with resource utilization data file for each node respectively.
         */
        filepath = g_strdup_printf("%s/%s", datapath, ent->d_name);

        if (!virFileIsDir(filepath))
            continue;

        /* Looking for directory has a prefix 'mon_L' */
        if (!(node_id = STRSKIP(ent->d_name, "mon_L")))
            continue;

        /* Looking for directory has another '_' */
        node_id = strchr(node_id, '_');
        if (!node_id)
            continue;

        /* Skip the character '_' */
        if (!(node_id = STRSKIP(node_id, "_")))
            continue;

        stat = g_new0(virResctrlMonitorStats, 1);
        /* NULL-terminated list; nresources entries plus the terminator. */
        stat->features = g_new0(char *, nresources + 1);

        /* The node ID number should be here, parsing it. */
        if (virStrToLong_uip(node_id, NULL, 0, &stat->id) < 0)
            goto cleanup;

        /* Read one counter value per requested feature for this node. */
        for (i = 0; resources[i]; i++) {
            rv = virFileReadValueUllong(&val, "%s/%s/%s", datapath,
                                        ent->d_name, resources[i]);
            if (rv == -2) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("File '%s/%s/%s' does not exist."),
                               datapath, ent->d_name, resources[i]);
            }
            if (rv < 0)
                goto cleanup;

            VIR_APPEND_ELEMENT(stat->vals, stat->nvals, val);

            stat->features[i] = g_strdup(resources[i]);
        }

        /* Presumably VIR_APPEND_ELEMENT clears 'stat' on success,
         * transferring ownership to *stats — which is why the cleanup
         * path only frees a partially-built record. TODO confirm. */
        VIR_APPEND_ELEMENT(*stats, *nstats, stat);
    }

    /* Sort in id's ascending order */
    if (*nstats)
        qsort(*stats, *nstats, sizeof(**stats), virResctrlMonitorStatsSorter);

    ret = 0;
 cleanup:
    virResctrlMonitorStatsFree(stat);
    return ret;
}
2654
2655
2656 void
virResctrlMonitorStatsFree(virResctrlMonitorStats * stat)2657 virResctrlMonitorStatsFree(virResctrlMonitorStats *stat)
2658 {
2659 if (!stat)
2660 return;
2661
2662 g_strfreev(stat->features);
2663 g_free(stat->vals);
2664 g_free(stat);
2665 }
2666