1 /*
2  * virresctrl.c:
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library.  If not, see
16  * <http://www.gnu.org/licenses/>.
17  */
18 
19 #include <config.h>
20 
21 #include <sys/file.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 
27 #define LIBVIRT_VIRRESCTRLPRIV_H_ALLOW
28 #include "virresctrlpriv.h"
29 #include "viralloc.h"
30 #include "virbuffer.h"
31 #include "virfile.h"
32 #include "virlog.h"
33 #include "virobject.h"
34 #include "virstring.h"
35 
36 #define VIR_FROM_THIS VIR_FROM_RESCTRL
37 
38 VIR_LOG_INIT("util.virresctrl");
39 
40 
41 /* Resctrl is short for Resource Control.  It might be implemented for various
42  * resources. Currently this supports cache allocation technology (aka CAT),
43  * memory bandwidth allocation (aka MBA) and cache monitoring technology (aka
44  * CMT). More resources technologies may be added in the future.
45  */
46 
47 
48 /* Common definitions */
49 #define SYSFS_RESCTRL_PATH "/sys/fs/resctrl"
50 
51 
52 /* Following are three different enum implementations for the same enum.  Each
53  * one of them helps translating to/from strings for different interfaces.  The
54  * delimiter must be VIR_CACHE_TYPE_LAST for all of them in order to stay
55  * consistent in between all of them. */
56 
/* Cache name mapping for Linux kernel naming.  One string per cache type,
 * with VIR_CACHE_TYPE_LAST as the delimiter (see the note above about keeping
 * the three cache mappings consistent). */
VIR_ENUM_IMPL(virCacheKernel,
              VIR_CACHE_TYPE_LAST,
              "Unified",
              "Instruction",
              "Data",
);
64 
/* Cache name mapping for our XML naming.  The entries are positionally
 * parallel to the kernel and resctrl mappings of the same enum. */
VIR_ENUM_IMPL(virCache,
              VIR_CACHE_TYPE_LAST,
              "both",
              "code",
              "data",
);
72 
/* Cache name mapping for resctrl interface naming.  virResctrlGetCacheInfo()
 * feeds virResctrlTypeFromString() with whatever follows the level number in
 * an info directory name, so the empty string matches a name that has no
 * suffix after the level number. */
VIR_ENUM_DECL(virResctrl);
VIR_ENUM_IMPL(virResctrl,
              VIR_CACHE_TYPE_LAST,
              "",
              "CODE",
              "DATA",
);
81 
/* Monitor feature name prefix mapping for monitor naming.  One prefix per
 * monitor type, delimited by VIR_RESCTRL_MONITOR_TYPE_LAST; the first entry
 * is a placeholder string for the unsupported case. */
VIR_ENUM_IMPL(virResctrlMonitorPrefix,
              VIR_RESCTRL_MONITOR_TYPE_LAST,
              "__unsupported__",
              "llc_",
              "mbm_",
);
89 
90 
/* All private typedefs, declared up front so that they exist for all later
 * definitions.  This way the structs can reference one another without
 * reorganizing the code every time.  The struct bodies follow below. */
typedef struct _virResctrlInfoPerType virResctrlInfoPerType;

typedef struct _virResctrlInfoPerLevel virResctrlInfoPerLevel;

typedef struct _virResctrlInfoMemBW virResctrlInfoMemBW;

typedef struct _virResctrlInfoMongrp virResctrlInfoMongrp;

typedef struct _virResctrlAllocPerType virResctrlAllocPerType;

typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;

typedef struct _virResctrlAllocMemBW virResctrlAllocMemBW;
107 
108 
/* Class definitions and initializations.
 *
 * One class pointer per object type of this module.  virResctrlInfoClass is
 * handed to virObjectNew() in virResctrlInfoNew(); the pointers are presumably
 * assigned by the VIR_CLASS_NEW() calls in virResctrlOnceInit() (the macro is
 * defined elsewhere -- confirm there). */
static virClass *virResctrlInfoClass;
static virClass *virResctrlAllocClass;
static virClass *virResctrlMonitorClass;
113 
114 
115 /* virResctrlInfo */
/* Resctrl information about one cache type on one cache level.  Filled in by
 * virResctrlGetCacheInfo() and completed by virResctrlInfoGetCache(). */
struct _virResctrlInfoPerType {
    /* Kernel-provided information (read from the min_cbm_bits file) */
    unsigned int min_cbm_bits;

    /* Information computed by this module: @bits is the number of bits set in
     * the kernel's cbm_mask; @max_cache_id is incremented each time
     * virResctrlInfoGetCache() is called again for a type whose size is
     * already known. */
    unsigned int bits;
    unsigned int max_cache_id;

    /* In order to be self-sufficient we need size information per cache.
     * Funnily enough, one of the outcomes of the resctrl design is that it
     * does not account for different sizes per cache on the same level.  So
     * for the sake of easiness, let's copy that, for now.
     * Zero means "not known yet"; virResctrlInfoGetCache() sets it on first
     * use and rejects later calls that pass a different size. */
    unsigned long long size;

    /* Information that we will return upon request (this is public struct) as
     * until now all the above is internal to this module */
    virResctrlInfoPerCache control;
};
134 
/* Resctrl information for one cache level.  @types is allocated with
 * VIR_CACHE_TYPE_LAST slots (see virResctrlGetCacheInfo()); a NULL slot means
 * no info directory was found for that cache type. */
struct _virResctrlInfoPerLevel {
    virResctrlInfoPerType **types;
};
138 
/* Information about memory bandwidth allocation.  The first three members are
 * read from the info/MB directory by virResctrlGetMemoryBandwidthInfo(); the
 * last two are maintained by virResctrlInfoGetCache(). */
struct _virResctrlInfoMemBW {
    /* minimum memory bandwidth allowed (file min_bandwidth) */
    unsigned int min_bandwidth;
    /* bandwidth granularity (file bandwidth_gran) */
    unsigned int bandwidth_granularity;
    /* Maximum number of simultaneous allocations (file num_closids) */
    unsigned int max_allocation;
    /* level number of last level cache, i.e. the highest level passed to
     * virResctrlInfoGetCache() so far */
    unsigned int last_level_cache;
    /* max id of last level cache, this is used to track
     * how many last level caches are available in the host system;
     * the number of memory bandwidth allocation controllers
     * is identical to the number of last level caches. */
    unsigned int max_id;
};
155 
/* Monitoring capabilities read from the info/L3_MON directory by
 * virResctrlGetMonitorInfo(). */
struct _virResctrlInfoMongrp {
    /* Maximum number of simultaneous monitors (file num_rmids) */
    unsigned int max_monitor;
    /* NULL-terminated string list of monitor features (file mon_features,
     * split on newlines) */
    char **features;
    /* Number of monitor features, i.e. the length of @features */
    size_t nfeatures;

    /* Last level cache related information */

    /* This adjustable value affects the final reuse of resources used by
     * monitor. After the action of removing a monitor, the kernel may not
     * release all hardware resources that monitor used immediately if the
     * cache occupancy value associated with 'removed' monitor is above this
     * threshold. Once the cache occupancy is below this threshold, the
     * underlying hardware resource will be reclaimed and be put into the
     * resource pool for next reusing.
     * Read from the max_threshold_occupancy file; left at zero when that
     * file does not exist. */
    unsigned int cache_reuse_threshold;
    /* The cache 'level' that has the monitor capability (set to 3 by
     * virResctrlGetMonitorInfo()) */
    unsigned int cache_level;
};
177 
/* Host resctrl capabilities.  Created by virResctrlInfoNew(); the members are
 * released in virResctrlInfoDispose(). */
struct _virResctrlInfo {
    virObject parent;

    /* Sparse array indexed by cache level; NULL for levels without resctrl
     * cache information.  @nlevels is the array length. */
    virResctrlInfoPerLevel **levels;
    size_t nlevels;

    /* NULL when no memory bandwidth allocation information was found */
    virResctrlInfoMemBW *membw_info;

    /* NULL when no monitoring information was found */
    virResctrlInfoMongrp *monitor_info;
};
188 
189 
190 static void
virResctrlInfoDispose(void * obj)191 virResctrlInfoDispose(void *obj)
192 {
193     size_t i = 0;
194     size_t j = 0;
195 
196     virResctrlInfo *resctrl = obj;
197 
198     for (i = 0; i < resctrl->nlevels; i++) {
199         virResctrlInfoPerLevel *level = resctrl->levels[i];
200 
201         if (!level)
202             continue;
203 
204         if (level->types) {
205             for (j = 0; j < VIR_CACHE_TYPE_LAST; j++)
206                 g_free(level->types[j]);
207         }
208         g_free(level->types);
209         g_free(level);
210     }
211 
212     if (resctrl->monitor_info)
213         g_strfreev(resctrl->monitor_info->features);
214 
215     g_free(resctrl->membw_info);
216     g_free(resctrl->levels);
217     g_free(resctrl->monitor_info);
218 }
219 
220 
221 void
virResctrlInfoMonFree(virResctrlInfoMon * mon)222 virResctrlInfoMonFree(virResctrlInfoMon *mon)
223 {
224     if (!mon)
225         return;
226 
227     g_strfreev(mon->features);
228     g_free(mon);
229 }
230 
231 
232 /* virResctrlAlloc and virResctrlMonitor */
233 
234 /*
235  * virResctrlAlloc and virResctrlMonitor are representing a resource control
236  * group (in XML under cputune/cachetune and consequently a directory under
237  * /sys/fs/resctrl). virResctrlAlloc is the data structure for resource
238  * allocation, while the virResctrlMonitor represents the resource monitoring
239  * part.
240  *
241  * virResctrlAlloc represents one allocation. Since it can have multiple
242  * parts of multiple caches allocated it is represented as bunch of nested
243  * sparse arrays (by sparse I mean array of pointers so that each might be NULL
244  * in case there is no allocation for that particular cache allocation (level,
245  * cache, ...) or memory allocation for particular node).
246  *
 * The allocation corresponding to the root directory, /sys/fs/resctrl/,
 * defines the default resource allocation policy.  It is created immediately
 * after mounting and owns all the tasks and cpus in the system.  Cache or
 * memory bandwidth resources are shared among the tasks in this allocation.
251  *
252  * =====Cache allocation technology (CAT)=====
253  *
254  * Since one allocation can be made for caches on different levels, the first
255  * nested sparse array is of types virResctrlAllocPerLevel.  For example if you
256  * have allocation for level 3 cache, there will be three NULL pointers and then
257  * allocated pointer to virResctrlAllocPerLevel.  That way you can access it by
258  * `alloc[level]` as O(1) is desired instead of crawling through normal arrays
259  * or lists in three nested loops.  The code uses a lot of direct accesses.
260  *
261  * Each virResctrlAllocPerLevel can have allocations for different cache
262  * allocation types.  You can allocate instruction cache (VIR_CACHE_TYPE_CODE),
263  * data cache (VIR_CACHE_TYPE_DATA) or unified cache (VIR_CACHE_TYPE_BOTH).
264  * Those allocations are kept in sparse array of virResctrlAllocPerType pointers.
265  *
266  * For each virResctrlAllocPerType users can request some size of the cache to
267  * be allocated.  That's what the sparse array `sizes` is for.  Non-NULL
268  * pointers represent requested size allocations.  The array is indexed by host
269  * cache id (gotten from `/sys/devices/system/cpu/cpuX/cache/indexY/id`).  Users
270  * can see this information e.g. in the output of `virsh capabilities` (for that
271  * information there's the other struct, namely `virResctrlInfo`).
272  *
273  * When allocation is being created we need to find unused part of the cache for
274  * all of them.  While doing that we store the bitmask in a sparse array of
275  * virBitmaps named `masks` indexed the same way as `sizes`.  The upper bounds
276  * of the sparse arrays are stored in nmasks or nsizes, respectively.
277  *
278  * =====Memory Bandwidth allocation technology (MBA)=====
279  *
280  * The memory bandwidth allocation support in virResctrlAlloc works in the
281  * same fashion as CAT. However, memory bandwidth controller doesn't have a
282  * hierarchy organization as cache, each node have one memory bandwidth
283  * controller to memory bandwidth distribution. The number of memory bandwidth
284  * controller is identical with number of last level cache. So MBA also employs
285  * a sparse array to represent whether a memory bandwidth allocation happens
286  * on corresponding node. The available memory controller number is collected
287  * in 'virResctrlInfo'.
288  *
289  * =====Cache monitoring technology (CMT)=====
290  *
 * Cache monitoring technology is used to perceive how much cache the process
 * is actually using. virResctrlMonitor represents the resource control
 * monitoring group; resource utilization information can be monitored at
 * the granularity of a vcpu.
295  *
 * From a hardware perspective, cache monitoring technology (CMT), memory
 * bandwidth monitoring (MBM), as well as the CAT and MBA, are all orthogonal
 * features. The monitor will be created under the scope of default resctrl
 * group if no specific CAT or MBA entries are provided for the guest.
300  */
/* Allocation data for one cache type on one cache level.  Both arrays are
 * sparse and, per the description above, indexed by host cache id. */
struct _virResctrlAllocPerType {
    /* There could be bool saying whether this is set or not, but since everything
     * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
     * have to have one more level of allocation anyway, so this stays faithful to
     * the concept */
    unsigned long long **sizes;
    /* Upper bound (array length) of @sizes */
    size_t nsizes;

    /* Mask for each cache */
    virBitmap **masks;
    /* Upper bound (array length) of @masks */
    size_t nmasks;
};
313 
/* Allocation data for one cache level; a NULL slot in @types means there is
 * no allocation for that cache type. */
struct _virResctrlAllocPerLevel {
    virResctrlAllocPerType **types; /* Indexed with enum virCacheType */
    /* There is no `ntypes` member variable as it is always allocated for
     * VIR_CACHE_TYPE_LAST number of items */
};
319 
/*
 * virResctrlAllocMemBW represents one memory bandwidth allocation.
 * Since there can be several last level caches in a NUMA system, it is
 * also represented as a sparse array, in the same way as
 * virResctrlAllocPerLevel.
 */
struct _virResctrlAllocMemBW {
    /* Sparse array of bandwidth values; each non-NULL entry is individually
     * allocated (see virResctrlAllocDispose()) */
    unsigned int **bandwidths;
    /* Length of @bandwidths */
    size_t nbandwidths;
};
329 
/* One resctrl allocation (resource control group).  All members are released
 * in virResctrlAllocDispose(). */
struct _virResctrlAlloc {
    virObject parent;

    /* Sparse array indexed by cache level; @nlevels is its length */
    virResctrlAllocPerLevel **levels;
    size_t nlevels;

    /* Memory bandwidth allocation, NULL when there is none */
    virResctrlAllocMemBW *mem_bw;

    /* The identifier (any unique string for now) */
    char *id;
    /* libvirt-generated path in /sys/fs/resctrl for this particular
     * allocation */
    char *path;
};
344 
/*
 * virResctrlMonitor is the data structure for a resctrl monitor. A resctrl
 * monitor represents a resctrl monitoring group, which can be used to
 * monitor the resource utilization information for either cache or
 * memory bandwidth.
 */
struct _virResctrlMonitor {
    virObject parent;

    /* Each virResctrlMonitor is associated with one specific allocation,
     * either the root directory allocation under /sys/fs/resctrl or a
     * specific allocation defined under the root directory.
     * This pointer points to the allocation this monitor is associated with.
     * virResctrlMonitorDispose() drops one reference on it.
     */
    virResctrlAlloc *alloc;
    /* The monitor identifier. For a monitor that has the same @path name as
     * its @alloc, the @id will be set to the same value as @alloc->id.
     */
    char *id;
    /* libvirt-generated path in /sys/fs/resctrl for this particular
     * monitor */
    char *path;
};
368 
369 
370 static void
virResctrlAllocDispose(void * obj)371 virResctrlAllocDispose(void *obj)
372 {
373     size_t i = 0;
374     size_t j = 0;
375     size_t k = 0;
376 
377     virResctrlAlloc *alloc = obj;
378 
379     for (i = 0; i < alloc->nlevels; i++) {
380         virResctrlAllocPerLevel *level = alloc->levels[i];
381 
382         if (!level)
383             continue;
384 
385         for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
386             virResctrlAllocPerType *type = level->types[j];
387 
388             if (!type)
389                 continue;
390 
391             for (k = 0; k < type->nsizes; k++)
392                 g_free(type->sizes[k]);
393 
394             for (k = 0; k < type->nmasks; k++)
395                 virBitmapFree(type->masks[k]);
396 
397             g_free(type->sizes);
398             g_free(type->masks);
399             g_free(type);
400         }
401         g_free(level->types);
402         g_free(level);
403     }
404 
405     if (alloc->mem_bw) {
406         virResctrlAllocMemBW *mem_bw = alloc->mem_bw;
407         for (i = 0; i < mem_bw->nbandwidths; i++)
408             g_free(mem_bw->bandwidths[i]);
409         g_free(alloc->mem_bw->bandwidths);
410         g_free(alloc->mem_bw);
411     }
412 
413     g_free(alloc->id);
414     g_free(alloc->path);
415     g_free(alloc->levels);
416 }
417 
418 
419 static void
virResctrlMonitorDispose(void * obj)420 virResctrlMonitorDispose(void *obj)
421 {
422     virResctrlMonitor *monitor = obj;
423 
424     virObjectUnref(monitor->alloc);
425     g_free(monitor->id);
426     g_free(monitor->path);
427 }
428 
429 
430 /* Global initialization for classes */
431 static int
virResctrlOnceInit(void)432 virResctrlOnceInit(void)
433 {
434     if (!VIR_CLASS_NEW(virResctrlInfo, virClassForObject()))
435         return -1;
436 
437     if (!VIR_CLASS_NEW(virResctrlAlloc, virClassForObject()))
438         return -1;
439 
440     if (!VIR_CLASS_NEW(virResctrlMonitor, virClassForObject()))
441         return -1;
442 
443     return 0;
444 }
445 
446 VIR_ONCE_GLOBAL_INIT(virResctrl);
447 
448 
449 /* Common functions */
450 #ifndef WIN32
451 
452 static int
virResctrlLock(void)453 virResctrlLock(void)
454 {
455     int fd = open(SYSFS_RESCTRL_PATH, O_RDONLY | O_CLOEXEC);
456 
457     if (fd < 0) {
458         virReportSystemError(errno, "%s", _("Cannot open resctrl"));
459         return -1;
460     }
461 
462     if (flock(fd, LOCK_EX) < 0) {
463         virReportSystemError(errno, "%s", _("Cannot lock resctrl"));
464         VIR_FORCE_CLOSE(fd);
465         return -1;
466     }
467 
468     return fd;
469 }
470 
471 
/*
 * virResctrlUnlock:
 * @fd: descriptor obtained from virResctrlLock(), or -1
 *
 * Releases the resctrl lock by closing @fd.
 *
 * Returns 0 on success or when @fd is -1 (nothing to do), -1 with a system
 * error reported when closing fails.
 */
static int
virResctrlUnlock(int fd)
{
    if (fd == -1)
        return 0;

    /* The lock gets unlocked by closing the fd, which we need to do anyway in
     * order to clean up properly.
     *
     * There is deliberately no flock(fd, LOCK_UN) fallback after a failed
     * close: the descriptor is no longer usable once VIR_CLOSE() has run
     * (the macro is expected to reset @fd to -1), so such a call could only
     * fail and would replace the real close error with a misleading
     * "cannot unlock" one. */
    if (VIR_CLOSE(fd) < 0) {
        virReportSystemError(errno, "%s", _("Cannot close resctrl"));
        return -1;
    }

    return 0;
}
492 
493 #else /* WIN32 */
494 
/* WIN32 stub: there is no resctrl locking on this platform, so always report
 * ENOSYS and fail. */
static int
virResctrlLock(void)
{
    virReportSystemError(ENOSYS, "%s",
                         _("resctrl locking is not supported on this platform"));
    return -1;
}
503 
504 
505 static int
virResctrlUnlock(int fd G_GNUC_UNUSED)506 virResctrlUnlock(int fd G_GNUC_UNUSED)
507 {
508     virReportSystemError(ENOSYS, "%s",
509                          _("resctrl locking is not supported "
510                            "on this platform"));
511     return -1;
512 }
513 
514 #endif /* WIN32 */
515 
516 
517 /* virResctrlInfo-related definitions */
518 static int
virResctrlGetCacheInfo(virResctrlInfo * resctrl,DIR * dirp)519 virResctrlGetCacheInfo(virResctrlInfo *resctrl,
520                        DIR *dirp)
521 {
522     int rv = -1;
523     struct dirent *ent = NULL;
524 
525     while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH "/info")) > 0) {
526         g_autofree char *cbm_mask_str = NULL;
527         g_autoptr(virBitmap) cbm_mask_map = NULL;
528         char *endptr = NULL;
529         int type = 0;
530         unsigned int level = 0;
531         virResctrlInfoPerLevel *i_level = NULL;
532         g_autofree virResctrlInfoPerType *i_type = NULL;
533 
534         VIR_DEBUG("Parsing info type '%s'", ent->d_name);
535         if (ent->d_name[0] != 'L')
536             continue;
537 
538         if (virStrToLong_uip(ent->d_name + 1, &endptr, 10, &level) < 0) {
539             VIR_DEBUG("Cannot parse resctrl cache info level '%s'", ent->d_name + 1);
540             continue;
541         }
542 
543         type = virResctrlTypeFromString(endptr);
544         if (type < 0) {
545             VIR_DEBUG("Ignoring resctrl cache info with suffix '%s'", endptr);
546             continue;
547         }
548 
549         i_type = g_new0(virResctrlInfoPerType, 1);
550         i_type->control.scope = type;
551 
552         rv = virFileReadValueUint(&i_type->control.max_allocation,
553                                   SYSFS_RESCTRL_PATH "/info/%s/num_closids",
554                                   ent->d_name);
555         if (rv == -2) {
556             /* The file doesn't exist, so it's unusable for us,
557              *  but we can scan further */
558             VIR_WARN("The path '" SYSFS_RESCTRL_PATH "/info/%s/num_closids' "
559                      "does not exist",
560                      ent->d_name);
561         } else if (rv < 0) {
562             /* Other failures are fatal, so just quit */
563             return -1;
564         }
565 
566         rv = virFileReadValueString(&cbm_mask_str,
567                                     SYSFS_RESCTRL_PATH
568                                     "/info/%s/cbm_mask",
569                                     ent->d_name);
570         if (rv == -2) {
571             /* If the previous file exists, so should this one.  Hence -2 is
572              * fatal in this case as well (errors out in next condition) - the
573              * kernel interface might've changed too much or something else is
574              * wrong. */
575             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
576                            _("Cannot get cbm_mask from resctrl cache info"));
577         }
578         if (rv < 0)
579             return -1;
580 
581         virStringTrimOptionalNewline(cbm_mask_str);
582 
583         if (!(cbm_mask_map = virBitmapNewString(cbm_mask_str))) {
584             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
585                            _("Cannot parse cbm_mask from resctrl cache info"));
586             return -1;
587         }
588 
589         i_type->bits = virBitmapCountBits(cbm_mask_map);
590 
591         rv = virFileReadValueUint(&i_type->min_cbm_bits,
592                                   SYSFS_RESCTRL_PATH "/info/%s/min_cbm_bits",
593                                   ent->d_name);
594         if (rv == -2)
595             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
596                            _("Cannot get min_cbm_bits from resctrl cache info"));
597         if (rv < 0)
598             return -1;
599 
600         if (resctrl->nlevels <= level)
601             VIR_EXPAND_N(resctrl->levels, resctrl->nlevels,
602                          level - resctrl->nlevels + 1);
603 
604         if (!resctrl->levels[level]) {
605             virResctrlInfoPerType **types = NULL;
606 
607             types = g_new0(virResctrlInfoPerType *, VIR_CACHE_TYPE_LAST);
608 
609             resctrl->levels[level] = g_new0(virResctrlInfoPerLevel, 1);
610             resctrl->levels[level]->types = types;
611         }
612 
613         i_level = resctrl->levels[level];
614 
615         if (i_level->types[type]) {
616             virReportError(VIR_ERR_INTERNAL_ERROR,
617                            _("Duplicate cache type in resctrl for level %u"),
618                            level);
619             return -1;
620         }
621 
622         i_level->types[type] = g_steal_pointer(&i_type);
623     }
624 
625     return 0;
626 }
627 
628 
629 static int
virResctrlGetMemoryBandwidthInfo(virResctrlInfo * resctrl)630 virResctrlGetMemoryBandwidthInfo(virResctrlInfo *resctrl)
631 {
632     int rv = -1;
633     g_autofree virResctrlInfoMemBW *i_membw = NULL;
634 
635     /* query memory bandwidth allocation info */
636     i_membw = g_new0(virResctrlInfoMemBW, 1);
637     rv = virFileReadValueUint(&i_membw->bandwidth_granularity,
638                               SYSFS_RESCTRL_PATH "/info/MB/bandwidth_gran");
639     if (rv == -2) {
640         /* The file doesn't exist, so it's unusable for us,
641          * probably memory bandwidth allocation unsupported */
642         VIR_INFO("The path '" SYSFS_RESCTRL_PATH "/info/MB/bandwidth_gran'"
643                  "does not exist");
644         return 0;
645     } else if (rv < 0) {
646         /* Other failures are fatal, so just quit */
647         return -1;
648     }
649 
650     rv = virFileReadValueUint(&i_membw->min_bandwidth,
651                               SYSFS_RESCTRL_PATH "/info/MB/min_bandwidth");
652     if (rv == -2) {
653         /* If the previous file exists, so should this one. Hence -2 is
654          * fatal in this case (errors out in next condition) - the kernel
655          * interface might've changed too much or something else is wrong. */
656         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
657                        _("Cannot get min bandwidth from resctrl memory info"));
658     }
659     if (rv < 0)
660         return -1;
661 
662     rv = virFileReadValueUint(&i_membw->max_allocation,
663                               SYSFS_RESCTRL_PATH "/info/MB/num_closids");
664     if (rv == -2) {
665         /* Similar reasoning to min_bandwidth above. */
666         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
667                        _("Cannot get max allocation from resctrl memory info"));
668     }
669     if (rv < 0)
670         return -1;
671 
672     resctrl->membw_info = g_steal_pointer(&i_membw);
673     return 0;
674 }
675 
676 
677 /*
678  * Retrieve monitor capability from the resource control file system.
679  *
680  * The monitor capability is exposed through "SYSFS_RESCTRL_PATH/info/L3_MON"
681  * directory under the resource control file system. The monitor capability is
682  * parsed by reading the interface files and stored in the structure
683  * 'virResctrlInfoMongrp'.
684  *
685  * Not all host supports the resource monitor, leave the pointer
686  * @resctrl->monitor_info empty if not supported.
687  */
688 static int
virResctrlGetMonitorInfo(virResctrlInfo * resctrl)689 virResctrlGetMonitorInfo(virResctrlInfo *resctrl)
690 {
691     int rv = -1;
692     g_autofree char *featurestr = NULL;
693     g_autofree virResctrlInfoMongrp *info_monitor = NULL;
694 
695     info_monitor = g_new0(virResctrlInfoMongrp, 1);
696 
697     /* For now, monitor only exists in level 3 cache */
698     info_monitor->cache_level = 3;
699 
700     rv = virFileReadValueUint(&info_monitor->max_monitor,
701                               SYSFS_RESCTRL_PATH "/info/L3_MON/num_rmids");
702     if (rv == -2) {
703         /* The file doesn't exist, so it's unusable for us, probably resource
704          * monitor unsupported */
705         VIR_INFO("The file '" SYSFS_RESCTRL_PATH "/info/L3_MON/num_rmids' "
706                  "does not exist");
707         return 0;
708     } else if (rv < 0) {
709         /* Other failures are fatal, so just quit */
710         return -1;
711     }
712 
713     rv = virFileReadValueUint(&info_monitor->cache_reuse_threshold,
714                               SYSFS_RESCTRL_PATH
715                               "/info/L3_MON/max_threshold_occupancy");
716     if (rv == -2) {
717         /* If CMT is not supported, then 'max_threshold_occupancy' file
718          * will not exist. */
719         VIR_DEBUG("File '" SYSFS_RESCTRL_PATH
720                   "/info/L3_MON/max_threshold_occupancy' does not exist");
721     } else if (rv < 0) {
722         return -1;
723     }
724 
725     rv = virFileReadValueString(&featurestr,
726                                 SYSFS_RESCTRL_PATH
727                                 "/info/L3_MON/mon_features");
728     if (rv == -2)
729         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
730                        _("Cannot get mon_features from resctrl"));
731     if (rv < 0)
732         return -1;
733 
734     if (!*featurestr) {
735         /* If no feature found in "/info/L3_MON/mon_features",
736          * some error happens */
737         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
738                        _("Got empty feature list from resctrl"));
739         return -1;
740     }
741 
742     info_monitor->features = g_strsplit(featurestr, "\n", 0);
743     info_monitor->nfeatures = g_strv_length(info_monitor->features);
744     VIR_DEBUG("Resctrl supported %zd monitoring features", info_monitor->nfeatures);
745 
746     resctrl->monitor_info = g_steal_pointer(&info_monitor);
747 
748     return 0;
749 }
750 
751 
752 static int
virResctrlGetInfo(virResctrlInfo * resctrl)753 virResctrlGetInfo(virResctrlInfo *resctrl)
754 {
755     g_autoptr(DIR) dirp = NULL;
756     int ret = -1;
757 
758     ret = virDirOpenIfExists(&dirp, SYSFS_RESCTRL_PATH "/info");
759     if (ret <= 0)
760         return ret;
761 
762     if ((ret = virResctrlGetMemoryBandwidthInfo(resctrl)) < 0)
763         return -1;
764 
765     if ((ret = virResctrlGetCacheInfo(resctrl, dirp)) < 0)
766         return -1;
767 
768     if ((ret = virResctrlGetMonitorInfo(resctrl)) < 0)
769         return -1;
770 
771     return 0;
772 }
773 
774 
775 virResctrlInfo *
virResctrlInfoNew(void)776 virResctrlInfoNew(void)
777 {
778     virResctrlInfo *ret = NULL;
779 
780     if (virResctrlInitialize() < 0)
781         return NULL;
782 
783     ret = virObjectNew(virResctrlInfoClass);
784     if (!ret)
785         return NULL;
786 
787     if (virResctrlGetInfo(ret) < 0) {
788         virObjectUnref(ret);
789         return NULL;
790     }
791 
792     return ret;
793 }
794 
795 
796 static bool
virResctrlInfoIsEmpty(virResctrlInfo * resctrl)797 virResctrlInfoIsEmpty(virResctrlInfo *resctrl)
798 {
799     size_t i = 0;
800     size_t j = 0;
801 
802     if (!resctrl)
803         return true;
804 
805     if (resctrl->membw_info)
806         return false;
807 
808     if (resctrl->monitor_info)
809         return false;
810 
811     for (i = 0; i < resctrl->nlevels; i++) {
812         virResctrlInfoPerLevel *i_level = resctrl->levels[i];
813 
814         if (!i_level)
815             continue;
816 
817         for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
818             if (i_level->types[j])
819                 return false;
820         }
821     }
822 
823     return true;
824 }
825 
826 
827 int
virResctrlInfoGetMemoryBandwidth(virResctrlInfo * resctrl,unsigned int level,virResctrlInfoMemBWPerNode * control)828 virResctrlInfoGetMemoryBandwidth(virResctrlInfo *resctrl,
829                                  unsigned int level,
830                                  virResctrlInfoMemBWPerNode *control)
831 {
832     virResctrlInfoMemBW *membw_info = resctrl->membw_info;
833 
834     if (!membw_info)
835         return 0;
836 
837     if (membw_info->last_level_cache != level)
838         return 0;
839 
840     control->granularity = membw_info->bandwidth_granularity;
841     control->min = membw_info->min_bandwidth;
842     control->max_allocation = membw_info->max_allocation;
843     return 1;
844 }
845 
846 
847 int
virResctrlInfoGetCache(virResctrlInfo * resctrl,unsigned int level,unsigned long long size,size_t * ncontrols,virResctrlInfoPerCache *** controls)848 virResctrlInfoGetCache(virResctrlInfo *resctrl,
849                        unsigned int level,
850                        unsigned long long size,
851                        size_t *ncontrols,
852                        virResctrlInfoPerCache ***controls)
853 {
854     virResctrlInfoPerLevel *i_level = NULL;
855     virResctrlInfoPerType *i_type = NULL;
856     size_t i = 0;
857 
858     if (virResctrlInfoIsEmpty(resctrl))
859         return 0;
860 
861     /* Let's take the opportunity to update the number of last level
862      * cache. This number of memory bandwidth controller is same with
863      * last level cache */
864     if (resctrl->membw_info) {
865         virResctrlInfoMemBW *membw_info = resctrl->membw_info;
866 
867         if (level > membw_info->last_level_cache) {
868             membw_info->last_level_cache = level;
869             membw_info->max_id = 0;
870         } else if (membw_info->last_level_cache == level) {
871             membw_info->max_id++;
872         }
873     }
874 
875     if (level >= resctrl->nlevels)
876         return 0;
877 
878     i_level = resctrl->levels[level];
879     if (!i_level)
880         return 0;
881 
882     for (i = 0; i < VIR_CACHE_TYPE_LAST; i++) {
883         i_type = i_level->types[i];
884         if (!i_type)
885             continue;
886 
887         /* Let's take the opportunity to update our internal information about
888          * the cache size */
889         if (!i_type->size) {
890             i_type->size = size;
891             i_type->control.granularity = size / i_type->bits;
892             if (i_type->min_cbm_bits != 1)
893                 i_type->control.min = i_type->min_cbm_bits * i_type->control.granularity;
894         } else {
895             if (i_type->size != size) {
896                 virReportError(VIR_ERR_INTERNAL_ERROR,
897                                _("level %u cache size %llu does not match "
898                                  "expected size %llu"),
899                                level, i_type->size, size);
900                 goto error;
901             }
902             i_type->max_cache_id++;
903         }
904 
905         VIR_EXPAND_N(*controls, *ncontrols, 1);
906         (*controls)[*ncontrols - 1] = g_new0(virResctrlInfoPerCache, 1);
907         memcpy((*controls)[*ncontrols - 1], &i_type->control, sizeof(i_type->control));
908     }
909 
910     return 0;
911  error:
912     while (*ncontrols)
913         VIR_FREE((*controls)[--*ncontrols]);
914     VIR_FREE(*controls);
915     return -1;
916 }
917 
918 
919 /* virResctrlInfoGetMonitorPrefix
920  *
921  * @resctrl: Pointer to virResctrlInfo
922  * @prefix: Monitor prefix name for monitor looking for.
923  * @monitor: Returns the capability information for target monitor if the
924  * monitor with @prefex is supported by host.
925  *
926  * Return monitor capability information for @prefix through @monitor.
927  * If monitor with @prefix is not supported in system, @monitor will be
928  * cleared to NULL.
929  *
930  * Returns 0 if @monitor is created or monitor type with @prefix is not
931  * supported by host, -1 on failure with error message set.
932  */
933 int
virResctrlInfoGetMonitorPrefix(virResctrlInfo * resctrl,const char * prefix,virResctrlInfoMon ** monitor)934 virResctrlInfoGetMonitorPrefix(virResctrlInfo *resctrl,
935                                const char *prefix,
936                                virResctrlInfoMon **monitor)
937 {
938     size_t i = 0;
939     virResctrlInfoMongrp *mongrp_info = NULL;
940     virResctrlInfoMon *mon = NULL;
941     int ret = -1;
942 
943     if (!prefix) {
944         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
945                        _("Empty prefix name for resctrl monitor"));
946         return -1;
947     }
948 
949     if (virResctrlInfoIsEmpty(resctrl))
950         return 0;
951 
952     mongrp_info = resctrl->monitor_info;
953 
954     if (!mongrp_info) {
955         VIR_INFO("Monitor is not supported in host");
956         return 0;
957     }
958 
959     for (i = 0; i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
960         if (STREQ(prefix, virResctrlMonitorPrefixTypeToString(i))) {
961             mon = g_new0(virResctrlInfoMon, 1);
962             mon->type = i;
963             break;
964         }
965     }
966 
967     if (!mon) {
968         virReportError(VIR_ERR_INTERNAL_ERROR,
969                        _("Bad prefix name '%s' for resctrl monitor"),
970                        prefix);
971         return -1;
972     }
973 
974     mon->max_monitor = mongrp_info->max_monitor;
975 
976     if (mon->type == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
977         mon->cache_reuse_threshold =  mongrp_info->cache_reuse_threshold;
978         mon->cache_level = mongrp_info->cache_level;
979     }
980 
981     mon->features = g_new0(char *, mongrp_info->nfeatures + 1);
982 
983     for (i = 0; i < mongrp_info->nfeatures; i++) {
984         if (STRPREFIX(mongrp_info->features[i], prefix))
985             mon->features[mon->nfeatures++] = g_strdup(mongrp_info->features[i]);
986     }
987 
988     mon->features = g_renew(char *, mon->features, mon->nfeatures + 1);
989 
990     ret = 0;
991 
992     /* In case *monitor is pointed to some monitor, clean it. */
993     virResctrlInfoMonFree(*monitor);
994 
995     if (mon->nfeatures == 0) {
996         /* No feature found for current monitor, means host does not support
997          * monitor type with @prefix name.
998          * Telling caller this monitor is supported by hardware specification,
999          * but not supported by this host. */
1000         VIR_INFO("No resctrl monitor features using prefix '%s' found", prefix);
1001         goto cleanup;
1002     }
1003 
1004     *monitor = g_steal_pointer(&mon);
1005  cleanup:
1006     virResctrlInfoMonFree(mon);
1007     return ret;
1008 }
1009 
1010 
1011 /* virResctrlAlloc-related definitions */
1012 virResctrlAlloc *
virResctrlAllocNew(void)1013 virResctrlAllocNew(void)
1014 {
1015     if (virResctrlInitialize() < 0)
1016         return NULL;
1017 
1018     return virObjectNew(virResctrlAllocClass);
1019 }
1020 
1021 
1022 bool
virResctrlAllocIsEmpty(virResctrlAlloc * alloc)1023 virResctrlAllocIsEmpty(virResctrlAlloc *alloc)
1024 {
1025     size_t i = 0;
1026     size_t j = 0;
1027     size_t k = 0;
1028 
1029     if (!alloc)
1030         return true;
1031 
1032     if (alloc->mem_bw)
1033         return false;
1034 
1035     for (i = 0; i < alloc->nlevels; i++) {
1036         virResctrlAllocPerLevel *a_level = alloc->levels[i];
1037 
1038         if (!a_level)
1039             continue;
1040 
1041         for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
1042             virResctrlAllocPerType *a_type = a_level->types[j];
1043 
1044             if (!a_type)
1045                 continue;
1046 
1047             for (k = 0; k < a_type->nsizes; k++) {
1048                 if (a_type->sizes[k])
1049                     return false;
1050             }
1051 
1052             for (k = 0; k < a_type->nmasks; k++) {
1053                 if (a_type->masks[k])
1054                     return false;
1055             }
1056         }
1057     }
1058 
1059     return true;
1060 }
1061 
1062 
1063 static virResctrlAllocPerType *
virResctrlAllocGetType(virResctrlAlloc * alloc,unsigned int level,virCacheType type)1064 virResctrlAllocGetType(virResctrlAlloc *alloc,
1065                        unsigned int level,
1066                        virCacheType type)
1067 {
1068     virResctrlAllocPerLevel *a_level = NULL;
1069 
1070     if (alloc->nlevels <= level)
1071         VIR_EXPAND_N(alloc->levels, alloc->nlevels, level - alloc->nlevels + 1);
1072 
1073     if (!alloc->levels[level]) {
1074         virResctrlAllocPerType **types = NULL;
1075 
1076         types = g_new0(virResctrlAllocPerType *, VIR_CACHE_TYPE_LAST);
1077 
1078         alloc->levels[level] = g_new0(virResctrlAllocPerLevel, 1);
1079         alloc->levels[level]->types = types;
1080     }
1081 
1082     a_level = alloc->levels[level];
1083 
1084     if (!a_level->types[type])
1085         a_level->types[type] = g_new0(virResctrlAllocPerType, 1);
1086 
1087     return a_level->types[type];
1088 }
1089 
1090 
1091 static int
virResctrlAllocUpdateMask(virResctrlAlloc * alloc,unsigned int level,virCacheType type,unsigned int cache,virBitmap * mask)1092 virResctrlAllocUpdateMask(virResctrlAlloc *alloc,
1093                           unsigned int level,
1094                           virCacheType type,
1095                           unsigned int cache,
1096                           virBitmap *mask)
1097 {
1098     virResctrlAllocPerType *a_type = virResctrlAllocGetType(alloc, level, type);
1099 
1100     if (!a_type)
1101         return -1;
1102 
1103     if (a_type->nmasks <= cache)
1104         VIR_EXPAND_N(a_type->masks, a_type->nmasks,
1105                      cache - a_type->nmasks + 1);
1106 
1107     if (a_type->masks[cache])
1108         virBitmapFree(a_type->masks[cache]);
1109 
1110     a_type->masks[cache] = virBitmapNewCopy(mask);
1111 
1112     return 0;
1113 }
1114 
1115 
1116 static int
virResctrlAllocUpdateSize(virResctrlAlloc * alloc,unsigned int level,virCacheType type,unsigned int cache,unsigned long long size)1117 virResctrlAllocUpdateSize(virResctrlAlloc *alloc,
1118                           unsigned int level,
1119                           virCacheType type,
1120                           unsigned int cache,
1121                           unsigned long long size)
1122 {
1123     virResctrlAllocPerType *a_type = virResctrlAllocGetType(alloc, level, type);
1124 
1125     if (!a_type)
1126         return -1;
1127 
1128     if (a_type->nsizes <= cache)
1129         VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
1130                      cache - a_type->nsizes + 1);
1131 
1132     if (!a_type->sizes[cache])
1133         a_type->sizes[cache] = g_new0(unsigned long long, 1);
1134 
1135     *(a_type->sizes[cache]) = size;
1136 
1137     return 0;
1138 }
1139 
1140 
1141 /*
1142  * Check if there is an allocation for this level/type/cache already.  Called
1143  * before updating the structure.  VIR_CACHE_TYPE_BOTH collides with any type,
1144  * the other types collide with itself.  This code basically checks if either:
1145  * `alloc[level]->types[type]->sizes[cache]`
1146  * or
1147  * `alloc[level]->types[VIR_CACHE_TYPE_BOTH]->sizes[cache]`
1148  * is non-NULL.  All the fuzz around it is checking for NULL pointers along
1149  * the way.
1150  */
1151 static bool
virResctrlAllocCheckCollision(virResctrlAlloc * alloc,unsigned int level,virCacheType type,unsigned int cache)1152 virResctrlAllocCheckCollision(virResctrlAlloc *alloc,
1153                               unsigned int level,
1154                               virCacheType type,
1155                               unsigned int cache)
1156 {
1157     virResctrlAllocPerLevel *a_level = NULL;
1158     virResctrlAllocPerType *a_type = NULL;
1159 
1160     if (!alloc)
1161         return false;
1162 
1163     if (alloc->nlevels <= level)
1164         return false;
1165 
1166     a_level = alloc->levels[level];
1167 
1168     if (!a_level)
1169         return false;
1170 
1171     a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
1172 
1173     /* If there is an allocation for type 'both', there can be no other
1174      * allocation for the same cache */
1175     if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
1176         return true;
1177 
1178     if (type == VIR_CACHE_TYPE_BOTH) {
1179         a_type = a_level->types[VIR_CACHE_TYPE_CODE];
1180 
1181         if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
1182             return true;
1183 
1184         a_type = a_level->types[VIR_CACHE_TYPE_DATA];
1185 
1186         if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
1187             return true;
1188     } else {
1189         a_type = a_level->types[type];
1190 
1191         if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
1192             return true;
1193     }
1194 
1195     return false;
1196 }
1197 
1198 
1199 int
virResctrlAllocSetCacheSize(virResctrlAlloc * alloc,unsigned int level,virCacheType type,unsigned int cache,unsigned long long size)1200 virResctrlAllocSetCacheSize(virResctrlAlloc *alloc,
1201                             unsigned int level,
1202                             virCacheType type,
1203                             unsigned int cache,
1204                             unsigned long long size)
1205 {
1206     if (virResctrlAllocCheckCollision(alloc, level, type, cache)) {
1207         virReportError(VIR_ERR_XML_ERROR,
1208                        _("Colliding cache allocations for cache "
1209                          "level '%u' id '%u', type '%s'"),
1210                        level, cache, virCacheTypeToString(type));
1211         return -1;
1212     }
1213 
1214     return virResctrlAllocUpdateSize(alloc, level, type, cache, size);
1215 }
1216 
1217 
1218 int
virResctrlAllocForeachCache(virResctrlAlloc * alloc,virResctrlAllocForeachCacheCallback cb,void * opaque)1219 virResctrlAllocForeachCache(virResctrlAlloc *alloc,
1220                             virResctrlAllocForeachCacheCallback cb,
1221                             void *opaque)
1222 {
1223     int ret = 0;
1224     unsigned int level = 0;
1225     unsigned int type = 0;
1226     unsigned int cache = 0;
1227 
1228     if (!alloc)
1229         return 0;
1230 
1231     for (level = 0; level < alloc->nlevels; level++) {
1232         virResctrlAllocPerLevel *a_level = alloc->levels[level];
1233 
1234         if (!a_level)
1235             continue;
1236 
1237         for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
1238             virResctrlAllocPerType *a_type = a_level->types[type];
1239 
1240             if (!a_type)
1241                 continue;
1242 
1243             for (cache = 0; cache < a_type->nsizes; cache++) {
1244                 unsigned long long *size = a_type->sizes[cache];
1245 
1246                 if (!size)
1247                     continue;
1248 
1249                 ret = cb(level, type, cache, *size, opaque);
1250                 if (ret < 0)
1251                     return ret;
1252             }
1253         }
1254     }
1255 
1256     return 0;
1257 }
1258 
1259 
1260 /* virResctrlAllocSetMemoryBandwidth
1261  * @alloc: Pointer to an active allocation
1262  * @id: node id of MBA to be set
1263  * @memory_bandwidth: new memory bandwidth value
1264  *
1265  * Set the @memory_bandwidth for the node @id entry in the @alloc.
1266  *
1267  * Returns 0 on success, -1 on failure with error message set.
1268  */
1269 int
virResctrlAllocSetMemoryBandwidth(virResctrlAlloc * alloc,unsigned int id,unsigned int memory_bandwidth)1270 virResctrlAllocSetMemoryBandwidth(virResctrlAlloc *alloc,
1271                                   unsigned int id,
1272                                   unsigned int memory_bandwidth)
1273 {
1274     virResctrlAllocMemBW *mem_bw = alloc->mem_bw;
1275 
1276     if (memory_bandwidth > 100) {
1277         virReportError(VIR_ERR_XML_ERROR, "%s",
1278                        _("Memory Bandwidth value exceeding 100 is invalid."));
1279         return -1;
1280     }
1281 
1282     if (!mem_bw) {
1283         mem_bw = g_new0(virResctrlAllocMemBW, 1);
1284         alloc->mem_bw = mem_bw;
1285     }
1286 
1287     if (mem_bw->nbandwidths <= id)
1288         VIR_EXPAND_N(mem_bw->bandwidths, mem_bw->nbandwidths,
1289                      id - mem_bw->nbandwidths + 1);
1290 
1291     if (mem_bw->bandwidths[id]) {
1292         virReportError(VIR_ERR_XML_ERROR,
1293                        _("Memory Bandwidth already defined for node %u"),
1294                        id);
1295         return -1;
1296     }
1297 
1298     mem_bw->bandwidths[id] = g_new0(unsigned int, 1);
1299     *(mem_bw->bandwidths[id]) = memory_bandwidth;
1300     return 0;
1301 }
1302 
1303 
1304 /* virResctrlAllocForeachMemory
1305  * @alloc: Pointer to an active allocation
1306  * @cb: Callback function
1307  * @opaque: Opaque data to be passed to @cb
1308  *
1309  * If available, traverse the defined memory bandwidth allocations and
1310  * call the @cb function.
1311  *
1312  * Returns 0 on success, -1 and immediate failure if the @cb has any failure.
1313  */
1314 int
virResctrlAllocForeachMemory(virResctrlAlloc * alloc,virResctrlAllocForeachMemoryCallback cb,void * opaque)1315 virResctrlAllocForeachMemory(virResctrlAlloc *alloc,
1316                              virResctrlAllocForeachMemoryCallback cb,
1317                              void *opaque)
1318 {
1319     size_t i = 0;
1320     virResctrlAllocMemBW *mem_bw;
1321 
1322     if (!alloc || !alloc->mem_bw)
1323         return 0;
1324 
1325     mem_bw = alloc->mem_bw;
1326     for (i = 0; i < mem_bw->nbandwidths; i++) {
1327         if (mem_bw->bandwidths[i]) {
1328             if (cb(i, *mem_bw->bandwidths[i], opaque) < 0)
1329                 return -1;
1330         }
1331     }
1332 
1333     return 0;
1334 }
1335 
1336 
1337 static int
virResctrlSetID(char ** resctrlid,const char * id)1338 virResctrlSetID(char **resctrlid,
1339                 const char *id)
1340 {
1341     if (!id) {
1342         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1343                        _("New resctrl 'id' cannot be NULL"));
1344         return -1;
1345     }
1346 
1347     if (*resctrlid) {
1348         virReportError(VIR_ERR_INTERNAL_ERROR,
1349                        _("Attempt to overwrite resctrlid='%s' with id='%s'"),
1350                        *resctrlid, id);
1351         return -1;
1352     }
1353 
1354     *resctrlid = g_strdup(id);
1355     return 0;
1356 }
1357 
1358 
1359 int
virResctrlAllocSetID(virResctrlAlloc * alloc,const char * id)1360 virResctrlAllocSetID(virResctrlAlloc *alloc,
1361                      const char *id)
1362 {
1363     return virResctrlSetID(&alloc->id, id);
1364 }
1365 
1366 
1367 const char *
virResctrlAllocGetID(virResctrlAlloc * alloc)1368 virResctrlAllocGetID(virResctrlAlloc *alloc)
1369 {
1370     return alloc->id;
1371 }
1372 
1373 
1374 /* Format the Memory Bandwidth Allocation line that will be found in
1375  * the schemata files. The line should be start with "MB:" and be
1376  * followed by "id=value" pairs separated by a semi-colon such as:
1377  *
1378  *     MB:0=100;1=100
1379  *
1380  * which indicates node id 0 has 100 percent bandwidth and node id 1
1381  * has 100 percent bandwidth. A trailing semi-colon is not formatted.
1382  */
1383 static int
virResctrlAllocMemoryBandwidthFormat(virResctrlAlloc * alloc,virBuffer * buf)1384 virResctrlAllocMemoryBandwidthFormat(virResctrlAlloc *alloc,
1385                                      virBuffer *buf)
1386 {
1387     size_t i;
1388 
1389     if (!alloc->mem_bw)
1390         return 0;
1391 
1392     virBufferAddLit(buf, "MB:");
1393 
1394     for (i = 0; i < alloc->mem_bw->nbandwidths; i++) {
1395         if (alloc->mem_bw->bandwidths[i]) {
1396             virBufferAsprintf(buf, "%zd=%u;", i,
1397                               *(alloc->mem_bw->bandwidths[i]));
1398         }
1399     }
1400 
1401     virBufferTrim(buf, ";");
1402     virBufferAddChar(buf, '\n');
1403     return 0;
1404 }
1405 
1406 
1407 static int
virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfo * resctrl,virResctrlAlloc * alloc,char * mem_bw)1408 virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfo *resctrl,
1409                                            virResctrlAlloc *alloc,
1410                                            char *mem_bw)
1411 {
1412     unsigned int bandwidth;
1413     unsigned int id;
1414     char *tmp = NULL;
1415 
1416     tmp = strchr(mem_bw, '=');
1417     if (!tmp)
1418         return 0;
1419     *tmp = '\0';
1420     tmp++;
1421 
1422     if (virStrToLong_uip(mem_bw, NULL, 10, &id) < 0) {
1423         virReportError(VIR_ERR_INTERNAL_ERROR,
1424                        _("Invalid node id %u "), id);
1425         return -1;
1426     }
1427     if (virStrToLong_uip(tmp, NULL, 10, &bandwidth) < 0) {
1428         virReportError(VIR_ERR_INTERNAL_ERROR,
1429                        _("Invalid bandwidth %u"), bandwidth);
1430         return -1;
1431     }
1432     if (bandwidth < resctrl->membw_info->min_bandwidth ||
1433         id > resctrl->membw_info->max_id) {
1434         virReportError(VIR_ERR_INTERNAL_ERROR,
1435                        _("Missing or inconsistent resctrl info for "
1436                          "memory bandwidth node '%u'"), id);
1437         return -1;
1438     }
1439     if (alloc->mem_bw->nbandwidths <= id) {
1440         VIR_EXPAND_N(alloc->mem_bw->bandwidths, alloc->mem_bw->nbandwidths,
1441                      id - alloc->mem_bw->nbandwidths + 1);
1442     }
1443     if (!alloc->mem_bw->bandwidths[id])
1444         alloc->mem_bw->bandwidths[id] = g_new0(unsigned int, 1);
1445 
1446     *(alloc->mem_bw->bandwidths[id]) = bandwidth;
1447     return 0;
1448 }
1449 
1450 
1451 /* Parse a schemata formatted MB: entry. Format details are described in
1452  * virResctrlAllocMemoryBandwidthFormat.
1453  */
1454 static int
virResctrlAllocParseMemoryBandwidthLine(virResctrlInfo * resctrl,virResctrlAlloc * alloc,char * line)1455 virResctrlAllocParseMemoryBandwidthLine(virResctrlInfo *resctrl,
1456                                         virResctrlAlloc *alloc,
1457                                         char *line)
1458 {
1459     g_auto(GStrv) mbs = NULL;
1460     GStrv next;
1461     char *tmp = NULL;
1462 
1463     /* For no reason there can be spaces */
1464     virSkipSpaces((const char **) &line);
1465 
1466     if (STRNEQLEN(line, "MB", 2))
1467         return 0;
1468 
1469     if (!resctrl || !resctrl->membw_info ||
1470         !resctrl->membw_info->min_bandwidth ||
1471         !resctrl->membw_info->bandwidth_granularity) {
1472         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1473                        _("Missing or inconsistent resctrl info for "
1474                          "memory bandwidth allocation"));
1475         return -1;
1476     }
1477 
1478     if (!alloc->mem_bw)
1479         alloc->mem_bw = g_new0(virResctrlAllocMemBW, 1);
1480 
1481     tmp = strchr(line, ':');
1482     if (!tmp)
1483         return 0;
1484     tmp++;
1485 
1486     mbs = g_strsplit(tmp, ";", 0);
1487     for (next = mbs; *next; next++) {
1488         if (virResctrlAllocParseProcessMemoryBandwidth(resctrl, alloc, *next) < 0)
1489             return -1;
1490     }
1491 
1492     return 0;
1493 }
1494 
1495 
1496 static int
virResctrlAllocFormatCache(virResctrlAlloc * alloc,virBuffer * buf)1497 virResctrlAllocFormatCache(virResctrlAlloc *alloc,
1498                            virBuffer *buf)
1499 {
1500     unsigned int level = 0;
1501     unsigned int type = 0;
1502     unsigned int cache = 0;
1503 
1504     for (level = 0; level < alloc->nlevels; level++) {
1505         virResctrlAllocPerLevel *a_level = alloc->levels[level];
1506 
1507         if (!a_level)
1508             continue;
1509 
1510         for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
1511             virResctrlAllocPerType *a_type = a_level->types[type];
1512 
1513             if (!a_type)
1514                 continue;
1515 
1516             virBufferAsprintf(buf, "L%u%s:", level, virResctrlTypeToString(type));
1517 
1518             for (cache = 0; cache < a_type->nmasks; cache++) {
1519                 virBitmap *mask = a_type->masks[cache];
1520                 char *mask_str = NULL;
1521 
1522                 if (!mask)
1523                     continue;
1524 
1525                 mask_str = virBitmapToString(mask);
1526                 if (!mask_str)
1527                     return -1;
1528 
1529                 virBufferAsprintf(buf, "%u=%s;", cache, mask_str);
1530                 VIR_FREE(mask_str);
1531             }
1532 
1533             virBufferTrim(buf, ";");
1534             virBufferAddChar(buf, '\n');
1535         }
1536     }
1537 
1538     return 0;
1539 }
1540 
1541 
1542 char *
virResctrlAllocFormat(virResctrlAlloc * alloc)1543 virResctrlAllocFormat(virResctrlAlloc *alloc)
1544 {
1545     g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
1546 
1547     if (!alloc)
1548         return NULL;
1549 
1550     if (virResctrlAllocFormatCache(alloc, &buf) < 0)
1551         return NULL;
1552 
1553     if (virResctrlAllocMemoryBandwidthFormat(alloc, &buf) < 0)
1554         return NULL;
1555 
1556     return virBufferContentAndReset(&buf);
1557 }
1558 
1559 
1560 static int
virResctrlAllocParseProcessCache(virResctrlInfo * resctrl,virResctrlAlloc * alloc,unsigned int level,virCacheType type,char * cache)1561 virResctrlAllocParseProcessCache(virResctrlInfo *resctrl,
1562                                  virResctrlAlloc *alloc,
1563                                  unsigned int level,
1564                                  virCacheType type,
1565                                  char *cache)
1566 {
1567     char *tmp = strchr(cache, '=');
1568     unsigned int cache_id = 0;
1569     g_autoptr(virBitmap) mask = NULL;
1570 
1571     if (!tmp)
1572         return 0;
1573 
1574     *tmp = '\0';
1575     tmp++;
1576 
1577     if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
1578         virReportError(VIR_ERR_INTERNAL_ERROR,
1579                        _("Invalid cache id '%s'"), cache);
1580         return -1;
1581     }
1582 
1583     mask = virBitmapNewString(tmp);
1584     if (!mask)
1585         return -1;
1586 
1587     if (!resctrl ||
1588         level >= resctrl->nlevels ||
1589         !resctrl->levels[level] ||
1590         !resctrl->levels[level]->types[type]) {
1591         virReportError(VIR_ERR_INTERNAL_ERROR,
1592                        _("Missing or inconsistent resctrl info for "
1593                          "level '%u' type '%s'"),
1594                        level, virCacheTypeToString(type));
1595         return -1;
1596     }
1597 
1598     virBitmapShrink(mask, resctrl->levels[level]->types[type]->bits);
1599 
1600     if (virResctrlAllocUpdateMask(alloc, level, type, cache_id, mask) < 0)
1601         return -1;
1602 
1603     return 0;
1604 }
1605 
1606 
1607 static int
virResctrlAllocParseCacheLine(virResctrlInfo * resctrl,virResctrlAlloc * alloc,char * line)1608 virResctrlAllocParseCacheLine(virResctrlInfo *resctrl,
1609                               virResctrlAlloc *alloc,
1610                               char *line)
1611 {
1612     g_auto(GStrv) caches = NULL;
1613     GStrv next;
1614     char *tmp = NULL;
1615     unsigned int level = 0;
1616     int type = -1;
1617 
1618     /* For no reason there can be spaces */
1619     virSkipSpaces((const char **) &line);
1620 
1621     /* Skip lines that don't concern caches, e.g. MB: etc. */
1622     if (line[0] != 'L')
1623         return 0;
1624 
1625     /* And lines that we can't parse too */
1626     tmp = strchr(line, ':');
1627     if (!tmp)
1628         return 0;
1629 
1630     *tmp = '\0';
1631     tmp++;
1632 
1633     if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
1634         virReportError(VIR_ERR_INTERNAL_ERROR,
1635                        _("Cannot parse resctrl schema level '%s'"),
1636                        line + 1);
1637         return -1;
1638     }
1639 
1640     type = virResctrlTypeFromString(line);
1641     if (type < 0) {
1642         virReportError(VIR_ERR_INTERNAL_ERROR,
1643                        _("Cannot parse resctrl schema level '%s'"),
1644                        line + 1);
1645         return -1;
1646     }
1647 
1648     caches = g_strsplit(tmp, ";", 0);
1649     if (!caches)
1650         return 0;
1651 
1652     for (next = caches; *next; next++) {
1653         if (virResctrlAllocParseProcessCache(resctrl, alloc, level, type, *next) < 0)
1654             return -1;
1655     }
1656 
1657     return 0;
1658 }
1659 
1660 
1661 static int
virResctrlAllocParse(virResctrlInfo * resctrl,virResctrlAlloc * alloc,const char * schemata)1662 virResctrlAllocParse(virResctrlInfo *resctrl,
1663                      virResctrlAlloc *alloc,
1664                      const char *schemata)
1665 {
1666     g_auto(GStrv) lines = NULL;
1667     GStrv next;
1668 
1669     lines = g_strsplit(schemata, "\n", 0);
1670     for (next = lines; *next; next++) {
1671         if (virResctrlAllocParseCacheLine(resctrl, alloc, *next) < 0)
1672             return -1;
1673         if (virResctrlAllocParseMemoryBandwidthLine(resctrl, alloc, *next) < 0)
1674             return -1;
1675     }
1676 
1677     return 0;
1678 }
1679 
1680 
1681 static int
virResctrlAllocGetGroup(virResctrlInfo * resctrl,const char * groupname,virResctrlAlloc ** alloc)1682 virResctrlAllocGetGroup(virResctrlInfo *resctrl,
1683                         const char *groupname,
1684                         virResctrlAlloc **alloc)
1685 {
1686     char *schemata = NULL;
1687     int rv = virFileReadValueString(&schemata,
1688                                     SYSFS_RESCTRL_PATH "/%s/schemata",
1689                                     groupname);
1690 
1691     *alloc = NULL;
1692 
1693     if (rv < 0)
1694         return rv;
1695 
1696     *alloc = virResctrlAllocNew();
1697     if (!*alloc)
1698         goto error;
1699 
1700     if (virResctrlAllocParse(resctrl, *alloc, schemata) < 0)
1701         goto error;
1702 
1703     VIR_FREE(schemata);
1704     return 0;
1705 
1706  error:
1707     VIR_FREE(schemata);
1708     virObjectUnref(*alloc);
1709     *alloc = NULL;
1710     return -1;
1711 }
1712 
1713 
1714 static virResctrlAlloc *
virResctrlAllocGetDefault(virResctrlInfo * resctrl)1715 virResctrlAllocGetDefault(virResctrlInfo *resctrl)
1716 {
1717     virResctrlAlloc *ret = NULL;
1718     int rv = virResctrlAllocGetGroup(resctrl, ".", &ret);
1719 
1720     if (rv == -2) {
1721         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1722                        _("Could not read schemata file for the default group"));
1723     }
1724 
1725     return ret;
1726 }
1727 
1728 
1729 static void
virResctrlAllocSubtractPerType(virResctrlAllocPerType * dst,virResctrlAllocPerType * src)1730 virResctrlAllocSubtractPerType(virResctrlAllocPerType *dst,
1731                                virResctrlAllocPerType *src)
1732 {
1733     size_t i = 0;
1734 
1735     if (!dst || !src)
1736         return;
1737 
1738     for (i = 0; i < dst->nmasks && i < src->nmasks; i++) {
1739         if (dst->masks[i] && src->masks[i])
1740             virBitmapSubtract(dst->masks[i], src->masks[i]);
1741     }
1742 }
1743 
1744 
1745 static void
virResctrlAllocSubtract(virResctrlAlloc * dst,virResctrlAlloc * src)1746 virResctrlAllocSubtract(virResctrlAlloc *dst,
1747                         virResctrlAlloc *src)
1748 {
1749     size_t i = 0;
1750     size_t j = 0;
1751 
1752     if (!src)
1753         return;
1754 
1755     for (i = 0; i < dst->nlevels && i < src->nlevels; i++) {
1756         if (dst->levels[i] && src->levels[i]) {
1757             for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
1758                 virResctrlAllocSubtractPerType(dst->levels[i]->types[j],
1759                                                src->levels[i]->types[j]);
1760             }
1761         }
1762     }
1763 }
1764 
1765 
1766 static virResctrlAlloc *
virResctrlAllocNewFromInfo(virResctrlInfo * info)1767 virResctrlAllocNewFromInfo(virResctrlInfo *info)
1768 {
1769     size_t i = 0;
1770     g_autoptr(virResctrlAlloc) ret = virResctrlAllocNew();
1771 
1772     if (!ret)
1773         return NULL;
1774 
1775     for (i = 0; i < info->nlevels; i++) {
1776         virResctrlInfoPerLevel *i_level = info->levels[i];
1777         size_t j = 0;
1778 
1779         if (!i_level)
1780             continue;
1781 
1782         for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
1783             virResctrlInfoPerType *i_type = i_level->types[j];
1784             g_autoptr(virBitmap) mask = NULL;
1785             size_t k = 0;
1786 
1787             if (!i_type)
1788                 continue;
1789 
1790             mask = virBitmapNew(i_type->bits);
1791             virBitmapSetAll(mask);
1792 
1793             for (k = 0; k <= i_type->max_cache_id; k++) {
1794                 if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
1795                     return NULL;
1796             }
1797         }
1798     }
1799 
1800     /* set default free memory bandwidth to 100% */
1801     if (info->membw_info) {
1802         ret->mem_bw = g_new0(virResctrlAllocMemBW, 1);
1803 
1804         VIR_EXPAND_N(ret->mem_bw->bandwidths, ret->mem_bw->nbandwidths,
1805                      info->membw_info->max_id + 1);
1806 
1807         for (i = 0; i < ret->mem_bw->nbandwidths; i++) {
1808             ret->mem_bw->bandwidths[i] = g_new0(unsigned int, 1);
1809             *(ret->mem_bw->bandwidths[i]) = 100;
1810         }
1811     }
1812 
1813     return g_steal_pointer(&ret);
1814 }
1815 
1816 /*
1817  * This function creates an allocation that represents all unused parts of all
1818  * caches in the system.  It uses virResctrlInfo for creating a new full
1819  * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
1820  * scans for all allocations under /sys/fs/resctrl and subtracts each one of
1821  * them from it.  That way it can then return an allocation with only bit set
1822  * being those that are not mentioned in any other allocation.  It is used for
1823  * two things, a) calculating the masks when creating allocations and b) from
1824  * tests.
1825  *
1826  * MBA (Memory Bandwidth Allocation) is not taken into account as it is a
1827  * limiting setting, not an allocating one.  The way it works is also vastly
1828  * different from CAT.
1829  */
1830 virResctrlAlloc *
virResctrlAllocGetUnused(virResctrlInfo * resctrl)1831 virResctrlAllocGetUnused(virResctrlInfo *resctrl)
1832 {
1833     g_autoptr(virResctrlAlloc) ret = NULL;
1834     g_autoptr(virResctrlAlloc) alloc_default = NULL;
1835     struct dirent *ent = NULL;
1836     g_autoptr(DIR) dirp = NULL;
1837     int rv = -1;
1838 
1839     if (virResctrlInfoIsEmpty(resctrl)) {
1840         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
1841                        _("Resource control is not supported on this host"));
1842         return NULL;
1843     }
1844 
1845     ret = virResctrlAllocNewFromInfo(resctrl);
1846     if (!ret)
1847         return NULL;
1848 
1849     alloc_default = virResctrlAllocGetDefault(resctrl);
1850     if (!alloc_default)
1851         return NULL;
1852 
1853     virResctrlAllocSubtract(ret, alloc_default);
1854 
1855     if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
1856         return NULL;
1857 
1858     while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
1859         g_autoptr(virResctrlAlloc) alloc = NULL;
1860 
1861         if (STREQ(ent->d_name, "info"))
1862             continue;
1863 
1864         rv = virResctrlAllocGetGroup(resctrl, ent->d_name, &alloc);
1865         if (rv == -2)
1866             continue;
1867 
1868         if (rv < 0) {
1869             virReportError(VIR_ERR_INTERNAL_ERROR,
1870                            _("Could not read schemata file for group %s"),
1871                            ent->d_name);
1872             return NULL;
1873         }
1874 
1875         virResctrlAllocSubtract(ret, alloc);
1876     }
1877     if (rv < 0)
1878         return NULL;
1879 
1880     return g_steal_pointer(&ret);
1881 }
1882 
1883 
1884 /*
1885  * Given the information about requested allocation type `a_type`, the host
1886  * cache for a particular type `i_type` and unused bits in the system `f_type`
1887  * this function tries to find the smallest free space in which the allocation
1888  * for cache id `cache` would fit.  We're looking for the smallest place in
1889  * order to minimize fragmentation and maximize the possibility of succeeding.
1890  *
1891  * Per-cache allocation for the @level, @type and @cache must already be
1892  * allocated for @alloc (does not have to exist though).
1893  */
1894 static int
virResctrlAllocFindUnused(virResctrlAlloc * alloc,virResctrlInfoPerType * i_type,virResctrlAllocPerType * f_type,unsigned int level,unsigned int type,unsigned int cache)1895 virResctrlAllocFindUnused(virResctrlAlloc *alloc,
1896                           virResctrlInfoPerType *i_type,
1897                           virResctrlAllocPerType *f_type,
1898                           unsigned int level,
1899                           unsigned int type,
1900                           unsigned int cache)
1901 {
1902     unsigned long long *size = alloc->levels[level]->types[type]->sizes[cache];
1903     g_autoptr(virBitmap) a_mask = NULL;
1904     virBitmap *f_mask = NULL;
1905     unsigned long long need_bits;
1906     size_t i = 0;
1907     ssize_t pos = -1;
1908     ssize_t last_bits = 0;
1909     ssize_t last_pos = -1;
1910 
1911     if (!size)
1912         return 0;
1913 
1914     if (cache >= f_type->nmasks) {
1915         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1916                        _("Cache with id %u does not exists for level %d"),
1917                        cache, level);
1918         return -1;
1919     }
1920 
1921     f_mask = f_type->masks[cache];
1922     if (!f_mask) {
1923         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1924                        _("Cache level %d id %u does not support tuning for "
1925                          "scope type '%s'"),
1926                        level, cache, virCacheTypeToString(type));
1927         return -1;
1928     }
1929 
1930     if (*size == i_type->size) {
1931         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1932                        _("Cache allocation for the whole cache is not "
1933                          "possible, specify size smaller than %llu"),
1934                        i_type->size);
1935         return -1;
1936     }
1937 
1938     need_bits = *size / i_type->control.granularity;
1939 
1940     if (*size % i_type->control.granularity) {
1941         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1942                        _("Cache allocation of size %llu is not "
1943                          "divisible by granularity %llu"),
1944                        *size, i_type->control.granularity);
1945         return -1;
1946     }
1947 
1948     if (need_bits < i_type->min_cbm_bits) {
1949         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1950                        _("Cache allocation of size %llu is smaller "
1951                          "than the minimum allowed allocation %llu"),
1952                        *size,
1953                        i_type->control.granularity * i_type->min_cbm_bits);
1954         return -1;
1955     }
1956 
1957     while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
1958         ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
1959         ssize_t bits;
1960 
1961         if (pos_clear < 0)
1962             pos_clear = virBitmapSize(f_mask);
1963 
1964         bits = pos_clear - pos;
1965 
1966         /* Not enough bits, move on and skip all of them */
1967         if (bits < need_bits) {
1968             pos = pos_clear;
1969             continue;
1970         }
1971 
1972         /* This fits perfectly */
1973         if (bits == need_bits) {
1974             last_pos = pos;
1975             break;
1976         }
1977 
1978         /* Remember the smaller region if we already found on before */
1979         if (last_pos < 0 || (last_bits && bits < last_bits)) {
1980             last_bits = bits;
1981             last_pos = pos;
1982         }
1983 
1984         pos = pos_clear;
1985     }
1986 
1987     if (last_pos < 0) {
1988         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1989                        _("Not enough room for allocation of "
1990                          "%llu bytes for level %u cache %u "
1991                          "scope type '%s'"),
1992                        *size, level, cache,
1993                        virCacheTypeToString(type));
1994         return -1;
1995     }
1996 
1997     a_mask = virBitmapNew(i_type->bits);
1998 
1999     for (i = last_pos; i < last_pos + need_bits; i++)
2000         ignore_value(virBitmapSetBit(a_mask, i));
2001 
2002     if (virResctrlAllocUpdateMask(alloc, level, type, cache, a_mask) < 0)
2003         return -1;
2004 
2005     return 0;
2006 }
2007 
2008 
2009 static int
virResctrlAllocMemoryBandwidth(virResctrlInfo * resctrl,virResctrlAlloc * alloc)2010 virResctrlAllocMemoryBandwidth(virResctrlInfo *resctrl,
2011                                virResctrlAlloc *alloc)
2012 {
2013     size_t i;
2014     virResctrlAllocMemBW *mem_bw_alloc = alloc->mem_bw;
2015     virResctrlInfoMemBW *mem_bw_info = resctrl->membw_info;
2016 
2017     if (!mem_bw_alloc)
2018         return 0;
2019 
2020     if (!mem_bw_info) {
2021         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
2022                        _("RDT Memory Bandwidth allocation unsupported"));
2023         return -1;
2024     }
2025 
2026     for (i = 0; i < mem_bw_alloc->nbandwidths; i++) {
2027         if (!mem_bw_alloc->bandwidths[i])
2028             continue;
2029 
2030         if (*(mem_bw_alloc->bandwidths[i]) % mem_bw_info->bandwidth_granularity) {
2031             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2032                            _("Memory Bandwidth allocation of size "
2033                              "%u is not divisible by granularity %u"),
2034                            *(mem_bw_alloc->bandwidths[i]),
2035                            mem_bw_info->bandwidth_granularity);
2036             return -1;
2037         }
2038         if (*(mem_bw_alloc->bandwidths[i]) < mem_bw_info->min_bandwidth) {
2039             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2040                            _("Memory Bandwidth allocation of size "
2041                              "%u is smaller than the minimum "
2042                              "allowed allocation %u"),
2043                            *(mem_bw_alloc->bandwidths[i]),
2044                            mem_bw_info->min_bandwidth);
2045             return -1;
2046         }
2047         if (i > mem_bw_info->max_id) {
2048             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2049                            _("bandwidth controller id %zd does not "
2050                              "exist, max controller id %u"),
2051                            i, mem_bw_info->max_id);
2052             return -1;
2053         }
2054     }
2055     return 0;
2056 }
2057 
2058 
2059 static int
virResctrlAllocCopyMemBW(virResctrlAlloc * dst,virResctrlAlloc * src)2060 virResctrlAllocCopyMemBW(virResctrlAlloc *dst,
2061                          virResctrlAlloc *src)
2062 {
2063     size_t i = 0;
2064     virResctrlAllocMemBW *dst_bw = NULL;
2065     virResctrlAllocMemBW *src_bw = src->mem_bw;
2066 
2067     if (!src->mem_bw)
2068         return 0;
2069 
2070     if (!dst->mem_bw)
2071         dst->mem_bw = g_new0(virResctrlAllocMemBW, 1);
2072 
2073     dst_bw = dst->mem_bw;
2074 
2075     if (src_bw->nbandwidths > dst_bw->nbandwidths)
2076         VIR_EXPAND_N(dst_bw->bandwidths, dst_bw->nbandwidths,
2077                      src_bw->nbandwidths - dst_bw->nbandwidths);
2078 
2079     for (i = 0; i < src_bw->nbandwidths; i++) {
2080         if (dst_bw->bandwidths[i])
2081             continue;
2082         dst_bw->bandwidths[i] = g_new0(unsigned int, 1);
2083         *dst_bw->bandwidths[i] = *src_bw->bandwidths[i];
2084     }
2085 
2086     return 0;
2087 }
2088 
2089 
2090 static int
virResctrlAllocCopyMasks(virResctrlAlloc * dst,virResctrlAlloc * src)2091 virResctrlAllocCopyMasks(virResctrlAlloc *dst,
2092                          virResctrlAlloc *src)
2093 {
2094     unsigned int level = 0;
2095 
2096     for (level = 0; level < src->nlevels; level++) {
2097         virResctrlAllocPerLevel *s_level = src->levels[level];
2098         unsigned int type = 0;
2099 
2100         if (!s_level)
2101             continue;
2102 
2103         for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
2104             virResctrlAllocPerType *s_type = s_level->types[type];
2105             virResctrlAllocPerType *d_type = NULL;
2106             unsigned int cache = 0;
2107 
2108             if (!s_type)
2109                 continue;
2110 
2111             d_type = virResctrlAllocGetType(dst, level, type);
2112             if (!d_type)
2113                 return -1;
2114 
2115             for (cache = 0; cache < s_type->nmasks; cache++) {
2116                 virBitmap *mask = s_type->masks[cache];
2117 
2118                 if (mask && virResctrlAllocUpdateMask(dst, level, type, cache, mask) < 0)
2119                     return -1;
2120             }
2121         }
2122     }
2123 
2124     return 0;
2125 }
2126 
2127 
2128 /*
2129  * This function is called when creating an allocation in the system.
2130  * What it does is that it gets all the unused resources using
2131  * virResctrlAllocGetUnused and then tries to find a proper space for
2132  * every requested allocation effectively transforming `sizes` into `masks`.
2133  */
2134 static int
virResctrlAllocAssign(virResctrlInfo * resctrl,virResctrlAlloc * alloc)2135 virResctrlAllocAssign(virResctrlInfo *resctrl,
2136                       virResctrlAlloc *alloc)
2137 {
2138     unsigned int level = 0;
2139     g_autoptr(virResctrlAlloc) alloc_free = NULL;
2140     g_autoptr(virResctrlAlloc) alloc_default = NULL;
2141 
2142     alloc_free = virResctrlAllocGetUnused(resctrl);
2143     if (!alloc_free)
2144         return -1;
2145 
2146     alloc_default = virResctrlAllocGetDefault(resctrl);
2147     if (!alloc_default)
2148         return -1;
2149 
2150     if (virResctrlAllocMemoryBandwidth(resctrl, alloc) < 0)
2151         return -1;
2152 
2153     if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
2154         return -1;
2155 
2156     if (virResctrlAllocCopyMemBW(alloc, alloc_default) < 0)
2157         return -1;
2158 
2159     for (level = 0; level < alloc->nlevels; level++) {
2160         virResctrlAllocPerLevel *a_level = alloc->levels[level];
2161         virResctrlAllocPerLevel *f_level = NULL;
2162         unsigned int type = 0;
2163 
2164         if (!a_level)
2165             continue;
2166 
2167         if (level < alloc_free->nlevels)
2168             f_level = alloc_free->levels[level];
2169 
2170         if (!f_level) {
2171             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2172                            _("Cache level %d does not support tuning"),
2173                            level);
2174             return -1;
2175         }
2176 
2177         for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
2178             virResctrlAllocPerType *a_type = a_level->types[type];
2179             virResctrlAllocPerType *f_type = f_level->types[type];
2180             unsigned int cache = 0;
2181 
2182             if (!a_type)
2183                 continue;
2184 
2185             if (!f_type) {
2186                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
2187                                _("Cache level %d does not support tuning for "
2188                                  "scope type '%s'"),
2189                                level, virCacheTypeToString(type));
2190                 return -1;
2191             }
2192 
2193             for (cache = 0; cache < a_type->nsizes; cache++) {
2194                 virResctrlInfoPerLevel *i_level = resctrl->levels[level];
2195                 virResctrlInfoPerType *i_type = i_level->types[type];
2196 
2197                 if (virResctrlAllocFindUnused(alloc, i_type, f_type, level, type, cache) < 0)
2198                     return -1;
2199             }
2200         }
2201     }
2202 
2203     return 0;
2204 }
2205 
2206 
2207 static char *
virResctrlDeterminePath(const char * parentpath,const char * prefix,const char * id)2208 virResctrlDeterminePath(const char *parentpath,
2209                         const char *prefix,
2210                         const char *id)
2211 {
2212     if (!id) {
2213         virReportError(VIR_ERR_INTERNAL_ERROR,
2214                        _("Resctrl ID must be set before determining resctrl "
2215                          "parentpath='%s' prefix='%s'"), parentpath, prefix);
2216         return NULL;
2217     }
2218 
2219     return g_strdup_printf("%s/%s-%s", parentpath, prefix, id);
2220 }
2221 
2222 
2223 int
virResctrlAllocDeterminePath(virResctrlAlloc * alloc,const char * machinename)2224 virResctrlAllocDeterminePath(virResctrlAlloc *alloc,
2225                              const char *machinename)
2226 {
2227     if (alloc->path) {
2228         virReportError(VIR_ERR_INTERNAL_ERROR,
2229                        _("Resctrl allocation path is already set to '%s'"),
2230                        alloc->path);
2231         return -1;
2232     }
2233 
2234     /* If the allocation is empty, then the path will be SYSFS_RESCTRL_PATH */
2235     if (virResctrlAllocIsEmpty(alloc)) {
2236         alloc->path = g_strdup(SYSFS_RESCTRL_PATH);
2237 
2238         return 0;
2239     }
2240 
2241     alloc->path = virResctrlDeterminePath(SYSFS_RESCTRL_PATH,
2242                                           machinename, alloc->id);
2243 
2244     if (!alloc->path)
2245         return -1;
2246 
2247     return 0;
2248 }
2249 
2250 
/* Make sure the resctrl directory @path exists, creating it (including any
 * missing parents) when it does not.
 *
 * Returns 0 if the directory already exists or was created, -1 (with an
 * error reported) if it could not be created. */
static int
virResctrlCreateGroupPath(const char *path)
{
    if (!virFileExists(path) &&
        g_mkdir_with_parents(path, 0777) < 0) {
        virReportSystemError(errno,
                             _("Cannot create resctrl directory '%s'"),
                             path);
        return -1;
    }

    return 0;
}
2269 
2270 
2271 /* This checks if the directory for the alloc exists.  If not it tries to create
2272  * it and apply appropriate alloc settings. */
2273 int
virResctrlAllocCreate(virResctrlInfo * resctrl,virResctrlAlloc * alloc,const char * machinename)2274 virResctrlAllocCreate(virResctrlInfo *resctrl,
2275                       virResctrlAlloc *alloc,
2276                       const char *machinename)
2277 {
2278     g_autofree char *schemata_path = NULL;
2279     g_autofree char *alloc_str = NULL;
2280     int ret = -1;
2281     int lockfd = -1;
2282 
2283     if (!alloc)
2284         return 0;
2285 
2286     if (virResctrlInfoIsEmpty(resctrl)) {
2287         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
2288                        _("Resource control is not supported on this host"));
2289         return -1;
2290     }
2291 
2292     if (virResctrlAllocDeterminePath(alloc, machinename) < 0)
2293         return -1;
2294 
2295     /* If using the system/default path for the allocation, then we're done */
2296     if (STREQ(alloc->path, SYSFS_RESCTRL_PATH))
2297         return 0;
2298 
2299     lockfd = virResctrlLock();
2300     if (lockfd < 0)
2301         goto cleanup;
2302 
2303     if (virResctrlAllocAssign(resctrl, alloc) < 0)
2304         goto cleanup;
2305 
2306     if (virResctrlCreateGroupPath(alloc->path) < 0)
2307         goto cleanup;
2308 
2309     alloc_str = virResctrlAllocFormat(alloc);
2310     if (!alloc_str)
2311         goto cleanup;
2312 
2313     schemata_path = g_strdup_printf("%s/schemata", alloc->path);
2314 
2315     VIR_DEBUG("Writing resctrl schemata '%s' into '%s'", alloc_str, schemata_path);
2316     if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
2317         rmdir(alloc->path);
2318         virReportSystemError(errno,
2319                              _("Cannot write into schemata file '%s'"),
2320                              schemata_path);
2321         goto cleanup;
2322     }
2323 
2324     ret = 0;
2325  cleanup:
2326     virResctrlUnlock(lockfd);
2327     return ret;
2328 }
2329 
2330 
2331 static int
virResctrlAddPID(const char * path,pid_t pid)2332 virResctrlAddPID(const char *path,
2333                  pid_t pid)
2334 {
2335     g_autofree char *tasks = NULL;
2336     g_autofree char *pidstr = NULL;
2337 
2338     if (!path) {
2339         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2340                        _("Cannot add pid to non-existing resctrl group"));
2341         return -1;
2342     }
2343 
2344     tasks = g_strdup_printf("%s/tasks", path);
2345 
2346     pidstr = g_strdup_printf("%lld", (long long int)pid);
2347 
2348     if (virFileWriteStr(tasks, pidstr, 0) < 0) {
2349         virReportSystemError(errno,
2350                              _("Cannot write pid in tasks file '%s'"),
2351                              tasks);
2352         return -1;
2353     }
2354 
2355     return 0;
2356 }
2357 
2358 
2359 int
virResctrlAllocAddPID(virResctrlAlloc * alloc,pid_t pid)2360 virResctrlAllocAddPID(virResctrlAlloc *alloc,
2361                       pid_t pid)
2362 {
2363     /* If the allocation is empty, then it is impossible to add a PID to
2364      * allocation due to lacking of its 'tasks' file so just return */
2365     if (virResctrlAllocIsEmpty(alloc))
2366         return 0;
2367 
2368     return virResctrlAddPID(alloc->path, pid);
2369 }
2370 
2371 
2372 int
virResctrlAllocRemove(virResctrlAlloc * alloc)2373 virResctrlAllocRemove(virResctrlAlloc *alloc)
2374 {
2375     int ret = 0;
2376 
2377     if (!alloc->path)
2378         return 0;
2379 
2380     /* Do not destroy if path is the system/default path for the allocation */
2381     if (STREQ(alloc->path, SYSFS_RESCTRL_PATH))
2382         return 0;
2383 
2384     VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
2385     if (rmdir(alloc->path) != 0 && errno != ENOENT) {
2386         ret = -errno;
2387         VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
2388     }
2389 
2390     return ret;
2391 }
2392 
2393 
2394 /* virResctrlMonitor-related definitions */
2395 
2396 virResctrlMonitor *
virResctrlMonitorNew(void)2397 virResctrlMonitorNew(void)
2398 {
2399     if (virResctrlInitialize() < 0)
2400         return NULL;
2401 
2402     return virObjectNew(virResctrlMonitorClass);
2403 }
2404 
2405 
2406 /*
2407  * virResctrlMonitorDeterminePath
2408  *
2409  * @monitor: Pointer to a resctrl monitor
2410  * @machinename: Name string of the VM
2411  *
2412  * Determines the directory path that the underlying resctrl group will be
2413  * created with.
2414  *
2415  * A monitor represents a directory under resource control file system,
2416  * its directory path could be the same path as @monitor->alloc, could be a
2417  * path of directory under 'mon_groups' of @monitor->alloc, or a path of
2418  * directory under '/sys/fs/resctrl/mon_groups' if @monitor->alloc is NULL.
2419  *
2420  * Returns 0 on success, -1 on error.
2421  */
2422 int
virResctrlMonitorDeterminePath(virResctrlMonitor * monitor,const char * machinename)2423 virResctrlMonitorDeterminePath(virResctrlMonitor *monitor,
2424                                const char *machinename)
2425 {
2426     g_autofree char *parentpath = NULL;
2427 
2428     if (!monitor) {
2429         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2430                        _("Invalid resctrl monitor"));
2431         return -1;
2432     }
2433 
2434     if (!monitor->alloc) {
2435         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2436                        _("Missing resctrl monitor alloc"));
2437         return -1;
2438     }
2439 
2440     if (monitor->path) {
2441         virReportError(VIR_ERR_INTERNAL_ERROR,
2442                        _("Resctrl monitor path is already set to '%s'"),
2443                        monitor->path);
2444         return -1;
2445     }
2446 
2447     if (!virResctrlAllocIsEmpty(monitor->alloc) &&
2448         STREQ_NULLABLE(monitor->id, monitor->alloc->id)) {
2449         monitor->path = g_strdup(monitor->alloc->path);
2450         return 0;
2451     }
2452 
2453     parentpath = g_strdup_printf("%s/mon_groups", monitor->alloc->path);
2454 
2455     monitor->path = virResctrlDeterminePath(parentpath, machinename,
2456                                             monitor->id);
2457     if (!monitor->path)
2458         return -1;
2459 
2460     return 0;
2461 }
2462 
2463 
2464 int
virResctrlMonitorAddPID(virResctrlMonitor * monitor,pid_t pid)2465 virResctrlMonitorAddPID(virResctrlMonitor *monitor,
2466                         pid_t pid)
2467 {
2468     return virResctrlAddPID(monitor->path, pid);
2469 }
2470 
2471 
2472 int
virResctrlMonitorCreate(virResctrlMonitor * monitor,const char * machinename)2473 virResctrlMonitorCreate(virResctrlMonitor *monitor,
2474                         const char *machinename)
2475 {
2476     int lockfd = -1;
2477     int ret = -1;
2478 
2479     if (!monitor)
2480         return 0;
2481 
2482     if (virResctrlMonitorDeterminePath(monitor, machinename) < 0)
2483         return -1;
2484 
2485     lockfd = virResctrlLock();
2486     if (lockfd < 0)
2487         return -1;
2488 
2489     ret = virResctrlCreateGroupPath(monitor->path);
2490 
2491     virResctrlUnlock(lockfd);
2492     return ret;
2493 }
2494 
2495 
2496 int
virResctrlMonitorSetID(virResctrlMonitor * monitor,const char * id)2497 virResctrlMonitorSetID(virResctrlMonitor *monitor,
2498                        const char *id)
2499 
2500 {
2501     return virResctrlSetID(&monitor->id, id);
2502 }
2503 
2504 
2505 const char *
virResctrlMonitorGetID(virResctrlMonitor * monitor)2506 virResctrlMonitorGetID(virResctrlMonitor *monitor)
2507 {
2508     return monitor->id;
2509 }
2510 
2511 
2512 void
virResctrlMonitorSetAlloc(virResctrlMonitor * monitor,virResctrlAlloc * alloc)2513 virResctrlMonitorSetAlloc(virResctrlMonitor *monitor,
2514                           virResctrlAlloc *alloc)
2515 {
2516     monitor->alloc = virObjectRef(alloc);
2517 }
2518 
2519 
2520 int
virResctrlMonitorRemove(virResctrlMonitor * monitor)2521 virResctrlMonitorRemove(virResctrlMonitor *monitor)
2522 {
2523     int ret = 0;
2524 
2525     if (!monitor->path)
2526         return 0;
2527 
2528     if (STREQ(monitor->path, monitor->alloc->path))
2529         return 0;
2530 
2531     VIR_DEBUG("Removing resctrl monitor path=%s", monitor->path);
2532     if (rmdir(monitor->path) != 0 && errno != ENOENT) {
2533         ret = -errno;
2534         VIR_ERROR(_("Unable to remove %s (%d)"), monitor->path, errno);
2535     }
2536 
2537     return ret;
2538 }
2539 
2540 
2541 static int
virResctrlMonitorStatsSorter(const void * a,const void * b)2542 virResctrlMonitorStatsSorter(const void *a,
2543                              const void *b)
2544 {
2545     return (*(virResctrlMonitorStats **)a)->id
2546         - (*(virResctrlMonitorStats **)b)->id;
2547 }
2548 
2549 
2550 /*
2551  * virResctrlMonitorGetStats
2552  *
2553  * @monitor: The monitor that the statistic data will be retrieved from.
2554  * @resources: A string list for the monitor feature names.
2555  * @stats: Pointer of of virResctrlMonitorStats * array for holding cache or
2556  * memory bandwidth usage data.
2557  * @nstats: A size_t pointer to hold the returned array length of @stats
2558  *
2559  * Get cache or memory bandwidth utilization information.
2560  *
2561  * Returns 0 on success, -1 on error.
2562  */
2563 int
virResctrlMonitorGetStats(virResctrlMonitor * monitor,const char ** resources,virResctrlMonitorStats *** stats,size_t * nstats)2564 virResctrlMonitorGetStats(virResctrlMonitor *monitor,
2565                           const char **resources,
2566                           virResctrlMonitorStats ***stats,
2567                           size_t *nstats)
2568 {
2569     int rv = -1;
2570     int ret = -1;
2571     size_t i = 0;
2572     unsigned long long val = 0;
2573     g_autoptr(DIR) dirp = NULL;
2574     g_autofree char *datapath = NULL;
2575     struct dirent *ent = NULL;
2576     virResctrlMonitorStats *stat = NULL;
2577     size_t nresources = g_strv_length((char **) resources);
2578 
2579     if (!monitor) {
2580         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
2581                        _("Invalid resctrl monitor"));
2582         return -1;
2583     }
2584 
2585     datapath = g_strdup_printf("%s/mon_data", monitor->path);
2586 
2587     if (virDirOpen(&dirp, datapath) < 0)
2588         goto cleanup;
2589 
2590     *nstats = 0;
2591     while (virDirRead(dirp, &ent, datapath) > 0) {
2592         g_autofree char *filepath = NULL;
2593         char *node_id = NULL;
2594 
2595         /* Looking for directory that contains resource utilization
2596          * information file. The directory name is arranged in format
2597          * "mon_<node_name>_<node_id>". For example, "mon_L3_00" and
2598          * "mon_L3_01" are two target directories for a two nodes system
2599          * with resource utilization data file for each node respectively.
2600          */
2601         filepath = g_strdup_printf("%s/%s", datapath, ent->d_name);
2602 
2603         if (!virFileIsDir(filepath))
2604             continue;
2605 
2606         /* Looking for directory has a prefix 'mon_L' */
2607         if (!(node_id = STRSKIP(ent->d_name, "mon_L")))
2608             continue;
2609 
2610         /* Looking for directory has another '_' */
2611         node_id = strchr(node_id, '_');
2612         if (!node_id)
2613             continue;
2614 
2615         /* Skip the character '_' */
2616         if (!(node_id = STRSKIP(node_id, "_")))
2617             continue;
2618 
2619         stat = g_new0(virResctrlMonitorStats, 1);
2620         stat->features = g_new0(char *, nresources + 1);
2621 
2622         /* The node ID number should be here, parsing it. */
2623         if (virStrToLong_uip(node_id, NULL, 0, &stat->id) < 0)
2624             goto cleanup;
2625 
2626         for (i = 0; resources[i]; i++) {
2627             rv = virFileReadValueUllong(&val, "%s/%s/%s", datapath,
2628                                         ent->d_name, resources[i]);
2629             if (rv == -2) {
2630                 virReportError(VIR_ERR_INTERNAL_ERROR,
2631                                _("File '%s/%s/%s' does not exist."),
2632                                datapath, ent->d_name, resources[i]);
2633             }
2634             if (rv < 0)
2635                 goto cleanup;
2636 
2637             VIR_APPEND_ELEMENT(stat->vals, stat->nvals, val);
2638 
2639             stat->features[i] = g_strdup(resources[i]);
2640         }
2641 
2642         VIR_APPEND_ELEMENT(*stats, *nstats, stat);
2643     }
2644 
2645     /* Sort in id's ascending order */
2646     if (*nstats)
2647         qsort(*stats, *nstats, sizeof(**stats), virResctrlMonitorStatsSorter);
2648 
2649     ret = 0;
2650  cleanup:
2651     virResctrlMonitorStatsFree(stat);
2652     return ret;
2653 }
2654 
2655 
2656 void
virResctrlMonitorStatsFree(virResctrlMonitorStats * stat)2657 virResctrlMonitorStatsFree(virResctrlMonitorStats *stat)
2658 {
2659     if (!stat)
2660         return;
2661 
2662     g_strfreev(stat->features);
2663     g_free(stat->vals);
2664     g_free(stat);
2665 }
2666