1 /*
2  * capabilities.c: hypervisor capabilities
3  *
4  * Copyright (C) 2006-2015 Red Hat, Inc.
5  * Copyright (C) 2006-2008 Daniel P. Berrange
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library.  If not, see
19  * <http://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include <unistd.h>
25 
26 #include "capabilities.h"
27 #include "cpu_conf.h"
28 #include "domain_conf.h"
29 #include "storage_conf.h"
30 #include "viralloc.h"
31 #include "virarch.h"
32 #include "virbuffer.h"
33 #include "virerror.h"
34 #include "virfile.h"
35 #include "virhostcpu.h"
36 #include "virhostmem.h"
37 #include "virlog.h"
38 #include "virnuma.h"
39 #include "virstring.h"
40 #include "virtypedparam.h"
41 #include "viruuid.h"
42 #include "virenum.h"
43 #include "virutil.h"
44 
45 #define VIR_FROM_THIS VIR_FROM_CAPABILITIES
46 
47 #define SYSFS_SYSTEM_PATH "/sys/devices/system"
48 
49 VIR_LOG_INIT("conf.capabilities");
50 
/* String conversion helpers for the virCapsHostPMTarget enum; the table
 * holds VIR_NODE_SUSPEND_TARGET_LAST entries. */
VIR_ENUM_DECL(virCapsHostPMTarget);
VIR_ENUM_IMPL(virCapsHostPMTarget,
              VIR_NODE_SUSPEND_TARGET_LAST,
              "suspend_mem", "suspend_disk", "suspend_hybrid",
);

/* Class handle handed to virObjectNew() when creating virCaps objects */
static virClass *virCapsClass;
/* Forward declaration; the definition appears further down in this file */
static void virCapsDispose(void *obj);
59 
virCapabilitiesOnceInit(void)60 static int virCapabilitiesOnceInit(void)
61 {
62     if (!VIR_CLASS_NEW(virCaps, virClassForObject()))
63         return -1;
64 
65     return 0;
66 }
67 
68 VIR_ONCE_GLOBAL_INIT(virCapabilities);
69 
70 /**
71  * virCapabilitiesNew:
72  * @hostarch: host machine architecture
73  * @offlineMigrate: true if offline migration is available
74  * @liveMigrate: true if live migration is available
75  *
76  * Allocate a new capabilities object
77  */
78 virCaps *
virCapabilitiesNew(virArch hostarch,bool offlineMigrate,bool liveMigrate)79 virCapabilitiesNew(virArch hostarch,
80                    bool offlineMigrate,
81                    bool liveMigrate)
82 {
83     virCaps *caps;
84 
85     if (virCapabilitiesInitialize() < 0)
86         return NULL;
87 
88     if (!(caps = virObjectNew(virCapsClass)))
89         return NULL;
90 
91     caps->host.arch = hostarch;
92     caps->host.offlineMigrate = offlineMigrate;
93     caps->host.liveMigrate = liveMigrate;
94 
95     return caps;
96 }
97 
98 void
virCapabilitiesClearHostNUMACellCPUTopology(virCapsHostNUMACellCPU * cpus,size_t ncpus)99 virCapabilitiesClearHostNUMACellCPUTopology(virCapsHostNUMACellCPU *cpus,
100                                             size_t ncpus)
101 {
102     size_t i;
103 
104     if (!cpus)
105         return;
106 
107     for (i = 0; i < ncpus; i++) {
108         virBitmapFree(cpus[i].siblings);
109         cpus[i].siblings = NULL;
110     }
111 }
112 
113 static void
virCapabilitiesFreeHostNUMACell(virCapsHostNUMACell * cell)114 virCapabilitiesFreeHostNUMACell(virCapsHostNUMACell *cell)
115 {
116     if (cell == NULL)
117         return;
118 
119     virCapabilitiesClearHostNUMACellCPUTopology(cell->cpus, cell->ncpus);
120 
121     g_free(cell->cpus);
122     g_free(cell->distances);
123     g_free(cell->pageinfo);
124     if (cell->caches)
125         g_array_unref(cell->caches);
126     g_free(cell);
127 }
128 
129 static void
virCapabilitiesFreeGuestMachine(virCapsGuestMachine * machine)130 virCapabilitiesFreeGuestMachine(virCapsGuestMachine *machine)
131 {
132     if (machine == NULL)
133         return;
134     g_free(machine->name);
135     g_free(machine->canonical);
136     g_free(machine);
137 }
138 
139 static void
virCapabilitiesFreeGuestDomain(virCapsGuestDomain * dom)140 virCapabilitiesFreeGuestDomain(virCapsGuestDomain *dom)
141 {
142     size_t i;
143     if (dom == NULL)
144         return;
145 
146     g_free(dom->info.emulator);
147     g_free(dom->info.loader);
148     for (i = 0; i < dom->info.nmachines; i++)
149         virCapabilitiesFreeGuestMachine(dom->info.machines[i]);
150     g_free(dom->info.machines);
151 
152     g_free(dom);
153 }
154 
155 void
virCapabilitiesFreeGuest(virCapsGuest * guest)156 virCapabilitiesFreeGuest(virCapsGuest *guest)
157 {
158     size_t i;
159     if (guest == NULL)
160         return;
161 
162     g_free(guest->arch.defaultInfo.emulator);
163     g_free(guest->arch.defaultInfo.loader);
164     for (i = 0; i < guest->arch.defaultInfo.nmachines; i++)
165         virCapabilitiesFreeGuestMachine(guest->arch.defaultInfo.machines[i]);
166     g_free(guest->arch.defaultInfo.machines);
167 
168     for (i = 0; i < guest->arch.ndomains; i++)
169         virCapabilitiesFreeGuestDomain(guest->arch.domains[i]);
170     g_free(guest->arch.domains);
171 
172     g_free(guest);
173 }
174 
175 
176 static void
virCapabilitiesFreeStoragePool(virCapsStoragePool * pool)177 virCapabilitiesFreeStoragePool(virCapsStoragePool *pool)
178 {
179     if (!pool)
180         return;
181 
182     g_free(pool);
183 }
184 
185 
186 void
virCapabilitiesHostNUMAUnref(virCapsHostNUMA * caps)187 virCapabilitiesHostNUMAUnref(virCapsHostNUMA *caps)
188 {
189     if (!caps)
190         return;
191 
192     if (g_atomic_int_dec_and_test(&caps->refs)) {
193         g_ptr_array_unref(caps->cells);
194         if (caps->interconnects)
195             g_array_unref(caps->interconnects);
196         g_free(caps);
197     }
198 }
199 
200 void
virCapabilitiesHostNUMARef(virCapsHostNUMA * caps)201 virCapabilitiesHostNUMARef(virCapsHostNUMA *caps)
202 {
203     g_atomic_int_inc(&caps->refs);
204 }
205 
206 static void
virCapsHostMemBWNodeFree(virCapsHostMemBWNode * ptr)207 virCapsHostMemBWNodeFree(virCapsHostMemBWNode *ptr)
208 {
209     if (!ptr)
210         return;
211 
212     virBitmapFree(ptr->cpus);
213     g_free(ptr);
214 }
215 
216 static void
virCapabilitiesClearSecModel(virCapsHostSecModel * secmodel)217 virCapabilitiesClearSecModel(virCapsHostSecModel *secmodel)
218 {
219     size_t i;
220     for (i = 0; i < secmodel->nlabels; i++) {
221         VIR_FREE(secmodel->labels[i].type);
222         VIR_FREE(secmodel->labels[i].label);
223     }
224 
225     VIR_FREE(secmodel->labels);
226     VIR_FREE(secmodel->model);
227     VIR_FREE(secmodel->doi);
228 }
229 
230 static void
virCapsDispose(void * object)231 virCapsDispose(void *object)
232 {
233     virCaps *caps = object;
234     size_t i;
235 
236     for (i = 0; i < caps->npools; i++)
237         virCapabilitiesFreeStoragePool(caps->pools[i]);
238     g_free(caps->pools);
239 
240     for (i = 0; i < caps->nguests; i++)
241         virCapabilitiesFreeGuest(caps->guests[i]);
242     g_free(caps->guests);
243 
244     for (i = 0; i < caps->host.nfeatures; i++)
245         g_free(caps->host.features[i]);
246     g_free(caps->host.features);
247 
248     if (caps->host.numa)
249         virCapabilitiesHostNUMAUnref(caps->host.numa);
250 
251     for (i = 0; i < caps->host.nmigrateTrans; i++)
252         g_free(caps->host.migrateTrans[i]);
253     g_free(caps->host.migrateTrans);
254 
255     for (i = 0; i < caps->host.nsecModels; i++)
256         virCapabilitiesClearSecModel(&caps->host.secModels[i]);
257     g_free(caps->host.secModels);
258 
259     for (i = 0; i < caps->host.cache.nbanks; i++)
260         virCapsHostCacheBankFree(caps->host.cache.banks[i]);
261     virResctrlInfoMonFree(caps->host.cache.monitor);
262     g_free(caps->host.cache.banks);
263 
264     for (i = 0; i < caps->host.memBW.nnodes; i++)
265         virCapsHostMemBWNodeFree(caps->host.memBW.nodes[i]);
266     virResctrlInfoMonFree(caps->host.memBW.monitor);
267     g_free(caps->host.memBW.nodes);
268 
269     g_free(caps->host.netprefix);
270     g_free(caps->host.pagesSize);
271     virCPUDefFree(caps->host.cpu);
272     virObjectUnref(caps->host.resctrl);
273 }
274 
275 /**
276  * virCapabilitiesAddHostFeature:
277  * @caps: capabilities to extend
278  * @name: name of new feature
279  *
280  * Registers a new host CPU feature, eg 'pae', or 'vmx'
281  */
282 int
virCapabilitiesAddHostFeature(virCaps * caps,const char * name)283 virCapabilitiesAddHostFeature(virCaps *caps,
284                               const char *name)
285 {
286     VIR_RESIZE_N(caps->host.features, caps->host.nfeatures_max,
287                  caps->host.nfeatures, 1);
288     caps->host.features[caps->host.nfeatures] = g_strdup(name);
289     caps->host.nfeatures++;
290 
291     return 0;
292 }
293 
294 /**
295  * virCapabilitiesAddHostMigrateTransport:
296  * @caps: capabilities to extend
297  * @name: name of migration transport
298  *
299  * Registers a new domain migration transport URI
300  */
301 int
virCapabilitiesAddHostMigrateTransport(virCaps * caps,const char * name)302 virCapabilitiesAddHostMigrateTransport(virCaps *caps,
303                                        const char *name)
304 {
305     VIR_RESIZE_N(caps->host.migrateTrans, caps->host.nmigrateTrans_max,
306                  caps->host.nmigrateTrans, 1);
307     caps->host.migrateTrans[caps->host.nmigrateTrans] = g_strdup(name);
308     caps->host.nmigrateTrans++;
309 
310     return 0;
311 }
312 
313 /**
314  * virCapabilitiesSetNetPrefix:
315  * @caps: capabilities to extend
316  * @name: prefix for host generated network interfaces
317  *
318  * Registers the prefix that is used for generated network interfaces
319  */
320 int
virCapabilitiesSetNetPrefix(virCaps * caps,const char * prefix)321 virCapabilitiesSetNetPrefix(virCaps *caps,
322                             const char *prefix)
323 {
324     caps->host.netprefix = g_strdup(prefix);
325 
326     return 0;
327 }
328 
329 
330 /**
331  * virCapabilitiesHostNUMAAddCell:
332  * @caps: capabilities to extend
333  * @num: ID number of NUMA cell
334  * @mem: Total size of memory in the NUMA node (in KiB)
335  * @ncpus: number of CPUs in cell
336  * @cpus: array of CPU definition structures
337  * @ndistances: number of sibling NUMA nodes
338  * @distances: NUMA distances to other nodes
339  * @npageinfo: number of pages at node @num
340  * @pageinfo: info on each single memory page
341  * @caches: info on memory side caches
342  *
343  * Registers a new NUMA cell for a host, passing in a array of
344  * CPU IDs belonging to the cell, distances to other NUMA nodes
345  * and info on hugepages on the node.
346  *
347  * All pointers are stolen.
348  */
349 void
virCapabilitiesHostNUMAAddCell(virCapsHostNUMA * caps,int num,unsigned long long mem,int ncpus,virCapsHostNUMACellCPU ** cpus,int ndistances,virNumaDistance ** distances,int npageinfo,virCapsHostNUMACellPageInfo ** pageinfo,GArray ** caches)350 virCapabilitiesHostNUMAAddCell(virCapsHostNUMA *caps,
351                                int num,
352                                unsigned long long mem,
353                                int ncpus,
354                                virCapsHostNUMACellCPU **cpus,
355                                int ndistances,
356                                virNumaDistance **distances,
357                                int npageinfo,
358                                virCapsHostNUMACellPageInfo **pageinfo,
359                                GArray **caches)
360 {
361     virCapsHostNUMACell *cell = g_new0(virCapsHostNUMACell, 1);
362 
363     cell->num = num;
364     cell->mem = mem;
365     if (cpus) {
366         cell->ncpus = ncpus;
367         cell->cpus = g_steal_pointer(cpus);
368     }
369     if (distances) {
370         cell->ndistances = ndistances;
371         cell->distances = g_steal_pointer(distances);
372     }
373     if (pageinfo) {
374         cell->npageinfo = npageinfo;
375         cell->pageinfo = g_steal_pointer(pageinfo);
376     }
377     if (caches) {
378         cell->caches = g_steal_pointer(caches);
379     }
380 
381     g_ptr_array_add(caps->cells, cell);
382 }
383 
384 /**
385  * virCapabilitiesAllocMachines:
386  * @machines: machine variants for emulator ('pc', or 'isapc', etc)
387  * @nmachines: number of machine variants for emulator
388  *
389  * Allocate a table of virCapsGuestMachine *from the supplied table
390  * of machine names.
391  */
392 virCapsGuestMachine **
virCapabilitiesAllocMachines(const char * const * names,int nnames)393 virCapabilitiesAllocMachines(const char *const *names, int nnames)
394 {
395     virCapsGuestMachine **machines;
396     size_t i;
397 
398     machines = g_new0(virCapsGuestMachine *, nnames);
399 
400     for (i = 0; i < nnames; i++) {
401         machines[i] = g_new0(virCapsGuestMachine, 1);
402         machines[i]->name = g_strdup(names[i]);
403     }
404 
405     return machines;
406 }
407 
408 /**
409  * virCapabilitiesFreeMachines:
410  * @machines: table of vircapsGuestMachinePtr
411  *
412  * Free a table of virCapsGuestMachine *
413  */
414 void
virCapabilitiesFreeMachines(virCapsGuestMachine ** machines,int nmachines)415 virCapabilitiesFreeMachines(virCapsGuestMachine **machines,
416                             int nmachines)
417 {
418     size_t i;
419     if (!machines)
420         return;
421     for (i = 0; i < nmachines && machines[i]; i++) {
422         virCapabilitiesFreeGuestMachine(machines[i]);
423         machines[i] = NULL;
424     }
425     g_free(machines);
426 }
427 
428 /**
429  * virCapabilitiesAddGuest:
430  * @caps: capabilities to extend
431  * @ostype: guest operating system type, of enum VIR_DOMAIN_OSTYPE
432  * @arch: guest CPU architecture
433  * @wordsize: number of bits in CPU word
434  * @emulator: path to default device emulator for arch/ostype
435  * @loader: path to default BIOS loader for arch/ostype
436  * @nmachines: number of machine variants for emulator
437  * @machines: machine variants for emulator ('pc', or 'isapc', etc)
438  *
439  * Registers a new guest operating system. This should be
440  * followed by registration of at least one domain for
441  * running the guest
442  */
443 virCapsGuest *
virCapabilitiesAddGuest(virCaps * caps,int ostype,virArch arch,const char * emulator,const char * loader,int nmachines,virCapsGuestMachine ** machines)444 virCapabilitiesAddGuest(virCaps *caps,
445                         int ostype,
446                         virArch arch,
447                         const char *emulator,
448                         const char *loader,
449                         int nmachines,
450                         virCapsGuestMachine **machines)
451 {
452     virCapsGuest *guest;
453 
454     guest = g_new0(virCapsGuest, 1);
455 
456     guest->ostype = ostype;
457     guest->arch.id = arch;
458     guest->arch.wordsize = virArchGetWordSize(arch);
459 
460     guest->arch.defaultInfo.emulator = g_strdup(emulator);
461     guest->arch.defaultInfo.loader = g_strdup(loader);
462 
463     VIR_RESIZE_N(caps->guests, caps->nguests_max, caps->nguests, 1);
464     caps->guests[caps->nguests++] = guest;
465 
466     if (nmachines) {
467         guest->arch.defaultInfo.nmachines = nmachines;
468         guest->arch.defaultInfo.machines = machines;
469     }
470 
471     return guest;
472 }
473 
474 
475 /**
476  * virCapabilitiesAddGuestDomain:
477  * @guest: guest to support
478  * @hvtype: hypervisor type ('xen', 'qemu', 'kvm')
479  * @emulator: specialized device emulator for domain
480  * @loader: specialized BIOS loader for domain
481  * @nmachines: number of machine variants for emulator
482  * @machines: specialized machine variants for emulator
483  *
484  * Registers a virtual domain capable of running a
485  * guest operating system
486  */
487 virCapsGuestDomain *
virCapabilitiesAddGuestDomain(virCapsGuest * guest,int hvtype,const char * emulator,const char * loader,int nmachines,virCapsGuestMachine ** machines)488 virCapabilitiesAddGuestDomain(virCapsGuest *guest,
489                               int hvtype,
490                               const char *emulator,
491                               const char *loader,
492                               int nmachines,
493                               virCapsGuestMachine **machines)
494 {
495     virCapsGuestDomain *dom;
496 
497     dom = g_new0(virCapsGuestDomain, 1);
498 
499     dom->type = hvtype;
500     dom->info.emulator = g_strdup(emulator);
501     dom->info.loader = g_strdup(loader);
502 
503     VIR_RESIZE_N(guest->arch.domains, guest->arch.ndomains_max,
504                  guest->arch.ndomains, 1);
505     guest->arch.domains[guest->arch.ndomains] = dom;
506     guest->arch.ndomains++;
507 
508     if (nmachines) {
509         dom->info.nmachines = nmachines;
510         dom->info.machines = machines;
511     }
512 
513     return dom;
514 }
515 
516 
/* Static description of one guest feature type. */
struct virCapsGuestFeatureInfo {
    const char *name; /* feature name string */
    /* when true, the defaultOn/toggle values passed at registration
     * time are stored for the feature; otherwise they are not */
    bool togglesRequired;
};
521 
/* Per-feature descriptions, indexed by virCapsGuestFeatureType. */
static const struct virCapsGuestFeatureInfo virCapsGuestFeatureInfos[VIR_CAPS_GUEST_FEATURE_TYPE_LAST] = {
    [VIR_CAPS_GUEST_FEATURE_TYPE_PAE] = { "pae", false },
    [VIR_CAPS_GUEST_FEATURE_TYPE_NONPAE] = { "nonpae", false },
    [VIR_CAPS_GUEST_FEATURE_TYPE_IA64_BE] = { "ia64_be", false },
    [VIR_CAPS_GUEST_FEATURE_TYPE_ACPI] = { "acpi", true },
    [VIR_CAPS_GUEST_FEATURE_TYPE_APIC] = { "apic", true },
    [VIR_CAPS_GUEST_FEATURE_TYPE_CPUSELECTION] = { "cpuselection", false },
    [VIR_CAPS_GUEST_FEATURE_TYPE_DEVICEBOOT] = { "deviceboot", false },
    [VIR_CAPS_GUEST_FEATURE_TYPE_DISKSNAPSHOT] = { "disksnapshot", true },
    [VIR_CAPS_GUEST_FEATURE_TYPE_HAP] = { "hap", true },
};
533 
534 
535 static void
virCapabilitiesAddGuestFeatureInternal(virCapsGuest * guest,virCapsGuestFeatureType feature,bool defaultOn,bool toggle)536 virCapabilitiesAddGuestFeatureInternal(virCapsGuest *guest,
537                                        virCapsGuestFeatureType feature,
538                                        bool defaultOn,
539                                        bool toggle)
540 {
541     guest->features[feature].present = true;
542 
543     if (virCapsGuestFeatureInfos[feature].togglesRequired) {
544         guest->features[feature].defaultOn = virTristateSwitchFromBool(defaultOn);
545         guest->features[feature].toggle = virTristateBoolFromBool(toggle);
546     }
547 }
548 
549 
550 /**
551  * virCapabilitiesAddGuestFeature:
552  * @guest: guest to associate feature with
553  * @feature: feature to add
554  *
555  * Registers a feature for a guest domain.
556  */
557 void
virCapabilitiesAddGuestFeature(virCapsGuest * guest,virCapsGuestFeatureType feature)558 virCapabilitiesAddGuestFeature(virCapsGuest *guest,
559                                virCapsGuestFeatureType feature)
560 {
561     virCapabilitiesAddGuestFeatureInternal(guest, feature, false, false);
562 }
563 
564 
565 /**
566  * virCapabilitiesAddGuestFeatureWithToggle:
567  * @guest: guest to associate feature with
568  * @feature: feature to add
569  * @defaultOn: true if it defaults to on
570  * @toggle: true if its state can be toggled
571  *
572  * Registers a feature with toggles for a guest domain.
573  */
574 void
virCapabilitiesAddGuestFeatureWithToggle(virCapsGuest * guest,virCapsGuestFeatureType feature,bool defaultOn,bool toggle)575 virCapabilitiesAddGuestFeatureWithToggle(virCapsGuest *guest,
576                                          virCapsGuestFeatureType feature,
577                                          bool defaultOn,
578                                          bool toggle)
579 {
580     virCapabilitiesAddGuestFeatureInternal(guest, feature, defaultOn, toggle);
581 }
582 
583 
584 /**
585  * virCapabilitiesHostSecModelAddBaseLabel
586  * @secmodel: Security model to add a base label for
587  * @type: virtualization type
588  * @label: base label
589  *
590  * Returns non-zero on error.
591  */
592 extern int
virCapabilitiesHostSecModelAddBaseLabel(virCapsHostSecModel * secmodel,const char * type,const char * label)593 virCapabilitiesHostSecModelAddBaseLabel(virCapsHostSecModel *secmodel,
594                                         const char *type,
595                                         const char *label)
596 {
597     if (type == NULL || label == NULL)
598         return -1;
599 
600     VIR_EXPAND_N(secmodel->labels, secmodel->nlabels, 1);
601     secmodel->labels[secmodel->nlabels - 1].type = g_strdup(type);
602     secmodel->labels[secmodel->nlabels - 1].label = g_strdup(label);
603 
604     return 0;
605 }
606 
607 
608 static virCapsDomainData *
virCapabilitiesDomainDataLookupInternal(virCaps * caps,int ostype,virArch arch,virDomainVirtType domaintype,const char * emulator,const char * machinetype)609 virCapabilitiesDomainDataLookupInternal(virCaps *caps,
610                                         int ostype,
611                                         virArch arch,
612                                         virDomainVirtType domaintype,
613                                         const char *emulator,
614                                         const char *machinetype)
615 {
616     virCapsGuest *foundguest = NULL;
617     virCapsGuestDomain *founddomain = NULL;
618     virCapsGuestMachine *foundmachine = NULL;
619     virCapsDomainData *ret = NULL;
620     size_t i, j, k;
621 
622     VIR_DEBUG("Lookup ostype=%d arch=%d domaintype=%d emulator=%s machine=%s",
623               ostype, arch, domaintype, NULLSTR(emulator), NULLSTR(machinetype));
624     for (i = 0; i < caps->nguests; i++) {
625         virCapsGuest *guest = caps->guests[i];
626 
627         if (ostype != -1 && guest->ostype != ostype) {
628             VIR_DEBUG("Skip os type want=%d vs got=%d", ostype, guest->ostype);
629             continue;
630         }
631         VIR_DEBUG("Match os type %d", ostype);
632 
633         if ((arch != VIR_ARCH_NONE) && (guest->arch.id != arch)) {
634             VIR_DEBUG("Skip arch want=%d vs got=%d", arch, guest->arch.id);
635             continue;
636         }
637         VIR_DEBUG("Match arch %d", arch);
638 
639         for (j = 0; j < guest->arch.ndomains; j++) {
640             virCapsGuestDomain *domain = guest->arch.domains[j];
641             virCapsGuestMachine **machinelist;
642             int nmachines;
643             const char *check_emulator = NULL;
644 
645             if (domaintype != VIR_DOMAIN_VIRT_NONE &&
646                 (domain->type != domaintype)) {
647                 VIR_DEBUG("Skip domain type want=%d vs got=%d", domaintype, domain->type);
648                 continue;
649             }
650             VIR_DEBUG("Match domain type %d", domaintype);
651 
652             check_emulator = domain->info.emulator;
653             if (!check_emulator)
654                 check_emulator = guest->arch.defaultInfo.emulator;
655             if (emulator && STRNEQ_NULLABLE(check_emulator, emulator)) {
656                 VIR_DEBUG("Skip emulator got=%s vs want=%s",
657                           emulator, NULLSTR(check_emulator));
658                 continue;
659             }
660             VIR_DEBUG("Match emulator %s", NULLSTR(emulator));
661 
662             if (domain->info.nmachines) {
663                 nmachines = domain->info.nmachines;
664                 machinelist = domain->info.machines;
665             } else {
666                 nmachines = guest->arch.defaultInfo.nmachines;
667                 machinelist = guest->arch.defaultInfo.machines;
668             }
669 
670             for (k = 0; k < nmachines; k++) {
671                 virCapsGuestMachine *machine = machinelist[k];
672 
673                 if (machinetype &&
674                     STRNEQ(machine->name, machinetype) &&
675                     STRNEQ_NULLABLE(machine->canonical, machinetype)) {
676                     VIR_DEBUG("Skip machine type want=%s vs got=%s got=%s",
677                               machinetype, machine->name, NULLSTR(machine->canonical));
678                     continue;
679                 }
680                 VIR_DEBUG("Match machine type machine %s", NULLSTR(machinetype));
681 
682                 foundmachine = machine;
683                 break;
684             }
685 
686             if (!foundmachine && nmachines)
687                 continue;
688 
689             founddomain = domain;
690             break;
691         }
692 
693         if (!founddomain)
694             continue;
695 
696         foundguest = guest;
697         break;
698     }
699 
700     /* XXX check default_emulator, see how it uses this */
701     if (!foundguest) {
702         g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
703         if (ostype)
704             virBufferAsprintf(&buf, "ostype=%s ",
705                               virDomainOSTypeToString(ostype));
706         if (arch)
707             virBufferAsprintf(&buf, "arch=%s ", virArchToString(arch));
708         if (domaintype > VIR_DOMAIN_VIRT_NONE)
709             virBufferAsprintf(&buf, "domaintype=%s ",
710                               virDomainVirtTypeToString(domaintype));
711         if (emulator)
712             virBufferEscapeString(&buf, "emulator=%s ", emulator);
713         if (machinetype)
714             virBufferEscapeString(&buf, "machine=%s ", machinetype);
715         if (virBufferCurrentContent(&buf) &&
716             !virBufferCurrentContent(&buf)[0])
717             virBufferAsprintf(&buf, "%s", _("any configuration"));
718 
719         virReportError(VIR_ERR_INVALID_ARG,
720                        _("could not find capabilities for %s"),
721                        virBufferCurrentContent(&buf));
722         return ret;
723     }
724 
725     ret = g_new0(virCapsDomainData, 1);
726 
727     ret->ostype = foundguest->ostype;
728     ret->arch = foundguest->arch.id;
729     if (founddomain) {
730         ret->domaintype = founddomain->type;
731         ret->emulator = founddomain->info.emulator;
732     }
733     if (!ret->emulator)
734         ret->emulator = foundguest->arch.defaultInfo.emulator;
735     if (foundmachine)
736         ret->machinetype = foundmachine->name;
737 
738     return ret;
739 }
740 
741 /**
742  * virCapabilitiesDomainDataLookup:
743  * @caps: capabilities to query
744  * @ostype: guest operating system type, of enum VIR_DOMAIN_OSTYPE
745  * @arch: Architecture to search for
746  * @domaintype: domain type to search for, of enum virDomainVirtType
747  * @emulator: Emulator path to search for
748  * @machinetype: Machine type to search for
749  *
750  * Search capabilities for the passed values, and if found return
751  * virCapabilitiesDomainDataLookup filled in with the default values
752  */
753 virCapsDomainData *
virCapabilitiesDomainDataLookup(virCaps * caps,int ostype,virArch arch,int domaintype,const char * emulator,const char * machinetype)754 virCapabilitiesDomainDataLookup(virCaps *caps,
755                                 int ostype,
756                                 virArch arch,
757                                 int domaintype,
758                                 const char *emulator,
759                                 const char *machinetype)
760 {
761     virCapsDomainData *ret;
762 
763     if (arch == VIR_ARCH_NONE) {
764         /* Prefer host arch if its available */
765         ret = virCapabilitiesDomainDataLookupInternal(caps, ostype,
766                                                       caps->host.arch,
767                                                       domaintype,
768                                                       emulator, machinetype);
769         if (ret)
770             return ret;
771     }
772 
773     return virCapabilitiesDomainDataLookupInternal(caps, ostype,
774                                                    arch, domaintype,
775                                                    emulator, machinetype);
776 }
777 
778 
779 bool
virCapabilitiesDomainSupported(virCaps * caps,int ostype,virArch arch,int virttype)780 virCapabilitiesDomainSupported(virCaps *caps,
781                                int ostype,
782                                virArch arch,
783                                int virttype)
784 {
785     g_autofree virCapsDomainData *capsdata = NULL;
786 
787     capsdata = virCapabilitiesDomainDataLookup(caps, ostype,
788                                                arch,
789                                                virttype,
790                                                NULL, NULL);
791 
792     return capsdata != NULL;
793 }
794 
795 
796 int
virCapabilitiesAddStoragePool(virCaps * caps,int poolType)797 virCapabilitiesAddStoragePool(virCaps *caps,
798                               int poolType)
799 {
800     virCapsStoragePool *pool;
801 
802     pool = g_new0(virCapsStoragePool, 1);
803 
804     pool->type = poolType;
805 
806     VIR_RESIZE_N(caps->pools, caps->npools_max, caps->npools, 1);
807     caps->pools[caps->npools++] = pool;
808 
809     return 0;
810 }
811 
812 
813 static int
virCapsHostNUMACellCPUFormat(virBuffer * buf,const virCapsHostNUMACellCPU * cpus,int ncpus)814 virCapsHostNUMACellCPUFormat(virBuffer *buf,
815                              const virCapsHostNUMACellCPU *cpus,
816                              int ncpus)
817 {
818     g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
819     g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
820     size_t j;
821 
822     virBufferAsprintf(&attrBuf, " num='%d'", ncpus);
823 
824     for (j = 0; j < ncpus; j++) {
825         virBufferAsprintf(&childBuf, "<cpu id='%d'", cpus[j].id);
826 
827         if (cpus[j].siblings) {
828             g_autofree char *siblings = NULL;
829 
830             if (!(siblings = virBitmapFormat(cpus[j].siblings)))
831                 return -1;
832 
833             virBufferAsprintf(&childBuf,
834                               " socket_id='%d' die_id='%d' core_id='%d' siblings='%s'",
835                               cpus[j].socket_id,
836                               cpus[j].die_id,
837                               cpus[j].core_id,
838                               siblings);
839         }
840         virBufferAddLit(&childBuf, "/>\n");
841     }
842 
843     virXMLFormatElement(buf, "cpus", &attrBuf, &childBuf);
844     return 0;
845 }
846 
847 
848 static int
virCapabilitiesHostNUMAFormat(virBuffer * buf,virCapsHostNUMA * caps)849 virCapabilitiesHostNUMAFormat(virBuffer *buf,
850                               virCapsHostNUMA *caps)
851 {
852     size_t i;
853 
854     if (!caps)
855         return 0;
856 
857     virBufferAddLit(buf, "<topology>\n");
858     virBufferAdjustIndent(buf, 2);
859     virBufferAsprintf(buf, "<cells num='%d'>\n", caps->cells->len);
860     virBufferAdjustIndent(buf, 2);
861     for (i = 0; i < caps->cells->len; i++) {
862         virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, i);
863         size_t j;
864 
865         virBufferAsprintf(buf, "<cell id='%d'>\n", cell->num);
866         virBufferAdjustIndent(buf, 2);
867 
868         /* Print out the numacell memory total if it is available */
869         if (cell->mem)
870             virBufferAsprintf(buf, "<memory unit='KiB'>%llu</memory>\n",
871                               cell->mem);
872 
873         for (j = 0; j < cell->npageinfo; j++) {
874             virBufferAsprintf(buf, "<pages unit='KiB' size='%u'>%llu</pages>\n",
875                               cell->pageinfo[j].size,
876                               cell->pageinfo[j].avail);
877         }
878 
879         virNumaDistanceFormat(buf, cell->distances, cell->ndistances);
880 
881         if (cell->caches) {
882             virNumaCache *caches = &g_array_index(cell->caches, virNumaCache, 0);
883             virNumaCacheFormat(buf, caches, cell->caches->len);
884         }
885 
886         if (virCapsHostNUMACellCPUFormat(buf, cell->cpus, cell->ncpus) < 0)
887             return -1;
888 
889         virBufferAdjustIndent(buf, -2);
890         virBufferAddLit(buf, "</cell>\n");
891     }
892     virBufferAdjustIndent(buf, -2);
893     virBufferAddLit(buf, "</cells>\n");
894 
895     if (caps->interconnects) {
896         const virNumaInterconnect *interconnects;
897         interconnects = &g_array_index(caps->interconnects, virNumaInterconnect, 0);
898         virNumaInterconnectFormat(buf, interconnects, caps->interconnects->len);
899     }
900 
901     virBufferAdjustIndent(buf, -2);
902     virBufferAddLit(buf, "</topology>\n");
903     return 0;
904 }
905 
906 
907 static int
virCapabilitiesFormatResctrlMonitor(virBuffer * buf,virResctrlInfoMon * monitor)908 virCapabilitiesFormatResctrlMonitor(virBuffer *buf,
909                                     virResctrlInfoMon *monitor)
910 {
911     size_t i = 0;
912     g_auto(virBuffer) childrenBuf = VIR_BUFFER_INIT_CHILD(buf);
913 
914     /* monitor not supported, no capability */
915     if (!monitor)
916         return 0;
917 
918     /* no feature found in monitor means no capability, return */
919     if (monitor->nfeatures == 0)
920         return 0;
921 
922     virBufferAddLit(buf, "<monitor ");
923 
924     /* CMT might not enabled, if enabled show related attributes. */
925     if (monitor->type == VIR_RESCTRL_MONITOR_TYPE_CACHE)
926         virBufferAsprintf(buf,
927                           "level='%u' reuseThreshold='%u' ",
928                           monitor->cache_level,
929                           monitor->cache_reuse_threshold);
930     virBufferAsprintf(buf,
931                       "maxMonitors='%u'>\n",
932                       monitor->max_monitor);
933 
934     for (i = 0; i < monitor->nfeatures; i++) {
935         virBufferAsprintf(&childrenBuf,
936                           "<feature name='%s'/>\n",
937                           monitor->features[i]);
938     }
939 
940     virBufferAddBuffer(buf, &childrenBuf);
941     virBufferAddLit(buf, "</monitor>\n");
942 
943     return 0;
944 }
945 
946 static int
virCapabilitiesFormatCaches(virBuffer * buf,virCapsHostCache * cache)947 virCapabilitiesFormatCaches(virBuffer *buf,
948                             virCapsHostCache *cache)
949 {
950     size_t i = 0;
951     size_t j = 0;
952 
953     if (!cache->nbanks && !cache->monitor)
954         return 0;
955 
956     virBufferAddLit(buf, "<cache>\n");
957     virBufferAdjustIndent(buf, 2);
958 
959     for (i = 0; i < cache->nbanks; i++) {
960         g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
961         g_auto(virBuffer) childrenBuf = VIR_BUFFER_INIT_CHILD(buf);
962         virCapsHostCacheBank *bank = cache->banks[i];
963         g_autofree char *cpus_str = virBitmapFormat(bank->cpus);
964         const char *unit = NULL;
965         unsigned long long short_size = virFormatIntPretty(bank->size, &unit);
966 
967         if (!cpus_str)
968             return -1;
969 
970         /*
971          * Let's just *hope* the size is aligned to KiBs so that it does not
972          * bite is back in the future
973          */
974         virBufferAsprintf(&attrBuf,
975                           " id='%u' level='%u' type='%s' "
976                           "size='%llu' unit='%s' cpus='%s'",
977                           bank->id, bank->level,
978                           virCacheTypeToString(bank->type),
979                           short_size, unit, cpus_str);
980 
981         for (j = 0; j < bank->ncontrols; j++) {
982             const char *min_unit;
983             virResctrlInfoPerCache *controls = bank->controls[j];
984             unsigned long long gran_short_size = controls->granularity;
985             unsigned long long min_short_size = controls->min;
986 
987             gran_short_size = virFormatIntPretty(gran_short_size, &unit);
988             min_short_size = virFormatIntPretty(min_short_size, &min_unit);
989 
990             /* Only use the smaller unit if they are different */
991             if (min_short_size) {
992                 unsigned long long gran_div;
993                 unsigned long long min_div;
994 
995                 gran_div = controls->granularity / gran_short_size;
996                 min_div = controls->min / min_short_size;
997 
998                 if (min_div > gran_div) {
999                     min_short_size *= min_div / gran_div;
1000                 } else if (min_div < gran_div) {
1001                     unit = min_unit;
1002                     gran_short_size *= gran_div / min_div;
1003                 }
1004             }
1005 
1006             virBufferAsprintf(&childrenBuf,
1007                               "<control granularity='%llu'",
1008                               gran_short_size);
1009 
1010             if (min_short_size)
1011                 virBufferAsprintf(&childrenBuf, " min='%llu'", min_short_size);
1012 
1013             virBufferAsprintf(&childrenBuf,
1014                               " unit='%s' type='%s' maxAllocs='%u'/>\n",
1015                               unit,
1016                               virCacheTypeToString(controls->scope),
1017                               controls->max_allocation);
1018         }
1019 
1020         virXMLFormatElement(buf, "bank", &attrBuf, &childrenBuf);
1021     }
1022 
1023     if (virCapabilitiesFormatResctrlMonitor(buf, cache->monitor) < 0)
1024         return -1;
1025 
1026     virBufferAdjustIndent(buf, -2);
1027     virBufferAddLit(buf, "</cache>\n");
1028 
1029     return 0;
1030 }
1031 
1032 static int
virCapabilitiesFormatMemoryBandwidth(virBuffer * buf,virCapsHostMemBW * memBW)1033 virCapabilitiesFormatMemoryBandwidth(virBuffer *buf,
1034                                      virCapsHostMemBW *memBW)
1035 {
1036     size_t i = 0;
1037 
1038     if (!memBW->nnodes && !memBW->monitor)
1039         return 0;
1040 
1041     virBufferAddLit(buf, "<memory_bandwidth>\n");
1042     virBufferAdjustIndent(buf, 2);
1043 
1044     for (i = 0; i < memBW->nnodes; i++) {
1045         g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
1046         g_auto(virBuffer) childrenBuf = VIR_BUFFER_INIT_CHILD(buf);
1047         virCapsHostMemBWNode *node = memBW->nodes[i];
1048         virResctrlInfoMemBWPerNode *control = &node->control;
1049         g_autofree char *cpus_str = virBitmapFormat(node->cpus);
1050 
1051         if (!cpus_str)
1052             return -1;
1053 
1054         virBufferAsprintf(&attrBuf,
1055                           " id='%u' cpus='%s'",
1056                           node->id, cpus_str);
1057 
1058         virBufferAsprintf(&childrenBuf,
1059                           "<control granularity='%u' min='%u' "
1060                           "maxAllocs='%u'/>\n",
1061                           control->granularity, control->min,
1062                           control->max_allocation);
1063 
1064         virXMLFormatElement(buf, "node", &attrBuf, &childrenBuf);
1065     }
1066 
1067     if (virCapabilitiesFormatResctrlMonitor(buf, memBW->monitor) < 0)
1068         return -1;
1069 
1070     virBufferAdjustIndent(buf, -2);
1071     virBufferAddLit(buf, "</memory_bandwidth>\n");
1072 
1073     return 0;
1074 }
1075 
1076 
1077 static int
virCapabilitiesFormatHostXML(virCapsHost * host,virBuffer * buf)1078 virCapabilitiesFormatHostXML(virCapsHost *host,
1079                              virBuffer *buf)
1080 {
1081     size_t i, j;
1082     char host_uuid[VIR_UUID_STRING_BUFLEN];
1083 
1084     /* The lack of some data means we have nothing
1085      * minimally to format, so just return. */
1086     if (!virUUIDIsValid(host->host_uuid) &&
1087         !host->arch && !host->powerMgmt && !host->iommu)
1088         return 0;
1089 
1090     virBufferAddLit(buf, "<host>\n");
1091     virBufferAdjustIndent(buf, 2);
1092     if (virUUIDIsValid(host->host_uuid)) {
1093         virUUIDFormat(host->host_uuid, host_uuid);
1094         virBufferAsprintf(buf, "<uuid>%s</uuid>\n", host_uuid);
1095     }
1096     virBufferAddLit(buf, "<cpu>\n");
1097     virBufferAdjustIndent(buf, 2);
1098 
1099     if (host->arch)
1100         virBufferAsprintf(buf, "<arch>%s</arch>\n",
1101                           virArchToString(host->arch));
1102     if (host->nfeatures) {
1103         virBufferAddLit(buf, "<features>\n");
1104         virBufferAdjustIndent(buf, 2);
1105         for (i = 0; i < host->nfeatures; i++) {
1106             virBufferAsprintf(buf, "<%s/>\n",
1107                               host->features[i]);
1108         }
1109         virBufferAdjustIndent(buf, -2);
1110         virBufferAddLit(buf, "</features>\n");
1111     }
1112     virCPUDefFormatBuf(buf, host->cpu);
1113 
1114     for (i = 0; i < host->nPagesSize; i++) {
1115         virBufferAsprintf(buf, "<pages unit='KiB' size='%u'/>\n",
1116                           host->pagesSize[i]);
1117     }
1118 
1119     virBufferAdjustIndent(buf, -2);
1120     virBufferAddLit(buf, "</cpu>\n");
1121 
1122     /* The PM query was successful. */
1123     if (host->powerMgmt) {
1124         /* The host supports some PM features. */
1125         unsigned int pm = host->powerMgmt;
1126         virBufferAddLit(buf, "<power_management>\n");
1127         virBufferAdjustIndent(buf, 2);
1128         while (pm) {
1129             int bit = __builtin_ffs(pm) - 1;
1130             virBufferAsprintf(buf, "<%s/>\n",
1131                               virCapsHostPMTargetTypeToString(bit));
1132             pm &= ~(1U << bit);
1133         }
1134         virBufferAdjustIndent(buf, -2);
1135         virBufferAddLit(buf, "</power_management>\n");
1136     } else {
1137         /* The host does not support any PM feature. */
1138         virBufferAddLit(buf, "<power_management/>\n");
1139     }
1140 
1141     virBufferAsprintf(buf, "<iommu support='%s'/>\n",
1142                       host->iommu  ? "yes" : "no");
1143 
1144     if (host->offlineMigrate) {
1145         virBufferAddLit(buf, "<migration_features>\n");
1146         virBufferAdjustIndent(buf, 2);
1147         if (host->liveMigrate)
1148             virBufferAddLit(buf, "<live/>\n");
1149         if (host->nmigrateTrans) {
1150             virBufferAddLit(buf, "<uri_transports>\n");
1151             virBufferAdjustIndent(buf, 2);
1152             for (i = 0; i < host->nmigrateTrans; i++) {
1153                 virBufferAsprintf(buf, "<uri_transport>%s</uri_transport>\n",
1154                                   host->migrateTrans[i]);
1155             }
1156             virBufferAdjustIndent(buf, -2);
1157             virBufferAddLit(buf, "</uri_transports>\n");
1158         }
1159         virBufferAdjustIndent(buf, -2);
1160         virBufferAddLit(buf, "</migration_features>\n");
1161     }
1162 
1163     if (host->netprefix)
1164         virBufferAsprintf(buf, "<netprefix>%s</netprefix>\n",
1165                           host->netprefix);
1166 
1167     if (virCapabilitiesHostNUMAFormat(buf, host->numa) < 0)
1168         return -1;
1169 
1170     if (virCapabilitiesFormatCaches(buf, &host->cache) < 0)
1171         return -1;
1172 
1173     if (virCapabilitiesFormatMemoryBandwidth(buf, &host->memBW) < 0)
1174         return -1;
1175 
1176     for (i = 0; i < host->nsecModels; i++) {
1177         virBufferAddLit(buf, "<secmodel>\n");
1178         virBufferAdjustIndent(buf, 2);
1179         virBufferAsprintf(buf, "<model>%s</model>\n",
1180                           host->secModels[i].model);
1181         virBufferAsprintf(buf, "<doi>%s</doi>\n",
1182                           host->secModels[i].doi);
1183         for (j = 0; j < host->secModels[i].nlabels; j++) {
1184             virBufferAsprintf(buf, "<baselabel type='%s'>%s</baselabel>\n",
1185                               host->secModels[i].labels[j].type,
1186                               host->secModels[i].labels[j].label);
1187         }
1188         virBufferAdjustIndent(buf, -2);
1189         virBufferAddLit(buf, "</secmodel>\n");
1190     }
1191 
1192     virBufferAdjustIndent(buf, -2);
1193     virBufferAddLit(buf, "</host>\n\n");
1194 
1195     return 0;
1196 }
1197 
1198 
1199 static void
virCapabilitiesFormatGuestFeatures(virCapsGuest * guest,virBuffer * buf)1200 virCapabilitiesFormatGuestFeatures(virCapsGuest *guest,
1201                                    virBuffer *buf)
1202 {
1203     g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1204     size_t i;
1205 
1206     for (i = 0; i < VIR_CAPS_GUEST_FEATURE_TYPE_LAST; i++) {
1207         virCapsGuestFeature *feature = guest->features + i;
1208 
1209         if (!feature->present)
1210             continue;
1211 
1212         virBufferAsprintf(&childBuf, "<%s", virCapsGuestFeatureInfos[i].name);
1213 
1214         if (feature->defaultOn) {
1215             virBufferAsprintf(&childBuf, " default='%s'",
1216                               virTristateSwitchTypeToString(feature->defaultOn));
1217         }
1218 
1219         if (feature->toggle) {
1220             virBufferAsprintf(&childBuf, " toggle='%s'",
1221                               virTristateBoolTypeToString(feature->toggle));
1222         }
1223 
1224         virBufferAddLit(&childBuf, "/>\n");
1225     }
1226 
1227     virXMLFormatElement(buf, "features", NULL, &childBuf);
1228 }
1229 
1230 
1231 static void
virCapabilitiesFormatGuestXML(virCapsGuest ** guests,size_t nguests,virBuffer * buf)1232 virCapabilitiesFormatGuestXML(virCapsGuest **guests,
1233                               size_t nguests,
1234                               virBuffer *buf)
1235 {
1236     size_t i, j, k;
1237 
1238     for (i = 0; i < nguests; i++) {
1239         virBufferAddLit(buf, "<guest>\n");
1240         virBufferAdjustIndent(buf, 2);
1241         virBufferAsprintf(buf, "<os_type>%s</os_type>\n",
1242                           virDomainOSTypeToString(guests[i]->ostype));
1243         if (guests[i]->arch.id)
1244             virBufferAsprintf(buf, "<arch name='%s'>\n",
1245                               virArchToString(guests[i]->arch.id));
1246         virBufferAdjustIndent(buf, 2);
1247         virBufferAsprintf(buf, "<wordsize>%d</wordsize>\n",
1248                           guests[i]->arch.wordsize);
1249         if (guests[i]->arch.defaultInfo.emulator)
1250             virBufferAsprintf(buf, "<emulator>%s</emulator>\n",
1251                               guests[i]->arch.defaultInfo.emulator);
1252         if (guests[i]->arch.defaultInfo.loader)
1253             virBufferAsprintf(buf, "<loader>%s</loader>\n",
1254                               guests[i]->arch.defaultInfo.loader);
1255 
1256         for (j = 0; j < guests[i]->arch.defaultInfo.nmachines; j++) {
1257             virCapsGuestMachine *machine = guests[i]->arch.defaultInfo.machines[j];
1258             virBufferAddLit(buf, "<machine");
1259             if (machine->canonical)
1260                 virBufferAsprintf(buf, " canonical='%s'", machine->canonical);
1261             if (machine->maxCpus > 0)
1262                 virBufferAsprintf(buf, " maxCpus='%d'", machine->maxCpus);
1263             if (machine->deprecated)
1264                 virBufferAddLit(buf, " deprecated='yes'");
1265             virBufferAsprintf(buf, ">%s</machine>\n", machine->name);
1266         }
1267 
1268         for (j = 0; j < guests[i]->arch.ndomains; j++) {
1269             virBufferAsprintf(buf, "<domain type='%s'",
1270                 virDomainVirtTypeToString(guests[i]->arch.domains[j]->type));
1271             if (!guests[i]->arch.domains[j]->info.emulator &&
1272                 !guests[i]->arch.domains[j]->info.loader &&
1273                 !guests[i]->arch.domains[j]->info.nmachines) {
1274                 virBufferAddLit(buf, "/>\n");
1275                 continue;
1276             }
1277             virBufferAddLit(buf, ">\n");
1278             virBufferAdjustIndent(buf, 2);
1279             if (guests[i]->arch.domains[j]->info.emulator)
1280                 virBufferAsprintf(buf, "<emulator>%s</emulator>\n",
1281                                   guests[i]->arch.domains[j]->info.emulator);
1282             if (guests[i]->arch.domains[j]->info.loader)
1283                 virBufferAsprintf(buf, "<loader>%s</loader>\n",
1284                                   guests[i]->arch.domains[j]->info.loader);
1285 
1286             for (k = 0; k < guests[i]->arch.domains[j]->info.nmachines; k++) {
1287                 virCapsGuestMachine *machine = guests[i]->arch.domains[j]->info.machines[k];
1288                 virBufferAddLit(buf, "<machine");
1289                 if (machine->canonical)
1290                     virBufferAsprintf(buf, " canonical='%s'", machine->canonical);
1291                 if (machine->maxCpus > 0)
1292                     virBufferAsprintf(buf, " maxCpus='%d'", machine->maxCpus);
1293                 virBufferAsprintf(buf, ">%s</machine>\n", machine->name);
1294             }
1295             virBufferAdjustIndent(buf, -2);
1296             virBufferAddLit(buf, "</domain>\n");
1297         }
1298 
1299         virBufferAdjustIndent(buf, -2);
1300         virBufferAddLit(buf, "</arch>\n");
1301 
1302         virCapabilitiesFormatGuestFeatures(guests[i], buf);
1303 
1304         virBufferAdjustIndent(buf, -2);
1305         virBufferAddLit(buf, "</guest>\n\n");
1306     }
1307 }
1308 
1309 
1310 static void
virCapabilitiesFormatStoragePoolXML(virCapsStoragePool ** pools,size_t npools,virBuffer * buf)1311 virCapabilitiesFormatStoragePoolXML(virCapsStoragePool **pools,
1312                                     size_t npools,
1313                                     virBuffer *buf)
1314 {
1315     size_t i;
1316 
1317     if (npools == 0)
1318         return;
1319 
1320     virBufferAddLit(buf, "<pool>\n");
1321     virBufferAdjustIndent(buf, 2);
1322 
1323     virBufferAddLit(buf, "<enum name='type'>\n");
1324     virBufferAdjustIndent(buf, 2);
1325     for (i = 0; i < npools; i++)
1326         virBufferAsprintf(buf, "<value>%s</value>\n",
1327                           virStoragePoolTypeToString(pools[i]->type));
1328     virBufferAdjustIndent(buf, -2);
1329     virBufferAddLit(buf, "</enum>\n");
1330 
1331     virBufferAdjustIndent(buf, -2);
1332     virBufferAddLit(buf, "</pool>\n\n");
1333 }
1334 
1335 
1336 /**
1337  * virCapabilitiesFormatXML:
1338  * @caps: capabilities to format
1339  *
1340  * Convert the capabilities object into an XML representation
1341  *
1342  * Returns the XML document as a string
1343  */
1344 char *
virCapabilitiesFormatXML(virCaps * caps)1345 virCapabilitiesFormatXML(virCaps *caps)
1346 {
1347     g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
1348 
1349     virBufferAddLit(&buf, "<capabilities>\n\n");
1350     virBufferAdjustIndent(&buf, 2);
1351 
1352     if (virCapabilitiesFormatHostXML(&caps->host, &buf) < 0)
1353         return NULL;
1354 
1355     virCapabilitiesFormatGuestXML(caps->guests, caps->nguests, &buf);
1356 
1357     virCapabilitiesFormatStoragePoolXML(caps->pools, caps->npools, &buf);
1358 
1359     virBufferAdjustIndent(&buf, -2);
1360     virBufferAddLit(&buf, "</capabilities>\n");
1361 
1362     return virBufferContentAndReset(&buf);
1363 }
1364 
1365 /* get the maximum ID of cpus in the host */
1366 static unsigned int
virCapabilitiesHostNUMAGetMaxcpu(virCapsHostNUMA * caps)1367 virCapabilitiesHostNUMAGetMaxcpu(virCapsHostNUMA *caps)
1368 {
1369     unsigned int maxcpu = 0;
1370     size_t node;
1371     size_t cpu;
1372 
1373     for (node = 0; node < caps->cells->len; node++) {
1374         virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, node);
1375 
1376         for (cpu = 0; cpu < cell->ncpus; cpu++) {
1377             if (cell->cpus[cpu].id > maxcpu)
1378                 maxcpu = cell->cpus[cpu].id;
1379         }
1380     }
1381 
1382     return maxcpu;
1383 }
1384 
1385 /* set cpus of a numa node in the bitmask */
1386 static int
virCapabilitiesHostNUMAGetCellCpus(virCapsHostNUMA * caps,size_t node,virBitmap * cpumask)1387 virCapabilitiesHostNUMAGetCellCpus(virCapsHostNUMA *caps,
1388                                    size_t node,
1389                                    virBitmap *cpumask)
1390 {
1391     virCapsHostNUMACell *cell = NULL;
1392     size_t cpu;
1393     size_t i;
1394     /* The numa node numbers can be non-contiguous. Ex: 0,1,16,17. */
1395     for (i = 0; i < caps->cells->len; i++) {
1396         cell = g_ptr_array_index(caps->cells, i);
1397         if (cell->num == node)
1398             break;
1399         cell = NULL;
1400     }
1401 
1402     for (cpu = 0; cell && cpu < cell->ncpus; cpu++) {
1403         if (virBitmapSetBit(cpumask, cell->cpus[cpu].id) < 0) {
1404             virReportError(VIR_ERR_INTERNAL_ERROR,
1405                            _("Cpu '%u' in node '%zu' is out of range "
1406                              "of the provided bitmap"),
1407                            cell->cpus[cpu].id, node);
1408             return -1;
1409         }
1410     }
1411 
1412     return 0;
1413 }
1414 
1415 virBitmap *
virCapabilitiesHostNUMAGetCpus(virCapsHostNUMA * caps,virBitmap * nodemask)1416 virCapabilitiesHostNUMAGetCpus(virCapsHostNUMA *caps,
1417                                virBitmap *nodemask)
1418 {
1419     virBitmap *ret = NULL;
1420     unsigned int maxcpu = virCapabilitiesHostNUMAGetMaxcpu(caps);
1421     ssize_t node = -1;
1422 
1423     ret = virBitmapNew(maxcpu + 1);
1424 
1425     while ((node = virBitmapNextSetBit(nodemask, node)) >= 0) {
1426         if (virCapabilitiesHostNUMAGetCellCpus(caps, node, ret) < 0) {
1427             virBitmapFree(ret);
1428             return NULL;
1429         }
1430     }
1431 
1432     return ret;
1433 }
1434 
1435 
1436 int
virCapabilitiesHostNUMAGetMaxNode(virCapsHostNUMA * caps)1437 virCapabilitiesHostNUMAGetMaxNode(virCapsHostNUMA *caps)
1438 {
1439     virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, caps->cells->len - 1);
1440 
1441     return cell->num;
1442 }
1443 
1444 
1445 int
virCapabilitiesGetNodeInfo(virNodeInfoPtr nodeinfo)1446 virCapabilitiesGetNodeInfo(virNodeInfoPtr nodeinfo)
1447 {
1448     virArch hostarch = virArchFromHost();
1449     unsigned long long memorybytes;
1450 
1451     memset(nodeinfo, 0, sizeof(*nodeinfo));
1452 
1453     if (virStrcpyStatic(nodeinfo->model, virArchToString(hostarch)) < 0)
1454         return -1;
1455 
1456     if (virHostMemGetInfo(&memorybytes, NULL) < 0)
1457         return -1;
1458     nodeinfo->memory = memorybytes / 1024;
1459 
1460     if (virHostCPUGetInfo(hostarch,
1461                           &nodeinfo->cpus, &nodeinfo->mhz,
1462                           &nodeinfo->nodes, &nodeinfo->sockets,
1463                           &nodeinfo->cores, &nodeinfo->threads) < 0)
1464         return -1;
1465 
1466     return 0;
1467 }
1468 
1469 /* returns 1 on success, 0 if the detection failed and -1 on hard error */
1470 static int
virCapabilitiesFillCPUInfo(int cpu_id G_GNUC_UNUSED,virCapsHostNUMACellCPU * cpu G_GNUC_UNUSED)1471 virCapabilitiesFillCPUInfo(int cpu_id G_GNUC_UNUSED,
1472                            virCapsHostNUMACellCPU *cpu G_GNUC_UNUSED)
1473 {
1474 #ifdef __linux__
1475     cpu->id = cpu_id;
1476 
1477     if (virHostCPUGetSocket(cpu_id, &cpu->socket_id) < 0 ||
1478         virHostCPUGetDie(cpu_id, &cpu->die_id) < 0 ||
1479         virHostCPUGetCore(cpu_id, &cpu->core_id) < 0)
1480         return -1;
1481 
1482     if (!(cpu->siblings = virHostCPUGetSiblingsList(cpu_id)))
1483         return -1;
1484 
1485     return 0;
1486 #else
1487     virReportError(VIR_ERR_NO_SUPPORT, "%s",
1488                    _("node cpu info not implemented on this platform"));
1489     return -1;
1490 #endif
1491 }
1492 
1493 static int
virCapabilitiesGetNUMADistances(int node,virNumaDistance ** distancesRet,int * ndistancesRet)1494 virCapabilitiesGetNUMADistances(int node,
1495                                 virNumaDistance **distancesRet,
1496                                 int *ndistancesRet)
1497 {
1498     virNumaDistance *tmp = NULL;
1499     int tmp_size = 0;
1500     int ret = -1;
1501     int *distances = NULL;
1502     int ndistances = 0;
1503     size_t i;
1504 
1505     if (virNumaGetDistances(node, &distances, &ndistances) < 0)
1506         goto cleanup;
1507 
1508     if (!distances) {
1509         *distancesRet = NULL;
1510         *ndistancesRet = 0;
1511         return 0;
1512     }
1513 
1514     tmp = g_new0(virNumaDistance, ndistances);
1515 
1516     for (i = 0; i < ndistances; i++) {
1517         if (!distances[i])
1518             continue;
1519 
1520         tmp[tmp_size].cellid = i;
1521         tmp[tmp_size].value = distances[i];
1522         tmp_size++;
1523     }
1524 
1525     VIR_REALLOC_N(tmp, tmp_size);
1526 
1527     *ndistancesRet = tmp_size;
1528     *distancesRet = g_steal_pointer(&tmp);
1529     tmp_size = 0;
1530     ret = 0;
1531  cleanup:
1532     VIR_FREE(distances);
1533     VIR_FREE(tmp);
1534     return ret;
1535 }
1536 
1537 static int
virCapabilitiesGetNUMAPagesInfo(int node,virCapsHostNUMACellPageInfo ** pageinfo,int * npageinfo)1538 virCapabilitiesGetNUMAPagesInfo(int node,
1539                                 virCapsHostNUMACellPageInfo **pageinfo,
1540                                 int *npageinfo)
1541 {
1542     int ret = -1;
1543     unsigned int *pages_size = NULL;
1544     unsigned long long *pages_avail = NULL;
1545     size_t npages, i;
1546 
1547     if (virNumaGetPages(node, &pages_size, &pages_avail, NULL, &npages) < 0)
1548         goto cleanup;
1549 
1550     *pageinfo = g_new0(virCapsHostNUMACellPageInfo, npages);
1551     *npageinfo = npages;
1552 
1553     for (i = 0; i < npages; i++) {
1554         (*pageinfo)[i].size = pages_size[i];
1555         (*pageinfo)[i].avail = pages_avail[i];
1556     }
1557 
1558     ret = 0;
1559 
1560  cleanup:
1561     VIR_FREE(pages_avail);
1562     VIR_FREE(pages_size);
1563     return ret;
1564 }
1565 
1566 
1567 static int
virCapabilitiesGetNodeCacheReadFile(const char * prefix,const char * dir,const char * file,unsigned int * value)1568 virCapabilitiesGetNodeCacheReadFile(const char *prefix,
1569                                     const char *dir,
1570                                     const char *file,
1571                                     unsigned int *value)
1572 {
1573     g_autofree char *path = g_build_filename(prefix, dir, file, NULL);
1574     int rv = virFileReadValueUint(value, "%s", path);
1575 
1576     if (rv < 0) {
1577         if (rv == -2) {
1578             virReportError(VIR_ERR_INTERNAL_ERROR,
1579                            _("File '%s' does not exist"),
1580                            path);
1581         }
1582         return -1;
1583     }
1584 
1585     return 0;
1586 }
1587 
1588 
1589 static int
virCapsHostNUMACellCacheComparator(const void * a,const void * b)1590 virCapsHostNUMACellCacheComparator(const void *a,
1591                                    const void *b)
1592 {
1593     const virNumaCache *aa = a;
1594     const virNumaCache *bb = b;
1595 
1596     return aa->level - bb->level;
1597 }
1598 
1599 
1600 static int
virCapabilitiesGetNodeCache(int node,GArray ** cachesRet)1601 virCapabilitiesGetNodeCache(int node,
1602                             GArray **cachesRet)
1603 {
1604     g_autoptr(DIR) dir = NULL;
1605     int direrr = 0;
1606     struct dirent *entry;
1607     g_autofree char *path = NULL;
1608     g_autoptr(GArray) caches = g_array_new(FALSE, FALSE, sizeof(virNumaCache));
1609 
1610     path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%d/memory_side_cache", node);
1611 
1612     if (virDirOpenIfExists(&dir, path) < 0)
1613         return -1;
1614 
1615     while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
1616         const char *dname = STRSKIP(entry->d_name, "index");
1617         virNumaCache cache = { 0 };
1618         unsigned int indexing;
1619         unsigned int write_policy;
1620 
1621         if (!dname)
1622             continue;
1623 
1624         if (virStrToLong_ui(dname, NULL, 10, &cache.level) < 0) {
1625             virReportError(VIR_ERR_INTERNAL_ERROR,
1626                            _("unable to parse %s"),
1627                            entry->d_name);
1628             return -1;
1629         }
1630 
1631         if (virCapabilitiesGetNodeCacheReadFile(path, entry->d_name,
1632                                                 "size", &cache.size) < 0)
1633             return -1;
1634 
1635         cache.size >>= 10; /* read in bytes but stored in kibibytes */
1636 
1637         if (virCapabilitiesGetNodeCacheReadFile(path, entry->d_name,
1638                                                 "line_size", &cache.line) < 0)
1639             return -1;
1640 
1641         if (virCapabilitiesGetNodeCacheReadFile(path, entry->d_name,
1642                                                 "indexing", &indexing) < 0)
1643             return -1;
1644 
1645         /* see enum cache_indexing in kernel */
1646         switch (indexing) {
1647         case 0: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_DIRECT; break;
1648         case 1: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_FULL; break;
1649         case 2: cache.associativity = VIR_NUMA_CACHE_ASSOCIATIVITY_NONE; break;
1650         default:
1651                 virReportError(VIR_ERR_INTERNAL_ERROR,
1652                                _("unknown indexing value '%u'"),
1653                                indexing);
1654                 return -1;
1655         }
1656 
1657         if (virCapabilitiesGetNodeCacheReadFile(path, entry->d_name,
1658                                                 "write_policy", &write_policy) < 0)
1659             return -1;
1660 
1661         /* see enum cache_write_policy in kernel */
1662         switch (write_policy) {
1663         case 0: cache.policy = VIR_NUMA_CACHE_POLICY_WRITEBACK; break;
1664         case 1: cache.policy = VIR_NUMA_CACHE_POLICY_WRITETHROUGH; break;
1665         case 2: cache.policy = VIR_NUMA_CACHE_POLICY_NONE; break;
1666         default:
1667                 virReportError(VIR_ERR_INTERNAL_ERROR,
1668                                _("unknown write_policy value '%u'"),
1669                                write_policy);
1670                 return -1;
1671         }
1672 
1673         g_array_append_val(caches, cache);
1674     }
1675 
1676     if (direrr < 0)
1677         return -1;
1678 
1679     if (caches->len > 0) {
1680         g_array_sort(caches, virCapsHostNUMACellCacheComparator);
1681         *cachesRet = g_steal_pointer(&caches);
1682     } else {
1683         *cachesRet = NULL;
1684     }
1685 
1686     return 0;
1687 }
1688 
1689 
1690 static int
virCapabilitiesHostNUMAInitFake(virCapsHostNUMA * caps)1691 virCapabilitiesHostNUMAInitFake(virCapsHostNUMA *caps)
1692 {
1693     virNodeInfo nodeinfo;
1694     virCapsHostNUMACellCPU *cpus;
1695     int ncpus;
1696     int n, s, c, t;
1697     int id, cid;
1698     int onlinecpus G_GNUC_UNUSED;
1699     bool tmp;
1700 
1701     if (virCapabilitiesGetNodeInfo(&nodeinfo) < 0)
1702         return -1;
1703 
1704     ncpus = VIR_NODEINFO_MAXCPUS(nodeinfo);
1705 
1706 
1707     id = 0;
1708     for (n = 0; n < nodeinfo.nodes; n++) {
1709         int nodecpus = nodeinfo.sockets * nodeinfo.cores * nodeinfo.threads;
1710         cid = 0;
1711 
1712         cpus = g_new0(virCapsHostNUMACellCPU, nodecpus);
1713 
1714         for (s = 0; s < nodeinfo.sockets; s++) {
1715             for (c = 0; c < nodeinfo.cores; c++) {
1716                 g_autoptr(virBitmap) siblings = virBitmapNew(ncpus);
1717                 for (t = 0; t < nodeinfo.threads; t++)
1718                     ignore_value(virBitmapSetBit(siblings, id + t));
1719 
1720                 for (t = 0; t < nodeinfo.threads; t++) {
1721                     if (virHostCPUGetOnline(id, &tmp) < 0)
1722                         goto error;
1723                     if (tmp) {
1724                         cpus[cid].id = id;
1725                         cpus[cid].die_id = 0;
1726                         cpus[cid].socket_id = s;
1727                         cpus[cid].core_id = c;
1728                         cpus[cid].siblings = virBitmapNewCopy(siblings);
1729                         cid++;
1730                     }
1731 
1732                     id++;
1733                 }
1734             }
1735         }
1736 
1737         virCapabilitiesHostNUMAAddCell(caps, 0,
1738                                        nodeinfo.memory,
1739                                        cid, &cpus,
1740                                        0, NULL,
1741                                        0, NULL,
1742                                        NULL);
1743     }
1744 
1745     return 0;
1746 
1747  error:
1748     for (; cid >= 0; cid--)
1749         virBitmapFree(cpus[cid].siblings);
1750     VIR_FREE(cpus);
1751     return -1;
1752 }
1753 
1754 
1755 static void
virCapabilitiesHostInsertHMAT(GArray * interconnects,unsigned int initiator,unsigned int target,unsigned int read_bandwidth,unsigned int write_bandwidth,unsigned int read_latency,unsigned int write_latency)1756 virCapabilitiesHostInsertHMAT(GArray *interconnects,
1757                               unsigned int initiator,
1758                               unsigned int target,
1759                               unsigned int read_bandwidth,
1760                               unsigned int write_bandwidth,
1761                               unsigned int read_latency,
1762                               unsigned int write_latency)
1763 {
1764     virNumaInterconnect ni;
1765 
1766     ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
1767         initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_bandwidth};
1768     g_array_append_val(interconnects, ni);
1769 
1770     ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH,
1771         initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_bandwidth};
1772     g_array_append_val(interconnects, ni);
1773 
1774     ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
1775         initiator, target, 0, VIR_MEMORY_LATENCY_READ, read_latency};
1776     g_array_append_val(interconnects, ni);
1777 
1778     ni = (virNumaInterconnect) { VIR_NUMA_INTERCONNECT_TYPE_LATENCY,
1779         initiator, target, 0, VIR_MEMORY_LATENCY_WRITE, write_latency};
1780     g_array_append_val(interconnects, ni);
1781 }
1782 
1783 
1784 static int
virCapabilitiesHostNUMAInitInterconnectsNode(GArray * interconnects,unsigned int node)1785 virCapabilitiesHostNUMAInitInterconnectsNode(GArray *interconnects,
1786                                              unsigned int node)
1787 {
1788     g_autofree char *path = NULL;
1789     g_autofree char *initPath = NULL;
1790     g_autoptr(DIR) dir = NULL;
1791     int direrr = 0;
1792     struct dirent *entry;
1793     unsigned int read_bandwidth;
1794     unsigned int write_bandwidth;
1795     unsigned int read_latency;
1796     unsigned int write_latency;
1797 
1798     /* Unfortunately, kernel does not expose full HMAT table. I mean it does,
1799      * in its binary form under /sys/firmware/acpi/tables/HMAT but we don't
1800      * want to parse that. But some important info is still exposed, under
1801      * "access0" and "access1" directories. The former contains the best
1802      * interconnect to given node including CPUs and devices that might do I/O
1803      * (such as GPUs and NICs). The latter contains the best interconnect to
1804      * given node but only CPUs are considered. Stick with access1 until sysfs
1805      * exposes the full table in a sensible way.
1806      * NB on most system access0 and access1 contain the same values. */
1807     path = g_strdup_printf(SYSFS_SYSTEM_PATH "/node/node%d/access1", node);
1808 
1809     if (!virFileExists(path))
1810         return 0;
1811 
1812     if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
1813                                             "read_bandwidth",
1814                                             &read_bandwidth) < 0)
1815         return -1;
1816     if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
1817                                             "write_bandwidth",
1818                                             &write_bandwidth) < 0)
1819         return -1;
1820 
1821     /* Bandwidths are read in MiB but stored in KiB */
1822     read_bandwidth <<= 10;
1823     write_bandwidth <<= 10;
1824 
1825     if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
1826                                             "read_latency",
1827                                             &read_latency) < 0)
1828         return -1;
1829     if (virCapabilitiesGetNodeCacheReadFile(path, "initiators",
1830                                             "write_latency",
1831                                             &write_latency) < 0)
1832         return -1;
1833 
1834     initPath = g_strdup_printf("%s/initiators", path);
1835 
1836     if (virDirOpen(&dir, initPath) < 0)
1837         return -1;
1838 
1839     while ((direrr = virDirRead(dir, &entry, path)) > 0) {
1840         const char *dname = STRSKIP(entry->d_name, "node");
1841         unsigned int initNode;
1842 
1843         if (!dname)
1844             continue;
1845 
1846         if (virStrToLong_ui(dname, NULL, 10, &initNode) < 0) {
1847             virReportError(VIR_ERR_INTERNAL_ERROR,
1848                            _("unable to parse %s"),
1849                            entry->d_name);
1850             return -1;
1851         }
1852 
1853         virCapabilitiesHostInsertHMAT(interconnects,
1854                                       initNode, node,
1855                                       read_bandwidth,
1856                                       write_bandwidth,
1857                                       read_latency,
1858                                       write_latency);
1859     }
1860 
1861     return 0;
1862 }
1863 
1864 
1865 static int
virCapsHostNUMAInterconnectComparator(const void * a,const void * b)1866 virCapsHostNUMAInterconnectComparator(const void *a,
1867                                       const void *b)
1868 {
1869     const virNumaInterconnect *aa = a;
1870     const virNumaInterconnect *bb = b;
1871 
1872     if (aa->type != bb->type)
1873         return aa->type - bb->type;
1874 
1875     if (aa->initiator != bb->initiator)
1876         return aa->initiator - bb->initiator;
1877 
1878     if (aa->target != bb->target)
1879         return aa->target - bb->target;
1880 
1881     if (aa->cache != bb->cache)
1882         return aa->cache - bb->cache;
1883 
1884     if (aa->accessType != bb->accessType)
1885         return aa->accessType - bb->accessType;
1886 
1887     return aa->value - bb->value;
1888 }
1889 
1890 
1891 static int
virCapabilitiesHostNUMAInitInterconnects(virCapsHostNUMA * caps)1892 virCapabilitiesHostNUMAInitInterconnects(virCapsHostNUMA *caps)
1893 {
1894     g_autoptr(DIR) dir = NULL;
1895     int direrr = 0;
1896     struct dirent *entry;
1897     const char *path = SYSFS_SYSTEM_PATH "/node/";
1898     g_autoptr(GArray) interconnects = g_array_new(FALSE, FALSE, sizeof(virNumaInterconnect));
1899 
1900     if (virDirOpenIfExists(&dir, path) < 0)
1901         return -1;
1902 
1903     while (dir && (direrr = virDirRead(dir, &entry, path)) > 0) {
1904         const char *dname = STRSKIP(entry->d_name, "node");
1905         unsigned int node;
1906 
1907         if (!dname)
1908             continue;
1909 
1910         if (virStrToLong_ui(dname, NULL, 10, &node) < 0) {
1911             virReportError(VIR_ERR_INTERNAL_ERROR,
1912                            _("unable to parse %s"),
1913                            entry->d_name);
1914             return -1;
1915         }
1916 
1917         if (virCapabilitiesHostNUMAInitInterconnectsNode(interconnects, node) < 0)
1918             return -1;
1919     }
1920 
1921     if (interconnects->len > 0) {
1922         g_array_sort(interconnects, virCapsHostNUMAInterconnectComparator);
1923         caps->interconnects = g_steal_pointer(&interconnects);
1924     }
1925 
1926     return 0;
1927 }
1928 
1929 
1930 static int
virCapabilitiesHostNUMAInitReal(virCapsHostNUMA * caps)1931 virCapabilitiesHostNUMAInitReal(virCapsHostNUMA *caps)
1932 {
1933     int n;
1934     virCapsHostNUMACellCPU *cpus = NULL;
1935     int ret = -1;
1936     int ncpus = 0;
1937     int max_node;
1938 
1939     if ((max_node = virNumaGetMaxNode()) < 0)
1940         goto cleanup;
1941 
1942     for (n = 0; n <= max_node; n++) {
1943         g_autoptr(virBitmap) cpumap = NULL;
1944         g_autofree virNumaDistance *distances = NULL;
1945         int ndistances = 0;
1946         g_autofree virCapsHostNUMACellPageInfo *pageinfo = NULL;
1947         int npageinfo = 0;
1948         unsigned long long memory;
1949         g_autoptr(GArray) caches = NULL;
1950         int cpu;
1951         size_t i;
1952 
1953         if ((ncpus = virNumaGetNodeCPUs(n, &cpumap)) < 0) {
1954             if (ncpus == -2)
1955                 continue;
1956 
1957             ncpus = 0;
1958             goto cleanup;
1959         }
1960 
1961         cpus = g_new0(virCapsHostNUMACellCPU, ncpus);
1962         cpu = 0;
1963 
1964         for (i = 0; i < virBitmapSize(cpumap); i++) {
1965             if (virBitmapIsBitSet(cpumap, i)) {
1966                 if (virCapabilitiesFillCPUInfo(i, cpus + cpu++) < 0)
1967                     goto cleanup;
1968             }
1969         }
1970 
1971         if (virCapabilitiesGetNUMADistances(n, &distances, &ndistances) < 0)
1972             goto cleanup;
1973 
1974         if (virCapabilitiesGetNUMAPagesInfo(n, &pageinfo, &npageinfo) < 0)
1975             goto cleanup;
1976 
1977         if (virCapabilitiesGetNodeCache(n, &caches) < 0)
1978             goto cleanup;
1979 
1980         /* Detect the amount of memory in the numa cell in KiB */
1981         virNumaGetNodeMemory(n, &memory, NULL);
1982         memory >>= 10;
1983 
1984         virCapabilitiesHostNUMAAddCell(caps, n, memory,
1985                                        ncpus, &cpus,
1986                                        ndistances, &distances,
1987                                        npageinfo, &pageinfo,
1988                                        &caches);
1989     }
1990 
1991     if (virCapabilitiesHostNUMAInitInterconnects(caps) < 0)
1992         goto cleanup;
1993 
1994     ret = 0;
1995 
1996  cleanup:
1997     virCapabilitiesClearHostNUMACellCPUTopology(cpus, ncpus);
1998     VIR_FREE(cpus);
1999     return ret;
2000 }
2001 
2002 
2003 virCapsHostNUMA *
virCapabilitiesHostNUMANew(void)2004 virCapabilitiesHostNUMANew(void)
2005 {
2006     virCapsHostNUMA *caps = NULL;
2007 
2008     caps = g_new0(virCapsHostNUMA, 1);
2009     caps->refs = 1;
2010     caps->cells = g_ptr_array_new_with_free_func(
2011         (GDestroyNotify)virCapabilitiesFreeHostNUMACell);
2012 
2013     return caps;
2014 }
2015 
2016 
2017 virCapsHostNUMA *
virCapabilitiesHostNUMANewHost(void)2018 virCapabilitiesHostNUMANewHost(void)
2019 {
2020     virCapsHostNUMA *caps = virCapabilitiesHostNUMANew();
2021 
2022     if (virNumaIsAvailable()) {
2023         if (virCapabilitiesHostNUMAInitReal(caps) == 0)
2024             return caps;
2025 
2026         virCapabilitiesHostNUMAUnref(caps);
2027         caps = virCapabilitiesHostNUMANew();
2028         VIR_WARN("Failed to query host NUMA topology, faking single NUMA node");
2029     }
2030 
2031     if (virCapabilitiesHostNUMAInitFake(caps) < 0) {
2032         virCapabilitiesHostNUMAUnref(caps);
2033         return NULL;
2034     }
2035 
2036     return caps;
2037 }
2038 
2039 
2040 int
virCapabilitiesInitPages(virCaps * caps)2041 virCapabilitiesInitPages(virCaps *caps)
2042 {
2043     int ret = -1;
2044     unsigned int *pages_size = NULL;
2045     size_t npages;
2046 
2047     if (virNumaGetPages(-1 /* Magic constant for overall info */,
2048                         &pages_size, NULL, NULL, &npages) < 0)
2049         goto cleanup;
2050 
2051     caps->host.pagesSize = g_steal_pointer(&pages_size);
2052     caps->host.nPagesSize = npages;
2053     npages = 0;
2054 
2055     ret = 0;
2056  cleanup:
2057     VIR_FREE(pages_size);
2058     return ret;
2059 }
2060 
2061 
2062 bool
virCapsHostCacheBankEquals(virCapsHostCacheBank * a,virCapsHostCacheBank * b)2063 virCapsHostCacheBankEquals(virCapsHostCacheBank *a,
2064                            virCapsHostCacheBank *b)
2065 {
2066     return (a->id == b->id &&
2067             a->level == b->level &&
2068             a->type == b->type &&
2069             a->size == b->size &&
2070             virBitmapEqual(a->cpus, b->cpus));
2071 }
2072 
2073 void
virCapsHostCacheBankFree(virCapsHostCacheBank * ptr)2074 virCapsHostCacheBankFree(virCapsHostCacheBank *ptr)
2075 {
2076     size_t i;
2077 
2078     if (!ptr)
2079         return;
2080 
2081     virBitmapFree(ptr->cpus);
2082     for (i = 0; i < ptr->ncontrols; i++)
2083         g_free(ptr->controls[i]);
2084     g_free(ptr->controls);
2085     g_free(ptr);
2086 }
2087 
2088 
2089 static int
virCapsHostCacheBankSorter(const void * a,const void * b)2090 virCapsHostCacheBankSorter(const void *a,
2091                            const void *b)
2092 {
2093     virCapsHostCacheBank *ca = *(virCapsHostCacheBank **)a;
2094     virCapsHostCacheBank *cb = *(virCapsHostCacheBank **)b;
2095 
2096     if (ca->level < cb->level)
2097         return -1;
2098     if (ca->level > cb->level)
2099         return 1;
2100 
2101     return ca->id - cb->id;
2102 }
2103 
2104 
2105 static int
virCapabilitiesInitResctrl(virCaps * caps)2106 virCapabilitiesInitResctrl(virCaps *caps)
2107 {
2108     if (caps->host.resctrl)
2109         return 0;
2110 
2111     caps->host.resctrl = virResctrlInfoNew();
2112     if (!caps->host.resctrl)
2113         return -1;
2114 
2115     return 0;
2116 }
2117 
2118 
2119 static int
virCapabilitiesInitResctrlMemory(virCaps * caps)2120 virCapabilitiesInitResctrlMemory(virCaps *caps)
2121 {
2122     virCapsHostMemBWNode *node = NULL;
2123     size_t i = 0;
2124     int ret = -1;
2125     const virResctrlMonitorType montype = VIR_RESCTRL_MONITOR_TYPE_MEMBW;
2126     const char *prefix = virResctrlMonitorPrefixTypeToString(montype);
2127 
2128     for (i = 0; i < caps->host.cache.nbanks; i++) {
2129         virCapsHostCacheBank *bank = caps->host.cache.banks[i];
2130         node = g_new0(virCapsHostMemBWNode, 1);
2131 
2132         if (virResctrlInfoGetMemoryBandwidth(caps->host.resctrl,
2133                                              bank->level, &node->control) > 0) {
2134             node->id = bank->id;
2135             node->cpus = virBitmapNewCopy(bank->cpus);
2136 
2137             VIR_APPEND_ELEMENT(caps->host.memBW.nodes, caps->host.memBW.nnodes, node);
2138         }
2139         virCapsHostMemBWNodeFree(node);
2140         node = NULL;
2141     }
2142 
2143     if (virResctrlInfoGetMonitorPrefix(caps->host.resctrl, prefix,
2144                                        &caps->host.memBW.monitor) < 0)
2145         goto cleanup;
2146 
2147     ret = 0;
2148  cleanup:
2149     virCapsHostMemBWNodeFree(node);
2150     return ret;
2151 }
2152 
2153 
2154 int
virCapabilitiesInitCaches(virCaps * caps)2155 virCapabilitiesInitCaches(virCaps *caps)
2156 {
2157     size_t i = 0;
2158     virBitmap *cpus = NULL;
2159     ssize_t pos = -1;
2160     int ret = -1;
2161     char *path = NULL;
2162     char *type = NULL;
2163     struct dirent *ent = NULL;
2164     virCapsHostCacheBank *bank = NULL;
2165     const virResctrlMonitorType montype = VIR_RESCTRL_MONITOR_TYPE_CACHE;
2166     const char *prefix = virResctrlMonitorPrefixTypeToString(montype);
2167 
2168     /* Minimum level to expose in capabilities.  Can be lowered or removed (with
2169      * the appropriate code below), but should not be increased, because we'd
2170      * lose information. */
2171     const int cache_min_level = 3;
2172 
2173     if (virCapabilitiesInitResctrl(caps) < 0)
2174         return -1;
2175 
2176     /* offline CPUs don't provide cache info */
2177     if (virFileReadValueBitmap(&cpus, "%s/cpu/online", SYSFS_SYSTEM_PATH) < 0)
2178         return -1;
2179 
2180     while ((pos = virBitmapNextSetBit(cpus, pos)) >= 0) {
2181         int rv = -1;
2182         g_autoptr(DIR) dirp = NULL;
2183 
2184         VIR_FREE(path);
2185         path = g_strdup_printf("%s/cpu/cpu%zd/cache/", SYSFS_SYSTEM_PATH, pos);
2186 
2187         rv = virDirOpenIfExists(&dirp, path);
2188         if (rv < 0)
2189             goto cleanup;
2190 
2191         if (!dirp)
2192             continue;
2193 
2194         while ((rv = virDirRead(dirp, &ent, path)) > 0) {
2195             int kernel_type;
2196             unsigned int level;
2197 
2198             if (!STRPREFIX(ent->d_name, "index"))
2199                 continue;
2200 
2201             if (virFileReadValueUint(&level,
2202                                      "%s/cpu/cpu%zd/cache/%s/level",
2203                                      SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
2204                 goto cleanup;
2205 
2206             if (level < cache_min_level)
2207                 continue;
2208 
2209             bank = g_new0(virCapsHostCacheBank, 1);
2210             bank->level = level;
2211 
2212             if (virFileReadValueUint(&bank->id,
2213                                      "%s/cpu/cpu%zd/cache/%s/id",
2214                                      SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
2215                 goto cleanup;
2216 
2217             if (virFileReadValueUint(&bank->level,
2218                                      "%s/cpu/cpu%zd/cache/%s/level",
2219                                      SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
2220                 goto cleanup;
2221 
2222             if (virFileReadValueString(&type,
2223                                        "%s/cpu/cpu%zd/cache/%s/type",
2224                                        SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
2225                 goto cleanup;
2226 
2227             if (virFileReadValueScaledInt(&bank->size,
2228                                           "%s/cpu/cpu%zd/cache/%s/size",
2229                                           SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
2230                 goto cleanup;
2231 
2232             if (virFileReadValueBitmap(&bank->cpus,
2233                                        "%s/cpu/cpu%zd/cache/%s/shared_cpu_list",
2234                                        SYSFS_SYSTEM_PATH, pos, ent->d_name) < 0)
2235                 goto cleanup;
2236 
2237             kernel_type = virCacheKernelTypeFromString(type);
2238             if (kernel_type < 0) {
2239                 virReportError(VIR_ERR_INTERNAL_ERROR,
2240                                _("Unknown cache type '%s'"), type);
2241                 goto cleanup;
2242             }
2243 
2244             bank->type = kernel_type;
2245             VIR_FREE(type);
2246 
2247             for (i = 0; i < caps->host.cache.nbanks; i++) {
2248                 if (virCapsHostCacheBankEquals(bank, caps->host.cache.banks[i]))
2249                     break;
2250             }
2251             if (i == caps->host.cache.nbanks) {
2252                 /* If it is a new cache, then update its resctrl information. */
2253                 if (virResctrlInfoGetCache(caps->host.resctrl,
2254                                            bank->level,
2255                                            bank->size,
2256                                            &bank->ncontrols,
2257                                            &bank->controls) < 0)
2258                     goto cleanup;
2259 
2260                 VIR_APPEND_ELEMENT(caps->host.cache.banks, caps->host.cache.nbanks, bank);
2261             }
2262 
2263             virCapsHostCacheBankFree(bank);
2264             bank = NULL;
2265         }
2266         if (rv < 0)
2267             goto cleanup;
2268     }
2269 
2270     /* Sort the array in order for the tests to be predictable.  This way we can
2271      * still traverse the directory instead of guessing names (in case there is
2272      * 'index1' and 'index3' but no 'index2'). */
2273     if (caps->host.cache.banks) {
2274         qsort(caps->host.cache.banks, caps->host.cache.nbanks,
2275               sizeof(*caps->host.cache.banks), virCapsHostCacheBankSorter);
2276     }
2277 
2278     if (virCapabilitiesInitResctrlMemory(caps) < 0)
2279         goto cleanup;
2280 
2281     if (virResctrlInfoGetMonitorPrefix(caps->host.resctrl, prefix,
2282                                        &caps->host.cache.monitor) < 0)
2283         goto cleanup;
2284 
2285     ret = 0;
2286  cleanup:
2287     VIR_FREE(type);
2288     VIR_FREE(path);
2289     virCapsHostCacheBankFree(bank);
2290     virBitmapFree(cpus);
2291     return ret;
2292 }
2293 
2294 
/**
 * virCapabilitiesHostInitIOMMU:
 * @caps: capabilities object to update
 *
 * Stores the result of virHostHasIOMMU() in @caps->host.iommu.
 */
void
virCapabilitiesHostInitIOMMU(virCaps *caps)
{
    caps->host.iommu = virHostHasIOMMU();
}
2300