1 /*
2  * numa_conf.c
3  *
4  * Copyright (C) 2014-2015 Red Hat, Inc.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library.  If not, see
18  * <http://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 
23 #include "numa_conf.h"
24 
25 #include "domain_conf.h"
26 #include "viralloc.h"
27 #include "virnuma.h"
28 #include "virstring.h"
29 
30 /*
 * Distance definitions conforming to the ACPI 2.0 SLIT.
32  * See include/linux/topology.h
33  */
34 #define LOCAL_DISTANCE          10
35 #define REMOTE_DISTANCE         20
36 /* SLIT entry value is a one-byte unsigned integer. */
37 #define UNREACHABLE            255
38 
39 #define VIR_FROM_THIS VIR_FROM_DOMAIN
40 
/* String names for the <numatune> memory modes; the
 * virDomainNumatuneMemModeTypeFromString()/TypeToString() helpers used
 * later in this file presumably come from this table. */
VIR_ENUM_IMPL(virDomainNumatuneMemMode,
              VIR_DOMAIN_NUMATUNE_MEM_LAST,
              "strict",
              "preferred",
              "interleave",
              "restrictive",
);

/* String names for the <numatune> memory placement policies. */
VIR_ENUM_IMPL(virDomainNumatunePlacement,
              VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST,
              "default",
              "static",
              "auto",
);

/* String names for the per-cell memory access setting. */
VIR_ENUM_IMPL(virDomainMemoryAccess,
              VIR_DOMAIN_MEMORY_ACCESS_LAST,
              "default",
              "shared",
              "private",
);

/* String names for the cache 'associativity' attribute. */
VIR_ENUM_IMPL(virNumaCacheAssociativity,
              VIR_NUMA_CACHE_ASSOCIATIVITY_LAST,
              "none",
              "direct",
              "full",
);

/* String names for the cache 'policy' attribute. */
VIR_ENUM_IMPL(virNumaCachePolicy,
              VIR_NUMA_CACHE_POLICY_LAST,
              "none",
              "writeback",
              "writethrough",
);

/* String names for the memory latency kinds. */
VIR_ENUM_IMPL(virMemoryLatency,
              VIR_MEMORY_LATENCY_LAST,
              "none",
              "access",
              "read",
              "write"
);
84 
85 typedef struct _virDomainNumaNode virDomainNumaNode;
86 
87 struct _virDomainNuma {
88     struct {
89         bool specified;
90         virBitmap *nodeset;
91         virDomainNumatuneMemMode mode;
92         virDomainNumatunePlacement placement;
93     } memory;               /* pinning for all the memory */
94 
95     struct _virDomainNumaNode {
96         unsigned long long mem; /* memory size in KiB */
97         virBitmap *cpumask;     /* bitmap of vCPUs corresponding to the node */
98         virBitmap *nodeset;     /* host memory nodes where this guest node resides */
99         virDomainNumatuneMemMode mode;  /* memory mode selection */
100         virDomainMemoryAccess memAccess; /* shared memory access configuration */
101         virTristateBool discard; /* discard-data for memory-backend-file */
102 
103         virNumaDistance *distances; /* remote node distances */
104         size_t ndistances;
105 
106         virNumaCache *caches;
107         size_t ncaches;
108     } *mem_nodes;           /* guest node configuration */
109     size_t nmem_nodes;
110 
111     virNumaInterconnect *interconnects;
112     size_t ninterconnects;
113 
114     /* Future NUMA tuning related stuff should go here. */
115 };
116 
117 
118 bool
virDomainNumatuneNodeSpecified(virDomainNuma * numatune,int cellid)119 virDomainNumatuneNodeSpecified(virDomainNuma *numatune,
120                                int cellid)
121 {
122     if (numatune &&
123         cellid >= 0 &&
124         cellid < numatune->nmem_nodes)
125         return numatune->mem_nodes[cellid].nodeset;
126 
127     return false;
128 }
129 
130 static int
virDomainNumatuneNodeParseXML(virDomainNuma * numa,xmlXPathContextPtr ctxt)131 virDomainNumatuneNodeParseXML(virDomainNuma *numa,
132                               xmlXPathContextPtr ctxt)
133 {
134     int n = 0;
135     size_t i = 0;
136     g_autofree xmlNodePtr *nodes = NULL;
137 
138     if ((n = virXPathNodeSet("./numatune/memnode", ctxt, &nodes)) < 0) {
139         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
140                        _("Cannot extract memnode nodes"));
141         return -1;
142     }
143 
144     if (!n)
145         return 0;
146 
147     if (numa->memory.specified &&
148         numa->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) {
149         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
150                        _("Per-node binding is not compatible with "
151                          "automatic NUMA placement."));
152         return -1;
153     }
154 
155     if (!numa->nmem_nodes) {
156         virReportError(VIR_ERR_XML_ERROR, "%s",
157                        _("Element 'memnode' is invalid without "
158                          "any guest NUMA cells"));
159         return -1;
160     }
161 
162     for (i = 0; i < n; i++) {
163         unsigned int cellid = 0;
164         virDomainNumaNode *mem_node = NULL;
165         xmlNodePtr cur_node = nodes[i];
166         g_autofree char *tmp = NULL;
167 
168         if (virXMLPropUInt(cur_node, "cellid", 10, VIR_XML_PROP_REQUIRED,
169                            &cellid) < 0)
170             return -1;
171 
172         if (cellid >= numa->nmem_nodes) {
173             virReportError(VIR_ERR_XML_ERROR, "%s",
174                            _("Argument 'cellid' in memnode element must "
175                              "correspond to existing guest's NUMA cell"));
176             return -1;
177         }
178 
179         mem_node = &numa->mem_nodes[cellid];
180 
181         if (mem_node->nodeset) {
182             virReportError(VIR_ERR_XML_ERROR,
183                            _("Multiple memnode elements with cellid %u"),
184                            cellid);
185             return -1;
186         }
187 
188         if (virXMLPropEnumDefault(cur_node, "mode",
189                                   virDomainNumatuneMemModeTypeFromString,
190                                   VIR_XML_PROP_NONE, &mem_node->mode,
191                                   VIR_DOMAIN_NUMATUNE_MEM_STRICT) < 0)
192             return -1;
193 
194         if (numa->memory.mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE &&
195             mem_node->mode != VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) {
196             virReportError(VIR_ERR_XML_ERROR, "%s",
197                            _("'restrictive' mode is required in memnode element "
198                              "when mode is 'restrictive' in memory element"));
199             return -1;
200         }
201 
202         tmp = virXMLPropString(cur_node, "nodeset");
203         if (!tmp) {
204             virReportError(VIR_ERR_XML_ERROR, "%s",
205                            _("Missing required nodeset attribute "
206                              "in memnode element"));
207             return -1;
208         }
209         if (virBitmapParse(tmp, &mem_node->nodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
210             return -1;
211 
212         if (virBitmapIsAllClear(mem_node->nodeset)) {
213             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
214                            _("Invalid value of 'nodeset': %s"), tmp);
215             return -1;
216         }
217     }
218 
219     return 0;
220 }
221 
222 int
virDomainNumatuneParseXML(virDomainNuma * numa,bool placement_static,xmlXPathContextPtr ctxt)223 virDomainNumatuneParseXML(virDomainNuma *numa,
224                           bool placement_static,
225                           xmlXPathContextPtr ctxt)
226 {
227     char *tmp = NULL;
228     int mode = -1;
229     int n = 0;
230     int placement = -1;
231     int ret = -1;
232     virBitmap *nodeset = NULL;
233     xmlNodePtr node = NULL;
234 
235     if (virXPathInt("count(./numatune)", ctxt, &n) < 0) {
236         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
237                        _("cannot extract numatune nodes"));
238         goto cleanup;
239     } else if (n > 1) {
240         virReportError(VIR_ERR_XML_ERROR, "%s",
241                        _("only one numatune is supported"));
242         goto cleanup;
243     }
244 
245     node = virXPathNode("./numatune/memory[1]", ctxt);
246 
247     if (!placement_static && !node)
248         placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO;
249 
250     if (node) {
251         if ((tmp = virXMLPropString(node, "mode")) &&
252             (mode = virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
253             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
254                            _("Unsupported NUMA memory tuning mode '%s'"), tmp);
255             goto cleanup;
256         }
257         VIR_FREE(tmp);
258 
259         if ((tmp = virXMLPropString(node, "placement")) &&
260             (placement = virDomainNumatunePlacementTypeFromString(tmp)) < 0) {
261             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
262                            _("Unsupported NUMA memory placement mode '%s'"), tmp);
263             goto cleanup;
264         }
265         VIR_FREE(tmp);
266 
267         tmp = virXMLPropString(node, "nodeset");
268         if (tmp) {
269             if (virBitmapParse(tmp, &nodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
270                 goto cleanup;
271 
272             if (virBitmapIsAllClear(nodeset)) {
273                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
274                                _("Invalid value of 'nodeset': %s"), tmp);
275                 goto cleanup;
276             }
277 
278             VIR_FREE(tmp);
279         }
280     }
281 
282     if (virDomainNumatuneSet(numa,
283                              placement_static,
284                              placement,
285                              mode,
286                              nodeset) < 0)
287         goto cleanup;
288 
289     if (virDomainNumatuneNodeParseXML(numa, ctxt) < 0)
290         goto cleanup;
291 
292     ret = 0;
293  cleanup:
294     virBitmapFree(nodeset);
295     VIR_FREE(tmp);
296     return ret;
297 }
298 
299 int
virDomainNumatuneFormatXML(virBuffer * buf,virDomainNuma * numatune)300 virDomainNumatuneFormatXML(virBuffer *buf,
301                            virDomainNuma *numatune)
302 {
303     const char *tmp = NULL;
304     char *nodeset = NULL;
305     bool nodesetSpecified = false;
306     size_t i = 0;
307 
308     if (!numatune)
309         return 0;
310 
311     for (i = 0; i < numatune->nmem_nodes; i++) {
312         if (numatune->mem_nodes[i].nodeset) {
313             nodesetSpecified = true;
314             break;
315         }
316     }
317 
318     if (!nodesetSpecified && !numatune->memory.specified)
319         return 0;
320 
321     virBufferAddLit(buf, "<numatune>\n");
322     virBufferAdjustIndent(buf, 2);
323 
324     if (numatune->memory.specified) {
325         tmp = virDomainNumatuneMemModeTypeToString(numatune->memory.mode);
326         virBufferAsprintf(buf, "<memory mode='%s' ", tmp);
327 
328         if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) {
329             if (!(nodeset = virBitmapFormat(numatune->memory.nodeset)))
330                 return -1;
331             virBufferAsprintf(buf, "nodeset='%s'/>\n", nodeset);
332             VIR_FREE(nodeset);
333         } else if (numatune->memory.placement) {
334             tmp = virDomainNumatunePlacementTypeToString(numatune->memory.placement);
335             virBufferAsprintf(buf, "placement='%s'/>\n", tmp);
336         }
337     }
338 
339     for (i = 0; i < numatune->nmem_nodes; i++) {
340         virDomainNumaNode *mem_node = &numatune->mem_nodes[i];
341 
342         if (!mem_node->nodeset)
343             continue;
344 
345         if (!(nodeset = virBitmapFormat(mem_node->nodeset)))
346             return -1;
347 
348         virBufferAsprintf(buf,
349                           "<memnode cellid='%zu' mode='%s' nodeset='%s'/>\n",
350                           i,
351                           virDomainNumatuneMemModeTypeToString(mem_node->mode),
352                           nodeset);
353         VIR_FREE(nodeset);
354     }
355 
356     virBufferAdjustIndent(buf, -2);
357     virBufferAddLit(buf, "</numatune>\n");
358     return 0;
359 }
360 
361 void
virDomainNumaFree(virDomainNuma * numa)362 virDomainNumaFree(virDomainNuma *numa)
363 {
364     size_t i = 0;
365 
366     if (!numa)
367         return;
368 
369     virBitmapFree(numa->memory.nodeset);
370     for (i = 0; i < numa->nmem_nodes; i++) {
371         virBitmapFree(numa->mem_nodes[i].cpumask);
372         virBitmapFree(numa->mem_nodes[i].nodeset);
373 
374         if (numa->mem_nodes[i].ndistances > 0)
375             g_free(numa->mem_nodes[i].distances);
376 
377         g_free(numa->mem_nodes[i].caches);
378     }
379     g_free(numa->mem_nodes);
380 
381     g_free(numa->interconnects);
382 
383     g_free(numa);
384 }
385 
386 /**
387  * virDomainNumatuneGetMode:
388  * @numatune: pointer to numatune definition
389  * @cellid: cell selector
390  * @mode: where to store the result
391  *
392  * Get the defined mode for domain's memory. It's safe to pass
393  * NULL to @mode if the return value is the only info needed.
394  *
395  * Returns: 0 on success (with @mode updated)
396  *         -1 if no mode was defined in XML
397  */
virDomainNumatuneGetMode(virDomainNuma * numatune,int cellid,virDomainNumatuneMemMode * mode)398 int virDomainNumatuneGetMode(virDomainNuma *numatune,
399                              int cellid,
400                              virDomainNumatuneMemMode *mode)
401 {
402     virDomainNumatuneMemMode tmp_mode;
403 
404     if (!numatune)
405         return -1;
406 
407     if (virDomainNumatuneNodeSpecified(numatune, cellid))
408         tmp_mode = numatune->mem_nodes[cellid].mode;
409     else if (numatune->memory.specified)
410         tmp_mode = numatune->memory.mode;
411     else
412         return -1;
413 
414     if (mode)
415         *mode = tmp_mode;
416 
417     return 0;
418 }
419 
420 virBitmap *
virDomainNumatuneGetNodeset(virDomainNuma * numatune,virBitmap * auto_nodeset,int cellid)421 virDomainNumatuneGetNodeset(virDomainNuma *numatune,
422                             virBitmap *auto_nodeset,
423                             int cellid)
424 {
425     if (!numatune)
426         return NULL;
427 
428     if (numatune->memory.specified &&
429         numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)
430         return auto_nodeset;
431 
432     if (virDomainNumatuneNodeSpecified(numatune, cellid))
433         return numatune->mem_nodes[cellid].nodeset;
434 
435     if (!numatune->memory.specified)
436         return NULL;
437 
438     return numatune->memory.nodeset;
439 }
440 
441 char *
virDomainNumatuneFormatNodeset(virDomainNuma * numatune,virBitmap * auto_nodeset,int cellid)442 virDomainNumatuneFormatNodeset(virDomainNuma *numatune,
443                                virBitmap *auto_nodeset,
444                                int cellid)
445 {
446     return virBitmapFormat(virDomainNumatuneGetNodeset(numatune,
447                                                        auto_nodeset,
448                                                        cellid));
449 }
450 
451 
452 int
virDomainNumatuneMaybeGetNodeset(virDomainNuma * numatune,virBitmap * auto_nodeset,virBitmap ** retNodeset,int cellid)453 virDomainNumatuneMaybeGetNodeset(virDomainNuma *numatune,
454                                  virBitmap *auto_nodeset,
455                                  virBitmap **retNodeset,
456                                  int cellid)
457 {
458     *retNodeset = NULL;
459 
460     if (!numatune)
461         return 0;
462 
463     if (!virDomainNumatuneNodeSpecified(numatune, cellid) &&
464         !numatune->memory.specified)
465         return 0;
466 
467     if (numatune->memory.specified &&
468         numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO &&
469         !auto_nodeset) {
470         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
471                        _("Advice from numad is needed in case of "
472                          "automatic numa placement"));
473         return -1;
474     }
475 
476     *retNodeset = virDomainNumatuneGetNodeset(numatune, auto_nodeset, cellid);
477 
478     return 0;
479 }
480 
481 
482 int
virDomainNumatuneMaybeFormatNodeset(virDomainNuma * numatune,virBitmap * auto_nodeset,char ** mask,int cellid)483 virDomainNumatuneMaybeFormatNodeset(virDomainNuma *numatune,
484                                     virBitmap *auto_nodeset,
485                                     char **mask,
486                                     int cellid)
487 {
488     virBitmap *nodeset;
489 
490     if (virDomainNumatuneMaybeGetNodeset(numatune, auto_nodeset, &nodeset,
491                                          cellid) < 0)
492         return -1;
493 
494     if (nodeset &&
495         !(*mask = virBitmapFormat(nodeset)))
496         return -1;
497 
498     return 0;
499 }
500 
501 int
virDomainNumatuneSet(virDomainNuma * numa,bool placement_static,int placement,int mode,virBitmap * nodeset)502 virDomainNumatuneSet(virDomainNuma *numa,
503                      bool placement_static,
504                      int placement,
505                      int mode,
506                      virBitmap *nodeset)
507 {
508     /* No need to do anything in this case */
509     if (mode == -1 && placement == -1 && !nodeset)
510         return 0;
511 
512     if (!numa->memory.specified) {
513         if (mode == -1)
514             mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
515         if (placement == -1)
516             placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT;
517     }
518 
519     /* Range checks */
520     if (mode != -1 &&
521         (mode < 0 || mode >= VIR_DOMAIN_NUMATUNE_MEM_LAST)) {
522         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
523                        _("Unsupported numatune mode '%d'"),
524                        mode);
525         return -1;
526     }
527 
528     if (placement != -1 &&
529         (placement < 0 || placement >= VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST)) {
530         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
531                        _("Unsupported numatune placement '%d'"),
532                        mode);
533         return -1;
534     }
535 
536     if (mode != -1)
537         numa->memory.mode = mode;
538 
539     if (nodeset) {
540         virBitmapFree(numa->memory.nodeset);
541         numa->memory.nodeset = virBitmapNewCopy(nodeset);
542 
543         if (placement == -1)
544             placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC;
545     }
546 
547     if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT) {
548         if (numa->memory.nodeset || placement_static)
549             placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC;
550         else
551             placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO;
552     }
553 
554     if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC &&
555         !numa->memory.nodeset) {
556         virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
557                        _("nodeset for NUMA memory tuning must be set "
558                          "if 'placement' is 'static'"));
559         return -1;
560     }
561 
562     /* setting nodeset when placement auto is invalid */
563     if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO &&
564         numa->memory.nodeset) {
565         virBitmapFree(numa->memory.nodeset);
566         numa->memory.nodeset = NULL;
567     }
568 
569     if (placement != -1)
570         numa->memory.placement = placement;
571 
572     numa->memory.specified = true;
573 
574     return 0;
575 }
576 
577 static bool
virDomainNumaNodesEqual(virDomainNuma * n1,virDomainNuma * n2)578 virDomainNumaNodesEqual(virDomainNuma *n1,
579                         virDomainNuma *n2)
580 {
581     size_t i = 0;
582 
583     if (n1->nmem_nodes != n2->nmem_nodes)
584         return false;
585 
586     for (i = 0; i < n1->nmem_nodes; i++) {
587         virDomainNumaNode *nd1 = &n1->mem_nodes[i];
588         virDomainNumaNode *nd2 = &n2->mem_nodes[i];
589 
590         if (!nd1->nodeset && !nd2->nodeset)
591             continue;
592 
593         if (nd1->mode != nd2->mode)
594             return false;
595 
596         if (!virBitmapEqual(nd1->nodeset, nd2->nodeset))
597             return false;
598     }
599 
600     return true;
601 }
602 
603 bool
virDomainNumaEquals(virDomainNuma * n1,virDomainNuma * n2)604 virDomainNumaEquals(virDomainNuma *n1,
605                     virDomainNuma *n2)
606 {
607     if (!n1 && !n2)
608         return true;
609 
610     if (!n1 || !n2)
611         return false;
612 
613     if (!n1->memory.specified && !n2->memory.specified)
614         return virDomainNumaNodesEqual(n1, n2);
615 
616     if (!n1->memory.specified || !n2->memory.specified)
617         return false;
618 
619     if (n1->memory.mode != n2->memory.mode)
620         return false;
621 
622     if (n1->memory.placement != n2->memory.placement)
623         return false;
624 
625     if (!virBitmapEqual(n1->memory.nodeset, n2->memory.nodeset))
626         return false;
627 
628     return virDomainNumaNodesEqual(n1, n2);
629 }
630 
631 bool
virDomainNumatuneHasPlacementAuto(virDomainNuma * numatune)632 virDomainNumatuneHasPlacementAuto(virDomainNuma *numatune)
633 {
634     if (!numatune)
635         return false;
636 
637     if (!numatune->memory.specified)
638         return false;
639 
640     if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)
641         return true;
642 
643     return false;
644 }
645 
646 bool
virDomainNumatuneHasPerNodeBinding(virDomainNuma * numatune)647 virDomainNumatuneHasPerNodeBinding(virDomainNuma *numatune)
648 {
649     size_t i = 0;
650 
651     if (!numatune)
652         return false;
653 
654     for (i = 0; i < numatune->nmem_nodes; i++) {
655         if (numatune->mem_nodes[i].nodeset)
656             return true;
657     }
658 
659     return false;
660 }
661 
662 int
virDomainNumatuneSpecifiedMaxNode(virDomainNuma * numatune)663 virDomainNumatuneSpecifiedMaxNode(virDomainNuma *numatune)
664 {
665     int ret = -1;
666     virBitmap *nodemask = NULL;
667     size_t i;
668     int bit;
669 
670     if (!numatune)
671         return ret;
672 
673     nodemask = virDomainNumatuneGetNodeset(numatune, NULL, -1);
674     if (nodemask)
675         ret = virBitmapLastSetBit(nodemask);
676 
677     for (i = 0; i < numatune->nmem_nodes; i++) {
678         nodemask = numatune->mem_nodes[i].nodeset;
679         if (!nodemask)
680             continue;
681 
682         bit = virBitmapLastSetBit(nodemask);
683         if (bit > ret)
684             ret = bit;
685     }
686 
687     return ret;
688 }
689 
690 bool
virDomainNumatuneNodesetIsAvailable(virDomainNuma * numatune,virBitmap * auto_nodeset)691 virDomainNumatuneNodesetIsAvailable(virDomainNuma *numatune,
692                                     virBitmap *auto_nodeset)
693 {
694     size_t i = 0;
695     virBitmap *b = NULL;
696 
697     if (!numatune)
698         return true;
699 
700     b = virDomainNumatuneGetNodeset(numatune, auto_nodeset, -1);
701     if (!virNumaNodesetIsAvailable(b))
702         return false;
703 
704     for (i = 0; i < numatune->nmem_nodes; i++) {
705         b = virDomainNumatuneGetNodeset(numatune, auto_nodeset, i);
706         if (!virNumaNodesetIsAvailable(b))
707             return false;
708     }
709 
710     return true;
711 }
712 
713 
714 static int
virDomainNumaDefNodeDistanceParseXML(virDomainNuma * def,xmlXPathContextPtr ctxt,unsigned int cur_cell)715 virDomainNumaDefNodeDistanceParseXML(virDomainNuma *def,
716                                      xmlXPathContextPtr ctxt,
717                                      unsigned int cur_cell)
718 {
719     int ret = -1;
720     int sibling;
721     xmlNodePtr *nodes = NULL;
722     size_t i, ndistances = def->nmem_nodes;
723 
724     if (ndistances == 0)
725         return 0;
726 
727     /* check if NUMA distances definition is present */
728     if (!virXPathNode("./distances[1]", ctxt))
729         return 0;
730 
731     if ((sibling = virXPathNodeSet("./distances[1]/sibling", ctxt, &nodes)) <= 0) {
732         virReportError(VIR_ERR_XML_ERROR, "%s",
733                        _("NUMA distances defined without siblings"));
734         goto cleanup;
735     }
736 
737     for (i = 0; i < sibling; i++) {
738         virNumaDistance *ldist;
739         virNumaDistance *rdist;
740         unsigned int sibling_id, sibling_value;
741 
742         if (virXMLPropUInt(nodes[i], "id", 10, VIR_XML_PROP_REQUIRED,
743                            &sibling_id) < 0)
744             goto cleanup;
745 
746         /* The "id" needs to be within numa/cell range */
747         if (sibling_id >= ndistances) {
748             virReportError(VIR_ERR_XML_ERROR,
749                            _("'sibling_id %d' does not refer to a "
750                              "valid cell within NUMA 'cell id %d'"),
751                            sibling_id, cur_cell);
752             goto cleanup;
753         }
754 
755         if (virXMLPropUInt(nodes[i], "value", 10, VIR_XML_PROP_REQUIRED,
756                            &sibling_value) < 0)
757             goto cleanup;
758 
759         /* Assure LOCAL_DISTANCE <= "value" <= UNREACHABLE
760          * and correct LOCAL_DISTANCE setting if such applies.
761          */
762         if ((sibling_value < LOCAL_DISTANCE ||
763              sibling_value > UNREACHABLE) ||
764             (sibling_id == cur_cell &&
765              sibling_value != LOCAL_DISTANCE) ||
766             (sibling_id != cur_cell &&
767              sibling_value == LOCAL_DISTANCE)) {
768             virReportError(VIR_ERR_XML_ERROR,
769                            _("'value %d' is invalid for "
770                              "'sibling id %d' under NUMA 'cell id %d'"),
771                            sibling_value, sibling_id, cur_cell);
772             goto cleanup;
773         }
774 
775         /* Apply the local / remote distance */
776         ldist = def->mem_nodes[cur_cell].distances;
777         if (!ldist) {
778             ldist = g_new0(virNumaDistance, ndistances);
779             ldist[cur_cell].value = LOCAL_DISTANCE;
780             ldist[cur_cell].cellid = cur_cell;
781             def->mem_nodes[cur_cell].ndistances = ndistances;
782             def->mem_nodes[cur_cell].distances = ldist;
783         }
784 
785         ldist[sibling_id].cellid = sibling_id;
786         ldist[sibling_id].value = sibling_value;
787 
788         /* Apply symmetry if none given */
789         rdist = def->mem_nodes[sibling_id].distances;
790         if (!rdist) {
791             rdist = g_new0(virNumaDistance, ndistances);
792             rdist[sibling_id].value = LOCAL_DISTANCE;
793             rdist[sibling_id].cellid = sibling_id;
794             def->mem_nodes[sibling_id].ndistances = ndistances;
795             def->mem_nodes[sibling_id].distances = rdist;
796         }
797 
798         rdist[cur_cell].cellid = cur_cell;
799         if (!rdist[cur_cell].value)
800             rdist[cur_cell].value = sibling_value;
801     }
802 
803     ret = 0;
804 
805  cleanup:
806     if (ret < 0) {
807         for (i = 0; i < ndistances; i++)
808             VIR_FREE(def->mem_nodes[i].distances);
809         def->mem_nodes[i].ndistances = 0;
810     }
811     VIR_FREE(nodes);
812 
813     return ret;
814 }
815 
816 
817 static int
virDomainNumaDefNodeCacheParseXML(virDomainNuma * def,xmlXPathContextPtr ctxt,unsigned int cur_cell)818 virDomainNumaDefNodeCacheParseXML(virDomainNuma *def,
819                                   xmlXPathContextPtr ctxt,
820                                   unsigned int cur_cell)
821 {
822     g_autofree xmlNodePtr *nodes = NULL;
823     int n;
824     size_t i;
825 
826     if ((n = virXPathNodeSet("./cache", ctxt, &nodes)) < 0)
827         return -1;
828 
829     def->mem_nodes[cur_cell].caches = g_new0(virNumaCache, n);
830 
831     for (i = 0; i < n; i++) {
832         VIR_XPATH_NODE_AUTORESTORE(ctxt)
833         virNumaCache *cache = &def->mem_nodes[cur_cell].caches[i];
834         g_autofree char *tmp = NULL;
835         unsigned int level;
836         int associativity;
837         int policy;
838         unsigned long long size;
839         unsigned long long line;
840 
841         if (!(tmp = virXMLPropString(nodes[i], "level"))) {
842             virReportError(VIR_ERR_XML_ERROR,
843                            _("Missing 'level' attribute in cache "
844                              "element for NUMA node %d"),
845                            cur_cell);
846             return -1;
847         }
848 
849         if (virStrToLong_uip(tmp, NULL, 10, &level) < 0 ||
850             level == 0) {
851             virReportError(VIR_ERR_XML_ERROR,
852                            _("Invalid 'level' attribute in cache "
853                              "element for NUMA node %d"),
854                            cur_cell);
855             return -1;
856         }
857         VIR_FREE(tmp);
858 
859         if (!(tmp = virXMLPropString(nodes[i], "associativity"))) {
860             virReportError(VIR_ERR_XML_ERROR,
861                            _("Missing 'associativity' attribute in cache "
862                              "element for NUMA node %d"),
863                            cur_cell);
864             return -1;
865         }
866 
867         if ((associativity = virNumaCacheAssociativityTypeFromString(tmp)) < 0) {
868             virReportError(VIR_ERR_XML_ERROR,
869                            _("Invalid cache associativity '%s'"),
870                            tmp);
871             return -1;
872         }
873         VIR_FREE(tmp);
874 
875         if (!(tmp = virXMLPropString(nodes[i], "policy"))) {
876             virReportError(VIR_ERR_XML_ERROR,
877                            _("Missing 'policy' attribute in cache "
878                              "element for NUMA node %d"),
879                            cur_cell);
880         }
881 
882         if ((policy = virNumaCachePolicyTypeFromString(tmp)) < 0) {
883             virReportError(VIR_ERR_XML_ERROR,
884                            _("Invalid cache policy '%s'"),
885                            tmp);
886             return -1;
887         }
888         VIR_FREE(tmp);
889 
890         ctxt->node = nodes[i];
891         if (virDomainParseMemory("./size/@value", "./size/unit",
892                                  ctxt, &size, true, false) < 0)
893             return -1;
894 
895         if (virParseScaledValue("./line/@value", "./line/unit",
896                                 ctxt, &line, 1, ULLONG_MAX, true) < 0)
897             return -1;
898 
899         *cache = (virNumaCache){level, size, line, associativity, policy};
900         def->mem_nodes[cur_cell].ncaches++;
901     }
902 
903     return 0;
904 }
905 
906 
/**
 * virDomainNumaDefParseXML:
 * @def: NUMA definition to fill in
 * @ctxt: XPath context used to look up the ./cpu/numa[1] subtree
 *
 * Parses the guest NUMA topology: every <cell> found under ./cpu/numa[1]
 * (id, cpus, memory, memAccess, discard, distances and caches), followed
 * by the <latency> and <bandwidth> children of <interconnects>.
 *
 * On failure the partially filled @def is left as is; nothing allocated
 * here is released by this function.
 *
 * Returns 0 on success or when no <numa> element is present, -1 on error.
 */
int
virDomainNumaDefParseXML(virDomainNuma *def,
                         xmlXPathContextPtr ctxt)
{
    g_autofree xmlNodePtr *cell = NULL;
    g_autofree xmlNodePtr *interconnect = NULL;

    int n;
    size_t i, j;

    /* check if NUMA definition is present */
    if (!virXPathNode("./cpu/numa[1]", ctxt))
        return 0;

    /* a <numa> element with no (or unparsable) cells is an error */
    if ((n = virXPathNodeSet("./cpu/numa[1]/cell", ctxt, &cell)) <= 0) {
        virReportError(VIR_ERR_XML_ERROR, "%s",
                       _("NUMA topology defined without NUMA cells"));
        return -1;
    }

    /* one zero-initialized slot per <cell>, indexed by cell id */
    def->mem_nodes = g_new0(struct _virDomainNumaNode, n);
    def->nmem_nodes = n;

    for (i = 0; i < n; i++) {
        /* NOTE(review): presumably restores ctxt->node when this iteration's
         * scope ends, undoing the assignment below - confirm against macro */
        VIR_XPATH_NODE_AUTORESTORE(ctxt)
        g_autofree char *tmp = NULL;
        int rc;
        unsigned int cur_cell;

        if ((rc = virXMLPropUInt(cell[i], "id", 10, VIR_XML_PROP_NONE,
                                 &cur_cell)) < 0)
            return -1;

        /* no explicit id: fall back to the position of the element */
        if (rc == 0)
            cur_cell = i;

        /* cells are in order of parsing or explicitly numbered */
        if (cur_cell >= n) {
            virReportError(VIR_ERR_XML_ERROR, "%s",
                           _("Exactly one 'cell' element per guest "
                             "NUMA cell allowed, non-contiguous ranges or "
                             "ranges not starting from 0 are not allowed"));
            return -1;
        }

        /* a non-zero memory size means this slot was already filled */
        if (def->mem_nodes[cur_cell].mem) {
            virReportError(VIR_ERR_XML_ERROR,
                           _("Duplicate NUMA cell info for cell id '%u'"),
                           cur_cell);
            return -1;
        }

        if ((tmp = virXMLPropString(cell[i], "cpus"))) {
            g_autoptr(virBitmap) cpumask = NULL;

            if (virBitmapParse(tmp, &cpumask, VIR_DOMAIN_CPUMASK_LEN) < 0)
                return -1;

            /* an all-clear mask is dropped, leaving the cell without cpumask */
            if (!virBitmapIsAllClear(cpumask))
                def->mem_nodes[cur_cell].cpumask = g_steal_pointer(&cpumask);
        }

        /* reject vCPUs shared with any cell parsed so far */
        for (j = 0; j < n; j++) {
            if (j == cur_cell ||
                !def->mem_nodes[j].cpumask ||
                !def->mem_nodes[cur_cell].cpumask)
                continue;

            if (virBitmapOverlaps(def->mem_nodes[j].cpumask,
                                  def->mem_nodes[cur_cell].cpumask)) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("NUMA cells %u and %zu have overlapping vCPU ids"),
                               cur_cell, j);
                return -1;
            }
        }

        /* the relative XPath expressions below are evaluated from this cell */
        ctxt->node = cell[i];
        if (virDomainParseMemory("./@memory", "./@unit", ctxt,
                                 &def->mem_nodes[cur_cell].mem, true, false) < 0)
            return -1;

        if (virXMLPropEnum(cell[i], "memAccess",
                           virDomainMemoryAccessTypeFromString,
                           VIR_XML_PROP_NONZERO,
                           &def->mem_nodes[cur_cell].memAccess) < 0)
            return -1;

        if (virXMLPropTristateBool(cell[i], "discard", VIR_XML_PROP_NONE,
                                   &def->mem_nodes[cur_cell].discard) < 0)
            return -1;

        /* Parse NUMA distances info */
        if (virDomainNumaDefNodeDistanceParseXML(def, ctxt, cur_cell) < 0)
            return -1;

        /* Parse cache info */
        if (virDomainNumaDefNodeCacheParseXML(def, ctxt, cur_cell) < 0)
            return -1;
    }

    /* 'n' is reused: from here on it counts interconnect elements */
    if ((n = virXPathNodeSet("./cpu/numa[1]/interconnects[1]/latency|"
                             "./cpu/numa[1]/interconnects[1]/bandwidth", ctxt,
                             &interconnect)) < 0)
        return -1;

    def->interconnects = g_new0(virNumaInterconnect, n);
    for (i = 0; i < n; i++) {
        virNumaInterconnectType type;
        unsigned int initiator;
        unsigned int target;
        unsigned int cache = 0;
        virMemoryLatency accessType;
        unsigned long long value;

        if (virXMLNodeNameEqual(interconnect[i], "latency")) {
            type = VIR_NUMA_INTERCONNECT_TYPE_LATENCY;

            /* latency is a plain number, no unit scaling */
            if (virXMLPropULongLong(interconnect[i], "value", 10,
                                    VIR_XML_PROP_REQUIRED, &value) < 0)
                return -1;
        } else if (virXMLNodeNameEqual(interconnect[i], "bandwidth")) {
            VIR_XPATH_NODE_AUTORESTORE(ctxt)
            type = VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH;

            ctxt->node = interconnect[i];

            /* bandwidth goes through the memory parser, honouring @unit */
            if (virDomainParseMemory("./@value", "./@unit", ctxt, &value, true, false) < 0)
                return -1;
        } else {
            /* Ignore yet unknown child elements. */
            continue;
        }

        if (virXMLPropUInt(interconnect[i], "initiator", 10, VIR_XML_PROP_REQUIRED,
                           &initiator) < 0)
            return -1;

        if (virXMLPropUInt(interconnect[i], "target", 10, VIR_XML_PROP_REQUIRED,
                           &target) < 0)
            return -1;

        /* 'cache' is optional and keeps its default of 0 when absent */
        if (virXMLPropUInt(interconnect[i], "cache", 10, VIR_XML_PROP_NONE,
                           &cache) < 0)
            return -1;

        if (virXMLPropEnum(interconnect[i], "type",
                           virMemoryLatencyTypeFromString,
                           VIR_XML_PROP_REQUIRED | VIR_XML_PROP_NONZERO,
                           &accessType) < 0)
            return -1;

        def->interconnects[i] = (virNumaInterconnect) {type, initiator, target,
                                                       cache, accessType, value};
        def->ninterconnects++;
    }

    return 0;
}
1066 
1067 
1068 int
virDomainNumaDefFormatXML(virBuffer * buf,virDomainNuma * def)1069 virDomainNumaDefFormatXML(virBuffer *buf,
1070                           virDomainNuma *def)
1071 {
1072     virDomainMemoryAccess memAccess;
1073     virTristateBool discard;
1074     size_t ncells = virDomainNumaGetNodeCount(def);
1075     size_t i;
1076 
1077     if (ncells == 0)
1078         return 0;
1079 
1080     virBufferAddLit(buf, "<numa>\n");
1081     virBufferAdjustIndent(buf, 2);
1082     for (i = 0; i < ncells; i++) {
1083         virBitmap *cpumask = virDomainNumaGetNodeCpumask(def, i);
1084         g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
1085         g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1086 
1087         memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
1088         discard = virDomainNumaGetNodeDiscard(def, i);
1089 
1090         virBufferAsprintf(&attrBuf, " id='%zu'", i);
1091 
1092         if (cpumask) {
1093             g_autofree char *cpustr = virBitmapFormat(cpumask);
1094 
1095             if (!cpustr)
1096                 return -1;
1097             virBufferAsprintf(&attrBuf, " cpus='%s'", cpustr);
1098         }
1099         virBufferAsprintf(&attrBuf, " memory='%llu'",
1100                           virDomainNumaGetNodeMemorySize(def, i));
1101         virBufferAddLit(&attrBuf, " unit='KiB'");
1102         if (memAccess)
1103             virBufferAsprintf(&attrBuf, " memAccess='%s'",
1104                               virDomainMemoryAccessTypeToString(memAccess));
1105 
1106         if (discard)
1107             virBufferAsprintf(&attrBuf, " discard='%s'",
1108                               virTristateBoolTypeToString(discard));
1109 
1110         virNumaDistanceFormat(&childBuf,
1111                               def->mem_nodes[i].distances,
1112                               def->mem_nodes[i].ndistances);
1113 
1114         virNumaCacheFormat(&childBuf,
1115                            def->mem_nodes[i].caches,
1116                            def->mem_nodes[i].ncaches);
1117 
1118         virXMLFormatElement(buf, "cell", &attrBuf, &childBuf);
1119     }
1120 
1121     virNumaInterconnectFormat(buf, def->interconnects, def->ninterconnects);
1122 
1123     virBufferAdjustIndent(buf, -2);
1124     virBufferAddLit(buf, "</numa>\n");
1125 
1126     return 0;
1127 }
1128 
1129 
1130 int
virDomainNumaDefValidate(const virDomainNuma * def)1131 virDomainNumaDefValidate(const virDomainNuma *def)
1132 {
1133     size_t i;
1134     size_t j;
1135 
1136     if (!def)
1137         return 0;
1138 
1139     for (i = 0; i < def->nmem_nodes; i++) {
1140         const virDomainNumaNode *node = &def->mem_nodes[i];
1141         g_autoptr(virBitmap) levelsSeen = virBitmapNew(0);
1142 
1143         for (j = 0; j < node->ncaches; j++) {
1144             const virNumaCache *cache = &node->caches[j];
1145 
1146             /* Relax this if there's ever fourth layer of cache */
1147             if (cache->level > 3) {
1148                 virReportError(VIR_ERR_XML_ERROR, "%s",
1149                                _("Ain't nobody heard of that much cache level"));
1150                 return -1;
1151             }
1152 
1153             if (virBitmapIsBitSet(levelsSeen, cache->level)) {
1154                 virReportError(VIR_ERR_XML_ERROR,
1155                                _("Cache level '%u' already defined"),
1156                                cache->level);
1157                 return -1;
1158             }
1159 
1160             if (virBitmapSetBitExpand(levelsSeen, cache->level))
1161                 return -1;
1162         }
1163     }
1164 
1165     for (i = 0; i < def->ninterconnects; i++) {
1166         const virNumaInterconnect *l = &def->interconnects[i];
1167 
1168         if (l->initiator >= def->nmem_nodes) {
1169             virReportError(VIR_ERR_XML_ERROR, "%s",
1170                            _("'initiator' refers to a non-existent NUMA node"));
1171             return -1;
1172         }
1173 
1174         if (l->target >= def->nmem_nodes) {
1175             virReportError(VIR_ERR_XML_ERROR, "%s",
1176                            _("'target' refers to a non-existent NUMA node"));
1177             return -1;
1178         }
1179 
1180         if (!def->mem_nodes[l->initiator].cpumask) {
1181             virReportError(VIR_ERR_XML_ERROR, "%s",
1182                            _("NUMA nodes without CPUs can't be initiator"));
1183             return -1;
1184         }
1185 
1186         if (l->cache > 0) {
1187             for (j = 0; j < def->mem_nodes[l->target].ncaches; j++) {
1188                 const virNumaCache *cache = &def->mem_nodes[l->target].caches[j];
1189 
1190                 if (l->cache == cache->level)
1191                     break;
1192             }
1193 
1194             if (j == def->mem_nodes[l->target].ncaches) {
1195                 virReportError(VIR_ERR_XML_ERROR, "%s",
1196                                _("'cache' refers to a non-existent NUMA node cache"));
1197                 return -1;
1198             }
1199         }
1200 
1201         for (j = 0; j < i; j++) {
1202             const virNumaInterconnect *ll = &def->interconnects[j];
1203 
1204             if (l->type == ll->type &&
1205                 l->initiator == ll->initiator &&
1206                 l->target == ll->target &&
1207                 l->cache == ll->cache &&
1208                 l->accessType == ll->accessType) {
1209                 virReportError(VIR_ERR_XML_ERROR, "%s",
1210                                _("Duplicate info for NUMA latencies"));
1211                 return -1;
1212             }
1213 
1214 
1215             if (l->initiator != l->target &&
1216                 l->initiator == ll->target &&
1217                 l->target == ll->initiator) {
1218                 virReportError(VIR_ERR_XML_ERROR, "%s",
1219                                _("Link already defined"));
1220                 return -1;
1221             }
1222         }
1223     }
1224 
1225     return 0;
1226 }
1227 
1228 
1229 unsigned int
virDomainNumaGetCPUCountTotal(virDomainNuma * numa)1230 virDomainNumaGetCPUCountTotal(virDomainNuma *numa)
1231 {
1232     size_t i;
1233     unsigned int ret = 0;
1234 
1235     for (i = 0; i < numa->nmem_nodes; i++) {
1236         virBitmap *cpumask = virDomainNumaGetNodeCpumask(numa, i);
1237 
1238         if (cpumask)
1239             ret += virBitmapCountBits(cpumask);
1240     }
1241 
1242     return ret;
1243 }
1244 
1245 unsigned int
virDomainNumaGetMaxCPUID(virDomainNuma * numa)1246 virDomainNumaGetMaxCPUID(virDomainNuma *numa)
1247 {
1248     size_t i;
1249     unsigned int ret = 0;
1250 
1251     for (i = 0; i < numa->nmem_nodes; i++) {
1252         virBitmap *cpumask = virDomainNumaGetNodeCpumask(numa, i);
1253         int bit;
1254 
1255         if (cpumask) {
1256             bit = virBitmapLastSetBit(cpumask);
1257             if (bit > ret)
1258                 ret = bit;
1259         }
1260     }
1261 
1262     return ret;
1263 }
1264 
1265 
1266 virDomainNuma *
virDomainNumaNew(void)1267 virDomainNumaNew(void)
1268 {
1269     return g_new0(virDomainNuma, 1);
1270 }
1271 
1272 
1273 bool
virDomainNumaCheckABIStability(virDomainNuma * src,virDomainNuma * tgt)1274 virDomainNumaCheckABIStability(virDomainNuma *src,
1275                                virDomainNuma *tgt)
1276 {
1277     size_t i;
1278     size_t j;
1279 
1280     if (virDomainNumaGetNodeCount(src) != virDomainNumaGetNodeCount(tgt)) {
1281         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1282                        _("Target NUMA node count '%zu' doesn't match "
1283                          "source '%zu'"),
1284                        virDomainNumaGetNodeCount(tgt),
1285                        virDomainNumaGetNodeCount(src));
1286         return false;
1287     }
1288 
1289     for (i = 0; i < virDomainNumaGetNodeCount(src); i++) {
1290         if (virDomainNumaGetNodeMemorySize(src, i) !=
1291             virDomainNumaGetNodeMemorySize(tgt, i)) {
1292             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1293                            _("Size of target NUMA node %zu (%llu) doesn't "
1294                              "match source (%llu)"), i,
1295                            virDomainNumaGetNodeMemorySize(tgt, i),
1296                            virDomainNumaGetNodeMemorySize(src, i));
1297             return false;
1298         }
1299 
1300         if (!virBitmapEqual(virDomainNumaGetNodeCpumask(src, i),
1301                             virDomainNumaGetNodeCpumask(tgt, i))) {
1302             virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1303                            _("Processor mask of target NUMA node %zu doesn't "
1304                              "match source"), i);
1305             return false;
1306         }
1307 
1308         for (j = 0; j < virDomainNumaGetNodeCount(src); j++) {
1309             if (virDomainNumaGetNodeDistance(src, i, j) !=
1310                 virDomainNumaGetNodeDistance(tgt, i, j)) {
1311                 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1312                                _("Target NUMA distance from %zu to %zu "
1313                                  "doesn't match source"), i, j);
1314 
1315                 return false;
1316             }
1317         }
1318     }
1319 
1320     return true;
1321 }
1322 
1323 
1324 size_t
virDomainNumaGetNodeCount(virDomainNuma * numa)1325 virDomainNumaGetNodeCount(virDomainNuma *numa)
1326 {
1327     if (!numa)
1328         return 0;
1329 
1330     return numa->nmem_nodes;
1331 }
1332 
1333 
1334 size_t
virDomainNumaSetNodeCount(virDomainNuma * numa,size_t nmem_nodes)1335 virDomainNumaSetNodeCount(virDomainNuma *numa, size_t nmem_nodes)
1336 {
1337     if (!nmem_nodes) {
1338         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1339                        _("Cannot set an empty mem_nodes set"));
1340         return 0;
1341     }
1342 
1343     if (numa->mem_nodes) {
1344         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1345                        _("Cannot alter an existing mem_nodes set"));
1346         return 0;
1347     }
1348 
1349     numa->mem_nodes = g_new0(struct _virDomainNumaNode, nmem_nodes);
1350 
1351     numa->nmem_nodes = nmem_nodes;
1352 
1353     return numa->nmem_nodes;
1354 }
1355 
1356 
1357 bool
virDomainNumaNodeDistanceIsUsingDefaults(virDomainNuma * numa,size_t node,size_t sibling)1358 virDomainNumaNodeDistanceIsUsingDefaults(virDomainNuma *numa,
1359                                          size_t node,
1360                                          size_t sibling)
1361 {
1362     if (node >= numa->nmem_nodes ||
1363         sibling >= numa->nmem_nodes)
1364         return false;
1365 
1366     if (!numa->mem_nodes[node].distances)
1367         return true;
1368 
1369     if (numa->mem_nodes[node].distances[sibling].value == LOCAL_DISTANCE ||
1370         numa->mem_nodes[node].distances[sibling].value == REMOTE_DISTANCE)
1371         return true;
1372 
1373     return false;
1374 }
1375 
1376 
1377 bool
virDomainNumaNodesDistancesAreBeingSet(virDomainNuma * numa)1378 virDomainNumaNodesDistancesAreBeingSet(virDomainNuma *numa)
1379 {
1380     size_t ncells = virDomainNumaGetNodeCount(numa);
1381     size_t i, j;
1382 
1383     for (i = 0; i < ncells; i++) {
1384         for (j = 0; j < ncells; j++) {
1385             if (virDomainNumaNodeDistanceIsUsingDefaults(numa, i, j))
1386                 continue;
1387 
1388             return true;
1389         }
1390     }
1391 
1392     return false;
1393 }
1394 
1395 
1396 size_t
virDomainNumaGetNodeDistance(virDomainNuma * numa,size_t node,size_t cellid)1397 virDomainNumaGetNodeDistance(virDomainNuma *numa,
1398                              size_t node,
1399                              size_t cellid)
1400 {
1401     virNumaDistance *distances = NULL;
1402 
1403     if (node < numa->nmem_nodes)
1404         distances = numa->mem_nodes[node].distances;
1405 
1406     /*
1407      * Present the configured distance value. If
1408      * out of range or not available set the platform
1409      * defined default for local and remote nodes.
1410      */
1411     if (!distances ||
1412         cellid >= numa->nmem_nodes ||
1413         !distances[cellid].value)
1414         return (node == cellid) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
1415 
1416     return distances[cellid].value;
1417 }
1418 
1419 
1420 int
virDomainNumaSetNodeDistance(virDomainNuma * numa,size_t node,size_t cellid,unsigned int value)1421 virDomainNumaSetNodeDistance(virDomainNuma *numa,
1422                              size_t node,
1423                              size_t cellid,
1424                              unsigned int value)
1425 {
1426     virNumaDistance *distances;
1427 
1428     if (node >= numa->nmem_nodes) {
1429         virReportError(VIR_ERR_INTERNAL_ERROR,
1430                        _("Argument 'node' %zu outranges "
1431                          "defined number of NUMA nodes"),
1432                        node);
1433         return -1;
1434     }
1435 
1436     distances = numa->mem_nodes[node].distances;
1437     if (!distances ||
1438         cellid >= numa->mem_nodes[node].ndistances) {
1439         virReportError(VIR_ERR_XML_ERROR, "%s",
1440                        _("Arguments under memnode element do not "
1441                          "correspond with existing guest's NUMA cell"));
1442         return -1;
1443     }
1444 
1445     /*
1446      * Advanced Configuration and Power Interface
1447      * Specification version 6.1. Chapter 5.2.17
1448      * System Locality Distance Information Table
1449      * ... Distance values of 0-9 are reserved.
1450      */
1451     if (value < LOCAL_DISTANCE ||
1452         value > UNREACHABLE) {
1453         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1454                        _("Distance value of %d is not in valid range"),
1455                        value);
1456         return -1;
1457     }
1458 
1459     if (value == LOCAL_DISTANCE && node != cellid) {
1460         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1461                        _("Distance value %d under node %zu is "
1462                          "LOCAL_DISTANCE and should be set to 10"),
1463                        value, node);
1464         return -1;
1465     }
1466 
1467     distances[cellid].cellid = cellid;
1468     distances[cellid].value = value;
1469 
1470     return distances[cellid].value;
1471 }
1472 
1473 
1474 size_t
virDomainNumaSetNodeDistanceCount(virDomainNuma * numa,size_t node,size_t ndistances)1475 virDomainNumaSetNodeDistanceCount(virDomainNuma *numa,
1476                                   size_t node,
1477                                   size_t ndistances)
1478 {
1479     virNumaDistance *distances;
1480 
1481     distances = numa->mem_nodes[node].distances;
1482     if (distances) {
1483         virReportError(VIR_ERR_INTERNAL_ERROR,
1484                        _("Cannot alter an existing nmem_nodes distances set for node: %zu"),
1485                        node);
1486         return 0;
1487     }
1488 
1489     distances = g_new0(virNumaDistance, ndistances);
1490 
1491     numa->mem_nodes[node].distances = distances;
1492     numa->mem_nodes[node].ndistances = ndistances;
1493 
1494     return numa->mem_nodes[node].ndistances;
1495 }
1496 
1497 
1498 virBitmap *
virDomainNumaGetNodeCpumask(virDomainNuma * numa,size_t node)1499 virDomainNumaGetNodeCpumask(virDomainNuma *numa,
1500                             size_t node)
1501 {
1502     return numa->mem_nodes[node].cpumask;
1503 }
1504 
1505 
1506 void
virDomainNumaSetNodeCpumask(virDomainNuma * numa,size_t node,virBitmap * cpumask)1507 virDomainNumaSetNodeCpumask(virDomainNuma *numa,
1508                             size_t node,
1509                             virBitmap *cpumask)
1510 {
1511     numa->mem_nodes[node].cpumask = cpumask;
1512 }
1513 
1514 
1515 virDomainMemoryAccess
virDomainNumaGetNodeMemoryAccessMode(virDomainNuma * numa,size_t node)1516 virDomainNumaGetNodeMemoryAccessMode(virDomainNuma *numa,
1517                                      size_t node)
1518 {
1519     return numa->mem_nodes[node].memAccess;
1520 }
1521 
1522 
1523 virTristateBool
virDomainNumaGetNodeDiscard(virDomainNuma * numa,size_t node)1524 virDomainNumaGetNodeDiscard(virDomainNuma *numa,
1525                             size_t node)
1526 {
1527     return numa->mem_nodes[node].discard;
1528 }
1529 
1530 
1531 unsigned long long
virDomainNumaGetNodeMemorySize(virDomainNuma * numa,size_t node)1532 virDomainNumaGetNodeMemorySize(virDomainNuma *numa,
1533                                size_t node)
1534 {
1535     return numa->mem_nodes[node].mem;
1536 }
1537 
1538 
1539 void
virDomainNumaSetNodeMemorySize(virDomainNuma * numa,size_t node,unsigned long long size)1540 virDomainNumaSetNodeMemorySize(virDomainNuma *numa,
1541                                size_t node,
1542                                unsigned long long size)
1543 {
1544     numa->mem_nodes[node].mem = size;
1545 }
1546 
1547 
1548 unsigned long long
virDomainNumaGetMemorySize(virDomainNuma * numa)1549 virDomainNumaGetMemorySize(virDomainNuma *numa)
1550 {
1551     size_t i;
1552     unsigned long long ret = 0;
1553 
1554     for (i = 0; i < numa->nmem_nodes; i++)
1555         ret += numa->mem_nodes[i].mem;
1556 
1557     return ret;
1558 }
1559 
1560 
1561 int
virDomainNumaFillCPUsInNode(virDomainNuma * numa,size_t node,unsigned int maxCpus)1562 virDomainNumaFillCPUsInNode(virDomainNuma *numa,
1563                             size_t node,
1564                             unsigned int maxCpus)
1565 {
1566     g_autoptr(virBitmap) maxCPUsBitmap = virBitmapNew(maxCpus);
1567     size_t i;
1568 
1569     if (node >= virDomainNumaGetNodeCount(numa))
1570         return -1;
1571 
1572     virBitmapSetAll(maxCPUsBitmap);
1573 
1574     for (i = 0; i < numa->nmem_nodes; i++) {
1575         virBitmap *nodeCpus = virDomainNumaGetNodeCpumask(numa, i);
1576 
1577         if (i == node || !nodeCpus)
1578             continue;
1579 
1580         virBitmapSubtract(maxCPUsBitmap, nodeCpus);
1581     }
1582 
1583     if (!virBitmapEqual(numa->mem_nodes[node].cpumask, maxCPUsBitmap)) {
1584         virBitmapFree(numa->mem_nodes[node].cpumask);
1585         numa->mem_nodes[node].cpumask = g_steal_pointer(&maxCPUsBitmap);
1586     }
1587 
1588     return 0;
1589 }
1590 
1591 
1592 bool
virDomainNumaHasHMAT(const virDomainNuma * numa)1593 virDomainNumaHasHMAT(const virDomainNuma *numa)
1594 {
1595     size_t i;
1596 
1597     if (!numa)
1598         return false;
1599 
1600     if (numa->ninterconnects)
1601         return true;
1602 
1603     for (i = 0; i < numa->nmem_nodes; i++) {
1604         if (numa->mem_nodes[i].ncaches)
1605             return true;
1606     }
1607 
1608     return false;
1609 }
1610 
1611 
1612 size_t
virDomainNumaGetNodeCacheCount(const virDomainNuma * numa,size_t node)1613 virDomainNumaGetNodeCacheCount(const virDomainNuma *numa,
1614                                size_t node)
1615 {
1616     if (!numa || node >= numa->nmem_nodes)
1617         return 0;
1618 
1619     return numa->mem_nodes[node].ncaches;
1620 }
1621 
1622 
1623 int
virDomainNumaGetNodeCache(const virDomainNuma * numa,size_t node,size_t cache,unsigned int * level,unsigned int * size,unsigned int * line,virNumaCacheAssociativity * associativity,virNumaCachePolicy * policy)1624 virDomainNumaGetNodeCache(const virDomainNuma *numa,
1625                           size_t node,
1626                           size_t cache,
1627                           unsigned int *level,
1628                           unsigned int *size,
1629                           unsigned int *line,
1630                           virNumaCacheAssociativity *associativity,
1631                           virNumaCachePolicy *policy)
1632 {
1633     const virDomainNumaNode *cell;
1634 
1635     if (!numa || node >= numa->nmem_nodes)
1636         return -1;
1637 
1638     cell = &numa->mem_nodes[node];
1639 
1640     if (cache >= cell->ncaches)
1641         return -1;
1642 
1643     *level = cell->caches[cache].level;
1644     *size = cell->caches[cache].size;
1645     *line = cell->caches[cache].line;
1646     *associativity = cell->caches[cache].associativity;
1647     *policy = cell->caches[cache].policy;
1648     return 0;
1649 }
1650 
1651 
1652 ssize_t
virDomainNumaGetNodeInitiator(const virDomainNuma * numa,size_t node)1653 virDomainNumaGetNodeInitiator(const virDomainNuma *numa,
1654                               size_t node)
1655 {
1656     size_t i;
1657     unsigned int maxBandwidth = 0;
1658     ssize_t candidateBandwidth = -1;
1659     unsigned int minLatency = UINT_MAX;
1660     ssize_t candidateLatency = -1;
1661 
1662     if (!numa || node >= numa->nmem_nodes)
1663         return -1;
1664 
1665     /* A NUMA node which has at least one vCPU is initiator to itself by
1666      * definition. */
1667     if (numa->mem_nodes[node].cpumask)
1668         return node;
1669 
1670     /* For the rest, "NUMA node that has best performance (the lowest
1671      * latency or largest bandwidth) to this NUMA node." */
1672     for (i = 0; i < numa->ninterconnects; i++) {
1673         const virNumaInterconnect *l = &numa->interconnects[i];
1674 
1675         if (l->target != node)
1676             continue;
1677 
1678         switch (l->type) {
1679         case VIR_NUMA_INTERCONNECT_TYPE_LATENCY:
1680             if (l->value < minLatency) {
1681                 minLatency = l->value;
1682                 candidateLatency = l->initiator;
1683             }
1684             break;
1685 
1686         case VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH:
1687             if (l->value > maxBandwidth) {
1688                 maxBandwidth = l->value;
1689                 candidateBandwidth = l->initiator;
1690             }
1691             break;
1692         }
1693     }
1694 
1695     if (candidateLatency >= 0)
1696         return candidateLatency;
1697 
1698     return candidateBandwidth;
1699 }
1700 
1701 
1702 size_t
virDomainNumaGetInterconnectsCount(const virDomainNuma * numa)1703 virDomainNumaGetInterconnectsCount(const virDomainNuma *numa)
1704 {
1705     if (!numa)
1706         return 0;
1707 
1708     return numa->ninterconnects;
1709 }
1710 
1711 
1712 int
virDomainNumaGetInterconnect(const virDomainNuma * numa,size_t i,virNumaInterconnectType * type,unsigned int * initiator,unsigned int * target,unsigned int * cache,virMemoryLatency * accessType,unsigned long * value)1713 virDomainNumaGetInterconnect(const virDomainNuma *numa,
1714                              size_t i,
1715                              virNumaInterconnectType *type,
1716                              unsigned int *initiator,
1717                              unsigned int *target,
1718                              unsigned int *cache,
1719                              virMemoryLatency *accessType,
1720                              unsigned long *value)
1721 {
1722     const virNumaInterconnect *l;
1723 
1724     if (!numa || i >= numa->ninterconnects)
1725         return -1;
1726 
1727     l = &numa->interconnects[i];
1728     *type = l->type;
1729     *initiator = l->initiator;
1730     *target = l->target;
1731     *cache = l->cache;
1732     *accessType = l->accessType;
1733     *value = l->value;
1734     return 0;
1735 }
1736 
1737 
1738 void
virNumaDistanceFormat(virBuffer * buf,const virNumaDistance * distances,size_t ndistances)1739 virNumaDistanceFormat(virBuffer *buf,
1740                       const virNumaDistance *distances,
1741                       size_t ndistances)
1742 {
1743     g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1744     size_t i;
1745 
1746     for (i = 0; i < ndistances; i++) {
1747         if (distances[i].value == 0)
1748             continue;
1749         virBufferAddLit(&childBuf, "<sibling");
1750         virBufferAsprintf(&childBuf, " id='%d'", distances[i].cellid);
1751         virBufferAsprintf(&childBuf, " value='%d'", distances[i].value);
1752         virBufferAddLit(&childBuf, "/>\n");
1753     }
1754 
1755     virXMLFormatElement(buf, "distances", NULL, &childBuf);
1756 }
1757 
1758 
1759 void
virNumaCacheFormat(virBuffer * buf,const virNumaCache * caches,size_t ncaches)1760 virNumaCacheFormat(virBuffer *buf,
1761                    const virNumaCache *caches,
1762                    size_t ncaches)
1763 {
1764     size_t i;
1765 
1766     for (i = 0; i < ncaches; i++) {
1767         const virNumaCache *cache = &caches[i];
1768         g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
1769         g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1770 
1771         virBufferAsprintf(&attrBuf, " level='%u'", cache->level);
1772         if (cache->associativity) {
1773             virBufferAsprintf(&attrBuf, " associativity='%s'",
1774                               virNumaCacheAssociativityTypeToString(cache->associativity));
1775         }
1776 
1777         if (cache->policy) {
1778             virBufferAsprintf(&attrBuf, " policy='%s'",
1779                               virNumaCachePolicyTypeToString(cache->policy));
1780         }
1781 
1782         virBufferAsprintf(&childBuf,
1783                           "<size value='%u' unit='KiB'/>\n",
1784                           cache->size);
1785 
1786         if (cache->line) {
1787             virBufferAsprintf(&childBuf,
1788                               "<line value='%u' unit='B'/>\n",
1789                               cache->line);
1790         }
1791 
1792         virXMLFormatElement(buf, "cache", &attrBuf, &childBuf);
1793     }
1794 }
1795 
1796 
1797 void
virNumaInterconnectFormat(virBuffer * buf,const virNumaInterconnect * interconnects,size_t ninterconnects)1798 virNumaInterconnectFormat(virBuffer *buf,
1799                           const virNumaInterconnect *interconnects,
1800                           size_t ninterconnects)
1801 {
1802     g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1803     size_t i;
1804 
1805     for (i = 0; i < ninterconnects; i++) {
1806         const virNumaInterconnect *l = &interconnects[i];
1807         g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
1808         const char *elem = NULL;
1809 
1810         switch (l->type) {
1811         case VIR_NUMA_INTERCONNECT_TYPE_LATENCY:
1812             elem = "latency";
1813             break;
1814         case VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH:
1815             elem = "bandwidth";
1816             break;
1817         }
1818 
1819         virBufferAsprintf(&attrBuf,
1820                           " initiator='%u' target='%u'",
1821                           l->initiator, l->target);
1822 
1823         if (l->cache > 0) {
1824             virBufferAsprintf(&attrBuf,
1825                               " cache='%u'",
1826                               l->cache);
1827         }
1828 
1829         virBufferAsprintf(&attrBuf,
1830                           " type='%s' value='%lu'",
1831                           virMemoryLatencyTypeToString(l->accessType),
1832                           l->value);
1833 
1834         if (l->type == VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH)
1835             virBufferAddLit(&attrBuf, " unit='KiB'");
1836 
1837         virXMLFormatElement(&childBuf, elem, &attrBuf, NULL);
1838     }
1839 
1840     virXMLFormatElement(buf, "interconnects", NULL, &childBuf);
1841 }
1842