1 /*
2 * numa_conf.c
3 *
4 * Copyright (C) 2014-2015 Red Hat, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library. If not, see
18 * <http://www.gnu.org/licenses/>.
19 */
20
21 #include <config.h>
22
23 #include "numa_conf.h"
24
25 #include "domain_conf.h"
26 #include "viralloc.h"
27 #include "virnuma.h"
28 #include "virstring.h"
29
/*
 * Distance values conforming to the ACPI 2.0 SLIT.
 * See include/linux/topology.h
 */
34 #define LOCAL_DISTANCE 10
35 #define REMOTE_DISTANCE 20
36 /* SLIT entry value is a one-byte unsigned integer. */
37 #define UNREACHABLE 255
38
39 #define VIR_FROM_THIS VIR_FROM_DOMAIN
40
41 VIR_ENUM_IMPL(virDomainNumatuneMemMode,
42 VIR_DOMAIN_NUMATUNE_MEM_LAST,
43 "strict",
44 "preferred",
45 "interleave",
46 "restrictive",
47 );
48
49 VIR_ENUM_IMPL(virDomainNumatunePlacement,
50 VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST,
51 "default",
52 "static",
53 "auto",
54 );
55
56 VIR_ENUM_IMPL(virDomainMemoryAccess,
57 VIR_DOMAIN_MEMORY_ACCESS_LAST,
58 "default",
59 "shared",
60 "private",
61 );
62
63 VIR_ENUM_IMPL(virNumaCacheAssociativity,
64 VIR_NUMA_CACHE_ASSOCIATIVITY_LAST,
65 "none",
66 "direct",
67 "full",
68 );
69
70 VIR_ENUM_IMPL(virNumaCachePolicy,
71 VIR_NUMA_CACHE_POLICY_LAST,
72 "none",
73 "writeback",
74 "writethrough",
75 );
76
77 VIR_ENUM_IMPL(virMemoryLatency,
78 VIR_MEMORY_LATENCY_LAST,
79 "none",
80 "access",
81 "read",
82 "write"
83 );
84
85 typedef struct _virDomainNumaNode virDomainNumaNode;
86
87 struct _virDomainNuma {
88 struct {
89 bool specified;
90 virBitmap *nodeset;
91 virDomainNumatuneMemMode mode;
92 virDomainNumatunePlacement placement;
93 } memory; /* pinning for all the memory */
94
95 struct _virDomainNumaNode {
96 unsigned long long mem; /* memory size in KiB */
97 virBitmap *cpumask; /* bitmap of vCPUs corresponding to the node */
98 virBitmap *nodeset; /* host memory nodes where this guest node resides */
99 virDomainNumatuneMemMode mode; /* memory mode selection */
100 virDomainMemoryAccess memAccess; /* shared memory access configuration */
101 virTristateBool discard; /* discard-data for memory-backend-file */
102
103 virNumaDistance *distances; /* remote node distances */
104 size_t ndistances;
105
106 virNumaCache *caches;
107 size_t ncaches;
108 } *mem_nodes; /* guest node configuration */
109 size_t nmem_nodes;
110
111 virNumaInterconnect *interconnects;
112 size_t ninterconnects;
113
114 /* Future NUMA tuning related stuff should go here. */
115 };
116
117
118 bool
virDomainNumatuneNodeSpecified(virDomainNuma * numatune,int cellid)119 virDomainNumatuneNodeSpecified(virDomainNuma *numatune,
120 int cellid)
121 {
122 if (numatune &&
123 cellid >= 0 &&
124 cellid < numatune->nmem_nodes)
125 return numatune->mem_nodes[cellid].nodeset;
126
127 return false;
128 }
129
130 static int
virDomainNumatuneNodeParseXML(virDomainNuma * numa,xmlXPathContextPtr ctxt)131 virDomainNumatuneNodeParseXML(virDomainNuma *numa,
132 xmlXPathContextPtr ctxt)
133 {
134 int n = 0;
135 size_t i = 0;
136 g_autofree xmlNodePtr *nodes = NULL;
137
138 if ((n = virXPathNodeSet("./numatune/memnode", ctxt, &nodes)) < 0) {
139 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
140 _("Cannot extract memnode nodes"));
141 return -1;
142 }
143
144 if (!n)
145 return 0;
146
147 if (numa->memory.specified &&
148 numa->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) {
149 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
150 _("Per-node binding is not compatible with "
151 "automatic NUMA placement."));
152 return -1;
153 }
154
155 if (!numa->nmem_nodes) {
156 virReportError(VIR_ERR_XML_ERROR, "%s",
157 _("Element 'memnode' is invalid without "
158 "any guest NUMA cells"));
159 return -1;
160 }
161
162 for (i = 0; i < n; i++) {
163 unsigned int cellid = 0;
164 virDomainNumaNode *mem_node = NULL;
165 xmlNodePtr cur_node = nodes[i];
166 g_autofree char *tmp = NULL;
167
168 if (virXMLPropUInt(cur_node, "cellid", 10, VIR_XML_PROP_REQUIRED,
169 &cellid) < 0)
170 return -1;
171
172 if (cellid >= numa->nmem_nodes) {
173 virReportError(VIR_ERR_XML_ERROR, "%s",
174 _("Argument 'cellid' in memnode element must "
175 "correspond to existing guest's NUMA cell"));
176 return -1;
177 }
178
179 mem_node = &numa->mem_nodes[cellid];
180
181 if (mem_node->nodeset) {
182 virReportError(VIR_ERR_XML_ERROR,
183 _("Multiple memnode elements with cellid %u"),
184 cellid);
185 return -1;
186 }
187
188 if (virXMLPropEnumDefault(cur_node, "mode",
189 virDomainNumatuneMemModeTypeFromString,
190 VIR_XML_PROP_NONE, &mem_node->mode,
191 VIR_DOMAIN_NUMATUNE_MEM_STRICT) < 0)
192 return -1;
193
194 if (numa->memory.mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE &&
195 mem_node->mode != VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) {
196 virReportError(VIR_ERR_XML_ERROR, "%s",
197 _("'restrictive' mode is required in memnode element "
198 "when mode is 'restrictive' in memory element"));
199 return -1;
200 }
201
202 tmp = virXMLPropString(cur_node, "nodeset");
203 if (!tmp) {
204 virReportError(VIR_ERR_XML_ERROR, "%s",
205 _("Missing required nodeset attribute "
206 "in memnode element"));
207 return -1;
208 }
209 if (virBitmapParse(tmp, &mem_node->nodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
210 return -1;
211
212 if (virBitmapIsAllClear(mem_node->nodeset)) {
213 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
214 _("Invalid value of 'nodeset': %s"), tmp);
215 return -1;
216 }
217 }
218
219 return 0;
220 }
221
222 int
virDomainNumatuneParseXML(virDomainNuma * numa,bool placement_static,xmlXPathContextPtr ctxt)223 virDomainNumatuneParseXML(virDomainNuma *numa,
224 bool placement_static,
225 xmlXPathContextPtr ctxt)
226 {
227 char *tmp = NULL;
228 int mode = -1;
229 int n = 0;
230 int placement = -1;
231 int ret = -1;
232 virBitmap *nodeset = NULL;
233 xmlNodePtr node = NULL;
234
235 if (virXPathInt("count(./numatune)", ctxt, &n) < 0) {
236 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
237 _("cannot extract numatune nodes"));
238 goto cleanup;
239 } else if (n > 1) {
240 virReportError(VIR_ERR_XML_ERROR, "%s",
241 _("only one numatune is supported"));
242 goto cleanup;
243 }
244
245 node = virXPathNode("./numatune/memory[1]", ctxt);
246
247 if (!placement_static && !node)
248 placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO;
249
250 if (node) {
251 if ((tmp = virXMLPropString(node, "mode")) &&
252 (mode = virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
253 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
254 _("Unsupported NUMA memory tuning mode '%s'"), tmp);
255 goto cleanup;
256 }
257 VIR_FREE(tmp);
258
259 if ((tmp = virXMLPropString(node, "placement")) &&
260 (placement = virDomainNumatunePlacementTypeFromString(tmp)) < 0) {
261 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
262 _("Unsupported NUMA memory placement mode '%s'"), tmp);
263 goto cleanup;
264 }
265 VIR_FREE(tmp);
266
267 tmp = virXMLPropString(node, "nodeset");
268 if (tmp) {
269 if (virBitmapParse(tmp, &nodeset, VIR_DOMAIN_CPUMASK_LEN) < 0)
270 goto cleanup;
271
272 if (virBitmapIsAllClear(nodeset)) {
273 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
274 _("Invalid value of 'nodeset': %s"), tmp);
275 goto cleanup;
276 }
277
278 VIR_FREE(tmp);
279 }
280 }
281
282 if (virDomainNumatuneSet(numa,
283 placement_static,
284 placement,
285 mode,
286 nodeset) < 0)
287 goto cleanup;
288
289 if (virDomainNumatuneNodeParseXML(numa, ctxt) < 0)
290 goto cleanup;
291
292 ret = 0;
293 cleanup:
294 virBitmapFree(nodeset);
295 VIR_FREE(tmp);
296 return ret;
297 }
298
299 int
virDomainNumatuneFormatXML(virBuffer * buf,virDomainNuma * numatune)300 virDomainNumatuneFormatXML(virBuffer *buf,
301 virDomainNuma *numatune)
302 {
303 const char *tmp = NULL;
304 char *nodeset = NULL;
305 bool nodesetSpecified = false;
306 size_t i = 0;
307
308 if (!numatune)
309 return 0;
310
311 for (i = 0; i < numatune->nmem_nodes; i++) {
312 if (numatune->mem_nodes[i].nodeset) {
313 nodesetSpecified = true;
314 break;
315 }
316 }
317
318 if (!nodesetSpecified && !numatune->memory.specified)
319 return 0;
320
321 virBufferAddLit(buf, "<numatune>\n");
322 virBufferAdjustIndent(buf, 2);
323
324 if (numatune->memory.specified) {
325 tmp = virDomainNumatuneMemModeTypeToString(numatune->memory.mode);
326 virBufferAsprintf(buf, "<memory mode='%s' ", tmp);
327
328 if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) {
329 if (!(nodeset = virBitmapFormat(numatune->memory.nodeset)))
330 return -1;
331 virBufferAsprintf(buf, "nodeset='%s'/>\n", nodeset);
332 VIR_FREE(nodeset);
333 } else if (numatune->memory.placement) {
334 tmp = virDomainNumatunePlacementTypeToString(numatune->memory.placement);
335 virBufferAsprintf(buf, "placement='%s'/>\n", tmp);
336 }
337 }
338
339 for (i = 0; i < numatune->nmem_nodes; i++) {
340 virDomainNumaNode *mem_node = &numatune->mem_nodes[i];
341
342 if (!mem_node->nodeset)
343 continue;
344
345 if (!(nodeset = virBitmapFormat(mem_node->nodeset)))
346 return -1;
347
348 virBufferAsprintf(buf,
349 "<memnode cellid='%zu' mode='%s' nodeset='%s'/>\n",
350 i,
351 virDomainNumatuneMemModeTypeToString(mem_node->mode),
352 nodeset);
353 VIR_FREE(nodeset);
354 }
355
356 virBufferAdjustIndent(buf, -2);
357 virBufferAddLit(buf, "</numatune>\n");
358 return 0;
359 }
360
361 void
virDomainNumaFree(virDomainNuma * numa)362 virDomainNumaFree(virDomainNuma *numa)
363 {
364 size_t i = 0;
365
366 if (!numa)
367 return;
368
369 virBitmapFree(numa->memory.nodeset);
370 for (i = 0; i < numa->nmem_nodes; i++) {
371 virBitmapFree(numa->mem_nodes[i].cpumask);
372 virBitmapFree(numa->mem_nodes[i].nodeset);
373
374 if (numa->mem_nodes[i].ndistances > 0)
375 g_free(numa->mem_nodes[i].distances);
376
377 g_free(numa->mem_nodes[i].caches);
378 }
379 g_free(numa->mem_nodes);
380
381 g_free(numa->interconnects);
382
383 g_free(numa);
384 }
385
386 /**
387 * virDomainNumatuneGetMode:
388 * @numatune: pointer to numatune definition
389 * @cellid: cell selector
390 * @mode: where to store the result
391 *
392 * Get the defined mode for domain's memory. It's safe to pass
393 * NULL to @mode if the return value is the only info needed.
394 *
395 * Returns: 0 on success (with @mode updated)
396 * -1 if no mode was defined in XML
397 */
virDomainNumatuneGetMode(virDomainNuma * numatune,int cellid,virDomainNumatuneMemMode * mode)398 int virDomainNumatuneGetMode(virDomainNuma *numatune,
399 int cellid,
400 virDomainNumatuneMemMode *mode)
401 {
402 virDomainNumatuneMemMode tmp_mode;
403
404 if (!numatune)
405 return -1;
406
407 if (virDomainNumatuneNodeSpecified(numatune, cellid))
408 tmp_mode = numatune->mem_nodes[cellid].mode;
409 else if (numatune->memory.specified)
410 tmp_mode = numatune->memory.mode;
411 else
412 return -1;
413
414 if (mode)
415 *mode = tmp_mode;
416
417 return 0;
418 }
419
420 virBitmap *
virDomainNumatuneGetNodeset(virDomainNuma * numatune,virBitmap * auto_nodeset,int cellid)421 virDomainNumatuneGetNodeset(virDomainNuma *numatune,
422 virBitmap *auto_nodeset,
423 int cellid)
424 {
425 if (!numatune)
426 return NULL;
427
428 if (numatune->memory.specified &&
429 numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)
430 return auto_nodeset;
431
432 if (virDomainNumatuneNodeSpecified(numatune, cellid))
433 return numatune->mem_nodes[cellid].nodeset;
434
435 if (!numatune->memory.specified)
436 return NULL;
437
438 return numatune->memory.nodeset;
439 }
440
441 char *
virDomainNumatuneFormatNodeset(virDomainNuma * numatune,virBitmap * auto_nodeset,int cellid)442 virDomainNumatuneFormatNodeset(virDomainNuma *numatune,
443 virBitmap *auto_nodeset,
444 int cellid)
445 {
446 return virBitmapFormat(virDomainNumatuneGetNodeset(numatune,
447 auto_nodeset,
448 cellid));
449 }
450
451
452 int
virDomainNumatuneMaybeGetNodeset(virDomainNuma * numatune,virBitmap * auto_nodeset,virBitmap ** retNodeset,int cellid)453 virDomainNumatuneMaybeGetNodeset(virDomainNuma *numatune,
454 virBitmap *auto_nodeset,
455 virBitmap **retNodeset,
456 int cellid)
457 {
458 *retNodeset = NULL;
459
460 if (!numatune)
461 return 0;
462
463 if (!virDomainNumatuneNodeSpecified(numatune, cellid) &&
464 !numatune->memory.specified)
465 return 0;
466
467 if (numatune->memory.specified &&
468 numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO &&
469 !auto_nodeset) {
470 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
471 _("Advice from numad is needed in case of "
472 "automatic numa placement"));
473 return -1;
474 }
475
476 *retNodeset = virDomainNumatuneGetNodeset(numatune, auto_nodeset, cellid);
477
478 return 0;
479 }
480
481
482 int
virDomainNumatuneMaybeFormatNodeset(virDomainNuma * numatune,virBitmap * auto_nodeset,char ** mask,int cellid)483 virDomainNumatuneMaybeFormatNodeset(virDomainNuma *numatune,
484 virBitmap *auto_nodeset,
485 char **mask,
486 int cellid)
487 {
488 virBitmap *nodeset;
489
490 if (virDomainNumatuneMaybeGetNodeset(numatune, auto_nodeset, &nodeset,
491 cellid) < 0)
492 return -1;
493
494 if (nodeset &&
495 !(*mask = virBitmapFormat(nodeset)))
496 return -1;
497
498 return 0;
499 }
500
501 int
virDomainNumatuneSet(virDomainNuma * numa,bool placement_static,int placement,int mode,virBitmap * nodeset)502 virDomainNumatuneSet(virDomainNuma *numa,
503 bool placement_static,
504 int placement,
505 int mode,
506 virBitmap *nodeset)
507 {
508 /* No need to do anything in this case */
509 if (mode == -1 && placement == -1 && !nodeset)
510 return 0;
511
512 if (!numa->memory.specified) {
513 if (mode == -1)
514 mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
515 if (placement == -1)
516 placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT;
517 }
518
519 /* Range checks */
520 if (mode != -1 &&
521 (mode < 0 || mode >= VIR_DOMAIN_NUMATUNE_MEM_LAST)) {
522 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
523 _("Unsupported numatune mode '%d'"),
524 mode);
525 return -1;
526 }
527
528 if (placement != -1 &&
529 (placement < 0 || placement >= VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST)) {
530 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
531 _("Unsupported numatune placement '%d'"),
532 mode);
533 return -1;
534 }
535
536 if (mode != -1)
537 numa->memory.mode = mode;
538
539 if (nodeset) {
540 virBitmapFree(numa->memory.nodeset);
541 numa->memory.nodeset = virBitmapNewCopy(nodeset);
542
543 if (placement == -1)
544 placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC;
545 }
546
547 if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT) {
548 if (numa->memory.nodeset || placement_static)
549 placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC;
550 else
551 placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO;
552 }
553
554 if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC &&
555 !numa->memory.nodeset) {
556 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
557 _("nodeset for NUMA memory tuning must be set "
558 "if 'placement' is 'static'"));
559 return -1;
560 }
561
562 /* setting nodeset when placement auto is invalid */
563 if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO &&
564 numa->memory.nodeset) {
565 virBitmapFree(numa->memory.nodeset);
566 numa->memory.nodeset = NULL;
567 }
568
569 if (placement != -1)
570 numa->memory.placement = placement;
571
572 numa->memory.specified = true;
573
574 return 0;
575 }
576
577 static bool
virDomainNumaNodesEqual(virDomainNuma * n1,virDomainNuma * n2)578 virDomainNumaNodesEqual(virDomainNuma *n1,
579 virDomainNuma *n2)
580 {
581 size_t i = 0;
582
583 if (n1->nmem_nodes != n2->nmem_nodes)
584 return false;
585
586 for (i = 0; i < n1->nmem_nodes; i++) {
587 virDomainNumaNode *nd1 = &n1->mem_nodes[i];
588 virDomainNumaNode *nd2 = &n2->mem_nodes[i];
589
590 if (!nd1->nodeset && !nd2->nodeset)
591 continue;
592
593 if (nd1->mode != nd2->mode)
594 return false;
595
596 if (!virBitmapEqual(nd1->nodeset, nd2->nodeset))
597 return false;
598 }
599
600 return true;
601 }
602
603 bool
virDomainNumaEquals(virDomainNuma * n1,virDomainNuma * n2)604 virDomainNumaEquals(virDomainNuma *n1,
605 virDomainNuma *n2)
606 {
607 if (!n1 && !n2)
608 return true;
609
610 if (!n1 || !n2)
611 return false;
612
613 if (!n1->memory.specified && !n2->memory.specified)
614 return virDomainNumaNodesEqual(n1, n2);
615
616 if (!n1->memory.specified || !n2->memory.specified)
617 return false;
618
619 if (n1->memory.mode != n2->memory.mode)
620 return false;
621
622 if (n1->memory.placement != n2->memory.placement)
623 return false;
624
625 if (!virBitmapEqual(n1->memory.nodeset, n2->memory.nodeset))
626 return false;
627
628 return virDomainNumaNodesEqual(n1, n2);
629 }
630
631 bool
virDomainNumatuneHasPlacementAuto(virDomainNuma * numatune)632 virDomainNumatuneHasPlacementAuto(virDomainNuma *numatune)
633 {
634 if (!numatune)
635 return false;
636
637 if (!numatune->memory.specified)
638 return false;
639
640 if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)
641 return true;
642
643 return false;
644 }
645
646 bool
virDomainNumatuneHasPerNodeBinding(virDomainNuma * numatune)647 virDomainNumatuneHasPerNodeBinding(virDomainNuma *numatune)
648 {
649 size_t i = 0;
650
651 if (!numatune)
652 return false;
653
654 for (i = 0; i < numatune->nmem_nodes; i++) {
655 if (numatune->mem_nodes[i].nodeset)
656 return true;
657 }
658
659 return false;
660 }
661
662 int
virDomainNumatuneSpecifiedMaxNode(virDomainNuma * numatune)663 virDomainNumatuneSpecifiedMaxNode(virDomainNuma *numatune)
664 {
665 int ret = -1;
666 virBitmap *nodemask = NULL;
667 size_t i;
668 int bit;
669
670 if (!numatune)
671 return ret;
672
673 nodemask = virDomainNumatuneGetNodeset(numatune, NULL, -1);
674 if (nodemask)
675 ret = virBitmapLastSetBit(nodemask);
676
677 for (i = 0; i < numatune->nmem_nodes; i++) {
678 nodemask = numatune->mem_nodes[i].nodeset;
679 if (!nodemask)
680 continue;
681
682 bit = virBitmapLastSetBit(nodemask);
683 if (bit > ret)
684 ret = bit;
685 }
686
687 return ret;
688 }
689
690 bool
virDomainNumatuneNodesetIsAvailable(virDomainNuma * numatune,virBitmap * auto_nodeset)691 virDomainNumatuneNodesetIsAvailable(virDomainNuma *numatune,
692 virBitmap *auto_nodeset)
693 {
694 size_t i = 0;
695 virBitmap *b = NULL;
696
697 if (!numatune)
698 return true;
699
700 b = virDomainNumatuneGetNodeset(numatune, auto_nodeset, -1);
701 if (!virNumaNodesetIsAvailable(b))
702 return false;
703
704 for (i = 0; i < numatune->nmem_nodes; i++) {
705 b = virDomainNumatuneGetNodeset(numatune, auto_nodeset, i);
706 if (!virNumaNodesetIsAvailable(b))
707 return false;
708 }
709
710 return true;
711 }
712
713
714 static int
virDomainNumaDefNodeDistanceParseXML(virDomainNuma * def,xmlXPathContextPtr ctxt,unsigned int cur_cell)715 virDomainNumaDefNodeDistanceParseXML(virDomainNuma *def,
716 xmlXPathContextPtr ctxt,
717 unsigned int cur_cell)
718 {
719 int ret = -1;
720 int sibling;
721 xmlNodePtr *nodes = NULL;
722 size_t i, ndistances = def->nmem_nodes;
723
724 if (ndistances == 0)
725 return 0;
726
727 /* check if NUMA distances definition is present */
728 if (!virXPathNode("./distances[1]", ctxt))
729 return 0;
730
731 if ((sibling = virXPathNodeSet("./distances[1]/sibling", ctxt, &nodes)) <= 0) {
732 virReportError(VIR_ERR_XML_ERROR, "%s",
733 _("NUMA distances defined without siblings"));
734 goto cleanup;
735 }
736
737 for (i = 0; i < sibling; i++) {
738 virNumaDistance *ldist;
739 virNumaDistance *rdist;
740 unsigned int sibling_id, sibling_value;
741
742 if (virXMLPropUInt(nodes[i], "id", 10, VIR_XML_PROP_REQUIRED,
743 &sibling_id) < 0)
744 goto cleanup;
745
746 /* The "id" needs to be within numa/cell range */
747 if (sibling_id >= ndistances) {
748 virReportError(VIR_ERR_XML_ERROR,
749 _("'sibling_id %d' does not refer to a "
750 "valid cell within NUMA 'cell id %d'"),
751 sibling_id, cur_cell);
752 goto cleanup;
753 }
754
755 if (virXMLPropUInt(nodes[i], "value", 10, VIR_XML_PROP_REQUIRED,
756 &sibling_value) < 0)
757 goto cleanup;
758
759 /* Assure LOCAL_DISTANCE <= "value" <= UNREACHABLE
760 * and correct LOCAL_DISTANCE setting if such applies.
761 */
762 if ((sibling_value < LOCAL_DISTANCE ||
763 sibling_value > UNREACHABLE) ||
764 (sibling_id == cur_cell &&
765 sibling_value != LOCAL_DISTANCE) ||
766 (sibling_id != cur_cell &&
767 sibling_value == LOCAL_DISTANCE)) {
768 virReportError(VIR_ERR_XML_ERROR,
769 _("'value %d' is invalid for "
770 "'sibling id %d' under NUMA 'cell id %d'"),
771 sibling_value, sibling_id, cur_cell);
772 goto cleanup;
773 }
774
775 /* Apply the local / remote distance */
776 ldist = def->mem_nodes[cur_cell].distances;
777 if (!ldist) {
778 ldist = g_new0(virNumaDistance, ndistances);
779 ldist[cur_cell].value = LOCAL_DISTANCE;
780 ldist[cur_cell].cellid = cur_cell;
781 def->mem_nodes[cur_cell].ndistances = ndistances;
782 def->mem_nodes[cur_cell].distances = ldist;
783 }
784
785 ldist[sibling_id].cellid = sibling_id;
786 ldist[sibling_id].value = sibling_value;
787
788 /* Apply symmetry if none given */
789 rdist = def->mem_nodes[sibling_id].distances;
790 if (!rdist) {
791 rdist = g_new0(virNumaDistance, ndistances);
792 rdist[sibling_id].value = LOCAL_DISTANCE;
793 rdist[sibling_id].cellid = sibling_id;
794 def->mem_nodes[sibling_id].ndistances = ndistances;
795 def->mem_nodes[sibling_id].distances = rdist;
796 }
797
798 rdist[cur_cell].cellid = cur_cell;
799 if (!rdist[cur_cell].value)
800 rdist[cur_cell].value = sibling_value;
801 }
802
803 ret = 0;
804
805 cleanup:
806 if (ret < 0) {
807 for (i = 0; i < ndistances; i++)
808 VIR_FREE(def->mem_nodes[i].distances);
809 def->mem_nodes[i].ndistances = 0;
810 }
811 VIR_FREE(nodes);
812
813 return ret;
814 }
815
816
817 static int
virDomainNumaDefNodeCacheParseXML(virDomainNuma * def,xmlXPathContextPtr ctxt,unsigned int cur_cell)818 virDomainNumaDefNodeCacheParseXML(virDomainNuma *def,
819 xmlXPathContextPtr ctxt,
820 unsigned int cur_cell)
821 {
822 g_autofree xmlNodePtr *nodes = NULL;
823 int n;
824 size_t i;
825
826 if ((n = virXPathNodeSet("./cache", ctxt, &nodes)) < 0)
827 return -1;
828
829 def->mem_nodes[cur_cell].caches = g_new0(virNumaCache, n);
830
831 for (i = 0; i < n; i++) {
832 VIR_XPATH_NODE_AUTORESTORE(ctxt)
833 virNumaCache *cache = &def->mem_nodes[cur_cell].caches[i];
834 g_autofree char *tmp = NULL;
835 unsigned int level;
836 int associativity;
837 int policy;
838 unsigned long long size;
839 unsigned long long line;
840
841 if (!(tmp = virXMLPropString(nodes[i], "level"))) {
842 virReportError(VIR_ERR_XML_ERROR,
843 _("Missing 'level' attribute in cache "
844 "element for NUMA node %d"),
845 cur_cell);
846 return -1;
847 }
848
849 if (virStrToLong_uip(tmp, NULL, 10, &level) < 0 ||
850 level == 0) {
851 virReportError(VIR_ERR_XML_ERROR,
852 _("Invalid 'level' attribute in cache "
853 "element for NUMA node %d"),
854 cur_cell);
855 return -1;
856 }
857 VIR_FREE(tmp);
858
859 if (!(tmp = virXMLPropString(nodes[i], "associativity"))) {
860 virReportError(VIR_ERR_XML_ERROR,
861 _("Missing 'associativity' attribute in cache "
862 "element for NUMA node %d"),
863 cur_cell);
864 return -1;
865 }
866
867 if ((associativity = virNumaCacheAssociativityTypeFromString(tmp)) < 0) {
868 virReportError(VIR_ERR_XML_ERROR,
869 _("Invalid cache associativity '%s'"),
870 tmp);
871 return -1;
872 }
873 VIR_FREE(tmp);
874
875 if (!(tmp = virXMLPropString(nodes[i], "policy"))) {
876 virReportError(VIR_ERR_XML_ERROR,
877 _("Missing 'policy' attribute in cache "
878 "element for NUMA node %d"),
879 cur_cell);
880 }
881
882 if ((policy = virNumaCachePolicyTypeFromString(tmp)) < 0) {
883 virReportError(VIR_ERR_XML_ERROR,
884 _("Invalid cache policy '%s'"),
885 tmp);
886 return -1;
887 }
888 VIR_FREE(tmp);
889
890 ctxt->node = nodes[i];
891 if (virDomainParseMemory("./size/@value", "./size/unit",
892 ctxt, &size, true, false) < 0)
893 return -1;
894
895 if (virParseScaledValue("./line/@value", "./line/unit",
896 ctxt, &line, 1, ULLONG_MAX, true) < 0)
897 return -1;
898
899 *cache = (virNumaCache){level, size, line, associativity, policy};
900 def->mem_nodes[cur_cell].ncaches++;
901 }
902
903 return 0;
904 }
905
906
907 int
virDomainNumaDefParseXML(virDomainNuma * def,xmlXPathContextPtr ctxt)908 virDomainNumaDefParseXML(virDomainNuma *def,
909 xmlXPathContextPtr ctxt)
910 {
911 g_autofree xmlNodePtr *cell = NULL;
912 g_autofree xmlNodePtr *interconnect = NULL;
913
914 int n;
915 size_t i, j;
916
917 /* check if NUMA definition is present */
918 if (!virXPathNode("./cpu/numa[1]", ctxt))
919 return 0;
920
921 if ((n = virXPathNodeSet("./cpu/numa[1]/cell", ctxt, &cell)) <= 0) {
922 virReportError(VIR_ERR_XML_ERROR, "%s",
923 _("NUMA topology defined without NUMA cells"));
924 return -1;
925 }
926
927 def->mem_nodes = g_new0(struct _virDomainNumaNode, n);
928 def->nmem_nodes = n;
929
930 for (i = 0; i < n; i++) {
931 VIR_XPATH_NODE_AUTORESTORE(ctxt)
932 g_autofree char *tmp = NULL;
933 int rc;
934 unsigned int cur_cell;
935
936 if ((rc = virXMLPropUInt(cell[i], "id", 10, VIR_XML_PROP_NONE,
937 &cur_cell)) < 0)
938 return -1;
939
940 if (rc == 0)
941 cur_cell = i;
942
943 /* cells are in order of parsing or explicitly numbered */
944 if (cur_cell >= n) {
945 virReportError(VIR_ERR_XML_ERROR, "%s",
946 _("Exactly one 'cell' element per guest "
947 "NUMA cell allowed, non-contiguous ranges or "
948 "ranges not starting from 0 are not allowed"));
949 return -1;
950 }
951
952 if (def->mem_nodes[cur_cell].mem) {
953 virReportError(VIR_ERR_XML_ERROR,
954 _("Duplicate NUMA cell info for cell id '%u'"),
955 cur_cell);
956 return -1;
957 }
958
959 if ((tmp = virXMLPropString(cell[i], "cpus"))) {
960 g_autoptr(virBitmap) cpumask = NULL;
961
962 if (virBitmapParse(tmp, &cpumask, VIR_DOMAIN_CPUMASK_LEN) < 0)
963 return -1;
964
965 if (!virBitmapIsAllClear(cpumask))
966 def->mem_nodes[cur_cell].cpumask = g_steal_pointer(&cpumask);
967 }
968
969 for (j = 0; j < n; j++) {
970 if (j == cur_cell ||
971 !def->mem_nodes[j].cpumask ||
972 !def->mem_nodes[cur_cell].cpumask)
973 continue;
974
975 if (virBitmapOverlaps(def->mem_nodes[j].cpumask,
976 def->mem_nodes[cur_cell].cpumask)) {
977 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
978 _("NUMA cells %u and %zu have overlapping vCPU ids"),
979 cur_cell, j);
980 return -1;
981 }
982 }
983
984 ctxt->node = cell[i];
985 if (virDomainParseMemory("./@memory", "./@unit", ctxt,
986 &def->mem_nodes[cur_cell].mem, true, false) < 0)
987 return -1;
988
989 if (virXMLPropEnum(cell[i], "memAccess",
990 virDomainMemoryAccessTypeFromString,
991 VIR_XML_PROP_NONZERO,
992 &def->mem_nodes[cur_cell].memAccess) < 0)
993 return -1;
994
995 if (virXMLPropTristateBool(cell[i], "discard", VIR_XML_PROP_NONE,
996 &def->mem_nodes[cur_cell].discard) < 0)
997 return -1;
998
999 /* Parse NUMA distances info */
1000 if (virDomainNumaDefNodeDistanceParseXML(def, ctxt, cur_cell) < 0)
1001 return -1;
1002
1003 /* Parse cache info */
1004 if (virDomainNumaDefNodeCacheParseXML(def, ctxt, cur_cell) < 0)
1005 return -1;
1006 }
1007
1008 if ((n = virXPathNodeSet("./cpu/numa[1]/interconnects[1]/latency|"
1009 "./cpu/numa[1]/interconnects[1]/bandwidth", ctxt,
1010 &interconnect)) < 0)
1011 return -1;
1012
1013 def->interconnects = g_new0(virNumaInterconnect, n);
1014 for (i = 0; i < n; i++) {
1015 virNumaInterconnectType type;
1016 unsigned int initiator;
1017 unsigned int target;
1018 unsigned int cache = 0;
1019 virMemoryLatency accessType;
1020 unsigned long long value;
1021
1022 if (virXMLNodeNameEqual(interconnect[i], "latency")) {
1023 type = VIR_NUMA_INTERCONNECT_TYPE_LATENCY;
1024
1025 if (virXMLPropULongLong(interconnect[i], "value", 10,
1026 VIR_XML_PROP_REQUIRED, &value) < 0)
1027 return -1;
1028 } else if (virXMLNodeNameEqual(interconnect[i], "bandwidth")) {
1029 VIR_XPATH_NODE_AUTORESTORE(ctxt)
1030 type = VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH;
1031
1032 ctxt->node = interconnect[i];
1033
1034 if (virDomainParseMemory("./@value", "./@unit", ctxt, &value, true, false) < 0)
1035 return -1;
1036 } else {
1037 /* Ignore yet unknown child elements. */
1038 continue;
1039 }
1040
1041 if (virXMLPropUInt(interconnect[i], "initiator", 10, VIR_XML_PROP_REQUIRED,
1042 &initiator) < 0)
1043 return -1;
1044
1045 if (virXMLPropUInt(interconnect[i], "target", 10, VIR_XML_PROP_REQUIRED,
1046 &target) < 0)
1047 return -1;
1048
1049 if (virXMLPropUInt(interconnect[i], "cache", 10, VIR_XML_PROP_NONE,
1050 &cache) < 0)
1051 return -1;
1052
1053 if (virXMLPropEnum(interconnect[i], "type",
1054 virMemoryLatencyTypeFromString,
1055 VIR_XML_PROP_REQUIRED | VIR_XML_PROP_NONZERO,
1056 &accessType) < 0)
1057 return -1;
1058
1059 def->interconnects[i] = (virNumaInterconnect) {type, initiator, target,
1060 cache, accessType, value};
1061 def->ninterconnects++;
1062 }
1063
1064 return 0;
1065 }
1066
1067
1068 int
virDomainNumaDefFormatXML(virBuffer * buf,virDomainNuma * def)1069 virDomainNumaDefFormatXML(virBuffer *buf,
1070 virDomainNuma *def)
1071 {
1072 virDomainMemoryAccess memAccess;
1073 virTristateBool discard;
1074 size_t ncells = virDomainNumaGetNodeCount(def);
1075 size_t i;
1076
1077 if (ncells == 0)
1078 return 0;
1079
1080 virBufferAddLit(buf, "<numa>\n");
1081 virBufferAdjustIndent(buf, 2);
1082 for (i = 0; i < ncells; i++) {
1083 virBitmap *cpumask = virDomainNumaGetNodeCpumask(def, i);
1084 g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
1085 g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1086
1087 memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i);
1088 discard = virDomainNumaGetNodeDiscard(def, i);
1089
1090 virBufferAsprintf(&attrBuf, " id='%zu'", i);
1091
1092 if (cpumask) {
1093 g_autofree char *cpustr = virBitmapFormat(cpumask);
1094
1095 if (!cpustr)
1096 return -1;
1097 virBufferAsprintf(&attrBuf, " cpus='%s'", cpustr);
1098 }
1099 virBufferAsprintf(&attrBuf, " memory='%llu'",
1100 virDomainNumaGetNodeMemorySize(def, i));
1101 virBufferAddLit(&attrBuf, " unit='KiB'");
1102 if (memAccess)
1103 virBufferAsprintf(&attrBuf, " memAccess='%s'",
1104 virDomainMemoryAccessTypeToString(memAccess));
1105
1106 if (discard)
1107 virBufferAsprintf(&attrBuf, " discard='%s'",
1108 virTristateBoolTypeToString(discard));
1109
1110 virNumaDistanceFormat(&childBuf,
1111 def->mem_nodes[i].distances,
1112 def->mem_nodes[i].ndistances);
1113
1114 virNumaCacheFormat(&childBuf,
1115 def->mem_nodes[i].caches,
1116 def->mem_nodes[i].ncaches);
1117
1118 virXMLFormatElement(buf, "cell", &attrBuf, &childBuf);
1119 }
1120
1121 virNumaInterconnectFormat(buf, def->interconnects, def->ninterconnects);
1122
1123 virBufferAdjustIndent(buf, -2);
1124 virBufferAddLit(buf, "</numa>\n");
1125
1126 return 0;
1127 }
1128
1129
1130 int
virDomainNumaDefValidate(const virDomainNuma * def)1131 virDomainNumaDefValidate(const virDomainNuma *def)
1132 {
1133 size_t i;
1134 size_t j;
1135
1136 if (!def)
1137 return 0;
1138
1139 for (i = 0; i < def->nmem_nodes; i++) {
1140 const virDomainNumaNode *node = &def->mem_nodes[i];
1141 g_autoptr(virBitmap) levelsSeen = virBitmapNew(0);
1142
1143 for (j = 0; j < node->ncaches; j++) {
1144 const virNumaCache *cache = &node->caches[j];
1145
1146 /* Relax this if there's ever fourth layer of cache */
1147 if (cache->level > 3) {
1148 virReportError(VIR_ERR_XML_ERROR, "%s",
1149 _("Ain't nobody heard of that much cache level"));
1150 return -1;
1151 }
1152
1153 if (virBitmapIsBitSet(levelsSeen, cache->level)) {
1154 virReportError(VIR_ERR_XML_ERROR,
1155 _("Cache level '%u' already defined"),
1156 cache->level);
1157 return -1;
1158 }
1159
1160 if (virBitmapSetBitExpand(levelsSeen, cache->level))
1161 return -1;
1162 }
1163 }
1164
1165 for (i = 0; i < def->ninterconnects; i++) {
1166 const virNumaInterconnect *l = &def->interconnects[i];
1167
1168 if (l->initiator >= def->nmem_nodes) {
1169 virReportError(VIR_ERR_XML_ERROR, "%s",
1170 _("'initiator' refers to a non-existent NUMA node"));
1171 return -1;
1172 }
1173
1174 if (l->target >= def->nmem_nodes) {
1175 virReportError(VIR_ERR_XML_ERROR, "%s",
1176 _("'target' refers to a non-existent NUMA node"));
1177 return -1;
1178 }
1179
1180 if (!def->mem_nodes[l->initiator].cpumask) {
1181 virReportError(VIR_ERR_XML_ERROR, "%s",
1182 _("NUMA nodes without CPUs can't be initiator"));
1183 return -1;
1184 }
1185
1186 if (l->cache > 0) {
1187 for (j = 0; j < def->mem_nodes[l->target].ncaches; j++) {
1188 const virNumaCache *cache = &def->mem_nodes[l->target].caches[j];
1189
1190 if (l->cache == cache->level)
1191 break;
1192 }
1193
1194 if (j == def->mem_nodes[l->target].ncaches) {
1195 virReportError(VIR_ERR_XML_ERROR, "%s",
1196 _("'cache' refers to a non-existent NUMA node cache"));
1197 return -1;
1198 }
1199 }
1200
1201 for (j = 0; j < i; j++) {
1202 const virNumaInterconnect *ll = &def->interconnects[j];
1203
1204 if (l->type == ll->type &&
1205 l->initiator == ll->initiator &&
1206 l->target == ll->target &&
1207 l->cache == ll->cache &&
1208 l->accessType == ll->accessType) {
1209 virReportError(VIR_ERR_XML_ERROR, "%s",
1210 _("Duplicate info for NUMA latencies"));
1211 return -1;
1212 }
1213
1214
1215 if (l->initiator != l->target &&
1216 l->initiator == ll->target &&
1217 l->target == ll->initiator) {
1218 virReportError(VIR_ERR_XML_ERROR, "%s",
1219 _("Link already defined"));
1220 return -1;
1221 }
1222 }
1223 }
1224
1225 return 0;
1226 }
1227
1228
1229 unsigned int
virDomainNumaGetCPUCountTotal(virDomainNuma * numa)1230 virDomainNumaGetCPUCountTotal(virDomainNuma *numa)
1231 {
1232 size_t i;
1233 unsigned int ret = 0;
1234
1235 for (i = 0; i < numa->nmem_nodes; i++) {
1236 virBitmap *cpumask = virDomainNumaGetNodeCpumask(numa, i);
1237
1238 if (cpumask)
1239 ret += virBitmapCountBits(cpumask);
1240 }
1241
1242 return ret;
1243 }
1244
1245 unsigned int
virDomainNumaGetMaxCPUID(virDomainNuma * numa)1246 virDomainNumaGetMaxCPUID(virDomainNuma *numa)
1247 {
1248 size_t i;
1249 unsigned int ret = 0;
1250
1251 for (i = 0; i < numa->nmem_nodes; i++) {
1252 virBitmap *cpumask = virDomainNumaGetNodeCpumask(numa, i);
1253 int bit;
1254
1255 if (cpumask) {
1256 bit = virBitmapLastSetBit(cpumask);
1257 if (bit > ret)
1258 ret = bit;
1259 }
1260 }
1261
1262 return ret;
1263 }
1264
1265
1266 virDomainNuma *
virDomainNumaNew(void)1267 virDomainNumaNew(void)
1268 {
1269 return g_new0(virDomainNuma, 1);
1270 }
1271
1272
1273 bool
virDomainNumaCheckABIStability(virDomainNuma * src,virDomainNuma * tgt)1274 virDomainNumaCheckABIStability(virDomainNuma *src,
1275 virDomainNuma *tgt)
1276 {
1277 size_t i;
1278 size_t j;
1279
1280 if (virDomainNumaGetNodeCount(src) != virDomainNumaGetNodeCount(tgt)) {
1281 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1282 _("Target NUMA node count '%zu' doesn't match "
1283 "source '%zu'"),
1284 virDomainNumaGetNodeCount(tgt),
1285 virDomainNumaGetNodeCount(src));
1286 return false;
1287 }
1288
1289 for (i = 0; i < virDomainNumaGetNodeCount(src); i++) {
1290 if (virDomainNumaGetNodeMemorySize(src, i) !=
1291 virDomainNumaGetNodeMemorySize(tgt, i)) {
1292 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1293 _("Size of target NUMA node %zu (%llu) doesn't "
1294 "match source (%llu)"), i,
1295 virDomainNumaGetNodeMemorySize(tgt, i),
1296 virDomainNumaGetNodeMemorySize(src, i));
1297 return false;
1298 }
1299
1300 if (!virBitmapEqual(virDomainNumaGetNodeCpumask(src, i),
1301 virDomainNumaGetNodeCpumask(tgt, i))) {
1302 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1303 _("Processor mask of target NUMA node %zu doesn't "
1304 "match source"), i);
1305 return false;
1306 }
1307
1308 for (j = 0; j < virDomainNumaGetNodeCount(src); j++) {
1309 if (virDomainNumaGetNodeDistance(src, i, j) !=
1310 virDomainNumaGetNodeDistance(tgt, i, j)) {
1311 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1312 _("Target NUMA distance from %zu to %zu "
1313 "doesn't match source"), i, j);
1314
1315 return false;
1316 }
1317 }
1318 }
1319
1320 return true;
1321 }
1322
1323
1324 size_t
virDomainNumaGetNodeCount(virDomainNuma * numa)1325 virDomainNumaGetNodeCount(virDomainNuma *numa)
1326 {
1327 if (!numa)
1328 return 0;
1329
1330 return numa->nmem_nodes;
1331 }
1332
1333
1334 size_t
virDomainNumaSetNodeCount(virDomainNuma * numa,size_t nmem_nodes)1335 virDomainNumaSetNodeCount(virDomainNuma *numa, size_t nmem_nodes)
1336 {
1337 if (!nmem_nodes) {
1338 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1339 _("Cannot set an empty mem_nodes set"));
1340 return 0;
1341 }
1342
1343 if (numa->mem_nodes) {
1344 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
1345 _("Cannot alter an existing mem_nodes set"));
1346 return 0;
1347 }
1348
1349 numa->mem_nodes = g_new0(struct _virDomainNumaNode, nmem_nodes);
1350
1351 numa->nmem_nodes = nmem_nodes;
1352
1353 return numa->nmem_nodes;
1354 }
1355
1356
1357 bool
virDomainNumaNodeDistanceIsUsingDefaults(virDomainNuma * numa,size_t node,size_t sibling)1358 virDomainNumaNodeDistanceIsUsingDefaults(virDomainNuma *numa,
1359 size_t node,
1360 size_t sibling)
1361 {
1362 if (node >= numa->nmem_nodes ||
1363 sibling >= numa->nmem_nodes)
1364 return false;
1365
1366 if (!numa->mem_nodes[node].distances)
1367 return true;
1368
1369 if (numa->mem_nodes[node].distances[sibling].value == LOCAL_DISTANCE ||
1370 numa->mem_nodes[node].distances[sibling].value == REMOTE_DISTANCE)
1371 return true;
1372
1373 return false;
1374 }
1375
1376
1377 bool
virDomainNumaNodesDistancesAreBeingSet(virDomainNuma * numa)1378 virDomainNumaNodesDistancesAreBeingSet(virDomainNuma *numa)
1379 {
1380 size_t ncells = virDomainNumaGetNodeCount(numa);
1381 size_t i, j;
1382
1383 for (i = 0; i < ncells; i++) {
1384 for (j = 0; j < ncells; j++) {
1385 if (virDomainNumaNodeDistanceIsUsingDefaults(numa, i, j))
1386 continue;
1387
1388 return true;
1389 }
1390 }
1391
1392 return false;
1393 }
1394
1395
1396 size_t
virDomainNumaGetNodeDistance(virDomainNuma * numa,size_t node,size_t cellid)1397 virDomainNumaGetNodeDistance(virDomainNuma *numa,
1398 size_t node,
1399 size_t cellid)
1400 {
1401 virNumaDistance *distances = NULL;
1402
1403 if (node < numa->nmem_nodes)
1404 distances = numa->mem_nodes[node].distances;
1405
1406 /*
1407 * Present the configured distance value. If
1408 * out of range or not available set the platform
1409 * defined default for local and remote nodes.
1410 */
1411 if (!distances ||
1412 cellid >= numa->nmem_nodes ||
1413 !distances[cellid].value)
1414 return (node == cellid) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
1415
1416 return distances[cellid].value;
1417 }
1418
1419
1420 int
virDomainNumaSetNodeDistance(virDomainNuma * numa,size_t node,size_t cellid,unsigned int value)1421 virDomainNumaSetNodeDistance(virDomainNuma *numa,
1422 size_t node,
1423 size_t cellid,
1424 unsigned int value)
1425 {
1426 virNumaDistance *distances;
1427
1428 if (node >= numa->nmem_nodes) {
1429 virReportError(VIR_ERR_INTERNAL_ERROR,
1430 _("Argument 'node' %zu outranges "
1431 "defined number of NUMA nodes"),
1432 node);
1433 return -1;
1434 }
1435
1436 distances = numa->mem_nodes[node].distances;
1437 if (!distances ||
1438 cellid >= numa->mem_nodes[node].ndistances) {
1439 virReportError(VIR_ERR_XML_ERROR, "%s",
1440 _("Arguments under memnode element do not "
1441 "correspond with existing guest's NUMA cell"));
1442 return -1;
1443 }
1444
1445 /*
1446 * Advanced Configuration and Power Interface
1447 * Specification version 6.1. Chapter 5.2.17
1448 * System Locality Distance Information Table
1449 * ... Distance values of 0-9 are reserved.
1450 */
1451 if (value < LOCAL_DISTANCE ||
1452 value > UNREACHABLE) {
1453 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1454 _("Distance value of %d is not in valid range"),
1455 value);
1456 return -1;
1457 }
1458
1459 if (value == LOCAL_DISTANCE && node != cellid) {
1460 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
1461 _("Distance value %d under node %zu is "
1462 "LOCAL_DISTANCE and should be set to 10"),
1463 value, node);
1464 return -1;
1465 }
1466
1467 distances[cellid].cellid = cellid;
1468 distances[cellid].value = value;
1469
1470 return distances[cellid].value;
1471 }
1472
1473
1474 size_t
virDomainNumaSetNodeDistanceCount(virDomainNuma * numa,size_t node,size_t ndistances)1475 virDomainNumaSetNodeDistanceCount(virDomainNuma *numa,
1476 size_t node,
1477 size_t ndistances)
1478 {
1479 virNumaDistance *distances;
1480
1481 distances = numa->mem_nodes[node].distances;
1482 if (distances) {
1483 virReportError(VIR_ERR_INTERNAL_ERROR,
1484 _("Cannot alter an existing nmem_nodes distances set for node: %zu"),
1485 node);
1486 return 0;
1487 }
1488
1489 distances = g_new0(virNumaDistance, ndistances);
1490
1491 numa->mem_nodes[node].distances = distances;
1492 numa->mem_nodes[node].ndistances = ndistances;
1493
1494 return numa->mem_nodes[node].ndistances;
1495 }
1496
1497
1498 virBitmap *
virDomainNumaGetNodeCpumask(virDomainNuma * numa,size_t node)1499 virDomainNumaGetNodeCpumask(virDomainNuma *numa,
1500 size_t node)
1501 {
1502 return numa->mem_nodes[node].cpumask;
1503 }
1504
1505
1506 void
virDomainNumaSetNodeCpumask(virDomainNuma * numa,size_t node,virBitmap * cpumask)1507 virDomainNumaSetNodeCpumask(virDomainNuma *numa,
1508 size_t node,
1509 virBitmap *cpumask)
1510 {
1511 numa->mem_nodes[node].cpumask = cpumask;
1512 }
1513
1514
1515 virDomainMemoryAccess
virDomainNumaGetNodeMemoryAccessMode(virDomainNuma * numa,size_t node)1516 virDomainNumaGetNodeMemoryAccessMode(virDomainNuma *numa,
1517 size_t node)
1518 {
1519 return numa->mem_nodes[node].memAccess;
1520 }
1521
1522
1523 virTristateBool
virDomainNumaGetNodeDiscard(virDomainNuma * numa,size_t node)1524 virDomainNumaGetNodeDiscard(virDomainNuma *numa,
1525 size_t node)
1526 {
1527 return numa->mem_nodes[node].discard;
1528 }
1529
1530
1531 unsigned long long
virDomainNumaGetNodeMemorySize(virDomainNuma * numa,size_t node)1532 virDomainNumaGetNodeMemorySize(virDomainNuma *numa,
1533 size_t node)
1534 {
1535 return numa->mem_nodes[node].mem;
1536 }
1537
1538
1539 void
virDomainNumaSetNodeMemorySize(virDomainNuma * numa,size_t node,unsigned long long size)1540 virDomainNumaSetNodeMemorySize(virDomainNuma *numa,
1541 size_t node,
1542 unsigned long long size)
1543 {
1544 numa->mem_nodes[node].mem = size;
1545 }
1546
1547
1548 unsigned long long
virDomainNumaGetMemorySize(virDomainNuma * numa)1549 virDomainNumaGetMemorySize(virDomainNuma *numa)
1550 {
1551 size_t i;
1552 unsigned long long ret = 0;
1553
1554 for (i = 0; i < numa->nmem_nodes; i++)
1555 ret += numa->mem_nodes[i].mem;
1556
1557 return ret;
1558 }
1559
1560
1561 int
virDomainNumaFillCPUsInNode(virDomainNuma * numa,size_t node,unsigned int maxCpus)1562 virDomainNumaFillCPUsInNode(virDomainNuma *numa,
1563 size_t node,
1564 unsigned int maxCpus)
1565 {
1566 g_autoptr(virBitmap) maxCPUsBitmap = virBitmapNew(maxCpus);
1567 size_t i;
1568
1569 if (node >= virDomainNumaGetNodeCount(numa))
1570 return -1;
1571
1572 virBitmapSetAll(maxCPUsBitmap);
1573
1574 for (i = 0; i < numa->nmem_nodes; i++) {
1575 virBitmap *nodeCpus = virDomainNumaGetNodeCpumask(numa, i);
1576
1577 if (i == node || !nodeCpus)
1578 continue;
1579
1580 virBitmapSubtract(maxCPUsBitmap, nodeCpus);
1581 }
1582
1583 if (!virBitmapEqual(numa->mem_nodes[node].cpumask, maxCPUsBitmap)) {
1584 virBitmapFree(numa->mem_nodes[node].cpumask);
1585 numa->mem_nodes[node].cpumask = g_steal_pointer(&maxCPUsBitmap);
1586 }
1587
1588 return 0;
1589 }
1590
1591
1592 bool
virDomainNumaHasHMAT(const virDomainNuma * numa)1593 virDomainNumaHasHMAT(const virDomainNuma *numa)
1594 {
1595 size_t i;
1596
1597 if (!numa)
1598 return false;
1599
1600 if (numa->ninterconnects)
1601 return true;
1602
1603 for (i = 0; i < numa->nmem_nodes; i++) {
1604 if (numa->mem_nodes[i].ncaches)
1605 return true;
1606 }
1607
1608 return false;
1609 }
1610
1611
1612 size_t
virDomainNumaGetNodeCacheCount(const virDomainNuma * numa,size_t node)1613 virDomainNumaGetNodeCacheCount(const virDomainNuma *numa,
1614 size_t node)
1615 {
1616 if (!numa || node >= numa->nmem_nodes)
1617 return 0;
1618
1619 return numa->mem_nodes[node].ncaches;
1620 }
1621
1622
1623 int
virDomainNumaGetNodeCache(const virDomainNuma * numa,size_t node,size_t cache,unsigned int * level,unsigned int * size,unsigned int * line,virNumaCacheAssociativity * associativity,virNumaCachePolicy * policy)1624 virDomainNumaGetNodeCache(const virDomainNuma *numa,
1625 size_t node,
1626 size_t cache,
1627 unsigned int *level,
1628 unsigned int *size,
1629 unsigned int *line,
1630 virNumaCacheAssociativity *associativity,
1631 virNumaCachePolicy *policy)
1632 {
1633 const virDomainNumaNode *cell;
1634
1635 if (!numa || node >= numa->nmem_nodes)
1636 return -1;
1637
1638 cell = &numa->mem_nodes[node];
1639
1640 if (cache >= cell->ncaches)
1641 return -1;
1642
1643 *level = cell->caches[cache].level;
1644 *size = cell->caches[cache].size;
1645 *line = cell->caches[cache].line;
1646 *associativity = cell->caches[cache].associativity;
1647 *policy = cell->caches[cache].policy;
1648 return 0;
1649 }
1650
1651
1652 ssize_t
virDomainNumaGetNodeInitiator(const virDomainNuma * numa,size_t node)1653 virDomainNumaGetNodeInitiator(const virDomainNuma *numa,
1654 size_t node)
1655 {
1656 size_t i;
1657 unsigned int maxBandwidth = 0;
1658 ssize_t candidateBandwidth = -1;
1659 unsigned int minLatency = UINT_MAX;
1660 ssize_t candidateLatency = -1;
1661
1662 if (!numa || node >= numa->nmem_nodes)
1663 return -1;
1664
1665 /* A NUMA node which has at least one vCPU is initiator to itself by
1666 * definition. */
1667 if (numa->mem_nodes[node].cpumask)
1668 return node;
1669
1670 /* For the rest, "NUMA node that has best performance (the lowest
1671 * latency or largest bandwidth) to this NUMA node." */
1672 for (i = 0; i < numa->ninterconnects; i++) {
1673 const virNumaInterconnect *l = &numa->interconnects[i];
1674
1675 if (l->target != node)
1676 continue;
1677
1678 switch (l->type) {
1679 case VIR_NUMA_INTERCONNECT_TYPE_LATENCY:
1680 if (l->value < minLatency) {
1681 minLatency = l->value;
1682 candidateLatency = l->initiator;
1683 }
1684 break;
1685
1686 case VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH:
1687 if (l->value > maxBandwidth) {
1688 maxBandwidth = l->value;
1689 candidateBandwidth = l->initiator;
1690 }
1691 break;
1692 }
1693 }
1694
1695 if (candidateLatency >= 0)
1696 return candidateLatency;
1697
1698 return candidateBandwidth;
1699 }
1700
1701
1702 size_t
virDomainNumaGetInterconnectsCount(const virDomainNuma * numa)1703 virDomainNumaGetInterconnectsCount(const virDomainNuma *numa)
1704 {
1705 if (!numa)
1706 return 0;
1707
1708 return numa->ninterconnects;
1709 }
1710
1711
1712 int
virDomainNumaGetInterconnect(const virDomainNuma * numa,size_t i,virNumaInterconnectType * type,unsigned int * initiator,unsigned int * target,unsigned int * cache,virMemoryLatency * accessType,unsigned long * value)1713 virDomainNumaGetInterconnect(const virDomainNuma *numa,
1714 size_t i,
1715 virNumaInterconnectType *type,
1716 unsigned int *initiator,
1717 unsigned int *target,
1718 unsigned int *cache,
1719 virMemoryLatency *accessType,
1720 unsigned long *value)
1721 {
1722 const virNumaInterconnect *l;
1723
1724 if (!numa || i >= numa->ninterconnects)
1725 return -1;
1726
1727 l = &numa->interconnects[i];
1728 *type = l->type;
1729 *initiator = l->initiator;
1730 *target = l->target;
1731 *cache = l->cache;
1732 *accessType = l->accessType;
1733 *value = l->value;
1734 return 0;
1735 }
1736
1737
1738 void
virNumaDistanceFormat(virBuffer * buf,const virNumaDistance * distances,size_t ndistances)1739 virNumaDistanceFormat(virBuffer *buf,
1740 const virNumaDistance *distances,
1741 size_t ndistances)
1742 {
1743 g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1744 size_t i;
1745
1746 for (i = 0; i < ndistances; i++) {
1747 if (distances[i].value == 0)
1748 continue;
1749 virBufferAddLit(&childBuf, "<sibling");
1750 virBufferAsprintf(&childBuf, " id='%d'", distances[i].cellid);
1751 virBufferAsprintf(&childBuf, " value='%d'", distances[i].value);
1752 virBufferAddLit(&childBuf, "/>\n");
1753 }
1754
1755 virXMLFormatElement(buf, "distances", NULL, &childBuf);
1756 }
1757
1758
1759 void
virNumaCacheFormat(virBuffer * buf,const virNumaCache * caches,size_t ncaches)1760 virNumaCacheFormat(virBuffer *buf,
1761 const virNumaCache *caches,
1762 size_t ncaches)
1763 {
1764 size_t i;
1765
1766 for (i = 0; i < ncaches; i++) {
1767 const virNumaCache *cache = &caches[i];
1768 g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
1769 g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1770
1771 virBufferAsprintf(&attrBuf, " level='%u'", cache->level);
1772 if (cache->associativity) {
1773 virBufferAsprintf(&attrBuf, " associativity='%s'",
1774 virNumaCacheAssociativityTypeToString(cache->associativity));
1775 }
1776
1777 if (cache->policy) {
1778 virBufferAsprintf(&attrBuf, " policy='%s'",
1779 virNumaCachePolicyTypeToString(cache->policy));
1780 }
1781
1782 virBufferAsprintf(&childBuf,
1783 "<size value='%u' unit='KiB'/>\n",
1784 cache->size);
1785
1786 if (cache->line) {
1787 virBufferAsprintf(&childBuf,
1788 "<line value='%u' unit='B'/>\n",
1789 cache->line);
1790 }
1791
1792 virXMLFormatElement(buf, "cache", &attrBuf, &childBuf);
1793 }
1794 }
1795
1796
1797 void
virNumaInterconnectFormat(virBuffer * buf,const virNumaInterconnect * interconnects,size_t ninterconnects)1798 virNumaInterconnectFormat(virBuffer *buf,
1799 const virNumaInterconnect *interconnects,
1800 size_t ninterconnects)
1801 {
1802 g_auto(virBuffer) childBuf = VIR_BUFFER_INIT_CHILD(buf);
1803 size_t i;
1804
1805 for (i = 0; i < ninterconnects; i++) {
1806 const virNumaInterconnect *l = &interconnects[i];
1807 g_auto(virBuffer) attrBuf = VIR_BUFFER_INITIALIZER;
1808 const char *elem = NULL;
1809
1810 switch (l->type) {
1811 case VIR_NUMA_INTERCONNECT_TYPE_LATENCY:
1812 elem = "latency";
1813 break;
1814 case VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH:
1815 elem = "bandwidth";
1816 break;
1817 }
1818
1819 virBufferAsprintf(&attrBuf,
1820 " initiator='%u' target='%u'",
1821 l->initiator, l->target);
1822
1823 if (l->cache > 0) {
1824 virBufferAsprintf(&attrBuf,
1825 " cache='%u'",
1826 l->cache);
1827 }
1828
1829 virBufferAsprintf(&attrBuf,
1830 " type='%s' value='%lu'",
1831 virMemoryLatencyTypeToString(l->accessType),
1832 l->value);
1833
1834 if (l->type == VIR_NUMA_INTERCONNECT_TYPE_BANDWIDTH)
1835 virBufferAddLit(&attrBuf, " unit='KiB'");
1836
1837 virXMLFormatElement(&childBuf, elem, &attrBuf, NULL);
1838 }
1839
1840 virXMLFormatElement(buf, "interconnects", NULL, &childBuf);
1841 }
1842