1 /*
2  * virhostcpu.c: helper APIs for host CPU info
3  *
4  * Copyright (C) 2006-2016 Red Hat, Inc.
5  * Copyright (C) 2006 Daniel P. Berrange
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library.  If not, see
19  * <http://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
24 #include <dirent.h>
25 #include <fcntl.h>
26 #ifndef WIN32
27 # include <sys/ioctl.h>
28 #endif
29 #include <unistd.h>
30 
31 #if WITH_LINUX_KVM_H
32 # include <linux/kvm.h>
33 #endif
34 
35 #if defined(__FreeBSD__) || defined(__APPLE__) || defined(__DragonFly__)
36 # include <sys/time.h>
37 # include <sys/types.h>
38 # include <sys/sysctl.h>
39 # include <sys/resource.h>
40 #endif
41 
42 #include "viralloc.h"
43 #define LIBVIRT_VIRHOSTCPUPRIV_H_ALLOW
44 #include "virhostcpupriv.h"
45 #include "virerror.h"
46 #include "virarch.h"
47 #include "virfile.h"
48 #include "virtypedparam.h"
49 #include "virstring.h"
50 #include "virnuma.h"
51 #include "virlog.h"
52 
53 #define VIR_FROM_THIS VIR_FROM_NONE
54 
55 VIR_LOG_INIT("util.hostcpu");
56 
57 #define KVM_DEVICE "/dev/kvm"
58 #define MSR_DEVICE "/dev/cpu/0/msr"
59 
60 
61 #if defined(__FreeBSD__) || defined(__APPLE__) || defined(__DragonFly__)
62 static int
virHostCPUGetCountAppleFreeBSD(void)63 virHostCPUGetCountAppleFreeBSD(void)
64 {
65     int ncpu_mib[2] = { CTL_HW, HW_NCPU };
66     unsigned long ncpu;
67     size_t ncpu_len = sizeof(ncpu);
68 
69     if (sysctl(ncpu_mib, 2, &ncpu, &ncpu_len, NULL, 0) == -1) {
70         virReportSystemError(errno, "%s", _("Cannot obtain CPU count"));
71         return -1;
72     }
73 
74     return ncpu;
75 }
76 #endif /* defined(__FreeBSD__) || defined(__APPLE__) || defined(__DragonFly__) */
77 
78 #if defined(__FreeBSD__) || defined(__DragonFly__)
79 # define BSD_CPU_STATS_ALL 4
80 # define BSD_MEMORY_STATS_ALL 4
81 
82 # define TICK_TO_NSEC (1000ull * 1000ull * 1000ull / (stathz ? stathz : hz))
83 
84 static int
virHostCPUGetStatsFreeBSD(int cpuNum,virNodeCPUStatsPtr params,int * nparams)85 virHostCPUGetStatsFreeBSD(int cpuNum,
86                           virNodeCPUStatsPtr params,
87                           int *nparams)
88 {
89     const char *sysctl_name;
90     g_autofree long *cpu_times = NULL;
91     struct clockinfo clkinfo;
92     size_t i, j, cpu_times_size, clkinfo_size;
93     int cpu_times_num, offset, hz, stathz;
94     struct field_cpu_map {
95         const char *field;
96         int idx[CPUSTATES];
97     } cpu_map[] = {
98         {VIR_NODE_CPU_STATS_KERNEL, {CP_SYS}},
99         {VIR_NODE_CPU_STATS_USER, {CP_USER, CP_NICE}},
100         {VIR_NODE_CPU_STATS_IDLE, {CP_IDLE}},
101         {VIR_NODE_CPU_STATS_INTR, {CP_INTR}},
102         {NULL, {0}}
103     };
104 
105     if ((*nparams) == 0) {
106         *nparams = BSD_CPU_STATS_ALL;
107         return 0;
108     }
109 
110     if ((*nparams) != BSD_CPU_STATS_ALL) {
111         virReportInvalidArg(*nparams,
112                             _("nparams in %s must be equal to %d"),
113                             __FUNCTION__, BSD_CPU_STATS_ALL);
114         return -1;
115     }
116 
117     clkinfo_size = sizeof(clkinfo);
118     if (sysctlbyname("kern.clockrate", &clkinfo, &clkinfo_size, NULL, 0) < 0) {
119         virReportSystemError(errno,
120                              _("sysctl failed for '%s'"),
121                              "kern.clockrate");
122         return -1;
123     }
124 
125     stathz = clkinfo.stathz;
126     hz = clkinfo.hz;
127 
128     if (cpuNum == VIR_NODE_CPU_STATS_ALL_CPUS) {
129         sysctl_name = "kern.cp_time";
130         cpu_times_num = 1;
131         offset = 0;
132     } else {
133         sysctl_name = "kern.cp_times";
134         cpu_times_num = virHostCPUGetCountAppleFreeBSD();
135 
136         if (cpuNum >= cpu_times_num) {
137             virReportInvalidArg(cpuNum,
138                                 _("Invalid cpuNum in %s"),
139                                 __FUNCTION__);
140             return -1;
141         }
142 
143         offset = cpu_times_num * CPUSTATES;
144     }
145 
146     cpu_times_size = sizeof(long) * cpu_times_num * CPUSTATES;
147 
148     cpu_times = g_new0(long, cpu_times_num * CPUSTATES);
149 
150     if (sysctlbyname(sysctl_name, cpu_times, &cpu_times_size, NULL, 0) < 0) {
151         virReportSystemError(errno,
152                              _("sysctl failed for '%s'"),
153                              sysctl_name);
154         return -1;
155     }
156 
157     for (i = 0; cpu_map[i].field != NULL; i++) {
158         virNodeCPUStatsPtr param = &params[i];
159 
160         if (virStrcpyStatic(param->field, cpu_map[i].field) < 0) {
161             virReportError(VIR_ERR_INTERNAL_ERROR,
162                            _("Field '%s' too long for destination"),
163                            cpu_map[i].field);
164             return -1;
165         }
166 
167         param->value = 0;
168         for (j = 0; j < G_N_ELEMENTS(cpu_map[i].idx); j++)
169             param->value += cpu_times[offset + cpu_map[i].idx[j]] * TICK_TO_NSEC;
170     }
171 
172     return 0;
173 }
174 
175 #endif /* __FreeBSD__ */
176 
177 /*
178  * Even though it doesn't exist on some platforms, the code is adjusted for
179  * graceful handling of that so that we don't have too many stub functions.
180  */
181 #define SYSFS_SYSTEM_PATH "/sys/devices/system"
182 
183 #ifdef __linux__
184 # define CPUINFO_PATH "/proc/cpuinfo"
185 # define PROCSTAT_PATH "/proc/stat"
186 
187 # define LINUX_NB_CPU_STATS 4
188 
189 int
virHostCPUGetSocket(unsigned int cpu,unsigned int * socket)190 virHostCPUGetSocket(unsigned int cpu, unsigned int *socket)
191 {
192     int tmp;
193     int ret = virFileReadValueInt(&tmp,
194                                   "%s/cpu/cpu%u/topology/physical_package_id",
195                                   SYSFS_SYSTEM_PATH, cpu);
196 
197     /* If the file is not there, it's 0 */
198     if (ret == -2)
199         tmp = 0;
200     else if (ret < 0)
201         return -1;
202 
203     /* Some architectures might have '-1' validly in the file, but that actually
204      * means there are no sockets, so from our point of view it's all one socket,
205      * i.e. socket 0.  Similarly when the file does not exist. */
206     if (tmp < 0)
207         tmp = 0;
208 
209     *socket = tmp;
210 
211     return 0;
212 }
213 
214 int
virHostCPUGetDie(unsigned int cpu,unsigned int * die)215 virHostCPUGetDie(unsigned int cpu, unsigned int *die)
216 {
217     int die_id;
218     int ret = virFileReadValueInt(&die_id,
219                                   "%s/cpu/cpu%u/topology/die_id",
220                                   SYSFS_SYSTEM_PATH, cpu);
221 
222     if (ret == -1)
223         return -1;
224 
225     /* If the file is not there, it's 0.
226      * Another alternative is die_id set to -1, meaning that
227      * the arch does not have die_id support. Set @die to
228      * 0 in this case too. */
229     if (ret == -2 || die_id < 0)
230         *die = 0;
231     else
232         *die = die_id;
233 
234     return 0;
235 }
236 
237 int
virHostCPUGetCore(unsigned int cpu,unsigned int * core)238 virHostCPUGetCore(unsigned int cpu, unsigned int *core)
239 {
240     int ret = virFileReadValueUint(core,
241                                    "%s/cpu/cpu%u/topology/core_id",
242                                    SYSFS_SYSTEM_PATH, cpu);
243 
244     /* If the file is not there, it's 0 */
245     if (ret == -2)
246         *core = 0;
247     else if (ret < 0)
248         return -1;
249 
250     return 0;
251 }
252 
253 virBitmap *
virHostCPUGetSiblingsList(unsigned int cpu)254 virHostCPUGetSiblingsList(unsigned int cpu)
255 {
256     virBitmap *ret = NULL;
257     int rv = -1;
258 
259     rv = virFileReadValueBitmap(&ret,
260                                 "%s/cpu/cpu%u/topology/thread_siblings_list",
261                                 SYSFS_SYSTEM_PATH, cpu);
262     if (rv == -2) {
263         /* If the file doesn't exist, the threadis its only sibling */
264         ret = virBitmapNew(cpu + 1);
265         ignore_value(virBitmapSetBit(ret, cpu));
266     }
267 
268     return ret;
269 }
270 
271 static unsigned long
virHostCPUCountThreadSiblings(unsigned int cpu)272 virHostCPUCountThreadSiblings(unsigned int cpu)
273 {
274     virBitmap *siblings_map;
275     unsigned long ret = 0;
276 
277     if (!(siblings_map = virHostCPUGetSiblingsList(cpu)))
278         goto cleanup;
279 
280     ret = virBitmapCountBits(siblings_map);
281 
282  cleanup:
283     virBitmapFree(siblings_map);
284     return ret;
285 }
286 
287 /* parses a node entry, returning number of processors in the node and
288  * filling arguments */
289 static int
290 ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(3)
291 ATTRIBUTE_NONNULL(4) ATTRIBUTE_NONNULL(6)
292 ATTRIBUTE_NONNULL(7) ATTRIBUTE_NONNULL(8)
293 ATTRIBUTE_NONNULL(9)
virHostCPUParseNode(const char * node,virArch arch,virBitmap * present_cpus_map,virBitmap * online_cpus_map,int threads_per_subcore,int * sockets,int * cores,int * threads,int * offline)294 virHostCPUParseNode(const char *node,
295                     virArch arch,
296                     virBitmap *present_cpus_map,
297                     virBitmap *online_cpus_map,
298                     int threads_per_subcore,
299                     int *sockets,
300                     int *cores,
301                     int *threads,
302                     int *offline)
303 {
304     int ret = -1;
305     int processors = 0;
306     g_autoptr(DIR) cpudir = NULL;
307     struct dirent *cpudirent = NULL;
308     virBitmap *node_cpus_map = NULL;
309     virBitmap *sockets_map = NULL;
310     virBitmap **cores_maps = NULL;
311     int npresent_cpus = virBitmapSize(present_cpus_map);
312     unsigned int sock_max = 0;
313     unsigned int sock;
314     unsigned int core;
315     size_t i;
316     int siblings;
317     unsigned int cpu;
318     int direrr;
319 
320     *threads = 0;
321     *cores = 0;
322     *sockets = 0;
323 
324     if (virDirOpen(&cpudir, node) < 0)
325         goto cleanup;
326 
327     /* Keep track of the CPUs that belong to the current node */
328     node_cpus_map = virBitmapNew(npresent_cpus);
329 
330     /* enumerate sockets in the node */
331     sockets_map = virBitmapNew(0);
332 
333     while ((direrr = virDirRead(cpudir, &cpudirent, node)) > 0) {
334         if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
335             continue;
336 
337         if (!virBitmapIsBitSet(present_cpus_map, cpu))
338             continue;
339 
340         /* Mark this CPU as part of the current node */
341         if (virBitmapSetBit(node_cpus_map, cpu) < 0)
342             goto cleanup;
343 
344         if (!virBitmapIsBitSet(online_cpus_map, cpu))
345             continue;
346 
347         if (virHostCPUGetSocket(cpu, &sock) < 0)
348             goto cleanup;
349 
350         if (virBitmapSetBitExpand(sockets_map, sock) < 0)
351             goto cleanup;
352 
353         if (sock > sock_max)
354             sock_max = sock;
355     }
356 
357     if (direrr < 0)
358         goto cleanup;
359 
360     sock_max++;
361 
362     /* allocate cores maps for each socket */
363     cores_maps = g_new0(virBitmap *, sock_max);
364 
365     for (i = 0; i < sock_max; i++)
366         cores_maps[i] = virBitmapNew(0);
367 
368     /* Iterate over all CPUs in the node, in ascending order */
369     for (cpu = 0; cpu < npresent_cpus; cpu++) {
370 
371         /* Skip CPUs that are not part of the current node */
372         if (!virBitmapIsBitSet(node_cpus_map, cpu))
373             continue;
374 
375         if (!virBitmapIsBitSet(online_cpus_map, cpu)) {
376             if (threads_per_subcore > 0 &&
377                 cpu % threads_per_subcore != 0 &&
378                 virBitmapIsBitSet(online_cpus_map,
379                                   cpu - (cpu % threads_per_subcore))) {
380                 /* Secondary offline threads are counted as online when
381                  * subcores are in use and the corresponding primary
382                  * thread is online */
383                 processors++;
384             } else {
385                 /* But they are counted as offline otherwise */
386                 (*offline)++;
387             }
388             continue;
389         }
390 
391         processors++;
392 
393         if (virHostCPUGetSocket(cpu, &sock) < 0)
394             goto cleanup;
395         if (!virBitmapIsBitSet(sockets_map, sock)) {
396             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
397                            _("CPU socket topology has changed"));
398             goto cleanup;
399         }
400 
401         /* Parse core */
402         if (ARCH_IS_S390(arch)) {
403             /* logical cpu is equivalent to a core on s390 */
404             core = cpu;
405         } else {
406             if (virHostCPUGetCore(cpu, &core) < 0)
407                 goto cleanup;
408         }
409 
410         if (virBitmapSetBitExpand(cores_maps[sock], core) < 0)
411             goto cleanup;
412 
413         if (!(siblings = virHostCPUCountThreadSiblings(cpu)))
414             goto cleanup;
415 
416         if (siblings > *threads)
417             *threads = siblings;
418     }
419 
420     /* finalize the returned data */
421     *sockets = virBitmapCountBits(sockets_map);
422 
423     for (i = 0; i < sock_max; i++) {
424         if (!virBitmapIsBitSet(sockets_map, i))
425             continue;
426 
427         core = virBitmapCountBits(cores_maps[i]);
428         if (core > *cores)
429             *cores = core;
430     }
431 
432     if (threads_per_subcore > 0) {
433         /* The thread count ignores offline threads, which means that only
434          * only primary threads have been considered so far. If subcores
435          * are in use, we need to also account for secondary threads */
436         *threads *= threads_per_subcore;
437     }
438     ret = processors;
439 
440  cleanup:
441     if (cores_maps)
442         for (i = 0; i < sock_max; i++)
443             virBitmapFree(cores_maps[i]);
444     VIR_FREE(cores_maps);
445     virBitmapFree(sockets_map);
446     virBitmapFree(node_cpus_map);
447 
448     return ret;
449 }
450 
451 /* Check whether the host subcore configuration is valid.
452  *
453  * A valid configuration is one where no secondary thread is online;
454  * the primary thread in a subcore is always the first one */
455 static bool
virHostCPUHasValidSubcoreConfiguration(int threads_per_subcore)456 virHostCPUHasValidSubcoreConfiguration(int threads_per_subcore)
457 {
458     virBitmap *online_cpus = NULL;
459     int cpu = -1;
460     bool ret = false;
461 
462     /* No point in checking if subcores are not in use */
463     if (threads_per_subcore <= 0)
464         goto cleanup;
465 
466     if (!(online_cpus = virHostCPUGetOnlineBitmap()))
467         goto cleanup;
468 
469     while ((cpu = virBitmapNextSetBit(online_cpus, cpu)) >= 0) {
470 
471         /* A single online secondary thread is enough to
472          * make the configuration invalid */
473         if (cpu % threads_per_subcore != 0)
474             goto cleanup;
475     }
476 
477     ret = true;
478 
479  cleanup:
480     virBitmapFree(online_cpus);
481 
482     return ret;
483 }
484 
485 
486 /**
487  * virHostCPUParseFrequencyString:
488  * @str: string to be parsed
489  * @prefix: expected prefix
490  * @mhz: output location
491  *
492  * Parse a /proc/cpuinfo line and extract the CPU frequency, if present.
493  *
494  * The expected format of @str looks like
495  *
496  *   cpu MHz : 2100.000
497  *
498  * where @prefix ("cpu MHz" in the example), is architecture-dependent.
499  *
500  * The decimal part of the CPU frequency, as well as all whitespace, is
501  * ignored.
502  *
503  * Returns: 0 when the string has been parsed successfully and the CPU
504  *          frequency has been stored in @mhz, >0 when the string has not
505  *          been parsed but no error has occurred, <0 on failure.
506  */
507 static int
virHostCPUParseFrequencyString(const char * str,const char * prefix,unsigned int * mhz)508 virHostCPUParseFrequencyString(const char *str,
509                                const char *prefix,
510                                unsigned int *mhz)
511 {
512     char *p;
513     unsigned int ui;
514 
515     /* If the string doesn't start with the expected prefix, then
516      * we're not looking at the right string and we should move on */
517     if (!STRPREFIX(str, prefix))
518         return 1;
519 
520     /* Skip the prefix */
521     str += strlen(prefix);
522 
523     /* Skip all whitespace */
524     while (g_ascii_isspace(*str))
525         str++;
526     if (*str == '\0')
527         goto error;
528 
529     /* Skip the colon. If anything but a colon is found, then we're
530      * not looking at the right string and we should move on */
531     if (*str != ':')
532         return 1;
533     str++;
534 
535     /* Skip all whitespace */
536     while (g_ascii_isspace(*str))
537         str++;
538     if (*str == '\0')
539         goto error;
540 
541     /* Parse the frequency. We expect an unsigned value, optionally
542      * followed by a fractional part (which gets discarded) or some
543      * leading whitespace */
544     if (virStrToLong_ui(str, &p, 10, &ui) < 0 ||
545         (*p != '.' && *p != '\0' && !g_ascii_isspace(*p))) {
546         goto error;
547     }
548 
549     *mhz = ui;
550 
551     return 0;
552 
553  error:
554     virReportError(VIR_ERR_INTERNAL_ERROR,
555                    _("Missing or invalid CPU frequency in %s"),
556                    CPUINFO_PATH);
557     return -1;
558 }
559 
560 
561 static int
virHostCPUParseFrequency(FILE * cpuinfo,virArch arch,unsigned int * mhz)562 virHostCPUParseFrequency(FILE *cpuinfo,
563                          virArch arch,
564                          unsigned int *mhz)
565 {
566     const char *prefix = NULL;
567     char line[1024];
568 
569     /* No sensible way to retrieve CPU frequency */
570     if (ARCH_IS_ARM(arch))
571         return 0;
572 
573     if (ARCH_IS_X86(arch))
574         prefix = "cpu MHz";
575     else if (ARCH_IS_PPC(arch))
576         prefix = "clock";
577     else if (ARCH_IS_S390(arch))
578         prefix = "cpu MHz dynamic";
579 
580     if (!prefix) {
581         VIR_WARN("%s is not supported by the %s parser",
582                  virArchToString(arch),
583                  CPUINFO_PATH);
584         return 1;
585     }
586 
587     while (fgets(line, sizeof(line), cpuinfo) != NULL) {
588         if (virHostCPUParseFrequencyString(line, prefix, mhz) < 0)
589             return -1;
590     }
591 
592     return 0;
593 }
594 
595 
596 int
virHostCPUGetInfoPopulateLinux(FILE * cpuinfo,virArch arch,unsigned int * cpus,unsigned int * mhz,unsigned int * nodes,unsigned int * sockets,unsigned int * cores,unsigned int * threads)597 virHostCPUGetInfoPopulateLinux(FILE *cpuinfo,
598                                virArch arch,
599                                unsigned int *cpus,
600                                unsigned int *mhz,
601                                unsigned int *nodes,
602                                unsigned int *sockets,
603                                unsigned int *cores,
604                                unsigned int *threads)
605 {
606     virBitmap *present_cpus_map = NULL;
607     virBitmap *online_cpus_map = NULL;
608     g_autoptr(DIR) nodedir = NULL;
609     struct dirent *nodedirent = NULL;
610     int nodecpus, nodecores, nodesockets, nodethreads, offline = 0;
611     int threads_per_subcore = 0;
612     unsigned int node;
613     int ret = -1;
614     char *sysfs_nodedir = NULL;
615     char *sysfs_cpudir = NULL;
616     int direrr;
617 
618     *mhz = 0;
619     *cpus = *nodes = *sockets = *cores = *threads = 0;
620 
621     /* Start with parsing CPU clock speed from /proc/cpuinfo */
622     if (virHostCPUParseFrequency(cpuinfo, arch, mhz) < 0) {
623         VIR_WARN("Unable to parse CPU frequency information from %s",
624                  CPUINFO_PATH);
625     }
626 
627     /* Get information about what CPUs are present in the host and what
628      * CPUs are online, so that we don't have to so for each node */
629     present_cpus_map = virHostCPUGetPresentBitmap();
630     if (!present_cpus_map)
631         goto cleanup;
632     online_cpus_map = virHostCPUGetOnlineBitmap();
633     if (!online_cpus_map)
634         goto cleanup;
635 
636     /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the
637      * core, node, socket, thread and topology information from /sys
638      */
639     sysfs_nodedir = g_strdup_printf("%s/node", SYSFS_SYSTEM_PATH);
640 
641     if (virDirOpenQuiet(&nodedir, sysfs_nodedir) < 0) {
642         /* the host isn't probably running a NUMA architecture */
643         goto fallback;
644     }
645 
646     /* PPC-KVM needs the secondary threads of a core to be offline on the
647      * host. The kvm scheduler brings the secondary threads online in the
648      * guest context. Moreover, P8 processor has split-core capability
649      * where, there can be 1,2 or 4 subcores per core. The primaries of the
650      * subcores alone will be online on the host for a subcore in the
651      * host. Even though the actual threads per core for P8 processor is 8,
652      * depending on the subcores_per_core = 1, 2 or 4, the threads per
653      * subcore will vary accordingly to 8, 4 and 2 respectively.
654      * So, On host threads_per_core what is arrived at from sysfs in the
655      * current logic is actually the subcores_per_core. Threads per subcore
656      * can only be obtained from the kvm device. For example, on P8 with 1
657      * core having 8 threads, sub_cores_percore=4, the threads 0,2,4 & 6
658      * will be online. The sysfs reflects this and in the current logic
659      * variable 'threads' will be 4 which is nothing but subcores_per_core.
660      * If the user tampers the cpu online/offline states using chcpu or other
661      * means, then it is an unsupported configuration for kvm.
662      * The code below tries to keep in mind
663      *  - when the libvirtd is run inside a KVM guest or Phyp based guest.
664      *  - Or on the kvm host where user manually tampers the cpu states to
665      *    offline/online randomly.
666      * On hosts other than POWER this will be 0, in which case a simpler
667      * thread-counting logic will be used  */
668     if ((threads_per_subcore = virHostCPUGetThreadsPerSubcore(arch)) < 0)
669         goto cleanup;
670 
671     /* If the subcore configuration is not valid, just pretend subcores
672      * are not in use and count threads one by one */
673     if (!virHostCPUHasValidSubcoreConfiguration(threads_per_subcore))
674         threads_per_subcore = 0;
675 
676     while ((direrr = virDirRead(nodedir, &nodedirent, sysfs_nodedir)) > 0) {
677         if (sscanf(nodedirent->d_name, "node%u", &node) != 1)
678             continue;
679 
680         (*nodes)++;
681 
682         sysfs_cpudir = g_strdup_printf("%s/node/%s", SYSFS_SYSTEM_PATH,
683                                        nodedirent->d_name);
684 
685         if ((nodecpus = virHostCPUParseNode(sysfs_cpudir, arch,
686                                             present_cpus_map,
687                                             online_cpus_map,
688                                             threads_per_subcore,
689                                             &nodesockets, &nodecores,
690                                             &nodethreads, &offline)) < 0)
691             goto cleanup;
692 
693         VIR_FREE(sysfs_cpudir);
694 
695         *cpus += nodecpus;
696 
697         if (nodesockets > *sockets)
698             *sockets = nodesockets;
699 
700         if (nodecores > *cores)
701             *cores = nodecores;
702 
703         if (nodethreads > *threads)
704             *threads = nodethreads;
705     }
706 
707     if (direrr < 0)
708         goto cleanup;
709 
710     if (*cpus && *nodes)
711         goto done;
712 
713  fallback:
714     VIR_FREE(sysfs_cpudir);
715 
716     sysfs_cpudir = g_strdup_printf("%s/cpu", SYSFS_SYSTEM_PATH);
717 
718     if ((nodecpus = virHostCPUParseNode(sysfs_cpudir, arch,
719                                         present_cpus_map,
720                                         online_cpus_map,
721                                         threads_per_subcore,
722                                         &nodesockets, &nodecores,
723                                         &nodethreads, &offline)) < 0)
724         goto cleanup;
725 
726     *nodes = 1;
727     *cpus = nodecpus;
728     *sockets = nodesockets;
729     *cores = nodecores;
730     *threads = nodethreads;
731 
732  done:
733     /* There should always be at least one cpu, socket, node, and thread. */
734     if (*cpus == 0) {
735         virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found"));
736         goto cleanup;
737     }
738 
739     if (*sockets == 0) {
740         virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found"));
741         goto cleanup;
742     }
743 
744     if (*threads == 0) {
745         virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found"));
746         goto cleanup;
747     }
748 
749     /* Now check if the topology makes sense. There are machines that don't
750      * expose their real number of nodes or for example the AMD Bulldozer
751      * architecture that exposes their Clustered integer core modules as both
752      * threads and cores. This approach throws off our detection. Unfortunately
753      * the nodeinfo structure isn't designed to carry the full topology so
754      * we're going to lie about the detected topology to notify the user
755      * to check the host capabilities for the actual topology. */
756     if ((*nodes *
757          *sockets *
758          *cores *
759          *threads) != (*cpus + offline)) {
760         *nodes = 1;
761         *sockets = 1;
762         *cores = *cpus + offline;
763         *threads = 1;
764     }
765 
766     ret = 0;
767 
768  cleanup:
769     virBitmapFree(present_cpus_map);
770     virBitmapFree(online_cpus_map);
771     VIR_FREE(sysfs_nodedir);
772     VIR_FREE(sysfs_cpudir);
773     return ret;
774 }
775 
776 # define TICK_TO_NSEC (1000ull * 1000ull * 1000ull / sysconf(_SC_CLK_TCK))
777 
778 int
virHostCPUGetStatsLinux(FILE * procstat,int cpuNum,virNodeCPUStatsPtr params,int * nparams)779 virHostCPUGetStatsLinux(FILE *procstat,
780                         int cpuNum,
781                         virNodeCPUStatsPtr params,
782                         int *nparams)
783 {
784     char line[1024];
785     unsigned long long usr, ni, sys, idle, iowait;
786     unsigned long long irq, softirq, steal, guest, guest_nice;
787     g_autofree char *cpu_header = NULL;
788 
789     if ((*nparams) == 0) {
790         /* Current number of cpu stats supported by linux */
791         *nparams = LINUX_NB_CPU_STATS;
792         return 0;
793     }
794 
795     if ((*nparams) != LINUX_NB_CPU_STATS) {
796         virReportInvalidArg(*nparams,
797                             _("nparams in %s must be equal to %d"),
798                             __FUNCTION__, LINUX_NB_CPU_STATS);
799         return -1;
800     }
801 
802     if (cpuNum == VIR_NODE_CPU_STATS_ALL_CPUS) {
803         cpu_header = g_strdup("cpu ");
804     } else {
805         cpu_header = g_strdup_printf("cpu%d ", cpuNum);
806     }
807 
808     while (fgets(line, sizeof(line), procstat) != NULL) {
809         char *buf = line;
810 
811         if (STRPREFIX(buf, cpu_header)) { /* aka logical CPU time */
812             if (sscanf(buf,
813                        "%*s %llu %llu %llu %llu %llu" /* user ~ iowait */
814                        "%llu %llu %llu %llu %llu",    /* irq  ~ guest_nice */
815                        &usr, &ni, &sys, &idle, &iowait,
816                        &irq, &softirq, &steal, &guest, &guest_nice) < 4) {
817                 continue;
818             }
819 
820             if (virHostCPUStatsAssign(&params[0], VIR_NODE_CPU_STATS_KERNEL,
821                                       (sys + irq + softirq) * TICK_TO_NSEC) < 0)
822                 return -1;
823 
824             if (virHostCPUStatsAssign(&params[1], VIR_NODE_CPU_STATS_USER,
825                                       (usr + ni) * TICK_TO_NSEC) < 0)
826                 return -1;
827 
828             if (virHostCPUStatsAssign(&params[2], VIR_NODE_CPU_STATS_IDLE,
829                                       idle * TICK_TO_NSEC) < 0)
830                 return -1;
831 
832             if (virHostCPUStatsAssign(&params[3], VIR_NODE_CPU_STATS_IOWAIT,
833                                       iowait * TICK_TO_NSEC) < 0)
834                 return -1;
835 
836             return 0;
837         }
838     }
839 
840     virReportInvalidArg(cpuNum,
841                         _("Invalid cpuNum in %s"),
842                         __FUNCTION__);
843 
844     return -1;
845 }
846 
847 
848 /* Determine the number of CPUs (maximum CPU id + 1) present in
849  * the host. */
850 static int
virHostCPUCountLinux(void)851 virHostCPUCountLinux(void)
852 {
853     g_autoptr(virBitmap) present = virHostCPUGetPresentBitmap();
854 
855     if (!present)
856         return -1;
857 
858     return virBitmapSize(present);
859 }
860 #endif
861 
862 int
virHostCPUGetOnline(unsigned int cpu,bool * online)863 virHostCPUGetOnline(unsigned int cpu, bool *online)
864 {
865     unsigned int tmp = 0;
866     int ret = virFileReadValueUint(&tmp,
867                                    "%s/cpu/cpu%u/online",
868                                    SYSFS_SYSTEM_PATH, cpu);
869 
870     /* If the file is not there, it's online (doesn't support offlining) */
871     if (ret == -2)
872         tmp = 1;
873     else if (ret < 0)
874         return -1;
875 
876     *online = tmp;
877 
878     return 0;
879 }
880 
881 int
virHostCPUStatsAssign(virNodeCPUStatsPtr param,const char * name,unsigned long long value)882 virHostCPUStatsAssign(virNodeCPUStatsPtr param,
883                       const char *name,
884                       unsigned long long value)
885 {
886     if (virStrcpyStatic(param->field, name) < 0) {
887         virReportError(VIR_ERR_INTERNAL_ERROR,
888                        "%s", _("kernel cpu time field is too long"
889                                " for the destination"));
890         return -1;
891     }
892     param->value = value;
893     return 0;
894 }
895 
896 
897 int
virHostCPUGetInfo(virArch hostarch G_GNUC_UNUSED,unsigned int * cpus G_GNUC_UNUSED,unsigned int * mhz G_GNUC_UNUSED,unsigned int * nodes G_GNUC_UNUSED,unsigned int * sockets G_GNUC_UNUSED,unsigned int * cores G_GNUC_UNUSED,unsigned int * threads G_GNUC_UNUSED)898 virHostCPUGetInfo(virArch hostarch G_GNUC_UNUSED,
899                   unsigned int *cpus G_GNUC_UNUSED,
900                   unsigned int *mhz G_GNUC_UNUSED,
901                   unsigned int *nodes G_GNUC_UNUSED,
902                   unsigned int *sockets G_GNUC_UNUSED,
903                   unsigned int *cores G_GNUC_UNUSED,
904                   unsigned int *threads G_GNUC_UNUSED)
905 {
906 #ifdef __linux__
907     int ret = -1;
908     FILE *cpuinfo = fopen(CPUINFO_PATH, "r");
909 
910     if (!cpuinfo) {
911         virReportSystemError(errno,
912                              _("cannot open %s"), CPUINFO_PATH);
913         return -1;
914     }
915 
916     ret = virHostCPUGetInfoPopulateLinux(cpuinfo, hostarch,
917                                          cpus, mhz, nodes,
918                                          sockets, cores, threads);
919     if (ret < 0)
920         goto cleanup;
921 
922  cleanup:
923     VIR_FORCE_FCLOSE(cpuinfo);
924     return ret;
925 #elif defined(__FreeBSD__) || defined(__APPLE__) || defined(__DragonFly__)
926     unsigned long cpu_freq;
927     size_t cpu_freq_len = sizeof(cpu_freq);
928 
929     *cpus = virHostCPUGetCountAppleFreeBSD();
930     if (*cpus == -1)
931         return -1;
932 
933     *nodes = 1;
934     *sockets = 1;
935     *cores = *cpus;
936     *threads = 1;
937 
938 # if defined(__FreeBSD__) || defined(__DragonFly__)
939     /* dev.cpu.%d.freq reports current active CPU frequency. It is provided by
940      * the cpufreq(4) framework. However, it might be disabled or no driver
941      * available. In this case fallback to "hw.clockrate" which reports boot time
942      * CPU frequency. */
943 
944     if (sysctlbyname("dev.cpu.0.freq", &cpu_freq, &cpu_freq_len, NULL, 0) < 0) {
945         if (sysctlbyname("hw.clockrate", &cpu_freq, &cpu_freq_len, NULL, 0) < 0) {
946             virReportSystemError(errno, "%s", _("cannot obtain CPU freq"));
947             return -1;
948         }
949     }
950 
951     *mhz = cpu_freq;
952 # else
953     if (sysctlbyname("hw.cpufrequency", &cpu_freq, &cpu_freq_len, NULL, 0) < 0) {
954         virReportSystemError(errno, "%s", _("cannot obtain CPU freq"));
955         return -1;
956     }
957 
958     *mhz = cpu_freq / 1000000;
959 # endif
960 
961     return 0;
962 #else
963     /* XXX Solaris will need an impl later if they port QEMU driver */
964     virReportError(VIR_ERR_NO_SUPPORT, "%s",
965                    _("node info not implemented on this platform"));
966     return -1;
967 #endif
968 }
969 
970 
971 int
virHostCPUGetStats(int cpuNum G_GNUC_UNUSED,virNodeCPUStatsPtr params G_GNUC_UNUSED,int * nparams G_GNUC_UNUSED,unsigned int flags)972 virHostCPUGetStats(int cpuNum G_GNUC_UNUSED,
973                    virNodeCPUStatsPtr params G_GNUC_UNUSED,
974                    int *nparams G_GNUC_UNUSED,
975                    unsigned int flags)
976 {
977     virCheckFlags(0, -1);
978 
979 #ifdef __linux__
980     {
981         int ret;
982         FILE *procstat = fopen(PROCSTAT_PATH, "r");
983         if (!procstat) {
984             virReportSystemError(errno,
985                                  _("cannot open %s"), PROCSTAT_PATH);
986             return -1;
987         }
988         ret = virHostCPUGetStatsLinux(procstat, cpuNum, params, nparams);
989         VIR_FORCE_FCLOSE(procstat);
990 
991         return ret;
992     }
993 #elif defined(__FreeBSD__) || defined(__DragonFly__)
994     return virHostCPUGetStatsFreeBSD(cpuNum, params, nparams);
995 #else
996     virReportError(VIR_ERR_NO_SUPPORT, "%s",
997                    _("node CPU stats not implemented on this platform"));
998     return -1;
999 #endif
1000 }
1001 
1002 
/**
 * virHostCPUGetCount:
 *
 * Returns the value computed by the platform specific CPU counting
 * helper, or -1 with an error reported on platforms that have no
 * implementation.
 */
int
virHostCPUGetCount(void)
{
    int ncpus = -1;

#ifdef __linux__
    ncpus = virHostCPUCountLinux();
#elif defined(__FreeBSD__) || defined(__APPLE__) || defined(__DragonFly__)
    ncpus = virHostCPUGetCountAppleFreeBSD();
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("host cpu counting not implemented on this platform"));
#endif

    return ncpus;
}
1016 
/**
 * virHostCPUHasBitmap:
 *
 * Returns true when built for Linux, false on every other platform.
 */
bool
virHostCPUHasBitmap(void)
{
#if defined(__linux__)
    const bool supported = true;
#else
    const bool supported = false;
#endif

    return supported;
}
1026 
1027 virBitmap *
virHostCPUGetPresentBitmap(void)1028 virHostCPUGetPresentBitmap(void)
1029 {
1030 #ifdef __linux__
1031     virBitmap *ret = NULL;
1032 
1033     virFileReadValueBitmap(&ret, "%s/cpu/present", SYSFS_SYSTEM_PATH);
1034 
1035     return ret;
1036 #else
1037     virReportError(VIR_ERR_NO_SUPPORT, "%s",
1038                    _("node present CPU map not implemented on this platform"));
1039     return NULL;
1040 #endif
1041 }
1042 
1043 virBitmap *
virHostCPUGetOnlineBitmap(void)1044 virHostCPUGetOnlineBitmap(void)
1045 {
1046 #ifdef __linux__
1047     virBitmap *ret = NULL;
1048 
1049     virFileReadValueBitmap(&ret, "%s/cpu/online", SYSFS_SYSTEM_PATH);
1050 
1051     return ret;
1052 #else
1053     virReportError(VIR_ERR_NO_SUPPORT, "%s",
1054                    _("node online CPU map not implemented on this platform"));
1055     return NULL;
1056 #endif
1057 }
1058 
1059 
1060 int
virHostCPUGetMap(unsigned char ** cpumap,unsigned int * online,unsigned int flags)1061 virHostCPUGetMap(unsigned char **cpumap,
1062                  unsigned int *online,
1063                  unsigned int flags)
1064 {
1065     g_autoptr(virBitmap) cpus = NULL;
1066     int ret = -1;
1067     int dummy;
1068 
1069     virCheckFlags(0, -1);
1070 
1071     if (!cpumap && !online)
1072         return virHostCPUGetCount();
1073 
1074     if (!(cpus = virHostCPUGetOnlineBitmap()))
1075         goto cleanup;
1076 
1077     if (cpumap && virBitmapToData(cpus, cpumap, &dummy) < 0)
1078         goto cleanup;
1079     if (online)
1080         *online = virBitmapCountBits(cpus);
1081 
1082     ret = virHostCPUGetCount();
1083 
1084  cleanup:
1085     if (ret < 0 && cpumap)
1086         VIR_FREE(*cpumap);
1087     return ret;
1088 }
1089 
1090 
1091 /* virHostCPUGetAvailableCPUsBitmap():
1092  *
1093  * Returns a virBitmap object with all available host CPUs.
1094  *
1095  * This is a glorified wrapper of virHostCPUGetOnlineBitmap()
1096  * that, instead of returning NULL when 'ifndef __linux__' and
1097  * the caller having to handle it outside the function, returns
1098  * a virBitmap with all the possible CPUs in the host, up to
1099  * virHostCPUGetCount(). */
1100 virBitmap *
virHostCPUGetAvailableCPUsBitmap(void)1101 virHostCPUGetAvailableCPUsBitmap(void)
1102 {
1103     g_autoptr(virBitmap) bitmap = NULL;
1104 
1105     if (!(bitmap = virHostCPUGetOnlineBitmap())) {
1106         int hostcpus;
1107 
1108         if ((hostcpus = virHostCPUGetCount()) < 0)
1109             return NULL;
1110 
1111         bitmap = virBitmapNew(hostcpus);
1112         virBitmapSetAll(bitmap);
1113     }
1114 
1115     return g_steal_pointer(&bitmap);
1116 }
1117 
1118 
1119 #if WITH_LINUX_KVM_H && defined(KVM_CAP_PPC_SMT)
1120 
1121 /* Get the number of threads per subcore.
1122  *
1123  * This will be 2, 4 or 8 on POWER hosts, depending on the current
1124  * micro-threading configuration, and 0 everywhere else.
1125  *
1126  * Returns the number of threads per subcore if subcores are in use, zero
1127  * if subcores are not in use, and a negative value on error */
1128 int
virHostCPUGetThreadsPerSubcore(virArch arch)1129 virHostCPUGetThreadsPerSubcore(virArch arch)
1130 {
1131     int threads_per_subcore = 0;
1132     int kvmfd;
1133 
1134     if (ARCH_IS_PPC64(arch)) {
1135 
1136         /* It's okay if /dev/kvm doesn't exist, because
1137          *   a. we might be running in a guest
1138          *   b. the kvm module might not be installed or enabled
1139          * In either case, falling back to the subcore-unaware thread
1140          * counting logic is the right thing to do */
1141         if (!virFileExists(KVM_DEVICE))
1142             return 0;
1143 
1144         if ((kvmfd = open(KVM_DEVICE, O_RDONLY)) < 0) {
1145             /* This can happen when running as a regular user if
1146              * permissions are tight enough, in which case erroring out
1147              * is better than silently falling back and reporting
1148              * different nodeinfo depending on the user */
1149             virReportSystemError(errno,
1150                                  _("Failed to open '%s'"),
1151                                  KVM_DEVICE);
1152             return -1;
1153         }
1154 
1155         /* For Phyp and KVM based guests the ioctl for KVM_CAP_PPC_SMT
1156          * returns zero and both primary and secondary threads will be
1157          * online */
1158         threads_per_subcore = ioctl(kvmfd,
1159                                     KVM_CHECK_EXTENSION,
1160                                     KVM_CAP_PPC_SMT);
1161 
1162         VIR_FORCE_CLOSE(kvmfd);
1163     }
1164 
1165     return threads_per_subcore;
1166 }
1167 
1168 #else
1169 
/* Fallback for virHostCPUGetThreadsPerSubcore() used when KVM headers
 * are not available on the system or do not define KVM_CAP_PPC_SMT.
 *
 * Always returns 0, i.e. subcores are reported as not in use. */
int
virHostCPUGetThreadsPerSubcore(virArch arch G_GNUC_UNUSED)
{
    return 0;
}
1177 
1178 #endif /* WITH_LINUX_KVM_H && defined(KVM_CAP_PPC_SMT) */
1179 
1180 #if WITH_LINUX_KVM_H
1181 int
virHostCPUGetKVMMaxVCPUs(void)1182 virHostCPUGetKVMMaxVCPUs(void)
1183 {
1184     int fd;
1185     int ret;
1186 
1187     if ((fd = open(KVM_DEVICE, O_RDONLY)) < 0) {
1188         virReportSystemError(errno, _("Unable to open %s"), KVM_DEVICE);
1189         return -1;
1190     }
1191 
1192 # ifdef KVM_CAP_MAX_VCPUS
1193     /* at first try KVM_CAP_MAX_VCPUS to determine the maximum count */
1194     if ((ret = ioctl(fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS)) > 0)
1195         goto cleanup;
1196 # endif /* KVM_CAP_MAX_VCPUS */
1197 
1198     /* as a fallback get KVM_CAP_NR_VCPUS (the recommended maximum number of
1199      * vcpus). Note that on most machines this is set to 160. */
1200     if ((ret = ioctl(fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS)) > 0)
1201         goto cleanup;
1202 
1203     /* if KVM_CAP_NR_VCPUS doesn't exist either, kernel documentation states
1204      * that 4 should be used as the maximum number of cpus */
1205     ret = 4;
1206 
1207  cleanup:
1208     VIR_FORCE_CLOSE(fd);
1209     return ret;
1210 }
1211 #else
/* Variant of virHostCPUGetKVMMaxVCPUs() built when WITH_LINUX_KVM_H is
 * not set: reports an ENOSYS system error and always returns -1. */
int
virHostCPUGetKVMMaxVCPUs(void)
{
    virReportSystemError(ENOSYS, "%s",
                         _("KVM is not supported on this platform"));
    return -1;
}
1219 #endif /* WITH_LINUX_KVM_H */
1220 
1221 
1222 #ifdef __linux__
1223 
1224 /*
1225  * Returns 0 if the microcode version is unknown or cannot be read for
1226  * some reason.
1227  */
1228 unsigned int
virHostCPUGetMicrocodeVersion(virArch hostArch)1229 virHostCPUGetMicrocodeVersion(virArch hostArch)
1230 {
1231     g_autofree char *outbuf = NULL;
1232     char *cur;
1233     unsigned int version = 0;
1234 
1235     if (!ARCH_IS_X86(hostArch))
1236         return 0;
1237 
1238     if (virFileReadHeaderQuiet(CPUINFO_PATH, 4096, &outbuf) < 0) {
1239         VIR_DEBUG("Failed to read microcode version from %s: %s",
1240                   CPUINFO_PATH, g_strerror(errno));
1241         return 0;
1242     }
1243 
1244     /* Account for format 'microcode    : XXXX'*/
1245     if (!(cur = strstr(outbuf, "microcode")) ||
1246         !(cur = strchr(cur, ':')))
1247         return 0;
1248     cur++;
1249 
1250     /* Linux places the microcode revision in a 32-bit integer, so
1251      * ui is fine for us too.  */
1252     if (virStrToLong_ui(cur, &cur, 0, &version) < 0)
1253         return 0;
1254 
1255     return version;
1256 }
1257 
1258 #else
1259 
/* Non-Linux variant: the microcode version is never known here, so 0
 * is always returned. */
unsigned int
virHostCPUGetMicrocodeVersion(virArch hostArch G_GNUC_UNUSED)
{
    return 0;
}
1265 
1266 #endif /* __linux__ */
1267 
1268 
1269 #if WITH_LINUX_KVM_H && defined(KVM_GET_MSRS) && \
1270     (defined(__i386__) || defined(__x86_64__)) && \
1271     (defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__))
1272 static int
virHostCPUGetMSRFromKVM(unsigned long index,uint64_t * result)1273 virHostCPUGetMSRFromKVM(unsigned long index,
1274                         uint64_t *result)
1275 {
1276     VIR_AUTOCLOSE fd = -1;
1277     struct {
1278         struct kvm_msrs header;
1279         struct kvm_msr_entry entry;
1280     } msr = {
1281         .header = { .nmsrs = 1 },
1282         .entry = { .index = index },
1283     };
1284 
1285     if ((fd = open(KVM_DEVICE, O_RDONLY)) < 0) {
1286         virReportSystemError(errno, _("Unable to open %s"), KVM_DEVICE);
1287         return -1;
1288     }
1289 
1290     if (ioctl(fd, KVM_GET_MSRS, &msr) < 0) {
1291         VIR_DEBUG("Cannot get MSR 0x%lx from KVM", index);
1292         return 1;
1293     }
1294 
1295     *result = msr.entry.data;
1296     return 0;
1297 }
1298 
1299 /*
1300  * Returns 0 on success,
1301  *         1 when the MSR is not supported by the host CPU,
1302  *        -1 on error.
1303  */
1304 int
virHostCPUGetMSR(unsigned long index,uint64_t * msr)1305 virHostCPUGetMSR(unsigned long index,
1306                  uint64_t *msr)
1307 {
1308     VIR_AUTOCLOSE fd = -1;
1309 
1310     *msr = 0;
1311 
1312     if ((fd = open(MSR_DEVICE, O_RDONLY)) < 0) {
1313         VIR_DEBUG("Unable to open %s: %s",
1314                   MSR_DEVICE, g_strerror(errno));
1315     } else {
1316         int rc = pread(fd, msr, sizeof(*msr), index);
1317 
1318         if (rc == sizeof(*msr))
1319             return 0;
1320 
1321         if (rc < 0 && errno == EIO) {
1322             VIR_DEBUG("CPU does not support MSR 0x%lx", index);
1323             return 1;
1324         }
1325 
1326         VIR_DEBUG("Cannot read MSR 0x%lx from %s: %s",
1327                   index, MSR_DEVICE, g_strerror(errno));
1328     }
1329 
1330     VIR_DEBUG("Falling back to KVM ioctl");
1331 
1332     return virHostCPUGetMSRFromKVM(index, msr);
1333 }
1334 
1335 
1336 struct kvm_cpuid2 *
virHostCPUGetCPUID(void)1337 virHostCPUGetCPUID(void)
1338 {
1339     size_t i;
1340     VIR_AUTOCLOSE fd = open(KVM_DEVICE, O_RDONLY);
1341 
1342     if (fd < 0) {
1343         virReportSystemError(errno, _("Unable to open %s"), KVM_DEVICE);
1344         return NULL;
1345     }
1346 
1347     for (i = 1; i < INT32_MAX; i *= 2) {
1348         g_autofree struct kvm_cpuid2 *kvm_cpuid = NULL;
1349         kvm_cpuid = g_malloc0(sizeof(struct kvm_cpuid2) +
1350                               sizeof(struct kvm_cpuid_entry2) * i);
1351         kvm_cpuid->nent = i;
1352 
1353         if (ioctl(fd, KVM_GET_SUPPORTED_CPUID, kvm_cpuid) == 0) {
1354             /* filter out local apic id */
1355             for (i = 0; i < kvm_cpuid->nent; ++i) {
1356                 struct kvm_cpuid_entry2 *entry = &kvm_cpuid->entries[i];
1357                 if (entry->function == 0x01 && entry->index == 0x00)
1358                     entry->ebx &= 0x00ffffff;
1359                 if (entry->function == 0x0b)
1360                     entry->edx &= 0xffffff00;
1361             }
1362 
1363             return g_steal_pointer(&kvm_cpuid);
1364         }
1365     }
1366 
1367     virReportSystemError(errno, "%s", _("Cannot read host CPUID"));
1368     return NULL;
1369 }
1370 
1371 /*
1372  * This function should only be called when the host CPU supports invariant TSC
1373  * (invtsc CPUID feature).
1374  *
1375  * Returns pointer to the TSC info structure on success,
1376  *         NULL when TSC cannot be probed otherwise.
1377  */
1378 virHostCPUTscInfo *
virHostCPUGetTscInfo(void)1379 virHostCPUGetTscInfo(void)
1380 {
1381     g_autofree virHostCPUTscInfo *info = g_new0(virHostCPUTscInfo, 1);
1382     VIR_AUTOCLOSE kvmFd = -1;
1383     VIR_AUTOCLOSE vmFd = -1;
1384     VIR_AUTOCLOSE vcpuFd = -1;
1385     int rc;
1386 
1387     if ((kvmFd = open(KVM_DEVICE, O_RDONLY)) < 0) {
1388         virReportSystemError(errno, _("Unable to open %s"), KVM_DEVICE);
1389         return NULL;
1390     }
1391 
1392     if ((vmFd = ioctl(kvmFd, KVM_CREATE_VM, 0)) < 0) {
1393         virReportSystemError(errno, "%s",
1394                              _("Unable to create KVM VM for TSC probing"));
1395         return NULL;
1396     }
1397 
1398     if ((vcpuFd = ioctl(vmFd, KVM_CREATE_VCPU, 0)) < 0) {
1399         virReportSystemError(errno, "%s",
1400                              _("Unable to create KVM vCPU for TSC probing"));
1401         return NULL;
1402     }
1403 
1404     if ((rc = ioctl(vcpuFd, KVM_GET_TSC_KHZ)) < 0) {
1405         virReportSystemError(errno, "%s",
1406                              _("Unable to probe TSC frequency"));
1407         return NULL;
1408     }
1409     info->frequency = rc * 1000ULL;
1410 
1411     if ((rc = ioctl(kvmFd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL)) < 0) {
1412         virReportSystemError(errno, "%s",
1413                              _("Unable to query TSC scaling support"));
1414         return NULL;
1415     }
1416     info->scaling = rc ? VIR_TRISTATE_BOOL_YES : VIR_TRISTATE_BOOL_NO;
1417 
1418     VIR_DEBUG("Detected TSC frequency %llu Hz, scaling %s",
1419               info->frequency, virTristateBoolTypeToString(info->scaling));
1420 
1421     return g_steal_pointer(&info);
1422 }
1423 
1424 #else
1425 
/* Variant of virHostCPUGetCPUID() for builds where the KVM based
 * implementation is compiled out: reports an ENOSYS system error and
 * always returns NULL. */
struct kvm_cpuid2 *
virHostCPUGetCPUID(void)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Reading CPUID is not supported on this platform"));
    return NULL;
}
1433 
/* Variant of virHostCPUGetMSR() for builds where the KVM based
 * implementation is compiled out: neither argument is used, an ENOSYS
 * system error is reported and -1 is always returned. */
int
virHostCPUGetMSR(unsigned long index G_GNUC_UNUSED,
                 uint64_t *msr G_GNUC_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Reading MSRs is not supported on this platform"));
    return -1;
}
1442 
/* Variant of virHostCPUGetTscInfo() for builds where the KVM based
 * implementation is compiled out: reports an ENOSYS system error and
 * always returns NULL. */
virHostCPUTscInfo *
virHostCPUGetTscInfo(void)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Probing TSC is not supported on this platform"));
    return NULL;
}
1450 
1451 #endif /* WITH_LINUX_KVM_H && defined(KVM_GET_MSRS) && \
1452           (defined(__i386__) || defined(__x86_64__)) && \
1453           (defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__)) */
1454 
1455 int
virHostCPUReadSignature(virArch arch,FILE * cpuinfo,char ** signature)1456 virHostCPUReadSignature(virArch arch,
1457                         FILE *cpuinfo,
1458                         char **signature)
1459 {
1460     size_t lineLen = 1024;
1461     g_autofree char *line = g_new0(char, lineLen);
1462     g_autofree char *vendor = NULL;
1463     g_autofree char *name = NULL;
1464     g_autofree char *family = NULL;
1465     g_autofree char *model = NULL;
1466     g_autofree char *stepping = NULL;
1467     g_autofree char *revision = NULL;
1468     g_autofree char *proc = NULL;
1469     g_autofree char *facilities = NULL;
1470 
1471     if (!ARCH_IS_X86(arch) && !ARCH_IS_PPC64(arch) && !ARCH_IS_S390(arch))
1472         return 0;
1473 
1474     while (fgets(line, lineLen, cpuinfo)) {
1475         g_auto(GStrv) parts = g_strsplit(line, ": ", 2);
1476 
1477         if (g_strv_length(parts) != 2)
1478             continue;
1479 
1480         g_strstrip(parts[0]);
1481         g_strstrip(parts[1]);
1482 
1483         if (ARCH_IS_X86(arch)) {
1484             if (STREQ(parts[0], "vendor_id")) {
1485                 if (!vendor)
1486                     vendor = g_steal_pointer(&parts[1]);
1487             } else if (STREQ(parts[0], "model name")) {
1488                 if (!name)
1489                     name = g_steal_pointer(&parts[1]);
1490             } else if (STREQ(parts[0], "cpu family")) {
1491                 if (!family)
1492                     family = g_steal_pointer(&parts[1]);
1493             } else if (STREQ(parts[0], "model")) {
1494                 if (!model)
1495                     model = g_steal_pointer(&parts[1]);
1496             } else if (STREQ(parts[0], "stepping")) {
1497                 if (!stepping)
1498                     stepping = g_steal_pointer(&parts[1]);
1499             }
1500 
1501             if (vendor && name && family && model && stepping) {
1502                 *signature = g_strdup_printf("%s, %s, family: %s, model: %s, stepping: %s",
1503                                              vendor, name, family, model, stepping);
1504                 return 0;
1505             }
1506         } else if (ARCH_IS_PPC64(arch)) {
1507             if (STREQ(parts[0], "cpu")) {
1508                 if (!name)
1509                     name = g_steal_pointer(&parts[1]);
1510             } else if (STREQ(parts[0], "revision")) {
1511                 if (!revision)
1512                     revision = g_steal_pointer(&parts[1]);
1513             }
1514 
1515             if (name && revision) {
1516                 *signature = g_strdup_printf("%s, rev %s", name, revision);
1517                 return 0;
1518             }
1519         } else if (ARCH_IS_S390(arch)) {
1520             if (STREQ(parts[0], "vendor_id")) {
1521                 if (!vendor)
1522                     vendor = g_steal_pointer(&parts[1]);
1523             } else if (STREQ(parts[0], "processor 0")) {
1524                 if (!proc)
1525                     proc = g_steal_pointer(&parts[1]);
1526             } else if (STREQ(parts[0], "facilities")) {
1527                 if (!facilities)
1528                     facilities = g_steal_pointer(&parts[1]);
1529             }
1530 
1531             if (vendor && proc && facilities) {
1532                 *signature = g_strdup_printf("%s, %s, facilities: %s",
1533                                              vendor, proc, facilities);
1534                 return 0;
1535             }
1536         }
1537     }
1538 
1539     return 0;
1540 }
1541 
1542 #ifdef __linux__
1543 
1544 int
virHostCPUGetSignature(char ** signature)1545 virHostCPUGetSignature(char **signature)
1546 {
1547     g_autoptr(FILE) cpuinfo = NULL;
1548 
1549     *signature = NULL;
1550 
1551     if (!(cpuinfo = fopen(CPUINFO_PATH, "r"))) {
1552         virReportSystemError(errno, _("Failed to open cpuinfo file '%s'"),
1553                              CPUINFO_PATH);
1554         return -1;
1555     }
1556 
1557     return virHostCPUReadSignature(virArchFromHost(), cpuinfo, signature);
1558 }
1559 
1560 #else
1561 
/* Non-Linux variant: no signature is available, so @signature is set
 * to NULL and 0 is returned. */
int
virHostCPUGetSignature(char **signature)
{
    *signature = NULL;
    return 0;
}
1568 
1569 #endif /* __linux__ */
1570 
1571 int
virHostCPUGetHaltPollTime(pid_t pid,unsigned long long * haltPollSuccess,unsigned long long * haltPollFail)1572 virHostCPUGetHaltPollTime(pid_t pid,
1573                       unsigned long long *haltPollSuccess,
1574                       unsigned long long *haltPollFail)
1575 {
1576     g_autofree char *pidToStr = NULL;
1577     g_autofree char *debugFsPath = NULL;
1578     g_autofree char *kvmPath = NULL;
1579     struct dirent *ent = NULL;
1580     g_autoptr(DIR) dir = NULL;
1581     bool found = false;
1582 
1583     if (!(debugFsPath = virFileFindMountPoint("debugfs")))
1584         return -1;
1585 
1586     kvmPath = g_strdup_printf("%s/%s", debugFsPath, "kvm");
1587     if (virDirOpenQuiet(&dir, kvmPath) != 1)
1588         return -1;
1589 
1590     pidToStr = g_strdup_printf("%lld-", (long long)pid);
1591     while (virDirRead(dir, &ent, NULL) > 0) {
1592         if (STRPREFIX(ent->d_name, pidToStr)) {
1593             found = true;
1594             break;
1595         }
1596     }
1597 
1598     if (!found)
1599         return -1;
1600 
1601     if (virFileReadValueUllongQuiet(haltPollSuccess, "%s/%s/%s", kvmPath,
1602                                     ent->d_name, "halt_poll_success_ns") < 0 ||
1603         virFileReadValueUllongQuiet(haltPollFail, "%s/%s/%s", kvmPath,
1604                                     ent->d_name, "halt_poll_fail_ns") < 0)
1605         return -1;
1606 
1607     return 0;
1608 }
1609