// SPDX-License-Identifier: GPL-2.0
/*
 * Test for x86 KVM_SET_PMU_EVENT_FILTER.
 *
 * Copyright (C) 2022, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Verifies the expected behavior of allow lists and deny lists for
 * virtual PMU events.
 */

#define _GNU_SOURCE /* for program_invocation_short_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

/*
 * In lieu of copying perf_event.h into tools...
 */
#define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
#define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)

union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};

union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
};

/* End of stuff taken from perf_event.h. */

/*
 * Oddly, this isn't in perf_event.h: the bit index in CPUID.0AH:EBX
 * that, when clear, indicates that the architectural "branch
 * instructions retired" event is available.
 */
#define ARCH_PERFMON_BRANCHES_RETIRED		5

#define NUM_BRANCHES 42

/*
 * This is how the event selector and unit mask are stored in an AMD
 * core performance event-select register. Intel's format is similar,
 * but the event selector is only 8 bits.
 */
#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
			      (umask & 0xff) << 8)
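
/*
 * For example, EVENT(0xc2, 0) encodes to 0xc2 (select[7:0] in bits 7:0),
 * while EVENT(0x1c2, 0) encodes to 0x1000000c2 (select[11:8] shifted up
 * to bits 35:32).
 */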

/*
 * "Branch instructions retired", from the Intel SDM, volume 3,
 * "Pre-defined Architectural Performance Events."
 */

#define INTEL_BR_RETIRED EVENT(0xc4, 0)

/*
 * "Retired branch instructions", from Processor Programming Reference
 * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
 * Preliminary Processor Programming Reference (PPR) for AMD Family
 * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
 * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
 * B1 Processors Volume 1 of 2.
 */

#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)

/*
 * This event list comprises Intel's eight architectural events plus
 * AMD's "retired branch instructions" for Zen[123] (and possibly
 * other AMD CPUs).
 */
static const uint64_t event_list[] = {
	EVENT(0x3c, 0),
	EVENT(0xc0, 0),
	EVENT(0x3c, 1),
	EVENT(0x2e, 0x4f),
	EVENT(0x2e, 0x41),
	EVENT(0xc4, 0),
	EVENT(0xc5, 0),
	EVENT(0xa4, 1),
	AMD_ZEN_BR_RETIRED,
};

/*
 * If we encounter a #GP during the guest PMU sanity check, then the guest
 * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
 */
static void guest_gp_handler(struct ex_regs *regs)
{
	GUEST_SYNC(0);
}

/*
 * Check that we can write a new value to the given MSR and read it back.
 * The caller should provide a non-empty set of bits that are safe to flip.
 *
 * Return on success. GUEST_SYNC(0) on error.
 */
static void check_msr(uint32_t msr, uint64_t bits_to_flip)
{
	uint64_t v = rdmsr(msr) ^ bits_to_flip;

	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);

	v ^= bits_to_flip;
	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);
}

static void intel_guest_code(void)
{
	check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
	check_msr(MSR_P6_EVNTSEL0, 0xffff);
	check_msr(MSR_IA32_PMC0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
		br0 = rdmsr(MSR_IA32_PMC0);
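		/*
		 * Retire NUM_BRANCHES branch instructions: "loop ." spins on
		 * itself, decrementing ECX (loaded from the "+c" constraint)
		 * on each iteration until it reaches zero.
		 */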
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_IA32_PMC0);
		GUEST_SYNC(br1 - br0);
	}
}

/*
 * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
 * this code uses the always-available, legacy K7 PMU MSRs, which alias to
 * the first four of the six extended core PMU MSRs.
 */
static void amd_guest_code(void)
{
	check_msr(MSR_K7_EVNTSEL0, 0xffff);
	check_msr(MSR_K7_PERFCTR0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

		wrmsr(MSR_K7_EVNTSEL0, 0);
		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
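		/* Same "loop ." measurement as in intel_guest_code(). */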
		br0 = rdmsr(MSR_K7_PERFCTR0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_K7_PERFCTR0);
		GUEST_SYNC(br1 - br0);
	}
}

/*
 * Run the VM to the next GUEST_SYNC(value), and return the value passed
 * to the sync. Any other exit from the guest is fatal.
 */
static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;
	struct ucall uc;

	vcpu_run(vcpu);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
		    run->exit_reason,
		    exit_reason_str(run->exit_reason));
	get_ucall(vcpu, &uc);
	TEST_ASSERT(uc.cmd == UCALL_SYNC,
		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
	return uc.args[1];
}

/*
 * In a nested environment or if the vPMU is disabled, the guest PMU
 * might not work as architected (accessing the PMU MSRs may raise
 * #GP, or writes could simply be discarded). In those situations,
 * there is no point in running these tests. The guest code will perform
 * a sanity check and then GUEST_SYNC(success). In the case of failure,
 * the behavior of the guest on resumption is undefined.
 */
static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
{
	bool success;

	vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
	success = run_vcpu_to_sync(vcpu);
	vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);

	return success;
}

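/*
 * struct kvm_pmu_event_filter ends in a flexible events[] array, so the
 * allocation below is the fixed-size header plus nevents entries.
 */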
static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
{
	struct kvm_pmu_event_filter *f;
	int size = sizeof(*f) + nevents * sizeof(f->events[0]);

	f = malloc(size);
	TEST_ASSERT(f, "Out of memory");
	memset(f, 0, size);
	f->nevents = nevents;
	return f;
}

static struct kvm_pmu_event_filter *
create_pmu_event_filter(const uint64_t event_list[],
			int nevents, uint32_t action)
{
	struct kvm_pmu_event_filter *f;
	int i;

	f = alloc_pmu_event_filter(nevents);
	f->action = action;
	for (i = 0; i < nevents; i++)
		f->events[i] = event_list[i];

	return f;
}

static struct kvm_pmu_event_filter *event_filter(uint32_t action)
{
	return create_pmu_event_filter(event_list,
				       ARRAY_SIZE(event_list),
				       action);
}

/*
 * Remove the first occurrence of 'event' (if any) from the filter's
 * event list.
 */
static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
						 uint64_t event)
{
	bool found = false;
	int i;

	for (i = 0; i < f->nevents; i++) {
		if (found)
			f->events[i - 1] = f->events[i];
		else
			found = f->events[i] == event;
	}
	if (found)
		f->nevents--;
	return f;
}

static void test_without_filter(struct kvm_vcpu *vcpu)
{
	uint64_t count = run_vcpu_to_sync(vcpu);

	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static uint64_t test_with_filter(struct kvm_vcpu *vcpu,
				 struct kvm_pmu_event_filter *f)
{
	vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
	return run_vcpu_to_sync(vcpu);
}

static void test_amd_deny_list(struct kvm_vcpu *vcpu)
{
	uint64_t event = EVENT(0x1C2, 0);
	struct kvm_pmu_event_filter *f;
	uint64_t count;

	f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
	count = test_with_filter(vcpu, f);

	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static void test_member_deny_list(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count = test_with_filter(vcpu, f);

	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}

static void test_member_allow_list(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count = test_with_filter(vcpu, f);

	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vcpu, f);
	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vcpu, f);
	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}

/*
 * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
 *
 * Note that KVM_CAP_PMU_CAPABILITY must be enabled before any vCPUs are
 * created, which is why this test creates its own VM.
 */
static void test_pmu_config_disable(void (*guest_code)(void))
{
	struct kvm_vcpu *vcpu;
	int r;
	struct kvm_vm *vm;

	r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
	if (!(r & KVM_PMU_CAP_DISABLE))
		return;

	vm = vm_create(1);

	vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);

	vcpu = vm_vcpu_add(vm, 0, guest_code);
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);

	TEST_ASSERT(!sanity_check_pmu(vcpu),
		    "Guest should not be able to use disabled PMU.");

	kvm_vm_free(vm);
}

/*
 * Check for a non-zero PMU version, at least one general-purpose
 * counter per logical processor, an EBX bit vector of length greater
 * than 5, and EBX[5] clear.
 */
static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry)
{
	union cpuid10_eax eax = { .full = entry->eax };
	union cpuid10_ebx ebx = { .full = entry->ebx };

	return eax.split.version_id && eax.split.num_counters > 0 &&
		eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
		!ebx.split.no_branch_instruction_retired;
}

/*
 * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
 * clear on AMD hardware.
 */
static bool use_intel_pmu(void)
{
	const struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_entry(0xa);
	return is_intel_cpu() && check_intel_pmu_leaf(entry);
}

static bool is_zen1(uint32_t eax)
{
	return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
}

static bool is_zen2(uint32_t eax)
{
	return x86_family(eax) == 0x17 &&
		x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
}

static bool is_zen3(uint32_t eax)
{
	return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
}

/*
 * Determining AMD support for a PMU event requires consulting the AMD
 * PPR for the CPU or reference material derived therefrom. The AMD
 * test code herein has been verified to work on Zen1, Zen2, and Zen3.
 *
 * Feel free to add more AMD CPUs that are documented to support event
 * select 0xc2 umask 0 as "retired branch instructions."
 */
static bool use_amd_pmu(void)
{
	const struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_entry(1);
	return is_amd_cpu() &&
		(is_zen1(entry->eax) ||
		 is_zen2(entry->eax) ||
		 is_zen3(entry->eax));
}

int main(int argc, char *argv[])
{
	void (*guest_code)(void);
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));

	TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
	guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;

	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);

	TEST_REQUIRE(sanity_check_pmu(vcpu));

	if (use_amd_pmu())
		test_amd_deny_list(vcpu);

	test_without_filter(vcpu);
	test_member_deny_list(vcpu);
	test_member_allow_list(vcpu);
	test_not_member_deny_list(vcpu);
	test_not_member_allow_list(vcpu);

	kvm_vm_free(vm);

	test_pmu_config_disable(guest_code);

	return 0;
}