/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2008 Joseph Koshy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Intel Core PMCs.
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#define	CORE_CPUID_REQUEST		0xA
#define	CORE_CPUID_REQUEST_SIZE		0x4
#define	CORE_CPUID_EAX			0x0
#define	CORE_CPUID_EBX			0x1
#define	CORE_CPUID_ECX			0x2
#define	CORE_CPUID_EDX			0x3

#define	IAF_PMC_CAPS			\
	(PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT | \
	 PMC_CAP_USER | PMC_CAP_SYSTEM)
#define	IAF_RI_TO_MSR(RI)		((RI) + (1 << 30))

#define	IAP_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
    PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE |	 \
    PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)

#define	EV_IS_NOTARCH		0
#define	EV_IS_ARCH_SUPP		1
#define	EV_IS_ARCH_NOTSUPP	-1

/*
 * "Architectural" events defined by Intel.  The values of these
 * symbols correspond to positions in the bitmask returned by
 * the CPUID.0AH instruction.
 */
enum core_arch_events {
	CORE_AE_BRANCH_INSTRUCTION_RETIRED	= 5,
	CORE_AE_BRANCH_MISSES_RETIRED		= 6,
	CORE_AE_INSTRUCTION_RETIRED		= 1,
	CORE_AE_LLC_MISSES			= 4,
	CORE_AE_LLC_REFERENCE			= 3,
	CORE_AE_UNHALTED_REFERENCE_CYCLES	= 2,
	CORE_AE_UNHALTED_CORE_CYCLES		= 0
};

static enum pmc_cputype	core_cputype;
static int core_version;

struct core_cpu {
	volatile uint32_t	pc_iafctrl;	/* Fixed function control. */
	volatile uint64_t	pc_globalctrl;	/* Global control register. */
	struct pmc_hw		pc_corepmcs[];
};

static struct core_cpu **core_pcpu;

static uint32_t core_architectural_events;
static uint64_t core_pmcmask;

static int core_iaf_ri;		/* relative index of fixed counters */
static int core_iaf_width;
static int core_iaf_npmc;

static int core_iap_width;
static int core_iap_npmc;
static int core_iap_wroffset;

static u_int pmc_alloc_refs;
static bool pmc_tsx_force_abort_set;

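/*
 * A do-nothing per-CPU hook; the IAF class uses this because its
 * per-CPU state is set up and torn down together with the IAP class
 * in core_pcpu_init() and core_pcpu_fini().
 */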
static int
core_pcpu_noop(struct pmc_mdep *md, int cpu)
{
	(void) md;
	(void) cpu;
	return (0);
}

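/*
 * Allocate this CPU's state and register the hardware descriptors for
 * the programmable and, on version >= 2 PMUs, the fixed-function
 * counters.
 */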
static int
core_pcpu_init(struct pmc_mdep *md, int cpu)
{
	struct pmc_cpu *pc;
	struct core_cpu *cc;
	struct pmc_hw *phw;
	int core_ri, n, npmc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[iaf,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu);

	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;

	if (core_version >= 2)
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;

	cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw),
	    M_PMC, M_WAITOK | M_ZERO);

	core_pcpu[cpu] = cc;
	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL && cc != NULL,
	    ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu));

	for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) {
		phw->phw_state	  = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) |
		    PMC_PHW_INDEX_TO_STATE(n + core_ri);
		phw->phw_pmc	  = NULL;
		pc->pc_hwpmcs[n + core_ri]  = phw;
	}

	if (core_version >= 2 && vm_guest == VM_GUEST_NO) {
		/* Enable Freezing PMCs on PMI. */
		wrmsr(MSR_DEBUGCTLMSR, rdmsr(MSR_DEBUGCTLMSR) | 0x1000);
	}

	return (0);
}

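/*
 * Disable every counter and release the per-CPU state allocated by
 * core_pcpu_init().
 */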
static int
core_pcpu_fini(struct pmc_mdep *md, int cpu)
{
	int core_ri, n, npmc;
	struct pmc_cpu *pc;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] insane cpu number (%d)", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu);

	if ((cc = core_pcpu[cpu]) == NULL)
		return (0);

	core_pcpu[cpu] = NULL;

	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__,
		cpu));

	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;

	for (n = 0; n < npmc; n++)
		wrmsr(IAP_EVSEL0 + n, 0);

	if (core_version >= 2) {
		wrmsr(IAF_CTRL, 0);
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
	}

	for (n = 0; n < npmc; n++)
		pc->pc_hwpmcs[n + core_ri] = NULL;

	free(cc, M_PMC);

	return (0);
}

/*
 * Fixed function counters.
 */

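/*
 * Counters count upward and interrupt when they wrap at 2^width, so a
 * sampling PMC is primed with (2^width - reload count).  The helpers
 * below convert between raw counter values and reload counts.
 */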
static pmc_value_t
iaf_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iaf_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iaf_width) - 1;
	return (1ULL << core_iaf_width) - v;
}

static pmc_value_t
iaf_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iaf_width) - rlc;
}

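/*
 * Each fixed-function counter measures one hardwired event, so an
 * allocation request is valid only when the requested event and unit
 * mask match the ones assigned to row index 'ri'.
 */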
static int
iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	uint8_t ev, umask;
	uint32_t caps;
	uint64_t config, flags;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps);

	if (ri < 0 || ri >= core_iaf_npmc)
		return (EINVAL);

	if (a->pm_class != PMC_CLASS_IAF)
		return (EINVAL);

	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
		return (EINVAL);

	iap = &a->pm_md.pm_iap;
	config = iap->pm_iap_config;
	ev = IAP_EVSEL_GET(config);
	umask = IAP_UMASK_GET(config);

	if (ev == 0x0) {
		if (umask != ri + 1)
			return (EINVAL);
	} else {
		switch (ri) {
		case 0:	/* INST_RETIRED.ANY */
			if (ev != 0xC0 || umask != 0x00)
				return (EINVAL);
			break;
		case 1:	/* CPU_CLK_UNHALTED.THREAD */
			if (ev != 0x3C || umask != 0x00)
				return (EINVAL);
			break;
		case 2:	/* CPU_CLK_UNHALTED.REF */
			if (ev != 0x3C || umask != 0x01)
				return (EINVAL);
			break;
		case 3:	/* TOPDOWN.SLOTS */
			if (ev != 0xA4 || umask != 0x01)
				return (EINVAL);
			break;
		default:
			return (EINVAL);
		}
	}

	pmc_alloc_refs++;
	if ((cpu_stdext_feature3 & CPUID_STDEXT3_TSXFA) != 0 &&
	    !pmc_tsx_force_abort_set) {
		pmc_tsx_force_abort_set = true;
		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL |
		    MSR_OP_WRITE, 1, NULL);
	}

	flags = 0;
	if (config & IAP_OS)
		flags |= IAF_OS;
	if (config & IAP_USR)
		flags |= IAF_USR;
	if (config & IAP_ANY)
		flags |= IAF_ANY;
	if (config & IAP_INT)
		flags |= IAF_PMI;

	caps = a->pm_caps;
	if (caps & PMC_CAP_INTERRUPT)
		flags |= IAF_PMI;
	if (caps & PMC_CAP_SYSTEM)
		flags |= IAF_OS;
	if (caps & PMC_CAP_USER)
		flags |= IAF_USR;
	if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
		flags |= (IAF_OS | IAF_USR);

	pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4));

	PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx",
	    (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl);

	return (0);
}

static int
iaf_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm;

	return (0);
}

static int
iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	struct pmc_hw *phw;

	phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri];

	snprintf(pi->pm_name, sizeof(pi->pm_name), "IAF-%d", ri);
	pi->pm_class = PMC_CLASS_IAF;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iaf_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	return (0);
}

static int
iaf_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[iaf,%d] ri %d out of range", __LINE__, ri));

	*msr = IAF_RI_TO_MSR(ri);

	return (0);
}

static int
iaf_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v)
{
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	tmp = rdpmc(IAF_RI_TO_MSR(ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iaf_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iaf_width) - 1);

	PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAF_RI_TO_MSR(ri), *v);

	return (0);
}

static int
iaf_release_pmc(int cpu, int ri, struct pmc *pmc)
{
	PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL,
	    ("[core,%d] PHW pmc non-NULL", __LINE__));

	MPASS(pmc_alloc_refs > 0);
	if (pmc_alloc_refs-- == 1 && pmc_tsx_force_abort_set) {
		pmc_tsx_force_abort_set = false;
		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL |
		    MSR_OP_WRITE, 0, NULL);
	}

	return (0);
}

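/*
 * Set the counter's control bits in IAF_CTRL and its enable bit in
 * the global control register.
 */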
static int
iaf_start_pmc(int cpu, int ri, struct pmc *pm)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri);

	cc = core_pcpu[cpu];
	cc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl;
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	cc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET));
	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);

	PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_stop_pmc(int cpu, int ri, struct pmc *pm)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STO,1,"iaf-stop cpu=%d ri=%d", cpu, ri);

	cc = core_pcpu[cpu];

	cc->pc_iafctrl &= ~(IAF_MASK << (ri * 4));
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	/* Don't need to write IA_GLOBAL_CTRL, one disable is enough. */

	PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iaf_reload_count_to_perfctr_value(v);

	/* Turn off the fixed counter. */
	wrmsr(IAF_CTRL, cc->pc_iafctrl & ~(IAF_MASK << (ri * 4)));

	wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1));

	/* Turn the fixed counters back on. */
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx "
	    "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v,
	    (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri)));

	return (0);
}

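/*
 * Describe the fixed-function counter class to the MD layer and fill
 * in its method table.
 */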
static void
iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iaf-initialize");

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF];

	pcd->pcd_caps	= IAF_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAF;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iaf_allocate_pmc;
	pcd->pcd_config_pmc	= iaf_config_pmc;
	pcd->pcd_describe	= iaf_describe;
	pcd->pcd_get_config	= iaf_get_config;
	pcd->pcd_get_msr	= iaf_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_noop;
	pcd->pcd_pcpu_init	= core_pcpu_noop;
	pcd->pcd_read_pmc	= iaf_read_pmc;
	pcd->pcd_release_pmc	= iaf_release_pmc;
	pcd->pcd_start_pmc	= iaf_start_pmc;
	pcd->pcd_stop_pmc	= iaf_stop_pmc;
	pcd->pcd_write_pmc	= iaf_write_pmc;

	md->pmd_npmc	       += npmc;
}

/*
 * Intel programmable PMCs.
 */

/* Sub fields of UMASK that this event supports. */
#define	IAP_M_CORE		(1 << 0) /* Core specificity */
#define	IAP_M_AGENT		(1 << 1) /* Agent specificity */
#define	IAP_M_PREFETCH		(1 << 2) /* Prefetch */
#define	IAP_M_MESI		(1 << 3) /* MESI */
#define	IAP_M_SNOOPRESPONSE	(1 << 4) /* Snoop response */
#define	IAP_M_SNOOPTYPE		(1 << 5) /* Snoop type */
#define	IAP_M_TRANSITION	(1 << 6) /* Transition */

#define	IAP_F_CORE		(0x3 << 14) /* Core specificity */
#define	IAP_F_AGENT		(0x1 << 13) /* Agent specificity */
#define	IAP_F_PREFETCH		(0x3 << 12) /* Prefetch */
#define	IAP_F_MESI		(0xF <<  8) /* MESI */
#define	IAP_F_SNOOPRESPONSE	(0xB <<  8) /* Snoop response */
#define	IAP_F_SNOOPTYPE		(0x3 <<  8) /* Snoop type */
#define	IAP_F_TRANSITION	(0x1 << 12) /* Transition */

#define	IAP_PREFETCH_RESERVED	(0x2 << 12)
#define	IAP_CORE_THIS		(0x1 << 14)
#define	IAP_CORE_ALL		(0x3 << 14)
#define	IAP_F_CMASK		0xFF000000

static pmc_value_t
iap_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iap_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iap_width) - 1;
	return (1ULL << core_iap_width) - v;
}

static pmc_value_t
iap_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iap_width) - rlc;
}

static int
iap_pmc_has_overflowed(int ri)
{
	uint64_t v;

	/*
	 * We treat a Core (i.e., Intel architecture v1) PMC as having
	 * overflowed if its MSB is zero.
	 */
	v = rdpmc(ri);
	return ((v & (1ULL << (core_iap_width - 1))) == 0);
}

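/*
 * Some events may only be counted on particular counters; the helpers
 * below return non-zero when event 'evsel' may be scheduled on
 * counter 'ri' for the given microarchitecture.
 */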
static int
iap_event_corei7_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0, 1. */
	case 0x40:
	case 0x41:
	case 0x42:
	case 0x43:
	case 0x4C:
	case 0x4E:
	case 0x51:
	case 0x52:
	case 0x53:
	case 0x63:
		mask = 0x3;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_westmere_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0. */
	case 0x60:
	case 0xB3:
		mask = 0x1;
		break;

	/* Events valid only on counter 0, 1. */
	case 0x4C:
	case 0x4E:
	case 0x51:
	case 0x52:
	case 0x63:
		mask = 0x3;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0. */
	case 0xB7:
		mask = 0x1;
		break;
	/* Events valid only on counter 1. */
	case 0xC0:
		mask = 0x2;
		break;
	/* Events valid only on counter 2. */
	case 0x48:
	case 0xA2:
	case 0xA3:
		mask = 0x4;
		break;
	/* Events valid only on counter 3. */
	case 0xBB:
	case 0xCD:
		mask = 0x8;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_core_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
		/*
		 * Events valid only on counter 0.
		 */
	case 0x10:
	case 0x14:
	case 0x18:
	case 0xB3:
	case 0xC1:
	case 0xCB:
		mask = (1 << 0);
		break;

		/*
		 * Events valid only on counter 1.
		 */
	case 0x11:
	case 0x12:
	case 0x13:
		mask = (1 << 1);
		break;

	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

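/*
 * Validate an allocation request for a programmable counter, applying
 * the per-microarchitecture counter restrictions above.
 */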
static int
iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	uint8_t ev;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index value %d", __LINE__, ri));

	if (a->pm_class != PMC_CLASS_IAP)
		return (EINVAL);

	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
		return (EINVAL);

	iap = &a->pm_md.pm_iap;
	ev = IAP_EVSEL_GET(iap->pm_iap_config);

	switch (core_cputype) {
	case PMC_CPU_INTEL_CORE:
	case PMC_CPU_INTEL_CORE2:
	case PMC_CPU_INTEL_CORE2EXTREME:
		if (iap_event_core_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_COREI7:
	case PMC_CPU_INTEL_NEHALEM_EX:
		if (iap_event_corei7_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_WESTMERE:
	case PMC_CPU_INTEL_WESTMERE_EX:
		if (iap_event_westmere_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_SANDYBRIDGE:
	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
	case PMC_CPU_INTEL_IVYBRIDGE:
	case PMC_CPU_INTEL_IVYBRIDGE_XEON:
	case PMC_CPU_INTEL_HASWELL:
	case PMC_CPU_INTEL_HASWELL_XEON:
	case PMC_CPU_INTEL_BROADWELL:
	case PMC_CPU_INTEL_BROADWELL_XEON:
		if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_ATOM:
	case PMC_CPU_INTEL_ATOM_SILVERMONT:
	case PMC_CPU_INTEL_ATOM_GOLDMONT:
	case PMC_CPU_INTEL_ATOM_GOLDMONT_P:
	case PMC_CPU_INTEL_ATOM_TREMONT:
	case PMC_CPU_INTEL_SKYLAKE:
	case PMC_CPU_INTEL_SKYLAKE_XEON:
	case PMC_CPU_INTEL_ICELAKE:
	case PMC_CPU_INTEL_ICELAKE_XEON:
	case PMC_CPU_INTEL_ALDERLAKE:
	default:
		break;
	}

	pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config;
	return (0);
}

static int
iap_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm;

	return (0);
}

static int
iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	struct pmc_hw *phw;

	phw = &core_pcpu[cpu]->pc_corepmcs[ri];

	snprintf(pi->pm_name, sizeof(pi->pm_name), "IAP-%d", ri);
	pi->pm_class = PMC_CLASS_IAP;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iap_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;

	return (0);
}

static int
iap_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[iap,%d] ri %d out of range", __LINE__, ri));

	*msr = ri;

	return (0);
}

static int
iap_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v)
{
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	tmp = rdpmc(ri);
	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iap_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iap_width) - 1);

	PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAP_PMC0 + ri, *v);

	return (0);
}

static int
iap_release_pmc(int cpu, int ri, struct pmc *pm)
{
	(void) pm;

	PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri,
	    pm);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc
	    == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__));

	return (0);
}

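/*
 * Program the event selector and enable the counter.  Offcore
 * response events (0xB7/0xBB) first need their request bits written
 * to an auxiliary MSR.
 */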
static int
iap_start_pmc(int cpu, int ri, struct pmc *pm)
{
	uint64_t evsel;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];

	PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri);

	evsel = pm->pm_md.pm_iap.pm_iap_evsel;

	PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x",
	    cpu, ri, IAP_EVSEL0 + ri, evsel);

	/* Event specific configuration. */

	switch (IAP_EVSEL_GET(evsel)) {
	case 0xB7:
		wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	case 0xBB:
		wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	default:
		break;
	}

	wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN);

	if (core_version >= 2) {
		cc->pc_globalctrl |= (1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	}

	return (0);
}

static int
iap_stop_pmc(int cpu, int ri, struct pmc *pm __unused)
{

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri);

	wrmsr(IAP_EVSEL0 + ri, 0);

	/* Don't need to write IA_GLOBAL_CTRL, one disable is enough. */

	return (0);
}

static int
iap_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v)
{

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iap_reload_count_to_perfctr_value(v);

	v &= (1ULL << core_iap_width) - 1;

	PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
	    IAP_PMC0 + ri, v);

	/*
	 * Write the new value to the counter (or its alias).  The
	 * counter will be in a stopped state when the pcd_write()
	 * entry point is called.
	 */
	wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
	return (0);
}

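/*
 * Describe the programmable counter class to the MD layer and fill in
 * its method table.
 */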
static void
iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth,
    int flags)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iap-initialize");

	/* Remember the set of architectural events supported. */
	core_architectural_events = ~flags;

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP];

	pcd->pcd_caps	= IAP_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAP;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iap_allocate_pmc;
	pcd->pcd_config_pmc	= iap_config_pmc;
	pcd->pcd_describe	= iap_describe;
	pcd->pcd_get_config	= iap_get_config;
	pcd->pcd_get_msr	= iap_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_fini;
	pcd->pcd_pcpu_init	= core_pcpu_init;
	pcd->pcd_read_pmc	= iap_read_pmc;
	pcd->pcd_release_pmc	= iap_release_pmc;
	pcd->pcd_start_pmc	= iap_start_pmc;
	pcd->pcd_stop_pmc	= iap_stop_pmc;
	pcd->pcd_write_pmc	= iap_write_pmc;

	md->pmd_npmc	       += npmc;
}

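/*
 * PMI handler for version 1 PMUs: scan each sampling-mode counter for
 * an overflow, process the sample, and reload the counter, leaving it
 * disabled if the sample could not be processed.
 */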
static int
core_intr(struct trapframe *tf)
{
	pmc_value_t v;
	struct pmc *pm;
	struct core_cpu *cc;
	int error, found_interrupt, ri;

	PMCDBG3(MDP,INT, 1, "cpu=%d tf=%p um=%d", curcpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	found_interrupt = 0;
	cc = core_pcpu[curcpu];

	for (ri = 0; ri < core_iap_npmc; ri++) {
		if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		if (!iap_pmc_has_overflowed(ri))
			continue;

		found_interrupt = 1;

		if (pm->pm_state != PMC_STATE_RUNNING)
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);

		v = pm->pm_sc.pm_reloadcount;
		v = iap_reload_count_to_perfctr_value(v);

		/*
		 * Stop the counter, reload it but only restart it if
		 * the PMC is not stalled.
		 */
		wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel);
		wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);

		if (__predict_false(error))
			continue;

		wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel | IAP_EN);
	}

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	if (found_interrupt)
		lapic_reenable_pmc();

	return (found_interrupt);
}

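/*
 * PMI handler for version 2 and later PMUs, which report overflowed
 * counters through IA_GLOBAL_STATUS.
 */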
static int
core2_intr(struct trapframe *tf)
{
	int error, found_interrupt = 0, n, cpu;
	uint64_t flag, intrstatus, intrdisable = 0;
	struct pmc *pm;
	struct core_cpu *cc;
	pmc_value_t v;

	cpu = curcpu;
	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	/*
	 * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which
	 * PMCs have a pending PMI interrupt.  We take a 'snapshot' of
	 * the current set of interrupting PMCs and process these
	 * after stopping them.
	 */
	intrstatus = rdmsr(IA_GLOBAL_STATUS);
	PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu,
	    (uintmax_t) intrstatus);

	/*
	 * Stop the PMCs unless the hardware has already done it.
	 */
	if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0)
		wrmsr(IA_GLOBAL_CTRL, 0);

	cc = core_pcpu[cpu];
	KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__));

	/*
	 * Look for interrupts from fixed function PMCs.
	 */
	for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc;
	     n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (__predict_false(error))
			intrdisable |= flag;

		v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		/* Reload sampling count. */
		wrmsr(IAF_CTR0 + n, v);

		PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", curcpu,
		    error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n)));
	}

	/*
	 * Process interrupts from the programmable counters.
	 */
	for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (__predict_false(error))
			intrdisable |= flag;

		v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error,
		    (uintmax_t) v);

		/* Reload sampling count. */
		wrmsr(core_iap_wroffset + IAP_PMC0 + n, v);
	}

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	if (found_interrupt)
		lapic_reenable_pmc();

	/*
	 * Reenable all non-stalled PMCs.
	 */
	if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0) {
		wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus);
		cc->pc_globalctrl &= ~intrdisable;
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	} else {
		if (__predict_false(intrdisable)) {
			cc->pc_globalctrl &= ~intrdisable;
			wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
		}
		wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus);
	}

	PMCDBG4(MDP, INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx",
	    cpu, (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_STATUS));

	return (found_interrupt);
}

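/*
 * Probe the PMU through CPUID leaf 0xA and register the IAP and,
 * where present, IAF classes with the machine-dependent layer.
 */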
int
pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override)
{
	int cpuid[CORE_CPUID_REQUEST_SIZE];
	int flags, nflags;

	do_cpuid(CORE_CPUID_REQUEST, cpuid);

	core_cputype = md->pmd_cputype;
	core_version = (version_override > 0) ? version_override :
	    cpuid[CORE_CPUID_EAX] & 0xFF;

	PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d version=%d",
	    core_cputype, maxcpu, core_version);

	if (core_version < 1 || core_version > 5 ||
	    (core_cputype != PMC_CPU_INTEL_CORE && core_version == 1)) {
		/* Unknown PMC architecture. */
		printf("hwpmc_core: unknown PMC architecture: %d\n",
		    core_version);
		return (EPROGMISMATCH);
	}

	core_iap_wroffset = 0;
	if (cpu_feature2 & CPUID2_PDCM) {
		if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) {
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write supported");
			core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0;
		} else
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write NOT supported");
	} else
		PMCDBG0(MDP, INI, 1, "core-init pdcm not supported");

	core_pmcmask = 0;

	/*
	 * Initialize programmable counters.
	 */
	core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF;
	core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF;

	core_pmcmask |= ((1ULL << core_iap_npmc) - 1);

	nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF;
	flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1);

	iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags);

	/*
	 * Initialize fixed function counters, if present.
	 */
	if (core_version >= 2) {
		core_iaf_ri = core_iap_npmc;
		core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F;
		core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF;

		iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width);
		core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET;
	}

	PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask,
	    core_iaf_ri);

	core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC,
	    M_ZERO | M_WAITOK);

	/*
	 * Choose the appropriate interrupt handler.
	 */
	if (core_version >= 2)
		md->pmd_intr = core2_intr;
	else
		md->pmd_intr = core_intr;

	return (0);
}

void
pmc_core_finalize(struct pmc_mdep *md)
{
	PMCDBG0(MDP,INI,1, "core-finalize");

	for (int i = 0; i < pmc_cpu_max(); i++)
		KASSERT(core_pcpu[i] == NULL,
		    ("[core,%d] non-null pcpu cpu %d", __LINE__, i));

	free(core_pcpu, M_PMC);
	core_pcpu = NULL;
}