1 /*- 2 * Copyright 1996 Massachusetts Institute of Technology 3 * 4 * Permission to use, copy, modify, and distribute this software and 5 * its documentation for any purpose and without fee is hereby 6 * granted, provided that both the above copyright notice and this 7 * permission notice appear in all copies, that both the above 8 * copyright notice and this permission notice appear in all 9 * supporting documentation, and that the name of M.I.T. not be used 10 * in advertising or publicity pertaining to distribution of the 11 * software without specific, written prior permission. M.I.T. makes 12 * no representations about the suitability of this software for any 13 * purpose. It is provided "as is" without express or implied 14 * warranty. 15 * 16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/conf.h> 36 #include <sys/fcntl.h> 37 #include <sys/kernel.h> 38 39 #ifndef SMP 40 #include <machine/cputypes.h> 41 #endif 42 #include <machine/clock.h> 43 #include <machine/perfmon.h> 44 #include <machine/specialreg.h> 45 46 static int perfmon_inuse; 47 static int perfmon_cpuok; 48 #ifndef SMP 49 static int msr_ctl[NPMC]; 50 #endif 51 static int msr_pmc[NPMC]; 52 static unsigned int ctl_shadow[NPMC]; 53 static quad_t pmc_shadow[NPMC]; /* used when ctr is stopped on P5 */ 54 static int (*writectl)(int); 55 #ifndef SMP 56 static int writectl5(int); 57 static int writectl6(int); 58 #endif 59 60 static d_close_t perfmon_close; 61 static d_open_t perfmon_open; 62 static d_ioctl_t perfmon_ioctl; 63 64 /* 65 * XXX perfmon_init_dev(void *) is a split from the perfmon_init() function. 66 * This solves a problem for DEVFS users. It loads the "perfmon" driver after 67 * the DEVFS subsystem has been kicked into action. The SI_ORDER_ANY is to 68 * assure that it is the most lowest priority task which, guarantees the 69 * above. 70 */ 71 static void perfmon_init_dev(void *); 72 SYSINIT(cpu, SI_SUB_DRIVERS, SI_ORDER_ANY, perfmon_init_dev, NULL); 73 74 static struct cdevsw perfmon_cdevsw = { 75 .d_version = D_VERSION, 76 .d_flags = D_NEEDGIANT, 77 .d_open = perfmon_open, 78 .d_close = perfmon_close, 79 .d_ioctl = perfmon_ioctl, 80 .d_name = "perfmon", 81 }; 82 83 /* 84 * Must be called after cpu_class is set up. 85 */ 86 void 87 perfmon_init(void) 88 { 89 #ifndef SMP 90 switch(cpu_class) { 91 case CPUCLASS_586: 92 perfmon_cpuok = 1; 93 msr_ctl[0] = MSR_P5_CESR; 94 msr_ctl[1] = MSR_P5_CESR; 95 msr_pmc[0] = MSR_P5_CTR0; 96 msr_pmc[1] = MSR_P5_CTR1; 97 writectl = writectl5; 98 break; 99 case CPUCLASS_686: 100 perfmon_cpuok = 1; 101 msr_ctl[0] = MSR_EVNTSEL0; 102 msr_ctl[1] = MSR_EVNTSEL1; 103 msr_pmc[0] = MSR_PERFCTR0; 104 msr_pmc[1] = MSR_PERFCTR1; 105 writectl = writectl6; 106 break; 107 108 default: 109 perfmon_cpuok = 0; 110 break; 111 } 112 #endif /* SMP */ 113 } 114 115 static void 116 perfmon_init_dev(dummy) 117 void *dummy; 118 { 119 make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon"); 120 } 121 122 int 123 perfmon_avail(void) 124 { 125 return perfmon_cpuok; 126 } 127 128 int 129 perfmon_setup(int pmc, unsigned int control) 130 { 131 register_t saveintr; 132 133 if (pmc < 0 || pmc >= NPMC) 134 return EINVAL; 135 136 perfmon_inuse |= (1 << pmc); 137 control &= ~(PMCF_SYS_FLAGS << 16); 138 saveintr = intr_disable(); 139 ctl_shadow[pmc] = control; 140 writectl(pmc); 141 wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); 142 intr_restore(saveintr); 143 return 0; 144 } 145 146 int 147 perfmon_get(int pmc, unsigned int *control) 148 { 149 if (pmc < 0 || pmc >= NPMC) 150 return EINVAL; 151 152 if (perfmon_inuse & (1 << pmc)) { 153 *control = ctl_shadow[pmc]; 154 return 0; 155 } 156 return EBUSY; /* XXX reversed sense */ 157 } 158 159 int 160 perfmon_fini(int pmc) 161 { 162 if (pmc < 0 || pmc >= NPMC) 163 return EINVAL; 164 165 if (perfmon_inuse & (1 << pmc)) { 166 perfmon_stop(pmc); 167 ctl_shadow[pmc] = 0; 168 perfmon_inuse &= ~(1 << pmc); 169 return 0; 170 } 171 return EBUSY; /* XXX reversed sense */ 172 } 173 174 int 175 perfmon_start(int pmc) 176 { 177 register_t saveintr; 178 179 if (pmc < 0 || pmc >= NPMC) 180 return EINVAL; 181 182 if (perfmon_inuse & (1 << pmc)) { 183 saveintr = intr_disable(); 184 ctl_shadow[pmc] |= (PMCF_EN << 16); 185 wrmsr(msr_pmc[pmc], pmc_shadow[pmc]); 186 writectl(pmc); 187 intr_restore(saveintr); 188 return 0; 189 } 190 return EBUSY; 191 } 192 193 int 194 perfmon_stop(int pmc) 195 { 196 register_t saveintr; 197 198 if (pmc < 0 || pmc >= NPMC) 199 return EINVAL; 200 201 if (perfmon_inuse & (1 << pmc)) { 202 saveintr = intr_disable(); 203 pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; 204 ctl_shadow[pmc] &= ~(PMCF_EN << 16); 205 writectl(pmc); 206 intr_restore(saveintr); 207 return 0; 208 } 209 return EBUSY; 210 } 211 212 int 213 perfmon_read(int pmc, quad_t *val) 214 { 215 if (pmc < 0 || pmc >= NPMC) 216 return EINVAL; 217 218 if (perfmon_inuse & (1 << pmc)) { 219 if (ctl_shadow[pmc] & (PMCF_EN << 16)) 220 *val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; 221 else 222 *val = pmc_shadow[pmc]; 223 return 0; 224 } 225 226 return EBUSY; 227 } 228 229 int 230 perfmon_reset(int pmc) 231 { 232 if (pmc < 0 || pmc >= NPMC) 233 return EINVAL; 234 235 if (perfmon_inuse & (1 << pmc)) { 236 wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); 237 return 0; 238 } 239 return EBUSY; 240 } 241 242 #ifndef SMP 243 /* 244 * Unfortunately, the performance-monitoring registers are laid out 245 * differently in the P5 and P6. We keep everything in P6 format 246 * internally (except for the event code), and convert to P5 247 * format as needed on those CPUs. The writectl function pointer 248 * is set up to point to one of these functions by perfmon_init(). 249 */ 250 int 251 writectl6(int pmc) 252 { 253 if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) { 254 wrmsr(msr_ctl[pmc], 0); 255 } else { 256 wrmsr(msr_ctl[pmc], ctl_shadow[pmc]); 257 } 258 return 0; 259 } 260 261 #define P5FLAG_P 0x200 262 #define P5FLAG_E 0x100 263 #define P5FLAG_USR 0x80 264 #define P5FLAG_OS 0x40 265 266 int 267 writectl5(int pmc) 268 { 269 quad_t newval = 0; 270 271 if (ctl_shadow[1] & (PMCF_EN << 16)) { 272 if (ctl_shadow[1] & (PMCF_USR << 16)) 273 newval |= P5FLAG_USR << 16; 274 if (ctl_shadow[1] & (PMCF_OS << 16)) 275 newval |= P5FLAG_OS << 16; 276 if (!(ctl_shadow[1] & (PMCF_E << 16))) 277 newval |= P5FLAG_E << 16; 278 newval |= (ctl_shadow[1] & 0x3f) << 16; 279 } 280 if (ctl_shadow[0] & (PMCF_EN << 16)) { 281 if (ctl_shadow[0] & (PMCF_USR << 16)) 282 newval |= P5FLAG_USR; 283 if (ctl_shadow[0] & (PMCF_OS << 16)) 284 newval |= P5FLAG_OS; 285 if (!(ctl_shadow[0] & (PMCF_E << 16))) 286 newval |= P5FLAG_E; 287 newval |= ctl_shadow[0] & 0x3f; 288 } 289 290 wrmsr(msr_ctl[0], newval); 291 return 0; /* XXX should check for unimplemented bits */ 292 } 293 #endif /* !SMP */ 294 295 /* 296 * Now the user-mode interface, called from a subdevice of mem.c. 297 */ 298 static int writer; 299 static int writerpmc; 300 301 static int 302 perfmon_open(struct cdev *dev, int flags, int fmt, struct thread *td) 303 { 304 if (!perfmon_cpuok) 305 return ENXIO; 306 307 if (flags & FWRITE) { 308 if (writer) { 309 return EBUSY; 310 } else { 311 writer = 1; 312 writerpmc = 0; 313 } 314 } 315 return 0; 316 } 317 318 static int 319 perfmon_close(struct cdev *dev, int flags, int fmt, struct thread *td) 320 { 321 if (flags & FWRITE) { 322 int i; 323 324 for (i = 0; i < NPMC; i++) { 325 if (writerpmc & (1 << i)) 326 perfmon_fini(i); 327 } 328 writer = 0; 329 } 330 return 0; 331 } 332 333 static int 334 perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_t param, int flags, struct thread *td) 335 { 336 struct pmc *pmc; 337 struct pmc_data *pmcd; 338 struct pmc_tstamp *pmct; 339 uint64_t freq; 340 int *ip; 341 int rv; 342 343 switch(cmd) { 344 case PMIOSETUP: 345 if (!(flags & FWRITE)) 346 return EPERM; 347 pmc = (struct pmc *)param; 348 349 rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val); 350 if (!rv) { 351 writerpmc |= (1 << pmc->pmc_num); 352 } 353 break; 354 355 case PMIOGET: 356 pmc = (struct pmc *)param; 357 rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val); 358 break; 359 360 case PMIOSTART: 361 if (!(flags & FWRITE)) 362 return EPERM; 363 364 ip = (int *)param; 365 rv = perfmon_start(*ip); 366 break; 367 368 case PMIOSTOP: 369 if (!(flags & FWRITE)) 370 return EPERM; 371 372 ip = (int *)param; 373 rv = perfmon_stop(*ip); 374 break; 375 376 case PMIORESET: 377 if (!(flags & FWRITE)) 378 return EPERM; 379 380 ip = (int *)param; 381 rv = perfmon_reset(*ip); 382 break; 383 384 case PMIOREAD: 385 pmcd = (struct pmc_data *)param; 386 rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value); 387 break; 388 389 case PMIOTSTAMP: 390 freq = atomic_load_acq_64(&tsc_freq); 391 if (freq == 0) { 392 rv = ENOTTY; 393 break; 394 } 395 pmct = (struct pmc_tstamp *)param; 396 /* XXX interface loses precision. */ 397 pmct->pmct_rate = freq / 1000000; 398 pmct->pmct_value = rdtsc(); 399 rv = 0; 400 break; 401 default: 402 rv = ENOTTY; 403 } 404 405 return rv; 406 } 407