/*	$NetBSD: dtrace_subr.c,v 1.5 2010/04/23 11:39:53 ahoka Exp $	*/

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * $FreeBSD: src/sys/cddl/dev/dtrace/i386/dtrace_subr.c,v 1.3.2.1 2009/08/03 08:13:06 kensmith Exp $
 *
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/kmem.h>
#include <sys/xcall.h>
#include <sys/cpu.h>
#include <sys/cpuvar.h>
//#include <sys/smp.h>
#include <sys/dtrace_impl.h>
#include <sys/dtrace_bsd.h>
#include <machine/cpu.h>
#include <machine/clock.h>
#include <machine/frame.h>
#include <uvm/uvm_pglist.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_pmap.h>

#include <x86/include/cpu_counter.h>

extern uintptr_t kernelbase;
extern uintptr_t dtrace_in_probe_addr;
extern int dtrace_in_probe;

int dtrace_invop(uintptr_t, uintptr_t *, uintptr_t);

typedef struct dtrace_invop_hdlr {
	int (*dtih_func)(uintptr_t, uintptr_t *, uintptr_t);
	struct dtrace_invop_hdlr *dtih_next;
} dtrace_invop_hdlr_t;

dtrace_invop_hdlr_t *dtrace_invop_hdlr;

void dtrace_gethrtime_init(void *arg);

int
dtrace_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
{
	dtrace_invop_hdlr_t *hdlr;
	int rval;

	for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next)
		if ((rval = hdlr->dtih_func(addr, stack, eax)) != 0)
			return (rval);

	return (0);
}

void
dtrace_invop_add(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
{
	dtrace_invop_hdlr_t *hdlr;

	hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP);
	hdlr->dtih_func = func;
	hdlr->dtih_next = dtrace_invop_hdlr;
	dtrace_invop_hdlr = hdlr;
}

void
dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
{
	dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL;

	for (;;) {
		if (hdlr == NULL)
			panic("attempt to remove non-existent invop handler");

		if (hdlr->dtih_func == func)
			break;

		prev = hdlr;
		hdlr = hdlr->dtih_next;
	}

	if (prev == NULL) {
		ASSERT(dtrace_invop_hdlr == hdlr);
		dtrace_invop_hdlr = hdlr->dtih_next;
	} else {
		ASSERT(dtrace_invop_hdlr != hdlr);
		prev->dtih_next = hdlr->dtih_next;
	}

	kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t));
}
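/*
 * Illustrative sketch (not compiled): a provider such as FBT hooks the
 * invalid-opcode path by registering a handler of the shape consumed by
 * dtrace_invop() above.  The handler and lookup names below are
 * hypothetical; only dtrace_invop_add()/dtrace_invop_remove() and
 * dtrace_probe() are real interfaces, and real handlers return a
 * DTRACE_INVOP_* emulation code rather than a bare 1.
 */
#if 0
static int
example_invop_handler(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
{

	/* Return non-zero only if 'addr' is one of our patched sites. */
	if (!example_addr_is_traced(addr))	/* hypothetical lookup */
		return (0);

	dtrace_probe(example_probe_id(addr), 0, 0, 0, 0, 0);
	return (1);	/* claimed; see DTRACE_INVOP_* in real providers */
}

	/* Registration and removal, e.g. at provider load/unload: */
	dtrace_invop_add(example_invop_handler);
	/* ... */
	dtrace_invop_remove(example_invop_handler);
#endif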
void
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
{
	(*func)(0, kernelbase);
}

static void
xcall_func(void *arg0, void *arg1)
{
	dtrace_xcall_t func = arg0;

	(*func)(arg1);
}

void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
{
	uint64_t where;

	if (cpu == DTRACE_CPUALL) {
		where = xc_broadcast(0, xcall_func, func, arg);
	} else {
		struct cpu_info *cinfo = cpu_lookup(cpu);

		KASSERT(cinfo != NULL);
		where = xc_unicast(0, xcall_func, func, arg, cinfo);
	}
	xc_wait(where);

	/*
	 * XXX Q. Do we really need the other cpus to wait also?
	 * (see solaris:xc_sync())
	 */
}

static void
dtrace_sync_func(void)
{
}

void
dtrace_sync(void)
{
	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
}
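/*
 * Usage sketch (illustrative): dtrace_sync() broadcasts an empty
 * cross-call, so by the time xc_wait() returns every CPU has passed
 * through the xcall handler and can no longer be executing inside a
 * probe that was visible before the call.  Code retiring probe state
 * therefore does something like the following (the example_* names
 * are hypothetical):
 */
#if 0
	example_disable_probe(probe);	/* hypothetical state change */
	dtrace_sync();			/* wait for all CPUs to notice */
	example_free_probe(probe);	/* now safe to tear down */
#endif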
#ifdef notyet
int (*dtrace_fasttrap_probe_ptr)(struct regs *);
int (*dtrace_pid_probe_ptr)(struct regs *);
int (*dtrace_return_probe_ptr)(struct regs *);

void
dtrace_user_probe(struct regs *rp, caddr_t addr, processorid_t cpuid)
{
	krwlock_t *rwp;
	proc_t *p = curproc;
	extern void trap(struct regs *, caddr_t, processorid_t);

	if (USERMODE(rp->r_cs) || (rp->r_ps & PS_VM)) {
		if (curthread->t_cred != p->p_cred) {
			cred_t *oldcred = curthread->t_cred;
			/*
			 * DTrace accesses t_cred in probe context.  t_cred
			 * must always be either NULL, or point to a valid,
			 * allocated cred structure.
			 */
			curthread->t_cred = crgetcred();
			crfree(oldcred);
		}
	}

	if (rp->r_trapno == T_DTRACE_RET) {
		uint8_t step = curthread->t_dtrace_step;
		uint8_t ret = curthread->t_dtrace_ret;
		uintptr_t npc = curthread->t_dtrace_npc;

		if (curthread->t_dtrace_ast) {
			aston(curthread);
			curthread->t_sig_check = 1;
		}

		/*
		 * Clear all user tracing flags.
		 */
		curthread->t_dtrace_ft = 0;

		/*
		 * If we weren't expecting to take a return probe trap, kill
		 * the process as though it had just executed an unassigned
		 * trap instruction.
		 */
		if (step == 0) {
			tsignal(curthread, SIGILL);
			return;
		}

		/*
		 * If we hit this trap unrelated to a return probe, we're
		 * just here to reset the AST flag since we deferred a signal
		 * until after we logically single-stepped the instruction we
		 * copied out.
		 */
		if (ret == 0) {
			rp->r_pc = npc;
			return;
		}

		/*
		 * We need to wait until after we've called the
		 * dtrace_return_probe_ptr function pointer to set %pc.
		 */
		rwp = &CPU->cpu_ft_lock;
		rw_enter(rwp, RW_READER);
		if (dtrace_return_probe_ptr != NULL)
			(void) (*dtrace_return_probe_ptr)(rp);
		rw_exit(rwp);
		rp->r_pc = npc;

	} else if (rp->r_trapno == T_DTRACE_PROBE) {
		rwp = &CPU->cpu_ft_lock;
		rw_enter(rwp, RW_READER);
		if (dtrace_fasttrap_probe_ptr != NULL)
			(void) (*dtrace_fasttrap_probe_ptr)(rp);
		rw_exit(rwp);

	} else if (rp->r_trapno == T_BPTFLT) {
		uint8_t instr;
		rwp = &CPU->cpu_ft_lock;

		/*
		 * The DTrace fasttrap provider uses the breakpoint trap
		 * (int 3).  We let DTrace take the first crack at handling
		 * this trap; if it's not a probe that DTrace knows about,
		 * we call into the trap() routine to handle it like a
		 * breakpoint placed by a conventional debugger.
		 */
		rw_enter(rwp, RW_READER);
		if (dtrace_pid_probe_ptr != NULL &&
		    (*dtrace_pid_probe_ptr)(rp) == 0) {
			rw_exit(rwp);
			return;
		}
		rw_exit(rwp);

		/*
		 * If the instruction that caused the breakpoint trap doesn't
		 * look like an int 3 anymore, it may be that this tracepoint
		 * was removed just after the user thread executed it.  In
		 * that case, return to user land to retry the instruction.
		 */
		if (fuword8((void *)(rp->r_pc - 1), &instr) == 0 &&
		    instr != FASTTRAP_INSTR) {
			rp->r_pc--;
			return;
		}

		trap(rp, addr, cpuid);

	} else {
		trap(rp, addr, cpuid);
	}
}

void
dtrace_safe_synchronous_signal(void)
{
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags.  If the
	 * instruction we copied out caused a synchronous trap, reset the pc
	 * back to its original value and turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
	    rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
	} else if (rp->r_pc == t->t_dtrace_scrpc ||
	    rp->r_pc == t->t_dtrace_astpc) {
		rp->r_pc = t->t_dtrace_pc;
		t->t_dtrace_ft = 0;
	}
}
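/*
 * Layout sketch of the per-thread fasttrap scratch region assumed by
 * the range checks in dtrace_safe_defer_signal() below (addresses grow
 * to the right; isz is the length of the traced instruction):
 *
 *	scrpc          scrpc+isz       astpc          astpc+isz
 *	| copied instr | jmp to npc    | copied instr | trap to kernel
 *
 * A pc in [scrpc + isz, astpc) means the copied instruction has been
 * executed but control has not yet returned to npc; a pc at astpc is
 * on the variant that traps back into the kernel so an AST can be
 * taken.
 */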
int
dtrace_safe_defer_signal(void)
{
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
	    rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * If we've executed the original instruction, but haven't performed
	 * the jmp back to t->t_dtrace_npc or the clean up of any registers
	 * used to emulate %rip-relative instructions in 64-bit mode, do that
	 * here and take the signal right away.  We detect this condition by
	 * seeing if the program counter is in the range [scrpc + isz, astpc).
	 */
	if (t->t_dtrace_astpc - rp->r_pc <
	    t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) {
#ifdef __amd64
		/*
		 * If there is a scratch register and we're on the
		 * instruction immediately after the modified instruction,
		 * restore the value of that scratch register.
		 */
		if (t->t_dtrace_reg != 0 &&
		    rp->r_pc == t->t_dtrace_scrpc + isz) {
			switch (t->t_dtrace_reg) {
			case REG_RAX:
				rp->r_rax = t->t_dtrace_regv;
				break;
			case REG_RCX:
				rp->r_rcx = t->t_dtrace_regv;
				break;
			case REG_R8:
				rp->r_r8 = t->t_dtrace_regv;
				break;
			case REG_R9:
				rp->r_r9 = t->t_dtrace_regv;
				break;
			}
		}
#endif
		rp->r_pc = t->t_dtrace_npc;
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * Otherwise, make sure we'll return to the kernel after executing
	 * the copied out instruction and defer the signal.
	 */
	if (!t->t_dtrace_step) {
		ASSERT(rp->r_pc < t->t_dtrace_astpc);
		rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;
		t->t_dtrace_step = 1;
	}

	t->t_dtrace_ast = 1;

	return (1);
}
#endif

#if 0
static int64_t	tgt_cpu_tsc;
static int64_t	hst_cpu_tsc;
#endif
static int64_t	tsc_skew[MAXCPUS];
static uint64_t	nsec_scale;

/* See below for the explanation of this macro. */
#define SCALE_SHIFT	28

static __inline uint64_t
dtrace_rdtsc(void)
{
	uint64_t rv;

	__asm __volatile("rdtsc" : "=A" (rv));
	return (rv);
}

#if 0
static void
dtrace_gethrtime_init_sync(void *arg)
{
#ifdef CHECK_SYNC
	/*
	 * Delay this function from returning on one
	 * of the CPUs to check that the synchronisation
	 * works.
	 */
	uintptr_t cpu = (uintptr_t) arg;

	if (cpu == curcpu) {
		int i;
		for (i = 0; i < 1000000000; i++)
			tgt_cpu_tsc = dtrace_rdtsc();
		tgt_cpu_tsc = 0;
	}
#endif
}
#endif

#if 0
static void
dtrace_gethrtime_init_cpu(void *arg)
{
	uintptr_t cpu = (uintptr_t) arg;

	if (cpu == cpu_number())
		tgt_cpu_tsc = dtrace_rdtsc();
	else
		hst_cpu_tsc = dtrace_rdtsc();
}
#endif

void
dtrace_gethrtime_init(void *arg)
{
	uint64_t tsc_f;
	CPU_INFO_ITERATOR cpuind;
	struct cpu_info *cinfo = curcpu();
	cpuid_t cur_cpuid = cpu_number();	/* current cpu id */

	/*
	 * Get the TSC frequency known at this moment.
	 * This should be constant if the TSC is invariant.
	 * Otherwise the tick->time conversion will be inaccurate, but
	 * will preserve the monotonic property of the TSC.
	 */
	tsc_f = cpu_frequency(cinfo);

	/*
	 * The following line checks that nsec_scale calculated below
	 * doesn't overflow a 32-bit unsigned integer, so that it can be
	 * multiplied by another 32-bit integer without overflowing 64 bits.
	 * Thus the minimum supported TSC frequency is 62.5MHz.
	 */
	//KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("TSC frequency is too low"));
	KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)));

	/*
	 * We scale up the NANOSEC/tsc_f ratio to preserve as much precision
	 * as possible.
	 * The 2^28 factor was chosen quite arbitrarily from practical
	 * considerations:
	 * - it supports TSC frequencies as low as 62.5MHz (see above);
	 * - it provides quite good precision (e < 0.01%) up to THz
	 *   (terahertz) values;
	 */
	nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;

	/* The current CPU is the reference one. */
	tsc_skew[cur_cpuid] = 0;

	for (CPU_INFO_FOREACH(cpuind, cinfo)) {
		/* use skew relative to cpu 0 */
		tsc_skew[cpu_index(cinfo)] = cinfo->ci_data.cpu_cc_skew;
	}

	/* Already handled in x86/tsc.c for ci_data.cpu_cc_skew */
#if 0
	for (i = 0; i <= mp_maxid; i++) {
		if (i == curcpu)
			continue;

		if (pcpu_find(i) == NULL)
			continue;

		map = 0;
		map |= (1 << curcpu);
		map |= (1 << i);

		smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync,
		    dtrace_gethrtime_init_cpu,
		    smp_no_rendevous_barrier, (void *)(uintptr_t) i);

		tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc;
	}
#endif
}
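/*
 * Worked example of the scaling above (numbers are illustrative,
 * assuming tsc_f = 2,000,000,000 Hz, i.e. a 2GHz TSC):
 *
 *	nsec_scale = (10^9 << 28) / (2 * 10^9) = 2^27 = 134217728
 *
 * so a delta of 2 * 10^9 ticks (one second) converts to
 *
 *	(2 * 10^9 * 134217728) >> 28 = 10^9 ns,
 *
 * as expected.  dtrace_gethrtime() below computes
 * (tsc * nsec_scale) >> SCALE_SHIFT without a 128-bit multiply by
 * splitting tsc = hi * 2^32 + lo, since (modulo truncation of the
 * low-order bits)
 *
 *	((hi << 32) + lo) * scale >> 28
 *	    = ((lo * scale) >> 28) + ((hi * scale) << (32 - 28)).
 */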
/*
 * DTrace needs a high resolution time function which can
 * be called from a probe context and is guaranteed not to have
 * been instrumented with probes itself.
 *
 * Returns nanoseconds since boot.
 */
uint64_t
dtrace_gethrtime(void)
{
	uint64_t tsc;
	uint32_t lo;
	uint32_t hi;

	/*
	 * We split the TSC value into lower and higher 32-bit halves and
	 * separately scale them with nsec_scale, then we scale them down
	 * by 2^28 (see the nsec_scale calculations) taking into account
	 * the 32-bit shift of the higher half, and finally add.
	 */
	tsc = dtrace_rdtsc() + tsc_skew[cpu_number()];
	lo = tsc;
	hi = tsc >> 32;
	return (((lo * nsec_scale) >> SCALE_SHIFT) +
	    ((hi * nsec_scale) << (32 - SCALE_SHIFT)));
}

uint64_t
dtrace_gethrestime(void)
{
	printf("%s(%d): XXX\n", __func__, __LINE__);
	return (0);
}

/* Function to handle DTrace traps during probes.  See i386/i386/trap.c. */
int
dtrace_trap(struct trapframe *frame, u_int type)
{
	cpuid_t cpuid = cpu_number();	/* current cpu id */

	/*
	 * A trap can occur while DTrace executes a probe.  Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault.  On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * Check if DTrace has enabled 'no-fault' mode:
	 */
	if ((cpu_core[cpuid].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) {
		/*
		 * There are only a couple of trap types that are expected.
		 * All the rest will be handled in the usual way.
		 */
		switch (type) {
		/* General protection fault. */
		case T_PROTFLT:
			/* Flag an illegal operation. */
			cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;

			/*
			 * Offset the instruction pointer to the instruction
			 * following the one causing the fault.
			 */
			frame->tf_eip += dtrace_instr_size((u_char *) frame->tf_eip);
			return (1);
		/* Page fault. */
		case T_PAGEFLT:
			/* Flag a bad address. */
			cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR;
			cpu_core[cpuid].cpuc_dtrace_illval = rcr2();

			/*
			 * Offset the instruction pointer to the instruction
			 * following the one causing the fault.
			 */
			frame->tf_eip += dtrace_instr_size((u_char *) frame->tf_eip);
			return (1);
		default:
			/* Handle all other traps in the usual way. */
			break;
		}
	}

	/* Handle the trap in the usual way. */
	return (0);
}
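/*
 * Caller-side sketch (illustrative; the actual hook-up lives in
 * i386/i386/trap.c and the hook name there may differ).  The trap
 * handler gives DTrace first refusal on faults taken while a probe
 * runs in no-fault mode:
 */
#if 0
	/* in trap(): */
	if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
		return;		/* fault consumed by DTrace; %eip advanced */
#endif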