1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/fasttrap_isa.h>
28 #include <sys/fasttrap_impl.h>
29 #include <sys/dtrace.h>
30 #include <sys/dtrace_impl.h>
31 #include <sys/cmn_err.h>
32 #include <sys/regset.h>
33 #include <sys/privregs.h>
34 #include <sys/segments.h>
35 #include <sys/x86_archext.h>
36 #include <sys/sysmacros.h>
37 #include <sys/trap.h>
38 #include <sys/archsystm.h>
39
40 /*
41 * Lossless User-Land Tracing on x86
42 * ---------------------------------
43 *
44 * The execution of most instructions is not dependent on the address; for
45 * these instructions it is sufficient to copy them into the user process's
46 * address space and execute them. To effectively single-step an instruction
47 * in user-land, we copy out the following sequence of instructions to scratch
48 * space in the user thread's ulwp_t structure.
49 *
50 * We then set the program counter (%eip or %rip) to point to this scratch
51 * space. Once execution resumes, the original instruction is executed and
52 * then control flow is redirected to what was originally the subsequent
 * instruction. If the kernel attempts to deliver a signal while single-
54 * stepping, the signal is deferred and the program counter is moved into the
55 * second sequence of instructions. The second sequence ends in a trap into
56 * the kernel where the deferred signal is then properly handled and delivered.
57 *
 * For instructions whose execution is position dependent, we perform simple
59 * emulation. These instructions are limited to control transfer
60 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
61 * of %rip-relative addressing that means that almost any instruction can be
62 * position dependent. For all the details on how we emulate generic
 * instructions, including %rip-relative instructions, see the code in
64 * fasttrap_pid_probe() below where we handle instructions of type
65 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
66 */
67
/*
 * ModR/M byte: mod occupies bits 7:6, reg bits 5:3 and r/m bits 2:0.
 * FASTTRAP_MODRM() builds a byte from the three fields.
 */
#define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x3)
#define	FASTTRAP_MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)
#define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x7)
#define	FASTTRAP_MODRM(mod, reg, rm)	(((mod) << 6) | ((reg) << 3) | (rm))

/*
 * SIB byte: scale occupies bits 7:6, index bits 5:3 and base bits 2:0.
 */
#define	FASTTRAP_SIB_SCALE(sib)		(((sib) >> 6) & 0x3)
#define	FASTTRAP_SIB_INDEX(sib)		(((sib) >> 3) & 0x7)
#define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x7)

/*
 * REX prefix: the high nibble is 0x4 and the low nibble holds the W, R, X
 * and B bits (bits 3, 2, 1 and 0 respectively).  The R/X/B bits are used
 * below as the fourth (high) bit of a register number.
 */
#define	FASTTRAP_REX_W(rex)		(((rex) >> 3) & 1)
#define	FASTTRAP_REX_R(rex)		(((rex) >> 2) & 1)
#define	FASTTRAP_REX_X(rex)		(((rex) >> 1) & 1)
#define	FASTTRAP_REX_B(rex)		((rex) & 1)
#define	FASTTRAP_REX(w, r, x, b)	\
	(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))
83
84 /*
85 * Single-byte op-codes.
86 */
87 #define FASTTRAP_PUSHL_EBP 0x55
88
89 #define FASTTRAP_JO 0x70
90 #define FASTTRAP_JNO 0x71
91 #define FASTTRAP_JB 0x72
92 #define FASTTRAP_JAE 0x73
93 #define FASTTRAP_JE 0x74
94 #define FASTTRAP_JNE 0x75
95 #define FASTTRAP_JBE 0x76
96 #define FASTTRAP_JA 0x77
97 #define FASTTRAP_JS 0x78
98 #define FASTTRAP_JNS 0x79
99 #define FASTTRAP_JP 0x7a
100 #define FASTTRAP_JNP 0x7b
101 #define FASTTRAP_JL 0x7c
102 #define FASTTRAP_JGE 0x7d
103 #define FASTTRAP_JLE 0x7e
104 #define FASTTRAP_JG 0x7f
105
106 #define FASTTRAP_NOP 0x90
107
108 #define FASTTRAP_MOV_EAX 0xb8
109 #define FASTTRAP_MOV_ECX 0xb9
110
111 #define FASTTRAP_RET16 0xc2
112 #define FASTTRAP_RET 0xc3
113
114 #define FASTTRAP_LOOPNZ 0xe0
115 #define FASTTRAP_LOOPZ 0xe1
116 #define FASTTRAP_LOOP 0xe2
117 #define FASTTRAP_JCXZ 0xe3
118
119 #define FASTTRAP_CALL 0xe8
120 #define FASTTRAP_JMP32 0xe9
121 #define FASTTRAP_JMP8 0xeb
122
123 #define FASTTRAP_INT3 0xcc
124 #define FASTTRAP_INT 0xcd
125
126 #define FASTTRAP_2_BYTE_OP 0x0f
127 #define FASTTRAP_GROUP5_OP 0xff
128
129 /*
130 * Two-byte op-codes (second byte only).
131 */
132 #define FASTTRAP_0F_JO 0x80
133 #define FASTTRAP_0F_JNO 0x81
134 #define FASTTRAP_0F_JB 0x82
135 #define FASTTRAP_0F_JAE 0x83
136 #define FASTTRAP_0F_JE 0x84
137 #define FASTTRAP_0F_JNE 0x85
138 #define FASTTRAP_0F_JBE 0x86
139 #define FASTTRAP_0F_JA 0x87
140 #define FASTTRAP_0F_JS 0x88
141 #define FASTTRAP_0F_JNS 0x89
142 #define FASTTRAP_0F_JP 0x8a
143 #define FASTTRAP_0F_JNP 0x8b
144 #define FASTTRAP_0F_JL 0x8c
145 #define FASTTRAP_0F_JGE 0x8d
146 #define FASTTRAP_0F_JLE 0x8e
147 #define FASTTRAP_0F_JG 0x8f
148
/*
 * Masks for individual bits of the flags register; these are tested against
 * the saved rp->r_ps when conditional branches are emulated.
 */
#define	FASTTRAP_EFLAGS_OF	0x800
#define	FASTTRAP_EFLAGS_DF	0x400
#define	FASTTRAP_EFLAGS_SF	0x080
#define	FASTTRAP_EFLAGS_ZF	0x040
#define	FASTTRAP_EFLAGS_AF	0x010
#define	FASTTRAP_EFLAGS_PF	0x004
#define	FASTTRAP_EFLAGS_CF	0x001
156
157 /*
158 * Instruction prefixes.
159 */
160 #define FASTTRAP_PREFIX_OPERAND 0x66
161 #define FASTTRAP_PREFIX_ADDRESS 0x67
162 #define FASTTRAP_PREFIX_CS 0x2E
163 #define FASTTRAP_PREFIX_DS 0x3E
164 #define FASTTRAP_PREFIX_ES 0x26
165 #define FASTTRAP_PREFIX_FS 0x64
166 #define FASTTRAP_PREFIX_GS 0x65
167 #define FASTTRAP_PREFIX_SS 0x36
168 #define FASTTRAP_PREFIX_LOCK 0xF0
169 #define FASTTRAP_PREFIX_REP 0xF3
170 #define FASTTRAP_PREFIX_REPNE 0xF2
171
172 #define FASTTRAP_NOREG 0xff
173
/*
 * Map between instruction register encodings and the kernel constants which
 * correspond to indices into struct regs.  The table is indexed by the 4-bit
 * register number formed from a 3-bit ModR/M or SIB field with the matching
 * REX bit (REX.B or REX.X) as the high bit.
 */
static const uint8_t regmap[16] = {
	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
};
182
183 static ulong_t fasttrap_getreg(struct regs *, uint_t);
184
185 static uint64_t
fasttrap_anarg(struct regs * rp,int function_entry,int argno)186 fasttrap_anarg(struct regs *rp, int function_entry, int argno)
187 {
188 uint64_t value;
189 int shift = function_entry ? 1 : 0;
190
191 if (curproc->p_model == DATAMODEL_LP64) {
192 uintptr_t *stack;
193
194 /*
195 * In 64-bit mode, the first six arguments are stored in
196 * registers.
197 */
198 if (argno < 6)
199 return ((&rp->r_rdi)[argno]);
200
201 stack = (uintptr_t *)rp->r_sp;
202 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
203 value = dtrace_fulword(&stack[argno - 6 + shift]);
204 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
205 } else {
206 uint32_t *stack = (uint32_t *)rp->r_sp;
207 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
208 value = dtrace_fuword32(&stack[argno + shift]);
209 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
210 }
211
212 return (value);
213 }
214
/*
 * Decode the instruction at address 'pc' in process 'p' and fill in the
 * tracepoint 'tp' with the information needed to handle it later.
 *
 * Up to FASTTRAP_MAX_INSTR_SIZE bytes are read from the process, the
 * instruction is sized with dtrace_instr_size_isa(), legacy prefixes and (for
 * 64-bit processes) a REX prefix are skipped, and the opcode is classified
 * into one of the FASTTRAP_T_* types.  Depending on the type, ftt_code,
 * ftt_dest, ftt_base, ftt_index and ftt_scale are filled in.  For 64-bit
 * processes, a FASTTRAP_T_COMMON instruction that uses %rip-relative
 * addressing has its ModR/M byte rewritten in tp->ftt_instr; the original
 * byte is saved in tp->ftt_modrm.
 *
 * The 'type' argument is not used.
 *
 * Returns 0 on success.  Returns -1 if the first read fails, the instruction
 * cannot be sized, its size exceeds the bytes actually read, it carries two
 * segment-override prefixes, or it is an int3 or int instruction.
 */
/*ARGSUSED*/
int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	/*
	 * NOTE(review): the extra 10 bytes presumably exist so that the
	 * operand reads at instr[start + n] below stay inside the array --
	 * confirm.
	 */
	uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
	size_t len = FASTTRAP_MAX_INSTR_SIZE;
	/* number of bytes available before the end of pc's page */
	size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));
	uint_t start = 0;
	int rmindex, size;
	uint8_t seg, rex = 0;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set. Since instructions can span
	 * pages, we potentially read the instruction in two parts. If the
	 * second part fails, we just zero out that part of the instruction.
	 */
	if (uread(p, &instr[0], first, pc) != 0)
		return (-1);
	if (len > first &&
	    uread(p, &instr[first], len - first, pc + first) != 0) {
		bzero(&instr[first], len - first);
		len = first;
	}

	/*
	 * If the disassembly fails, then we have a malformed instruction.
	 */
	if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0)
		return (-1);

	/*
	 * Make sure the disassembler isn't completely broken.
	 */
	ASSERT(-1 <= rmindex && rmindex < size);

	/*
	 * If the computed size is greater than the number of bytes read,
	 * then it was a malformed instruction possibly because it fell on a
	 * page boundary and the subsequent page was missing or because of
	 * some malicious user.
	 */
	if (size > len)
		return (-1);

	tp->ftt_size = (uint8_t)size;
	tp->ftt_segment = FASTTRAP_SEG_NONE;

	/*
	 * Find the start of the instruction's opcode by processing any
	 * legacy prefixes.
	 *
	 * The segment cases fall through into one another so that 'seg' ends
	 * up with a distinct non-zero value per segment prefix (CS = 1,
	 * DS = 2, ES = 3, FS = 4, GS = 5, SS = 6); it stays 0 for the
	 * non-segment prefixes.
	 */
	for (;;) {
		seg = 0;
		switch (instr[start]) {
		case FASTTRAP_PREFIX_SS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_GS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_FS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_ES:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_DS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_CS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_OPERAND:
		case FASTTRAP_PREFIX_ADDRESS:
		case FASTTRAP_PREFIX_LOCK:
		case FASTTRAP_PREFIX_REP:
		case FASTTRAP_PREFIX_REPNE:
			if (seg != 0) {
				/*
				 * It's illegal for an instruction to specify
				 * two segment prefixes -- give up on this
				 * illegal instruction.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE)
					return (-1);

				tp->ftt_segment = seg;
			}
			start++;
			continue;
		}
		/* not a prefix byte: instr[start] is the next non-prefix byte */
		break;
	}

	/*
	 * Identify the REX prefix on 64-bit processes.
	 */
	if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
		rex = instr[start++];

	/*
	 * Now that we're pretty sure that the instruction is okay, copy the
	 * valid part to the tracepoint.
	 */
	bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);

	/* default classification unless a special case below matches */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (instr[start] == FASTTRAP_2_BYTE_OP) {
		switch (instr[start + 1]) {
		case FASTTRAP_0F_JO:
		case FASTTRAP_0F_JNO:
		case FASTTRAP_0F_JB:
		case FASTTRAP_0F_JAE:
		case FASTTRAP_0F_JE:
		case FASTTRAP_0F_JNE:
		case FASTTRAP_0F_JBE:
		case FASTTRAP_0F_JA:
		case FASTTRAP_0F_JS:
		case FASTTRAP_0F_JNS:
		case FASTTRAP_0F_JP:
		case FASTTRAP_0F_JNP:
		case FASTTRAP_0F_JL:
		case FASTTRAP_0F_JGE:
		case FASTTRAP_0F_JLE:
		case FASTTRAP_0F_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			/*
			 * Record the condition as the equivalent single-byte
			 * opcode (0x70 | low nibble) so both encodings share
			 * one set of ftt_code values.
			 */
			tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
			/* destination = next instruction + 32-bit displacement */
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 2];
			break;
		}
	} else if (instr[start] == FASTTRAP_GROUP5_OP) {
		uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
		uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
		uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);

		/*
		 * Only reg values 2 and 4 are handled here (classified as
		 * call and jmp respectively); any other value leaves the
		 * type as FASTTRAP_T_COMMON.
		 */
		if (reg == 2 || reg == 4) {
			/*
			 * i is the offset (from start) of the displacement;
			 * sz is the displacement size in bytes (0, 1 or 4).
			 */
			uint_t i, sz;

			if (reg == 2)
				tp->ftt_type = FASTTRAP_T_CALL;
			else
				tp->ftt_type = FASTTRAP_T_JMP;

			/* ftt_code is 2 when mod == 3 and 1 otherwise */
			if (mod == 3)
				tp->ftt_code = 2;
			else
				tp->ftt_code = 1;

			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * See AMD x86-64 Architecture Programmer's Manual
			 * Volume 3, Section 1.2.7, Table 1-12, and
			 * Appendix A.3.1, Table A-15.
			 */
			if (mod != 3 && rm == 4) {
				/* a SIB byte follows the ModR/M byte */
				uint8_t sib = instr[start + 2];
				uint_t index = FASTTRAP_SIB_INDEX(sib);
				uint_t base = FASTTRAP_SIB_BASE(sib);

				tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);

				tp->ftt_index = (index == 4) ?
				    FASTTRAP_NOREG :
				    regmap[index | (FASTTRAP_REX_X(rex) << 3)];
				tp->ftt_base = (mod == 0 && base == 5) ?
				    FASTTRAP_NOREG :
				    regmap[base | (FASTTRAP_REX_B(rex) << 3)];

				i = 3;
				sz = mod == 1 ? 1 : 4;
			} else {
				/*
				 * In 64-bit mode, mod == 0 and r/m == 5
				 * denotes %rip-relative addressing; in 32-bit
				 * mode, the base register isn't used. In both
				 * modes, there is a 32-bit operand.
				 */
				if (mod == 0 && rm == 5) {
					if (p->p_model == DATAMODEL_LP64)
						tp->ftt_base = REG_RIP;
					else
						tp->ftt_base = FASTTRAP_NOREG;
					sz = 4;
				} else {
					uint8_t base = rm |
					    (FASTTRAP_REX_B(rex) << 3);

					tp->ftt_base = regmap[base];
					sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
				}
				tp->ftt_index = FASTTRAP_NOREG;
				i = 2;
			}

			/*
			 * For these indirect forms ftt_dest holds the
			 * sign-extended displacement (or 0 if there is none)
			 * rather than an absolute target address.
			 */
			if (sz == 1) {
				tp->ftt_dest = *(int8_t *)&instr[start + i];
			} else if (sz == 4) {
				/* LINTED - alignment */
				tp->ftt_dest = *(int32_t *)&instr[start + i];
			} else {
				tp->ftt_dest = 0;
			}
		}
	} else {
		switch (instr[start]) {
		case FASTTRAP_RET:
			tp->ftt_type = FASTTRAP_T_RET;
			break;

		case FASTTRAP_RET16:
			tp->ftt_type = FASTTRAP_T_RET16;
			/* ftt_dest holds the 16-bit immediate operand */
			/* LINTED - alignment */
			tp->ftt_dest = *(uint16_t *)&instr[start + 1];
			break;

		case FASTTRAP_JO:
		case FASTTRAP_JNO:
		case FASTTRAP_JB:
		case FASTTRAP_JAE:
		case FASTTRAP_JE:
		case FASTTRAP_JNE:
		case FASTTRAP_JBE:
		case FASTTRAP_JA:
		case FASTTRAP_JS:
		case FASTTRAP_JNS:
		case FASTTRAP_JP:
		case FASTTRAP_JNP:
		case FASTTRAP_JL:
		case FASTTRAP_JGE:
		case FASTTRAP_JLE:
		case FASTTRAP_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			tp->ftt_code = instr[start];
			/* destination = next instruction + 8-bit displacement */
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_LOOPNZ:
		case FASTTRAP_LOOPZ:
		case FASTTRAP_LOOP:
			tp->ftt_type = FASTTRAP_T_LOOP;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_JCXZ:
			tp->ftt_type = FASTTRAP_T_JCXZ;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_CALL:
			tp->ftt_type = FASTTRAP_T_CALL;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 1];
			/* ftt_code 0: ftt_dest is already the target address */
			tp->ftt_code = 0;
			break;

		case FASTTRAP_JMP32:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 1];
			break;
		case FASTTRAP_JMP8:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_PUSHL_EBP:
			/* only when no prefix (legacy or REX) precedes it */
			if (start == 0)
				tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
			break;

		case FASTTRAP_NOP:
			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * On amd64 we have to be careful not to confuse a nop
			 * (actually xchgl %eax, %eax) with an instruction using
			 * the same opcode, but that does something different
			 * (e.g. xchgl %r8d, %eax or xchgq %r8, %rax).
			 */
			if (FASTTRAP_REX_B(rex) == 0)
				tp->ftt_type = FASTTRAP_T_NOP;
			break;

		case FASTTRAP_INT3:
			/*
			 * The pid provider shares the int3 trap with debugger
			 * breakpoints so we can't instrument them.
			 */
			ASSERT(instr[start] == FASTTRAP_INSTR);
			return (-1);

		case FASTTRAP_INT:
			/*
			 * Interrupts seem like they could be traced with
			 * no negative implications, but it's possible that
			 * a thread could be redirected by the trap handling
			 * code which would eventually return to the
			 * instruction after the interrupt. If the interrupt
			 * were in our scratch space, the subsequent
			 * instruction might be overwritten before we return.
			 * Accordingly we refuse to instrument any interrupt.
			 */
			return (-1);
		}
	}

	if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
		/*
		 * If the process is 64-bit and the instruction type is still
		 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an
		 * execute it -- we need to watch for %rip-relative
		 * addressing mode. See the portion of fasttrap_pid_probe()
		 * below where we handle tracepoints with type
		 * FASTTRAP_T_COMMON for how we emulate instructions that
		 * employ %rip-relative addressing.
		 */
		if (rmindex != -1) {
			uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
			uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
			uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);

			ASSERT(rmindex > start);

			if (mod == 0 && rm == 5) {
				/*
				 * We need to be sure to avoid other
				 * registers used by this instruction. While
				 * the reg field may determine the op code
				 * rather than denoting a register, assuming
				 * that it denotes a register is always safe.
				 * We leave the REX field intact and use
				 * whatever value's there for simplicity.
				 */
				if (reg != 0) {
					tp->ftt_ripmode = FASTTRAP_RIP_1 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 0;
				} else {
					tp->ftt_ripmode = FASTTRAP_RIP_2 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 1;
				}

				/*
				 * Keep the original ModR/M byte and replace
				 * it in the stored copy with mod = 2 and the
				 * substitute r/m chosen above.
				 */
				tp->ftt_modrm = tp->ftt_instr[rmindex];
				tp->ftt_instr[rmindex] =
				    FASTTRAP_MODRM(2, reg, rm);
			}
		}
	}

	return (0);
}
583
584 int
fasttrap_tracepoint_install(proc_t * p,fasttrap_tracepoint_t * tp)585 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
586 {
587 fasttrap_instr_t instr = FASTTRAP_INSTR;
588
589 if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
590 return (-1);
591
592 return (0);
593 }
594
595 int
fasttrap_tracepoint_remove(proc_t * p,fasttrap_tracepoint_t * tp)596 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
597 {
598 uint8_t instr;
599
600 /*
601 * Distinguish between read or write failures and a changed
602 * instruction.
603 */
604 if (uread(p, &instr, 1, tp->ftt_pc) != 0)
605 return (0);
606 if (instr != FASTTRAP_INSTR)
607 return (0);
608 if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
609 return (-1);
610
611 return (0);
612 }
613
614 static uintptr_t
fasttrap_fulword_noerr(const void * uaddr)615 fasttrap_fulword_noerr(const void *uaddr)
616 {
617 uintptr_t ret;
618
619 if (fasttrap_fulword(uaddr, &ret) == 0)
620 return (ret);
621
622 return (0);
623 }
624
625 static uint32_t
fasttrap_fuword32_noerr(const void * uaddr)626 fasttrap_fuword32_noerr(const void *uaddr)
627 {
628 uint32_t ret;
629
630 if (fasttrap_fuword32(uaddr, &ret) == 0)
631 return (ret);
632
633 return (0);
634 }
635
636 static void
fasttrap_return_common(struct regs * rp,uintptr_t pc,pid_t pid,uintptr_t new_pc)637 fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
638 uintptr_t new_pc)
639 {
640 fasttrap_tracepoint_t *tp;
641 fasttrap_bucket_t *bucket;
642 fasttrap_id_t *id;
643 kmutex_t *pid_mtx;
644
645 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
646 mutex_enter(pid_mtx);
647 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
648
649 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
650 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
651 tp->ftt_proc->ftpc_acount != 0)
652 break;
653 }
654
655 /*
656 * Don't sweat it if we can't find the tracepoint again; unlike
657 * when we're in fasttrap_pid_probe(), finding the tracepoint here
658 * is not essential to the correct execution of the process.
659 */
660 if (tp == NULL) {
661 mutex_exit(pid_mtx);
662 return;
663 }
664
665 for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
666 /*
667 * If there's a branch that could act as a return site, we
668 * need to trace it, and check here if the program counter is
669 * external to the function.
670 */
671 if (tp->ftt_type != FASTTRAP_T_RET &&
672 tp->ftt_type != FASTTRAP_T_RET16 &&
673 new_pc - id->fti_probe->ftp_faddr <
674 id->fti_probe->ftp_fsize)
675 continue;
676
677 dtrace_probe(id->fti_probe->ftp_id,
678 pc - id->fti_probe->ftp_faddr,
679 rp->r_r0, rp->r_r1, 0, 0);
680 }
681
682 mutex_exit(pid_mtx);
683 }
684
685 static void
fasttrap_sigsegv(proc_t * p,kthread_t * t,uintptr_t addr)686 fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
687 {
688 sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
689
690 sqp->sq_info.si_signo = SIGSEGV;
691 sqp->sq_info.si_code = SEGV_MAPERR;
692 sqp->sq_info.si_addr = (caddr_t)addr;
693
694 mutex_enter(&p->p_lock);
695 sigaddqa(p, t, sqp);
696 mutex_exit(&p->p_lock);
697
698 if (t != NULL)
699 aston(t);
700 }
701
702 static void
fasttrap_usdt_args64(fasttrap_probe_t * probe,struct regs * rp,int argc,uintptr_t * argv)703 fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc,
704 uintptr_t *argv)
705 {
706 int i, x, cap = MIN(argc, probe->ftp_nargs);
707 uintptr_t *stack = (uintptr_t *)rp->r_sp;
708
709 for (i = 0; i < cap; i++) {
710 x = probe->ftp_argmap[i];
711
712 if (x < 6)
713 argv[i] = (&rp->r_rdi)[x];
714 else
715 argv[i] = fasttrap_fulword_noerr(&stack[x]);
716 }
717
718 for (; i < argc; i++) {
719 argv[i] = 0;
720 }
721 }
722
723 static void
fasttrap_usdt_args32(fasttrap_probe_t * probe,struct regs * rp,int argc,uint32_t * argv)724 fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc,
725 uint32_t *argv)
726 {
727 int i, x, cap = MIN(argc, probe->ftp_nargs);
728 uint32_t *stack = (uint32_t *)rp->r_sp;
729
730 for (i = 0; i < cap; i++) {
731 x = probe->ftp_argmap[i];
732
733 argv[i] = fasttrap_fuword32_noerr(&stack[x]);
734 }
735
736 for (; i < argc; i++) {
737 argv[i] = 0;
738 }
739 }
740
741 static int
fasttrap_do_seg(fasttrap_tracepoint_t * tp,struct regs * rp,uintptr_t * addr)742 fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr)
743 {
744 proc_t *p = curproc;
745 user_desc_t *desc;
746 uint16_t sel, ndx, type;
747 uintptr_t limit;
748
749 switch (tp->ftt_segment) {
750 case FASTTRAP_SEG_CS:
751 sel = rp->r_cs;
752 break;
753 case FASTTRAP_SEG_DS:
754 sel = rp->r_ds;
755 break;
756 case FASTTRAP_SEG_ES:
757 sel = rp->r_es;
758 break;
759 case FASTTRAP_SEG_FS:
760 sel = rp->r_fs;
761 break;
762 case FASTTRAP_SEG_GS:
763 sel = rp->r_gs;
764 break;
765 case FASTTRAP_SEG_SS:
766 sel = rp->r_ss;
767 break;
768 }
769
770 /*
771 * Make sure the given segment register specifies a user priority
772 * selector rather than a kernel selector.
773 */
774 if (!SELISUPL(sel))
775 return (-1);
776
777 ndx = SELTOIDX(sel);
778
779 /*
780 * Check the bounds and grab the descriptor out of the specified
781 * descriptor table.
782 */
783 if (SELISLDT(sel)) {
784 if (ndx > p->p_ldtlimit)
785 return (-1);
786
787 desc = p->p_ldt + ndx;
788
789 } else {
790 if (ndx >= NGDT)
791 return (-1);
792
793 desc = cpu_get_gdt() + ndx;
794 }
795
796 /*
797 * The descriptor must have user privilege level and it must be
798 * present in memory.
799 */
800 if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1)
801 return (-1);
802
803 type = desc->usd_type;
804
805 /*
806 * If the S bit in the type field is not set, this descriptor can
807 * only be used in system context.
808 */
809 if ((type & 0x10) != 0x10)
810 return (-1);
811
812 limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1);
813
814 if (tp->ftt_segment == FASTTRAP_SEG_CS) {
815 /*
816 * The code/data bit and readable bit must both be set.
817 */
818 if ((type & 0xa) != 0xa)
819 return (-1);
820
821 if (*addr > limit)
822 return (-1);
823 } else {
824 /*
825 * The code/data bit must be clear.
826 */
827 if ((type & 0x8) != 0)
828 return (-1);
829
830 /*
831 * If the expand-down bit is clear, we just check the limit as
832 * it would naturally be applied. Otherwise, we need to check
833 * that the address is the range [limit + 1 .. 0xffff] or
834 * [limit + 1 ... 0xffffffff] depending on if the default
835 * operand size bit is set.
836 */
837 if ((type & 0x4) == 0) {
838 if (*addr > limit)
839 return (-1);
840 } else if (desc->usd_def32) {
841 if (*addr < limit + 1 || 0xffff < *addr)
842 return (-1);
843 } else {
844 if (*addr < limit + 1 || 0xffffffff < *addr)
845 return (-1);
846 }
847 }
848
849 *addr += USEGD_GETBASE(desc);
850
851 return (0);
852 }
853
854 int
fasttrap_pid_probe(struct regs * rp)855 fasttrap_pid_probe(struct regs *rp)
856 {
857 proc_t *p = curproc;
858 uintptr_t pc = rp->r_pc - 1, new_pc = 0;
859 fasttrap_bucket_t *bucket;
860 kmutex_t *pid_mtx;
861 fasttrap_tracepoint_t *tp, tp_local;
862 pid_t pid;
863 dtrace_icookie_t cookie;
864 uint_t is_enabled = 0;
865
866 /*
867 * It's possible that a user (in a veritable orgy of bad planning)
868 * could redirect this thread's flow of control before it reached the
869 * return probe fasttrap. In this case we need to kill the process
870 * since it's in a unrecoverable state.
871 */
872 if (curthread->t_dtrace_step) {
873 ASSERT(curthread->t_dtrace_on);
874 fasttrap_sigtrap(p, curthread, pc);
875 return (0);
876 }
877
878 /*
879 * Clear all user tracing flags.
880 */
881 curthread->t_dtrace_ft = 0;
882 curthread->t_dtrace_pc = 0;
883 curthread->t_dtrace_npc = 0;
884 curthread->t_dtrace_scrpc = 0;
885 curthread->t_dtrace_astpc = 0;
886 curthread->t_dtrace_regv = 0;
887
888 /*
889 * Treat a child created by a call to vfork(2) as if it were its
890 * parent. We know that there's only one thread of control in such a
891 * process: this one.
892 */
893 while (p->p_flag & SVFORK) {
894 p = p->p_parent;
895 }
896
897 pid = p->p_pid;
898 pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
899 mutex_enter(pid_mtx);
900 bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
901
902 /*
903 * Lookup the tracepoint that the process just hit.
904 */
905 for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
906 if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
907 tp->ftt_proc->ftpc_acount != 0)
908 break;
909 }
910
911 /*
912 * If we couldn't find a matching tracepoint, either a tracepoint has
913 * been inserted without using the pid<pid> ioctl interface (see
914 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
915 */
916 if (tp == NULL) {
917 mutex_exit(pid_mtx);
918 return (-1);
919 }
920
921 /*
922 * Set the program counter to the address of the traced instruction
923 * so that it looks right in ustack() output.
924 */
925 rp->r_pc = pc;
926
927 if (tp->ftt_ids != NULL) {
928 fasttrap_id_t *id;
929
930 if (p->p_model == DATAMODEL_LP64) {
931 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
932 fasttrap_probe_t *probe = id->fti_probe;
933
934 if (id->fti_ptype == DTFTP_ENTRY) {
935 /*
936 * We note that this was an entry
937 * probe to help ustack() find the
938 * first caller.
939 */
940 cookie = dtrace_interrupt_disable();
941 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
942 dtrace_probe(probe->ftp_id, rp->r_rdi,
943 rp->r_rsi, rp->r_rdx, rp->r_rcx,
944 rp->r_r8);
945 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
946 dtrace_interrupt_enable(cookie);
947 } else if (id->fti_ptype == DTFTP_IS_ENABLED) {
948 /*
949 * Note that in this case, we don't
950 * call dtrace_probe() since it's only
951 * an artificial probe meant to change
952 * the flow of control so that it
953 * encounters the true probe.
954 */
955 is_enabled = 1;
956 } else if (probe->ftp_argmap == NULL) {
957 dtrace_probe(probe->ftp_id, rp->r_rdi,
958 rp->r_rsi, rp->r_rdx, rp->r_rcx,
959 rp->r_r8);
960 } else {
961 uintptr_t t[5];
962
963 fasttrap_usdt_args64(probe, rp,
964 sizeof (t) / sizeof (t[0]), t);
965
966 dtrace_probe(probe->ftp_id, t[0], t[1],
967 t[2], t[3], t[4]);
968 }
969 }
970 } else {
971 uintptr_t s0, s1, s2, s3, s4, s5;
972 uint32_t *stack = (uint32_t *)rp->r_sp;
973
974 /*
975 * In 32-bit mode, all arguments are passed on the
976 * stack. If this is a function entry probe, we need
977 * to skip the first entry on the stack as it
978 * represents the return address rather than a
979 * parameter to the function.
980 */
981 s0 = fasttrap_fuword32_noerr(&stack[0]);
982 s1 = fasttrap_fuword32_noerr(&stack[1]);
983 s2 = fasttrap_fuword32_noerr(&stack[2]);
984 s3 = fasttrap_fuword32_noerr(&stack[3]);
985 s4 = fasttrap_fuword32_noerr(&stack[4]);
986 s5 = fasttrap_fuword32_noerr(&stack[5]);
987
988 for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
989 fasttrap_probe_t *probe = id->fti_probe;
990
991 if (id->fti_ptype == DTFTP_ENTRY) {
992 /*
993 * We note that this was an entry
994 * probe to help ustack() find the
995 * first caller.
996 */
997 cookie = dtrace_interrupt_disable();
998 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
999 dtrace_probe(probe->ftp_id, s1, s2,
1000 s3, s4, s5);
1001 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1002 dtrace_interrupt_enable(cookie);
1003 } else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1004 /*
1005 * Note that in this case, we don't
1006 * call dtrace_probe() since it's only
1007 * an artificial probe meant to change
1008 * the flow of control so that it
1009 * encounters the true probe.
1010 */
1011 is_enabled = 1;
1012 } else if (probe->ftp_argmap == NULL) {
1013 dtrace_probe(probe->ftp_id, s0, s1,
1014 s2, s3, s4);
1015 } else {
1016 uint32_t t[5];
1017
1018 fasttrap_usdt_args32(probe, rp,
1019 sizeof (t) / sizeof (t[0]), t);
1020
1021 dtrace_probe(probe->ftp_id, t[0], t[1],
1022 t[2], t[3], t[4]);
1023 }
1024 }
1025 }
1026 }
1027
1028 /*
1029 * We're about to do a bunch of work so we cache a local copy of
1030 * the tracepoint to emulate the instruction, and then find the
1031 * tracepoint again later if we need to light up any return probes.
1032 */
1033 tp_local = *tp;
1034 mutex_exit(pid_mtx);
1035 tp = &tp_local;
1036
1037 /*
1038 * Set the program counter to appear as though the traced instruction
1039 * had completely executed. This ensures that fasttrap_getreg() will
1040 * report the expected value for REG_RIP.
1041 */
1042 rp->r_pc = pc + tp->ftt_size;
1043
1044 /*
1045 * If there's an is-enabled probe connected to this tracepoint it
1046 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1047 * instruction that was placed there by DTrace when the binary was
1048 * linked. As this probe is, in fact, enabled, we need to stuff 1
1049 * into %eax or %rax. Accordingly, we can bypass all the instruction
1050 * emulation logic since we know the inevitable result. It's possible
1051 * that a user could construct a scenario where the 'is-enabled'
1052 * probe was on some other instruction, but that would be a rather
1053 * exotic way to shoot oneself in the foot.
1054 */
1055 if (is_enabled) {
1056 rp->r_r0 = 1;
1057 new_pc = rp->r_pc;
1058 goto done;
1059 }
1060
1061 /*
1062 * We emulate certain types of instructions to ensure correctness
1063 * (in the case of position dependent instructions) or optimize
1064 * common cases. The rest we have the thread execute back in user-
1065 * land.
1066 */
1067 switch (tp->ftt_type) {
1068 case FASTTRAP_T_RET:
1069 case FASTTRAP_T_RET16:
1070 {
1071 uintptr_t dst;
1072 uintptr_t addr;
1073 int ret;
1074
1075 /*
1076 * We have to emulate _every_ facet of the behavior of a ret
1077 * instruction including what happens if the load from %esp
1078 * fails; in that case, we send a SIGSEGV.
1079 */
1080 if (p->p_model == DATAMODEL_NATIVE) {
1081 ret = fasttrap_fulword((void *)rp->r_sp, &dst);
1082 addr = rp->r_sp + sizeof (uintptr_t);
1083 } else {
1084 uint32_t dst32;
1085 ret = fasttrap_fuword32((void *)rp->r_sp, &dst32);
1086 dst = dst32;
1087 addr = rp->r_sp + sizeof (uint32_t);
1088 }
1089
1090 if (ret == -1) {
1091 fasttrap_sigsegv(p, curthread, rp->r_sp);
1092 new_pc = pc;
1093 break;
1094 }
1095
1096 if (tp->ftt_type == FASTTRAP_T_RET16)
1097 addr += tp->ftt_dest;
1098
1099 rp->r_sp = addr;
1100 new_pc = dst;
1101 break;
1102 }
1103
1104 case FASTTRAP_T_JCC:
1105 {
1106 uint_t taken;
1107
1108 switch (tp->ftt_code) {
1109 case FASTTRAP_JO:
1110 taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0;
1111 break;
1112 case FASTTRAP_JNO:
1113 taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0;
1114 break;
1115 case FASTTRAP_JB:
1116 taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0;
1117 break;
1118 case FASTTRAP_JAE:
1119 taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0;
1120 break;
1121 case FASTTRAP_JE:
1122 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1123 break;
1124 case FASTTRAP_JNE:
1125 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1126 break;
1127 case FASTTRAP_JBE:
1128 taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 ||
1129 (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1130 break;
1131 case FASTTRAP_JA:
1132 taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 &&
1133 (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1134 break;
1135 case FASTTRAP_JS:
1136 taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0;
1137 break;
1138 case FASTTRAP_JNS:
1139 taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0;
1140 break;
1141 case FASTTRAP_JP:
1142 taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0;
1143 break;
1144 case FASTTRAP_JNP:
1145 taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0;
1146 break;
1147 case FASTTRAP_JL:
1148 taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1149 ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1150 break;
1151 case FASTTRAP_JGE:
1152 taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1153 ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1154 break;
1155 case FASTTRAP_JLE:
1156 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 ||
1157 ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1158 ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1159 break;
1160 case FASTTRAP_JG:
1161 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1162 ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1163 ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1164 break;
1165
1166 }
1167
1168 if (taken)
1169 new_pc = tp->ftt_dest;
1170 else
1171 new_pc = pc + tp->ftt_size;
1172 break;
1173 }
1174
1175 case FASTTRAP_T_LOOP:
1176 {
1177 uint_t taken;
1178 greg_t cx = rp->r_rcx--;
1179
1180 switch (tp->ftt_code) {
1181 case FASTTRAP_LOOPNZ:
1182 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1183 cx != 0;
1184 break;
1185 case FASTTRAP_LOOPZ:
1186 taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 &&
1187 cx != 0;
1188 break;
1189 case FASTTRAP_LOOP:
1190 taken = (cx != 0);
1191 break;
1192 }
1193
1194 if (taken)
1195 new_pc = tp->ftt_dest;
1196 else
1197 new_pc = pc + tp->ftt_size;
1198 break;
1199 }
1200
1201 case FASTTRAP_T_JCXZ:
1202 {
1203 greg_t cx = rp->r_rcx;
1204
1205 if (cx == 0)
1206 new_pc = tp->ftt_dest;
1207 else
1208 new_pc = pc + tp->ftt_size;
1209 break;
1210 }
1211
1212 case FASTTRAP_T_PUSHL_EBP:
1213 {
1214 int ret;
1215 uintptr_t addr;
1216 if (p->p_model == DATAMODEL_NATIVE) {
1217 addr = rp->r_sp - sizeof (uintptr_t);
1218 ret = fasttrap_sulword((void *)addr, rp->r_fp);
1219 } else {
1220 addr = rp->r_sp - sizeof (uint32_t);
1221 ret = fasttrap_suword32((void *)addr,
1222 (uint32_t)rp->r_fp);
1223 }
1224
1225 if (ret == -1) {
1226 fasttrap_sigsegv(p, curthread, addr);
1227 new_pc = pc;
1228 break;
1229 }
1230
1231 rp->r_sp = addr;
1232 new_pc = pc + tp->ftt_size;
1233 break;
1234 }
1235
1236 case FASTTRAP_T_NOP:
1237 new_pc = pc + tp->ftt_size;
1238 break;
1239
1240 case FASTTRAP_T_JMP:
1241 case FASTTRAP_T_CALL:
1242 if (tp->ftt_code == 0) {
1243 new_pc = tp->ftt_dest;
1244 } else {
1245 uintptr_t value, addr = tp->ftt_dest;
1246
1247 if (tp->ftt_base != FASTTRAP_NOREG)
1248 addr += fasttrap_getreg(rp, tp->ftt_base);
1249 if (tp->ftt_index != FASTTRAP_NOREG)
1250 addr += fasttrap_getreg(rp, tp->ftt_index) <<
1251 tp->ftt_scale;
1252
1253 if (tp->ftt_code == 1) {
1254 /*
1255 * If there's a segment prefix for this
1256 * instruction, we'll need to check permissions
1257 * and bounds on the given selector, and adjust
1258 * the address accordingly.
1259 */
1260 if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
1261 fasttrap_do_seg(tp, rp, &addr) != 0) {
1262 fasttrap_sigsegv(p, curthread, addr);
1263 new_pc = pc;
1264 break;
1265 }
1266
1267 if (p->p_model == DATAMODEL_NATIVE) {
1268 if (fasttrap_fulword((void *)addr,
1269 &value) == -1) {
1270 fasttrap_sigsegv(p, curthread,
1271 addr);
1272 new_pc = pc;
1273 break;
1274 }
1275 new_pc = value;
1276 } else {
1277 uint32_t value32;
1278 addr = (uintptr_t)(uint32_t)addr;
1279 if (fasttrap_fuword32((void *)addr,
1280 &value32) == -1) {
1281 fasttrap_sigsegv(p, curthread,
1282 addr);
1283 new_pc = pc;
1284 break;
1285 }
1286 new_pc = value32;
1287 }
1288 } else {
1289 new_pc = addr;
1290 }
1291 }
1292
1293 /*
1294 * If this is a call instruction, we need to push the return
1295 * address onto the stack. If this fails, we send the process
1296 * a SIGSEGV and reset the pc to emulate what would happen if
1297 * this instruction weren't traced.
1298 */
1299 if (tp->ftt_type == FASTTRAP_T_CALL) {
1300 int ret;
1301 uintptr_t addr;
1302 if (p->p_model == DATAMODEL_NATIVE) {
1303 addr = rp->r_sp - sizeof (uintptr_t);
1304 ret = fasttrap_sulword((void *)addr,
1305 pc + tp->ftt_size);
1306 } else {
1307 addr = rp->r_sp - sizeof (uint32_t);
1308 ret = fasttrap_suword32((void *)addr,
1309 (uint32_t)(pc + tp->ftt_size));
1310 }
1311
1312 if (ret == -1) {
1313 fasttrap_sigsegv(p, curthread, addr);
1314 new_pc = pc;
1315 break;
1316 }
1317
1318 rp->r_sp = addr;
1319 }
1320
1321 break;
1322
1323 case FASTTRAP_T_COMMON:
1324 {
1325 uintptr_t addr;
1326 uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
1327 uint_t i = 0;
1328 klwp_t *lwp = ttolwp(curthread);
1329
1330 /*
1331 * Compute the address of the ulwp_t and step over the
1332 * ul_self pointer. The method used to store the user-land
1333 * thread pointer is very different on 32- and 64-bit
1334 * kernels.
1335 */
1336 if (p->p_model == DATAMODEL_LP64) {
1337 addr = lwp->lwp_pcb.pcb_fsbase;
1338 addr += sizeof (void *);
1339 } else {
1340 addr = lwp->lwp_pcb.pcb_gsbase;
1341 addr += sizeof (caddr32_t);
1342 }
1343
1344 /*
1345 * Generic Instruction Tracing
1346 * ---------------------------
1347 *
1348 * This is the layout of the scratch space in the user-land
1349 * thread structure for our generated instructions.
1350 *
1351 * 32-bit mode bytes
1352 * ------------------------ -----
1353 * a: <original instruction> <= 15
1354 * jmp <pc + tp->ftt_size> 5
1355 * b: <original instruction> <= 15
1356 * int T_DTRACE_RET 2
1357 * -----
1358 * <= 37
1359 *
1360 * 64-bit mode bytes
1361 * ------------------------ -----
1362 * a: <original instruction> <= 15
1363 * jmp 0(%rip) 6
1364 * <pc + tp->ftt_size> 8
1365 * b: <original instruction> <= 15
1366 * int T_DTRACE_RET 2
1367 * -----
1368 * <= 46
1369 *
1370 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1371 * to b. If we encounter a signal on the way out of the
1372 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1373 * so that we execute the original instruction and re-enter
1374 * the kernel rather than redirecting to the next instruction.
1375 *
1376 * If there are return probes (so we know that we're going to
1377 * need to reenter the kernel after executing the original
1378 * instruction), the scratch space will just contain the
1379 * original instruction followed by an interrupt -- the same
1380 * data as at b.
1381 *
1382 * %rip-relative Addressing
1383 * ------------------------
1384 *
1385 * There's a further complication in 64-bit mode due to %rip-
1386 * relative addressing. While this is clearly a beneficial
1387 * architectural decision for position independent code, it's
1388 * hard not to see it as a personal attack against the pid
1389 * provider since before there was a relatively small set of
1390 * instructions to emulate; with %rip-relative addressing,
1391 * almost every instruction can potentially depend on the
1392 * address at which it's executed. Rather than emulating
1393 * the broad spectrum of instructions that can now be
1394 * position dependent, we emulate jumps and others as in
1395 * 32-bit mode, and take a different tack for instructions
1396 * using %rip-relative addressing.
1397 *
1398 * For every instruction that uses the ModRM byte, the
1399 * in-kernel disassembler reports its location. We use the
1400 * ModRM byte to identify that an instruction uses
1401 * %rip-relative addressing and to see what other registers
1402 * the instruction uses. To emulate those instructions,
1403 * we modify the instruction to be %rax-relative rather than
1404 * %rip-relative (or %rcx-relative if the instruction uses
1405 * %rax; or %r8- or %r9-relative if the REX.B is present so
1406 * we don't have to rewrite the REX prefix). We then load
1407 * the value that %rip would have been into the scratch
1408 * register and generate an instruction to reset the scratch
1409 * register back to its original value. The instruction
1410 * sequence looks like this:
1411 *
1412 * 64-mode %rip-relative bytes
1413 * ------------------------ -----
1414 * a: <modified instruction> <= 15
1415 * movq $<value>, %<scratch> 6
1416 * jmp 0(%rip) 6
1417 * <pc + tp->ftt_size> 8
1418 * b: <modified instruction> <= 15
1419 * int T_DTRACE_RET 2
1420 * -----
1421 * 52
1422 *
1423 * We set curthread->t_dtrace_regv so that upon receiving
1424 * a signal we can reset the value of the scratch register.
1425 */
1426
1427 ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
1428
1429 curthread->t_dtrace_scrpc = addr;
1430 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1431 i += tp->ftt_size;
1432
1433 if (tp->ftt_ripmode != 0) {
1434 greg_t *reg;
1435
1436 ASSERT(p->p_model == DATAMODEL_LP64);
1437 ASSERT(tp->ftt_ripmode &
1438 (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));
1439
1440 /*
1441 * If this was a %rip-relative instruction, we change
1442 * it to be either a %rax- or %rcx-relative
1443 * instruction (depending on whether those registers
1444 * are used as another operand; or %r8- or %r9-
1445 * relative depending on the value of REX.B). We then
1446 * set that register and generate a movq instruction
1447 * to reset the value.
1448 */
1449 if (tp->ftt_ripmode & FASTTRAP_RIP_X)
1450 scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
1451 else
1452 scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);
1453
1454 if (tp->ftt_ripmode & FASTTRAP_RIP_1)
1455 scratch[i++] = FASTTRAP_MOV_EAX;
1456 else
1457 scratch[i++] = FASTTRAP_MOV_ECX;
1458
1459 switch (tp->ftt_ripmode) {
1460 case FASTTRAP_RIP_1:
1461 reg = &rp->r_rax;
1462 curthread->t_dtrace_reg = REG_RAX;
1463 break;
1464 case FASTTRAP_RIP_2:
1465 reg = &rp->r_rcx;
1466 curthread->t_dtrace_reg = REG_RCX;
1467 break;
1468 case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
1469 reg = &rp->r_r8;
1470 curthread->t_dtrace_reg = REG_R8;
1471 break;
1472 case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
1473 reg = &rp->r_r9;
1474 curthread->t_dtrace_reg = REG_R9;
1475 break;
1476 }
1477
1478 /* LINTED - alignment */
1479 *(uint64_t *)&scratch[i] = *reg;
1480 curthread->t_dtrace_regv = *reg;
1481 *reg = pc + tp->ftt_size;
1482 i += sizeof (uint64_t);
1483 }
1484
1485 /*
1486 * Generate the branch instruction to what would have
1487 * normally been the subsequent instruction. In 32-bit mode,
1488 * this is just a relative branch; in 64-bit mode this is a
1489 * %rip-relative branch that loads the 64-bit pc value
1490 * immediately after the jmp instruction.
1491 */
1492 if (p->p_model == DATAMODEL_LP64) {
1493 scratch[i++] = FASTTRAP_GROUP5_OP;
1494 scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
1495 /* LINTED - alignment */
1496 *(uint32_t *)&scratch[i] = 0;
1497 i += sizeof (uint32_t);
1498 /* LINTED - alignment */
1499 *(uint64_t *)&scratch[i] = pc + tp->ftt_size;
1500 i += sizeof (uint64_t);
1501 } else {
1502 /*
1503 * Set up the jmp to the next instruction; note that
1504 * the size of the traced instruction cancels out.
1505 */
1506 scratch[i++] = FASTTRAP_JMP32;
1507 /* LINTED - alignment */
1508 *(uint32_t *)&scratch[i] = pc - addr - 5;
1509 i += sizeof (uint32_t);
1510 }
1511
1512 curthread->t_dtrace_astpc = addr + i;
1513 bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1514 i += tp->ftt_size;
1515 scratch[i++] = FASTTRAP_INT;
1516 scratch[i++] = T_DTRACE_RET;
1517
1518 ASSERT(i <= sizeof (scratch));
1519
1520 if (fasttrap_copyout(scratch, (char *)addr, i)) {
1521 fasttrap_sigtrap(p, curthread, pc);
1522 new_pc = pc;
1523 break;
1524 }
1525
1526 if (tp->ftt_retids != NULL) {
1527 curthread->t_dtrace_step = 1;
1528 curthread->t_dtrace_ret = 1;
1529 new_pc = curthread->t_dtrace_astpc;
1530 } else {
1531 new_pc = curthread->t_dtrace_scrpc;
1532 }
1533
1534 curthread->t_dtrace_pc = pc;
1535 curthread->t_dtrace_npc = pc + tp->ftt_size;
1536 curthread->t_dtrace_on = 1;
1537 break;
1538 }
1539
1540 default:
1541 panic("fasttrap: mishandled an instruction");
1542 }
1543
1544 done:
1545 /*
1546 * If there were no return probes when we first found the tracepoint,
1547 * we should feel no obligation to honor any return probes that were
1548 * subsequently enabled -- they'll just have to wait until the next
1549 * time around.
1550 */
1551 if (tp->ftt_retids != NULL) {
1552 /*
1553 * We need to wait until the results of the instruction are
1554 * apparent before invoking any return probes. If this
1555 * instruction was emulated we can just call
1556 * fasttrap_return_common(); if it needs to be executed, we
1557 * need to wait until the user thread returns to the kernel.
1558 */
1559 if (tp->ftt_type != FASTTRAP_T_COMMON) {
1560 /*
1561 * Set the program counter to the address of the traced
1562 * instruction so that it looks right in ustack()
1563 * output. We had previously set it to the end of the
1564 * instruction to simplify %rip-relative addressing.
1565 */
1566 rp->r_pc = pc;
1567
1568 fasttrap_return_common(rp, pc, pid, new_pc);
1569 } else {
1570 ASSERT(curthread->t_dtrace_ret != 0);
1571 ASSERT(curthread->t_dtrace_pc == pc);
1572 ASSERT(curthread->t_dtrace_scrpc != 0);
1573 ASSERT(new_pc == curthread->t_dtrace_astpc);
1574 }
1575 }
1576
1577 rp->r_pc = new_pc;
1578
1579 return (0);
1580 }
1581
1582 int
fasttrap_return_probe(struct regs * rp)1583 fasttrap_return_probe(struct regs *rp)
1584 {
1585 proc_t *p = curproc;
1586 uintptr_t pc = curthread->t_dtrace_pc;
1587 uintptr_t npc = curthread->t_dtrace_npc;
1588
1589 curthread->t_dtrace_pc = 0;
1590 curthread->t_dtrace_npc = 0;
1591 curthread->t_dtrace_scrpc = 0;
1592 curthread->t_dtrace_astpc = 0;
1593
1594 /*
1595 * Treat a child created by a call to vfork(2) as if it were its
1596 * parent. We know that there's only one thread of control in such a
1597 * process: this one.
1598 */
1599 while (p->p_flag & SVFORK) {
1600 p = p->p_parent;
1601 }
1602
1603 /*
1604 * We set rp->r_pc to the address of the traced instruction so
1605 * that it appears to dtrace_probe() that we're on the original
1606 * instruction, and so that the user can't easily detect our
1607 * complex web of lies. dtrace_return_probe() (our caller)
1608 * will correctly set %pc after we return.
1609 */
1610 rp->r_pc = pc;
1611
1612 fasttrap_return_common(rp, pc, p->p_pid, npc);
1613
1614 return (0);
1615 }
1616
1617 /*ARGSUSED*/
1618 uint64_t
fasttrap_pid_getarg(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)1619 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1620 int aframes)
1621 {
1622 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno));
1623 }
1624
1625 /*ARGSUSED*/
1626 uint64_t
fasttrap_usdt_getarg(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)1627 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1628 int aframes)
1629 {
1630 return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno));
1631 }
1632
1633 static ulong_t
fasttrap_getreg(struct regs * rp,uint_t reg)1634 fasttrap_getreg(struct regs *rp, uint_t reg)
1635 {
1636 switch (reg) {
1637 case REG_R15: return (rp->r_r15);
1638 case REG_R14: return (rp->r_r14);
1639 case REG_R13: return (rp->r_r13);
1640 case REG_R12: return (rp->r_r12);
1641 case REG_R11: return (rp->r_r11);
1642 case REG_R10: return (rp->r_r10);
1643 case REG_R9: return (rp->r_r9);
1644 case REG_R8: return (rp->r_r8);
1645 case REG_RDI: return (rp->r_rdi);
1646 case REG_RSI: return (rp->r_rsi);
1647 case REG_RBP: return (rp->r_rbp);
1648 case REG_RBX: return (rp->r_rbx);
1649 case REG_RDX: return (rp->r_rdx);
1650 case REG_RCX: return (rp->r_rcx);
1651 case REG_RAX: return (rp->r_rax);
1652 case REG_TRAPNO: return (rp->r_trapno);
1653 case REG_ERR: return (rp->r_err);
1654 case REG_RIP: return (rp->r_rip);
1655 case REG_CS: return (rp->r_cs);
1656 case REG_RFL: return (rp->r_rfl);
1657 case REG_RSP: return (rp->r_rsp);
1658 case REG_SS: return (rp->r_ss);
1659 case REG_FS: return (rp->r_fs);
1660 case REG_GS: return (rp->r_gs);
1661 case REG_DS: return (rp->r_ds);
1662 case REG_ES: return (rp->r_es);
1663 case REG_FSBASE: return (rdmsr(MSR_AMD_FSBASE));
1664 case REG_GSBASE: return (rdmsr(MSR_AMD_GSBASE));
1665 }
1666
1667 panic("dtrace: illegal register constant");
1668 /*NOTREACHED*/
1669 }
1670