1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _LINUX_PTRACE_H 3 #define _LINUX_PTRACE_H 4 5 #include <linux/compiler.h> /* For unlikely. */ 6 #include <linux/sched.h> /* For struct task_struct. */ 7 #include <linux/sched/signal.h> /* For send_sig(), same_thread_group(), etc. */ 8 #include <linux/err.h> /* for IS_ERR_VALUE */ 9 #include <linux/bug.h> /* For BUG_ON. */ 10 #include <linux/pid_namespace.h> /* For task_active_pid_ns. */ 11 #include <uapi/linux/ptrace.h> 12 #include <linux/seccomp.h> 13 14 /* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */ 15 struct syscall_info { 16 __u64 sp; 17 struct seccomp_data data; 18 }; 19 20 extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, 21 void *buf, int len, unsigned int gup_flags); 22 23 /* 24 * Ptrace flags 25 * 26 * The owner ship rules for task->ptrace which holds the ptrace 27 * flags is simple. When a task is running it owns it's task->ptrace 28 * flags. When the a task is stopped the ptracer owns task->ptrace. 29 */ 30 31 #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ 32 #define PT_PTRACED 0x00000001 33 #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ 34 35 #define PT_OPT_FLAG_SHIFT 3 36 /* PT_TRACE_* event enable flags */ 37 #define PT_EVENT_FLAG(event) (1 << (PT_OPT_FLAG_SHIFT + (event))) 38 #define PT_TRACESYSGOOD PT_EVENT_FLAG(0) 39 #define PT_TRACE_FORK PT_EVENT_FLAG(PTRACE_EVENT_FORK) 40 #define PT_TRACE_VFORK PT_EVENT_FLAG(PTRACE_EVENT_VFORK) 41 #define PT_TRACE_CLONE PT_EVENT_FLAG(PTRACE_EVENT_CLONE) 42 #define PT_TRACE_EXEC PT_EVENT_FLAG(PTRACE_EVENT_EXEC) 43 #define PT_TRACE_VFORK_DONE PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE) 44 #define PT_TRACE_EXIT PT_EVENT_FLAG(PTRACE_EVENT_EXIT) 45 #define PT_TRACE_SECCOMP PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP) 46 47 #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) 48 #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) 49 50 /* single stepping state bits (used on ARM and PA-RISC) */ 51 #define PT_SINGLESTEP_BIT 31 52 #define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT) 53 #define PT_BLOCKSTEP_BIT 30 54 #define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT) 55 56 extern long arch_ptrace(struct task_struct *child, long request, 57 unsigned long addr, unsigned long data); 58 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); 59 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); 60 extern void ptrace_disable(struct task_struct *); 61 extern int ptrace_request(struct task_struct *child, long request, 62 unsigned long addr, unsigned long data); 63 extern int ptrace_notify(int exit_code, unsigned long message); 64 extern void __ptrace_link(struct task_struct *child, 65 struct task_struct *new_parent, 66 const struct cred *ptracer_cred); 67 extern void __ptrace_unlink(struct task_struct *child); 68 extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); 69 #define PTRACE_MODE_READ 0x01 70 #define PTRACE_MODE_ATTACH 0x02 71 #define PTRACE_MODE_NOAUDIT 0x04 72 #define PTRACE_MODE_FSCREDS 0x08 73 #define PTRACE_MODE_REALCREDS 0x10 74 75 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ 76 #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) 77 #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) 78 #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) 79 #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) 80 81 /** 82 * ptrace_may_access - check whether the caller is permitted to access 83 * a target task. 84 * @task: target task 85 * @mode: selects type of access and caller credentials 86 * 87 * Returns true on success, false on denial. 88 * 89 * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must 90 * be set in @mode to specify whether the access was requested through 91 * a filesystem syscall (should use effective capabilities and fsuid 92 * of the caller) or through an explicit syscall such as 93 * process_vm_writev or ptrace (and should use the real credentials). 94 */ 95 extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); 96 97 static inline int ptrace_reparented(struct task_struct *child) 98 { 99 return !same_thread_group(child->real_parent, child->parent); 100 } 101 102 static inline void ptrace_unlink(struct task_struct *child) 103 { 104 if (unlikely(child->ptrace)) 105 __ptrace_unlink(child); 106 } 107 108 int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, 109 unsigned long data); 110 int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, 111 unsigned long data); 112 113 /** 114 * ptrace_parent - return the task that is tracing the given task 115 * @task: task to consider 116 * 117 * Returns %NULL if no one is tracing @task, or the &struct task_struct 118 * pointer to its tracer. 119 * 120 * Must called under rcu_read_lock(). The pointer returned might be kept 121 * live only by RCU. During exec, this may be called with task_lock() held 122 * on @task, still held from when check_unsafe_exec() was called. 123 */ 124 static inline struct task_struct *ptrace_parent(struct task_struct *task) 125 { 126 if (unlikely(task->ptrace)) 127 return rcu_dereference(task->parent); 128 return NULL; 129 } 130 131 /** 132 * ptrace_event_enabled - test whether a ptrace event is enabled 133 * @task: ptracee of interest 134 * @event: %PTRACE_EVENT_* to test 135 * 136 * Test whether @event is enabled for ptracee @task. 137 * 138 * Returns %true if @event is enabled, %false otherwise. 139 */ 140 static inline bool ptrace_event_enabled(struct task_struct *task, int event) 141 { 142 return task->ptrace & PT_EVENT_FLAG(event); 143 } 144 145 /** 146 * ptrace_event - possibly stop for a ptrace event notification 147 * @event: %PTRACE_EVENT_* value to report 148 * @message: value for %PTRACE_GETEVENTMSG to return 149 * 150 * Check whether @event is enabled and, if so, report @event and @message 151 * to the ptrace parent. 152 * 153 * Called without locks. 154 */ 155 static inline void ptrace_event(int event, unsigned long message) 156 { 157 if (unlikely(ptrace_event_enabled(current, event))) { 158 ptrace_notify((event << 8) | SIGTRAP, message); 159 } else if (event == PTRACE_EVENT_EXEC) { 160 /* legacy EXEC report via SIGTRAP */ 161 if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED) 162 send_sig(SIGTRAP, current, 0); 163 } 164 } 165 166 /** 167 * ptrace_event_pid - possibly stop for a ptrace event notification 168 * @event: %PTRACE_EVENT_* value to report 169 * @pid: process identifier for %PTRACE_GETEVENTMSG to return 170 * 171 * Check whether @event is enabled and, if so, report @event and @pid 172 * to the ptrace parent. @pid is reported as the pid_t seen from the 173 * ptrace parent's pid namespace. 174 * 175 * Called without locks. 176 */ 177 static inline void ptrace_event_pid(int event, struct pid *pid) 178 { 179 /* 180 * FIXME: There's a potential race if a ptracer in a different pid 181 * namespace than parent attaches between computing message below and 182 * when we acquire tasklist_lock in ptrace_stop(). If this happens, 183 * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG. 184 */ 185 unsigned long message = 0; 186 struct pid_namespace *ns; 187 188 rcu_read_lock(); 189 ns = task_active_pid_ns(rcu_dereference(current->parent)); 190 if (ns) 191 message = pid_nr_ns(pid, ns); 192 rcu_read_unlock(); 193 194 ptrace_event(event, message); 195 } 196 197 /** 198 * ptrace_init_task - initialize ptrace state for a new child 199 * @child: new child task 200 * @ptrace: true if child should be ptrace'd by parent's tracer 201 * 202 * This is called immediately after adding @child to its parent's children 203 * list. @ptrace is false in the normal case, and true to ptrace @child. 204 * 205 * Called with current's siglock and write_lock_irq(&tasklist_lock) held. 206 */ 207 static inline void ptrace_init_task(struct task_struct *child, bool ptrace) 208 { 209 INIT_LIST_HEAD(&child->ptrace_entry); 210 INIT_LIST_HEAD(&child->ptraced); 211 child->jobctl = 0; 212 child->ptrace = 0; 213 child->parent = child->real_parent; 214 215 if (unlikely(ptrace) && current->ptrace) { 216 child->ptrace = current->ptrace; 217 __ptrace_link(child, current->parent, current->ptracer_cred); 218 219 if (child->ptrace & PT_SEIZED) 220 task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); 221 else 222 sigaddset(&child->pending.signal, SIGSTOP); 223 } 224 else 225 child->ptracer_cred = NULL; 226 } 227 228 /** 229 * ptrace_release_task - final ptrace-related cleanup of a zombie being reaped 230 * @task: task in %EXIT_DEAD state 231 * 232 * Called with write_lock(&tasklist_lock) held. 233 */ 234 static inline void ptrace_release_task(struct task_struct *task) 235 { 236 BUG_ON(!list_empty(&task->ptraced)); 237 ptrace_unlink(task); 238 BUG_ON(!list_empty(&task->ptrace_entry)); 239 } 240 241 #ifndef force_successful_syscall_return 242 /* 243 * System call handlers that, upon successful completion, need to return a 244 * negative value should call force_successful_syscall_return() right before 245 * returning. On architectures where the syscall convention provides for a 246 * separate error flag (e.g., alpha, ia64, ppc{,64}, sparc{,64}, possibly 247 * others), this macro can be used to ensure that the error flag will not get 248 * set. On architectures which do not support a separate error flag, the macro 249 * is a no-op and the spurious error condition needs to be filtered out by some 250 * other means (e.g., in user-level, by passing an extra argument to the 251 * syscall handler, or something along those lines). 252 */ 253 #define force_successful_syscall_return() do { } while (0) 254 #endif 255 256 #ifndef is_syscall_success 257 /* 258 * On most systems we can tell if a syscall is a success based on if the retval 259 * is an error value. On some systems like ia64 and powerpc they have different 260 * indicators of success/failure and must define their own. 261 */ 262 #define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs)))) 263 #endif 264 265 /* 266 * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__. 267 * 268 * These do-nothing inlines are used when the arch does not 269 * implement single-step. The kerneldoc comments are here 270 * to document the interface for all arch definitions. 271 */ 272 273 #ifndef arch_has_single_step 274 /** 275 * arch_has_single_step - does this CPU support user-mode single-step? 276 * 277 * If this is defined, then there must be function declarations or 278 * inlines for user_enable_single_step() and user_disable_single_step(). 279 * arch_has_single_step() should evaluate to nonzero iff the machine 280 * supports instruction single-step for user mode. 281 * It can be a constant or it can test a CPU feature bit. 282 */ 283 #define arch_has_single_step() (0) 284 285 /** 286 * user_enable_single_step - single-step in user-mode task 287 * @task: either current or a task stopped in %TASK_TRACED 288 * 289 * This can only be called when arch_has_single_step() has returned nonzero. 290 * Set @task so that when it returns to user mode, it will trap after the 291 * next single instruction executes. If arch_has_block_step() is defined, 292 * this must clear the effects of user_enable_block_step() too. 293 */ 294 static inline void user_enable_single_step(struct task_struct *task) 295 { 296 BUG(); /* This can never be called. */ 297 } 298 299 /** 300 * user_disable_single_step - cancel user-mode single-step 301 * @task: either current or a task stopped in %TASK_TRACED 302 * 303 * Clear @task of the effects of user_enable_single_step() and 304 * user_enable_block_step(). This can be called whether or not either 305 * of those was ever called on @task, and even if arch_has_single_step() 306 * returned zero. 307 */ 308 static inline void user_disable_single_step(struct task_struct *task) 309 { 310 } 311 #else 312 extern void user_enable_single_step(struct task_struct *); 313 extern void user_disable_single_step(struct task_struct *); 314 #endif /* arch_has_single_step */ 315 316 #ifndef arch_has_block_step 317 /** 318 * arch_has_block_step - does this CPU support user-mode block-step? 319 * 320 * If this is defined, then there must be a function declaration or inline 321 * for user_enable_block_step(), and arch_has_single_step() must be defined 322 * too. arch_has_block_step() should evaluate to nonzero iff the machine 323 * supports step-until-branch for user mode. It can be a constant or it 324 * can test a CPU feature bit. 325 */ 326 #define arch_has_block_step() (0) 327 328 /** 329 * user_enable_block_step - step until branch in user-mode task 330 * @task: either current or a task stopped in %TASK_TRACED 331 * 332 * This can only be called when arch_has_block_step() has returned nonzero, 333 * and will never be called when single-instruction stepping is being used. 334 * Set @task so that when it returns to user mode, it will trap after the 335 * next branch or trap taken. 336 */ 337 static inline void user_enable_block_step(struct task_struct *task) 338 { 339 BUG(); /* This can never be called. */ 340 } 341 #else 342 extern void user_enable_block_step(struct task_struct *); 343 #endif /* arch_has_block_step */ 344 345 #ifdef ARCH_HAS_USER_SINGLE_STEP_REPORT 346 extern void user_single_step_report(struct pt_regs *regs); 347 #else 348 static inline void user_single_step_report(struct pt_regs *regs) 349 { 350 kernel_siginfo_t info; 351 clear_siginfo(&info); 352 info.si_signo = SIGTRAP; 353 info.si_errno = 0; 354 info.si_code = SI_USER; 355 info.si_pid = 0; 356 info.si_uid = 0; 357 force_sig_info(&info); 358 } 359 #endif 360 361 #ifndef arch_ptrace_stop_needed 362 /** 363 * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called 364 * 365 * This is called with the siglock held, to decide whether or not it's 366 * necessary to release the siglock and call arch_ptrace_stop(). It can be 367 * defined to a constant if arch_ptrace_stop() is never required, or always 368 * is. On machines where this makes sense, it should be defined to a quick 369 * test to optimize out calling arch_ptrace_stop() when it would be 370 * superfluous. For example, if the thread has not been back to user mode 371 * since the last stop, the thread state might indicate that nothing needs 372 * to be done. 373 * 374 * This is guaranteed to be invoked once before a task stops for ptrace and 375 * may include arch-specific operations necessary prior to a ptrace stop. 376 */ 377 #define arch_ptrace_stop_needed() (0) 378 #endif 379 380 #ifndef arch_ptrace_stop 381 /** 382 * arch_ptrace_stop - Do machine-specific work before stopping for ptrace 383 * 384 * This is called with no locks held when arch_ptrace_stop_needed() has 385 * just returned nonzero. It is allowed to block, e.g. for user memory 386 * access. The arch can have machine-specific work to be done before 387 * ptrace stops. On ia64, register backing store gets written back to user 388 * memory here. Since this can be costly (requires dropping the siglock), 389 * we only do it when the arch requires it for this particular stop, as 390 * indicated by arch_ptrace_stop_needed(). 391 */ 392 #define arch_ptrace_stop() do { } while (0) 393 #endif 394 395 #ifndef current_pt_regs 396 #define current_pt_regs() task_pt_regs(current) 397 #endif 398 399 /* 400 * unlike current_pt_regs(), this one is equal to task_pt_regs(current) 401 * on *all* architectures; the only reason to have a per-arch definition 402 * is optimisation. 403 */ 404 #ifndef signal_pt_regs 405 #define signal_pt_regs() task_pt_regs(current) 406 #endif 407 408 #ifndef current_user_stack_pointer 409 #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) 410 #endif 411 412 extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); 413 414 extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); 415 416 /* 417 * ptrace report for syscall entry and exit looks identical. 418 */ 419 static inline int ptrace_report_syscall(unsigned long message) 420 { 421 int ptrace = current->ptrace; 422 int signr; 423 424 if (!(ptrace & PT_PTRACED)) 425 return 0; 426 427 signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0), 428 message); 429 430 /* 431 * this isn't the same as continuing with a signal, but it will do 432 * for normal use. strace only continues with a signal if the 433 * stopping signal is not SIGTRAP. -brl 434 */ 435 if (signr) 436 send_sig(signr, current, 1); 437 438 return fatal_signal_pending(current); 439 } 440 441 /** 442 * ptrace_report_syscall_entry - task is about to attempt a system call 443 * @regs: user register state of current task 444 * 445 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or 446 * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just 447 * entered the kernel for a system call. Full user register state is 448 * available here. Changing the values in @regs can affect the system 449 * call number and arguments to be tried. It is safe to block here, 450 * preventing the system call from beginning. 451 * 452 * Returns zero normally, or nonzero if the calling arch code should abort 453 * the system call. That must prevent normal entry so no system call is 454 * made. If @task ever returns to user mode after this, its register state 455 * is unspecified, but should be something harmless like an %ENOSYS error 456 * return. It should preserve enough information so that syscall_rollback() 457 * can work (see asm-generic/syscall.h). 458 * 459 * Called without locks, just after entering kernel mode. 460 */ 461 static inline __must_check int ptrace_report_syscall_entry( 462 struct pt_regs *regs) 463 { 464 return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); 465 } 466 467 /** 468 * ptrace_report_syscall_exit - task has just finished a system call 469 * @regs: user register state of current task 470 * @step: nonzero if simulating single-step or block-step 471 * 472 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when 473 * the current task has just finished an attempted system call. Full 474 * user register state is available here. It is safe to block here, 475 * preventing signals from being processed. 476 * 477 * If @step is nonzero, this report is also in lieu of the normal 478 * trap that would follow the system call instruction because 479 * user_enable_block_step() or user_enable_single_step() was used. 480 * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. 481 * 482 * Called without locks, just before checking for pending signals. 483 */ 484 static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step) 485 { 486 if (step) 487 user_single_step_report(regs); 488 else 489 ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); 490 } 491 #endif 492