xref: /linux/kernel/kprobes.c (revision 223a76b2)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  Kernel Probes (KProbes)
4  *
5  * Copyright (C) IBM Corporation, 2002, 2004
6  *
7  * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
8  *		Probes initial implementation (includes suggestions from
9  *		Rusty Russell).
10  * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
11  *		hlists and exceptions notifier as suggested by Andi Kleen.
12  * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
13  *		interface to access function arguments.
14  * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
15  *		exceptions notifier to be first on the priority list.
16  * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
17  *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
18  *		<prasanna@in.ibm.com> added function-return probes.
19  */
20 
21 #define pr_fmt(fmt) "kprobes: " fmt
22 
23 #include <linux/kprobes.h>
24 #include <linux/hash.h>
25 #include <linux/init.h>
26 #include <linux/slab.h>
27 #include <linux/stddef.h>
28 #include <linux/export.h>
29 #include <linux/moduleloader.h>
30 #include <linux/kallsyms.h>
31 #include <linux/freezer.h>
32 #include <linux/seq_file.h>
33 #include <linux/debugfs.h>
34 #include <linux/sysctl.h>
35 #include <linux/kdebug.h>
36 #include <linux/memory.h>
37 #include <linux/ftrace.h>
38 #include <linux/cpu.h>
39 #include <linux/jump_label.h>
40 #include <linux/static_call.h>
41 #include <linux/perf_event.h>
42 
43 #include <asm/sections.h>
44 #include <asm/cacheflush.h>
45 #include <asm/errno.h>
46 #include <linux/uaccess.h>
47 
/* The kprobe hash table has (1 << KPROBE_HASH_BITS) buckets. */
#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)


static int kprobes_initialized;
/*
 * 'kprobe_table' can be accessed by either:
 * - normal hlist traversal and RCU add/del while 'kprobe_mutex' is held, or
 * - RCU hlist traversal with preemption disabled (breakpoint handlers).
 */
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];

/* NOTE: change this value only with 'kprobe_mutex' held */
static bool kprobes_all_disarmed;

/* This protects 'kprobe_table' and 'optimizing_list' */
static DEFINE_MUTEX(kprobe_mutex);
/* Per-CPU pointer written by set_kprobe_instance()/reset_kprobe_instance(). */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
66 
67 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
68 					unsigned int __unused)
69 {
70 	return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
71 }
72 
/*
 * Blacklist -- list of 'struct kprobe_blacklist_entry' recording the
 * places where kprobes cannot probe.
 */
static LIST_HEAD(kprobe_blacklist);
78 
79 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
 * 'kprobe::ainsn.insn' points to the copy of the instruction to be
 * single-stepped. x86_64, POWER4 and above have no-exec support and
 * stepping on the instruction on a vmalloced/kmalloced/data page
 * is a recipe for disaster.
 *
 * One 'struct kprobe_insn_page' describes one page of instruction slots
 * together with the per-slot bookkeeping for that page.
 */
struct kprobe_insn_page {
	struct list_head list;		/* Link in the owning cache's 'pages' list */
	kprobe_opcode_t *insns;		/* Page of instruction slots */
	struct kprobe_insn_cache *cache;	/* Cache this page belongs to */
	int nused;			/* Slots not SLOT_CLEAN (used or dirty) */
	int ngarbage;			/* Slots marked SLOT_DIRTY, not yet collected */
	char slot_used[];		/* Per-slot 'enum kprobe_slot_state' */
};
94 
/*
 * Number of bytes to allocate for a 'struct kprobe_insn_page' whose
 * trailing 'slot_used[]' array holds 'slots' entries.
 */
#define KPROBE_INSN_PAGE_SIZE(slots)			\
	(offsetof(struct kprobe_insn_page, slot_used) +	\
	 (sizeof(char) * (slots)))
98 
99 static int slots_per_page(struct kprobe_insn_cache *c)
100 {
101 	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
102 }
103 
/* State of a single instruction slot, stored in 'slot_used[]'. */
enum kprobe_slot_state {
	SLOT_CLEAN = 0,		/* Free; may be handed out by __get_insn_slot() */
	SLOT_DIRTY = 1,		/* Freed as "dirty"; waits for garbage collection */
	SLOT_USED = 2,		/* Currently allocated */
};
109 
110 void __weak *alloc_insn_page(void)
111 {
112 	/*
113 	 * Use module_alloc() so this page is within +/- 2GB of where the
114 	 * kernel image and loaded module images reside. This is required
115 	 * for most of the architectures.
116 	 * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
117 	 */
118 	return module_alloc(PAGE_SIZE);
119 }
120 
/* Release an instruction page through module_memfree(). */
static void free_insn_page(void *page)
{
	module_memfree(page);
}
125 
/*
 * Slot cache for out-of-line instruction copies. Starts with no pages
 * and no garbage; each slot holds MAX_INSN_SIZE opcodes.
 */
struct kprobe_insn_cache kprobe_insn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
	.alloc = alloc_insn_page,
	.free = free_insn_page,
	.sym = KPROBE_INSN_PAGE_SYM,
	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
	.insn_size = MAX_INSN_SIZE,
	.nr_garbage = 0,
};
/* Defined below; needed by __get_insn_slot(). */
static int collect_garbage_slots(struct kprobe_insn_cache *c);
136 
137 /**
138  * __get_insn_slot() - Find a slot on an executable page for an instruction.
139  * We allocate an executable page if there's no room on existing ones.
140  */
141 kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
142 {
143 	struct kprobe_insn_page *kip;
144 	kprobe_opcode_t *slot = NULL;
145 
146 	/* Since the slot array is not protected by rcu, we need a mutex */
147 	mutex_lock(&c->mutex);
148  retry:
149 	rcu_read_lock();
150 	list_for_each_entry_rcu(kip, &c->pages, list) {
151 		if (kip->nused < slots_per_page(c)) {
152 			int i;
153 
154 			for (i = 0; i < slots_per_page(c); i++) {
155 				if (kip->slot_used[i] == SLOT_CLEAN) {
156 					kip->slot_used[i] = SLOT_USED;
157 					kip->nused++;
158 					slot = kip->insns + (i * c->insn_size);
159 					rcu_read_unlock();
160 					goto out;
161 				}
162 			}
163 			/* kip->nused is broken. Fix it. */
164 			kip->nused = slots_per_page(c);
165 			WARN_ON(1);
166 		}
167 	}
168 	rcu_read_unlock();
169 
170 	/* If there are any garbage slots, collect it and try again. */
171 	if (c->nr_garbage && collect_garbage_slots(c) == 0)
172 		goto retry;
173 
174 	/* All out of space.  Need to allocate a new page. */
175 	kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
176 	if (!kip)
177 		goto out;
178 
179 	kip->insns = c->alloc();
180 	if (!kip->insns) {
181 		kfree(kip);
182 		goto out;
183 	}
184 	INIT_LIST_HEAD(&kip->list);
185 	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
186 	kip->slot_used[0] = SLOT_USED;
187 	kip->nused = 1;
188 	kip->ngarbage = 0;
189 	kip->cache = c;
190 	list_add_rcu(&kip->list, &c->pages);
191 	slot = kip->insns;
192 
193 	/* Record the perf ksymbol register event after adding the page */
194 	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
195 			   PAGE_SIZE, false, c->sym);
196 out:
197 	mutex_unlock(&c->mutex);
198 	return slot;
199 }
200 
201 /* Return 1 if all garbages are collected, otherwise 0. */
202 static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
203 {
204 	kip->slot_used[idx] = SLOT_CLEAN;
205 	kip->nused--;
206 	if (kip->nused == 0) {
207 		/*
208 		 * Page is no longer in use.  Free it unless
209 		 * it's the last one.  We keep the last one
210 		 * so as not to have to set it up again the
211 		 * next time somebody inserts a probe.
212 		 */
213 		if (!list_is_singular(&kip->list)) {
214 			/*
215 			 * Record perf ksymbol unregister event before removing
216 			 * the page.
217 			 */
218 			perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
219 					   (unsigned long)kip->insns, PAGE_SIZE, true,
220 					   kip->cache->sym);
221 			list_del_rcu(&kip->list);
222 			synchronize_rcu();
223 			kip->cache->free(kip->insns);
224 			kfree(kip);
225 		}
226 		return 1;
227 	}
228 	return 0;
229 }
230 
231 static int collect_garbage_slots(struct kprobe_insn_cache *c)
232 {
233 	struct kprobe_insn_page *kip, *next;
234 
235 	/* Ensure no-one is interrupted on the garbages */
236 	synchronize_rcu();
237 
238 	list_for_each_entry_safe(kip, next, &c->pages, list) {
239 		int i;
240 
241 		if (kip->ngarbage == 0)
242 			continue;
243 		kip->ngarbage = 0;	/* we will collect all garbages */
244 		for (i = 0; i < slots_per_page(c); i++) {
245 			if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
246 				break;
247 		}
248 	}
249 	c->nr_garbage = 0;
250 	return 0;
251 }
252 
253 void __free_insn_slot(struct kprobe_insn_cache *c,
254 		      kprobe_opcode_t *slot, int dirty)
255 {
256 	struct kprobe_insn_page *kip;
257 	long idx;
258 
259 	mutex_lock(&c->mutex);
260 	rcu_read_lock();
261 	list_for_each_entry_rcu(kip, &c->pages, list) {
262 		idx = ((long)slot - (long)kip->insns) /
263 			(c->insn_size * sizeof(kprobe_opcode_t));
264 		if (idx >= 0 && idx < slots_per_page(c))
265 			goto out;
266 	}
267 	/* Could not find this slot. */
268 	WARN_ON(1);
269 	kip = NULL;
270 out:
271 	rcu_read_unlock();
272 	/* Mark and sweep: this may sleep */
273 	if (kip) {
274 		/* Check double free */
275 		WARN_ON(kip->slot_used[idx] != SLOT_USED);
276 		if (dirty) {
277 			kip->slot_used[idx] = SLOT_DIRTY;
278 			kip->ngarbage++;
279 			if (++c->nr_garbage > slots_per_page(c))
280 				collect_garbage_slots(c);
281 		} else {
282 			collect_one_slot(kip, idx);
283 		}
284 	}
285 	mutex_unlock(&c->mutex);
286 }
287 
288 /*
289  * Check given address is on the page of kprobe instruction slots.
290  * This will be used for checking whether the address on a stack
291  * is on a text area or not.
292  */
293 bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
294 {
295 	struct kprobe_insn_page *kip;
296 	bool ret = false;
297 
298 	rcu_read_lock();
299 	list_for_each_entry_rcu(kip, &c->pages, list) {
300 		if (addr >= (unsigned long)kip->insns &&
301 		    addr < (unsigned long)kip->insns + PAGE_SIZE) {
302 			ret = true;
303 			break;
304 		}
305 	}
306 	rcu_read_unlock();
307 
308 	return ret;
309 }
310 
311 int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
312 			     unsigned long *value, char *type, char *sym)
313 {
314 	struct kprobe_insn_page *kip;
315 	int ret = -ERANGE;
316 
317 	rcu_read_lock();
318 	list_for_each_entry_rcu(kip, &c->pages, list) {
319 		if ((*symnum)--)
320 			continue;
321 		strscpy(sym, c->sym, KSYM_NAME_LEN);
322 		*type = 't';
323 		*value = (unsigned long)kip->insns;
324 		ret = 0;
325 		break;
326 	}
327 	rcu_read_unlock();
328 
329 	return ret;
330 }
331 
332 #ifdef CONFIG_OPTPROBES
/* Default (weak) page allocator for optprobe buffers: same as alloc_insn_page(). */
void __weak *alloc_optinsn_page(void)
{
	return alloc_insn_page();
}
337 
/* Default (weak) page release for optprobe buffers: same as free_insn_page(). */
void __weak free_optinsn_page(void *page)
{
	free_insn_page(page);
}
342 
/*
 * Slot cache for optimized_kprobe buffers. Unlike 'kprobe_insn_slots',
 * '.insn_size' is not set here.
 */
struct kprobe_insn_cache kprobe_optinsn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
	.alloc = alloc_optinsn_page,
	.free = free_optinsn_page,
	.sym = KPROBE_OPTINSN_PAGE_SYM,
	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
	/* .insn_size is initialized later */
	.nr_garbage = 0,
};
353 #endif
354 #endif
355 
/*
 * Record 'kp' in this CPU's 'kprobe_instance'.
 * We have preemption disabled, so it is safe to use the __ versions of
 * the per-CPU accessors.
 */
static inline void set_kprobe_instance(struct kprobe *kp)
{
	__this_cpu_write(kprobe_instance, kp);
}
361 
/* Clear this CPU's 'kprobe_instance' (see set_kprobe_instance()). */
static inline void reset_kprobe_instance(void)
{
	__this_cpu_write(kprobe_instance, NULL);
}
366 
367 /*
368  * This routine is called either:
369  *	- under the 'kprobe_mutex' - during kprobe_[un]register().
370  *				OR
371  *	- with preemption disabled - from architecture specific code.
372  */
373 struct kprobe *get_kprobe(void *addr)
374 {
375 	struct hlist_head *head;
376 	struct kprobe *p;
377 
378 	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
379 	hlist_for_each_entry_rcu(p, head, hlist,
380 				 lockdep_is_held(&kprobe_mutex)) {
381 		if (p->addr == addr)
382 			return p;
383 	}
384 
385 	return NULL;
386 }
387 NOKPROBE_SYMBOL(get_kprobe);
388 
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);

/*
 * Return true if 'p' is an aggregator, i.e. its pre_handler is
 * aggr_pre_handler().
 */
static inline int kprobe_aggrprobe(struct kprobe *p)
{
	return p->pre_handler == aggr_pre_handler;
}
396 
397 /* Return true if 'p' is unused */
398 static inline int kprobe_unused(struct kprobe *p)
399 {
400 	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
401 	       list_empty(&p->list);
402 }
403 
/*
 * Keep all fields in the kprobe consistent: copy the saved opcode and the
 * arch-specific instruction data from 'ap' into 'p'.
 */
static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
{
	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
	memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
}
410 
411 #ifdef CONFIG_OPTPROBES
/*
 * Whether jump optimization of kprobes is currently allowed.
 * NOTE: This is protected by 'kprobe_mutex'.
 */
static bool kprobes_allow_optimization;
414 
415 /*
416  * Call all 'kprobe::pre_handler' on the list, but ignores its return value.
417  * This must be called from arch-dep optimized caller.
418  */
419 void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
420 {
421 	struct kprobe *kp;
422 
423 	list_for_each_entry_rcu(kp, &p->list, list) {
424 		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
425 			set_kprobe_instance(kp);
426 			kp->pre_handler(kp, regs);
427 		}
428 		reset_kprobe_instance();
429 	}
430 }
431 NOKPROBE_SYMBOL(opt_pre_handler);
432 
433 /* Free optimized instructions and optimized_kprobe */
434 static void free_aggr_kprobe(struct kprobe *p)
435 {
436 	struct optimized_kprobe *op;
437 
438 	op = container_of(p, struct optimized_kprobe, kp);
439 	arch_remove_optimized_kprobe(op);
440 	arch_remove_kprobe(p);
441 	kfree(op);
442 }
443 
444 /* Return true if the kprobe is ready for optimization. */
445 static inline int kprobe_optready(struct kprobe *p)
446 {
447 	struct optimized_kprobe *op;
448 
449 	if (kprobe_aggrprobe(p)) {
450 		op = container_of(p, struct optimized_kprobe, kp);
451 		return arch_prepared_optinsn(&op->optinsn);
452 	}
453 
454 	return 0;
455 }
456 
457 /* Return true if the kprobe is disarmed. Note: p must be on hash list */
458 static inline int kprobe_disarmed(struct kprobe *p)
459 {
460 	struct optimized_kprobe *op;
461 
462 	/* If kprobe is not aggr/opt probe, just return kprobe is disabled */
463 	if (!kprobe_aggrprobe(p))
464 		return kprobe_disabled(p);
465 
466 	op = container_of(p, struct optimized_kprobe, kp);
467 
468 	return kprobe_disabled(p) && list_empty(&op->list);
469 }
470 
471 /* Return true if the probe is queued on (un)optimizing lists */
472 static int kprobe_queued(struct kprobe *p)
473 {
474 	struct optimized_kprobe *op;
475 
476 	if (kprobe_aggrprobe(p)) {
477 		op = container_of(p, struct optimized_kprobe, kp);
478 		if (!list_empty(&op->list))
479 			return 1;
480 	}
481 	return 0;
482 }
483 
484 /*
485  * Return an optimized kprobe whose optimizing code replaces
486  * instructions including 'addr' (exclude breakpoint).
487  */
488 static struct kprobe *get_optimized_kprobe(unsigned long addr)
489 {
490 	int i;
491 	struct kprobe *p = NULL;
492 	struct optimized_kprobe *op;
493 
494 	/* Don't check i == 0, since that is a breakpoint case. */
495 	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++)
496 		p = get_kprobe((void *)(addr - i));
497 
498 	if (p && kprobe_optready(p)) {
499 		op = container_of(p, struct optimized_kprobe, kp);
500 		if (arch_within_optimized_kprobe(op, addr))
501 			return p;
502 	}
503 
504 	return NULL;
505 }
506 
/*
 * Optimization staging lists, protected by 'kprobe_mutex':
 * probes waiting to be optimized, waiting to be unoptimized, and
 * waiting to be reclaimed by do_free_cleaned_kprobes().
 */
static LIST_HEAD(optimizing_list);
static LIST_HEAD(unoptimizing_list);
static LIST_HEAD(freeing_list);

static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
/* Delay passed to schedule_delayed_work() when kicking the optimizer */
#define OPTIMIZE_DELAY 5
515 
516 /*
517  * Optimize (replace a breakpoint with a jump) kprobes listed on
518  * 'optimizing_list'.
519  */
520 static void do_optimize_kprobes(void)
521 {
522 	lockdep_assert_held(&text_mutex);
523 	/*
524 	 * The optimization/unoptimization refers 'online_cpus' via
525 	 * stop_machine() and cpu-hotplug modifies the 'online_cpus'.
526 	 * And same time, 'text_mutex' will be held in cpu-hotplug and here.
527 	 * This combination can cause a deadlock (cpu-hotplug tries to lock
528 	 * 'text_mutex' but stop_machine() can not be done because
529 	 * the 'online_cpus' has been changed)
530 	 * To avoid this deadlock, caller must have locked cpu-hotplug
531 	 * for preventing cpu-hotplug outside of 'text_mutex' locking.
532 	 */
533 	lockdep_assert_cpus_held();
534 
535 	/* Optimization never be done when disarmed */
536 	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
537 	    list_empty(&optimizing_list))
538 		return;
539 
540 	arch_optimize_kprobes(&optimizing_list);
541 }
542 
/*
 * Unoptimize (replace a jump with a breakpoint, and remove the breakpoint
 * if needed) the kprobes listed on 'unoptimizing_list'.
 *
 * Unused probes are unhashed here and left on 'freeing_list' for
 * do_free_cleaned_kprobes(); all other probes are taken off the list.
 *
 * NOTE(review): unoptimize_kprobe() (forced case) and
 * kill_optimized_kprobe() also put probes on 'freeing_list' directly.
 * When 'unoptimizing_list' is empty the early return below skips the
 * loop for those entries -- confirm that this is intended.
 */
static void do_unoptimize_kprobes(void)
{
	struct optimized_kprobe *op, *tmp;

	lockdep_assert_held(&text_mutex);
	/* See comment in do_optimize_kprobes() */
	lockdep_assert_cpus_held();

	/* Unoptimization must be done anytime */
	if (list_empty(&unoptimizing_list))
		return;

	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
	/* Loop on 'freeing_list' for disarming */
	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
		/* Switching from detour code to origin */
		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
		/* Disarm probes if marked disabled */
		if (kprobe_disabled(&op->kp))
			arch_disarm_kprobe(&op->kp);
		if (kprobe_unused(&op->kp)) {
			/*
			 * Remove unused probes from hash list. After waiting
			 * for synchronization, these probes are reclaimed.
			 * (reclaiming is done by do_free_cleaned_kprobes().)
			 */
			hlist_del_rcu(&op->kp.hlist);
		} else
			list_del_init(&op->list);
	}
}
578 
579 /* Reclaim all kprobes on the 'freeing_list' */
580 static void do_free_cleaned_kprobes(void)
581 {
582 	struct optimized_kprobe *op, *tmp;
583 
584 	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
585 		list_del_init(&op->list);
586 		if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) {
587 			/*
588 			 * This must not happen, but if there is a kprobe
589 			 * still in use, keep it on kprobes hash list.
590 			 */
591 			continue;
592 		}
593 		free_aggr_kprobe(&op->kp);
594 	}
595 }
596 
/* Schedule 'optimizing_work' to run after OPTIMIZE_DELAY has passed */
static void kick_kprobe_optimizer(void)
{
	schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
}
602 
/*
 * Kprobe jump optimizer: the work function behind 'optimizing_work'.
 *
 * Locks are taken in the order 'kprobe_mutex', cpus_read_lock(),
 * 'text_mutex'. The optimizer re-arms itself if either staging list is
 * still non-empty when it finishes.
 */
static void kprobe_optimizer(struct work_struct *work)
{
	mutex_lock(&kprobe_mutex);
	cpus_read_lock();
	mutex_lock(&text_mutex);

	/*
	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
	 * kprobes before waiting for the quiescence period.
	 */
	do_unoptimize_kprobes();

	/*
	 * Step 2: Wait for the quiescence period to ensure that all
	 * potentially preempted tasks have been scheduled normally. Because
	 * an optprobe may modify multiple instructions, there is a chance
	 * that a task was preempted on the Nth instruction. In that case,
	 * such a task could return to the 2nd-Nth byte of the jump
	 * instruction. This wait is for avoiding that.
	 * Note that on a non-preemptive kernel, this is transparently converted
	 * to synchronize_sched() to wait for all interrupts to have completed.
	 */
	synchronize_rcu_tasks();

	/* Step 3: Optimize kprobes after the quiescence period */
	do_optimize_kprobes();

	/* Step 4: Free cleaned kprobes after the quiescence period */
	do_free_cleaned_kprobes();

	mutex_unlock(&text_mutex);
	cpus_read_unlock();

	/* Step 5: Kick optimizer again if needed */
	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
		kick_kprobe_optimizer();

	mutex_unlock(&kprobe_mutex);
}
642 
643 /* Wait for completing optimization and unoptimization */
644 void wait_for_kprobe_optimizer(void)
645 {
646 	mutex_lock(&kprobe_mutex);
647 
648 	while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
649 		mutex_unlock(&kprobe_mutex);
650 
651 		/* This will also make 'optimizing_work' execute immmediately */
652 		flush_delayed_work(&optimizing_work);
653 		/* 'optimizing_work' might not have been queued yet, relax */
654 		cpu_relax();
655 
656 		mutex_lock(&kprobe_mutex);
657 	}
658 
659 	mutex_unlock(&kprobe_mutex);
660 }
661 
662 static bool optprobe_queued_unopt(struct optimized_kprobe *op)
663 {
664 	struct optimized_kprobe *_op;
665 
666 	list_for_each_entry(_op, &unoptimizing_list, list) {
667 		if (op == _op)
668 			return true;
669 	}
670 
671 	return false;
672 }
673 
674 /* Optimize kprobe if p is ready to be optimized */
675 static void optimize_kprobe(struct kprobe *p)
676 {
677 	struct optimized_kprobe *op;
678 
679 	/* Check if the kprobe is disabled or not ready for optimization. */
680 	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
681 	    (kprobe_disabled(p) || kprobes_all_disarmed))
682 		return;
683 
684 	/* kprobes with 'post_handler' can not be optimized */
685 	if (p->post_handler)
686 		return;
687 
688 	op = container_of(p, struct optimized_kprobe, kp);
689 
690 	/* Check there is no other kprobes at the optimized instructions */
691 	if (arch_check_optimized_kprobe(op) < 0)
692 		return;
693 
694 	/* Check if it is already optimized. */
695 	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
696 		if (optprobe_queued_unopt(op)) {
697 			/* This is under unoptimizing. Just dequeue the probe */
698 			list_del_init(&op->list);
699 		}
700 		return;
701 	}
702 	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
703 
704 	/*
705 	 * On the 'unoptimizing_list' and 'optimizing_list',
706 	 * 'op' must have OPTIMIZED flag
707 	 */
708 	if (WARN_ON_ONCE(!list_empty(&op->list)))
709 		return;
710 
711 	list_add(&op->list, &optimizing_list);
712 	kick_kprobe_optimizer();
713 }
714 
/*
 * Short cut to direct unoptimizing: unoptimize 'op' right away and clear
 * its OPTIMIZED flag. The caller must hold the cpus read lock.
 */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
	lockdep_assert_cpus_held();
	arch_unoptimize_kprobe(op);
	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
}
722 
723 /* Unoptimize a kprobe if p is optimized */
724 static void unoptimize_kprobe(struct kprobe *p, bool force)
725 {
726 	struct optimized_kprobe *op;
727 
728 	if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
729 		return; /* This is not an optprobe nor optimized */
730 
731 	op = container_of(p, struct optimized_kprobe, kp);
732 	if (!kprobe_optimized(p))
733 		return;
734 
735 	if (!list_empty(&op->list)) {
736 		if (optprobe_queued_unopt(op)) {
737 			/* Queued in unoptimizing queue */
738 			if (force) {
739 				/*
740 				 * Forcibly unoptimize the kprobe here, and queue it
741 				 * in the freeing list for release afterwards.
742 				 */
743 				force_unoptimize_kprobe(op);
744 				list_move(&op->list, &freeing_list);
745 			}
746 		} else {
747 			/* Dequeue from the optimizing queue */
748 			list_del_init(&op->list);
749 			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
750 		}
751 		return;
752 	}
753 
754 	/* Optimized kprobe case */
755 	if (force) {
756 		/* Forcibly update the code: this is a special case */
757 		force_unoptimize_kprobe(op);
758 	} else {
759 		list_add(&op->list, &unoptimizing_list);
760 		kick_kprobe_optimizer();
761 	}
762 }
763 
764 /* Cancel unoptimizing for reusing */
765 static int reuse_unused_kprobe(struct kprobe *ap)
766 {
767 	struct optimized_kprobe *op;
768 
769 	/*
770 	 * Unused kprobe MUST be on the way of delayed unoptimizing (means
771 	 * there is still a relative jump) and disabled.
772 	 */
773 	op = container_of(ap, struct optimized_kprobe, kp);
774 	WARN_ON_ONCE(list_empty(&op->list));
775 	/* Enable the probe again */
776 	ap->flags &= ~KPROBE_FLAG_DISABLED;
777 	/* Optimize it again. (remove from 'op->list') */
778 	if (!kprobe_optready(ap))
779 		return -EINVAL;
780 
781 	optimize_kprobe(ap);
782 	return 0;
783 }
784 
/*
 * Remove the optimized instructions of 'p': dequeue it from any
 * (un)optimization queue, clear its OPTIMIZED flag and, if it is unused,
 * unhash it and hand it over to 'freeing_list'.
 */
static void kill_optimized_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	if (!list_empty(&op->list))
		/* Dequeue from the (un)optimization queue */
		list_del_init(&op->list);
	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;

	if (kprobe_unused(p)) {
		/* Enqueue if it is unused */
		list_add(&op->list, &freeing_list);
		/*
		 * Remove unused probes from the hash list. After waiting
		 * for synchronization, this probe is reclaimed.
		 * (reclaiming is done by do_free_cleaned_kprobes().)
		 */
		hlist_del_rcu(&op->kp.hlist);
	}

	/* Don't touch the code, because it is already freed. */
	arch_remove_optimized_kprobe(op);
}
810 
/* Prepare the optimized instructions of 'op', unless 'p' is ftrace-based */
static inline
void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	if (kprobe_ftrace(p))
		return;

	arch_prepare_optimized_kprobe(op, p);
}
817 
/*
 * Try to prepare optimized instructions for 'p', which must be embedded
 * in a 'struct optimized_kprobe'.
 */
static void prepare_optimized_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	__prepare_optimized_kprobe(op, p);
}
826 
827 /* Allocate new optimized_kprobe and try to prepare optimized instructions. */
828 static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
829 {
830 	struct optimized_kprobe *op;
831 
832 	op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
833 	if (!op)
834 		return NULL;
835 
836 	INIT_LIST_HEAD(&op->list);
837 	op->kp.addr = p->addr;
838 	__prepare_optimized_kprobe(op, p);
839 
840 	return &op->kp;
841 }
842 
843 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
844 
845 /*
846  * Prepare an optimized_kprobe and optimize it.
847  * NOTE: 'p' must be a normal registered kprobe.
848  */
849 static void try_to_optimize_kprobe(struct kprobe *p)
850 {
851 	struct kprobe *ap;
852 	struct optimized_kprobe *op;
853 
854 	/* Impossible to optimize ftrace-based kprobe. */
855 	if (kprobe_ftrace(p))
856 		return;
857 
858 	/* For preparing optimization, jump_label_text_reserved() is called. */
859 	cpus_read_lock();
860 	jump_label_lock();
861 	mutex_lock(&text_mutex);
862 
863 	ap = alloc_aggr_kprobe(p);
864 	if (!ap)
865 		goto out;
866 
867 	op = container_of(ap, struct optimized_kprobe, kp);
868 	if (!arch_prepared_optinsn(&op->optinsn)) {
869 		/* If failed to setup optimizing, fallback to kprobe. */
870 		arch_remove_optimized_kprobe(op);
871 		kfree(op);
872 		goto out;
873 	}
874 
875 	init_aggr_kprobe(ap, p);
876 	optimize_kprobe(ap);	/* This just kicks optimizer thread. */
877 
878 out:
879 	mutex_unlock(&text_mutex);
880 	jump_label_unlock();
881 	cpus_read_unlock();
882 }
883 
884 static void optimize_all_kprobes(void)
885 {
886 	struct hlist_head *head;
887 	struct kprobe *p;
888 	unsigned int i;
889 
890 	mutex_lock(&kprobe_mutex);
891 	/* If optimization is already allowed, just return. */
892 	if (kprobes_allow_optimization)
893 		goto out;
894 
895 	cpus_read_lock();
896 	kprobes_allow_optimization = true;
897 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
898 		head = &kprobe_table[i];
899 		hlist_for_each_entry(p, head, hlist)
900 			if (!kprobe_disabled(p))
901 				optimize_kprobe(p);
902 	}
903 	cpus_read_unlock();
904 	pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n");
905 out:
906 	mutex_unlock(&kprobe_mutex);
907 }
908 
909 #ifdef CONFIG_SYSCTL
910 static void unoptimize_all_kprobes(void)
911 {
912 	struct hlist_head *head;
913 	struct kprobe *p;
914 	unsigned int i;
915 
916 	mutex_lock(&kprobe_mutex);
917 	/* If optimization is already prohibited, just return. */
918 	if (!kprobes_allow_optimization) {
919 		mutex_unlock(&kprobe_mutex);
920 		return;
921 	}
922 
923 	cpus_read_lock();
924 	kprobes_allow_optimization = false;
925 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
926 		head = &kprobe_table[i];
927 		hlist_for_each_entry(p, head, hlist) {
928 			if (!kprobe_disabled(p))
929 				unoptimize_kprobe(p, false);
930 		}
931 	}
932 	cpus_read_unlock();
933 	mutex_unlock(&kprobe_mutex);
934 
935 	/* Wait for unoptimizing completion. */
936 	wait_for_kprobe_optimizer();
937 	pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
938 }
939 
940 static DEFINE_MUTEX(kprobe_sysctl_mutex);
941 int sysctl_kprobes_optimization;
942 int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
943 				      void *buffer, size_t *length,
944 				      loff_t *ppos)
945 {
946 	int ret;
947 
948 	mutex_lock(&kprobe_sysctl_mutex);
949 	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
950 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
951 
952 	if (sysctl_kprobes_optimization)
953 		optimize_all_kprobes();
954 	else
955 		unoptimize_all_kprobes();
956 	mutex_unlock(&kprobe_sysctl_mutex);
957 
958 	return ret;
959 }
960 #endif /* CONFIG_SYSCTL */
961 
962 /* Put a breakpoint for a probe. Must be called with 'text_mutex' locked. */
963 static void __arm_kprobe(struct kprobe *p)
964 {
965 	struct kprobe *_p;
966 
967 	/* Find the overlapping optimized kprobes. */
968 	_p = get_optimized_kprobe((unsigned long)p->addr);
969 	if (unlikely(_p))
970 		/* Fallback to unoptimized kprobe */
971 		unoptimize_kprobe(_p, true);
972 
973 	arch_arm_kprobe(p);
974 	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
975 }
976 
977 /* Remove the breakpoint of a probe. Must be called with 'text_mutex' locked. */
978 static void __disarm_kprobe(struct kprobe *p, bool reopt)
979 {
980 	struct kprobe *_p;
981 
982 	/* Try to unoptimize */
983 	unoptimize_kprobe(p, kprobes_all_disarmed);
984 
985 	if (!kprobe_queued(p)) {
986 		arch_disarm_kprobe(p);
987 		/* If another kprobe was blocked, re-optimize it. */
988 		_p = get_optimized_kprobe((unsigned long)p->addr);
989 		if (unlikely(_p) && reopt)
990 			optimize_kprobe(_p);
991 	}
992 	/*
993 	 * TODO: Since unoptimization and real disarming will be done by
994 	 * the worker thread, we can not check whether another probe are
995 	 * unoptimized because of this probe here. It should be re-optimized
996 	 * by the worker thread.
997 	 */
998 }
999 
1000 #else /* !CONFIG_OPTPROBES */
1001 
/*
 * Without CONFIG_OPTPROBES the optimization hooks compile to nothing and
 * arming/disarming maps directly onto the arch breakpoint helpers.
 */
#define optimize_kprobe(p)			do {} while (0)
#define unoptimize_kprobe(p, f)			do {} while (0)
#define kill_optimized_kprobe(p)		do {} while (0)
#define prepare_optimized_kprobe(p)		do {} while (0)
#define try_to_optimize_kprobe(p)		do {} while (0)
#define __arm_kprobe(p)				arch_arm_kprobe(p)
#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p)
#define kprobe_disarmed(p)			kprobe_disabled(p)
#define wait_for_kprobe_optimizer()		do {} while (0)
1011 
/* !CONFIG_OPTPROBES variant: must never be reached; warns and fails. */
static int reuse_unused_kprobe(struct kprobe *ap)
{
	/*
	 * If the optimized kprobe is NOT supported, the aggr kprobe is
	 * released at the same time that the last aggregated kprobe is
	 * unregistered.
	 * Thus there should be no chance to reuse an unused kprobe.
	 */
	WARN_ON_ONCE(1);
	return -EINVAL;
}
1023 
/* !CONFIG_OPTPROBES variant: release the arch state of 'p' and free it. */
static void free_aggr_kprobe(struct kprobe *p)
{
	arch_remove_kprobe(p);
	kfree(p);
}
1029 
/*
 * !CONFIG_OPTPROBES variant: allocate a zeroed plain 'struct kprobe'.
 * 'p' is not used here. Returns NULL if the allocation fails.
 */
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
}
1034 #endif /* CONFIG_OPTPROBES */
1035 
1036 #ifdef CONFIG_KPROBES_ON_FTRACE
/* ftrace ops used for kprobes without a post_handler */
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS,
};

/* ftrace ops used for kprobes with a post_handler (adds IPMODIFY) */
static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

/* Number of kprobes currently armed through each of the ops above */
static int kprobe_ipmodify_enabled;
static int kprobe_ftrace_enabled;
1049 
1050 /* Caller must lock 'kprobe_mutex' */
1051 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1052 			       int *cnt)
1053 {
1054 	int ret = 0;
1055 
1056 	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
1057 	if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret))
1058 		return ret;
1059 
1060 	if (*cnt == 0) {
1061 		ret = register_ftrace_function(ops);
1062 		if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret))
1063 			goto err_ftrace;
1064 	}
1065 
1066 	(*cnt)++;
1067 	return ret;
1068 
1069 err_ftrace:
1070 	/*
1071 	 * At this point, sinec ops is not registered, we should be sefe from
1072 	 * registering empty filter.
1073 	 */
1074 	ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1075 	return ret;
1076 }
1077 
1078 static int arm_kprobe_ftrace(struct kprobe *p)
1079 {
1080 	bool ipmodify = (p->post_handler != NULL);
1081 
1082 	return __arm_kprobe_ftrace(p,
1083 		ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1084 		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1085 }
1086 
1087 /* Caller must lock 'kprobe_mutex'. */
1088 static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1089 				  int *cnt)
1090 {
1091 	int ret = 0;
1092 
1093 	if (*cnt == 1) {
1094 		ret = unregister_ftrace_function(ops);
1095 		if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret))
1096 			return ret;
1097 	}
1098 
1099 	(*cnt)--;
1100 
1101 	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1102 	WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n",
1103 		  p->addr, ret);
1104 	return ret;
1105 }
1106 
1107 static int disarm_kprobe_ftrace(struct kprobe *p)
1108 {
1109 	bool ipmodify = (p->post_handler != NULL);
1110 
1111 	return __disarm_kprobe_ftrace(p,
1112 		ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1113 		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1114 }
1115 #else	/* !CONFIG_KPROBES_ON_FTRACE */
1116 static inline int arm_kprobe_ftrace(struct kprobe *p)
1117 {
1118 	return -ENODEV;
1119 }
1120 
1121 static inline int disarm_kprobe_ftrace(struct kprobe *p)
1122 {
1123 	return -ENODEV;
1124 }
1125 #endif
1126 
/*
 * Run the arch preparation step for 'p'. A probe flagged as ftrace-based
 * takes the ftrace variant, so the caller must have ensured that
 * 'p->addr' really is an ftrace location.
 */
static int prepare_kprobe(struct kprobe *p)
{
	if (!kprobe_ftrace(p))
		return arch_prepare_kprobe(p);

	return arch_prepare_kprobe_ftrace(p);
}
1135 
1136 /* Arm a kprobe with 'text_mutex'. */
1137 static int arm_kprobe(struct kprobe *kp)
1138 {
1139 	if (unlikely(kprobe_ftrace(kp)))
1140 		return arm_kprobe_ftrace(kp);
1141 
1142 	cpus_read_lock();
1143 	mutex_lock(&text_mutex);
1144 	__arm_kprobe(kp);
1145 	mutex_unlock(&text_mutex);
1146 	cpus_read_unlock();
1147 
1148 	return 0;
1149 }
1150 
1151 /* Disarm a kprobe with 'text_mutex'. */
1152 static int disarm_kprobe(struct kprobe *kp, bool reopt)
1153 {
1154 	if (unlikely(kprobe_ftrace(kp)))
1155 		return disarm_kprobe_ftrace(kp);
1156 
1157 	cpus_read_lock();
1158 	mutex_lock(&text_mutex);
1159 	__disarm_kprobe(kp, reopt);
1160 	mutex_unlock(&text_mutex);
1161 	cpus_read_unlock();
1162 
1163 	return 0;
1164 }
1165 
1166 /*
1167  * Aggregate handlers for multiple kprobes support - these handlers
1168  * take care of invoking the individual kprobe handlers on p->list
1169  */
1170 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
1171 {
1172 	struct kprobe *kp;
1173 
1174 	list_for_each_entry_rcu(kp, &p->list, list) {
1175 		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
1176 			set_kprobe_instance(kp);
1177 			if (kp->pre_handler(kp, regs))
1178 				return 1;
1179 		}
1180 		reset_kprobe_instance();
1181 	}
1182 	return 0;
1183 }
1184 NOKPROBE_SYMBOL(aggr_pre_handler);
1185 
1186 static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
1187 			      unsigned long flags)
1188 {
1189 	struct kprobe *kp;
1190 
1191 	list_for_each_entry_rcu(kp, &p->list, list) {
1192 		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
1193 			set_kprobe_instance(kp);
1194 			kp->post_handler(kp, regs, flags);
1195 			reset_kprobe_instance();
1196 		}
1197 	}
1198 }
1199 NOKPROBE_SYMBOL(aggr_post_handler);
1200 
1201 /* Walks the list and increments 'nmissed' if 'p' has child probes. */
1202 void kprobes_inc_nmissed_count(struct kprobe *p)
1203 {
1204 	struct kprobe *kp;
1205 
1206 	if (!kprobe_aggrprobe(p)) {
1207 		p->nmissed++;
1208 	} else {
1209 		list_for_each_entry_rcu(kp, &p->list, list)
1210 			kp->nmissed++;
1211 	}
1212 }
1213 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
1214 
1215 static void free_rp_inst_rcu(struct rcu_head *head)
1216 {
1217 	struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
1218 
1219 	if (refcount_dec_and_test(&ri->rph->ref))
1220 		kfree(ri->rph);
1221 	kfree(ri);
1222 }
1223 NOKPROBE_SYMBOL(free_rp_inst_rcu);
1224 
1225 static void recycle_rp_inst(struct kretprobe_instance *ri)
1226 {
1227 	struct kretprobe *rp = get_kretprobe(ri);
1228 
1229 	if (likely(rp))
1230 		freelist_add(&ri->freelist, &rp->freelist);
1231 	else
1232 		call_rcu(&ri->rcu, free_rp_inst_rcu);
1233 }
1234 NOKPROBE_SYMBOL(recycle_rp_inst);
1235 
1236 static struct kprobe kprobe_busy = {
1237 	.addr = (void *) get_kprobe,
1238 };
1239 
1240 void kprobe_busy_begin(void)
1241 {
1242 	struct kprobe_ctlblk *kcb;
1243 
1244 	preempt_disable();
1245 	__this_cpu_write(current_kprobe, &kprobe_busy);
1246 	kcb = get_kprobe_ctlblk();
1247 	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1248 }
1249 
1250 void kprobe_busy_end(void)
1251 {
1252 	__this_cpu_write(current_kprobe, NULL);
1253 	preempt_enable();
1254 }
1255 
1256 /*
1257  * This function is called from finish_task_switch() when task 'tk' becomes
1258  * dead, so that we can recycle any kretprobe instances associated
1259  * with this task. These left over instances represent probed functions
1260  * that have been called but will never return.
1261  */
1262 void kprobe_flush_task(struct task_struct *tk)
1263 {
1264 	struct kretprobe_instance *ri;
1265 	struct llist_node *node;
1266 
1267 	/* Early boot, not yet initialized. */
1268 	if (unlikely(!kprobes_initialized))
1269 		return;
1270 
1271 	kprobe_busy_begin();
1272 
1273 	node = __llist_del_all(&tk->kretprobe_instances);
1274 	while (node) {
1275 		ri = container_of(node, struct kretprobe_instance, llist);
1276 		node = node->next;
1277 
1278 		recycle_rp_inst(ri);
1279 	}
1280 
1281 	kprobe_busy_end();
1282 }
1283 NOKPROBE_SYMBOL(kprobe_flush_task);
1284 
1285 static inline void free_rp_inst(struct kretprobe *rp)
1286 {
1287 	struct kretprobe_instance *ri;
1288 	struct freelist_node *node;
1289 	int count = 0;
1290 
1291 	node = rp->freelist.head;
1292 	while (node) {
1293 		ri = container_of(node, struct kretprobe_instance, freelist);
1294 		node = node->next;
1295 
1296 		kfree(ri);
1297 		count++;
1298 	}
1299 
1300 	if (refcount_sub_and_test(count, &rp->rph->ref)) {
1301 		kfree(rp->rph);
1302 		rp->rph = NULL;
1303 	}
1304 }
1305 
1306 /* Add the new probe to 'ap->list'. */
1307 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
1308 {
1309 	if (p->post_handler)
1310 		unoptimize_kprobe(ap, true);	/* Fall back to normal kprobe */
1311 
1312 	list_add_rcu(&p->list, &ap->list);
1313 	if (p->post_handler && !ap->post_handler)
1314 		ap->post_handler = aggr_post_handler;
1315 
1316 	return 0;
1317 }
1318 
1319 /*
1320  * Fill in the required fields of the aggregator kprobe. Replace the
1321  * earlier kprobe in the hlist with the aggregator kprobe.
1322  */
1323 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
1324 {
1325 	/* Copy the insn slot of 'p' to 'ap'. */
1326 	copy_kprobe(p, ap);
1327 	flush_insn_slot(ap);
1328 	ap->addr = p->addr;
1329 	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
1330 	ap->pre_handler = aggr_pre_handler;
1331 	/* We don't care the kprobe which has gone. */
1332 	if (p->post_handler && !kprobe_gone(p))
1333 		ap->post_handler = aggr_post_handler;
1334 
1335 	INIT_LIST_HEAD(&ap->list);
1336 	INIT_HLIST_NODE(&ap->hlist);
1337 
1338 	list_add_rcu(&p->list, &ap->list);
1339 	hlist_replace_rcu(&p->hlist, &ap->hlist);
1340 }
1341 
1342 /*
1343  * This registers the second or subsequent kprobe at the same address.
1344  */
1345 static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
1346 {
1347 	int ret = 0;
1348 	struct kprobe *ap = orig_p;
1349 
1350 	cpus_read_lock();
1351 
1352 	/* For preparing optimization, jump_label_text_reserved() is called */
1353 	jump_label_lock();
1354 	mutex_lock(&text_mutex);
1355 
1356 	if (!kprobe_aggrprobe(orig_p)) {
1357 		/* If 'orig_p' is not an 'aggr_kprobe', create new one. */
1358 		ap = alloc_aggr_kprobe(orig_p);
1359 		if (!ap) {
1360 			ret = -ENOMEM;
1361 			goto out;
1362 		}
1363 		init_aggr_kprobe(ap, orig_p);
1364 	} else if (kprobe_unused(ap)) {
1365 		/* This probe is going to die. Rescue it */
1366 		ret = reuse_unused_kprobe(ap);
1367 		if (ret)
1368 			goto out;
1369 	}
1370 
1371 	if (kprobe_gone(ap)) {
1372 		/*
1373 		 * Attempting to insert new probe at the same location that
1374 		 * had a probe in the module vaddr area which already
1375 		 * freed. So, the instruction slot has already been
1376 		 * released. We need a new slot for the new probe.
1377 		 */
1378 		ret = arch_prepare_kprobe(ap);
1379 		if (ret)
1380 			/*
1381 			 * Even if fail to allocate new slot, don't need to
1382 			 * free the 'ap'. It will be used next time, or
1383 			 * freed by unregister_kprobe().
1384 			 */
1385 			goto out;
1386 
1387 		/* Prepare optimized instructions if possible. */
1388 		prepare_optimized_kprobe(ap);
1389 
1390 		/*
1391 		 * Clear gone flag to prevent allocating new slot again, and
1392 		 * set disabled flag because it is not armed yet.
1393 		 */
1394 		ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
1395 			    | KPROBE_FLAG_DISABLED;
1396 	}
1397 
1398 	/* Copy the insn slot of 'p' to 'ap'. */
1399 	copy_kprobe(ap, p);
1400 	ret = add_new_kprobe(ap, p);
1401 
1402 out:
1403 	mutex_unlock(&text_mutex);
1404 	jump_label_unlock();
1405 	cpus_read_unlock();
1406 
1407 	if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
1408 		ap->flags &= ~KPROBE_FLAG_DISABLED;
1409 		if (!kprobes_all_disarmed) {
1410 			/* Arm the breakpoint again. */
1411 			ret = arm_kprobe(ap);
1412 			if (ret) {
1413 				ap->flags |= KPROBE_FLAG_DISABLED;
1414 				list_del_rcu(&p->list);
1415 				synchronize_rcu();
1416 			}
1417 		}
1418 	}
1419 	return ret;
1420 }
1421 
1422 bool __weak arch_within_kprobe_blacklist(unsigned long addr)
1423 {
1424 	/* The '__kprobes' functions and entry code must not be probed. */
1425 	return addr >= (unsigned long)__kprobes_text_start &&
1426 	       addr < (unsigned long)__kprobes_text_end;
1427 }
1428 
1429 static bool __within_kprobe_blacklist(unsigned long addr)
1430 {
1431 	struct kprobe_blacklist_entry *ent;
1432 
1433 	if (arch_within_kprobe_blacklist(addr))
1434 		return true;
1435 	/*
1436 	 * If 'kprobe_blacklist' is defined, check the address and
1437 	 * reject any probe registration in the prohibited area.
1438 	 */
1439 	list_for_each_entry(ent, &kprobe_blacklist, list) {
1440 		if (addr >= ent->start_addr && addr < ent->end_addr)
1441 			return true;
1442 	}
1443 	return false;
1444 }
1445 
1446 bool within_kprobe_blacklist(unsigned long addr)
1447 {
1448 	char symname[KSYM_NAME_LEN], *p;
1449 
1450 	if (__within_kprobe_blacklist(addr))
1451 		return true;
1452 
1453 	/* Check if the address is on a suffixed-symbol */
1454 	if (!lookup_symbol_name(addr, symname)) {
1455 		p = strchr(symname, '.');
1456 		if (!p)
1457 			return false;
1458 		*p = '\0';
1459 		addr = (unsigned long)kprobe_lookup_name(symname, 0);
1460 		if (addr)
1461 			return __within_kprobe_blacklist(addr);
1462 	}
1463 	return false;
1464 }
1465 
1466 /*
1467  * If 'symbol_name' is specified, look it up and add the 'offset'
1468  * to it. This way, we can specify a relative address to a symbol.
1469  * This returns encoded errors if it fails to look up symbol or invalid
1470  * combination of parameters.
1471  */
1472 static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
1473 			const char *symbol_name, unsigned int offset)
1474 {
1475 	if ((symbol_name && addr) || (!symbol_name && !addr))
1476 		goto invalid;
1477 
1478 	if (symbol_name) {
1479 		addr = kprobe_lookup_name(symbol_name, offset);
1480 		if (!addr)
1481 			return ERR_PTR(-ENOENT);
1482 	}
1483 
1484 	addr = (kprobe_opcode_t *)(((char *)addr) + offset);
1485 	if (addr)
1486 		return addr;
1487 
1488 invalid:
1489 	return ERR_PTR(-EINVAL);
1490 }
1491 
1492 static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
1493 {
1494 	return _kprobe_addr(p->addr, p->symbol_name, p->offset);
1495 }
1496 
1497 /*
1498  * Check the 'p' is valid and return the aggregator kprobe
1499  * at the same address.
1500  */
1501 static struct kprobe *__get_valid_kprobe(struct kprobe *p)
1502 {
1503 	struct kprobe *ap, *list_p;
1504 
1505 	lockdep_assert_held(&kprobe_mutex);
1506 
1507 	ap = get_kprobe(p->addr);
1508 	if (unlikely(!ap))
1509 		return NULL;
1510 
1511 	if (p != ap) {
1512 		list_for_each_entry(list_p, &ap->list, list)
1513 			if (list_p == p)
1514 			/* kprobe p is a valid probe */
1515 				goto valid;
1516 		return NULL;
1517 	}
1518 valid:
1519 	return ap;
1520 }
1521 
1522 /*
1523  * Warn and return error if the kprobe is being re-registered since
1524  * there must be a software bug.
1525  */
1526 static inline int warn_kprobe_rereg(struct kprobe *p)
1527 {
1528 	int ret = 0;
1529 
1530 	mutex_lock(&kprobe_mutex);
1531 	if (WARN_ON_ONCE(__get_valid_kprobe(p)))
1532 		ret = -EINVAL;
1533 	mutex_unlock(&kprobe_mutex);
1534 
1535 	return ret;
1536 }
1537 
1538 static int check_ftrace_location(struct kprobe *p)
1539 {
1540 	unsigned long ftrace_addr;
1541 
1542 	ftrace_addr = ftrace_location((unsigned long)p->addr);
1543 	if (ftrace_addr) {
1544 #ifdef CONFIG_KPROBES_ON_FTRACE
1545 		/* Given address is not on the instruction boundary */
1546 		if ((unsigned long)p->addr != ftrace_addr)
1547 			return -EILSEQ;
1548 		p->flags |= KPROBE_FLAG_FTRACE;
1549 #else	/* !CONFIG_KPROBES_ON_FTRACE */
1550 		return -EINVAL;
1551 #endif
1552 	}
1553 	return 0;
1554 }
1555 
1556 static int check_kprobe_address_safe(struct kprobe *p,
1557 				     struct module **probed_mod)
1558 {
1559 	int ret;
1560 
1561 	ret = check_ftrace_location(p);
1562 	if (ret)
1563 		return ret;
1564 	jump_label_lock();
1565 	preempt_disable();
1566 
1567 	/* Ensure it is not in reserved area nor out of text */
1568 	if (!kernel_text_address((unsigned long) p->addr) ||
1569 	    within_kprobe_blacklist((unsigned long) p->addr) ||
1570 	    jump_label_text_reserved(p->addr, p->addr) ||
1571 	    static_call_text_reserved(p->addr, p->addr) ||
1572 	    find_bug((unsigned long)p->addr)) {
1573 		ret = -EINVAL;
1574 		goto out;
1575 	}
1576 
1577 	/* Check if 'p' is probing a module. */
1578 	*probed_mod = __module_text_address((unsigned long) p->addr);
1579 	if (*probed_mod) {
1580 		/*
1581 		 * We must hold a refcount of the probed module while updating
1582 		 * its code to prohibit unexpected unloading.
1583 		 */
1584 		if (unlikely(!try_module_get(*probed_mod))) {
1585 			ret = -ENOENT;
1586 			goto out;
1587 		}
1588 
1589 		/*
1590 		 * If the module freed '.init.text', we couldn't insert
1591 		 * kprobes in there.
1592 		 */
1593 		if (within_module_init((unsigned long)p->addr, *probed_mod) &&
1594 		    (*probed_mod)->state != MODULE_STATE_COMING) {
1595 			module_put(*probed_mod);
1596 			*probed_mod = NULL;
1597 			ret = -ENOENT;
1598 		}
1599 	}
1600 out:
1601 	preempt_enable();
1602 	jump_label_unlock();
1603 
1604 	return ret;
1605 }
1606 
1607 int register_kprobe(struct kprobe *p)
1608 {
1609 	int ret;
1610 	struct kprobe *old_p;
1611 	struct module *probed_mod;
1612 	kprobe_opcode_t *addr;
1613 
1614 	/* Adjust probe address from symbol */
1615 	addr = kprobe_addr(p);
1616 	if (IS_ERR(addr))
1617 		return PTR_ERR(addr);
1618 	p->addr = addr;
1619 
1620 	ret = warn_kprobe_rereg(p);
1621 	if (ret)
1622 		return ret;
1623 
1624 	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
1625 	p->flags &= KPROBE_FLAG_DISABLED;
1626 	p->nmissed = 0;
1627 	INIT_LIST_HEAD(&p->list);
1628 
1629 	ret = check_kprobe_address_safe(p, &probed_mod);
1630 	if (ret)
1631 		return ret;
1632 
1633 	mutex_lock(&kprobe_mutex);
1634 
1635 	old_p = get_kprobe(p->addr);
1636 	if (old_p) {
1637 		/* Since this may unoptimize 'old_p', locking 'text_mutex'. */
1638 		ret = register_aggr_kprobe(old_p, p);
1639 		goto out;
1640 	}
1641 
1642 	cpus_read_lock();
1643 	/* Prevent text modification */
1644 	mutex_lock(&text_mutex);
1645 	ret = prepare_kprobe(p);
1646 	mutex_unlock(&text_mutex);
1647 	cpus_read_unlock();
1648 	if (ret)
1649 		goto out;
1650 
1651 	INIT_HLIST_NODE(&p->hlist);
1652 	hlist_add_head_rcu(&p->hlist,
1653 		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
1654 
1655 	if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
1656 		ret = arm_kprobe(p);
1657 		if (ret) {
1658 			hlist_del_rcu(&p->hlist);
1659 			synchronize_rcu();
1660 			goto out;
1661 		}
1662 	}
1663 
1664 	/* Try to optimize kprobe */
1665 	try_to_optimize_kprobe(p);
1666 out:
1667 	mutex_unlock(&kprobe_mutex);
1668 
1669 	if (probed_mod)
1670 		module_put(probed_mod);
1671 
1672 	return ret;
1673 }
1674 EXPORT_SYMBOL_GPL(register_kprobe);
1675 
1676 /* Check if all probes on the 'ap' are disabled. */
1677 static int aggr_kprobe_disabled(struct kprobe *ap)
1678 {
1679 	struct kprobe *kp;
1680 
1681 	lockdep_assert_held(&kprobe_mutex);
1682 
1683 	list_for_each_entry(kp, &ap->list, list)
1684 		if (!kprobe_disabled(kp))
1685 			/*
1686 			 * Since there is an active probe on the list,
1687 			 * we can't disable this 'ap'.
1688 			 */
1689 			return 0;
1690 
1691 	return 1;
1692 }
1693 
1694 /* Disable one kprobe: Make sure called under 'kprobe_mutex' is locked. */
1695 static struct kprobe *__disable_kprobe(struct kprobe *p)
1696 {
1697 	struct kprobe *orig_p;
1698 	int ret;
1699 
1700 	/* Get an original kprobe for return */
1701 	orig_p = __get_valid_kprobe(p);
1702 	if (unlikely(orig_p == NULL))
1703 		return ERR_PTR(-EINVAL);
1704 
1705 	if (!kprobe_disabled(p)) {
1706 		/* Disable probe if it is a child probe */
1707 		if (p != orig_p)
1708 			p->flags |= KPROBE_FLAG_DISABLED;
1709 
1710 		/* Try to disarm and disable this/parent probe */
1711 		if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1712 			/*
1713 			 * If 'kprobes_all_disarmed' is set, 'orig_p'
1714 			 * should have already been disarmed, so
1715 			 * skip unneed disarming process.
1716 			 */
1717 			if (!kprobes_all_disarmed) {
1718 				ret = disarm_kprobe(orig_p, true);
1719 				if (ret) {
1720 					p->flags &= ~KPROBE_FLAG_DISABLED;
1721 					return ERR_PTR(ret);
1722 				}
1723 			}
1724 			orig_p->flags |= KPROBE_FLAG_DISABLED;
1725 		}
1726 	}
1727 
1728 	return orig_p;
1729 }
1730 
1731 /*
1732  * Unregister a kprobe without a scheduler synchronization.
1733  */
1734 static int __unregister_kprobe_top(struct kprobe *p)
1735 {
1736 	struct kprobe *ap, *list_p;
1737 
1738 	/* Disable kprobe. This will disarm it if needed. */
1739 	ap = __disable_kprobe(p);
1740 	if (IS_ERR(ap))
1741 		return PTR_ERR(ap);
1742 
1743 	if (ap == p)
1744 		/*
1745 		 * This probe is an independent(and non-optimized) kprobe
1746 		 * (not an aggrprobe). Remove from the hash list.
1747 		 */
1748 		goto disarmed;
1749 
1750 	/* Following process expects this probe is an aggrprobe */
1751 	WARN_ON(!kprobe_aggrprobe(ap));
1752 
1753 	if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
1754 		/*
1755 		 * !disarmed could be happen if the probe is under delayed
1756 		 * unoptimizing.
1757 		 */
1758 		goto disarmed;
1759 	else {
1760 		/* If disabling probe has special handlers, update aggrprobe */
1761 		if (p->post_handler && !kprobe_gone(p)) {
1762 			list_for_each_entry(list_p, &ap->list, list) {
1763 				if ((list_p != p) && (list_p->post_handler))
1764 					goto noclean;
1765 			}
1766 			ap->post_handler = NULL;
1767 		}
1768 noclean:
1769 		/*
1770 		 * Remove from the aggrprobe: this path will do nothing in
1771 		 * __unregister_kprobe_bottom().
1772 		 */
1773 		list_del_rcu(&p->list);
1774 		if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
1775 			/*
1776 			 * Try to optimize this probe again, because post
1777 			 * handler may have been changed.
1778 			 */
1779 			optimize_kprobe(ap);
1780 	}
1781 	return 0;
1782 
1783 disarmed:
1784 	hlist_del_rcu(&ap->hlist);
1785 	return 0;
1786 }
1787 
1788 static void __unregister_kprobe_bottom(struct kprobe *p)
1789 {
1790 	struct kprobe *ap;
1791 
1792 	if (list_empty(&p->list))
1793 		/* This is an independent kprobe */
1794 		arch_remove_kprobe(p);
1795 	else if (list_is_singular(&p->list)) {
1796 		/* This is the last child of an aggrprobe */
1797 		ap = list_entry(p->list.next, struct kprobe, list);
1798 		list_del(&p->list);
1799 		free_aggr_kprobe(ap);
1800 	}
1801 	/* Otherwise, do nothing. */
1802 }
1803 
1804 int register_kprobes(struct kprobe **kps, int num)
1805 {
1806 	int i, ret = 0;
1807 
1808 	if (num <= 0)
1809 		return -EINVAL;
1810 	for (i = 0; i < num; i++) {
1811 		ret = register_kprobe(kps[i]);
1812 		if (ret < 0) {
1813 			if (i > 0)
1814 				unregister_kprobes(kps, i);
1815 			break;
1816 		}
1817 	}
1818 	return ret;
1819 }
1820 EXPORT_SYMBOL_GPL(register_kprobes);
1821 
/* Unregister a single kprobe via the array-based unregister_kprobes(). */
void unregister_kprobe(struct kprobe *p)
{
	struct kprobe *single = p;

	unregister_kprobes(&single, 1);
}
EXPORT_SYMBOL_GPL(unregister_kprobe);
1827 
1828 void unregister_kprobes(struct kprobe **kps, int num)
1829 {
1830 	int i;
1831 
1832 	if (num <= 0)
1833 		return;
1834 	mutex_lock(&kprobe_mutex);
1835 	for (i = 0; i < num; i++)
1836 		if (__unregister_kprobe_top(kps[i]) < 0)
1837 			kps[i]->addr = NULL;
1838 	mutex_unlock(&kprobe_mutex);
1839 
1840 	synchronize_rcu();
1841 	for (i = 0; i < num; i++)
1842 		if (kps[i]->addr)
1843 			__unregister_kprobe_bottom(kps[i]);
1844 }
1845 EXPORT_SYMBOL_GPL(unregister_kprobes);
1846 
1847 int __weak kprobe_exceptions_notify(struct notifier_block *self,
1848 					unsigned long val, void *data)
1849 {
1850 	return NOTIFY_DONE;
1851 }
1852 NOKPROBE_SYMBOL(kprobe_exceptions_notify);
1853 
1854 static struct notifier_block kprobe_exceptions_nb = {
1855 	.notifier_call = kprobe_exceptions_notify,
1856 	.priority = 0x7fffffff /* we need to be notified first */
1857 };
1858 
1859 unsigned long __weak arch_deref_entry_point(void *entry)
1860 {
1861 	return (unsigned long)entry;
1862 }
1863 
1864 #ifdef CONFIG_KRETPROBES
1865 
1866 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
1867 					     void *trampoline_address,
1868 					     void *frame_pointer)
1869 {
1870 	kprobe_opcode_t *correct_ret_addr = NULL;
1871 	struct kretprobe_instance *ri = NULL;
1872 	struct llist_node *first, *node;
1873 	struct kretprobe *rp;
1874 
1875 	/* Find all nodes for this frame. */
1876 	first = node = current->kretprobe_instances.first;
1877 	while (node) {
1878 		ri = container_of(node, struct kretprobe_instance, llist);
1879 
1880 		BUG_ON(ri->fp != frame_pointer);
1881 
1882 		if (ri->ret_addr != trampoline_address) {
1883 			correct_ret_addr = ri->ret_addr;
1884 			/*
1885 			 * This is the real return address. Any other
1886 			 * instances associated with this task are for
1887 			 * other calls deeper on the call stack
1888 			 */
1889 			goto found;
1890 		}
1891 
1892 		node = node->next;
1893 	}
1894 	pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n");
1895 	BUG_ON(1);
1896 
1897 found:
1898 	/* Unlink all nodes for this frame. */
1899 	current->kretprobe_instances.first = node->next;
1900 	node->next = NULL;
1901 
1902 	/* Run them..  */
1903 	while (first) {
1904 		ri = container_of(first, struct kretprobe_instance, llist);
1905 		first = first->next;
1906 
1907 		rp = get_kretprobe(ri);
1908 		if (rp && rp->handler) {
1909 			struct kprobe *prev = kprobe_running();
1910 
1911 			__this_cpu_write(current_kprobe, &rp->kp);
1912 			ri->ret_addr = correct_ret_addr;
1913 			rp->handler(ri, regs);
1914 			__this_cpu_write(current_kprobe, prev);
1915 		}
1916 
1917 		recycle_rp_inst(ri);
1918 	}
1919 
1920 	return (unsigned long)correct_ret_addr;
1921 }
1922 NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
1923 
1924 /*
1925  * This kprobe pre_handler is registered with every kretprobe. When probe
1926  * hits it will set up the return probe.
1927  */
1928 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
1929 {
1930 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
1931 	struct kretprobe_instance *ri;
1932 	struct freelist_node *fn;
1933 
1934 	fn = freelist_try_get(&rp->freelist);
1935 	if (!fn) {
1936 		rp->nmissed++;
1937 		return 0;
1938 	}
1939 
1940 	ri = container_of(fn, struct kretprobe_instance, freelist);
1941 
1942 	if (rp->entry_handler && rp->entry_handler(ri, regs)) {
1943 		freelist_add(&ri->freelist, &rp->freelist);
1944 		return 0;
1945 	}
1946 
1947 	arch_prepare_kretprobe(ri, regs);
1948 
1949 	__llist_add(&ri->llist, &current->kretprobe_instances);
1950 
1951 	return 0;
1952 }
1953 NOKPROBE_SYMBOL(pre_handler_kretprobe);
1954 
1955 bool __weak arch_kprobe_on_func_entry(unsigned long offset)
1956 {
1957 	return !offset;
1958 }
1959 
1960 /**
1961  * kprobe_on_func_entry() -- check whether given address is function entry
1962  * @addr: Target address
1963  * @sym:  Target symbol name
1964  * @offset: The offset from the symbol or the address
1965  *
1966  * This checks whether the given @addr+@offset or @sym+@offset is on the
1967  * function entry address or not.
1968  * This returns 0 if it is the function entry, or -EINVAL if it is not.
1969  * And also it returns -ENOENT if it fails the symbol or address lookup.
1970  * Caller must pass @addr or @sym (either one must be NULL), or this
1971  * returns -EINVAL.
1972  */
1973 int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
1974 {
1975 	kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
1976 
1977 	if (IS_ERR(kp_addr))
1978 		return PTR_ERR(kp_addr);
1979 
1980 	if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset))
1981 		return -ENOENT;
1982 
1983 	if (!arch_kprobe_on_func_entry(offset))
1984 		return -EINVAL;
1985 
1986 	return 0;
1987 }
1988 
1989 int register_kretprobe(struct kretprobe *rp)
1990 {
1991 	int ret;
1992 	struct kretprobe_instance *inst;
1993 	int i;
1994 	void *addr;
1995 
1996 	ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset);
1997 	if (ret)
1998 		return ret;
1999 
2000 	/* If only 'rp->kp.addr' is specified, check reregistering kprobes */
2001 	if (rp->kp.addr && warn_kprobe_rereg(&rp->kp))
2002 		return -EINVAL;
2003 
2004 	if (kretprobe_blacklist_size) {
2005 		addr = kprobe_addr(&rp->kp);
2006 		if (IS_ERR(addr))
2007 			return PTR_ERR(addr);
2008 
2009 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2010 			if (kretprobe_blacklist[i].addr == addr)
2011 				return -EINVAL;
2012 		}
2013 	}
2014 
2015 	rp->kp.pre_handler = pre_handler_kretprobe;
2016 	rp->kp.post_handler = NULL;
2017 
2018 	/* Pre-allocate memory for max kretprobe instances */
2019 	if (rp->maxactive <= 0) {
2020 #ifdef CONFIG_PREEMPTION
2021 		rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
2022 #else
2023 		rp->maxactive = num_possible_cpus();
2024 #endif
2025 	}
2026 	rp->freelist.head = NULL;
2027 	rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
2028 	if (!rp->rph)
2029 		return -ENOMEM;
2030 
2031 	rp->rph->rp = rp;
2032 	for (i = 0; i < rp->maxactive; i++) {
2033 		inst = kzalloc(sizeof(struct kretprobe_instance) +
2034 			       rp->data_size, GFP_KERNEL);
2035 		if (inst == NULL) {
2036 			refcount_set(&rp->rph->ref, i);
2037 			free_rp_inst(rp);
2038 			return -ENOMEM;
2039 		}
2040 		inst->rph = rp->rph;
2041 		freelist_add(&inst->freelist, &rp->freelist);
2042 	}
2043 	refcount_set(&rp->rph->ref, i);
2044 
2045 	rp->nmissed = 0;
2046 	/* Establish function entry probe point */
2047 	ret = register_kprobe(&rp->kp);
2048 	if (ret != 0)
2049 		free_rp_inst(rp);
2050 	return ret;
2051 }
2052 EXPORT_SYMBOL_GPL(register_kretprobe);
2053 
2054 int register_kretprobes(struct kretprobe **rps, int num)
2055 {
2056 	int ret = 0, i;
2057 
2058 	if (num <= 0)
2059 		return -EINVAL;
2060 	for (i = 0; i < num; i++) {
2061 		ret = register_kretprobe(rps[i]);
2062 		if (ret < 0) {
2063 			if (i > 0)
2064 				unregister_kretprobes(rps, i);
2065 			break;
2066 		}
2067 	}
2068 	return ret;
2069 }
2070 EXPORT_SYMBOL_GPL(register_kretprobes);
2071 
/* Unregister a single kretprobe via the array-based unregister_kretprobes(). */
void unregister_kretprobe(struct kretprobe *rp)
{
	struct kretprobe *single = rp;

	unregister_kretprobes(&single, 1);
}
EXPORT_SYMBOL_GPL(unregister_kretprobe);
2077 
2078 void unregister_kretprobes(struct kretprobe **rps, int num)
2079 {
2080 	int i;
2081 
2082 	if (num <= 0)
2083 		return;
2084 	mutex_lock(&kprobe_mutex);
2085 	for (i = 0; i < num; i++) {
2086 		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
2087 			rps[i]->kp.addr = NULL;
2088 		rps[i]->rph->rp = NULL;
2089 	}
2090 	mutex_unlock(&kprobe_mutex);
2091 
2092 	synchronize_rcu();
2093 	for (i = 0; i < num; i++) {
2094 		if (rps[i]->kp.addr) {
2095 			__unregister_kprobe_bottom(&rps[i]->kp);
2096 			free_rp_inst(rps[i]);
2097 		}
2098 	}
2099 }
2100 EXPORT_SYMBOL_GPL(unregister_kretprobes);
2101 
2102 #else /* CONFIG_KRETPROBES */
2103 int register_kretprobe(struct kretprobe *rp)
2104 {
2105 	return -EOPNOTSUPP;
2106 }
2107 EXPORT_SYMBOL_GPL(register_kretprobe);
2108 
2109 int register_kretprobes(struct kretprobe **rps, int num)
2110 {
2111 	return -EOPNOTSUPP;
2112 }
2113 EXPORT_SYMBOL_GPL(register_kretprobes);
2114 
/* Without CONFIG_KRETPROBES there is nothing to unregister. */
void unregister_kretprobe(struct kretprobe *rp)
{
	/* Intentionally empty. */
}
EXPORT_SYMBOL_GPL(unregister_kretprobe);
2119 
/* Without CONFIG_KRETPROBES there is nothing to unregister. */
void unregister_kretprobes(struct kretprobe **rps, int num)
{
	/* Intentionally empty. */
}
EXPORT_SYMBOL_GPL(unregister_kretprobes);
2124 
/* Without CONFIG_KRETPROBES the pre-handler does nothing and returns 0. */
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
	int handled = 0;

	return handled;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
2130 
2131 #endif /* CONFIG_KRETPROBES */
2132 
2133 /* Set the kprobe gone and remove its instruction buffer. */
2134 static void kill_kprobe(struct kprobe *p)
2135 {
2136 	struct kprobe *kp;
2137 
2138 	lockdep_assert_held(&kprobe_mutex);
2139 
2140 	p->flags |= KPROBE_FLAG_GONE;
2141 	if (kprobe_aggrprobe(p)) {
2142 		/*
2143 		 * If this is an aggr_kprobe, we have to list all the
2144 		 * chained probes and mark them GONE.
2145 		 */
2146 		list_for_each_entry(kp, &p->list, list)
2147 			kp->flags |= KPROBE_FLAG_GONE;
2148 		p->post_handler = NULL;
2149 		kill_optimized_kprobe(p);
2150 	}
2151 	/*
2152 	 * Here, we can remove insn_slot safely, because no thread calls
2153 	 * the original probed function (which will be freed soon) any more.
2154 	 */
2155 	arch_remove_kprobe(p);
2156 
2157 	/*
2158 	 * The module is going away. We should disarm the kprobe which
2159 	 * is using ftrace, because ftrace framework is still available at
2160 	 * 'MODULE_STATE_GOING' notification.
2161 	 */
2162 	if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
2163 		disarm_kprobe_ftrace(p);
2164 }
2165 
2166 /* Disable one kprobe */
2167 int disable_kprobe(struct kprobe *kp)
2168 {
2169 	int ret = 0;
2170 	struct kprobe *p;
2171 
2172 	mutex_lock(&kprobe_mutex);
2173 
2174 	/* Disable this kprobe */
2175 	p = __disable_kprobe(kp);
2176 	if (IS_ERR(p))
2177 		ret = PTR_ERR(p);
2178 
2179 	mutex_unlock(&kprobe_mutex);
2180 	return ret;
2181 }
2182 EXPORT_SYMBOL_GPL(disable_kprobe);
2183 
2184 /* Enable one kprobe */
2185 int enable_kprobe(struct kprobe *kp)
2186 {
2187 	int ret = 0;
2188 	struct kprobe *p;
2189 
2190 	mutex_lock(&kprobe_mutex);
2191 
2192 	/* Check whether specified probe is valid. */
2193 	p = __get_valid_kprobe(kp);
2194 	if (unlikely(p == NULL)) {
2195 		ret = -EINVAL;
2196 		goto out;
2197 	}
2198 
2199 	if (kprobe_gone(kp)) {
2200 		/* This kprobe has gone, we couldn't enable it. */
2201 		ret = -EINVAL;
2202 		goto out;
2203 	}
2204 
2205 	if (p != kp)
2206 		kp->flags &= ~KPROBE_FLAG_DISABLED;
2207 
2208 	if (!kprobes_all_disarmed && kprobe_disabled(p)) {
2209 		p->flags &= ~KPROBE_FLAG_DISABLED;
2210 		ret = arm_kprobe(p);
2211 		if (ret)
2212 			p->flags |= KPROBE_FLAG_DISABLED;
2213 	}
2214 out:
2215 	mutex_unlock(&kprobe_mutex);
2216 	return ret;
2217 }
2218 EXPORT_SYMBOL_GPL(enable_kprobe);
2219 
2220 /* Caller must NOT call this in usual path. This is only for critical case */
2221 void dump_kprobe(struct kprobe *kp)
2222 {
2223 	pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n",
2224 	       kp->symbol_name, kp->offset, kp->addr);
2225 }
2226 NOKPROBE_SYMBOL(dump_kprobe);
2227 
2228 int kprobe_add_ksym_blacklist(unsigned long entry)
2229 {
2230 	struct kprobe_blacklist_entry *ent;
2231 	unsigned long offset = 0, size = 0;
2232 
2233 	if (!kernel_text_address(entry) ||
2234 	    !kallsyms_lookup_size_offset(entry, &size, &offset))
2235 		return -EINVAL;
2236 
2237 	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
2238 	if (!ent)
2239 		return -ENOMEM;
2240 	ent->start_addr = entry;
2241 	ent->end_addr = entry + size;
2242 	INIT_LIST_HEAD(&ent->list);
2243 	list_add_tail(&ent->list, &kprobe_blacklist);
2244 
2245 	return (int)size;
2246 }
2247 
/*
 * Add all symbols in the area [start, end) into the kprobe blacklist.
 *
 * Returns 0 once the whole area has been walked, or the first negative
 * error reported by kprobe_add_ksym_blacklist().
 */
int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
{
	unsigned long addr = start;
	int len;

	while (addr < end) {
		len = kprobe_add_ksym_blacklist(addr);
		if (len < 0)
			return len;
		/* In case of alias symbol (size 0), still step forward. */
		if (!len)
			len = 1;
		addr += len;
	}
	return 0;
}
2263 
2264 /* Remove all symbols in given area from kprobe blacklist */
2265 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
2266 {
2267 	struct kprobe_blacklist_entry *ent, *n;
2268 
2269 	list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
2270 		if (ent->start_addr < start || ent->start_addr >= end)
2271 			continue;
2272 		list_del(&ent->list);
2273 		kfree(ent);
2274 	}
2275 }
2276 
/* Remove the blacklist entries that start exactly at 'entry'. */
static void kprobe_remove_ksym_blacklist(unsigned long entry)
{
	unsigned long next = entry + 1;

	/* A one-byte wide area matches only start_addr == entry. */
	kprobe_remove_area_blacklist(entry, next);
}
2281 
2282 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
2283 				   char *type, char *sym)
2284 {
2285 	return -ERANGE;
2286 }
2287 
2288 int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
2289 		       char *sym)
2290 {
2291 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
2292 	if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
2293 		return 0;
2294 #ifdef CONFIG_OPTPROBES
2295 	if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
2296 		return 0;
2297 #endif
2298 #endif
2299 	if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
2300 		return 0;
2301 	return -ERANGE;
2302 }
2303 
2304 int __init __weak arch_populate_kprobe_blacklist(void)
2305 {
2306 	return 0;
2307 }
2308 
2309 /*
2310  * Lookup and populate the kprobe_blacklist.
2311  *
2312  * Unlike the kretprobe blacklist, we'll need to determine
2313  * the range of addresses that belong to the said functions,
2314  * since a kprobe need not necessarily be at the beginning
2315  * of a function.
2316  */
2317 static int __init populate_kprobe_blacklist(unsigned long *start,
2318 					     unsigned long *end)
2319 {
2320 	unsigned long entry;
2321 	unsigned long *iter;
2322 	int ret;
2323 
2324 	for (iter = start; iter < end; iter++) {
2325 		entry = arch_deref_entry_point((void *)*iter);
2326 		ret = kprobe_add_ksym_blacklist(entry);
2327 		if (ret == -EINVAL)
2328 			continue;
2329 		if (ret < 0)
2330 			return ret;
2331 	}
2332 
2333 	/* Symbols in '__kprobes_text' are blacklisted */
2334 	ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
2335 					(unsigned long)__kprobes_text_end);
2336 	if (ret)
2337 		return ret;
2338 
2339 	/* Symbols in 'noinstr' section are blacklisted */
2340 	ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
2341 					(unsigned long)__noinstr_text_end);
2342 
2343 	return ret ? : arch_populate_kprobe_blacklist();
2344 }
2345 
2346 static void add_module_kprobe_blacklist(struct module *mod)
2347 {
2348 	unsigned long start, end;
2349 	int i;
2350 
2351 	if (mod->kprobe_blacklist) {
2352 		for (i = 0; i < mod->num_kprobe_blacklist; i++)
2353 			kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
2354 	}
2355 
2356 	start = (unsigned long)mod->kprobes_text_start;
2357 	if (start) {
2358 		end = start + mod->kprobes_text_size;
2359 		kprobe_add_area_blacklist(start, end);
2360 	}
2361 
2362 	start = (unsigned long)mod->noinstr_text_start;
2363 	if (start) {
2364 		end = start + mod->noinstr_text_size;
2365 		kprobe_add_area_blacklist(start, end);
2366 	}
2367 }
2368 
2369 static void remove_module_kprobe_blacklist(struct module *mod)
2370 {
2371 	unsigned long start, end;
2372 	int i;
2373 
2374 	if (mod->kprobe_blacklist) {
2375 		for (i = 0; i < mod->num_kprobe_blacklist; i++)
2376 			kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
2377 	}
2378 
2379 	start = (unsigned long)mod->kprobes_text_start;
2380 	if (start) {
2381 		end = start + mod->kprobes_text_size;
2382 		kprobe_remove_area_blacklist(start, end);
2383 	}
2384 
2385 	start = (unsigned long)mod->noinstr_text_start;
2386 	if (start) {
2387 		end = start + mod->noinstr_text_size;
2388 		kprobe_remove_area_blacklist(start, end);
2389 	}
2390 }
2391 
2392 /* Module notifier call back, checking kprobes on the module */
2393 static int kprobes_module_callback(struct notifier_block *nb,
2394 				   unsigned long val, void *data)
2395 {
2396 	struct module *mod = data;
2397 	struct hlist_head *head;
2398 	struct kprobe *p;
2399 	unsigned int i;
2400 	int checkcore = (val == MODULE_STATE_GOING);
2401 
2402 	if (val == MODULE_STATE_COMING) {
2403 		mutex_lock(&kprobe_mutex);
2404 		add_module_kprobe_blacklist(mod);
2405 		mutex_unlock(&kprobe_mutex);
2406 	}
2407 	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
2408 		return NOTIFY_DONE;
2409 
2410 	/*
2411 	 * When 'MODULE_STATE_GOING' was notified, both of module '.text' and
2412 	 * '.init.text' sections would be freed. When 'MODULE_STATE_LIVE' was
2413 	 * notified, only '.init.text' section would be freed. We need to
2414 	 * disable kprobes which have been inserted in the sections.
2415 	 */
2416 	mutex_lock(&kprobe_mutex);
2417 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2418 		head = &kprobe_table[i];
2419 		hlist_for_each_entry(p, head, hlist)
2420 			if (within_module_init((unsigned long)p->addr, mod) ||
2421 			    (checkcore &&
2422 			     within_module_core((unsigned long)p->addr, mod))) {
2423 				/*
2424 				 * The vaddr this probe is installed will soon
2425 				 * be vfreed buy not synced to disk. Hence,
2426 				 * disarming the breakpoint isn't needed.
2427 				 *
2428 				 * Note, this will also move any optimized probes
2429 				 * that are pending to be removed from their
2430 				 * corresponding lists to the 'freeing_list' and
2431 				 * will not be touched by the delayed
2432 				 * kprobe_optimizer() work handler.
2433 				 */
2434 				kill_kprobe(p);
2435 			}
2436 	}
2437 	if (val == MODULE_STATE_GOING)
2438 		remove_module_kprobe_blacklist(mod);
2439 	mutex_unlock(&kprobe_mutex);
2440 	return NOTIFY_DONE;
2441 }
2442 
2443 static struct notifier_block kprobe_module_nb = {
2444 	.notifier_call = kprobes_module_callback,
2445 	.priority = 0
2446 };
2447 
2448 void kprobe_free_init_mem(void)
2449 {
2450 	void *start = (void *)(&__init_begin);
2451 	void *end = (void *)(&__init_end);
2452 	struct hlist_head *head;
2453 	struct kprobe *p;
2454 	int i;
2455 
2456 	mutex_lock(&kprobe_mutex);
2457 
2458 	/* Kill all kprobes on initmem because the target code has been freed. */
2459 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2460 		head = &kprobe_table[i];
2461 		hlist_for_each_entry(p, head, hlist) {
2462 			if (start <= (void *)p->addr && (void *)p->addr < end)
2463 				kill_kprobe(p);
2464 		}
2465 	}
2466 
2467 	mutex_unlock(&kprobe_mutex);
2468 }
2469 
2470 static int __init init_kprobes(void)
2471 {
2472 	int i, err = 0;
2473 
2474 	/* FIXME allocate the probe table, currently defined statically */
2475 	/* initialize all list heads */
2476 	for (i = 0; i < KPROBE_TABLE_SIZE; i++)
2477 		INIT_HLIST_HEAD(&kprobe_table[i]);
2478 
2479 	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
2480 					__stop_kprobe_blacklist);
2481 	if (err)
2482 		pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);
2483 
2484 	if (kretprobe_blacklist_size) {
2485 		/* lookup the function address from its name */
2486 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2487 			kretprobe_blacklist[i].addr =
2488 				kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
2489 			if (!kretprobe_blacklist[i].addr)
2490 				pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n",
2491 				       kretprobe_blacklist[i].name);
2492 		}
2493 	}
2494 
2495 	/* By default, kprobes are armed */
2496 	kprobes_all_disarmed = false;
2497 
2498 #if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT)
2499 	/* Init 'kprobe_optinsn_slots' for allocation */
2500 	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
2501 #endif
2502 
2503 	err = arch_init_kprobes();
2504 	if (!err)
2505 		err = register_die_notifier(&kprobe_exceptions_nb);
2506 	if (!err)
2507 		err = register_module_notifier(&kprobe_module_nb);
2508 
2509 	kprobes_initialized = (err == 0);
2510 
2511 	if (!err)
2512 		init_test_probes();
2513 	return err;
2514 }
2515 early_initcall(init_kprobes);
2516 
#if defined(CONFIG_OPTPROBES)
/*
 * Turn on kprobe optimization at subsys_initcall time.
 *
 * Enabling it kicks the optimizer, which depends on
 * synchronize_rcu_tasks() and ksoftirqd; the latter is not spawned yet
 * in early initcall, so the optimization is delayed until here.
 */
static int __init init_optprobes(void)
{
	optimize_all_kprobes();

	return 0;
}
subsys_initcall(init_optprobes);
#endif
2531 
2532 #ifdef CONFIG_DEBUG_FS
2533 static void report_probe(struct seq_file *pi, struct kprobe *p,
2534 		const char *sym, int offset, char *modname, struct kprobe *pp)
2535 {
2536 	char *kprobe_type;
2537 	void *addr = p->addr;
2538 
2539 	if (p->pre_handler == pre_handler_kretprobe)
2540 		kprobe_type = "r";
2541 	else
2542 		kprobe_type = "k";
2543 
2544 	if (!kallsyms_show_value(pi->file->f_cred))
2545 		addr = NULL;
2546 
2547 	if (sym)
2548 		seq_printf(pi, "%px  %s  %s+0x%x  %s ",
2549 			addr, kprobe_type, sym, offset,
2550 			(modname ? modname : " "));
2551 	else	/* try to use %pS */
2552 		seq_printf(pi, "%px  %s  %pS ",
2553 			addr, kprobe_type, p->addr);
2554 
2555 	if (!pp)
2556 		pp = p;
2557 	seq_printf(pi, "%s%s%s%s\n",
2558 		(kprobe_gone(p) ? "[GONE]" : ""),
2559 		((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
2560 		(kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
2561 		(kprobe_ftrace(pp) ? "[FTRACE]" : ""));
2562 }
2563 
2564 static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
2565 {
2566 	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
2567 }
2568 
2569 static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
2570 {
2571 	(*pos)++;
2572 	if (*pos >= KPROBE_TABLE_SIZE)
2573 		return NULL;
2574 	return pos;
2575 }
2576 
/* seq_file .stop: kprobe_seq_start() acquires nothing, so nothing is released. */
static void kprobe_seq_stop(struct seq_file *f, void *v)
{
}
2581 
2582 static int show_kprobe_addr(struct seq_file *pi, void *v)
2583 {
2584 	struct hlist_head *head;
2585 	struct kprobe *p, *kp;
2586 	const char *sym = NULL;
2587 	unsigned int i = *(loff_t *) v;
2588 	unsigned long offset = 0;
2589 	char *modname, namebuf[KSYM_NAME_LEN];
2590 
2591 	head = &kprobe_table[i];
2592 	preempt_disable();
2593 	hlist_for_each_entry_rcu(p, head, hlist) {
2594 		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
2595 					&offset, &modname, namebuf);
2596 		if (kprobe_aggrprobe(p)) {
2597 			list_for_each_entry_rcu(kp, &p->list, list)
2598 				report_probe(pi, kp, sym, offset, modname, p);
2599 		} else
2600 			report_probe(pi, p, sym, offset, modname, NULL);
2601 	}
2602 	preempt_enable();
2603 	return 0;
2604 }
2605 
2606 static const struct seq_operations kprobes_sops = {
2607 	.start = kprobe_seq_start,
2608 	.next  = kprobe_seq_next,
2609 	.stop  = kprobe_seq_stop,
2610 	.show  = show_kprobe_addr
2611 };
2612 
2613 DEFINE_SEQ_ATTRIBUTE(kprobes);
2614 
2615 /* kprobes/blacklist -- shows which functions can not be probed */
2616 static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
2617 {
2618 	mutex_lock(&kprobe_mutex);
2619 	return seq_list_start(&kprobe_blacklist, *pos);
2620 }
2621 
2622 static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
2623 {
2624 	return seq_list_next(v, &kprobe_blacklist, pos);
2625 }
2626 
2627 static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
2628 {
2629 	struct kprobe_blacklist_entry *ent =
2630 		list_entry(v, struct kprobe_blacklist_entry, list);
2631 
2632 	/*
2633 	 * If '/proc/kallsyms' is not showing kernel address, we won't
2634 	 * show them here either.
2635 	 */
2636 	if (!kallsyms_show_value(m->file->f_cred))
2637 		seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL,
2638 			   (void *)ent->start_addr);
2639 	else
2640 		seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
2641 			   (void *)ent->end_addr, (void *)ent->start_addr);
2642 	return 0;
2643 }
2644 
2645 static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
2646 {
2647 	mutex_unlock(&kprobe_mutex);
2648 }
2649 
2650 static const struct seq_operations kprobe_blacklist_sops = {
2651 	.start = kprobe_blacklist_seq_start,
2652 	.next  = kprobe_blacklist_seq_next,
2653 	.stop  = kprobe_blacklist_seq_stop,
2654 	.show  = kprobe_blacklist_seq_show,
2655 };
2656 DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist);
2657 
2658 static int arm_all_kprobes(void)
2659 {
2660 	struct hlist_head *head;
2661 	struct kprobe *p;
2662 	unsigned int i, total = 0, errors = 0;
2663 	int err, ret = 0;
2664 
2665 	mutex_lock(&kprobe_mutex);
2666 
2667 	/* If kprobes are armed, just return */
2668 	if (!kprobes_all_disarmed)
2669 		goto already_enabled;
2670 
2671 	/*
2672 	 * optimize_kprobe() called by arm_kprobe() checks
2673 	 * kprobes_all_disarmed, so set kprobes_all_disarmed before
2674 	 * arm_kprobe.
2675 	 */
2676 	kprobes_all_disarmed = false;
2677 	/* Arming kprobes doesn't optimize kprobe itself */
2678 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2679 		head = &kprobe_table[i];
2680 		/* Arm all kprobes on a best-effort basis */
2681 		hlist_for_each_entry(p, head, hlist) {
2682 			if (!kprobe_disabled(p)) {
2683 				err = arm_kprobe(p);
2684 				if (err)  {
2685 					errors++;
2686 					ret = err;
2687 				}
2688 				total++;
2689 			}
2690 		}
2691 	}
2692 
2693 	if (errors)
2694 		pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n",
2695 			errors, total);
2696 	else
2697 		pr_info("Kprobes globally enabled\n");
2698 
2699 already_enabled:
2700 	mutex_unlock(&kprobe_mutex);
2701 	return ret;
2702 }
2703 
2704 static int disarm_all_kprobes(void)
2705 {
2706 	struct hlist_head *head;
2707 	struct kprobe *p;
2708 	unsigned int i, total = 0, errors = 0;
2709 	int err, ret = 0;
2710 
2711 	mutex_lock(&kprobe_mutex);
2712 
2713 	/* If kprobes are already disarmed, just return */
2714 	if (kprobes_all_disarmed) {
2715 		mutex_unlock(&kprobe_mutex);
2716 		return 0;
2717 	}
2718 
2719 	kprobes_all_disarmed = true;
2720 
2721 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2722 		head = &kprobe_table[i];
2723 		/* Disarm all kprobes on a best-effort basis */
2724 		hlist_for_each_entry(p, head, hlist) {
2725 			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
2726 				err = disarm_kprobe(p, false);
2727 				if (err) {
2728 					errors++;
2729 					ret = err;
2730 				}
2731 				total++;
2732 			}
2733 		}
2734 	}
2735 
2736 	if (errors)
2737 		pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n",
2738 			errors, total);
2739 	else
2740 		pr_info("Kprobes globally disabled\n");
2741 
2742 	mutex_unlock(&kprobe_mutex);
2743 
2744 	/* Wait for disarming all kprobes by optimizer */
2745 	wait_for_kprobe_optimizer();
2746 
2747 	return ret;
2748 }
2749 
2750 /*
2751  * XXX: The debugfs bool file interface doesn't allow for callbacks
2752  * when the bool state is switched. We can reuse that facility when
2753  * available
2754  */
2755 static ssize_t read_enabled_file_bool(struct file *file,
2756 	       char __user *user_buf, size_t count, loff_t *ppos)
2757 {
2758 	char buf[3];
2759 
2760 	if (!kprobes_all_disarmed)
2761 		buf[0] = '1';
2762 	else
2763 		buf[0] = '0';
2764 	buf[1] = '\n';
2765 	buf[2] = 0x00;
2766 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
2767 }
2768 
2769 static ssize_t write_enabled_file_bool(struct file *file,
2770 	       const char __user *user_buf, size_t count, loff_t *ppos)
2771 {
2772 	bool enable;
2773 	int ret;
2774 
2775 	ret = kstrtobool_from_user(user_buf, count, &enable);
2776 	if (ret)
2777 		return ret;
2778 
2779 	ret = enable ? arm_all_kprobes() : disarm_all_kprobes();
2780 	if (ret)
2781 		return ret;
2782 
2783 	return count;
2784 }
2785 
2786 static const struct file_operations fops_kp = {
2787 	.read =         read_enabled_file_bool,
2788 	.write =        write_enabled_file_bool,
2789 	.llseek =	default_llseek,
2790 };
2791 
2792 static int __init debugfs_kprobe_init(void)
2793 {
2794 	struct dentry *dir;
2795 
2796 	dir = debugfs_create_dir("kprobes", NULL);
2797 
2798 	debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops);
2799 
2800 	debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp);
2801 
2802 	debugfs_create_file("blacklist", 0400, dir, NULL,
2803 			    &kprobe_blacklist_fops);
2804 
2805 	return 0;
2806 }
2807 
2808 late_initcall(debugfs_kprobe_init);
2809 #endif /* CONFIG_DEBUG_FS */
2810