// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "profiler.h"
#include "err.h"
#include "bpf_experimental.h"
#include "bpf_compiler.h"

#ifndef NULL
#define NULL 0
#endif

#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)

#define KILL_DATA_ARRAY_SIZE 8

struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};

union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};

volatile struct profiler_config_struct bpf_config = {};

#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)

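/*
 * CO-RE "flavor" definitions (libbpf ignores the ___52 suffix when matching
 * against kernel BTF) describing the older kernfs layout, where the inode
 * number lives in a kernfs_node_id union instead of a plain u64 id.
 */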
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};

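/*
 * Per-CPU scratch buffer big enough for any event record; keeps the large
 * structs off the 512-byte BPF stack.
 */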
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

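/* Allow/deny lists populated from user space to filter filemod and exec events. */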
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0]))
#endif

static INLINE bool IS_ERR(const void* ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

static INLINE u32 get_userspace_pid()
{
	return bpf_get_current_pid_tgid() >> 32;
}

static INLINE bool is_init_process(u32 tgid)
{
	return tgid == 1 || tgid == 0;
}

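/* Copy min(len, max) bytes from src to dst; returns the copied length, or 0 on read failure. */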
static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read_kernel(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read_kernel(dst, 1, src))
			return 0;
	}
	return len;
}

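/* Return the slot in arr_struct already owned by sender pid spid, or -1 if none. */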
static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}

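/*
 * Walk real_parent links up to MAX_ANCESTORS levels, recording the pid,
 * exec_id and start time of each ancestor until init (or a NULL parent) is
 * reached.
 */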
static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		ancestors_data->num_ancestors = num_ancestors;
	}
}

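/*
 * Append the cgroup name components from cgroup_node up to the hierarchy
 * root into payload (leaf component first); *root_pos receives the offset at
 * which cgroup_root_node's component was written. Returns the advanced
 * payload pointer.
 */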
static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}

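/*
 * Fetch the cgroup inode number, handling both the old kernfs_node_id union
 * layout and the current plain u64 id field via a CO-RE field-existence check.
 */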
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}

extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};

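/*
 * Fill cgroup_data with the root/proc cgroup inodes, mtimes and names for
 * the task, preferring the cgroup v1 "pids" hierarchy when the v1 resolver
 * is enabled; appends the name strings (and optionally the full path) to
 * payload and returns the advanced payload pointer.
 */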
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
		__pragma_loop_unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(root_kernfs, name));
	if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) {
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(proc_kernfs, name));
	if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) {
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}

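/* Fill the common event metadata (uid/gid, pid, exec_id, start time) and append the task comm to payload. */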
static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}

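/* Build a KILL_EVENT record in the per-CPU heap for signal sig sent by spid to tpid. */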
static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}

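/*
 * Track kill() calls for signals selected by KILL_SIGNALS: one slot per
 * sender pid is kept in var_tpid_to_data, keyed by target pid, and repeated
 * kills within STALE_INFO seconds only bump the slot's counter.
 */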
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read_kernel(&arr_struct->array[0],
				      sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
			__pragma_loop_unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read_kernel(&arr_struct->array[i],
							      sizeof(arr_struct->array[i]),
							      kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		} else {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}

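/*
 * Self-profiling helpers: account executions, elapsed time and emitted perf
 * events per BPF entry point in the bpf_func_stats map.
 */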
static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
				   enum bpf_function_id func_id)
{
	int func_id_key = func_id;

	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
	bpf_stat_ctx->bpf_func_stats_data_val =
		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
}

static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
}

static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
				    struct var_metadata_t* meta)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val) {
		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
		meta->bpf_stats_num_perf_events =
			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
	}
	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
	meta->cpu_id = bpf_get_smp_processor_id();
}

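/*
 * Concatenate dentry name components from filp_dentry up to the filesystem
 * root into payload (leaf first); returns the total number of bytes written.
 */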
static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(filp_dentry, d_name.name));
		bpf_nop_mov(filepart_length);
		if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH))
			break;
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}

static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}

static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}

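/* Emit a SYSCTL_EVENT carrying the written value and the sysctl file name whenever /proc/sys is written to. */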
SEC("kprobe/proc_sys_write")
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
		   struct file* filp, const char* buf,
		   size_t count, loff_t* ppos)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);

	u32 pid = get_userspace_pid();
	int zero = 0;
	struct var_sysctl_data_t* sysctl_data =
		bpf_map_lookup_elem(&data_heap, &zero);
	if (!sysctl_data)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	sysctl_data->meta.type = SYSCTL_EVENT;
	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);

	populate_ancestors(task, &sysctl_data->ancestors_info);

	sysctl_data->sysctl_val_length = 0;
	sysctl_data->sysctl_path_length = 0;

	size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
							     CTL_MAXNAME, buf);
	if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) {
		sysctl_data->sysctl_val_length = sysctl_val_length;
		payload += sysctl_val_length;
	}

	size_t sysctl_path_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(filp, f_path.dentry,
							d_name.name));
	if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) {
		sysctl_data->sysctl_path_length = sysctl_path_length;
		payload += sysctl_path_length;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
	unsigned long data_len = payload - (void*)sysctl_data;
	data_len = data_len > sizeof(struct var_sysctl_data_t)
		? sizeof(struct var_sysctl_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("tracepoint/syscalls/sys_enter_kill")
int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;

	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
	int pid = ctx->args[0];
	int sig = ctx->args[1];
	int ret = trace_var_sys_kill(ctx, pid, sig);
	bpf_stats_exit(&stats_ctx);
	return ret;
}

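/*
 * On task exit, flush any pending kill records targeting this pid: each
 * matching slot is copied to the heap, annotated with the target's comm and
 * cgroup name, emitted as a perf event, and the hash entry is dropped.
 */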
SEC("raw_tracepoint/sched_process_exit")
int raw_tracepoint__sched_process_exit(void* ctx)
{
	int zero = 0;
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);

	u32 tpid = get_userspace_pid();

	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (arr_struct == NULL || kill_data == NULL)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];

		if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) {
			bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
					      past_kill_data);
			void* payload = kill_data->payload;
			size_t offset = kill_data->payload_length;
			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
				return 0;
			payload += offset;

			kill_data->kill_target_name_length = 0;
			kill_data->kill_target_cgroup_proc_length = 0;

			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
			if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
				kill_data->kill_target_name_length = comm_length;
				payload += comm_length;
			}

			size_t cgroup_proc_length =
				bpf_probe_read_kernel_str(payload,
							  KILL_TARGET_LEN,
							  BPF_CORE_READ(proc_kernfs, name));
			if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) {
				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
				payload += cgroup_proc_length;
			}

			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
			unsigned long data_len = (void*)payload - (void*)kill_data;
			data_len = data_len > sizeof(struct var_kill_data_t)
				? sizeof(struct var_kill_data_t)
				: data_len;
			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
		}
	}
	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

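/*
 * On exec, emit an EXEC_EVENT carrying parent identity, binary path, command
 * line and (optionally) the environment, unless the binary's inode is
 * disallowed or filtered out by INODE_FILTER.
 */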
SEC("raw_tracepoint/sched_process_exec")
int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);

	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);

	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
	if (should_filter_binprm != NULL)
		goto out;

	int zero = 0;
	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!proc_exec_data)
		goto out;

	if (INODE_FILTER && inode != INODE_FILTER)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	proc_exec_data->meta.type = EXEC_EVENT;
	proc_exec_data->bin_path_length = 0;
	proc_exec_data->cmdline_length = 0;
	proc_exec_data->environment_length = 0;
	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
					      proc_exec_data->payload);
	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);

	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);

	const char* filename = BPF_CORE_READ(bprm, filename);
	size_t bin_path_length =
		bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
	if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) {
		proc_exec_data->bin_path_length = bin_path_length;
		payload += bin_path_length;
	}

	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
						     arg_end - arg_start, MAX_ARGS_LEN);

	if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) {
		proc_exec_data->cmdline_length = cmdline_length;
		payload += cmdline_length;
	}

	if (READ_ENVIRON_FROM_EXEC) {
		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
		unsigned long env_len = probe_read_lim(payload, env_start,
						       env_end - env_start, MAX_ENVIRON_LEN);
		if (env_len <= MAX_ENVIRON_LEN) {
			proc_exec_data->environment_length = env_len;
			payload += env_len;
		}
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
	unsigned long data_len = payload - (void*)proc_exec_data;
	data_len = data_len > sizeof(struct var_exec_data_t)
		? sizeof(struct var_exec_data_t)
		: data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

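/*
 * On return from do_filp_open, report writable opens of allowed files
 * (skipping tmpfiles, directories and special files) as FMOD_OPEN filemod
 * events with the resolved path.
 */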
SEC("kretprobe/do_filp_open")
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);

	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);

	if (filp == NULL || IS_ERR(filp))
		goto out;
	unsigned int flags = BPF_CORE_READ(filp, f_flags);
	if ((flags & (O_RDWR | O_WRONLY)) == 0)
		goto out;
	if ((flags & O_TMPFILE) > 0)
		goto out;
	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
	    S_ISSOCK(mode))
		goto out;

	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
	u32 device_id = 0;
	u64 file_ino = 0;
	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_OPEN;
	filemod_data->dst_flags = flags;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

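/* Report vfs_link() on allowed files as a FMOD_LINK event carrying both the source path and the new link path. */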
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
	       struct dentry* old_dentry, struct mnt_idmap *idmap,
	       struct inode* dir, struct dentry* new_dentry,
	       struct inode** delegated_inode)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);

	u32 src_device_id = 0;
	u64 src_file_ino = 0;
	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_LINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = src_file_ino;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = src_device_id;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->src_filepath_length = len;
	}

	len = read_absolute_file_path_from_dentry(new_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

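/* Report vfs_symlink() on allowed files as a FMOD_SYMLINK event carrying the link target string and the new dentry path. */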
SEC("kprobe/vfs_symlink")
int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
	       const char* oldname)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);

	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_SYMLINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
					       oldname);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->src_filepath_length = len;
	}
	len = read_absolute_file_path_from_dentry(dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

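/* On fork, emit a FORK_EVENT linking the child to its parent's pid, exec_id and start time. */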
SEC("raw_tracepoint/sched_process_fork")
int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);

	int zero = 0;
	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!fork_data)
		goto out;

	struct task_struct* parent = (struct task_struct*)ctx->args[0];
	struct task_struct* child = (struct task_struct*)ctx->args[1];
	fork_data->meta.type = FORK_EVENT;

	void* payload = populate_var_metadata(&fork_data->meta, child,
					      BPF_CORE_READ(child, pid), fork_data->payload);
	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);

	unsigned long data_len = payload - (void*)fork_data;
	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
char _license[] SEC("license") = "GPL";