1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21 
22 #include <linux/module.h>
23 #include <linux/aio.h>
24 #include <linux/mm.h>
25 #include <linux/swap.h>
26 #include <linux/slab.h>
27 #include <linux/sysctl.h>
28 #include <linux/bitmap.h>
29 #include <linux/signal.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/fs.h>
36 #include <linux/init.h>
37 #include <linux/kernel.h>
38 #include <linux/kobject.h>
39 #include <linux/net.h>
40 #include <linux/sysrq.h>
41 #include <linux/highuid.h>
42 #include <linux/writeback.h>
43 #include <linux/ratelimit.h>
44 #include <linux/compaction.h>
45 #include <linux/hugetlb.h>
46 #include <linux/initrd.h>
47 #include <linux/key.h>
48 #include <linux/times.h>
49 #include <linux/limits.h>
50 #include <linux/dcache.h>
51 #include <linux/dnotify.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/kprobes.h>
60 #include <linux/pipe_fs_i.h>
61 #include <linux/oom.h>
62 #include <linux/kmod.h>
63 #include <linux/capability.h>
64 #include <linux/binfmts.h>
65 #include <linux/sched/sysctl.h>
66 #include <linux/sched/coredump.h>
67 #include <linux/kexec.h>
68 #include <linux/bpf.h>
69 #include <linux/mount.h>
70 #include <linux/userfaultfd_k.h>
71 #include <linux/coredump.h>
72 #include <linux/latencytop.h>
73 #include <linux/pid.h>
74 
75 #include "../lib/kstrtox.h"
76 
77 #include <linux/uaccess.h>
78 #include <asm/processor.h>
79 
80 #ifdef CONFIG_X86
81 #include <asm/nmi.h>
82 #include <asm/stacktrace.h>
83 #include <asm/io.h>
84 #endif
85 #ifdef CONFIG_SPARC
86 #include <asm/setup.h>
87 #endif
88 #ifdef CONFIG_BSD_PROCESS_ACCT
89 #include <linux/acct.h>
90 #endif
91 #ifdef CONFIG_RT_MUTEXES
92 #include <linux/rtmutex.h>
93 #endif
94 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
95 #include <linux/lockdep.h>
96 #endif
97 #ifdef CONFIG_CHR_DEV_SG
98 #include <scsi/sg.h>
99 #endif
100 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
101 #include <linux/stackleak.h>
102 #endif
103 #ifdef CONFIG_LOCKUP_DETECTOR
104 #include <linux/nmi.h>
105 #endif
106 
107 #if defined(CONFIG_SYSCTL)
108 
/*
 * Constants used for minimum and maximum.
 *
 * NOTE(review): the ctl_table entries that point at these values are not
 * visible in this part of the file; presumably they are referenced through
 * ->extra1 / ->extra2 - confirm against the tables.
 */
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
#endif

/* __maybe_unused: whether these are referenced depends on the configuration. */
static int __maybe_unused neg_one = -1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
/* No initializer: zero_ul starts out as 0. */
static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
static int one_hundred = 100;
static int two_hundred = 200;
static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
#ifdef CONFIG_PERF_EVENTS
static int six_hundred_forty_kb = 640 * 1024;
#endif

/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;

/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
/* No initializer: minolduid starts out as 0. */
static int minolduid;

static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;

/*
 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 * and hung_task_check_interval_secs
 */
#ifdef CONFIG_DETECT_HUNG_TASK
static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#endif
147 
148 #ifdef CONFIG_INOTIFY_USER
149 #include <linux/inotify.h>
150 #endif
151 #ifdef CONFIG_FANOTIFY
152 #include <linux/fanotify.h>
153 #endif
154 
#ifdef CONFIG_PROC_SYSCTL

/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 *	to be written, and multiple writes on the same sysctl file descriptor
 *	will rewrite the sysctl value, regardless of file position. No warning
 *	is issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 *	not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 *	file position 0 and the value must be fully contained in the buffer
 *	sent to the write syscall. If dealing with strings respect the file
 *	position, but restrict this to the max length of the buffer, anything
 *	past the max length will be ignored. Multiple writes will append
 *	to the buffer.
 *
 * These write modes control how current file position affects the behavior of
 * updating sysctl values through the proc interface on each write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY		= -1,
	SYSCTL_WRITES_WARN		= 0,
	SYSCTL_WRITES_STRICT		= 1,
};

/* Current write mode; starts out as SYSCTL_WRITES_STRICT. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
#endif /* CONFIG_PROC_SYSCTL */
184 
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
/* Not static, so visible outside this file; no initializer, so it starts as 0. */
int sysctl_legacy_va_layout;
#endif

#ifdef CONFIG_COMPACTION
/*
 * Lower (0) and upper (1000) limits.
 * NOTE(review): presumably the bounds of an extfrag threshold sysctl whose
 * table entry is not visible in this part of the file - confirm.
 */
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif
194 
195 #endif /* CONFIG_SYSCTL */
196 
197 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
/*
 * Sysctl handler that presents the static key stored in table->data as an
 * integer bounded by SYSCTL_ZERO and SYSCTL_ONE.
 *
 * Writes require CAP_SYS_ADMIN (-EPERM otherwise). The value is parsed into a
 * local through a temporary table and proc_dointvec_minmax(); the key is only
 * touched when a successful write changes the remembered value. Everything
 * after the permission check runs under bpf_stats_enabled_mutex.
 *
 * Returns whatever proc_dointvec_minmax() returned.
 */
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	/* Last value accepted by a write; function-static, so shared by all calls. */
	static int saved_val;
	struct static_key *stats_key = (struct static_key *)table->data;
	int val;
	int ret;
	struct ctl_table shadow = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);

	/* Reads, failed writes and no-op writes leave the key alone. */
	if (!write || ret || val == saved_val)
		goto unlock;

	if (val)
		static_key_slow_inc(stats_key);
	else
		static_key_slow_dec(stats_key);
	saved_val = val;

unlock:
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}
228 #endif
229 
230 /*
231  * /proc/sys support
232  */
233 
234 #ifdef CONFIG_PROC_SYSCTL
235 
/*
 * Copy a string between the kernel buffer @data (capacity @maxlen) and
 * @buffer, honouring the file position in @ppos.
 *
 * Read: copies the part of @data starting at *@ppos, at most *@lenp bytes,
 * and appends '\n' if there is room. *@lenp becomes the number of bytes
 * produced and *@ppos is advanced by the same amount.
 *
 * Write: copies from @buffer until a NUL, a newline, *@lenp bytes or
 * @maxlen - 1 stored characters, then NUL-terminates @data. In
 * SYSCTL_WRITES_STRICT mode the copy starts at offset *@ppos inside @data
 * (and is dropped if *@ppos lies beyond the current string); in the other
 * modes it always starts at offset 0. *@ppos is advanced by *@lenp.
 *
 * Always returns 0.
 */
static int _proc_do_string(char *data, int maxlen, int write,
		char *buffer, size_t *lenp, loff_t *ppos)
{
	size_t len;
	size_t i;
	char c;

	if (!data || !maxlen || !*lenp) {
		*lenp = 0;
		return 0;
	}

	if (!write) {
		len = strlen(data);
		if (len > maxlen)
			len = maxlen;

		/* Reading from beyond the end of the string yields nothing. */
		if (*ppos > len) {
			*lenp = 0;
			return 0;
		}

		data += *ppos;
		len  -= *ppos;

		if (len > *lenp)
			len = *lenp;
		if (len)
			memcpy(buffer, data, len);
		if (len < *lenp)
			buffer[len++] = '\n';

		*lenp = len;
		*ppos += len;
		return 0;
	}

	/* Start writing from beginning of buffer unless strict mode says otherwise. */
	len = 0;
	if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
		/* Only continue writes not past the end of buffer. */
		len = strlen(data);
		if (len > maxlen - 1)
			len = maxlen - 1;

		if (*ppos > len)
			return 0;
		len = *ppos;
	}

	*ppos += *lenp;
	for (i = 0; i < *lenp && len < maxlen - 1; i++) {
		c = buffer[i];
		if (c == 0 || c == '\n')
			break;
		data[len++] = c;
	}
	data[len] = 0;

	return 0;
}
297 
warn_sysctl_write(struct ctl_table * table)298 static void warn_sysctl_write(struct ctl_table *table)
299 {
300 	pr_warn_once("%s wrote to %s when file position was not 0!\n"
301 		"This will not be supported in the future. To silence this\n"
302 		"warning, set kernel.sysctl_writes_strict = -1\n",
303 		current->comm, table->procname);
304 }
305 
306 /**
307  * proc_first_pos_non_zero_ignore - check if first position is allowed
308  * @ppos: file position
309  * @table: the sysctl table
310  *
311  * Returns true if the first position is non-zero and the sysctl_writes_strict
312  * mode indicates this is not allowed for numeric input types. String proc
313  * handlers can ignore the return value.
314  */
proc_first_pos_non_zero_ignore(loff_t * ppos,struct ctl_table * table)315 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
316 					   struct ctl_table *table)
317 {
318 	if (!*ppos)
319 		return false;
320 
321 	switch (sysctl_writes_strict) {
322 	case SYSCTL_WRITES_STRICT:
323 		return true;
324 	case SYSCTL_WRITES_WARN:
325 		warn_sysctl_write(table);
326 		return false;
327 	default:
328 		return false;
329 	}
330 }
331 
332 /**
333  * proc_dostring - read a string sysctl
334  * @table: the sysctl table
335  * @write: %TRUE if this is a write to the sysctl file
336  * @buffer: the user buffer
337  * @lenp: the size of the user buffer
338  * @ppos: file position
339  *
340  * Reads/writes a string from/to the user buffer. If the kernel
341  * buffer provided is not large enough to hold the string, the
342  * string is truncated. The copied string is %NULL-terminated.
343  * If the string is being read by the user process, it is copied
344  * and a newline '\n' is added. It is truncated if the buffer is
345  * not large enough.
346  *
347  * Returns 0 on success.
348  */
proc_dostring(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)349 int proc_dostring(struct ctl_table *table, int write,
350 		  void *buffer, size_t *lenp, loff_t *ppos)
351 {
352 	if (write)
353 		proc_first_pos_non_zero_ignore(ppos, table);
354 
355 	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
356 			ppos);
357 }
358 
/*
 * Advance *@buf to wherever skip_spaces() stops and return how many bytes
 * were skipped.
 *
 * NOTE(review): no length is passed in, so this presumably relies on the
 * buffer being NUL-terminated to stop the scan - confirm against callers.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
367 
/*
 * Skip a leading run of the character @v: while *@size is non-zero and the
 * byte at *@buf equals @v, advance *@buf by one and decrement *@size.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size != 0 && **buf == v; (*buf)++)
		(*size)--;
}
377 
378 /**
379  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
380  *                   fail on overflow
381  *
382  * @cp: kernel buffer containing the string to parse
383  * @endp: pointer to store the trailing characters
384  * @base: the base to use
385  * @res: where the parsed integer will be stored
386  *
387  * In case of success 0 is returned and @res will contain the parsed integer,
388  * @endp will hold any trailing characters.
389  * This function will fail the parse on overflow. If there wasn't an overflow
390  * the function will defer the decision what characters count as invalid to the
391  * caller.
392  */
strtoul_lenient(const char * cp,char ** endp,unsigned int base,unsigned long * res)393 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
394 			   unsigned long *res)
395 {
396 	unsigned long long result;
397 	unsigned int rv;
398 
399 	cp = _parse_integer_fixup_radix(cp, &base);
400 	rv = _parse_integer(cp, base, &result);
401 	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
402 		return -ERANGE;
403 
404 	cp += rv;
405 
406 	if (endp)
407 		*endp = (char *)cp;
408 
409 	*res = (unsigned long)result;
410 	return 0;
411 }
412 
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a user buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are copied into a NUL-terminated local
 * buffer and parsed from there. An optional leading '-' sets @neg; the
 * magnitude is stored in @val.
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL if @size is zero or does not fit in a ssize_t, if the
 * input does not start with a digit (after the optional sign), if the value
 * overflows, if the number fills the whole local buffer, or if the character
 * following the number is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char *p, tmp[TMPBUFLEN];
	/*
	 * Keep the length in a type as wide as size_t. With a plain int a
	 * very large *size could be truncated to a negative value, slip past
	 * the clamp below and end up as the length argument of memcpy().
	 */
	ssize_t len = *size;

	/* Covers both an empty buffer and a size too large to represent. */
	if (len <= 0)
		return -EINVAL;

	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(tmp, *buf, len);

	tmp[len] = 0;
	p = tmp;
	if (*p == '-' && *size > 1) {
		*neg = true;
		p++;
	} else
		*neg = false;
	if (!isdigit(*p))
		return -EINVAL;

	if (strtoul_lenient(p, &p, 0, val))
		return -EINVAL;

	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *p;

	*buf += len;
	*size -= len;

	return 0;
}
478 
479 /**
480  * proc_put_long - converts an integer to a decimal ASCII formatted string
481  *
482  * @buf: the user buffer
483  * @size: the size of the user buffer
484  * @val: the integer to be converted
485  * @neg: sign of the number, %TRUE for negative
486  *
487  * In case of success @buf and @size are updated with the amount of bytes
488  * written.
489  */
proc_put_long(void ** buf,size_t * size,unsigned long val,bool neg)490 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
491 {
492 	int len;
493 	char tmp[TMPBUFLEN], *p = tmp;
494 
495 	sprintf(p, "%s%lu", neg ? "-" : "", val);
496 	len = strlen(tmp);
497 	if (len > *size)
498 		len = *size;
499 	memcpy(*buf, tmp, len);
500 	*size -= len;
501 	*buf += len;
502 }
503 #undef TMPBUFLEN
504 
/*
 * Store the single character @c at *@buf if *@size is non-zero, then advance
 * *@buf by one and decrement *@size. Does nothing when no room is left.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *out;

	if (!*size)
		return;

	out = *buf;
	*out++ = c;
	*buf = out;
	(*size)--;
}
516 
/*
 * Default conversion between the (sign, magnitude) pair used by the parser
 * and an int.
 *
 * Write: combines *@negp and *@lvalp into *@valp. Magnitudes above INT_MAX
 * (or above INT_MAX + 1 for negative input) are rejected with -EINVAL and
 * *@valp is left untouched.
 * Read: splits *@valp into *@negp and *@lvalp.
 *
 * @data is not used. Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		unsigned long limit = (unsigned long) INT_MAX;

		/* The negative range reaches one further than the positive one. */
		if (*negp)
			limit++;
		if (*lvalp > limit)
			return -EINVAL;

		*valp = *negp ? -*lvalp : *lvalp;
		return 0;
	}

	*negp = *valp < 0;
	if (*negp)
		*lvalp = -(unsigned long)*valp;
	else
		*lvalp = (unsigned long)*valp;

	return 0;
}
543 
/*
 * Default conversion between the parser's unsigned long and an unsigned int.
 *
 * Write: stores *@lvalp in *@valp, or returns -EINVAL (leaving *@valp
 * untouched) when the value is larger than UINT_MAX.
 * Read: widens *@valp into *@lvalp.
 *
 * @data is not used. Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
558 
/*
 * Characters allowed to follow a number (space, tab, newline); handed to
 * proc_get_long() as its permitted-trailer vector. Not NUL-terminated, so
 * its length must be taken with sizeof().
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
560 
/*
 * Read or write a vector of ints stored at @tbl_data as whitespace-separated
 * ASCII numbers.
 *
 * @tbl_data: the int array to read from / write to
 * @table: the sysctl table (supplies maxlen and is passed to the position check)
 * @write: non-zero for a write
 * @buffer: kernel copy of the user buffer
 * @lenp: in: size of @buffer; out: number of bytes consumed or produced
 * @ppos: file position, advanced by the final *@lenp
 * @conv: per-value conversion callback; do_proc_dointvec_conv() if NULL
 * @data: opaque argument handed to @conv
 *
 * Returns 0 on success, the proc_get_long() error or -EINVAL on failure.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
		  int write, void *buffer,
		  size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	int *i, vleft, first = 1, err = 0;
	size_t left;
	char *p;

	/*
	 * Nothing to do without data or buffer space; reads are only served
	 * from file position 0.
	 */
	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (int *) tbl_data;
	/* Number of int slots available in the table data. */
	vleft = table->maxlen / sizeof(*i);
	left = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	if (write) {
		/*
		 * An ignored write still reports the whole buffer as
		 * consumed: "out" advances *ppos by the untouched *lenp.
		 */
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		/* Never look at more than PAGE_SIZE - 1 bytes of input. */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = buffer;
	}

	/* One value per iteration, until the buffer or the table runs out. */
	for (; left && vleft--; i++, first=0) {
		unsigned long lval;
		bool neg;

		if (write) {
			left -= proc_skip_spaces(&p);

			if (!left)
				break;
			err = proc_get_long(&p, &left, &lval, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			if (conv(&neg, &lval, i, 1, data)) {
				err = -EINVAL;
				break;
			}
		} else {
			if (conv(&neg, &lval, i, 0, data)) {
				err = -EINVAL;
				break;
			}
			/* Values after the first are separated by a tab. */
			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, lval, neg);
		}
	}

	/* Terminate the output line if at least one value was emitted. */
	if (!write && !first && left && !err)
		proc_put_char(&buffer, &left, '\n');
	/* Swallow whitespace that follows the last parsed value. */
	if (write && !err && left)
		left -= proc_skip_spaces(&p);
	/*
	 * "first" still set means no value completed an iteration; report the
	 * parse error, or -EINVAL if there was none. *lenp and *ppos are left
	 * untouched on this path.
	 */
	if (write && first)
		return err ? : -EINVAL;
	/* Bytes actually consumed (write) or produced (read). */
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
633 
/*
 * Run __do_proc_dointvec() on the int vector stored in table->data. All
 * other arguments are passed through unchanged.
 */
static int do_proc_dointvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	void *values = table->data;

	return __do_proc_dointvec(values, table, write, buffer, lenp, ppos,
				  conv, data);
}
643 
/*
 * Write path for a single unsigned int sysctl: parse one non-negative number
 * from @buffer and store it in *@tbl_data through @conv.
 *
 * If proc_first_pos_non_zero_ignore() says the write must be ignored, *@ppos
 * is advanced by *@lenp and 0 is returned. Otherwise every failure (empty
 * input, parse error, negative number, @conv rejecting the value) is
 * reported as -EINVAL, whatever error code @conv itself returned.
 *
 * NOTE(review): unlike __do_proc_dointvec(), the success path leaves both
 * *@lenp and *@ppos untouched - confirm this is intended.
 */
static int do_proc_douintvec_w(unsigned int *tbl_data,
			       struct ctl_table *table,
			       void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	char *cursor = buffer;
	size_t remaining = *lenp;
	unsigned long parsed;
	bool negative;

	if (proc_first_pos_non_zero_ignore(ppos, table)) {
		/* This is in keeping with old __do_proc_dointvec() */
		*ppos += *lenp;
		return 0;
	}

	/* Never look at more than PAGE_SIZE - 1 bytes of input. */
	if (remaining > PAGE_SIZE - 1)
		remaining = PAGE_SIZE - 1;

	remaining -= proc_skip_spaces(&cursor);
	if (!remaining)
		return -EINVAL;

	if (proc_get_long(&cursor, &remaining, &parsed, &negative,
			  proc_wspace_sep,
			  sizeof(proc_wspace_sep), NULL))
		return -EINVAL;
	if (negative)
		return -EINVAL;

	if (conv(&parsed, tbl_data, 1, data))
		return -EINVAL;

	/* Trailing whitespace is skipped, but the resulting count is not used. */
	if (remaining)
		remaining -= proc_skip_spaces(&cursor);

	return 0;
}
700 
/*
 * Read path for a single unsigned int sysctl: convert *@tbl_data through
 * @conv and emit it as decimal text followed by '\n' (the newline is dropped
 * if the number already filled the buffer).
 *
 * *@lenp becomes the number of bytes produced and *@ppos is advanced by that
 * amount - also when @conv fails, in which case nothing was produced and
 * -EINVAL is returned. Returns 0 on success.
 */
static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	size_t room = *lenp;
	unsigned long shown;
	int err = 0;

	if (conv(&shown, tbl_data, 0, data)) {
		err = -EINVAL;
	} else {
		proc_put_long(&buffer, &room, shown, false);
		if (room)
			proc_put_char(&buffer, &room, '\n');
	}

	*lenp -= room;
	*ppos += *lenp;

	return err;
}
731 
/*
 * Common entry point for unsigned int sysctls: validates the request and
 * dispatches to the read or write helper.
 *
 * With no data, a zero maxlen, an empty buffer or a read at a non-zero file
 * position, *@lenp is set to 0 and 0 is returned. Tables whose maxlen does
 * not describe exactly one unsigned int are rejected with -EINVAL.
 * A NULL @conv selects do_proc_douintvec_conv().
 */
static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
			       int write, void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	unsigned int *slot = tbl_data;
	unsigned int nslots;

	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	/*
	 * Arrays are not supported, keep this simple. *Do not* add
	 * support for them.
	 */
	nslots = table->maxlen / sizeof(*slot);
	if (nslots != 1) {
		*lenp = 0;
		return -EINVAL;
	}

	if (!conv)
		conv = do_proc_douintvec_conv;

	return write ?
		do_proc_douintvec_w(slot, table, buffer, lenp, ppos,
				    conv, data) :
		do_proc_douintvec_r(slot, buffer, lenp, ppos, conv, data);
}
767 
/*
 * Run __do_proc_douintvec() on the unsigned int stored in table->data. All
 * other arguments are passed through unchanged.
 */
static int do_proc_douintvec(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos,
			     int (*conv)(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data),
			     void *data)
{
	void *value = table->data;

	return __do_proc_douintvec(value, table, write, buffer, lenp, ppos,
				   conv, data);
}
778 
779 /**
780  * proc_dointvec - read a vector of integers
781  * @table: the sysctl table
782  * @write: %TRUE if this is a write to the sysctl file
783  * @buffer: the user buffer
784  * @lenp: the size of the user buffer
785  * @ppos: file position
786  *
787  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
788  * values from/to the user buffer, treated as an ASCII string.
789  *
790  * Returns 0 on success.
791  */
proc_dointvec(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)792 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
793 		  size_t *lenp, loff_t *ppos)
794 {
795 	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
796 }
797 
798 #ifdef CONFIG_COMPACTION
/*
 * proc_dointvec_minmax() wrapper that, when CONFIG_PREEMPT_RT is enabled,
 * logs (via pr_warn_once()) a successful write that changed the value at
 * table->data. Reads, and all accesses when CONFIG_PREEMPT_RT is not enabled,
 * go straight to proc_dointvec_minmax().
 *
 * Returns what proc_dointvec_minmax() returned.
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	int before, ret;

	if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	before = *(int *)table->data;
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && before != *(int *)table->data)
		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
			     table->procname, current->comm,
			     task_pid_nr(current));

	return ret;
}
817 #endif
818 
819 /**
820  * proc_douintvec - read a vector of unsigned integers
821  * @table: the sysctl table
822  * @write: %TRUE if this is a write to the sysctl file
823  * @buffer: the user buffer
824  * @lenp: the size of the user buffer
825  * @ppos: file position
826  *
827  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
828  * values from/to the user buffer, treated as an ASCII string.
829  *
830  * Returns 0 on success.
831  */
proc_douintvec(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)832 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
833 		size_t *lenp, loff_t *ppos)
834 {
835 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
836 				 do_proc_douintvec_conv, NULL);
837 }
838 
839 /*
840  * Taint values can only be increased
841  * This means we can safely use a temporary.
842  */
proc_taint(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)843 static int proc_taint(struct ctl_table *table, int write,
844 			       void *buffer, size_t *lenp, loff_t *ppos)
845 {
846 	struct ctl_table t;
847 	unsigned long tmptaint = get_taint();
848 	int err;
849 
850 	if (write && !capable(CAP_SYS_ADMIN))
851 		return -EPERM;
852 
853 	t = *table;
854 	t.data = &tmptaint;
855 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
856 	if (err < 0)
857 		return err;
858 
859 	if (write) {
860 		int i;
861 
862 		/*
863 		 * If we are relying on panic_on_taint not producing
864 		 * false positives due to userspace input, bail out
865 		 * before setting the requested taint flags.
866 		 */
867 		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
868 			return -EINVAL;
869 
870 		/*
871 		 * Poor man's atomic or. Not worth adding a primitive
872 		 * to everyone's atomic.h for this
873 		 */
874 		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
875 			if ((1UL << i) & tmptaint)
876 				add_taint(i, LOCKDEP_STILL_OK);
877 	}
878 
879 	return err;
880 }
881 
882 #ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() with an extra permission check: writes are refused
 * with -EPERM unless the caller has CAP_SYS_ADMIN. Reads are not restricted
 * here.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	const bool denied = write && !capable(CAP_SYS_ADMIN);

	if (denied)
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
891 #endif
892 
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};
906 
do_proc_dointvec_minmax_conv(bool * negp,unsigned long * lvalp,int * valp,int write,void * data)907 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
908 					int *valp,
909 					int write, void *data)
910 {
911 	int tmp, ret;
912 	struct do_proc_dointvec_minmax_conv_param *param = data;
913 	/*
914 	 * If writing, first do so via a temporary local int so we can
915 	 * bounds-check it before touching *valp.
916 	 */
917 	int *ip = write ? &tmp : valp;
918 
919 	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
920 	if (ret)
921 		return ret;
922 
923 	if (write) {
924 		if ((param->min && *param->min > tmp) ||
925 		    (param->max && *param->max < tmp))
926 			return -EINVAL;
927 		*valp = tmp;
928 	}
929 
930 	return 0;
931 }
932 
933 /**
934  * proc_dointvec_minmax - read a vector of integers with min/max values
935  * @table: the sysctl table
936  * @write: %TRUE if this is a write to the sysctl file
937  * @buffer: the user buffer
938  * @lenp: the size of the user buffer
939  * @ppos: file position
940  *
941  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
942  * values from/to the user buffer, treated as an ASCII string.
943  *
944  * This routine will ensure the values are within the range specified by
945  * table->extra1 (min) and table->extra2 (max).
946  *
947  * Returns 0 on success or -EINVAL on write when the range check fails.
948  */
proc_dointvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)949 int proc_dointvec_minmax(struct ctl_table *table, int write,
950 		  void *buffer, size_t *lenp, loff_t *ppos)
951 {
952 	struct do_proc_dointvec_minmax_conv_param param = {
953 		.min = (int *) table->extra1,
954 		.max = (int *) table->extra2,
955 	};
956 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
957 				do_proc_dointvec_minmax_conv, &param);
958 }
959 
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};
973 
do_proc_douintvec_minmax_conv(unsigned long * lvalp,unsigned int * valp,int write,void * data)974 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
975 					 unsigned int *valp,
976 					 int write, void *data)
977 {
978 	int ret;
979 	unsigned int tmp;
980 	struct do_proc_douintvec_minmax_conv_param *param = data;
981 	/* write via temporary local uint for bounds-checking */
982 	unsigned int *up = write ? &tmp : valp;
983 
984 	ret = do_proc_douintvec_conv(lvalp, up, write, data);
985 	if (ret)
986 		return ret;
987 
988 	if (write) {
989 		if ((param->min && *param->min > tmp) ||
990 		    (param->max && *param->max < tmp))
991 			return -ERANGE;
992 
993 		*valp = tmp;
994 	}
995 
996 	return 0;
997 }
998 
999 /**
1000  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
1001  * @table: the sysctl table
1002  * @write: %TRUE if this is a write to the sysctl file
1003  * @buffer: the user buffer
1004  * @lenp: the size of the user buffer
1005  * @ppos: file position
1006  *
1007  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
1008  * values from/to the user buffer, treated as an ASCII string. Negative
1009  * strings are not allowed.
1010  *
1011  * This routine will ensure the values are within the range specified by
1012  * table->extra1 (min) and table->extra2 (max). There is a final sanity
1013  * check for UINT_MAX to avoid having to support wrap around uses from
1014  * userspace.
1015  *
1016  * Returns 0 on success or -ERANGE on write when the range check fails.
1017  */
proc_douintvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1018 int proc_douintvec_minmax(struct ctl_table *table, int write,
1019 			  void *buffer, size_t *lenp, loff_t *ppos)
1020 {
1021 	struct do_proc_douintvec_minmax_conv_param param = {
1022 		.min = (unsigned int *) table->extra1,
1023 		.max = (unsigned int *) table->extra2,
1024 	};
1025 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
1026 				 do_proc_douintvec_minmax_conv, &param);
1027 }
1028 
1029 /**
1030  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
1031  * @table: the sysctl table
1032  * @write: %TRUE if this is a write to the sysctl file
1033  * @buffer: the user buffer
1034  * @lenp: the size of the user buffer
1035  * @ppos: file position
1036  *
1037  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
1038  * values from/to the user buffer, treated as an ASCII string. Negative
1039  * strings are not allowed.
1040  *
1041  * This routine will ensure the values are within the range specified by
1042  * table->extra1 (min) and table->extra2 (max).
1043  *
1044  * Returns 0 on success or an error on write when the range check fails.
1045  */
proc_dou8vec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1046 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1047 			void *buffer, size_t *lenp, loff_t *ppos)
1048 {
1049 	struct ctl_table tmp;
1050 	unsigned int min = 0, max = 255U, val;
1051 	u8 *data = table->data;
1052 	struct do_proc_douintvec_minmax_conv_param param = {
1053 		.min = &min,
1054 		.max = &max,
1055 	};
1056 	int res;
1057 
1058 	/* Do not support arrays yet. */
1059 	if (table->maxlen != sizeof(u8))
1060 		return -EINVAL;
1061 
1062 	if (table->extra1) {
1063 		min = *(unsigned int *) table->extra1;
1064 		if (min > 255U)
1065 			return -EINVAL;
1066 	}
1067 	if (table->extra2) {
1068 		max = *(unsigned int *) table->extra2;
1069 		if (max > 255U)
1070 			return -EINVAL;
1071 	}
1072 
1073 	tmp = *table;
1074 
1075 	tmp.maxlen = sizeof(val);
1076 	tmp.data = &val;
1077 	val = *data;
1078 	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1079 				do_proc_douintvec_minmax_conv, &param);
1080 	if (res)
1081 		return res;
1082 	if (write)
1083 		*data = val;
1084 	return 0;
1085 }
1086 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1087 
do_proc_dopipe_max_size_conv(unsigned long * lvalp,unsigned int * valp,int write,void * data)1088 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
1089 					unsigned int *valp,
1090 					int write, void *data)
1091 {
1092 	if (write) {
1093 		unsigned int val;
1094 
1095 		val = round_pipe_size(*lvalp);
1096 		if (val == 0)
1097 			return -EINVAL;
1098 
1099 		*valp = val;
1100 	} else {
1101 		unsigned int val = *valp;
1102 		*lvalp = (unsigned long) val;
1103 	}
1104 
1105 	return 0;
1106 }
1107 
/*
 * sysctl handler that runs do_proc_douintvec() with
 * do_proc_dopipe_max_size_conv() as the conversion callback; the callback
 * takes no extra data.
 */
static int proc_dopipe_max_size(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	return do_proc_douintvec(table, write, buffer,
				 lenp, ppos,
				 do_proc_dopipe_max_size_conv,
				 NULL);
}
1114 
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'.  The body is compiled out entirely when
 * CONFIG_COREDUMP is not set.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
1128 
/*
 * proc_dointvec_minmax() wrapper that re-runs validate_coredump_safety()
 * after every call that succeeded.
 */
static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
1137 
1138 #ifdef CONFIG_COREDUMP
/*
 * proc_dostring() wrapper that re-runs validate_coredump_safety() after
 * every call that succeeded.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dostring(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
1147 #endif
1148 
1149 #ifdef CONFIG_MAGIC_SYSRQ
/*
 * sysctl handler for the sysrq mask.
 *
 * The current value of sysrq_mask() is staged in a local int, which is
 * handed to __do_proc_dointvec() as its data pointer.  After a successful
 * write the resulting value is passed to sysrq_toggle_support().
 *
 * Returns 0 on success or the error reported by __do_proc_dointvec().
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int tmp, ret;

	tmp = sysrq_mask();

	ret = __do_proc_dointvec(&tmp, table, write, buffer,
			       lenp, ppos, NULL, NULL);
	if (ret || !write)
		return ret;

	/* Only a successful write reaches this point. */
	sysrq_toggle_support(tmp);

	return 0;
}
1167 #endif
1168 
/*
 * Common worker behind the unsigned long vector handlers.
 *
 * @data holds table->maxlen / sizeof(unsigned long) values.  On write each
 * parsed value is scaled by @convmul / @convdiv, checked against the
 * optional table->extra1 (min) and table->extra2 (max) bounds and stored;
 * at most PAGE_SIZE - 1 bytes of input are examined.  A negative input
 * leaves its slot untouched but still advances to the next slot.  On read
 * each stored value is scaled by @convdiv / @convmul and emitted, tab
 * separated, followed by a newline if room remains.
 *
 * Returns 0 on success, -EINVAL for an out-of-range value, the error from
 * proc_get_long(), or -EINVAL for a write on which no slot was processed.
 *
 * NOTE(review): convmul * val is not guarded against unsigned wrap-around,
 * so a very large input can wrap before the range check is applied.
 */
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos,
		unsigned long convmul, unsigned long convdiv)
{
	unsigned long *slot, *lower, *upper;
	int slots_left, first = 1, err = 0;
	size_t left;
	char *p;

	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	slot = data;
	lower = table->extra1;
	upper = table->extra2;
	slots_left = table->maxlen / sizeof(unsigned long);
	left = *lenp;

	if (write) {
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = buffer;
	}

	for (; left && slots_left--; slot++, first = 0) {
		unsigned long val;
		bool neg;

		if (!write) {
			val = convdiv * (*slot) / convmul;
			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, val, false);
			continue;
		}

		left -= proc_skip_spaces(&p);
		if (!left)
			break;

		err = proc_get_long(&p, &left, &val, &neg,
				    proc_wspace_sep,
				    sizeof(proc_wspace_sep), NULL);
		if (err)
			break;

		/* Negative input: keep the old value, move on to the next slot. */
		if (neg)
			continue;

		val = convmul * val / convdiv;
		if ((lower && val < *lower) || (upper && val > *upper)) {
			err = -EINVAL;
			break;
		}
		*slot = val;
	}

	if (write) {
		if (!err)
			left -= proc_skip_spaces(&p);
		/* Nothing was processed at all: report that as a failure. */
		if (first)
			return err ? err : -EINVAL;
	} else if (!first && left && !err) {
		proc_put_char(&buffer, &left, '\n');
	}

	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
1240 
/*
 * Run __do_proc_doulongvec_minmax() on the table's own data pointer with the
 * given scaling factors.
 */
static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
		unsigned long convdiv)
{
	void *data = table->data;

	return __do_proc_doulongvec_minmax(data, table, write, buffer, lenp,
					   ppos, convmul, convdiv);
}
1248 
1249 /**
1250  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1251  * @table: the sysctl table
1252  * @write: %TRUE if this is a write to the sysctl file
1253  * @buffer: the user buffer
1254  * @lenp: the size of the user buffer
1255  * @ppos: file position
1256  *
1257  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1258  * values from/to the user buffer, treated as an ASCII string.
1259  *
1260  * This routine will ensure the values are within the range specified by
1261  * table->extra1 (min) and table->extra2 (max).
1262  *
1263  * Returns 0 on success.
1264  */
proc_doulongvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1265 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1266 			   void *buffer, size_t *lenp, loff_t *ppos)
1267 {
1268     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1269 }
1270 
1271 /**
1272  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1273  * @table: the sysctl table
1274  * @write: %TRUE if this is a write to the sysctl file
1275  * @buffer: the user buffer
1276  * @lenp: the size of the user buffer
1277  * @ppos: file position
1278  *
1279  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1280  * values from/to the user buffer, treated as an ASCII string. The values
1281  * are treated as milliseconds, and converted to jiffies when they are stored.
1282  *
1283  * This routine will ensure the values are within the range specified by
1284  * table->extra1 (min) and table->extra2 (max).
1285  *
1286  * Returns 0 on success.
1287  */
proc_doulongvec_ms_jiffies_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1288 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1289 				      void *buffer, size_t *lenp, loff_t *ppos)
1290 {
1291     return do_proc_doulongvec_minmax(table, write, buffer,
1292 				     lenp, ppos, HZ, 1000l);
1293 }
1294 
1295 
/*
 * Convert between a sign/magnitude pair (*negp, *lvalp) on the user side
 * and a signed jiffies count (*valp) on the kernel side, using a factor
 * of HZ.
 *
 * Write: returns 1 without touching *valp when the magnitude exceeds
 * INT_MAX / HZ; otherwise stores magnitude * HZ with the requested sign.
 * Read: splits *valp into sign and magnitude and divides the magnitude
 * by HZ.
 */
static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
					 int *valp,
					 int write, void *data)
{
	unsigned long magnitude;
	int stored;

	if (!write) {
		stored = *valp;
		*negp = stored < 0;
		magnitude = (unsigned long)stored;
		if (*negp)
			magnitude = -magnitude;
		*lvalp = magnitude / HZ;
		return 0;
	}

	if (*lvalp > INT_MAX / HZ)
		return 1;

	magnitude = *lvalp * HZ;
	*valp = *negp ? -magnitude : magnitude;
	return 0;
}
1318 
/*
 * Convert between a sign/magnitude pair (*negp, *lvalp) on the user side
 * and a signed jiffies count (*valp) on the kernel side.
 *
 * Write: when USER_HZ < HZ, a magnitude above (LONG_MAX / HZ) * USER_HZ is
 * refused by returning 1; otherwise the signed value is run through
 * clock_t_to_jiffies() and stored.
 * Read: splits *valp into sign and magnitude and runs the magnitude through
 * jiffies_to_clock_t().
 */
static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
						int *valp,
						int write, void *data)
{
	unsigned long magnitude;
	int stored;

	if (!write) {
		stored = *valp;
		*negp = stored < 0;
		magnitude = (unsigned long)stored;
		if (*negp)
			magnitude = -magnitude;
		*lvalp = jiffies_to_clock_t(magnitude);
		return 0;
	}

	if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
		return 1;

	*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
	return 0;
}
1341 
/*
 * Convert between a sign/magnitude pair (*negp, *lvalp) on the user side
 * and a signed jiffies count (*valp) on the kernel side.
 *
 * Write: the signed value is run through msecs_to_jiffies(); a result above
 * INT_MAX is refused by returning 1, anything else is stored.
 * Read: splits *valp into sign and magnitude and runs the magnitude through
 * jiffies_to_msecs().
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude, jif;
	int stored;

	if (!write) {
		stored = *valp;
		*negp = stored < 0;
		magnitude = (unsigned long)stored;
		if (*negp)
			magnitude = -magnitude;
		*lvalp = jiffies_to_msecs(magnitude);
		return 0;
	}

	jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	if (jif > INT_MAX)
		return 1;

	*valp = (int)jif;
	return 0;
}
1366 
1367 /**
1368  * proc_dointvec_jiffies - read a vector of integers as seconds
1369  * @table: the sysctl table
1370  * @write: %TRUE if this is a write to the sysctl file
1371  * @buffer: the user buffer
1372  * @lenp: the size of the user buffer
1373  * @ppos: file position
1374  *
1375  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1376  * values from/to the user buffer, treated as an ASCII string.
1377  * The values read are assumed to be in seconds, and are converted into
1378  * jiffies.
1379  *
1380  * Returns 0 on success.
1381  */
proc_dointvec_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1382 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1383 			  void *buffer, size_t *lenp, loff_t *ppos)
1384 {
1385     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1386 		    	    do_proc_dointvec_jiffies_conv,NULL);
1387 }
1388 
1389 /**
1390  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1391  * @table: the sysctl table
1392  * @write: %TRUE if this is a write to the sysctl file
1393  * @buffer: the user buffer
1394  * @lenp: the size of the user buffer
1395  * @ppos: pointer to the file position
1396  *
1397  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1398  * values from/to the user buffer, treated as an ASCII string.
1399  * The values read are assumed to be in 1/USER_HZ seconds, and
1400  * are converted into jiffies.
1401  *
1402  * Returns 0 on success.
1403  */
proc_dointvec_userhz_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1404 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1405 				 void *buffer, size_t *lenp, loff_t *ppos)
1406 {
1407     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1408 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
1409 }
1410 
1411 /**
1412  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1413  * @table: the sysctl table
1414  * @write: %TRUE if this is a write to the sysctl file
1415  * @buffer: the user buffer
1416  * @lenp: the size of the user buffer
1417  * @ppos: file position
1418  * @ppos: the current position in the file
1419  *
1420  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1421  * values from/to the user buffer, treated as an ASCII string.
1422  * The values read are assumed to be in 1/1000 seconds, and
1423  * are converted into jiffies.
1424  *
1425  * Returns 0 on success.
1426  */
proc_dointvec_ms_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1427 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1428 		size_t *lenp, loff_t *ppos)
1429 {
1430 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1431 				do_proc_dointvec_ms_jiffies_conv, NULL);
1432 }
1433 
/*
 * sysctl handler for cad_pid.
 *
 * The pid_vnr() of the current cad_pid is staged in a local, which is handed
 * to __do_proc_dointvec() as its data pointer.  After a successful write the
 * resulting number is looked up with find_get_pid(); the struct pid found is
 * swapped into cad_pid and the previous one is released with put_pid().
 *
 * Returns 0 on success, the error from __do_proc_dointvec(), or -ESRCH when
 * the written number does not resolve to a pid.
 */
static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	struct pid *replacement;
	pid_t nr = pid_vnr(cad_pid);
	int err;

	err = __do_proc_dointvec(&nr, table, write, buffer, lenp, ppos,
				 NULL, NULL);
	if (err)
		return err;
	if (!write)
		return 0;

	replacement = find_get_pid(nr);
	if (!replacement)
		return -ESRCH;

	put_pid(xchg(&cad_pid, replacement));
	return 0;
}
1455 
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * table->data points to the bitmap pointer and table->maxlen holds the
 * bitmap length (in bits).
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. A write that
 * starts at file position 0 replaces the bitmap with the given input;
 * a write at a non-zero position ORs the given input into the existing
 * bitmap. At most PAGE_SIZE - 1 bytes are parsed per call; the rest is
 * reported back as not consumed.
 *
 * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be
 * allocated, -EINVAL for a negative, out-of-range or reversed range value,
 * or the error reported by proc_get_long().
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	bool first = 1;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/* Terminators accepted after the first (tr_a) and second (tr_b) number. */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	/* Nothing to do: no bitmap, no room, or a read past position 0. */
	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *p = buffer;
		size_t skipped = 0;

		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		/* Parse into a scratch bitmap so errors leave the real one intact. */
		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap)
			return -ENOMEM;
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					     sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* A lone number is the range val_a-val_a. */
			val_b = val_a;
			/* Step over the terminator that ended the number. */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						     &neg, tr_b, sizeof(tr_b),
						     &c);
				/*
				 * If we consumed all of a truncated buffer,
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				if (left) {
					p++;
					left--;
				}
			}

			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			first = 0;
			proc_skip_char(&p, &left, '\n');
		}
		/* Bytes beyond this pass count as not consumed. */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;

		/* Emit each run of set bits as "a" or "a-b", comma separated. */
		while (left) {
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first)
				proc_put_char(&buffer, &left, ',');
			proc_put_long(&buffer, &left, bit_a, false);
			if (bit_a != bit_b) {
				proc_put_char(&buffer, &left, '-');
				proc_put_long(&buffer, &left, bit_b, false);
			}

			first = 0; bit_b++;
		}
		proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/* Continuation writes accumulate; a write at 0 replaces. */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		*lenp -= left;
		*ppos += *lenp;
	}

	/* tmp_bitmap is still NULL on the read path. */
	bitmap_free(tmp_bitmap);
	return err;
}
1603 
1604 #else /* CONFIG_PROC_SYSCTL */
1605 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dostring(struct ctl_table *table, int write, void *buffer,
		  size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1611 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec(struct ctl_table *table, int write, void *buffer,
		  size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1617 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_douintvec(struct ctl_table *table, int write, void *buffer,
		   size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1623 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_minmax(struct ctl_table *table, int write, void *buffer,
			 size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1629 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1635 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer,
			size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1641 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_jiffies(struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1647 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1653 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
			     size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1659 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_minmax(struct ctl_table *table, int write, void *buffer,
			   size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1665 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table,
				      int write, void *buffer,
				      size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1671 
/* Stub for the #else branch of CONFIG_PROC_SYSCTL: always fails with -ENOSYS. */
int proc_do_large_bitmap(struct ctl_table *table, int write, void *buffer,
			 size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1677 
1678 #endif /* CONFIG_PROC_SYSCTL */
1679 
1680 #if defined(CONFIG_SYSCTL)
/*
 * sysctl handler that exposes the static key at table->data as an integer
 * restricted to SYSCTL_ZERO..SYSCTL_ONE.
 *
 * Writes require CAP_SYS_ADMIN.  The key's current state is staged in a
 * local int and run through proc_dointvec_minmax() on a private ctl_table;
 * after a successful write the key is enabled for a non-zero value and
 * disabled otherwise.  A function-local mutex serialises the whole
 * read-modify-write sequence.
 *
 * Returns 0 on success, -EPERM for an unprivileged write, or the error from
 * proc_dointvec_minmax().
 */
int proc_do_static_key(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(static_key_mutex);
	struct static_key *key = table->data;
	int enabled, err;
	struct ctl_table shadow = {
		.data   = &enabled,
		.maxlen = sizeof(enabled),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&static_key_mutex);

	enabled = static_key_enabled(key);
	err = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
	if (err || !write)
		goto unlock;

	if (enabled)
		static_key_enable(key);
	else
		static_key_disable(key);

unlock:
	mutex_unlock(&static_key_mutex);
	return err;
}
1710 
1711 static struct ctl_table kern_table[] = {
1712 	{
1713 		.procname	= "sched_child_runs_first",
1714 		.data		= &sysctl_sched_child_runs_first,
1715 		.maxlen		= sizeof(unsigned int),
1716 		.mode		= 0644,
1717 		.proc_handler	= proc_dointvec,
1718 	},
1719 #ifdef CONFIG_SCHEDSTATS
1720 	{
1721 		.procname	= "sched_schedstats",
1722 		.data		= NULL,
1723 		.maxlen		= sizeof(unsigned int),
1724 		.mode		= 0644,
1725 		.proc_handler	= sysctl_schedstats,
1726 		.extra1		= SYSCTL_ZERO,
1727 		.extra2		= SYSCTL_ONE,
1728 	},
1729 #endif /* CONFIG_SCHEDSTATS */
1730 #ifdef CONFIG_NUMA_BALANCING
1731 	{
1732 		.procname	= "numa_balancing",
1733 		.data		= NULL, /* filled in by handler */
1734 		.maxlen		= sizeof(unsigned int),
1735 		.mode		= 0644,
1736 		.proc_handler	= sysctl_numa_balancing,
1737 		.extra1		= SYSCTL_ZERO,
1738 		.extra2		= SYSCTL_ONE,
1739 	},
1740 #endif /* CONFIG_NUMA_BALANCING */
1741 	{
1742 		.procname	= "sched_rt_period_us",
1743 		.data		= &sysctl_sched_rt_period,
1744 		.maxlen		= sizeof(unsigned int),
1745 		.mode		= 0644,
1746 		.proc_handler	= sched_rt_handler,
1747 	},
1748 	{
1749 		.procname	= "sched_rt_runtime_us",
1750 		.data		= &sysctl_sched_rt_runtime,
1751 		.maxlen		= sizeof(int),
1752 		.mode		= 0644,
1753 		.proc_handler	= sched_rt_handler,
1754 	},
1755 	{
1756 		.procname	= "sched_deadline_period_max_us",
1757 		.data		= &sysctl_sched_dl_period_max,
1758 		.maxlen		= sizeof(unsigned int),
1759 		.mode		= 0644,
1760 		.proc_handler	= proc_dointvec,
1761 	},
1762 	{
1763 		.procname	= "sched_deadline_period_min_us",
1764 		.data		= &sysctl_sched_dl_period_min,
1765 		.maxlen		= sizeof(unsigned int),
1766 		.mode		= 0644,
1767 		.proc_handler	= proc_dointvec,
1768 	},
1769 	{
1770 		.procname	= "sched_rr_timeslice_ms",
1771 		.data		= &sysctl_sched_rr_timeslice,
1772 		.maxlen		= sizeof(int),
1773 		.mode		= 0644,
1774 		.proc_handler	= sched_rr_handler,
1775 	},
1776 #ifdef CONFIG_UCLAMP_TASK
1777 	{
1778 		.procname	= "sched_util_clamp_min",
1779 		.data		= &sysctl_sched_uclamp_util_min,
1780 		.maxlen		= sizeof(unsigned int),
1781 		.mode		= 0644,
1782 		.proc_handler	= sysctl_sched_uclamp_handler,
1783 	},
1784 	{
1785 		.procname	= "sched_util_clamp_max",
1786 		.data		= &sysctl_sched_uclamp_util_max,
1787 		.maxlen		= sizeof(unsigned int),
1788 		.mode		= 0644,
1789 		.proc_handler	= sysctl_sched_uclamp_handler,
1790 	},
1791 	{
1792 		.procname	= "sched_util_clamp_min_rt_default",
1793 		.data		= &sysctl_sched_uclamp_util_min_rt_default,
1794 		.maxlen		= sizeof(unsigned int),
1795 		.mode		= 0644,
1796 		.proc_handler	= sysctl_sched_uclamp_handler,
1797 	},
1798 #endif
1799 #ifdef CONFIG_SCHED_AUTOGROUP
1800 	{
1801 		.procname	= "sched_autogroup_enabled",
1802 		.data		= &sysctl_sched_autogroup_enabled,
1803 		.maxlen		= sizeof(unsigned int),
1804 		.mode		= 0644,
1805 		.proc_handler	= proc_dointvec_minmax,
1806 		.extra1		= SYSCTL_ZERO,
1807 		.extra2		= SYSCTL_ONE,
1808 	},
1809 #endif
1810 #ifdef CONFIG_CFS_BANDWIDTH
1811 	{
1812 		.procname	= "sched_cfs_bandwidth_slice_us",
1813 		.data		= &sysctl_sched_cfs_bandwidth_slice,
1814 		.maxlen		= sizeof(unsigned int),
1815 		.mode		= 0644,
1816 		.proc_handler	= proc_dointvec_minmax,
1817 		.extra1		= SYSCTL_ONE,
1818 	},
1819 #endif
1820 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
1821 	{
1822 		.procname	= "sched_energy_aware",
1823 		.data		= &sysctl_sched_energy_aware,
1824 		.maxlen		= sizeof(unsigned int),
1825 		.mode		= 0644,
1826 		.proc_handler	= sched_energy_aware_handler,
1827 		.extra1		= SYSCTL_ZERO,
1828 		.extra2		= SYSCTL_ONE,
1829 	},
1830 #endif
1831 #ifdef CONFIG_PROVE_LOCKING
1832 	{
1833 		.procname	= "prove_locking",
1834 		.data		= &prove_locking,
1835 		.maxlen		= sizeof(int),
1836 		.mode		= 0644,
1837 		.proc_handler	= proc_dointvec,
1838 	},
1839 #endif
1840 #ifdef CONFIG_LOCK_STAT
1841 	{
1842 		.procname	= "lock_stat",
1843 		.data		= &lock_stat,
1844 		.maxlen		= sizeof(int),
1845 		.mode		= 0644,
1846 		.proc_handler	= proc_dointvec,
1847 	},
1848 #endif
1849 	{
1850 		.procname	= "panic",
1851 		.data		= &panic_timeout,
1852 		.maxlen		= sizeof(int),
1853 		.mode		= 0644,
1854 		.proc_handler	= proc_dointvec,
1855 	},
1856 #ifdef CONFIG_COREDUMP
1857 	{
1858 		.procname	= "core_uses_pid",
1859 		.data		= &core_uses_pid,
1860 		.maxlen		= sizeof(int),
1861 		.mode		= 0644,
1862 		.proc_handler	= proc_dointvec,
1863 	},
1864 	{
1865 		.procname	= "core_pattern",
1866 		.data		= core_pattern,
1867 		.maxlen		= CORENAME_MAX_SIZE,
1868 		.mode		= 0644,
1869 		.proc_handler	= proc_dostring_coredump,
1870 	},
1871 	{
1872 		.procname	= "core_pipe_limit",
1873 		.data		= &core_pipe_limit,
1874 		.maxlen		= sizeof(unsigned int),
1875 		.mode		= 0644,
1876 		.proc_handler	= proc_dointvec,
1877 	},
1878 #endif
1879 #ifdef CONFIG_PROC_SYSCTL
1880 	{
1881 		.procname	= "tainted",
1882 		.maxlen 	= sizeof(long),
1883 		.mode		= 0644,
1884 		.proc_handler	= proc_taint,
1885 	},
1886 	{
1887 		.procname	= "sysctl_writes_strict",
1888 		.data		= &sysctl_writes_strict,
1889 		.maxlen		= sizeof(int),
1890 		.mode		= 0644,
1891 		.proc_handler	= proc_dointvec_minmax,
1892 		.extra1		= &neg_one,
1893 		.extra2		= SYSCTL_ONE,
1894 	},
1895 #endif
1896 #ifdef CONFIG_LATENCYTOP
1897 	{
1898 		.procname	= "latencytop",
1899 		.data		= &latencytop_enabled,
1900 		.maxlen		= sizeof(int),
1901 		.mode		= 0644,
1902 		.proc_handler	= sysctl_latencytop,
1903 	},
1904 #endif
1905 #ifdef CONFIG_BLK_DEV_INITRD
1906 	{
1907 		.procname	= "real-root-dev",
1908 		.data		= &real_root_dev,
1909 		.maxlen		= sizeof(int),
1910 		.mode		= 0644,
1911 		.proc_handler	= proc_dointvec,
1912 	},
1913 #endif
1914 	{
1915 		.procname	= "print-fatal-signals",
1916 		.data		= &print_fatal_signals,
1917 		.maxlen		= sizeof(int),
1918 		.mode		= 0644,
1919 		.proc_handler	= proc_dointvec,
1920 	},
1921 #ifdef CONFIG_SPARC
1922 	{
1923 		.procname	= "reboot-cmd",
1924 		.data		= reboot_command,
1925 		.maxlen		= 256,
1926 		.mode		= 0644,
1927 		.proc_handler	= proc_dostring,
1928 	},
1929 	{
1930 		.procname	= "stop-a",
1931 		.data		= &stop_a_enabled,
1932 		.maxlen		= sizeof (int),
1933 		.mode		= 0644,
1934 		.proc_handler	= proc_dointvec,
1935 	},
1936 	{
1937 		.procname	= "scons-poweroff",
1938 		.data		= &scons_pwroff,
1939 		.maxlen		= sizeof (int),
1940 		.mode		= 0644,
1941 		.proc_handler	= proc_dointvec,
1942 	},
1943 #endif
1944 #ifdef CONFIG_SPARC64
1945 	{
1946 		.procname	= "tsb-ratio",
1947 		.data		= &sysctl_tsb_ratio,
1948 		.maxlen		= sizeof (int),
1949 		.mode		= 0644,
1950 		.proc_handler	= proc_dointvec,
1951 	},
1952 #endif
1953 #ifdef CONFIG_PARISC
1954 	{
1955 		.procname	= "soft-power",
1956 		.data		= &pwrsw_enabled,
1957 		.maxlen		= sizeof (int),
1958 		.mode		= 0644,
1959 		.proc_handler	= proc_dointvec,
1960 	},
1961 #endif
1962 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1963 	{
1964 		.procname	= "unaligned-trap",
1965 		.data		= &unaligned_enabled,
1966 		.maxlen		= sizeof (int),
1967 		.mode		= 0644,
1968 		.proc_handler	= proc_dointvec,
1969 	},
1970 #endif
1971 	{
1972 		.procname	= "ctrl-alt-del",
1973 		.data		= &C_A_D,
1974 		.maxlen		= sizeof(int),
1975 		.mode		= 0644,
1976 		.proc_handler	= proc_dointvec,
1977 	},
1978 #ifdef CONFIG_FUNCTION_TRACER
1979 	{
1980 		.procname	= "ftrace_enabled",
1981 		.data		= &ftrace_enabled,
1982 		.maxlen		= sizeof(int),
1983 		.mode		= 0644,
1984 		.proc_handler	= ftrace_enable_sysctl,
1985 	},
1986 #endif
1987 #ifdef CONFIG_STACK_TRACER
1988 	{
1989 		.procname	= "stack_tracer_enabled",
1990 		.data		= &stack_tracer_enabled,
1991 		.maxlen		= sizeof(int),
1992 		.mode		= 0644,
1993 		.proc_handler	= stack_trace_sysctl,
1994 	},
1995 #endif
1996 #ifdef CONFIG_TRACING
1997 	{
1998 		.procname	= "ftrace_dump_on_oops",
1999 		.data		= &ftrace_dump_on_oops,
2000 		.maxlen		= sizeof(int),
2001 		.mode		= 0644,
2002 		.proc_handler	= proc_dointvec,
2003 	},
2004 	{
2005 		.procname	= "traceoff_on_warning",
2006 		.data		= &__disable_trace_on_warning,
2007 		.maxlen		= sizeof(__disable_trace_on_warning),
2008 		.mode		= 0644,
2009 		.proc_handler	= proc_dointvec,
2010 	},
2011 	{
2012 		.procname	= "tracepoint_printk",
2013 		.data		= &tracepoint_printk,
2014 		.maxlen		= sizeof(tracepoint_printk),
2015 		.mode		= 0644,
2016 		.proc_handler	= tracepoint_printk_sysctl,
2017 	},
2018 #endif
2019 #ifdef CONFIG_KEXEC_CORE
2020 	{
2021 		.procname	= "kexec_load_disabled",
2022 		.data		= &kexec_load_disabled,
2023 		.maxlen		= sizeof(int),
2024 		.mode		= 0644,
2025 		/* only handle a transition from default "0" to "1" */
2026 		.proc_handler	= proc_dointvec_minmax,
2027 		.extra1		= SYSCTL_ONE,
2028 		.extra2		= SYSCTL_ONE,
2029 	},
2030 #endif
2031 #ifdef CONFIG_MODULES
2032 	{
2033 		.procname	= "modprobe",
2034 		.data		= &modprobe_path,
2035 		.maxlen		= KMOD_PATH_LEN,
2036 		.mode		= 0644,
2037 		.proc_handler	= proc_dostring,
2038 	},
2039 	{
2040 		.procname	= "modules_disabled",
2041 		.data		= &modules_disabled,
2042 		.maxlen		= sizeof(int),
2043 		.mode		= 0644,
2044 		/* only handle a transition from default "0" to "1" */
2045 		.proc_handler	= proc_dointvec_minmax,
2046 		.extra1		= SYSCTL_ONE,
2047 		.extra2		= SYSCTL_ONE,
2048 	},
2049 #endif
2050 #ifdef CONFIG_UEVENT_HELPER
2051 	{
2052 		.procname	= "hotplug",
2053 		.data		= &uevent_helper,
2054 		.maxlen		= UEVENT_HELPER_PATH_LEN,
2055 		.mode		= 0644,
2056 		.proc_handler	= proc_dostring,
2057 	},
2058 #endif
2059 #ifdef CONFIG_CHR_DEV_SG
2060 	{
2061 		.procname	= "sg-big-buff",
2062 		.data		= &sg_big_buff,
2063 		.maxlen		= sizeof (int),
2064 		.mode		= 0444,
2065 		.proc_handler	= proc_dointvec,
2066 	},
2067 #endif
2068 #ifdef CONFIG_BSD_PROCESS_ACCT
2069 	{
2070 		.procname	= "acct",
2071 		.data		= &acct_parm,
2072 		.maxlen		= 3*sizeof(int),
2073 		.mode		= 0644,
2074 		.proc_handler	= proc_dointvec,
2075 	},
2076 #endif
2077 #ifdef CONFIG_MAGIC_SYSRQ
2078 	{
2079 		.procname	= "sysrq",
2080 		.data		= NULL,
2081 		.maxlen		= sizeof (int),
2082 		.mode		= 0644,
2083 		.proc_handler	= sysrq_sysctl_handler,
2084 	},
2085 #endif
2086 #ifdef CONFIG_PROC_SYSCTL
2087 	{
2088 		.procname	= "cad_pid",
2089 		.data		= NULL,
2090 		.maxlen		= sizeof (int),
2091 		.mode		= 0600,
2092 		.proc_handler	= proc_do_cad_pid,
2093 	},
2094 #endif
2095 	{
2096 		.procname	= "threads-max",
2097 		.data		= NULL,
2098 		.maxlen		= sizeof(int),
2099 		.mode		= 0644,
2100 		.proc_handler	= sysctl_max_threads,
2101 	},
2102 	{
2103 		.procname	= "random",
2104 		.mode		= 0555,
2105 		.child		= random_table,
2106 	},
2107 	{
2108 		.procname	= "usermodehelper",
2109 		.mode		= 0555,
2110 		.child		= usermodehelper_table,
2111 	},
2112 #ifdef CONFIG_FW_LOADER_USER_HELPER
2113 	{
2114 		.procname	= "firmware_config",
2115 		.mode		= 0555,
2116 		.child		= firmware_config_table,
2117 	},
2118 #endif
2119 	{
2120 		.procname	= "overflowuid",
2121 		.data		= &overflowuid,
2122 		.maxlen		= sizeof(int),
2123 		.mode		= 0644,
2124 		.proc_handler	= proc_dointvec_minmax,
2125 		.extra1		= &minolduid,
2126 		.extra2		= &maxolduid,
2127 	},
2128 	{
2129 		.procname	= "overflowgid",
2130 		.data		= &overflowgid,
2131 		.maxlen		= sizeof(int),
2132 		.mode		= 0644,
2133 		.proc_handler	= proc_dointvec_minmax,
2134 		.extra1		= &minolduid,
2135 		.extra2		= &maxolduid,
2136 	},
2137 #ifdef CONFIG_S390
2138 	{
2139 		.procname	= "userprocess_debug",
2140 		.data		= &show_unhandled_signals,
2141 		.maxlen		= sizeof(int),
2142 		.mode		= 0644,
2143 		.proc_handler	= proc_dointvec,
2144 	},
2145 #endif
2146 #ifdef CONFIG_SMP
2147 	{
2148 		.procname	= "oops_all_cpu_backtrace",
2149 		.data		= &sysctl_oops_all_cpu_backtrace,
2150 		.maxlen		= sizeof(int),
2151 		.mode		= 0644,
2152 		.proc_handler	= proc_dointvec_minmax,
2153 		.extra1		= SYSCTL_ZERO,
2154 		.extra2		= SYSCTL_ONE,
2155 	},
2156 #endif /* CONFIG_SMP */
2157 	{
2158 		.procname	= "pid_max",
2159 		.data		= &pid_max,
2160 		.maxlen		= sizeof (int),
2161 		.mode		= 0644,
2162 		.proc_handler	= proc_dointvec_minmax,
2163 		.extra1		= &pid_max_min,
2164 		.extra2		= &pid_max_max,
2165 	},
2166 	{
2167 		.procname	= "panic_on_oops",
2168 		.data		= &panic_on_oops,
2169 		.maxlen		= sizeof(int),
2170 		.mode		= 0644,
2171 		.proc_handler	= proc_dointvec,
2172 	},
2173 	{
2174 		.procname	= "panic_print",
2175 		.data		= &panic_print,
2176 		.maxlen		= sizeof(unsigned long),
2177 		.mode		= 0644,
2178 		.proc_handler	= proc_doulongvec_minmax,
2179 	},
2180 #if defined CONFIG_PRINTK
2181 	{
2182 		.procname	= "printk",
2183 		.data		= &console_loglevel,
2184 		.maxlen		= 4*sizeof(int),
2185 		.mode		= 0644,
2186 		.proc_handler	= proc_dointvec,
2187 	},
2188 	{
2189 		.procname	= "printk_ratelimit",
2190 		.data		= &printk_ratelimit_state.interval,
2191 		.maxlen		= sizeof(int),
2192 		.mode		= 0644,
2193 		.proc_handler	= proc_dointvec_jiffies,
2194 	},
2195 	{
2196 		.procname	= "printk_ratelimit_burst",
2197 		.data		= &printk_ratelimit_state.burst,
2198 		.maxlen		= sizeof(int),
2199 		.mode		= 0644,
2200 		.proc_handler	= proc_dointvec,
2201 	},
2202 	{
2203 		.procname	= "printk_delay",
2204 		.data		= &printk_delay_msec,
2205 		.maxlen		= sizeof(int),
2206 		.mode		= 0644,
2207 		.proc_handler	= proc_dointvec_minmax,
2208 		.extra1		= SYSCTL_ZERO,
2209 		.extra2		= &ten_thousand,
2210 	},
2211 	{
2212 		.procname	= "printk_devkmsg",
2213 		.data		= devkmsg_log_str,
2214 		.maxlen		= DEVKMSG_STR_MAX_SIZE,
2215 		.mode		= 0644,
2216 		.proc_handler	= devkmsg_sysctl_set_loglvl,
2217 	},
2218 	{
2219 		.procname	= "dmesg_restrict",
2220 		.data		= &dmesg_restrict,
2221 		.maxlen		= sizeof(int),
2222 		.mode		= 0644,
2223 		.proc_handler	= proc_dointvec_minmax_sysadmin,
2224 		.extra1		= SYSCTL_ZERO,
2225 		.extra2		= SYSCTL_ONE,
2226 	},
2227 	{
2228 		.procname	= "kptr_restrict",
2229 		.data		= &kptr_restrict,
2230 		.maxlen		= sizeof(int),
2231 		.mode		= 0644,
2232 		.proc_handler	= proc_dointvec_minmax_sysadmin,
2233 		.extra1		= SYSCTL_ZERO,
2234 		.extra2		= &two,
2235 	},
2236 #endif
2237 	{
2238 		.procname	= "ngroups_max",
2239 		.data		= &ngroups_max,
2240 		.maxlen		= sizeof (int),
2241 		.mode		= 0444,
2242 		.proc_handler	= proc_dointvec,
2243 	},
2244 	{
2245 		.procname	= "cap_last_cap",
2246 		.data		= (void *)&cap_last_cap,
2247 		.maxlen		= sizeof(int),
2248 		.mode		= 0444,
2249 		.proc_handler	= proc_dointvec,
2250 	},
2251 #if defined(CONFIG_LOCKUP_DETECTOR)
2252 	{
2253 		.procname       = "watchdog",
2254 		.data		= &watchdog_user_enabled,
2255 		.maxlen		= sizeof(int),
2256 		.mode		= 0644,
2257 		.proc_handler   = proc_watchdog,
2258 		.extra1		= SYSCTL_ZERO,
2259 		.extra2		= SYSCTL_ONE,
2260 	},
2261 	{
2262 		.procname	= "watchdog_thresh",
2263 		.data		= &watchdog_thresh,
2264 		.maxlen		= sizeof(int),
2265 		.mode		= 0644,
2266 		.proc_handler	= proc_watchdog_thresh,
2267 		.extra1		= SYSCTL_ZERO,
2268 		.extra2		= &sixty,
2269 	},
2270 	{
2271 		.procname       = "nmi_watchdog",
2272 		.data		= &nmi_watchdog_user_enabled,
2273 		.maxlen		= sizeof(int),
2274 		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
2275 		.proc_handler   = proc_nmi_watchdog,
2276 		.extra1		= SYSCTL_ZERO,
2277 		.extra2		= SYSCTL_ONE,
2278 	},
2279 	{
2280 		.procname	= "watchdog_cpumask",
2281 		.data		= &watchdog_cpumask_bits,
2282 		.maxlen		= NR_CPUS,
2283 		.mode		= 0644,
2284 		.proc_handler	= proc_watchdog_cpumask,
2285 	},
2286 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
2287 	{
2288 		.procname       = "soft_watchdog",
2289 		.data		= &soft_watchdog_user_enabled,
2290 		.maxlen		= sizeof(int),
2291 		.mode		= 0644,
2292 		.proc_handler   = proc_soft_watchdog,
2293 		.extra1		= SYSCTL_ZERO,
2294 		.extra2		= SYSCTL_ONE,
2295 	},
2296 	{
2297 		.procname	= "softlockup_panic",
2298 		.data		= &softlockup_panic,
2299 		.maxlen		= sizeof(int),
2300 		.mode		= 0644,
2301 		.proc_handler	= proc_dointvec_minmax,
2302 		.extra1		= SYSCTL_ZERO,
2303 		.extra2		= SYSCTL_ONE,
2304 	},
2305 #ifdef CONFIG_SMP
2306 	{
2307 		.procname	= "softlockup_all_cpu_backtrace",
2308 		.data		= &sysctl_softlockup_all_cpu_backtrace,
2309 		.maxlen		= sizeof(int),
2310 		.mode		= 0644,
2311 		.proc_handler	= proc_dointvec_minmax,
2312 		.extra1		= SYSCTL_ZERO,
2313 		.extra2		= SYSCTL_ONE,
2314 	},
2315 #endif /* CONFIG_SMP */
2316 #endif
2317 #ifdef CONFIG_HARDLOCKUP_DETECTOR
2318 	{
2319 		.procname	= "hardlockup_panic",
2320 		.data		= &hardlockup_panic,
2321 		.maxlen		= sizeof(int),
2322 		.mode		= 0644,
2323 		.proc_handler	= proc_dointvec_minmax,
2324 		.extra1		= SYSCTL_ZERO,
2325 		.extra2		= SYSCTL_ONE,
2326 	},
2327 #ifdef CONFIG_SMP
2328 	{
2329 		.procname	= "hardlockup_all_cpu_backtrace",
2330 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
2331 		.maxlen		= sizeof(int),
2332 		.mode		= 0644,
2333 		.proc_handler	= proc_dointvec_minmax,
2334 		.extra1		= SYSCTL_ZERO,
2335 		.extra2		= SYSCTL_ONE,
2336 	},
2337 #endif /* CONFIG_SMP */
2338 #endif
2339 #endif
2340 
2341 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
2342 	{
2343 		.procname       = "unknown_nmi_panic",
2344 		.data           = &unknown_nmi_panic,
2345 		.maxlen         = sizeof (int),
2346 		.mode           = 0644,
2347 		.proc_handler   = proc_dointvec,
2348 	},
2349 #endif
2350 
2351 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
2352 	defined(CONFIG_DEBUG_STACKOVERFLOW)
2353 	{
2354 		.procname	= "panic_on_stackoverflow",
2355 		.data		= &sysctl_panic_on_stackoverflow,
2356 		.maxlen		= sizeof(int),
2357 		.mode		= 0644,
2358 		.proc_handler	= proc_dointvec,
2359 	},
2360 #endif
2361 #if defined(CONFIG_X86)
2362 	{
2363 		.procname	= "panic_on_unrecovered_nmi",
2364 		.data		= &panic_on_unrecovered_nmi,
2365 		.maxlen		= sizeof(int),
2366 		.mode		= 0644,
2367 		.proc_handler	= proc_dointvec,
2368 	},
2369 	{
2370 		.procname	= "panic_on_io_nmi",
2371 		.data		= &panic_on_io_nmi,
2372 		.maxlen		= sizeof(int),
2373 		.mode		= 0644,
2374 		.proc_handler	= proc_dointvec,
2375 	},
2376 	{
2377 		.procname	= "bootloader_type",
2378 		.data		= &bootloader_type,
2379 		.maxlen		= sizeof (int),
2380 		.mode		= 0444,
2381 		.proc_handler	= proc_dointvec,
2382 	},
2383 	{
2384 		.procname	= "bootloader_version",
2385 		.data		= &bootloader_version,
2386 		.maxlen		= sizeof (int),
2387 		.mode		= 0444,
2388 		.proc_handler	= proc_dointvec,
2389 	},
2390 	{
2391 		.procname	= "io_delay_type",
2392 		.data		= &io_delay_type,
2393 		.maxlen		= sizeof(int),
2394 		.mode		= 0644,
2395 		.proc_handler	= proc_dointvec,
2396 	},
2397 #endif
2398 #if defined(CONFIG_MMU)
2399 	{
2400 		.procname	= "randomize_va_space",
2401 		.data		= &randomize_va_space,
2402 		.maxlen		= sizeof(int),
2403 		.mode		= 0644,
2404 		.proc_handler	= proc_dointvec,
2405 	},
2406 #endif
2407 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
2408 	{
2409 		.procname	= "spin_retry",
2410 		.data		= &spin_retry,
2411 		.maxlen		= sizeof (int),
2412 		.mode		= 0644,
2413 		.proc_handler	= proc_dointvec,
2414 	},
2415 #endif
2416 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
2417 	{
2418 		.procname	= "acpi_video_flags",
2419 		.data		= &acpi_realmode_flags,
2420 		.maxlen		= sizeof (unsigned long),
2421 		.mode		= 0644,
2422 		.proc_handler	= proc_doulongvec_minmax,
2423 	},
2424 #endif
2425 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
2426 	{
2427 		.procname	= "ignore-unaligned-usertrap",
2428 		.data		= &no_unaligned_warning,
2429 		.maxlen		= sizeof (int),
2430 		.mode		= 0644,
2431 		.proc_handler	= proc_dointvec,
2432 	},
2433 #endif
2434 #ifdef CONFIG_IA64
2435 	{
2436 		.procname	= "unaligned-dump-stack",
2437 		.data		= &unaligned_dump_stack,
2438 		.maxlen		= sizeof (int),
2439 		.mode		= 0644,
2440 		.proc_handler	= proc_dointvec,
2441 	},
2442 #endif
2443 #ifdef CONFIG_DETECT_HUNG_TASK
2444 #ifdef CONFIG_SMP
2445 	{
2446 		.procname	= "hung_task_all_cpu_backtrace",
2447 		.data		= &sysctl_hung_task_all_cpu_backtrace,
2448 		.maxlen		= sizeof(int),
2449 		.mode		= 0644,
2450 		.proc_handler	= proc_dointvec_minmax,
2451 		.extra1		= SYSCTL_ZERO,
2452 		.extra2		= SYSCTL_ONE,
2453 	},
2454 #endif /* CONFIG_SMP */
2455 	{
2456 		.procname	= "hung_task_panic",
2457 		.data		= &sysctl_hung_task_panic,
2458 		.maxlen		= sizeof(int),
2459 		.mode		= 0644,
2460 		.proc_handler	= proc_dointvec_minmax,
2461 		.extra1		= SYSCTL_ZERO,
2462 		.extra2		= SYSCTL_ONE,
2463 	},
2464 	{
2465 		.procname	= "hung_task_check_count",
2466 		.data		= &sysctl_hung_task_check_count,
2467 		.maxlen		= sizeof(int),
2468 		.mode		= 0644,
2469 		.proc_handler	= proc_dointvec_minmax,
2470 		.extra1		= SYSCTL_ZERO,
2471 	},
2472 	{
2473 		.procname	= "hung_task_timeout_secs",
2474 		.data		= &sysctl_hung_task_timeout_secs,
2475 		.maxlen		= sizeof(unsigned long),
2476 		.mode		= 0644,
2477 		.proc_handler	= proc_dohung_task_timeout_secs,
2478 		.extra2		= &hung_task_timeout_max,
2479 	},
2480 	{
2481 		.procname	= "hung_task_check_interval_secs",
2482 		.data		= &sysctl_hung_task_check_interval_secs,
2483 		.maxlen		= sizeof(unsigned long),
2484 		.mode		= 0644,
2485 		.proc_handler	= proc_dohung_task_timeout_secs,
2486 		.extra2		= &hung_task_timeout_max,
2487 	},
2488 	{
2489 		.procname	= "hung_task_warnings",
2490 		.data		= &sysctl_hung_task_warnings,
2491 		.maxlen		= sizeof(int),
2492 		.mode		= 0644,
2493 		.proc_handler	= proc_dointvec_minmax,
2494 		.extra1		= &neg_one,
2495 	},
2496 #endif
2497 #ifdef CONFIG_RT_MUTEXES
2498 	{
2499 		.procname	= "max_lock_depth",
2500 		.data		= &max_lock_depth,
2501 		.maxlen		= sizeof(int),
2502 		.mode		= 0644,
2503 		.proc_handler	= proc_dointvec,
2504 	},
2505 #endif
2506 	{
2507 		.procname	= "poweroff_cmd",
2508 		.data		= &poweroff_cmd,
2509 		.maxlen		= POWEROFF_CMD_PATH_LEN,
2510 		.mode		= 0644,
2511 		.proc_handler	= proc_dostring,
2512 	},
2513 #ifdef CONFIG_KEYS
2514 	{
2515 		.procname	= "keys",
2516 		.mode		= 0555,
2517 		.child		= key_sysctls,
2518 	},
2519 #endif
2520 #ifdef CONFIG_PERF_EVENTS
2521 	/*
2522 	 * User-space scripts rely on the existence of this file
2523 	 * as a feature check for perf_events being enabled.
2524 	 *
2525 	 * So it's an ABI, do not remove!
2526 	 */
2527 	{
2528 		.procname	= "perf_event_paranoid",
2529 		.data		= &sysctl_perf_event_paranoid,
2530 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
2531 		.mode		= 0644,
2532 		.proc_handler	= proc_dointvec,
2533 	},
2534 	{
2535 		.procname	= "perf_event_mlock_kb",
2536 		.data		= &sysctl_perf_event_mlock,
2537 		.maxlen		= sizeof(sysctl_perf_event_mlock),
2538 		.mode		= 0644,
2539 		.proc_handler	= proc_dointvec,
2540 	},
2541 	{
2542 		.procname	= "perf_event_max_sample_rate",
2543 		.data		= &sysctl_perf_event_sample_rate,
2544 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
2545 		.mode		= 0644,
2546 		.proc_handler	= perf_proc_update_handler,
2547 		.extra1		= SYSCTL_ONE,
2548 	},
2549 	{
2550 		.procname	= "perf_cpu_time_max_percent",
2551 		.data		= &sysctl_perf_cpu_time_max_percent,
2552 		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
2553 		.mode		= 0644,
2554 		.proc_handler	= perf_cpu_time_max_percent_handler,
2555 		.extra1		= SYSCTL_ZERO,
2556 		.extra2		= &one_hundred,
2557 	},
2558 	{
2559 		.procname	= "perf_event_max_stack",
2560 		.data		= &sysctl_perf_event_max_stack,
2561 		.maxlen		= sizeof(sysctl_perf_event_max_stack),
2562 		.mode		= 0644,
2563 		.proc_handler	= perf_event_max_stack_handler,
2564 		.extra1		= SYSCTL_ZERO,
2565 		.extra2		= &six_hundred_forty_kb,
2566 	},
2567 	{
2568 		.procname	= "perf_event_max_contexts_per_stack",
2569 		.data		= &sysctl_perf_event_max_contexts_per_stack,
2570 		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
2571 		.mode		= 0644,
2572 		.proc_handler	= perf_event_max_stack_handler,
2573 		.extra1		= SYSCTL_ZERO,
2574 		.extra2		= &one_thousand,
2575 	},
2576 #endif
2577 	{
2578 		.procname	= "panic_on_warn",
2579 		.data		= &panic_on_warn,
2580 		.maxlen		= sizeof(int),
2581 		.mode		= 0644,
2582 		.proc_handler	= proc_dointvec_minmax,
2583 		.extra1		= SYSCTL_ZERO,
2584 		.extra2		= SYSCTL_ONE,
2585 	},
2586 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
2587 	{
2588 		.procname	= "timer_migration",
2589 		.data		= &sysctl_timer_migration,
2590 		.maxlen		= sizeof(unsigned int),
2591 		.mode		= 0644,
2592 		.proc_handler	= timer_migration_handler,
2593 		.extra1		= SYSCTL_ZERO,
2594 		.extra2		= SYSCTL_ONE,
2595 	},
2596 #endif
2597 #ifdef CONFIG_BPF_SYSCALL
2598 	{
2599 		.procname	= "unprivileged_bpf_disabled",
2600 		.data		= &sysctl_unprivileged_bpf_disabled,
2601 		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
2602 		.mode		= 0644,
2603 		/* only handle a transition from default "0" to "1" */
2604 		.proc_handler	= proc_dointvec_minmax,
2605 		.extra1		= SYSCTL_ONE,
2606 		.extra2		= SYSCTL_ONE,
2607 	},
2608 	{
2609 		.procname	= "bpf_stats_enabled",
2610 		.data		= &bpf_stats_enabled_key.key,
2611 		.maxlen		= sizeof(bpf_stats_enabled_key),
2612 		.mode		= 0644,
2613 		.proc_handler	= bpf_stats_handler,
2614 	},
2615 #endif
2616 #if defined(CONFIG_TREE_RCU)
2617 	{
2618 		.procname	= "panic_on_rcu_stall",
2619 		.data		= &sysctl_panic_on_rcu_stall,
2620 		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
2621 		.mode		= 0644,
2622 		.proc_handler	= proc_dointvec_minmax,
2623 		.extra1		= SYSCTL_ZERO,
2624 		.extra2		= SYSCTL_ONE,
2625 	},
2626 #endif
2627 #if defined(CONFIG_TREE_RCU)
2628 	{
2629 		.procname	= "max_rcu_stall_to_panic",
2630 		.data		= &sysctl_max_rcu_stall_to_panic,
2631 		.maxlen		= sizeof(sysctl_max_rcu_stall_to_panic),
2632 		.mode		= 0644,
2633 		.proc_handler	= proc_dointvec_minmax,
2634 		.extra1		= SYSCTL_ONE,
2635 		.extra2		= SYSCTL_INT_MAX,
2636 	},
2637 #endif
2638 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
2639 	{
2640 		.procname	= "stack_erasing",
2641 		.data		= NULL,
2642 		.maxlen		= sizeof(int),
2643 		.mode		= 0600,
2644 		.proc_handler	= stack_erasing_sysctl,
2645 		.extra1		= SYSCTL_ZERO,
2646 		.extra2		= SYSCTL_ONE,
2647 	},
2648 #endif
2649 	{ }
2650 };
2651 
/*
 * vm_table - sysctl table for the memory-management ("vm") tunables.
 *
 * Every element describes one sysctl file:
 *   .procname      file name of the entry
 *   .data/.maxlen  backing variable and its size in bytes (NULL/0 where the
 *                  entry has no table-provided storage)
 *   .mode          octal permission bits of the file
 *   .proc_handler  handler function installed for the entry
 *   .extra1/2      optional per-entry values; with the *_minmax handlers
 *                  these are presumably the lower/upper bound -- TODO confirm
 *                  against the handler definitions
 *
 * Members that an entry does not name are zero-initialised (NULL), so an
 * entry that sets only .extra1 has no .extra2.
 *
 * Entries wrapped in #if/#ifdef exist only in matching configurations.
 * The array is terminated by an empty element.
 */
2652 static struct ctl_table vm_table[] = {
2653 	{
2654 		.procname	= "overcommit_memory",
2655 		.data		= &sysctl_overcommit_memory,
2656 		.maxlen		= sizeof(sysctl_overcommit_memory),
2657 		.mode		= 0644,
2658 		.proc_handler	= overcommit_policy_handler,
2659 		.extra1		= SYSCTL_ZERO,
2660 		.extra2		= &two,
2661 	},
2662 	{
2663 		.procname	= "panic_on_oom",
2664 		.data		= &sysctl_panic_on_oom,
2665 		.maxlen		= sizeof(sysctl_panic_on_oom),
2666 		.mode		= 0644,
2667 		.proc_handler	= proc_dointvec_minmax,
2668 		.extra1		= SYSCTL_ZERO,
2669 		.extra2		= &two,
2670 	},
2671 	{
2672 		.procname	= "oom_kill_allocating_task",
2673 		.data		= &sysctl_oom_kill_allocating_task,
2674 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
2675 		.mode		= 0644,
2676 		.proc_handler	= proc_dointvec,
2677 	},
2678 	{
2679 		.procname	= "oom_dump_tasks",
2680 		.data		= &sysctl_oom_dump_tasks,
2681 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
2682 		.mode		= 0644,
2683 		.proc_handler	= proc_dointvec,
2684 	},
2685 	{
2686 		.procname	= "overcommit_ratio",
2687 		.data		= &sysctl_overcommit_ratio,
2688 		.maxlen		= sizeof(sysctl_overcommit_ratio),
2689 		.mode		= 0644,
2690 		.proc_handler	= overcommit_ratio_handler,
2691 	},
2692 	{
2693 		.procname	= "overcommit_kbytes",
2694 		.data		= &sysctl_overcommit_kbytes,
2695 		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2696 		.mode		= 0644,
2697 		.proc_handler	= overcommit_kbytes_handler,
2698 	},
2699 	{
2700 		.procname	= "page-cluster",
2701 		.data		= &page_cluster,
2702 		.maxlen		= sizeof(int),
2703 		.mode		= 0644,
2704 		.proc_handler	= proc_dointvec_minmax,
2705 		.extra1		= SYSCTL_ZERO,
2706 	},
	/*
	 * Dirty-page writeback knobs.  The *_ratio and *_bytes entries each
	 * use a dedicated handler rather than a generic proc_do* helper.
	 */
2707 	{
2708 		.procname	= "dirty_background_ratio",
2709 		.data		= &dirty_background_ratio,
2710 		.maxlen		= sizeof(dirty_background_ratio),
2711 		.mode		= 0644,
2712 		.proc_handler	= dirty_background_ratio_handler,
2713 		.extra1		= SYSCTL_ZERO,
2714 		.extra2		= &one_hundred,
2715 	},
2716 	{
2717 		.procname	= "dirty_background_bytes",
2718 		.data		= &dirty_background_bytes,
2719 		.maxlen		= sizeof(dirty_background_bytes),
2720 		.mode		= 0644,
2721 		.proc_handler	= dirty_background_bytes_handler,
2722 		.extra1		= &one_ul,
2723 	},
2724 	{
2725 		.procname	= "dirty_ratio",
2726 		.data		= &vm_dirty_ratio,
2727 		.maxlen		= sizeof(vm_dirty_ratio),
2728 		.mode		= 0644,
2729 		.proc_handler	= dirty_ratio_handler,
2730 		.extra1		= SYSCTL_ZERO,
2731 		.extra2		= &one_hundred,
2732 	},
2733 	{
2734 		.procname	= "dirty_bytes",
2735 		.data		= &vm_dirty_bytes,
2736 		.maxlen		= sizeof(vm_dirty_bytes),
2737 		.mode		= 0644,
2738 		.proc_handler	= dirty_bytes_handler,
2739 		.extra1		= &dirty_bytes_min,
2740 	},
2741 	{
2742 		.procname	= "dirty_writeback_centisecs",
2743 		.data		= &dirty_writeback_interval,
2744 		.maxlen		= sizeof(dirty_writeback_interval),
2745 		.mode		= 0644,
2746 		.proc_handler	= dirty_writeback_centisecs_handler,
2747 	},
2748 	{
2749 		.procname	= "dirty_expire_centisecs",
2750 		.data		= &dirty_expire_interval,
2751 		.maxlen		= sizeof(dirty_expire_interval),
2752 		.mode		= 0644,
2753 		.proc_handler	= proc_dointvec_minmax,
2754 		.extra1		= SYSCTL_ZERO,
2755 	},
2756 	{
2757 		.procname	= "dirtytime_expire_seconds",
2758 		.data		= &dirtytime_expire_interval,
2759 		.maxlen		= sizeof(dirtytime_expire_interval),
2760 		.mode		= 0644,
2761 		.proc_handler	= dirtytime_interval_handler,
2762 		.extra1		= SYSCTL_ZERO,
2763 	},
2764 	{
2765 		.procname	= "swappiness",
2766 		.data		= &vm_swappiness,
2767 		.maxlen		= sizeof(vm_swappiness),
2768 		.mode		= 0644,
2769 		.proc_handler	= proc_dointvec_minmax,
2770 		.extra1		= SYSCTL_ZERO,
2771 		.extra2		= &two_hundred,
2772 	},
2773 #ifdef CONFIG_HUGETLB_PAGE
	/*
	 * Huge page entries.  The nr_* entries carry no .data pointer, so
	 * the value presumably lives behind their handlers -- TODO confirm.
	 */
2774 	{
2775 		.procname	= "nr_hugepages",
2776 		.data		= NULL,
2777 		.maxlen		= sizeof(unsigned long),
2778 		.mode		= 0644,
2779 		.proc_handler	= hugetlb_sysctl_handler,
2780 	},
2781 #ifdef CONFIG_NUMA
2782 	{
2783 		.procname       = "nr_hugepages_mempolicy",
2784 		.data           = NULL,
2785 		.maxlen         = sizeof(unsigned long),
2786 		.mode           = 0644,
		/* '&' on a function name is equivalent to the bare name used elsewhere */
2787 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2788 	},
	/*
	 * NOTE(review): because of the enclosing conditionals this entry is
	 * only built when both CONFIG_HUGETLB_PAGE and CONFIG_NUMA are set;
	 * confirm that the dependency on CONFIG_HUGETLB_PAGE is intended.
	 */
2789 	{
2790 		.procname		= "numa_stat",
2791 		.data			= &sysctl_vm_numa_stat,
2792 		.maxlen			= sizeof(int),
2793 		.mode			= 0644,
2794 		.proc_handler	= sysctl_vm_numa_stat_handler,
2795 		.extra1			= SYSCTL_ZERO,
2796 		.extra2			= SYSCTL_ONE,
2797 	},
2798 #endif /* CONFIG_NUMA */
	/*
	 * NOTE(review): sized as gid_t but served by the int-vector handler;
	 * assumes sizeof(gid_t) == sizeof(int) -- confirm.
	 */
2799 	 {
2800 		.procname	= "hugetlb_shm_group",
2801 		.data		= &sysctl_hugetlb_shm_group,
2802 		.maxlen		= sizeof(gid_t),
2803 		.mode		= 0644,
2804 		.proc_handler	= proc_dointvec,
2805 	 },
2806 	{
2807 		.procname	= "nr_overcommit_hugepages",
2808 		.data		= NULL,
2809 		.maxlen		= sizeof(unsigned long),
2810 		.mode		= 0644,
2811 		.proc_handler	= hugetlb_overcommit_handler,
2812 	},
2813 #endif /* CONFIG_HUGETLB_PAGE */
2814 	{
2815 		.procname	= "lowmem_reserve_ratio",
2816 		.data		= &sysctl_lowmem_reserve_ratio,
2817 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
2818 		.mode		= 0644,
2819 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
2820 	},
	/* Mode 0200: only the owner-write bit is set, so the file is not readable. */
2821 	{
2822 		.procname	= "drop_caches",
2823 		.data		= &sysctl_drop_caches,
2824 		.maxlen		= sizeof(int),
2825 		.mode		= 0200,
2826 		.proc_handler	= drop_caches_sysctl_handler,
2827 		.extra1		= SYSCTL_ONE,
2828 		.extra2		= &four,
2829 	},
2830 #ifdef CONFIG_COMPACTION
	/*
	 * Write-only (0200) and without a backing variable; presumably a
	 * trigger acted on entirely by the handler -- TODO confirm.
	 */
2831 	{
2832 		.procname	= "compact_memory",
2833 		.data		= NULL,
2834 		.maxlen		= sizeof(int),
2835 		.mode		= 0200,
2836 		.proc_handler	= sysctl_compaction_handler,
2837 	},
2838 	{
2839 		.procname	= "compaction_proactiveness",
2840 		.data		= &sysctl_compaction_proactiveness,
2841 		.maxlen		= sizeof(sysctl_compaction_proactiveness),
2842 		.mode		= 0644,
2843 		.proc_handler	= proc_dointvec_minmax,
2844 		.extra1		= SYSCTL_ZERO,
2845 		.extra2		= &one_hundred,
2846 	},
2847 	{
2848 		.procname	= "extfrag_threshold",
2849 		.data		= &sysctl_extfrag_threshold,
2850 		.maxlen		= sizeof(int),
2851 		.mode		= 0644,
2852 		.proc_handler	= proc_dointvec_minmax,
2853 		.extra1		= &min_extfrag_threshold,
2854 		.extra2		= &max_extfrag_threshold,
2855 	},
2856 	{
2857 		.procname	= "compact_unevictable_allowed",
2858 		.data		= &sysctl_compact_unevictable_allowed,
2859 		.maxlen		= sizeof(int),
2860 		.mode		= 0644,
2861 		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
2862 		.extra1		= SYSCTL_ZERO,
2863 		.extra2		= SYSCTL_ONE,
2864 	},
2865 
2866 #endif /* CONFIG_COMPACTION */
2867 	{
2868 		.procname	= "min_free_kbytes",
2869 		.data		= &min_free_kbytes,
2870 		.maxlen		= sizeof(min_free_kbytes),
2871 		.mode		= 0644,
2872 		.proc_handler	= min_free_kbytes_sysctl_handler,
2873 		.extra1		= SYSCTL_ZERO,
2874 	},
2875 	{
2876 		.procname	= "watermark_boost_factor",
2877 		.data		= &watermark_boost_factor,
2878 		.maxlen		= sizeof(watermark_boost_factor),
2879 		.mode		= 0644,
2880 		.proc_handler	= proc_dointvec_minmax,
2881 		.extra1		= SYSCTL_ZERO,
2882 	},
2883 	{
2884 		.procname	= "watermark_scale_factor",
2885 		.data		= &watermark_scale_factor,
2886 		.maxlen		= sizeof(watermark_scale_factor),
2887 		.mode		= 0644,
2888 		.proc_handler	= watermark_scale_factor_sysctl_handler,
2889 		.extra1		= SYSCTL_ONE,
2890 		.extra2		= &one_thousand,
2891 	},
2892 	{
2893 		.procname	= "percpu_pagelist_fraction",
2894 		.data		= &percpu_pagelist_fraction,
2895 		.maxlen		= sizeof(percpu_pagelist_fraction),
2896 		.mode		= 0644,
2897 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
2898 		.extra1		= SYSCTL_ZERO,
2899 	},
2900 	{
2901 		.procname	= "page_lock_unfairness",
2902 		.data		= &sysctl_page_lock_unfairness,
2903 		.maxlen		= sizeof(sysctl_page_lock_unfairness),
2904 		.mode		= 0644,
2905 		.proc_handler	= proc_dointvec_minmax,
2906 		.extra1		= SYSCTL_ZERO,
2907 	},
	/* Exactly one of the next two entries is built, depending on CONFIG_MMU. */
2908 #ifdef CONFIG_MMU
2909 	{
2910 		.procname	= "max_map_count",
2911 		.data		= &sysctl_max_map_count,
2912 		.maxlen		= sizeof(sysctl_max_map_count),
2913 		.mode		= 0644,
2914 		.proc_handler	= proc_dointvec_minmax,
2915 		.extra1		= SYSCTL_ZERO,
2916 	},
2917 #else /* !CONFIG_MMU */
2918 	{
2919 		.procname	= "nr_trim_pages",
2920 		.data		= &sysctl_nr_trim_pages,
2921 		.maxlen		= sizeof(sysctl_nr_trim_pages),
2922 		.mode		= 0644,
2923 		.proc_handler	= proc_dointvec_minmax,
2924 		.extra1		= SYSCTL_ZERO,
2925 	},
2926 #endif /* CONFIG_MMU */
2927 	{
2928 		.procname	= "laptop_mode",
2929 		.data		= &laptop_mode,
2930 		.maxlen		= sizeof(laptop_mode),
2931 		.mode		= 0644,
2932 		.proc_handler	= proc_dointvec_jiffies,
2933 	},
2934 	{
2935 		.procname	= "block_dump",
2936 		.data		= &block_dump,
2937 		.maxlen		= sizeof(block_dump),
2938 		.mode		= 0644,
2939 		.proc_handler	= proc_dointvec_minmax,
2940 		.extra1		= SYSCTL_ZERO,
2941 	},
2942 	{
2943 		.procname	= "vfs_cache_pressure",
2944 		.data		= &sysctl_vfs_cache_pressure,
2945 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
2946 		.mode		= 0644,
2947 		.proc_handler	= proc_dointvec_minmax,
2948 		.extra1		= SYSCTL_ZERO,
2949 	},
2950 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2951     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2952 	{
2953 		.procname	= "legacy_va_layout",
2954 		.data		= &sysctl_legacy_va_layout,
2955 		.maxlen		= sizeof(sysctl_legacy_va_layout),
2956 		.mode		= 0644,
2957 		.proc_handler	= proc_dointvec_minmax,
2958 		.extra1		= SYSCTL_ZERO,
2959 	},
2960 #endif
2961 #ifdef CONFIG_NUMA
	/* The file is named "zone_reclaim_mode" but is backed by node_reclaim_mode. */
2962 	{
2963 		.procname	= "zone_reclaim_mode",
2964 		.data		= &node_reclaim_mode,
2965 		.maxlen		= sizeof(node_reclaim_mode),
2966 		.mode		= 0644,
2967 		.proc_handler	= proc_dointvec_minmax,
2968 		.extra1		= SYSCTL_ZERO,
2969 	},
2970 	{
2971 		.procname	= "min_unmapped_ratio",
2972 		.data		= &sysctl_min_unmapped_ratio,
2973 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
2974 		.mode		= 0644,
2975 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
2976 		.extra1		= SYSCTL_ZERO,
2977 		.extra2		= &one_hundred,
2978 	},
2979 	{
2980 		.procname	= "min_slab_ratio",
2981 		.data		= &sysctl_min_slab_ratio,
2982 		.maxlen		= sizeof(sysctl_min_slab_ratio),
2983 		.mode		= 0644,
2984 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
2985 		.extra1		= SYSCTL_ZERO,
2986 		.extra2		= &one_hundred,
2987 	},
2988 #endif
2989 #ifdef CONFIG_SMP
2990 	{
2991 		.procname	= "stat_interval",
2992 		.data		= &sysctl_stat_interval,
2993 		.maxlen		= sizeof(sysctl_stat_interval),
2994 		.mode		= 0644,
2995 		.proc_handler	= proc_dointvec_jiffies,
2996 	},
	/*
	 * No backing storage (.data NULL, .maxlen 0) and owner-only access
	 * (0600); whatever the entry does is up to vmstat_refresh.
	 */
2997 	{
2998 		.procname	= "stat_refresh",
2999 		.data		= NULL,
3000 		.maxlen		= 0,
3001 		.mode		= 0600,
3002 		.proc_handler	= vmstat_refresh,
3003 	},
3004 #endif
3005 #ifdef CONFIG_MMU
3006 	{
3007 		.procname	= "mmap_min_addr",
3008 		.data		= &dac_mmap_min_addr,
3009 		.maxlen		= sizeof(unsigned long),
3010 		.mode		= 0644,
3011 		.proc_handler	= mmap_min_addr_handler,
3012 	},
3013 #endif
3014 #ifdef CONFIG_NUMA
3015 	{
3016 		.procname	= "numa_zonelist_order",
3017 		.data		= &numa_zonelist_order,
3018 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
3019 		.mode		= 0644,
3020 		.proc_handler	= numa_zonelist_order_handler,
3021 	},
3022 #endif
3023 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
3024    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
	/*
	 * One file name, two possible backing variables: vdso32_enabled on
	 * CONFIG_X86_32, vdso_enabled otherwise (i.e. the SUPERH arm of the
	 * enclosing condition).
	 *
	 * NOTE(review): .extra1 is set although the handler is plain
	 * proc_dointvec rather than a *_minmax variant; confirm whether the
	 * lower bound is actually enforced.
	 */
3025 	{
3026 		.procname	= "vdso_enabled",
3027 #ifdef CONFIG_X86_32
3028 		.data		= &vdso32_enabled,
3029 		.maxlen		= sizeof(vdso32_enabled),
3030 #else
3031 		.data		= &vdso_enabled,
3032 		.maxlen		= sizeof(vdso_enabled),
3033 #endif /* CONFIG_X86_32 */
3034 		.mode		= 0644,
3035 		.proc_handler	= proc_dointvec,
3036 		.extra1		= SYSCTL_ZERO,
3037 	},
3038 #endif
3039 #ifdef CONFIG_HIGHMEM
3040 	{
3041 		.procname	= "highmem_is_dirtyable",
3042 		.data		= &vm_highmem_is_dirtyable,
3043 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
3044 		.mode		= 0644,
3045 		.proc_handler	= proc_dointvec_minmax,
3046 		.extra1		= SYSCTL_ZERO,
3047 		.extra2		= SYSCTL_ONE,
3048 	},
3049 #endif
3050 #ifdef CONFIG_MEMORY_FAILURE
3051 	{
3052 		.procname	= "memory_failure_early_kill",
3053 		.data		= &sysctl_memory_failure_early_kill,
3054 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
3055 		.mode		= 0644,
3056 		.proc_handler	= proc_dointvec_minmax,
3057 		.extra1		= SYSCTL_ZERO,
3058 		.extra2		= SYSCTL_ONE,
3059 	},
3060 	{
3061 		.procname	= "memory_failure_recovery",
3062 		.data		= &sysctl_memory_failure_recovery,
3063 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
3064 		.mode		= 0644,
3065 		.proc_handler	= proc_dointvec_minmax,
3066 		.extra1		= SYSCTL_ZERO,
3067 		.extra2		= SYSCTL_ONE,
3068 	},
3069 #endif
3070 	{
3071 		.procname	= "user_reserve_kbytes",
3072 		.data		= &sysctl_user_reserve_kbytes,
3073 		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
3074 		.mode		= 0644,
3075 		.proc_handler	= proc_doulongvec_minmax,
3076 	},
3077 	{
3078 		.procname	= "admin_reserve_kbytes",
3079 		.data		= &sysctl_admin_reserve_kbytes,
3080 		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
3081 		.mode		= 0644,
3082 		.proc_handler	= proc_doulongvec_minmax,
3083 	},
	/*
	 * The two mmap_rnd_* entries are owner-only (0600).  The (void *)
	 * casts on the bounds presumably strip a const qualifier from the
	 * *_min/*_max variables -- TODO confirm against their declarations.
	 */
3084 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
3085 	{
3086 		.procname	= "mmap_rnd_bits",
3087 		.data		= &mmap_rnd_bits,
3088 		.maxlen		= sizeof(mmap_rnd_bits),
3089 		.mode		= 0600,
3090 		.proc_handler	= proc_dointvec_minmax,
3091 		.extra1		= (void *)&mmap_rnd_bits_min,
3092 		.extra2		= (void *)&mmap_rnd_bits_max,
3093 	},
3094 #endif
3095 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
3096 	{
3097 		.procname	= "mmap_rnd_compat_bits",
3098 		.data		= &mmap_rnd_compat_bits,
3099 		.maxlen		= sizeof(mmap_rnd_compat_bits),
3100 		.mode		= 0600,
3101 		.proc_handler	= proc_dointvec_minmax,
3102 		.extra1		= (void *)&mmap_rnd_compat_bits_min,
3103 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
3104 	},
3105 #endif
3106 #ifdef CONFIG_USERFAULTFD
3107 	{
3108 		.procname	= "unprivileged_userfaultfd",
3109 		.data		= &sysctl_unprivileged_userfaultfd,
3110 		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
3111 		.mode		= 0644,
3112 		.proc_handler	= proc_dointvec_minmax,
3113 		.extra1		= SYSCTL_ZERO,
3114 		.extra2		= SYSCTL_ONE,
3115 	},
3116 #endif
	/* Empty element terminating the table. */
3117 	{ }
3118 };
3119 
3120 static struct ctl_table fs_table[] = {
3121 	{
3122 		.procname	= "inode-nr",
3123 		.data		= &inodes_stat,
3124 		.maxlen		= 2*sizeof(long),
3125 		.mode		= 0444,
3126 		.proc_handler	= proc_nr_inodes,
3127 	},
3128 	{
3129 		.procname	= "inode-state",
3130 		.data		= &inodes_stat,
3131 		.maxlen		= 7*sizeof(long),
3132 		.mode		= 0444,
3133 		.proc_handler	= proc_nr_inodes,
3134 	},
3135 	{
3136 		.procname	= "file-nr",
3137 		.data		= &files_stat,
3138 		.maxlen		= sizeof(files_stat),
3139 		.mode		= 0444,
3140 		.proc_handler	= proc_nr_files,
3141 	},
3142 	{
3143 		.procname	= "file-max",
3144 		.data		= &files_stat.max_files,
3145 		.maxlen		= sizeof(files_stat.max_files),
3146 		.mode		= 0644,
3147 		.proc_handler	= proc_doulongvec_minmax,
3148 		.extra1		= &zero_ul,
3149 		.extra2		= &long_max,
3150 	},
3151 	{
3152 		.procname	= "nr_open",
3153 		.data		= &sysctl_nr_open,
3154 		.maxlen		= sizeof(unsigned int),
3155 		.mode		= 0644,
3156 		.proc_handler	= proc_dointvec_minmax,
3157 		.extra1		= &sysctl_nr_open_min,
3158 		.extra2		= &sysctl_nr_open_max,
3159 	},
3160 	{
3161 		.procname	= "dentry-state",
3162 		.data		= &dentry_stat,
3163 		.maxlen		= 6*sizeof(long),
3164 		.mode		= 0444,
3165 		.proc_handler	= proc_nr_dentry,
3166 	},
3167 	{
3168 		.procname	= "overflowuid",
3169 		.data		= &fs_overflowuid,
3170 		.maxlen		= sizeof(int),
3171 		.mode		= 0644,
3172 		.proc_handler	= proc_dointvec_minmax,
3173 		.extra1		= &minolduid,
3174 		.extra2		= &maxolduid,
3175 	},
3176 	{
3177 		.procname	= "overflowgid",
3178 		.data		= &fs_overflowgid,
3179 		.maxlen		= sizeof(int),
3180 		.mode		= 0644,
3181 		.proc_handler	= proc_dointvec_minmax,
3182 		.extra1		= &minolduid,
3183 		.extra2		= &maxolduid,
3184 	},
3185 #ifdef CONFIG_FILE_LOCKING
3186 	{
3187 		.procname	= "leases-enable",
3188 		.data		= &leases_enable,
3189 		.maxlen		= sizeof(int),
3190 		.mode		= 0644,
3191 		.proc_handler	= proc_dointvec,
3192 	},
3193 #endif
3194 #ifdef CONFIG_DNOTIFY
3195 	{
3196 		.procname	= "dir-notify-enable",
3197 		.data		= &dir_notify_enable,
3198 		.maxlen		= sizeof(int),
3199 		.mode		= 0644,
3200 		.proc_handler	= proc_dointvec,
3201 	},
3202 #endif
3203 #ifdef CONFIG_MMU
3204 #ifdef CONFIG_FILE_LOCKING
3205 	{
3206 		.procname	= "lease-break-time",
3207 		.data		= &lease_break_time,
3208 		.maxlen		= sizeof(int),
3209 		.mode		= 0644,
3210 		.proc_handler	= proc_dointvec,
3211 	},
3212 #endif
3213 #ifdef CONFIG_AIO
3214 	{
3215 		.procname	= "aio-nr",
3216 		.data		= &aio_nr,
3217 		.maxlen		= sizeof(aio_nr),
3218 		.mode		= 0444,
3219 		.proc_handler	= proc_doulongvec_minmax,
3220 	},
3221 	{
3222 		.procname	= "aio-max-nr",
3223 		.data		= &aio_max_nr,
3224 		.maxlen		= sizeof(aio_max_nr),
3225 		.mode		= 0644,
3226 		.proc_handler	= proc_doulongvec_minmax,
3227 	},
3228 #endif /* CONFIG_AIO */
3229 #ifdef CONFIG_INOTIFY_USER
3230 	{
3231 		.procname	= "inotify",
3232 		.mode		= 0555,
3233 		.child		= inotify_table,
3234 	},
3235 #endif
3236 #ifdef CONFIG_FANOTIFY
3237 	{
3238 		.procname	= "fanotify",
3239 		.mode		= 0555,
3240 		.child		= fanotify_table,
3241 	},
3242 #endif
3243 #ifdef CONFIG_EPOLL
3244 	{
3245 		.procname	= "epoll",
3246 		.mode		= 0555,
3247 		.child		= epoll_table,
3248 	},
3249 #endif
3250 #endif
3251 	{
3252 		.procname	= "protected_symlinks",
3253 		.data		= &sysctl_protected_symlinks,
3254 		.maxlen		= sizeof(int),
3255 		.mode		= 0600,
3256 		.proc_handler	= proc_dointvec_minmax,
3257 		.extra1		= SYSCTL_ZERO,
3258 		.extra2		= SYSCTL_ONE,
3259 	},
3260 	{
3261 		.procname	= "protected_hardlinks",
3262 		.data		= &sysctl_protected_hardlinks,
3263 		.maxlen		= sizeof(int),
3264 		.mode		= 0600,
3265 		.proc_handler	= proc_dointvec_minmax,
3266 		.extra1		= SYSCTL_ZERO,
3267 		.extra2		= SYSCTL_ONE,
3268 	},
3269 	{
3270 		.procname	= "protected_fifos",
3271 		.data		= &sysctl_protected_fifos,
3272 		.maxlen		= sizeof(int),
3273 		.mode		= 0600,
3274 		.proc_handler	= proc_dointvec_minmax,
3275 		.extra1		= SYSCTL_ZERO,
3276 		.extra2		= &two,
3277 	},
3278 	{
3279 		.procname	= "protected_regular",
3280 		.data		= &sysctl_protected_regular,
3281 		.maxlen		= sizeof(int),
3282 		.mode		= 0600,
3283 		.proc_handler	= proc_dointvec_minmax,
3284 		.extra1		= SYSCTL_ZERO,
3285 		.extra2		= &two,
3286 	},
3287 	{
3288 		.procname	= "suid_dumpable",
3289 		.data		= &suid_dumpable,
3290 		.maxlen		= sizeof(int),
3291 		.mode		= 0644,
3292 		.proc_handler	= proc_dointvec_minmax_coredump,
3293 		.extra1		= SYSCTL_ZERO,
3294 		.extra2		= &two,
3295 	},
3296 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
3297 	{
3298 		.procname	= "binfmt_misc",
3299 		.mode		= 0555,
3300 		.child		= sysctl_mount_point,
3301 	},
3302 #endif
3303 	{
3304 		.procname	= "pipe-max-size",
3305 		.data		= &pipe_max_size,
3306 		.maxlen		= sizeof(pipe_max_size),
3307 		.mode		= 0644,
3308 		.proc_handler	= proc_dopipe_max_size,
3309 	},
3310 	{
3311 		.procname	= "pipe-user-pages-hard",
3312 		.data		= &pipe_user_pages_hard,
3313 		.maxlen		= sizeof(pipe_user_pages_hard),
3314 		.mode		= 0644,
3315 		.proc_handler	= proc_doulongvec_minmax,
3316 	},
3317 	{
3318 		.procname	= "pipe-user-pages-soft",
3319 		.data		= &pipe_user_pages_soft,
3320 		.maxlen		= sizeof(pipe_user_pages_soft),
3321 		.mode		= 0644,
3322 		.proc_handler	= proc_doulongvec_minmax,
3323 	},
3324 	{
3325 		.procname	= "mount-max",
3326 		.data		= &sysctl_mount_max,
3327 		.maxlen		= sizeof(unsigned int),
3328 		.mode		= 0644,
3329 		.proc_handler	= proc_dointvec_minmax,
3330 		.extra1		= SYSCTL_ONE,
3331 	},
3332 	{ }
3333 };
3334 
3335 static struct ctl_table debug_table[] = {
3336 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
3337 	{
3338 		.procname	= "exception-trace",
3339 		.data		= &show_unhandled_signals,
3340 		.maxlen		= sizeof(int),
3341 		.mode		= 0644,
3342 		.proc_handler	= proc_dointvec
3343 	},
3344 #endif
3345 #if defined(CONFIG_OPTPROBES)
3346 	{
3347 		.procname	= "kprobes-optimization",
3348 		.data		= &sysctl_kprobes_optimization,
3349 		.maxlen		= sizeof(int),
3350 		.mode		= 0644,
3351 		.proc_handler	= proc_kprobes_optimization_handler,
3352 		.extra1		= SYSCTL_ZERO,
3353 		.extra2		= SYSCTL_ONE,
3354 	},
3355 #endif
3356 	{ }
3357 };
3358 
3359 static struct ctl_table dev_table[] = {
3360 	{ }
3361 };
3362 
3363 static struct ctl_table sysctl_base_table[] = {
3364 	{
3365 		.procname	= "kernel",
3366 		.mode		= 0555,
3367 		.child		= kern_table,
3368 	},
3369 	{
3370 		.procname	= "vm",
3371 		.mode		= 0555,
3372 		.child		= vm_table,
3373 	},
3374 	{
3375 		.procname	= "fs",
3376 		.mode		= 0555,
3377 		.child		= fs_table,
3378 	},
3379 	{
3380 		.procname	= "debug",
3381 		.mode		= 0555,
3382 		.child		= debug_table,
3383 	},
3384 	{
3385 		.procname	= "dev",
3386 		.mode		= 0555,
3387 		.child		= dev_table,
3388 	},
3389 	{ }
3390 };
3391 
sysctl_init(void)3392 int __init sysctl_init(void)
3393 {
3394 	struct ctl_table_header *hdr;
3395 
3396 	hdr = register_sysctl_table(sysctl_base_table);
3397 	kmemleak_not_leak(hdr);
3398 	return 0;
3399 }
3400 #endif /* CONFIG_SYSCTL */
/*
 * Exports for the proc handlers.  According to the original note each of
 * these symbols is defined twice in this file, so the exports are collected
 * here once instead of following every definition - a deliberate exception
 * to the usual placement.
 */

/* int / unsigned int vectors */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);

/* int vectors with jiffies conversion */
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);

/* unsigned long vectors */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* strings and bitmaps */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_do_large_bitmap);
3416