// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef ELF_COMPAT
#define ELF_COMPAT 0
#endif

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
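
/*
 * Worked example, assuming ELF_MIN_ALIGN == 4096 (0x1000):
 * ELF_PAGESTART(0x1234)  == 0x1000 (round down to the page start)
 * ELF_PAGEOFFSET(0x1234) == 0x234  (offset within the page)
 * ELF_PAGEALIGN(0x1234)  == 0x2000 (round up to the next page boundary)
 */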

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
#ifdef CONFIG_COREDUMP
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
#endif
};

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
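
/*
 * Note that BAD_ADDR() also catches error returns from vm_mmap() and
 * friends: a negative errno cast to unsigned long lies far above
 * TASK_SIZE, so both out-of-range addresses and encoded errors fail
 * this test.
 */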

static int set_brk(unsigned long start, unsigned long end, int prot)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		/*
		 * Map the last of the bss segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error = vm_brk_flags(start, end - start,
				prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			return error;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/*
 * We need to explicitly zero any fractional pages after the data
 * section (i.e. bss).  These pages would otherwise contain junk from
 * the file that should not be in memory.
 */
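/*
 * Example, assuming ELF_MIN_ALIGN == 4096: for elf_bss == 0x404010,
 * ELF_PAGEOFFSET() yields 0x10, so padzero() clears the remaining
 * 0xff0 bytes of the page, i.e. user addresses 0x404010..0x404fff.
 */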
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) (sp -= len)
#endif
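
/*
 * Example: on the usual grows-down stack, STACK_ALLOC(sp, 16) moves sp
 * down by 16 bytes and returns the new (lower) address; with
 * CONFIG_STACK_GROWSUP it returns the old address and bumps sp upwards
 * instead.  STACK_ROUND() keeps the stack pointer 16-byte aligned.
 */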

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

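/*
 * Lay out the userspace stack for a new ELF process.  When this
 * returns, the stack (from bprm->p upwards) looks like:
 *
 *	argc
 *	argv[0] ... argv[argc - 1], NULL
 *	envp[0] ... envp[envc - 1], NULL
 *	auxv[]: (a_type, a_val) pairs, terminated by (AT_NULL, 0)
 *	(higher up: random bytes, platform strings, arg/env strings)
 *
 * This is the layout the ELF interpreter and libc startup code expect.
 */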
static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long interp_load_addr,
		unsigned long e_entry, unsigned long phdr_addr)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	elf_addr_t flags = 0;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)
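
	/*
	 * Each NEW_AUX_ENT() emits one (a_type, a_val) pair into
	 * mm->saved_auxv, e.g. NEW_AUX_ENT(AT_PAGESZ, 4096) stores the
	 * two words { AT_PAGESZ, 4096 }.  The array is sized by
	 * AT_VECTOR_SIZE, so the entry count below must stay in sync
	 * with AT_VECTOR_SIZE_BASE/AT_VECTOR_SIZE_ARCH.
	 */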

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, phdr_addr);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
		flags |= AT_FLAGS_PRESERVE_ARGV0;
	NEW_AUX_ENT(AT_FLAGS, flags);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->have_execfd) {
		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry.  */
	elf_info += 2;

	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	if (mmap_read_lock_killable(mm))
		return -EINTR;
	vma = find_extend_vma(mm, bprm->p);
	mmap_read_unlock(mm);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	if ((type & MAP_FIXED_NOREPLACE) &&
	    PTR_ERR((void *)map_addr) == -EEXIST)
		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
			task_pid_nr(current), current->comm, (void *)addr);

	return map_addr;
}

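/*
 * Size of the full virtual address span covered by the PT_LOAD
 * segments, holes included.  Example: a PT_LOAD at p_vaddr 0x0 with
 * p_memsz 0x1000 and another at p_vaddr 0x4000 with p_memsz 0x500
 * give a total mapping size of 0x4500.
 */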
static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
	elf_addr_t min_addr = -1;
	elf_addr_t max_addr = 0;
	bool pt_load = false;
	int i;

	for (i = 0; i < nr; i++) {
		if (phdr[i].p_type == PT_LOAD) {
			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
			pt_load = true;
		}
	}
	return pt_load ? (max_addr - min_addr) : 0;
}

static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t rv;

	rv = kernel_read(file, buf, len, &pos);
	if (unlikely(rv != len)) {
		return (rv < 0) ? rv : -EIO;
	}
	return 0;
}

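/*
 * Example: PT_LOAD alignments of 0x1000 and 0x200000 (a huge-page
 * aligned segment) make this return 0x200000; a bogus non-power-of-two
 * p_align is skipped rather than rejected.
 */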
static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
{
	unsigned long alignment = 0;
	int i;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			unsigned long p_align = cmds[i].p_align;

			/* skip non-power of two alignments as invalid */
			if (!is_power_of_2(p_align))
				continue;
			alignment = max(alignment, p_align);
		}
	}

	/* ensure we align to at least one page */
	return ELF_PAGEALIGN(alignment);
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval = -1;
	unsigned int size;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);

out:
	if (retval) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

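/*
 * Translate ELF segment flags (PF_R/PF_W/PF_X) into mmap protection
 * bits, e.g. a typical text segment with PF_R | PF_X becomes
 * PROT_READ | PROT_EXEC.  The arch hook gets the final word.
 */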
static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
			    bool has_interp, bool is_interp)
{
	int prot = 0;

	if (p_flags & PF_R)
		prot |= PROT_READ;
	if (p_flags & PF_W)
		prot |= PROT_WRITE;
	if (p_flags & PF_X)
		prot |= PROT_EXEC;

	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

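/*
 * Map the ELF interpreter (e.g. ld.so).  On success the return value
 * is the interpreter's load address (the bias added to its vaddrs),
 * which the caller adds to e_entry; errors are returned as -errno
 * values, detected with IS_ERR_VALUE()/BAD_ADDR().
 */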
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
		struct arch_elf_state *arch_state)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	int bss_prot = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE;
			int elf_prot = make_prot(eppnt->p_flags, arch_state,
						 true, true);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
			if (k > last_bss) {
				last_bss = k;
				bss_prot = elf_prot;
			}
		}
	}

	/*
	 * Now fill out the bss section: first pad the last page from
	 * the file up to the page boundary, and zero it from elf_bss
	 * up to the end of the page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * Next, align both the file and mem bss up to the page size,
	 * since this is where elf_bss was just zeroed up to, and where
	 * last_bss will end after the vm_brk_flags() below.
	 */
	elf_bss = ELF_PAGEALIGN(elf_bss);
	last_bss = ELF_PAGEALIGN(last_bss);
	/* Finally, if there is still more bss to allocate, do it. */
	if (last_bss > elf_bss) {
		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
				bss_prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			goto out;
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

static int parse_elf_property(const char *data, size_t *off, size_t datasz,
			      struct arch_elf_state *arch,
			      bool have_prev_type, u32 *prev_type)
{
	size_t o, step;
	const struct gnu_property *pr;
	int ret;

	if (*off == datasz)
		return -ENOENT;

	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
		return -EIO;
	o = *off;
	datasz -= *off;

	if (datasz < sizeof(*pr))
		return -ENOEXEC;
	pr = (const struct gnu_property *)(data + o);
	o += sizeof(*pr);
	datasz -= sizeof(*pr);

	if (pr->pr_datasz > datasz)
		return -ENOEXEC;

	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
	if (step > datasz)
		return -ENOEXEC;

	/* Properties are supposed to be unique and sorted on pr_type: */
	if (have_prev_type && pr->pr_type <= *prev_type)
		return -ENOEXEC;
	*prev_type = pr->pr_type;

	ret = arch_parse_elf_property(pr->pr_type, data + o,
				      pr->pr_datasz, ELF_COMPAT, arch);
	if (ret)
		return ret;

	*off = o + step;
	return 0;
}

#define NOTE_DATA_SZ SZ_1K
#define GNU_PROPERTY_TYPE_0_NAME "GNU"
#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))
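
/*
 * Layout of the PT_GNU_PROPERTY note parsed below (fields padded to
 * ELF_GNU_PROPERTY_ALIGN):
 *
 *	Elf_Nhdr { n_namesz = 4, n_descsz, n_type = NT_GNU_PROPERTY_TYPE_0 }
 *	"GNU\0"
 *	gnu_property { pr_type, pr_datasz } followed by pr_datasz bytes,
 *	repeated, sorted by pr_type
 */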

static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
				struct arch_elf_state *arch)
{
	union {
		struct elf_note nhdr;
		char data[NOTE_DATA_SZ];
	} note;
	loff_t pos;
	ssize_t n;
	size_t off, datasz;
	int ret;
	bool have_prev_type;
	u32 prev_type;

	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
		return 0;

	/* load_elf_binary() shouldn't call us unless this is true... */
	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
		return -ENOEXEC;

	/* If the properties are crazy large, that's too bad (for now): */
	if (phdr->p_filesz > sizeof(note))
		return -ENOEXEC;

	pos = phdr->p_offset;
	n = kernel_read(f, &note, phdr->p_filesz, &pos);

	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
		return -EIO;

	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
	    strncmp(note.data + sizeof(note.nhdr),
		    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
		return -ENOEXEC;

	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
		       ELF_GNU_PROPERTY_ALIGN);
	if (off > n)
		return -ENOEXEC;

	if (note.nhdr.n_descsz > n - off)
		return -ENOEXEC;
	datasz = off + note.nhdr.n_descsz;

	have_prev_type = false;
	do {
		ret = parse_elf_property(note.data, &off, datasz, arch,
					 have_prev_type, &prev_type);
		have_prev_type = true;
	} while (!ret);

	return ret == -ENOENT ? 0 : ret;
}

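/*
 * The main ELF loader.  In outline:
 *
 *  1. Sanity-check the ELF header and read the program headers.
 *  2. Find PT_INTERP (if any) and open/read the interpreter's header.
 *  3. Process PT_GNU_STACK, PT_GNU_PROPERTY and arch-specific phdrs.
 *  4. Point of no return: begin_new_exec() tears down the old mm.
 *  5. mmap each PT_LOAD, computing load_bias for ET_DYN binaries.
 *  6. Set up brk/bss, map the interpreter, create the stack tables.
 *  7. START_THREAD() at the interpreter's (or binary's) entry point.
 */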
static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_bias = 0, phdr_addr = 0;
	int first_pt_load = 1;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	struct elf_phdr *elf_property_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int bss_prot = 0;
	int retval, i;
	unsigned long elf_entry;
	unsigned long e_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
	struct elfhdr *interp_elf_ex = NULL;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct mm_struct *mm;
	struct pt_regs *regs;

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(elf_ex))
		goto out;
	if (elf_check_fdpic(elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
		char *elf_interpreter;

		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
			elf_property_phdata = elf_ppnt;
			continue;
		}

		if (elf_ppnt->p_type != PT_INTERP)
			continue;

		/*
		 * This is the program interpreter used for shared libraries
		 * and dynamically linked executables - the segment holds the
		 * path of the interpreter (e.g. the ELF dynamic linker).
		 */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
				  elf_ppnt->p_offset);
		if (retval < 0)
			goto out_free_interp;
		/* make sure path is NULL terminated */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		interpreter = open_exec(elf_interpreter);
		kfree(elf_interpreter);
		retval = PTR_ERR(interpreter);
		if (IS_ERR(interpreter))
			goto out_free_ph;

		/*
		 * If the binary is not readable then enforce mm->dumpable = 0
		 * regardless of the interpreter's permissions.
		 */
		would_dump(bprm, interpreter);

		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
		if (!interp_elf_ex) {
			retval = -ENOMEM;
			goto out_free_file;
		}

		/* Get the exec headers */
		retval = elf_read(interpreter, interp_elf_ex,
				  sizeof(*interp_elf_ex), 0);
		if (retval < 0)
			goto out_free_dentry;

		break;

out_free_interp:
		kfree(elf_interpreter);
		goto out_free_ph;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(interp_elf_ex) ||
		    elf_check_fdpic(interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_property_phdata = NULL;
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_GNU_PROPERTY:
				elf_property_phdata = elf_ppnt;
				break;

			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	retval = parse_elf_properties(interpreter ?: bprm->file,
				      elf_property_phdata, &arch_state);
	if (retval)
		goto out_free_dentry;

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(elf_ex,
				!!interpreter, interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = begin_new_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(*elf_ex, &arch_state);
	if (elf_read_implies_exec(*elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
		int elf_prot, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;
		unsigned long alignment;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias,
					 bss_prot);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
				     !!interpreter, false);

		elf_flags = MAP_PRIVATE;

		vaddr = elf_ppnt->p_vaddr;
		/*
		 * The first time through the loop, first_pt_load is true:
		 * layout will be calculated. Once set, use MAP_FIXED since
		 * we know we've already safely mapped the entire region with
		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
		 */
		if (!first_pt_load) {
			elf_flags |= MAP_FIXED;
		} else if (elf_ex->e_type == ET_EXEC) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_EXEC binaries. No special handling
			 * is needed.
			 */
			elf_flags |= MAP_FIXED_NOREPLACE;
		} else if (elf_ex->e_type == ET_DYN) {
			/*
			 * This logic is run once for the first LOAD Program
			 * Header for ET_DYN binaries to calculate the
			 * randomization (load_bias) for all the LOAD
			 * Program Headers.
			 *
			 * There are effectively two types of ET_DYN
			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
			 * and loaders (ET_DYN without INTERP, since they
			 * _are_ the ELF interpreter). The loaders must
			 * be loaded away from programs since the program
			 * may otherwise collide with the loader (especially
			 * for ET_EXEC which does not have a randomized
			 * position). For example to handle invocations of
			 * "./ld.so someprog" to test out a new version of
			 * the loader, the subsequent program that the
			 * loader loads must avoid the loader itself, so
			 * they cannot share the same load range. Sufficient
			 * room for the brk must be allocated with the
			 * loader as well, since brk must be available with
			 * the loader.
			 *
			 * Therefore, programs are loaded offset from
			 * ELF_ET_DYN_BASE and loaders are loaded into the
			 * independently randomized mmap region (0 load_bias
			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
			 */
			if (interpreter) {
				load_bias = ELF_ET_DYN_BASE;
				if (current->flags & PF_RANDOMIZE)
					load_bias += arch_mmap_rnd();
				alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
				if (alignment)
					load_bias &= ~(alignment - 1);
				elf_flags |= MAP_FIXED_NOREPLACE;
			} else
				load_bias = 0;

			/*
			 * Since load_bias is used for all subsequent loading
			 * calculations, we must lower it by the first vaddr
			 * so that the remaining calculations based on the
			 * ELF vaddrs will be correctly offset. The result
			 * is then page aligned.
			 */
			load_bias = ELF_PAGESTART(load_bias - vaddr);

			/*
			 * Calculate the entire size of the ELF mapping
			 * (total_size), used for the initial mapping,
			 * due to load_addr_set which is set to true later
			 * once the initial mapping is performed.
			 *
			 * Note that this is only sensible when the LOAD
			 * segments are contiguous (or overlapping). If
			 * used for LOADs that are far apart, this would
			 * cause the holes between LOADs to be mapped,
			 * running the risk of having the mapping fail,
			 * as it would be larger than the ELF file itself.
			 *
			 * As a result, only ET_DYN does this, since
			 * some ET_EXEC (e.g. ia64) may have large virtual
			 * memory holes between LOADs.
			 */
			total_size = total_mapping_size(elf_phdata,
							elf_ex->e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR_VALUE(error) ?
				PTR_ERR((void *)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (first_pt_load) {
			first_pt_load = 0;
			if (elf_ex->e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				reloc_func_desc = load_bias;
			}
		}

		/*
		 * Figure out which segment in the file contains the Program
		 * Header table, and map to the associated memory address.
		 */
		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
				    elf_ppnt->p_vaddr;
		}

		k = elf_ppnt->p_vaddr;
		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk) {
			bss_prot = elf_prot;
			elf_brk = k;
		}
	}

	e_entry = elf_ex->e_entry + load_bias;
	phdr_addr += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk, bss_prot);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (interpreter) {
		elf_entry = load_elf_interp(interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata,
					    &arch_state);
		if (!IS_ERR_VALUE(elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += interp_elf_ex->e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR_VALUE(elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);

		kfree(interp_elf_ex);
		kfree(interp_elf_phdata);
	} else {
		elf_entry = e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
				   e_entry, phdr_addr);
	if (retval < 0)
		goto out;

	mm = current->mm;
	mm->end_code = end_code;
	mm->start_code = start_code;
	mm->start_data = start_data;
	mm->end_data = end_data;
	mm->start_stack = bprm->p;

	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		/*
		 * For architectures with ELF randomization, when executing
		 * a loader directly (i.e. no interpreter listed in ELF
		 * headers), move the brk area out of the mmap region
		 * (since it grows up, and may collide early with the stack
		 * growing down), and into the unused ELF_ET_DYN_BASE region.
		 */
		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
		    elf_ex->e_type == ET_DYN && !interpreter) {
			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
		}

		mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
		current->brk_randomized = 1;
#endif
	}

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

	regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	finalize_exec(bprm);
	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
	retval = 0;
out:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_ex);
	kfree(interp_elf_phdata);
out_free_file:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
	if (retval < 0)
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;
	if (elf_check_fdpic(&elf_ex))
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
	if (retval < 0)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
	if (bss > len) {
		error = vm_brk(len, bss - len);
		if (error)
			goto out_free_ph;
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

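/*
 * Size of a note as emitted by writenote() below: header plus name
 * and descriptor, each padded to 4 bytes.  E.g. a "CORE" note with a
 * 5-byte payload takes 12 + 8 + 8 = 28 bytes.
 */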
static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	return dump_emit(cprm, &en, sizeof(en)) &&
	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus_common *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
	}

	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;
	unsigned int state;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	state = READ_ONCE(p->__state);
	i = state ? ffz(~state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	get_task_comm(psinfo->pr_fname, p);

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const kernel_siginfo_t *siginfo)
{
	copy_siginfo_to_external(csigdata, siginfo);
	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
{
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;
	int i;

	/* *Estimated* file count and total data size needed */
	count = cprm->vma_count;
	if (count > UINT_MAX / 64)
		return -EINVAL;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
		return -EINVAL;
	size = round_up(size, PAGE_SIZE);
	/*
	 * "size" can be 0 here legitimately.
	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
	 */
	data = kvmalloc(size, GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(data))
		return -ENOMEM;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *m = &cprm->vma_meta[i];
		struct file *file;
		const char *filename;

		file = m->file;
		if (!file)
			continue;
		filename = file_path(file, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				kvfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* file_path() fills at the end, move name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = m->start;
		*start_end_ofs++ = m->end;
		*start_end_ofs++ = m->pgoff;
		count++;
	}

	/* Now we know exact count of files, can store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * Count usually is less than mm->map_count,
	 * we need to move filenames down.
	 */
	n = cprm->vma_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, "CORE", NT_FILE, size, data);
	return 0;
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;
	size_t size;
	int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE sizeof(struct elf_prstatus)
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
#endif

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, struct elf_note_info *info)
{
	unsigned int note_iter, view_iter;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
1774 	fill_prstatus(&t->prstatus.common, t->task, signr);
1775 	regset_get(t->task, &view->regsets[0],
1776 		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1777 
1778 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1779 		  PRSTATUS_SIZE, &t->prstatus);
1780 	info->size += notesize(&t->notes[0]);
1781 
1782 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1783 
1784 	/*
1785 	 * Each of the other regsets may generate a note too.  Skip any
1786 	 * regset that has no core_note_type or is inactive.
1787 	 */
1788 	note_iter = 1;
1789 	for (view_iter = 1; view_iter < view->n; ++view_iter) {
1790 		const struct user_regset *regset = &view->regsets[view_iter];
1791 		int note_type = regset->core_note_type;
1792 		bool is_fpreg = note_type == NT_PRFPREG;
1793 		void *data;
1794 		int ret;
1795 
1796 		do_thread_regset_writeback(t->task, regset);
1797 		if (!note_type) // not for coredumps
1798 			continue;
1799 		if (regset->active && regset->active(t->task, regset) <= 0)
1800 			continue;
1801 
1802 		ret = regset_get_alloc(t->task, regset, ~0U, &data);
1803 		if (ret < 0)
1804 			continue;
1805 
1806 		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
1807 			break;
1808 
1809 		if (is_fpreg)
1810 			SET_PR_FPVALID(&t->prstatus);
1811 
1812 		fill_note(&t->notes[note_iter], is_fpreg ? "CORE" : "LINUX",
1813 			  note_type, ret, data);
1814 
1815 		info->size += notesize(&t->notes[note_iter]);
1816 		note_iter++;
1817 	}
1818 
1819 	return 1;
1820 }
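
/*
 * So, per thread: notes[0] is always NT_PRSTATUS, with pr_reg filled from
 * regset 0, and notes[1..] carry one note per remaining regset that has a
 * core_note_type and is active, in view->regsets[] order.
 */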
1821 
1822 static int fill_note_info(struct elfhdr *elf, int phdrs,
1823 			  struct elf_note_info *info,
1824 			  struct coredump_params *cprm)
1825 {
1826 	struct task_struct *dump_task = current;
1827 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1828 	struct elf_thread_core_info *t;
1829 	struct elf_prpsinfo *psinfo;
1830 	struct core_thread *ct;
1831 	unsigned int i;
1832 
1833 	info->size = 0;
1834 	info->thread = NULL;
1835 
1836 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1837 	if (psinfo == NULL) {
1838 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1839 		return 0;
1840 	}
1841 
1842 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1843 
1844 	/*
1845 	 * Figure out how many notes we're going to need for each thread.
1846 	 */
1847 	info->thread_notes = 0;
1848 	for (i = 0; i < view->n; ++i)
1849 		if (view->regsets[i].core_note_type != 0)
1850 			++info->thread_notes;
1851 
1852 	/*
1853 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1854 	 * since it is our one special case.
1855 	 */
1856 	if (unlikely(info->thread_notes == 0) ||
1857 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1858 		WARN_ON(1);
1859 		return 0;
1860 	}
1861 
1862 	/*
1863 	 * Initialize the ELF file header.
1864 	 */
1865 	fill_elf_header(elf, phdrs,
1866 			view->e_machine, view->e_flags);
1867 
1868 	/*
1869 	 * Allocate a structure for each thread.
1870 	 */
1871 	for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
1872 		t = kzalloc(offsetof(struct elf_thread_core_info,
1873 				     notes[info->thread_notes]),
1874 			    GFP_KERNEL);
1875 		if (unlikely(!t))
1876 			return 0;
1877 
1878 		t->task = ct->task;
1879 		if (ct->task == dump_task || !info->thread) {
1880 			t->next = info->thread;
1881 			info->thread = t;
1882 		} else {
1883 			/*
1884 			 * Make sure to keep the original task at
1885 			 * the head of the list.
1886 			 */
1887 			t->next = info->thread->next;
1888 			info->thread->next = t;
1889 		}
1890 	}
1891 
1892 	/*
1893 	 * Now fill in each thread's information.
1894 	 */
1895 	for (t = info->thread; t != NULL; t = t->next)
1896 		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
1897 			return 0;
1898 
1899 	/*
1900 	 * Fill in the two process-wide notes.
1901 	 */
1902 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1903 	info->size += notesize(&info->psinfo);
1904 
1905 	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
1906 	info->size += notesize(&info->signote);
1907 
1908 	fill_auxv_note(&info->auxv, current->mm);
1909 	info->size += notesize(&info->auxv);
1910 
1911 	if (fill_files_note(&info->files, cprm) == 0)
1912 		info->size += notesize(&info->files);
1913 
1914 	return 1;
1915 }
1916 
1917 static size_t get_note_info_size(struct elf_note_info *info)
1918 {
1919 	return info->size;
1920 }
1921 
1922 /*
1923  * Write all the notes for each thread.  When writing the first thread, the
1924  * process-wide notes are interleaved after the first thread-specific note.
1925  */
1926 static int write_note_info(struct elf_note_info *info,
1927 			   struct coredump_params *cprm)
1928 {
1929 	bool first = true;
1930 	struct elf_thread_core_info *t = info->thread;
1931 
1932 	do {
1933 		int i;
1934 
1935 		if (!writenote(&t->notes[0], cprm))
1936 			return 0;
1937 
1938 		if (first && !writenote(&info->psinfo, cprm))
1939 			return 0;
1940 		if (first && !writenote(&info->signote, cprm))
1941 			return 0;
1942 		if (first && !writenote(&info->auxv, cprm))
1943 			return 0;
1944 		if (first && info->files.data &&
1945 				!writenote(&info->files, cprm))
1946 			return 0;
1947 
1948 		for (i = 1; i < info->thread_notes; ++i)
1949 			if (t->notes[i].data &&
1950 			    !writenote(&t->notes[i], cprm))
1951 				return 0;
1952 
1953 		first = false;
1954 		t = t->next;
1955 	} while (t);
1956 
1957 	return 1;
1958 }
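
/*
 * A sketch of the resulting note order for a two-thread process:
 *
 *	thread 1 (dump task):	NT_PRSTATUS
 *	process-wide:		NT_PRPSINFO, NT_SIGINFO, NT_AUXV[, NT_FILE]
 *	thread 1:		remaining regset notes
 *	thread 2:		NT_PRSTATUS, remaining regset notes
 */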
1959 
1960 static void free_note_info(struct elf_note_info *info)
1961 {
1962 	struct elf_thread_core_info *threads = info->thread;
1963 	while (threads) {
1964 		unsigned int i;
1965 		struct elf_thread_core_info *t = threads;
1966 		threads = t->next;
1967 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1968 		for (i = 1; i < info->thread_notes; ++i)
1969 			kfree(t->notes[i].data);
1970 		kfree(t);
1971 	}
1972 	kfree(info->psinfo.data);
1973 	kvfree(info->files.data);
1974 }
1975 
1976 #else
1977 
1978 /* Here is the structure in which the status of each thread is captured. */
1979 struct elf_thread_status
1980 {
1981 	struct list_head list;
1982 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1983 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1984 	struct task_struct *thread;
1985 	struct memelfnote notes[3];
1986 	int num_notes;
1987 };
1988 
1989 /*
1990  * In order to add the thread-specific information to the ELF core file,
1991  * we need to keep a linked list of every thread's prstatus and then create
1992  * a single section for them in the final core file.
1993  */
1994 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1995 {
1996 	int sz = 0;
1997 	struct task_struct *p = t->thread;
1998 	t->num_notes = 0;
1999 
2000 	fill_prstatus(&t->prstatus.common, p, signr);
2001 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
2002 
2003 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
2004 		  &(t->prstatus));
2005 	t->num_notes++;
2006 	sz += notesize(&t->notes[0]);
2007 
2008 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
2009 								&t->fpu))) {
2010 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
2011 			  &(t->fpu));
2012 		t->num_notes++;
2013 		sz += notesize(&t->notes[1]);
2014 	}
2015 	return sz;
2016 }
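
/*
 * In this non-regset variant, notes[0] is NT_PRSTATUS and notes[1] is
 * NT_PRFPREG (filled only when the FPU state could be copied); the third
 * slot of notes[] is currently unused.
 */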
2017 
2018 struct elf_note_info {
2019 	struct memelfnote *notes;
2020 	struct memelfnote *notes_files;
2021 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
2022 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
2023 	struct list_head thread_list;
2024 	elf_fpregset_t *fpu;
2025 	user_siginfo_t csigdata;
2026 	int thread_status_size;
2027 	int numnote;
2028 };
2029 
2030 static int elf_note_info_init(struct elf_note_info *info)
2031 {
2032 	memset(info, 0, sizeof(*info));
2033 	INIT_LIST_HEAD(&info->thread_list);
2034 
2035 	/* Allocate space for ELF notes */
2036 	info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2037 	if (!info->notes)
2038 		return 0;
2039 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2040 	if (!info->psinfo)
2041 		return 0;
2042 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2043 	if (!info->prstatus)
2044 		return 0;
2045 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2046 	if (!info->fpu)
2047 		return 0;
2048 	return 1;
2049 }
2050 
2051 static int fill_note_info(struct elfhdr *elf, int phdrs,
2052 			  struct elf_note_info *info,
2053 			  struct coredump_params *cprm)
2054 {
2055 	struct core_thread *ct;
2056 	struct elf_thread_status *ets;
2057 
2058 	if (!elf_note_info_init(info))
2059 		return 0;
2060 
2061 	for (ct = current->signal->core_state->dumper.next;
2062 					ct; ct = ct->next) {
2063 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2064 		if (!ets)
2065 			return 0;
2066 
2067 		ets->thread = ct->task;
2068 		list_add(&ets->list, &info->thread_list);
2069 	}
2070 
2071 	list_for_each_entry(ets, &info->thread_list, list) {
2072 		int sz;
2073 
2074 		sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets);
2075 		info->thread_status_size += sz;
2076 	}
2077 	/* Now collect the dump for the current task. */
2078 	memset(info->prstatus, 0, sizeof(*info->prstatus));
2079 	fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo);
2080 	elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs);
2081 
2082 	/* Set up header */
2083 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2084 
2085 	/*
2086 	 * Set up the notes in similar form to SVR4 core dumps made
2087 	 * with info from their /proc.
2088 	 */
2089 
2090 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2091 		  sizeof(*info->prstatus), info->prstatus);
2092 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2093 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2094 		  sizeof(*info->psinfo), info->psinfo);
2095 
2096 	fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo);
2097 	fill_auxv_note(info->notes + 3, current->mm);
2098 	info->numnote = 4;
2099 
2100 	if (fill_files_note(info->notes + info->numnote, cprm) == 0) {
2101 		info->notes_files = info->notes + info->numnote;
2102 		info->numnote++;
2103 	}
2104 
2105 	/* Try to dump the FPU. */
2106 	info->prstatus->pr_fpvalid =
2107 		elf_core_copy_task_fpregs(current, cprm->regs, info->fpu);
2108 	if (info->prstatus->pr_fpvalid)
2109 		fill_note(info->notes + info->numnote++,
2110 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2111 	return 1;
2112 }
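
/*
 * Note slots filled above: 0 = NT_PRSTATUS, 1 = NT_PRPSINFO, 2 = NT_SIGINFO,
 * 3 = NT_AUXV, then optionally NT_FILE and NT_PRFPREG.  At most six of the
 * eight slots allocated in elf_note_info_init() are used.
 */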
2113 
2114 static size_t get_note_info_size(struct elf_note_info *info)
2115 {
2116 	int sz = 0;
2117 	int i;
2118 
2119 	for (i = 0; i < info->numnote; i++)
2120 		sz += notesize(info->notes + i);
2121 
2122 	sz += info->thread_status_size;
2123 
2124 	return sz;
2125 }
2126 
2127 static int write_note_info(struct elf_note_info *info,
2128 			   struct coredump_params *cprm)
2129 {
2130 	struct elf_thread_status *ets;
2131 	int i;
2132 
2133 	for (i = 0; i < info->numnote; i++)
2134 		if (!writenote(info->notes + i, cprm))
2135 			return 0;
2136 
2137 	/* write out the thread status notes section */
2138 	list_for_each_entry(ets, &info->thread_list, list) {
2139 		for (i = 0; i < ets->num_notes; i++)
2140 			if (!writenote(&ets->notes[i], cprm))
2141 				return 0;
2142 	}
2143 
2144 	return 1;
2145 }
2146 
2147 static void free_note_info(struct elf_note_info *info)
2148 {
2149 	while (!list_empty(&info->thread_list)) {
2150 		struct list_head *tmp = info->thread_list.next;
2151 		list_del(tmp);
2152 		kfree(list_entry(tmp, struct elf_thread_status, list));
2153 	}
2154 
2155 	/* Free data possibly allocated by fill_files_note(): */
2156 	if (info->notes_files)
2157 		kvfree(info->notes_files->data);
2158 
2159 	kfree(info->prstatus);
2160 	kfree(info->psinfo);
2161 	kfree(info->notes);
2162 	kfree(info->fpu);
2163 }
2164 
2165 #endif
2166 
2167 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2168 			     elf_addr_t e_shoff, int segs)
2169 {
2170 	elf->e_shoff = e_shoff;
2171 	elf->e_shentsize = sizeof(*shdr4extnum);
2172 	elf->e_shnum = 1;
2173 	elf->e_shstrndx = SHN_UNDEF;
2174 
2175 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2176 
2177 	shdr4extnum->sh_type = SHT_NULL;
2178 	shdr4extnum->sh_size = elf->e_shnum;
2179 	shdr4extnum->sh_link = elf->e_shstrndx;
2180 	shdr4extnum->sh_info = segs;
2181 }
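
/*
 * This implements the ELF extended-numbering scheme: when the real segment
 * count does not fit in the 16-bit e_phnum, e_phnum is set to PN_XNUM and
 * the actual count is stored in the sh_info field of the single (index 0)
 * section header written above.
 */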
2182 
2183 /*
2184  * Actual dumper
2185  *
2186  * This is a two-pass process: first we compute the offsets of all the
2187  * pieces, and then they are actually written out.  If we hit the core
2188  * file size limit, we just truncate.
2189  */
2190 static int elf_core_dump(struct coredump_params *cprm)
2191 {
2192 	int has_dumped = 0;
2193 	int segs, i;
2194 	struct elfhdr elf;
2195 	loff_t offset = 0, dataoff;
2196 	struct elf_note_info info = { };
2197 	struct elf_phdr *phdr4note = NULL;
2198 	struct elf_shdr *shdr4extnum = NULL;
2199 	Elf_Half e_phnum;
2200 	elf_addr_t e_shoff;
2201 
2202 	/*
2203 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2204 	 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2205 	 */
2206 	segs = cprm->vma_count + elf_core_extra_phdrs();
2207 
2208 	/* for notes section */
2209 	segs++;
2210 
2211 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2212 	 * this, the kernel supports extended numbering. Have a look at
2213 	 * include/linux/elf.h for further information. */
2214 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2215 
2216 	/*
2217 	 * Collect all the non-memory information about the process for the
2218 	 * notes.  This also sets up the file header.
2219 	 */
2220 	if (!fill_note_info(&elf, e_phnum, &info, cprm))
2221 		goto end_coredump;
2222 
2223 	has_dumped = 1;
2224 
2225 	offset += sizeof(elf);				/* Elf header */
2226 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2227 
2228 	/* Write notes phdr entry */
2229 	{
2230 		size_t sz = get_note_info_size(&info);
2231 
2232 		/* For cell spufs */
2233 		sz += elf_coredump_extra_notes_size();
2234 
2235 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2236 		if (!phdr4note)
2237 			goto end_coredump;
2238 
2239 		fill_elf_note_phdr(phdr4note, sz, offset);
2240 		offset += sz;
2241 	}
2242 
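	/*
	 * dataoff is the page-aligned file offset at which the memory dumps
	 * will begin; everything sized so far (ELF header, program headers,
	 * note data) lands before it.
	 */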
2243 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2244 
2245 	offset += cprm->vma_data_size;
2246 	offset += elf_core_extra_data_size();
2247 	e_shoff = offset;
2248 
2249 	if (e_phnum == PN_XNUM) {
2250 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2251 		if (!shdr4extnum)
2252 			goto end_coredump;
2253 		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2254 	}
2255 
2256 	offset = dataoff;
2257 
2258 	if (!dump_emit(cprm, &elf, sizeof(elf)))
2259 		goto end_coredump;
2260 
2261 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2262 		goto end_coredump;
2263 
2264 	/* Write program headers for segments dump */
2265 	for (i = 0; i < cprm->vma_count; i++) {
2266 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2267 		struct elf_phdr phdr;
2268 
2269 		phdr.p_type = PT_LOAD;
2270 		phdr.p_offset = offset;
2271 		phdr.p_vaddr = meta->start;
2272 		phdr.p_paddr = 0;
2273 		phdr.p_filesz = meta->dump_size;
2274 		phdr.p_memsz = meta->end - meta->start;
2275 		offset += phdr.p_filesz;
2276 		phdr.p_flags = 0;
2277 		if (meta->flags & VM_READ)
2278 			phdr.p_flags |= PF_R;
2279 		if (meta->flags & VM_WRITE)
2280 			phdr.p_flags |= PF_W;
2281 		if (meta->flags & VM_EXEC)
2282 			phdr.p_flags |= PF_X;
2283 		phdr.p_align = ELF_EXEC_PAGESIZE;
2284 
2285 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2286 			goto end_coredump;
2287 	}
2288 
2289 	if (!elf_core_write_extra_phdrs(cprm, offset))
2290 		goto end_coredump;
2291 
2292 	/* write out the notes section */
2293 	if (!write_note_info(&info, cprm))
2294 		goto end_coredump;
2295 
2296 	/* For cell spufs */
2297 	if (elf_coredump_extra_notes_write(cprm))
2298 		goto end_coredump;
2299 
2300 	/* Align to page */
2301 	dump_skip_to(cprm, dataoff);
2302 
2303 	for (i = 0; i < cprm->vma_count; i++) {
2304 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2305 
2306 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
2307 			goto end_coredump;
2308 	}
2309 
2310 	if (!elf_core_write_extra_data(cprm))
2311 		goto end_coredump;
2312 
2313 	if (e_phnum == PN_XNUM) {
2314 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2315 			goto end_coredump;
2316 	}
2317 
2318 end_coredump:
2319 	free_note_info(&info);
2320 	kfree(shdr4extnum);
2321 	kfree(phdr4note);
2322 	return has_dumped;
2323 }
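
/*
 * The resulting core file layout, matching the offsets computed above:
 *
 *	ELF header
 *	program headers (PT_NOTE first, then one PT_LOAD per VMA, extras)
 *	note data
 *	<padding up to ELF_EXEC_PAGESIZE>
 *	memory dumps, one per PT_LOAD, in program-header order
 *	extra arch data and, if used, the extended-numbering section header
 */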
2324 
2325 #endif		/* CONFIG_ELF_CORE */
2326 
2327 static int __init init_elf_binfmt(void)
2328 {
2329 	register_binfmt(&elf_format);
2330 	return 0;
2331 }
2332 
2333 static void __exit exit_elf_binfmt(void)
2334 {
2335 	/* Remove the ELF loader. */
2336 	unregister_binfmt(&elf_format);
2337 }
2338 
2339 core_initcall(init_elf_binfmt);
2340 module_exit(exit_elf_binfmt);
2341 MODULE_LICENSE("GPL");
2342 
2343 #ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
2344 #include "binfmt_elf_test.c"
2345 #endif
2346