1 /*- 2 * Copyright (c) 1995-1996 S�ren Schmidt 3 * Copyright (c) 1996 Peter Wemm 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer 11 * in this position and unchanged. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. The name of the author may not be used to endorse or promote products 16 * derived from this software withough specific prior written permission 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * 29 * $FreeBSD: src/sys/kern/imgact_elf.c,v 1.73.2.13 2002/12/28 19:49:41 dillon Exp $ 30 * $DragonFly: src/sys/kern/imgact_elf.c,v 1.20 2004/06/08 10:14:45 hsu Exp $ 31 */ 32 33 #include <sys/param.h> 34 #include <sys/exec.h> 35 #include <sys/fcntl.h> 36 #include <sys/file.h> 37 #include <sys/imgact.h> 38 #include <sys/imgact_elf.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mman.h> 42 #include <sys/systm.h> 43 #include <sys/proc.h> 44 #include <sys/namei.h> 45 #include <sys/pioctl.h> 46 #include <sys/procfs.h> 47 #include <sys/resourcevar.h> 48 #include <sys/signalvar.h> 49 #include <sys/stat.h> 50 #include <sys/syscall.h> 51 #include <sys/sysctl.h> 52 #include <sys/sysent.h> 53 #include <sys/vnode.h> 54 55 #include <vm/vm.h> 56 #include <vm/vm_kern.h> 57 #include <vm/vm_param.h> 58 #include <vm/pmap.h> 59 #include <sys/lock.h> 60 #include <vm/vm_map.h> 61 #include <vm/vm_object.h> 62 #include <vm/vm_extern.h> 63 64 #include <machine/elf.h> 65 #include <machine/md_var.h> 66 #include <sys/mount.h> 67 #include <sys/ckpt.h> 68 #define OLD_EI_BRAND 8 69 70 __ElfType(Brandinfo); 71 __ElfType(Auxargs); 72 73 static int elf_check_header (const Elf_Ehdr *hdr); 74 static int elf_freebsd_fixup (register_t **stack_base, 75 struct image_params *imgp); 76 static int elf_load_file (struct proc *p, const char *file, u_long *addr, 77 u_long *entry); 78 static int elf_load_section (struct proc *p, 79 struct vmspace *vmspace, struct vnode *vp, 80 vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, 81 vm_prot_t prot); 82 static int exec_elf_imgact (struct image_params *imgp); 83 84 static int elf_trace = 0; 85 SYSCTL_INT(_debug, OID_AUTO, elf_trace, CTLFLAG_RW, &elf_trace, 0, ""); 86 static int elf_legacy_coredump = 0; 87 SYSCTL_INT(_debug, OID_AUTO, elf_legacy_coredump, CTLFLAG_RW, 88 &elf_legacy_coredump, 0, ""); 89 90 static struct sysentvec elf_freebsd_sysvec = { 91 SYS_MAXSYSCALL, 92 sysent, 93 0, 94 0, 95 0, 96 0, 97 0, 98 0, 99 elf_freebsd_fixup, 100 sendsig, 101 sigcode, 102 &szsigcode, 103 0, 104 "FreeBSD ELF", 105 elf_coredump, 106 NULL, 107 MINSIGSTKSZ 108 }; 109 110 static Elf_Brandinfo freebsd_brand_info = { 111 ELFOSABI_FREEBSD, 112 "FreeBSD", 113 "", 114 "/usr/libexec/ld-elf.so.1", 115 &elf_freebsd_sysvec 116 }; 117 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS] = { 118 &freebsd_brand_info, 119 NULL, NULL, NULL, 120 NULL, NULL, NULL, NULL 121 }; 122 123 int 124 elf_insert_brand_entry(Elf_Brandinfo *entry) 125 { 126 int i; 127 128 for (i=1; i<MAX_BRANDS; i++) { 129 if (elf_brand_list[i] == NULL) { 130 elf_brand_list[i] = entry; 131 break; 132 } 133 } 134 if (i == MAX_BRANDS) 135 return -1; 136 return 0; 137 } 138 139 int 140 elf_remove_brand_entry(Elf_Brandinfo *entry) 141 { 142 int i; 143 144 for (i=1; i<MAX_BRANDS; i++) { 145 if (elf_brand_list[i] == entry) { 146 elf_brand_list[i] = NULL; 147 break; 148 } 149 } 150 if (i == MAX_BRANDS) 151 return -1; 152 return 0; 153 } 154 155 int 156 elf_brand_inuse(Elf_Brandinfo *entry) 157 { 158 struct proc *p; 159 int rval = FALSE; 160 161 FOREACH_PROC_IN_SYSTEM(p) { 162 if (p->p_sysent == entry->sysvec) { 163 rval = TRUE; 164 break; 165 } 166 } 167 168 return (rval); 169 } 170 171 static int 172 elf_check_header(const Elf_Ehdr *hdr) 173 { 174 if (!IS_ELF(*hdr) || 175 hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || 176 hdr->e_ident[EI_DATA] != ELF_TARG_DATA || 177 hdr->e_ident[EI_VERSION] != EV_CURRENT || 178 hdr->e_phentsize != sizeof(Elf_Phdr) || 179 hdr->e_ehsize != sizeof(Elf_Ehdr) || 180 hdr->e_version != ELF_TARG_VER) 181 return ENOEXEC; 182 183 if (!ELF_MACHINE_OK(hdr->e_machine)) 184 return ENOEXEC; 185 186 return 0; 187 } 188 189 static int 190 elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, 191 vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, 192 vm_prot_t prot) 193 { 194 size_t map_len; 195 vm_offset_t map_addr; 196 int error, rv, cow; 197 int count; 198 size_t copy_len; 199 vm_object_t object; 200 vm_offset_t file_addr; 201 vm_offset_t data_buf = 0; 202 203 VOP_GETVOBJECT(vp, &object); 204 error = 0; 205 206 /* 207 * It's necessary to fail if the filsz + offset taken from the 208 * header is greater than the actual file pager object's size. 209 * If we were to allow this, then the vm_map_find() below would 210 * walk right off the end of the file object and into the ether. 211 * 212 * While I'm here, might as well check for something else that 213 * is invalid: filsz cannot be greater than memsz. 214 */ 215 if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size || 216 filsz > memsz) { 217 uprintf("elf_load_section: truncated ELF file\n"); 218 return (ENOEXEC); 219 } 220 221 map_addr = trunc_page((vm_offset_t)vmaddr); 222 file_addr = trunc_page(offset); 223 224 /* 225 * We have two choices. We can either clear the data in the last page 226 * of an oversized mapping, or we can start the anon mapping a page 227 * early and copy the initialized data into that first page. We 228 * choose the second.. 229 */ 230 if (memsz > filsz) 231 map_len = trunc_page(offset+filsz) - file_addr; 232 else 233 map_len = round_page(offset+filsz) - file_addr; 234 235 if (map_len != 0) { 236 vm_object_reference(object); 237 238 /* cow flags: don't dump readonly sections in core */ 239 cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | 240 (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP); 241 242 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 243 vm_map_lock(&vmspace->vm_map); 244 rv = vm_map_insert(&vmspace->vm_map, &count, 245 object, 246 file_addr, /* file offset */ 247 map_addr, /* virtual start */ 248 map_addr + map_len,/* virtual end */ 249 prot, 250 VM_PROT_ALL, 251 cow); 252 vm_map_unlock(&vmspace->vm_map); 253 vm_map_entry_release(count); 254 if (rv != KERN_SUCCESS) { 255 vm_object_deallocate(object); 256 return EINVAL; 257 } 258 259 /* we can stop now if we've covered it all */ 260 if (memsz == filsz) { 261 return 0; 262 } 263 } 264 265 266 /* 267 * We have to get the remaining bit of the file into the first part 268 * of the oversized map segment. This is normally because the .data 269 * segment in the file is extended to provide bss. It's a neat idea 270 * to try and save a page, but it's a pain in the behind to implement. 271 */ 272 copy_len = (offset + filsz) - trunc_page(offset + filsz); 273 map_addr = trunc_page((vm_offset_t)vmaddr + filsz); 274 map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr; 275 276 /* This had damn well better be true! */ 277 if (map_len != 0) { 278 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 279 vm_map_lock(&vmspace->vm_map); 280 rv = vm_map_insert(&vmspace->vm_map, &count, 281 NULL, 0, 282 map_addr, map_addr + map_len, 283 VM_PROT_ALL, VM_PROT_ALL, 0); 284 vm_map_unlock(&vmspace->vm_map); 285 vm_map_entry_release(count); 286 if (rv != KERN_SUCCESS) { 287 return EINVAL; 288 } 289 } 290 291 if (copy_len != 0) { 292 vm_object_reference(object); 293 rv = vm_map_find(exec_map, 294 object, 295 trunc_page(offset + filsz), 296 &data_buf, 297 PAGE_SIZE, 298 TRUE, 299 VM_PROT_READ, 300 VM_PROT_ALL, 301 MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL); 302 if (rv != KERN_SUCCESS) { 303 vm_object_deallocate(object); 304 return EINVAL; 305 } 306 307 /* send the page fragment to user space */ 308 error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len); 309 vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE); 310 if (error) { 311 return (error); 312 } 313 } 314 315 /* 316 * set it to the specified protection 317 */ 318 vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot, 319 FALSE); 320 321 return error; 322 } 323 324 /* 325 * Load the file "file" into memory. It may be either a shared object 326 * or an executable. 327 * 328 * The "addr" reference parameter is in/out. On entry, it specifies 329 * the address where a shared object should be loaded. If the file is 330 * an executable, this value is ignored. On exit, "addr" specifies 331 * where the file was actually loaded. 332 * 333 * The "entry" reference parameter is out only. On exit, it specifies 334 * the entry point for the loaded file. 335 */ 336 static int 337 elf_load_file(struct proc *p, const char *file, u_long *addr, u_long *entry) 338 { 339 struct { 340 struct nameidata nd; 341 struct vattr attr; 342 struct image_params image_params; 343 } *tempdata; 344 const Elf_Ehdr *hdr = NULL; 345 const Elf_Phdr *phdr = NULL; 346 struct nameidata *nd; 347 struct vmspace *vmspace = p->p_vmspace; 348 struct vattr *attr; 349 struct image_params *imgp; 350 vm_prot_t prot; 351 u_long rbase; 352 u_long base_addr = 0; 353 int error, i, numsegs; 354 struct thread *td = p->p_thread; 355 356 tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK); 357 nd = &tempdata->nd; 358 attr = &tempdata->attr; 359 imgp = &tempdata->image_params; 360 361 /* 362 * Initialize part of the common data 363 */ 364 imgp->proc = p; 365 imgp->attr = attr; 366 imgp->firstpage = NULL; 367 imgp->image_header = NULL; 368 369 NDINIT(nd, NAMEI_LOOKUP, CNP_LOCKLEAF | CNP_FOLLOW, 370 UIO_SYSSPACE, file, td); 371 372 if ((error = namei(nd)) != 0) { 373 nd->ni_vp = NULL; 374 goto fail; 375 } 376 NDFREE(nd, NDF_ONLY_PNBUF); 377 imgp->vp = nd->ni_vp; 378 379 /* 380 * Check permissions, modes, uid, etc on the file, and "open" it. 381 */ 382 error = exec_check_permissions(imgp); 383 if (error) { 384 VOP_UNLOCK(nd->ni_vp, NULL, 0, td); 385 goto fail; 386 } 387 388 error = exec_map_first_page(imgp); 389 /* 390 * Also make certain that the interpreter stays the same, so set 391 * its VTEXT flag, too. 392 */ 393 if (error == 0) 394 nd->ni_vp->v_flag |= VTEXT; 395 VOP_UNLOCK(nd->ni_vp, NULL, 0, td); 396 if (error) 397 goto fail; 398 399 hdr = (const Elf_Ehdr *)imgp->image_header; 400 if ((error = elf_check_header(hdr)) != 0) 401 goto fail; 402 if (hdr->e_type == ET_DYN) 403 rbase = *addr; 404 else if (hdr->e_type == ET_EXEC) 405 rbase = 0; 406 else { 407 error = ENOEXEC; 408 goto fail; 409 } 410 411 /* Only support headers that fit within first page for now 412 * (multiplication of two Elf_Half fields will not overflow) */ 413 if ((hdr->e_phoff > PAGE_SIZE) || 414 (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) { 415 error = ENOEXEC; 416 goto fail; 417 } 418 419 phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 420 421 for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) { 422 if (phdr[i].p_type == PT_LOAD) { /* Loadable segment */ 423 prot = 0; 424 if (phdr[i].p_flags & PF_X) 425 prot |= VM_PROT_EXECUTE; 426 if (phdr[i].p_flags & PF_W) 427 prot |= VM_PROT_WRITE; 428 if (phdr[i].p_flags & PF_R) 429 prot |= VM_PROT_READ; 430 431 error = elf_load_section( 432 p, vmspace, nd->ni_vp, 433 phdr[i].p_offset, 434 (caddr_t)phdr[i].p_vaddr + 435 rbase, 436 phdr[i].p_memsz, 437 phdr[i].p_filesz, prot); 438 if (error != 0) 439 goto fail; 440 /* 441 * Establish the base address if this is the 442 * first segment. 443 */ 444 if (numsegs == 0) 445 base_addr = trunc_page(phdr[i].p_vaddr + rbase); 446 numsegs++; 447 } 448 } 449 *addr = base_addr; 450 *entry=(unsigned long)hdr->e_entry + rbase; 451 452 fail: 453 if (imgp->firstpage) 454 exec_unmap_first_page(imgp); 455 if (nd->ni_vp) 456 vrele(nd->ni_vp); 457 458 free(tempdata, M_TEMP); 459 460 return error; 461 } 462 463 /* 464 * non static, as it can be overridden by start_init() 465 */ 466 int fallback_elf_brand = -1; 467 SYSCTL_INT(_kern, OID_AUTO, fallback_elf_brand, CTLFLAG_RW, 468 &fallback_elf_brand, -1, 469 "ELF brand of last resort"); 470 471 static int 472 exec_elf_imgact(struct image_params *imgp) 473 { 474 const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header; 475 const Elf_Phdr *phdr; 476 Elf_Auxargs *elf_auxargs = NULL; 477 struct vmspace *vmspace; 478 vm_prot_t prot; 479 u_long text_size = 0, data_size = 0, total_size = 0; 480 u_long text_addr = 0, data_addr = 0; 481 u_long seg_size, seg_addr; 482 u_long addr, entry = 0, proghdr = 0; 483 int error, i; 484 const char *interp = NULL; 485 Elf_Brandinfo *brand_info; 486 char *path; 487 lwkt_tokref ilock; 488 489 error = 0; 490 491 /* 492 * Do we have a valid ELF header ? 493 */ 494 if (elf_check_header(hdr) != 0 || hdr->e_type != ET_EXEC) 495 return -1; 496 497 /* 498 * From here on down, we return an errno, not -1, as we've 499 * detected an ELF file. 500 */ 501 502 if ((hdr->e_phoff > PAGE_SIZE) || 503 (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) { 504 /* Only support headers in first page for now */ 505 return ENOEXEC; 506 } 507 phdr = (const Elf_Phdr*)(imgp->image_header + hdr->e_phoff); 508 509 /* 510 * From this point on, we may have resources that need to be freed. 511 */ 512 513 exec_new_vmspace(imgp, NULL); 514 515 /* 516 * Yeah, I'm paranoid. There is every reason in the world to get 517 * VTEXT now since from here on out, there are places we can have 518 * a context switch. Better safe than sorry; I really don't want 519 * the file to change while it's being loaded. 520 */ 521 lwkt_gettoken(&ilock, imgp->vp->v_interlock); 522 imgp->vp->v_flag |= VTEXT; 523 lwkt_reltoken(&ilock); 524 525 vmspace = imgp->proc->p_vmspace; 526 527 for (i = 0; i < hdr->e_phnum; i++) { 528 switch(phdr[i].p_type) { 529 530 case PT_LOAD: /* Loadable segment */ 531 prot = 0; 532 if (phdr[i].p_flags & PF_X) 533 prot |= VM_PROT_EXECUTE; 534 if (phdr[i].p_flags & PF_W) 535 prot |= VM_PROT_WRITE; 536 if (phdr[i].p_flags & PF_R) 537 prot |= VM_PROT_READ; 538 539 if ((error = elf_load_section(imgp->proc, 540 vmspace, imgp->vp, 541 phdr[i].p_offset, 542 (caddr_t)phdr[i].p_vaddr, 543 phdr[i].p_memsz, 544 phdr[i].p_filesz, prot)) != 0) 545 goto fail; 546 547 seg_addr = trunc_page(phdr[i].p_vaddr); 548 seg_size = round_page(phdr[i].p_memsz + 549 phdr[i].p_vaddr - seg_addr); 550 551 /* 552 * Is this .text or .data? We can't use 553 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the 554 * alpha terribly and possibly does other bad 555 * things so we stick to the old way of figuring 556 * it out: If the segment contains the program 557 * entry point, it's a text segment, otherwise it 558 * is a data segment. 559 * 560 * Note that obreak() assumes that data_addr + 561 * data_size == end of data load area, and the ELF 562 * file format expects segments to be sorted by 563 * address. If multiple data segments exist, the 564 * last one will be used. 565 */ 566 if (hdr->e_entry >= phdr[i].p_vaddr && 567 hdr->e_entry < (phdr[i].p_vaddr + 568 phdr[i].p_memsz)) { 569 text_size = seg_size; 570 text_addr = seg_addr; 571 entry = (u_long)hdr->e_entry; 572 } else { 573 data_size = seg_size; 574 data_addr = seg_addr; 575 } 576 total_size += seg_size; 577 578 /* 579 * Check limits. It should be safe to check the 580 * limits after loading the segment since we do 581 * not actually fault in all the segment's pages. 582 */ 583 if (data_size > 584 imgp->proc->p_rlimit[RLIMIT_DATA].rlim_cur || 585 text_size > maxtsiz || 586 total_size > 587 imgp->proc->p_rlimit[RLIMIT_VMEM].rlim_cur) { 588 error = ENOMEM; 589 goto fail; 590 } 591 break; 592 case PT_INTERP: /* Path to interpreter */ 593 if (phdr[i].p_filesz > MAXPATHLEN || 594 phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) { 595 error = ENOEXEC; 596 goto fail; 597 } 598 interp = imgp->image_header + phdr[i].p_offset; 599 break; 600 case PT_PHDR: /* Program header table info */ 601 proghdr = phdr[i].p_vaddr; 602 break; 603 default: 604 break; 605 } 606 } 607 608 vmspace->vm_tsize = text_size >> PAGE_SHIFT; 609 vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; 610 vmspace->vm_dsize = data_size >> PAGE_SHIFT; 611 vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr; 612 613 addr = ELF_RTLD_ADDR(vmspace); 614 615 imgp->entry_addr = entry; 616 617 brand_info = NULL; 618 619 /* We support three types of branding -- (1) the ELF EI_OSABI field 620 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string 621 * branding w/in the ELF header, and (3) path of the `interp_path' 622 * field. We should also look for an ".note.ABI-tag" ELF section now 623 * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD ones. 624 */ 625 626 /* If the executable has a brand, search for it in the brand list. */ 627 if (brand_info == NULL) { 628 for (i = 0; i < MAX_BRANDS; i++) { 629 Elf_Brandinfo *bi = elf_brand_list[i]; 630 631 if (bi != NULL && 632 (hdr->e_ident[EI_OSABI] == bi->brand 633 || 0 == 634 strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND], 635 bi->compat_3_brand, strlen(bi->compat_3_brand)))) { 636 brand_info = bi; 637 break; 638 } 639 } 640 } 641 642 /* Lacking a known brand, search for a recognized interpreter. */ 643 if (brand_info == NULL && interp != NULL) { 644 for (i = 0; i < MAX_BRANDS; i++) { 645 Elf_Brandinfo *bi = elf_brand_list[i]; 646 647 if (bi != NULL && 648 strcmp(interp, bi->interp_path) == 0) { 649 brand_info = bi; 650 break; 651 } 652 } 653 } 654 655 /* Lacking a recognized interpreter, try the default brand */ 656 if (brand_info == NULL) { 657 for (i = 0; i < MAX_BRANDS; i++) { 658 Elf_Brandinfo *bi = elf_brand_list[i]; 659 660 if (bi != NULL && fallback_elf_brand == bi->brand) { 661 brand_info = bi; 662 break; 663 } 664 } 665 } 666 667 if (brand_info == NULL) { 668 uprintf("ELF binary type \"%u\" not known.\n", 669 hdr->e_ident[EI_OSABI]); 670 error = ENOEXEC; 671 goto fail; 672 } 673 674 imgp->proc->p_sysent = brand_info->sysvec; 675 if (interp != NULL) { 676 path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 677 snprintf(path, MAXPATHLEN, "%s%s", 678 brand_info->emul_path, interp); 679 if ((error = elf_load_file(imgp->proc, path, &addr, 680 &imgp->entry_addr)) != 0) { 681 if ((error = elf_load_file(imgp->proc, interp, &addr, 682 &imgp->entry_addr)) != 0) { 683 uprintf("ELF interpreter %s not found\n", path); 684 free(path, M_TEMP); 685 goto fail; 686 } 687 } 688 free(path, M_TEMP); 689 } 690 691 /* 692 * Construct auxargs table (used by the fixup routine) 693 */ 694 elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK); 695 elf_auxargs->execfd = -1; 696 elf_auxargs->phdr = proghdr; 697 elf_auxargs->phent = hdr->e_phentsize; 698 elf_auxargs->phnum = hdr->e_phnum; 699 elf_auxargs->pagesz = PAGE_SIZE; 700 elf_auxargs->base = addr; 701 elf_auxargs->flags = 0; 702 elf_auxargs->entry = entry; 703 elf_auxargs->trace = elf_trace; 704 705 imgp->auxargs = elf_auxargs; 706 imgp->interpreted = 0; 707 708 fail: 709 return error; 710 } 711 712 static int 713 elf_freebsd_fixup(register_t **stack_base, struct image_params *imgp) 714 { 715 Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs; 716 register_t *pos; 717 718 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 719 720 if (args->trace) { 721 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 722 } 723 if (args->execfd != -1) { 724 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 725 } 726 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 727 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 728 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 729 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 730 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 731 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 732 AUXARGS_ENTRY(pos, AT_BASE, args->base); 733 AUXARGS_ENTRY(pos, AT_NULL, 0); 734 735 free(imgp->auxargs, M_TEMP); 736 imgp->auxargs = NULL; 737 738 (*stack_base)--; 739 suword(*stack_base, (long) imgp->args->argc); 740 return 0; 741 } 742 743 /* 744 * Code for generating ELF core dumps. 745 */ 746 747 typedef int (*segment_callback) (vm_map_entry_t, void *); 748 749 /* Closure for cb_put_phdr(). */ 750 struct phdr_closure { 751 Elf_Phdr *phdr; /* Program header to fill in (incremented) */ 752 Elf_Phdr *phdr_max; /* Pointer bound for error check */ 753 Elf_Off offset; /* Offset of segment in core file */ 754 }; 755 756 /* Closure for cb_size_segment(). */ 757 struct sseg_closure { 758 int count; /* Count of writable segments. */ 759 size_t vsize; /* Total size of all writable segments. */ 760 }; 761 762 /* Closure for cb_put_fp(). */ 763 struct fp_closure { 764 struct vn_hdr *vnh; 765 struct vn_hdr *vnh_max; 766 int count; 767 struct stat *sb; 768 }; 769 770 typedef struct elf_buf { 771 char *buf; 772 size_t off; 773 size_t off_max; 774 } *elf_buf_t; 775 776 static void *target_reserve(elf_buf_t target, size_t bytes, int *error); 777 778 static int cb_put_phdr (vm_map_entry_t, void *); 779 static int cb_size_segment (vm_map_entry_t, void *); 780 static int cb_fpcount_segment(vm_map_entry_t, void *); 781 static int cb_put_fp(vm_map_entry_t, void *); 782 783 784 static int each_segment (struct proc *, segment_callback, void *, int); 785 static int elf_corehdr (struct proc *, struct file *, struct ucred *, 786 int, elf_buf_t); 787 static int elf_puthdr (struct proc *, elf_buf_t, const prstatus_t *, 788 const prfpregset_t *, const prpsinfo_t *, int); 789 static int elf_putnote (elf_buf_t, const char *, int, const void *, size_t); 790 791 static int elf_putsigs(struct proc *, elf_buf_t); 792 static int elf_puttextvp(struct proc *, elf_buf_t); 793 static int elf_putfiles(struct proc *, elf_buf_t); 794 795 extern int osreldate; 796 797 int 798 elf_coredump(struct proc *p, struct vnode *vp, off_t limit) 799 { 800 struct file *fp; 801 int error; 802 803 if ((error = falloc(NULL, &fp, NULL)) != 0) 804 return (error); 805 fsetcred(fp, p->p_ucred); 806 807 fp->f_data = (caddr_t)vp; 808 fp->f_flag = O_CREAT|O_WRONLY|O_NOFOLLOW; 809 fp->f_ops = &vnops; 810 fp->f_type = DTYPE_VNODE; 811 VOP_UNLOCK(vp, NULL, 0, p->p_thread); 812 813 error = generic_elf_coredump(p, fp, limit); 814 815 fp->f_data = NULL; 816 fp->f_flag = 0; 817 fp->f_ops = &badfileops; 818 fp->f_type = 0; 819 fdrop(fp, p->p_thread); 820 return (error); 821 } 822 823 int 824 generic_elf_coredump(struct proc *p, struct file *fp, off_t limit) 825 { 826 struct ucred *cred = p->p_ucred; 827 int error = 0; 828 struct sseg_closure seginfo; 829 struct elf_buf target; 830 831 if (!fp) 832 printf("can't dump core - null fp\n"); 833 834 /* 835 * Size the program segments 836 */ 837 seginfo.count = 0; 838 seginfo.vsize = 0; 839 each_segment(p, cb_size_segment, &seginfo, 1); 840 841 /* 842 * Calculate the size of the core file header area by making 843 * a dry run of generating it. Nothing is written, but the 844 * size is calculated. 845 */ 846 bzero(&target, sizeof(target)); 847 elf_puthdr(p, &target, NULL, NULL, NULL, seginfo.count); 848 849 if (target.off + seginfo.vsize >= limit) 850 return (EFAULT); 851 852 /* 853 * Allocate memory for building the header, fill it up, 854 * and write it out. 855 */ 856 target.off_max = target.off; 857 target.off = 0; 858 target.buf = malloc(target.off_max, M_TEMP, M_WAITOK|M_ZERO); 859 860 if (target.buf == NULL) 861 return EINVAL; 862 error = elf_corehdr(p, fp, cred, seginfo.count, &target); 863 864 /* Write the contents of all of the writable segments. */ 865 if (error == 0) { 866 Elf_Phdr *php; 867 int i; 868 int nbytes; 869 870 php = (Elf_Phdr *)(target.buf + sizeof(Elf_Ehdr)) + 1; 871 for (i = 0; i < seginfo.count; i++) { 872 error = fp_write(fp, (caddr_t)php->p_vaddr, 873 php->p_filesz, &nbytes); 874 if (error != 0) 875 break; 876 php++; 877 } 878 } 879 free(target.buf, M_TEMP); 880 881 return error; 882 } 883 884 /* 885 * A callback for each_segment() to write out the segment's 886 * program header entry. 887 */ 888 static int 889 cb_put_phdr(vm_map_entry_t entry, void *closure) 890 { 891 struct phdr_closure *phc = closure; 892 Elf_Phdr *phdr = phc->phdr; 893 894 if (phc->phdr == phc->phdr_max) 895 return EINVAL; 896 897 phc->offset = round_page(phc->offset); 898 899 phdr->p_type = PT_LOAD; 900 phdr->p_offset = phc->offset; 901 phdr->p_vaddr = entry->start; 902 phdr->p_paddr = 0; 903 phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; 904 phdr->p_align = PAGE_SIZE; 905 phdr->p_flags = 0; 906 if (entry->protection & VM_PROT_READ) 907 phdr->p_flags |= PF_R; 908 if (entry->protection & VM_PROT_WRITE) 909 phdr->p_flags |= PF_W; 910 if (entry->protection & VM_PROT_EXECUTE) 911 phdr->p_flags |= PF_X; 912 913 phc->offset += phdr->p_filesz; 914 ++phc->phdr; 915 return 0; 916 } 917 918 /* 919 * A callback for each_writable_segment() to gather information about 920 * the number of segments and their total size. 921 */ 922 static int 923 cb_size_segment(vm_map_entry_t entry, void *closure) 924 { 925 struct sseg_closure *ssc = closure; 926 927 ++ssc->count; 928 ssc->vsize += entry->end - entry->start; 929 return 0; 930 } 931 932 /* 933 * A callback for each_segment() to gather information about 934 * the number of text segments. 935 */ 936 static int 937 cb_fpcount_segment(vm_map_entry_t entry, void *closure) 938 { 939 int *count = closure; 940 if (entry->object.vm_object->type == OBJT_VNODE) 941 ++*count; 942 return 0; 943 } 944 945 static int 946 cb_put_fp(vm_map_entry_t entry, void *closure) 947 { 948 struct fp_closure *fpc = closure; 949 struct vn_hdr *vnh = fpc->vnh; 950 Elf_Phdr *phdr = &vnh->vnh_phdr; 951 struct vnode *vp; 952 int error; 953 954 if (entry->object.vm_object->type == OBJT_VNODE) { 955 if (vnh == fpc->vnh_max) 956 return EINVAL; 957 vp = (struct vnode *)entry->object.vm_object->handle; 958 959 if (vp->v_mount) 960 vnh->vnh_fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 961 error = VFS_VPTOFH(vp, &vnh->vnh_fh.fh_fid); 962 if (error) 963 return error; 964 965 phdr->p_type = PT_LOAD; 966 phdr->p_offset = 0; /* not written to core */ 967 phdr->p_vaddr = entry->start; 968 phdr->p_paddr = 0; 969 phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; 970 phdr->p_align = PAGE_SIZE; 971 phdr->p_flags = 0; 972 if (entry->protection & VM_PROT_READ) 973 phdr->p_flags |= PF_R; 974 if (entry->protection & VM_PROT_WRITE) 975 phdr->p_flags |= PF_W; 976 if (entry->protection & VM_PROT_EXECUTE) 977 phdr->p_flags |= PF_X; 978 ++fpc->vnh; 979 ++fpc->count; 980 } 981 return 0; 982 } 983 984 /* 985 * For each writable segment in the process's memory map, call the given 986 * function with a pointer to the map entry and some arbitrary 987 * caller-supplied data. 988 */ 989 static int 990 each_segment(struct proc *p, segment_callback func, void *closure, int writable) 991 { 992 int error = 0; 993 vm_map_t map = &p->p_vmspace->vm_map; 994 vm_map_entry_t entry; 995 996 for (entry = map->header.next; error == 0 && entry != &map->header; 997 entry = entry->next) { 998 vm_object_t obj; 999 1000 /* 1001 * Don't dump inaccessible mappings, deal with legacy 1002 * coredump mode. 1003 * 1004 * Note that read-only segments related to the elf binary 1005 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer 1006 * need to arbitrarily ignore such segments. 1007 */ 1008 if (elf_legacy_coredump) { 1009 if (writable && (entry->protection & VM_PROT_RW) != VM_PROT_RW) 1010 continue; 1011 } else { 1012 if (writable && (entry->protection & VM_PROT_ALL) == 0) 1013 continue; 1014 } 1015 1016 /* 1017 * Dont include memory segment in the coredump if 1018 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in 1019 * madvise(2). Do not dump submaps (i.e. parts of the 1020 * kernel map). 1021 */ 1022 if (writable && entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP)) 1023 continue; 1024 1025 if ((obj = entry->object.vm_object) == NULL) 1026 continue; 1027 1028 /* Find the deepest backing object. */ 1029 while (obj->backing_object != NULL) 1030 obj = obj->backing_object; 1031 1032 /* Ignore memory-mapped devices and such things. */ 1033 if (obj->type != OBJT_DEFAULT && 1034 obj->type != OBJT_SWAP && 1035 obj->type != OBJT_VNODE) 1036 continue; 1037 1038 error = (*func)(entry, closure); 1039 } 1040 return error; 1041 } 1042 1043 static 1044 void * 1045 target_reserve(elf_buf_t target, size_t bytes, int *error) 1046 { 1047 void *res = NULL; 1048 1049 if (target->buf) { 1050 if (target->off + bytes > target->off_max) 1051 *error = EINVAL; 1052 else 1053 res = target->buf + target->off; 1054 } 1055 target->off += bytes; 1056 return (res); 1057 } 1058 1059 /* 1060 * Write the core file header to the file, including padding up to 1061 * the page boundary. 1062 */ 1063 static int 1064 elf_corehdr(struct proc *p, struct file *fp, struct ucred *cred, int numsegs, 1065 elf_buf_t target) 1066 { 1067 struct { 1068 prstatus_t status; 1069 prfpregset_t fpregset; 1070 prpsinfo_t psinfo; 1071 } *tempdata; 1072 int error; 1073 prstatus_t *status; 1074 prfpregset_t *fpregset; 1075 prpsinfo_t *psinfo; 1076 int nbytes; 1077 tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO | M_WAITOK); 1078 status = &tempdata->status; 1079 fpregset = &tempdata->fpregset; 1080 psinfo = &tempdata->psinfo; 1081 1082 /* Gather the information for the header. */ 1083 status->pr_version = PRSTATUS_VERSION; 1084 status->pr_statussz = sizeof(prstatus_t); 1085 status->pr_gregsetsz = sizeof(gregset_t); 1086 status->pr_fpregsetsz = sizeof(fpregset_t); 1087 status->pr_osreldate = osreldate; 1088 status->pr_cursig = p->p_sig; 1089 status->pr_pid = p->p_pid; 1090 fill_regs(p, &status->pr_reg); 1091 1092 fill_fpregs(p, fpregset); 1093 1094 psinfo->pr_version = PRPSINFO_VERSION; 1095 psinfo->pr_psinfosz = sizeof(prpsinfo_t); 1096 strncpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname) - 1); 1097 1098 /* XXX - We don't fill in the command line arguments properly yet. */ 1099 strncpy(psinfo->pr_psargs, p->p_comm, PRARGSZ); 1100 1101 /* Fill in the header. */ 1102 error = elf_puthdr(p, target, status, fpregset, psinfo, numsegs); 1103 1104 free(tempdata, M_TEMP); 1105 1106 /* Write it to the core file. */ 1107 if (error == 0) 1108 error = fp_write(fp, target->buf, target->off, &nbytes); 1109 return error; 1110 } 1111 1112 static int 1113 elf_puthdr(struct proc *p, elf_buf_t target, const prstatus_t *status, 1114 const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs) 1115 { 1116 int error = 0; 1117 size_t phoff; 1118 size_t noteoff; 1119 size_t notesz; 1120 Elf_Ehdr *ehdr; 1121 Elf_Phdr *phdr; 1122 1123 ehdr = target_reserve(target, sizeof(Elf_Ehdr), &error); 1124 1125 phoff = target->off; 1126 phdr = target_reserve(target, (numsegs + 1) * sizeof(Elf_Phdr), &error); 1127 1128 noteoff = target->off; 1129 if (error == 0) { 1130 error = elf_putnote(target, "FreeBSD", NT_PRSTATUS, 1131 status, sizeof *status); 1132 } 1133 if (error == 0) { 1134 error = elf_putnote(target, "FreeBSD", NT_FPREGSET, 1135 fpregset, sizeof *fpregset); 1136 } 1137 if (error == 0) { 1138 error = elf_putnote(target, "FreeBSD", NT_PRPSINFO, 1139 psinfo, sizeof *psinfo); 1140 } 1141 notesz = target->off - noteoff; 1142 1143 /* 1144 * put extra cruft for dumping process state here 1145 * - we really want it be before all the program 1146 * mappings 1147 * - we just need to update the offset accordingly 1148 * and GDB will be none the wiser. 1149 */ 1150 if (error == 0) 1151 error = elf_puttextvp(p, target); 1152 if (error == 0) 1153 error = elf_putsigs(p, target); 1154 if (error == 0) 1155 error = elf_putfiles(p, target); 1156 1157 /* 1158 * Align up to a page boundary for the program segments. The 1159 * actual data will be written to the outptu file, not to elf_buf_t, 1160 * so we do not have to do any further bounds checking. 1161 */ 1162 target->off = round_page(target->off); 1163 if (error == 0 && ehdr != NULL) { 1164 /* 1165 * Fill in the ELF header. 1166 */ 1167 ehdr->e_ident[EI_MAG0] = ELFMAG0; 1168 ehdr->e_ident[EI_MAG1] = ELFMAG1; 1169 ehdr->e_ident[EI_MAG2] = ELFMAG2; 1170 ehdr->e_ident[EI_MAG3] = ELFMAG3; 1171 ehdr->e_ident[EI_CLASS] = ELF_CLASS; 1172 ehdr->e_ident[EI_DATA] = ELF_DATA; 1173 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 1174 ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD; 1175 ehdr->e_ident[EI_ABIVERSION] = 0; 1176 ehdr->e_ident[EI_PAD] = 0; 1177 ehdr->e_type = ET_CORE; 1178 ehdr->e_machine = ELF_ARCH; 1179 ehdr->e_version = EV_CURRENT; 1180 ehdr->e_entry = 0; 1181 ehdr->e_phoff = phoff; 1182 ehdr->e_flags = 0; 1183 ehdr->e_ehsize = sizeof(Elf_Ehdr); 1184 ehdr->e_phentsize = sizeof(Elf_Phdr); 1185 ehdr->e_phnum = numsegs + 1; 1186 ehdr->e_shentsize = sizeof(Elf_Shdr); 1187 ehdr->e_shnum = 0; 1188 ehdr->e_shstrndx = SHN_UNDEF; 1189 } 1190 if (error == 0 && phdr != NULL) { 1191 /* 1192 * Fill in the program header entries. 1193 */ 1194 struct phdr_closure phc; 1195 1196 /* The note segement. */ 1197 phdr->p_type = PT_NOTE; 1198 phdr->p_offset = noteoff; 1199 phdr->p_vaddr = 0; 1200 phdr->p_paddr = 0; 1201 phdr->p_filesz = notesz; 1202 phdr->p_memsz = 0; 1203 phdr->p_flags = 0; 1204 phdr->p_align = 0; 1205 ++phdr; 1206 1207 /* All the writable segments from the program. */ 1208 phc.phdr = phdr; 1209 phc.phdr_max = phdr + numsegs; 1210 phc.offset = target->off; 1211 each_segment(p, cb_put_phdr, &phc, 1); 1212 } 1213 return (error); 1214 } 1215 1216 static int 1217 elf_putnote(elf_buf_t target, const char *name, int type, 1218 const void *desc, size_t descsz) 1219 { 1220 int error = 0; 1221 char *dst; 1222 Elf_Note note; 1223 1224 note.n_namesz = strlen(name) + 1; 1225 note.n_descsz = descsz; 1226 note.n_type = type; 1227 dst = target_reserve(target, sizeof(note), &error); 1228 if (dst != NULL) 1229 bcopy(¬e, dst, sizeof note); 1230 dst = target_reserve(target, note.n_namesz, &error); 1231 if (dst != NULL) 1232 bcopy(name, dst, note.n_namesz); 1233 target->off = roundup2(target->off, sizeof(Elf_Size)); 1234 dst = target_reserve(target, note.n_descsz, &error); 1235 if (dst != NULL) 1236 bcopy(desc, dst, note.n_descsz); 1237 target->off = roundup2(target->off, sizeof(Elf_Size)); 1238 return(error); 1239 } 1240 1241 1242 static int 1243 elf_putsigs(struct proc *p, elf_buf_t target) 1244 { 1245 int error = 0; 1246 struct ckpt_siginfo *csi; 1247 1248 csi = target_reserve(target, sizeof(struct ckpt_siginfo), &error); 1249 if (csi) { 1250 csi->csi_ckptpisz = sizeof(struct ckpt_siginfo); 1251 bcopy(p->p_procsig, &csi->csi_procsig, sizeof(struct procsig)); 1252 bcopy(p->p_procsig->ps_sigacts, &csi->csi_sigacts, sizeof(struct sigacts)); 1253 bcopy(&p->p_realtimer, &csi->csi_itimerval, sizeof(struct itimerval)); 1254 csi->csi_sigparent = p->p_sigparent; 1255 } 1256 return(error); 1257 } 1258 1259 static int 1260 elf_putfiles(struct proc *p, elf_buf_t target) 1261 { 1262 int error = 0; 1263 int i; 1264 struct ckpt_filehdr *cfh = NULL; 1265 struct ckpt_fileinfo *cfi; 1266 struct file *fp; 1267 struct vnode *vp; 1268 /* 1269 * the duplicated loop is gross, but it was the only way 1270 * to eliminate uninitialized variable warnings 1271 */ 1272 cfh = target_reserve(target, sizeof(struct ckpt_filehdr), &error); 1273 if (cfh) { 1274 cfh->cfh_nfiles = 0; 1275 } 1276 1277 /* 1278 * ignore STDIN/STDERR/STDOUT 1279 */ 1280 for (i = 3; error == 0 && i < p->p_fd->fd_nfiles; i++) { 1281 if ((fp = p->p_fd->fd_ofiles[i]) == NULL) 1282 continue; 1283 if (fp->f_type != DTYPE_VNODE) 1284 continue; 1285 cfi = target_reserve(target, sizeof(struct ckpt_fileinfo), &error); 1286 if (cfi) { 1287 cfi->cfi_index = -1; 1288 vp = (struct vnode *)fp->f_data; 1289 /* 1290 * it looks like a bug in ptrace is marking 1291 * a non-vnode as a vnode - until we find the 1292 * root cause this will at least prevent 1293 * further panics from truss 1294 */ 1295 if (vp == NULL || vp->v_mount == NULL) 1296 continue; 1297 cfh->cfh_nfiles++; 1298 cfi->cfi_index = i; 1299 cfi->cfi_flags = fp->f_flag; 1300 cfi->cfi_offset = fp->f_offset; 1301 cfi->cfi_fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 1302 error = VFS_VPTOFH(vp, &cfi->cfi_fh.fh_fid); 1303 } 1304 } 1305 return(error); 1306 } 1307 1308 static int 1309 elf_puttextvp(struct proc *p, elf_buf_t target) 1310 { 1311 int error = 0; 1312 int *vn_count; 1313 struct fp_closure fpc; 1314 struct ckpt_vminfo *vminfo; 1315 1316 vminfo = target_reserve(target, sizeof(struct ckpt_vminfo), &error); 1317 if (vminfo != NULL) { 1318 vminfo->cvm_dsize = p->p_vmspace->vm_dsize; 1319 vminfo->cvm_tsize = p->p_vmspace->vm_tsize; 1320 vminfo->cvm_daddr = p->p_vmspace->vm_daddr; 1321 vminfo->cvm_taddr = p->p_vmspace->vm_taddr; 1322 } 1323 1324 fpc.count = 0; 1325 vn_count = target_reserve(target, sizeof(int), &error); 1326 if (target->buf != NULL) { 1327 fpc.vnh = (struct vn_hdr *)(target->buf + target->off); 1328 fpc.vnh_max = fpc.vnh + 1329 (target->off_max - target->off) / sizeof(struct vn_hdr); 1330 error = each_segment(p, cb_put_fp, &fpc, 0); 1331 if (vn_count) 1332 *vn_count = fpc.count; 1333 } else { 1334 error = each_segment(p, cb_fpcount_segment, &fpc.count, 0); 1335 } 1336 target->off += fpc.count * sizeof(struct vn_hdr); 1337 return(error); 1338 } 1339 1340 1341 /* 1342 * Tell kern_execve.c about it, with a little help from the linker. 1343 */ 1344 static struct execsw elf_execsw = {exec_elf_imgact, "ELF"}; 1345 EXEC_SET_ORDERED(elf, elf_execsw, SI_ORDER_FIRST); 1346