1 /*- 2 * Copyright (c) 2000 David O'Brien 3 * Copyright (c) 1995-1996 Søren Schmidt 4 * Copyright (c) 1996 Peter Wemm 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 * 30 * $FreeBSD: src/sys/kern/imgact_elf.c,v 1.73.2.13 2002/12/28 19:49:41 dillon Exp $ 31 */ 32 33 #include <sys/param.h> 34 #include <sys/exec.h> 35 #include <sys/fcntl.h> 36 #include <sys/file.h> 37 #include <sys/imgact.h> 38 #include <sys/imgact_elf.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mman.h> 42 #include <sys/systm.h> 43 #include <sys/proc.h> 44 #include <sys/nlookup.h> 45 #include <sys/pioctl.h> 46 #include <sys/procfs.h> 47 #include <sys/resourcevar.h> 48 #include <sys/signalvar.h> 49 #include <sys/stat.h> 50 #include <sys/syscall.h> 51 #include <sys/sysctl.h> 52 #include <sys/sysent.h> 53 #include <sys/vnode.h> 54 #include <sys/eventhandler.h> 55 56 #include <cpu/lwbuf.h> 57 58 #include <vm/vm.h> 59 #include <vm/vm_kern.h> 60 #include <vm/vm_param.h> 61 #include <vm/pmap.h> 62 #include <sys/lock.h> 63 #include <vm/vm_map.h> 64 #include <vm/vm_object.h> 65 #include <vm/vm_extern.h> 66 67 #include <machine/elf.h> 68 #include <machine/md_var.h> 69 #include <sys/mount.h> 70 #include <sys/ckpt.h> 71 72 #define OLD_EI_BRAND 8 73 #define truncps(va,ps) ((va) & ~(ps - 1)) 74 #define aligned(a,t) (truncps((u_long)(a), sizeof(t)) == (u_long)(a)) 75 76 static int __elfN(check_header)(const Elf_Ehdr *hdr); 77 static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, 78 const char *interp, int32_t *osrel); 79 static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, 80 u_long *entry); 81 static int __elfN(load_section)(struct proc *p, 82 struct vmspace *vmspace, struct vnode *vp, 83 vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, 84 vm_prot_t prot); 85 static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); 86 static boolean_t __elfN(bsd_trans_osrel)(const Elf_Note *note, 87 int32_t *osrel); 88 static boolean_t __elfN(check_note)(struct image_params *imgp, 89 Elf_Brandnote *checknote, int32_t *osrel); 90 static boolean_t check_PT_NOTE(struct image_params *imgp, 91 
Elf_Brandnote *checknote, int32_t *osrel, const Elf_Phdr * pnote); 92 static boolean_t extract_interpreter(struct image_params *imgp, 93 const Elf_Phdr *pinterpreter, char *data); 94 95 static int elf_legacy_coredump = 0; 96 static int __elfN(fallback_brand) = -1; 97 #if defined(__x86_64__) 98 SYSCTL_NODE(_kern, OID_AUTO, elf64, CTLFLAG_RW, 0, ""); 99 SYSCTL_INT(_debug, OID_AUTO, elf64_legacy_coredump, CTLFLAG_RW, 100 &elf_legacy_coredump, 0, "legacy coredump mode"); 101 SYSCTL_INT(_kern_elf64, OID_AUTO, fallback_brand, CTLFLAG_RW, 102 &elf64_fallback_brand, 0, "ELF64 brand of last resort"); 103 TUNABLE_INT("kern.elf64.fallback_brand", &elf64_fallback_brand); 104 #else /* i386 assumed */ 105 SYSCTL_NODE(_kern, OID_AUTO, elf32, CTLFLAG_RW, 0, ""); 106 SYSCTL_INT(_debug, OID_AUTO, elf32_legacy_coredump, CTLFLAG_RW, 107 &elf_legacy_coredump, 0, "legacy coredump mode"); 108 SYSCTL_INT(_kern_elf32, OID_AUTO, fallback_brand, CTLFLAG_RW, 109 &elf32_fallback_brand, 0, "ELF32 brand of last resort"); 110 TUNABLE_INT("kern.elf32.fallback_brand", &elf32_fallback_brand); 111 #endif 112 113 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; 114 115 static const char DRAGONFLY_ABI_VENDOR[] = "DragonFly"; 116 static const char FREEBSD_ABI_VENDOR[] = "FreeBSD"; 117 118 Elf_Brandnote __elfN(dragonfly_brandnote) = { 119 .hdr.n_namesz = sizeof(DRAGONFLY_ABI_VENDOR), 120 .hdr.n_descsz = sizeof(int32_t), 121 .hdr.n_type = 1, 122 .vendor = DRAGONFLY_ABI_VENDOR, 123 .flags = BN_TRANSLATE_OSREL, 124 .trans_osrel = __elfN(bsd_trans_osrel), 125 }; 126 127 Elf_Brandnote __elfN(freebsd_brandnote) = { 128 .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), 129 .hdr.n_descsz = sizeof(int32_t), 130 .hdr.n_type = 1, 131 .vendor = FREEBSD_ABI_VENDOR, 132 .flags = BN_TRANSLATE_OSREL, 133 .trans_osrel = __elfN(bsd_trans_osrel), 134 }; 135 136 int 137 __elfN(insert_brand_entry)(Elf_Brandinfo *entry) 138 { 139 int i; 140 141 for (i = 0; i < MAX_BRANDS; i++) { 142 if (elf_brand_list[i] == NULL) { 143 
elf_brand_list[i] = entry; 144 break; 145 } 146 } 147 if (i == MAX_BRANDS) { 148 uprintf("WARNING: %s: could not insert brandinfo entry: %p\n", 149 __func__, entry); 150 return (-1); 151 } 152 return (0); 153 } 154 155 int 156 __elfN(remove_brand_entry)(Elf_Brandinfo *entry) 157 { 158 int i; 159 160 for (i = 0; i < MAX_BRANDS; i++) { 161 if (elf_brand_list[i] == entry) { 162 elf_brand_list[i] = NULL; 163 break; 164 } 165 } 166 if (i == MAX_BRANDS) 167 return (-1); 168 return (0); 169 } 170 171 /* 172 * Check if an elf brand is being used anywhere in the system. 173 * 174 * Used by the linux emulation module unloader. This isn't safe from 175 * races. 176 */ 177 struct elf_brand_inuse_info { 178 int rval; 179 Elf_Brandinfo *entry; 180 }; 181 182 static int elf_brand_inuse_callback(struct proc *p, void *data); 183 184 int 185 __elfN(brand_inuse)(Elf_Brandinfo *entry) 186 { 187 struct elf_brand_inuse_info info; 188 189 info.rval = FALSE; 190 info.entry = entry; 191 allproc_scan(elf_brand_inuse_callback, &info); 192 return (info.rval); 193 } 194 195 static 196 int 197 elf_brand_inuse_callback(struct proc *p, void *data) 198 { 199 struct elf_brand_inuse_info *info = data; 200 201 if (p->p_sysent == info->entry->sysvec) { 202 info->rval = TRUE; 203 return (-1); 204 } 205 return (0); 206 } 207 208 static int 209 __elfN(check_header)(const Elf_Ehdr *hdr) 210 { 211 Elf_Brandinfo *bi; 212 int i; 213 214 if (!IS_ELF(*hdr) || 215 hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || 216 hdr->e_ident[EI_DATA] != ELF_TARG_DATA || 217 hdr->e_ident[EI_VERSION] != EV_CURRENT || 218 hdr->e_phentsize != sizeof(Elf_Phdr) || 219 hdr->e_ehsize != sizeof(Elf_Ehdr) || 220 hdr->e_version != ELF_TARG_VER) 221 return (ENOEXEC); 222 223 /* 224 * Make sure we have at least one brand for this machine. 
225 */ 226 227 for (i = 0; i < MAX_BRANDS; i++) { 228 bi = elf_brand_list[i]; 229 if (bi != NULL && bi->machine == hdr->e_machine) 230 break; 231 } 232 if (i == MAX_BRANDS) 233 return (ENOEXEC); 234 235 return (0); 236 } 237 238 static int 239 __elfN(load_section)(struct proc *p, struct vmspace *vmspace, struct vnode *vp, 240 vm_offset_t offset, caddr_t vmaddr, size_t memsz, 241 size_t filsz, vm_prot_t prot) 242 { 243 size_t map_len; 244 vm_offset_t map_addr; 245 int error, rv, cow; 246 int count; 247 size_t copy_len; 248 vm_object_t object; 249 vm_offset_t file_addr; 250 251 object = vp->v_object; 252 error = 0; 253 254 vm_object_hold(object); 255 256 /* 257 * It's necessary to fail if the filsz + offset taken from the 258 * header is greater than the actual file pager object's size. 259 * If we were to allow this, then the vm_map_find() below would 260 * walk right off the end of the file object and into the ether. 261 * 262 * While I'm here, might as well check for something else that 263 * is invalid: filsz cannot be greater than memsz. 264 */ 265 if ((off_t)filsz + offset > vp->v_filesize || filsz > memsz) { 266 uprintf("elf_load_section: truncated ELF file\n"); 267 vm_object_drop(object); 268 return (ENOEXEC); 269 } 270 271 map_addr = trunc_page((vm_offset_t)vmaddr); 272 file_addr = trunc_page(offset); 273 274 /* 275 * We have two choices. We can either clear the data in the last page 276 * of an oversized mapping, or we can start the anon mapping a page 277 * early and copy the initialized data into that first page. We 278 * choose the second.. 279 */ 280 if (memsz > filsz) 281 map_len = trunc_page(offset+filsz) - file_addr; 282 else 283 map_len = round_page(offset+filsz) - file_addr; 284 285 if (map_len != 0) { 286 vm_object_reference_locked(object); 287 288 /* cow flags: don't dump readonly sections in core */ 289 cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | 290 (prot & VM_PROT_WRITE ? 
0 : MAP_DISABLE_COREDUMP); 291 292 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 293 vm_map_lock(&vmspace->vm_map); 294 rv = vm_map_insert(&vmspace->vm_map, &count, 295 object, 296 file_addr, /* file offset */ 297 map_addr, /* virtual start */ 298 map_addr + map_len,/* virtual end */ 299 VM_MAPTYPE_NORMAL, 300 prot, VM_PROT_ALL, 301 cow); 302 vm_map_unlock(&vmspace->vm_map); 303 vm_map_entry_release(count); 304 if (rv != KERN_SUCCESS) { 305 vm_object_deallocate(object); 306 vm_object_drop(object); 307 return (EINVAL); 308 } 309 310 /* we can stop now if we've covered it all */ 311 if (memsz == filsz) { 312 vm_object_drop(object); 313 return (0); 314 } 315 } 316 317 318 /* 319 * We have to get the remaining bit of the file into the first part 320 * of the oversized map segment. This is normally because the .data 321 * segment in the file is extended to provide bss. It's a neat idea 322 * to try and save a page, but it's a pain in the behind to implement. 323 */ 324 copy_len = (offset + filsz) - trunc_page(offset + filsz); 325 map_addr = trunc_page((vm_offset_t)vmaddr + filsz); 326 map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr; 327 328 /* This had damn well better be true! 
*/ 329 if (map_len != 0) { 330 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 331 vm_map_lock(&vmspace->vm_map); 332 rv = vm_map_insert(&vmspace->vm_map, &count, 333 NULL, 0, 334 map_addr, map_addr + map_len, 335 VM_MAPTYPE_NORMAL, 336 VM_PROT_ALL, VM_PROT_ALL, 337 0); 338 vm_map_unlock(&vmspace->vm_map); 339 vm_map_entry_release(count); 340 if (rv != KERN_SUCCESS) { 341 vm_object_drop(object); 342 return (EINVAL); 343 } 344 } 345 346 if (copy_len != 0) { 347 vm_page_t m; 348 struct lwbuf *lwb; 349 struct lwbuf lwb_cache; 350 351 m = vm_fault_object_page(object, trunc_page(offset + filsz), 352 VM_PROT_READ, 0, &error); 353 if (m) { 354 lwb = lwbuf_alloc(m, &lwb_cache); 355 error = copyout((caddr_t)lwbuf_kva(lwb), 356 (caddr_t)map_addr, copy_len); 357 lwbuf_free(lwb); 358 vm_page_unhold(m); 359 } 360 if (error) { 361 vm_object_drop(object); 362 return (error); 363 } 364 } 365 366 vm_object_drop(object); 367 /* 368 * set it to the specified protection 369 */ 370 vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, 371 prot, FALSE); 372 373 return (error); 374 } 375 376 /* 377 * Load the file "file" into memory. It may be either a shared object 378 * or an executable. 379 * 380 * The "addr" reference parameter is in/out. On entry, it specifies 381 * the address where a shared object should be loaded. If the file is 382 * an executable, this value is ignored. On exit, "addr" specifies 383 * where the file was actually loaded. 384 * 385 * The "entry" reference parameter is out only. On exit, it specifies 386 * the entry point for the loaded file. 
 *
 * Only PT_LOAD program headers with a non-zero p_memsz are mapped.
 * "addr" and "entry" are written only after every segment has loaded.
 *
 * Returns 0 on success, otherwise the error from the path lookup, the
 * permission check, mapping the first page, header validation (ENOEXEC)
 * or load_section().  The vnode reference, the first-page mapping and
 * the temporary allocation are released on all paths.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr, u_long *entry)
{
	/* Bundled into one allocation rather than placed on the stack. */
	struct {
		struct nlookupdata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nlookupdata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	struct mount *topmnt;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int error, i, numsegs;

	tempdata = kmalloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->vp = NULL;

	/* Resolve the path and obtain an exclusively locked vnode. */
	error = nlookup_init(nd, file, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(nd);
	if (error == 0)
		error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &imgp->vp);
	topmnt = nd->nl_nch.mount;
	nlookup_done(nd);
	if (error)
		goto fail;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp, topmnt);
	if (error) {
		vn_unlock(imgp->vp);
		goto fail;
	}

	error = exec_map_first_page(imgp);
	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VTEXT flag, too.
	 */
	if (error == 0)
		vsetflags(imgp->vp, VTEXT);
	vn_unlock(imgp->vp);
	if (error)
		goto fail;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	/* Shared objects are relocated by *addr, executables are not. */
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now      */
	/*    (multiplication of two Elf_Half fields will not overflow) */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
			/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

			error = __elfN(load_section)(
				    p, vmspace, imgp->vp,
				    phdr[i].p_offset,
				    (caddr_t)phdr[i].p_vaddr +
				    rbase,
				    phdr[i].p_memsz,
				    phdr[i].p_filesz, prot);
			if (error != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
				base_addr = trunc_page(phdr[i].p_vaddr + rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

	/* Reached on success as well; "error" is 0 in that case. */
fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);
	if (imgp->vp) {
		vrele(imgp->vp);
		imgp->vp = NULL;
	}
	kfree(tempdata, M_TEMP);

	return (error);
}

/*
 * Select the brand for the image described by imgp.
 *
 * The registered brands are searched in four passes, and the first
 * match wins:
 *   1. brands flagged BI_BRAND_NOTE or BI_BRAND_NOTE_MANDATORY whose
 *      brand note is accepted by check_note() (which receives "osrel");
 *   2. brands whose "brand" equals e_ident[EI_OSABI], or whose
 *      compat_3_brand string prefixes e_ident at OLD_EI_BRAND;
 *   3. brands whose interp_path equals "interp" (only if interp != NULL);
 *   4. the brand equal to the fallback_brand tunable.
 * Brands flagged BI_BRAND_NOTE_MANDATORY are skipped in passes 2-4, and
 * every pass requires the brand's machine to equal e_machine.
 *
 * Returns the matching brand or NULL if none matched.
 */
static Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
    int32_t *osrel)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	Elf_Brandinfo *bi;
	boolean_t ret;
	int i;

	/* We support four types of branding -- (1) the ELF EI_OSABI field
	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
	 * branding within the ELF header, (3) path of the `interp_path' field,
	 * and (4) the ".note.ABI-tag" ELF section.
	 */

	/* Look for an ".note.ABI-tag" ELF section */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];

		if (bi == NULL)
			continue;
		if (hdr->e_machine == bi->machine && (bi->flags &
		    (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
			ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
			if (ret)
				return (bi);
		}
	}

	/* If the executable has a brand, search for it in the brand list. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];

		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
			continue;
		if (hdr->e_machine == bi->machine &&
		    (hdr->e_ident[EI_OSABI] == bi->brand ||
		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
			return (bi);
	}

	/* Lacking a known brand, search for a recognized interpreter. */
	if (interp != NULL) {
		for (i = 0; i < MAX_BRANDS; i++) {
			bi = elf_brand_list[i];

			if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
				continue;
			if (hdr->e_machine == bi->machine &&
			    strcmp(interp, bi->interp_path) == 0)
				return (bi);
		}
	}

	/* Lacking a recognized interpreter, try the default brand */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];

		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
			continue;
		if (hdr->e_machine == bi->machine &&
		    __elfN(fallback_brand) == bi->brand)
			return (bi);
	}
	return (NULL);
}

/*
 * ELF image activator.
 *
 * Validates the header held in imgp->image_header, picks a brand,
 * replaces the process vmspace, maps every PT_LOAD segment, loads the
 * interpreter if the image names one, and leaves an Elf_Auxargs record
 * in imgp->auxargs for the stack fixup routine.
 *
 * Returns -1 if the image is not an ELF executable this activator
 * accepts, 0 on success, or an errno value once the image has been
 * recognized as ELF.  The interpreter path buffer is freed on every
 * return path after it has been allocated.
 */
static int
__CONCAT(exec_,__elfN(imgact))(struct image_params *imgp)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header;
	const Elf_Phdr *phdr;
	Elf_Auxargs *elf_auxargs;
	struct vmspace *vmspace;
	vm_prot_t prot;
	u_long text_size = 0, data_size = 0, total_size = 0;
	u_long text_addr = 0, data_addr = 0;
	u_long seg_size, seg_addr;
	u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
	int32_t osrel = 0;
	int error = 0, i, n;
	boolean_t failure;
	char *interp = NULL;
	const char *newinterp = NULL;
	Elf_Brandinfo *brand_info;
	char *path;

	/*
	 * Do we have a valid ELF header ?
	 *
	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later if a particular
	 * brand doesn't support it.  Both DragonFly platforms do by default.
	 */
	if (__elfN(check_header)(hdr) != 0 ||
	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
		return (-1);

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */

	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
		/* Only support headers in first page for now */
		return (ENOEXEC);
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr))
		return (ENOEXEC);
	/*
	 * First pass over the program headers: remember the vaddr of the
	 * first PT_LOAD segment and extract the interpreter path, if any.
	 */
	n = 0;
	baddr = 0;
	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD) {
			if (n == 0)
				baddr = phdr[i].p_vaddr;
			n++;
			continue;
		}
		if (phdr[i].p_type == PT_INTERP) {
			/*
			 * If interp is already defined there are more than
			 * one PT_INTERP program headers present.  Take only
			 * the first one and ignore the rest.
			 */
			if (interp != NULL)
				continue;

			if (phdr[i].p_filesz == 0 ||
			    phdr[i].p_filesz > PAGE_SIZE ||
			    phdr[i].p_filesz > MAXPATHLEN)
				return (ENOEXEC);

			interp = kmalloc(phdr[i].p_filesz, M_TEMP, M_WAITOK);
			failure = extract_interpreter(imgp, &phdr[i], interp);
			if (failure) {
				kfree(interp, M_TEMP);
				return (ENOEXEC);
			}
			continue;
		}
	}

	brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel);
	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		if (interp != NULL)
			kfree(interp, M_TEMP);
		return (ENOEXEC);
	}
	if (hdr->e_type == ET_DYN) {
		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
			if (interp != NULL)
				kfree(interp, M_TEMP);
			return (ENOEXEC);
		}
		/*
		 * Honour the base load address from the dso if it is
		 * non-zero for some reason.
		 */
		if (baddr == 0)
			et_dyn_addr = ET_DYN_LOAD_ADDR;
		else
			et_dyn_addr = 0;
	} else
		et_dyn_addr = 0;

	if (interp != NULL && brand_info->interp_newpath != NULL)
		newinterp = brand_info->interp_newpath;

	exec_new_vmspace(imgp, NULL);

	/*
	 * Yeah, I'm paranoid.  There is every reason in the world to get
	 * VTEXT now since from here on out, there are places we can have
	 * a context switch.  Better safe than sorry; I really don't want
	 * the file to change while it's being loaded.
	 */
	vsetflags(imgp->vp, VTEXT);

	vmspace = imgp->proc->p_vmspace;

	/* Second pass: map the segments and collect text/data extents. */
	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {

		case PT_LOAD:	/* Loadable segment */
			if (phdr[i].p_memsz == 0)
				break;
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

			if ((error = __elfN(load_section)(
					imgp->proc,
					vmspace,
					imgp->vp,
					phdr[i].p_offset,
					(caddr_t)phdr[i].p_vaddr + et_dyn_addr,
					phdr[i].p_memsz,
					phdr[i].p_filesz,
					prot)) != 0) {
				if (interp != NULL)
					kfree (interp, M_TEMP);
				return (error);
			}

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry.  Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
				<= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff +
				    et_dyn_addr;

			seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
			seg_size = round_page(phdr[i].p_memsz +
			    phdr[i].p_vaddr + et_dyn_addr - seg_addr);

			/*
			 * Is this .text or .data?  We can't use
			 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
			 * alpha terribly and possibly does other bad
			 * things so we stick to the old way of figuring
			 * it out:  If the segment contains the program
			 * entry point, it's a text segment, otherwise it
			 * is a data segment.
			 *
			 * Note that obreak() assumes that data_addr +
			 * data_size == end of data load area, and the ELF
			 * file format expects segments to be sorted by
			 * address.  If multiple data segments exist, the
			 * last one will be used.
			 */
			if (hdr->e_entry >= phdr[i].p_vaddr &&
			    hdr->e_entry < (phdr[i].p_vaddr +
			    phdr[i].p_memsz)) {
				text_size = seg_size;
				text_addr = seg_addr;
				entry = (u_long)hdr->e_entry + et_dyn_addr;
			} else {
				data_size = seg_size;
				data_addr = seg_addr;
			}
			total_size += seg_size;

			/*
			 * Check limits.  It should be safe to check the
			 * limits after loading the segment since we do
			 * not actually fault in all the segment's pages.
			 */
			if (data_size >
			    imgp->proc->p_rlimit[RLIMIT_DATA].rlim_cur ||
			    text_size > maxtsiz ||
			    total_size >
			    imgp->proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
				if (interp != NULL)
					kfree(interp, M_TEMP);
				error = ENOMEM;
				return (error);
			}
			break;
		case PT_PHDR: 	/* Program header table info */
			proghdr = phdr[i].p_vaddr + et_dyn_addr;
			break;
		default:
			break;
		}
	}

	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	/* Load address handed to load_file() for the interpreter. */
	addr = ELF_RTLD_ADDR(vmspace);

	imgp->entry_addr = entry;

	imgp->proc->p_sysent = brand_info->sysvec;
	EVENTHANDLER_INVOKE(process_exec, imgp);

	if (interp != NULL) {
		/*
		 * Try the interpreter under the brand's emulation prefix
		 * first, then the brand's replacement path, and finally
		 * the path exactly as named by the image.  A successful
		 * load overwrites imgp->entry_addr with the interpreter's
		 * entry point.
		 */
		int have_interp = FALSE;
		if (brand_info->emul_path != NULL &&
		    brand_info->emul_path[0] != '\0') {
			path = kmalloc(MAXPATHLEN, M_TEMP, M_WAITOK);
			ksnprintf(path, MAXPATHLEN, "%s%s",
			    brand_info->emul_path, interp);
			error = __elfN(load_file)(imgp->proc, path, &addr,
			    &imgp->entry_addr);
			kfree(path, M_TEMP);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp && newinterp != NULL) {
			error = __elfN(load_file)(imgp->proc, newinterp,
			    &addr, &imgp->entry_addr);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp) {
			error = __elfN(load_file)(imgp->proc, interp, &addr,
			    &imgp->entry_addr);
		}
		if (error != 0) {
			uprintf("ELF interpreter %s not found\n", interp);
			kfree(interp, M_TEMP);
			return (error);
		}
		kfree(interp, M_TEMP);
	} else
		addr = et_dyn_addr;

	/*
	 * Construct auxargs table (used by the fixup routine)
	 */
	elf_auxargs = kmalloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
	elf_auxargs->execfd = -1;
	elf_auxargs->phdr = proghdr;
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;
	imgp->proc->p_osrel = osrel;

	return (error);
}

/*
 * Stack fixup: emit the ELF auxiliary vector and the argument count.
 *
 * The aux entries are written starting at
 * *stack_base + argc + envc + 2 (in Elf_Addr units), using the values
 * saved in imgp->auxargs by the image activator.  The auxargs record is
 * then freed, argc is stored one slot below the original base with
 * suword(), and *stack_base is updated to point at it.
 *
 * Always returns 0.
 */
int
__elfN(dragonfly_fixup)(register_t **stack_base, struct image_params *imgp)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Addr *base;
	Elf_Addr *pos;

	base = (Elf_Addr *)*stack_base;
	pos = base + (imgp->args->argc + imgp->args->envc + 2);

	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	if (imgp->execpathp != 0)
		AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
	AUXARGS_ENTRY(pos, AT_OSRELDATE, osreldate);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	kfree(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	base--;
	suword(base, (long)imgp->args->argc);
	*stack_base = (register_t *)base;
	return (0);
}

/*
 * Code for generating ELF core dumps.
900 */ 901 902 typedef int (*segment_callback)(vm_map_entry_t, void *); 903 904 /* Closure for cb_put_phdr(). */ 905 struct phdr_closure { 906 Elf_Phdr *phdr; /* Program header to fill in (incremented) */ 907 Elf_Phdr *phdr_max; /* Pointer bound for error check */ 908 Elf_Off offset; /* Offset of segment in core file */ 909 }; 910 911 /* Closure for cb_size_segment(). */ 912 struct sseg_closure { 913 int count; /* Count of writable segments. */ 914 size_t vsize; /* Total size of all writable segments. */ 915 }; 916 917 /* Closure for cb_put_fp(). */ 918 struct fp_closure { 919 struct vn_hdr *vnh; 920 struct vn_hdr *vnh_max; 921 int count; 922 struct stat *sb; 923 }; 924 925 typedef struct elf_buf { 926 char *buf; 927 size_t off; 928 size_t off_max; 929 } *elf_buf_t; 930 931 static void *target_reserve(elf_buf_t target, size_t bytes, int *error); 932 933 static int cb_put_phdr (vm_map_entry_t, void *); 934 static int cb_size_segment (vm_map_entry_t, void *); 935 static int cb_fpcount_segment(vm_map_entry_t, void *); 936 static int cb_put_fp(vm_map_entry_t, void *); 937 938 939 static int each_segment (struct proc *, segment_callback, void *, int); 940 static int __elfN(corehdr)(struct lwp *, int, struct file *, struct ucred *, 941 int, elf_buf_t); 942 enum putmode { WRITE, DRYRUN }; 943 static int __elfN(puthdr)(struct lwp *, elf_buf_t, int sig, enum putmode, 944 int, struct file *); 945 static int elf_putallnotes(struct lwp *, elf_buf_t, int, enum putmode); 946 static int __elfN(putnote)(elf_buf_t, const char *, int, const void *, size_t); 947 948 static int elf_putsigs(struct lwp *, elf_buf_t); 949 static int elf_puttextvp(struct proc *, elf_buf_t); 950 static int elf_putfiles(struct proc *, elf_buf_t, struct file *); 951 952 int 953 __elfN(coredump)(struct lwp *lp, int sig, struct vnode *vp, off_t limit) 954 { 955 struct file *fp; 956 int error; 957 958 if ((error = falloc(NULL, &fp, NULL)) != 0) 959 return (error); 960 fsetcred(fp, lp->lwp_proc->p_ucred); 961 
962 /* 963 * XXX fixme. 964 */ 965 fp->f_type = DTYPE_VNODE; 966 fp->f_flag = O_CREAT|O_WRONLY|O_NOFOLLOW; 967 fp->f_ops = &vnode_fileops; 968 fp->f_data = vp; 969 970 error = generic_elf_coredump(lp, sig, fp, limit); 971 972 fp->f_type = 0; 973 fp->f_flag = 0; 974 fp->f_ops = &badfileops; 975 fp->f_data = NULL; 976 fdrop(fp); 977 return (error); 978 } 979 980 int 981 generic_elf_coredump(struct lwp *lp, int sig, struct file *fp, off_t limit) 982 { 983 struct proc *p = lp->lwp_proc; 984 struct ucred *cred = p->p_ucred; 985 int error = 0; 986 struct sseg_closure seginfo; 987 struct elf_buf target; 988 989 if (!fp) 990 kprintf("can't dump core - null fp\n"); 991 992 /* 993 * Size the program segments 994 */ 995 seginfo.count = 0; 996 seginfo.vsize = 0; 997 each_segment(p, cb_size_segment, &seginfo, 1); 998 999 /* 1000 * Calculate the size of the core file header area by making 1001 * a dry run of generating it. Nothing is written, but the 1002 * size is calculated. 1003 */ 1004 bzero(&target, sizeof(target)); 1005 __elfN(puthdr)(lp, &target, sig, DRYRUN, seginfo.count, fp); 1006 1007 if (target.off + seginfo.vsize >= limit) 1008 return (EFAULT); 1009 1010 /* 1011 * Allocate memory for building the header, fill it up, 1012 * and write it out. 1013 */ 1014 target.off_max = target.off; 1015 target.off = 0; 1016 target.buf = kmalloc(target.off_max, M_TEMP, M_WAITOK|M_ZERO); 1017 1018 error = __elfN(corehdr)(lp, sig, fp, cred, seginfo.count, &target); 1019 1020 /* Write the contents of all of the writable segments. 
*/ 1021 if (error == 0) { 1022 Elf_Phdr *php; 1023 int i; 1024 ssize_t nbytes; 1025 1026 php = (Elf_Phdr *)(target.buf + sizeof(Elf_Ehdr)) + 1; 1027 for (i = 0; i < seginfo.count; i++) { 1028 error = fp_write(fp, (caddr_t)php->p_vaddr, 1029 php->p_filesz, &nbytes, UIO_USERSPACE); 1030 if (error != 0) 1031 break; 1032 php++; 1033 } 1034 } 1035 kfree(target.buf, M_TEMP); 1036 1037 return (error); 1038 } 1039 1040 /* 1041 * A callback for each_segment() to write out the segment's 1042 * program header entry. 1043 */ 1044 static int 1045 cb_put_phdr(vm_map_entry_t entry, void *closure) 1046 { 1047 struct phdr_closure *phc = closure; 1048 Elf_Phdr *phdr = phc->phdr; 1049 1050 if (phc->phdr == phc->phdr_max) 1051 return (EINVAL); 1052 1053 phc->offset = round_page(phc->offset); 1054 1055 phdr->p_type = PT_LOAD; 1056 phdr->p_offset = phc->offset; 1057 phdr->p_vaddr = entry->start; 1058 phdr->p_paddr = 0; 1059 phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; 1060 phdr->p_align = PAGE_SIZE; 1061 phdr->p_flags = 0; 1062 if (entry->protection & VM_PROT_READ) 1063 phdr->p_flags |= PF_R; 1064 if (entry->protection & VM_PROT_WRITE) 1065 phdr->p_flags |= PF_W; 1066 if (entry->protection & VM_PROT_EXECUTE) 1067 phdr->p_flags |= PF_X; 1068 1069 phc->offset += phdr->p_filesz; 1070 ++phc->phdr; 1071 return (0); 1072 } 1073 1074 /* 1075 * A callback for each_writable_segment() to gather information about 1076 * the number of segments and their total size. 1077 */ 1078 static int 1079 cb_size_segment(vm_map_entry_t entry, void *closure) 1080 { 1081 struct sseg_closure *ssc = closure; 1082 1083 ++ssc->count; 1084 ssc->vsize += entry->end - entry->start; 1085 return (0); 1086 } 1087 1088 /* 1089 * A callback for each_segment() to gather information about 1090 * the number of text segments. 
 *
 * "closure" points at an int that is incremented for every entry backed
 * by an OBJT_VNODE object, except when the vnode is flagged VCKPT and is
 * the current process's text vnode.  Always returns 0.
 */
static int
cb_fpcount_segment(vm_map_entry_t entry, void *closure)
{
	int *count = closure;
	struct vnode *vp;

	if (entry->object.vm_object->type == OBJT_VNODE) {
		vp = (struct vnode *)entry->object.vm_object->handle;
		if ((vp->v_flag & VCKPT) && curproc->p_textvp == vp)
			return (0);
		++*count;
	}
	return (0);
}

/*
 * A callback for each_segment() that records a file handle and a
 * program header for each vnode-backed map entry.
 *
 * Entries whose object is not OBJT_VNODE are ignored.  Returns EINVAL
 * when the vn_hdr array in the closure is already full, 0 otherwise.  A
 * failure to obtain the file handle is only logged; the header is
 * still emitted.
 */
static int
cb_put_fp(vm_map_entry_t entry, void *closure)
{
	struct fp_closure *fpc = closure;
	struct vn_hdr *vnh = fpc->vnh;
	Elf_Phdr *phdr = &vnh->vnh_phdr;
	struct vnode *vp;
	int error;

	/*
	 * If an entry represents a vnode then write out a file handle.
	 *
	 * If we are checkpointing a checkpoint-restored program we do
	 * NOT record the filehandle for the old checkpoint vnode (which
	 * is mapped all over the place).  Instead we rely on the fact
	 * that a checkpoint-restored program does not mmap() the checkpt
	 * vnode NOCORE, so its contents will be written out to the
	 * new checkpoint file.  This is necessary because the 'old'
	 * checkpoint file is typically destroyed when a new one is created
	 * and thus cannot be used to restore the new checkpoint.
	 *
	 * Theoretically we could create a chain of checkpoint files and
	 * operate the checkpointing operation kinda like an incremental
	 * checkpoint, but a checkpoint restore would then likely wind up
	 * referencing many prior checkpoint files and that is a bit over
	 * the top for the purpose of the checkpoint API.
	 */
	if (entry->object.vm_object->type == OBJT_VNODE) {
		vp = (struct vnode *)entry->object.vm_object->handle;
		if ((vp->v_flag & VCKPT) && curproc->p_textvp == vp)
			return (0);
		if (vnh == fpc->vnh_max)
			return (EINVAL);

		/* The fsid is only filled in when the vnode has a mount. */
		if (vp->v_mount)
			vnh->vnh_fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
		error = VFS_VPTOFH(vp, &vnh->vnh_fh.fh_fid);
		if (error) {
			char *freepath, *fullpath;

			/* Name the file in the warning if a path is available. */
			if (vn_fullpath(curproc, vp, &fullpath, &freepath, 0)) {
				kprintf("Warning: coredump, error %d: cannot store file handle for vnode %p\n", error, vp);
			} else {
				kprintf("Warning: coredump, error %d: cannot store file handle for %s\n", error, fullpath);
				kfree(freepath, M_TEMP);
			}
			error = 0;
		}

		phdr->p_type = PT_LOAD;
		phdr->p_offset = 0;		/* not written to core */
		phdr->p_vaddr = entry->start;
		phdr->p_paddr = 0;
		phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
		phdr->p_align = PAGE_SIZE;
		phdr->p_flags = 0;
		if (entry->protection & VM_PROT_READ)
			phdr->p_flags |= PF_R;
		if (entry->protection & VM_PROT_WRITE)
			phdr->p_flags |= PF_W;
		if (entry->protection & VM_PROT_EXECUTE)
			phdr->p_flags |= PF_X;
		++fpc->vnh;
		++fpc->count;
	}
	return (0);
}

/*
 * For each writable segment in the process's memory map, call the given
 * function with a pointer to the map entry and some arbitrary
 * caller-supplied data.
 */
static int
each_segment(struct proc *p, segment_callback func, void *closure, int writable)
{
	int error = 0;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;

	for (entry = map->header.next; error == 0 && entry != &map->header;
	    entry = entry->next) {
		vm_object_t obj;
		vm_object_t lobj;
		vm_object_t tobj;

		/*
		 * Don't dump inaccessible mappings, deal with legacy
		 * coredump mode.
		 *
		 * Note that read-only segments related to the elf binary
		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
		 * need to arbitrarily ignore such segments.
		 */
		if (elf_legacy_coredump) {
			if (writable && (entry->protection & VM_PROT_RW) != VM_PROT_RW)
				continue;
		} else {
			if (writable && (entry->protection & VM_PROT_ALL) == 0)
				continue;
		}

		/*
		 * Dont include memory segment in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).
		 *
		 * Currently we only dump normal VM object maps.  We do
		 * not dump submaps or virtual page tables.
		 */
		if (writable && (entry->eflags & MAP_ENTRY_NOCOREDUMP))
			continue;
		if (entry->maptype != VM_MAPTYPE_NORMAL)
			continue;
		if ((obj = entry->object.vm_object) == NULL)
			continue;

		/*
		 * Find the bottom-most object, leaving the base object
		 * and the bottom-most object held (but only one hold
		 * if they happen to be the same).
		 */
		vm_object_hold(obj);

		lobj = obj;
		while (lobj && (tobj = lobj->backing_object) != NULL) {
			KKASSERT(tobj != obj);
			vm_object_hold(tobj);
			if (tobj == lobj->backing_object) {
				if (lobj != obj) {
					vm_object_lock_swap();
					vm_object_drop(lobj);
				}
				lobj = tobj;
			} else {
				vm_object_drop(tobj);
			}
		}

		/*
		 * The callback only applies to default, swap, or vnode
		 * objects.  Other types of objects such as memory-mapped
		 * devices are ignored.
1250 */ 1251 if (lobj->type == OBJT_DEFAULT || lobj->type == OBJT_SWAP || 1252 lobj->type == OBJT_VNODE) { 1253 error = (*func)(entry, closure); 1254 } 1255 if (lobj != obj) 1256 vm_object_drop(lobj); 1257 vm_object_drop(obj); 1258 } 1259 return (error); 1260 } 1261 1262 static 1263 void * 1264 target_reserve(elf_buf_t target, size_t bytes, int *error) 1265 { 1266 void *res = NULL; 1267 1268 if (target->buf) { 1269 if (target->off + bytes > target->off_max) 1270 *error = EINVAL; 1271 else 1272 res = target->buf + target->off; 1273 } 1274 target->off += bytes; 1275 return (res); 1276 } 1277 1278 /* 1279 * Write the core file header to the file, including padding up to 1280 * the page boundary. 1281 */ 1282 static int 1283 __elfN(corehdr)(struct lwp *lp, int sig, struct file *fp, struct ucred *cred, 1284 int numsegs, elf_buf_t target) 1285 { 1286 int error; 1287 ssize_t nbytes; 1288 1289 /* 1290 * Fill in the header. The fp is passed so we can detect and flag 1291 * a checkpoint file pointer within the core file itself, because 1292 * it may not be restored from the same file handle. 1293 */ 1294 error = __elfN(puthdr)(lp, target, sig, WRITE, numsegs, fp); 1295 1296 /* Write it to the core file. 
*/ 1297 if (error == 0) { 1298 error = fp_write(fp, target->buf, target->off, &nbytes, 1299 UIO_SYSSPACE); 1300 } 1301 return (error); 1302 } 1303 1304 static int 1305 __elfN(puthdr)(struct lwp *lp, elf_buf_t target, int sig, enum putmode mode, 1306 int numsegs, struct file *fp) 1307 { 1308 struct proc *p = lp->lwp_proc; 1309 int error = 0; 1310 size_t phoff; 1311 size_t noteoff; 1312 size_t notesz; 1313 Elf_Ehdr *ehdr; 1314 Elf_Phdr *phdr; 1315 1316 ehdr = target_reserve(target, sizeof(Elf_Ehdr), &error); 1317 1318 phoff = target->off; 1319 phdr = target_reserve(target, (numsegs + 1) * sizeof(Elf_Phdr), &error); 1320 1321 noteoff = target->off; 1322 if (error == 0) 1323 elf_putallnotes(lp, target, sig, mode); 1324 notesz = target->off - noteoff; 1325 1326 /* 1327 * put extra cruft for dumping process state here 1328 * - we really want it be before all the program 1329 * mappings 1330 * - we just need to update the offset accordingly 1331 * and GDB will be none the wiser. 1332 */ 1333 if (error == 0) 1334 error = elf_puttextvp(p, target); 1335 if (error == 0) 1336 error = elf_putsigs(lp, target); 1337 if (error == 0) 1338 error = elf_putfiles(p, target, fp); 1339 1340 /* 1341 * Align up to a page boundary for the program segments. The 1342 * actual data will be written to the outptu file, not to elf_buf_t, 1343 * so we do not have to do any further bounds checking. 1344 */ 1345 target->off = round_page(target->off); 1346 if (error == 0 && ehdr != NULL) { 1347 /* 1348 * Fill in the ELF header. 
1349 */ 1350 ehdr->e_ident[EI_MAG0] = ELFMAG0; 1351 ehdr->e_ident[EI_MAG1] = ELFMAG1; 1352 ehdr->e_ident[EI_MAG2] = ELFMAG2; 1353 ehdr->e_ident[EI_MAG3] = ELFMAG3; 1354 ehdr->e_ident[EI_CLASS] = ELF_CLASS; 1355 ehdr->e_ident[EI_DATA] = ELF_DATA; 1356 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 1357 ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE; 1358 ehdr->e_ident[EI_ABIVERSION] = 0; 1359 ehdr->e_ident[EI_PAD] = 0; 1360 ehdr->e_type = ET_CORE; 1361 ehdr->e_machine = ELF_ARCH; 1362 ehdr->e_version = EV_CURRENT; 1363 ehdr->e_entry = 0; 1364 ehdr->e_phoff = phoff; 1365 ehdr->e_flags = 0; 1366 ehdr->e_ehsize = sizeof(Elf_Ehdr); 1367 ehdr->e_phentsize = sizeof(Elf_Phdr); 1368 ehdr->e_phnum = numsegs + 1; 1369 ehdr->e_shentsize = sizeof(Elf_Shdr); 1370 ehdr->e_shnum = 0; 1371 ehdr->e_shstrndx = SHN_UNDEF; 1372 } 1373 if (error == 0 && phdr != NULL) { 1374 /* 1375 * Fill in the program header entries. 1376 */ 1377 struct phdr_closure phc; 1378 1379 /* The note segement. */ 1380 phdr->p_type = PT_NOTE; 1381 phdr->p_offset = noteoff; 1382 phdr->p_vaddr = 0; 1383 phdr->p_paddr = 0; 1384 phdr->p_filesz = notesz; 1385 phdr->p_memsz = 0; 1386 phdr->p_flags = 0; 1387 phdr->p_align = 0; 1388 ++phdr; 1389 1390 /* All the writable segments from the program. */ 1391 phc.phdr = phdr; 1392 phc.phdr_max = phdr + numsegs; 1393 phc.offset = target->off; 1394 each_segment(p, cb_put_phdr, &phc, 1); 1395 } 1396 return (error); 1397 } 1398 1399 /* 1400 * Append core dump notes to target ELF buffer or simply update target size 1401 * if dryrun selected. 
1402 */ 1403 static int 1404 elf_putallnotes(struct lwp *corelp, elf_buf_t target, int sig, 1405 enum putmode mode) 1406 { 1407 struct proc *p = corelp->lwp_proc; 1408 int error; 1409 struct { 1410 prstatus_t status; 1411 prfpregset_t fpregs; 1412 prpsinfo_t psinfo; 1413 } *tmpdata; 1414 prstatus_t *status; 1415 prfpregset_t *fpregs; 1416 prpsinfo_t *psinfo; 1417 struct lwp *lp; 1418 1419 /* 1420 * Allocate temporary storage for notes on heap to avoid stack overflow. 1421 */ 1422 if (mode != DRYRUN) { 1423 tmpdata = kmalloc(sizeof(*tmpdata), M_TEMP, M_ZERO | M_WAITOK); 1424 status = &tmpdata->status; 1425 fpregs = &tmpdata->fpregs; 1426 psinfo = &tmpdata->psinfo; 1427 } else { 1428 tmpdata = NULL; 1429 status = NULL; 1430 fpregs = NULL; 1431 psinfo = NULL; 1432 } 1433 1434 /* 1435 * Append LWP-agnostic note. 1436 */ 1437 if (mode != DRYRUN) { 1438 psinfo->pr_version = PRPSINFO_VERSION; 1439 psinfo->pr_psinfosz = sizeof(prpsinfo_t); 1440 strlcpy(psinfo->pr_fname, p->p_comm, 1441 sizeof(psinfo->pr_fname)); 1442 /* 1443 * XXX - We don't fill in the command line arguments 1444 * properly yet. 1445 */ 1446 strlcpy(psinfo->pr_psargs, p->p_comm, 1447 sizeof(psinfo->pr_psargs)); 1448 } 1449 error = 1450 __elfN(putnote)(target, "CORE", NT_PRPSINFO, psinfo, sizeof *psinfo); 1451 if (error) 1452 goto exit; 1453 1454 /* 1455 * Append first note for LWP that triggered core so that it is 1456 * the selected one when the debugger starts. 1457 */ 1458 if (mode != DRYRUN) { 1459 status->pr_version = PRSTATUS_VERSION; 1460 status->pr_statussz = sizeof(prstatus_t); 1461 status->pr_gregsetsz = sizeof(gregset_t); 1462 status->pr_fpregsetsz = sizeof(fpregset_t); 1463 status->pr_osreldate = osreldate; 1464 status->pr_cursig = sig; 1465 /* 1466 * XXX GDB needs unique pr_pid for each LWP and does not 1467 * not support pr_pid==0 but lwp_tid can be 0, so hack unique 1468 * value. 
1469 */ 1470 status->pr_pid = corelp->lwp_tid; 1471 fill_regs(corelp, &status->pr_reg); 1472 fill_fpregs(corelp, fpregs); 1473 } 1474 error = 1475 __elfN(putnote)(target, "CORE", NT_PRSTATUS, status, sizeof *status); 1476 if (error) 1477 goto exit; 1478 error = 1479 __elfN(putnote)(target, "CORE", NT_FPREGSET, fpregs, sizeof *fpregs); 1480 if (error) 1481 goto exit; 1482 1483 /* 1484 * Then append notes for other LWPs. 1485 */ 1486 FOREACH_LWP_IN_PROC(lp, p) { 1487 if (lp == corelp) 1488 continue; 1489 /* skip lwps being created */ 1490 if (lp->lwp_thread == NULL) 1491 continue; 1492 if (mode != DRYRUN) { 1493 status->pr_pid = lp->lwp_tid; 1494 fill_regs(lp, &status->pr_reg); 1495 fill_fpregs(lp, fpregs); 1496 } 1497 error = __elfN(putnote)(target, "CORE", NT_PRSTATUS, 1498 status, sizeof *status); 1499 if (error) 1500 goto exit; 1501 error = __elfN(putnote)(target, "CORE", NT_FPREGSET, 1502 fpregs, sizeof *fpregs); 1503 if (error) 1504 goto exit; 1505 } 1506 1507 exit: 1508 if (tmpdata != NULL) 1509 kfree(tmpdata, M_TEMP); 1510 return (error); 1511 } 1512 1513 /* 1514 * Generate a note sub-structure. 1515 * 1516 * NOTE: 4-byte alignment. 
1517 */ 1518 static int 1519 __elfN(putnote)(elf_buf_t target, const char *name, int type, 1520 const void *desc, size_t descsz) 1521 { 1522 int error = 0; 1523 char *dst; 1524 Elf_Note note; 1525 1526 note.n_namesz = strlen(name) + 1; 1527 note.n_descsz = descsz; 1528 note.n_type = type; 1529 dst = target_reserve(target, sizeof(note), &error); 1530 if (dst != NULL) 1531 bcopy(¬e, dst, sizeof note); 1532 dst = target_reserve(target, note.n_namesz, &error); 1533 if (dst != NULL) 1534 bcopy(name, dst, note.n_namesz); 1535 target->off = roundup2(target->off, sizeof(Elf_Word)); 1536 dst = target_reserve(target, note.n_descsz, &error); 1537 if (dst != NULL) 1538 bcopy(desc, dst, note.n_descsz); 1539 target->off = roundup2(target->off, sizeof(Elf_Word)); 1540 return (error); 1541 } 1542 1543 1544 static int 1545 elf_putsigs(struct lwp *lp, elf_buf_t target) 1546 { 1547 /* XXX lwp handle more than one lwp */ 1548 struct proc *p = lp->lwp_proc; 1549 int error = 0; 1550 struct ckpt_siginfo *csi; 1551 1552 csi = target_reserve(target, sizeof(struct ckpt_siginfo), &error); 1553 if (csi) { 1554 csi->csi_ckptpisz = sizeof(struct ckpt_siginfo); 1555 bcopy(p->p_sigacts, &csi->csi_sigacts, sizeof(*p->p_sigacts)); 1556 bcopy(&p->p_realtimer, &csi->csi_itimerval, sizeof(struct itimerval)); 1557 bcopy(&lp->lwp_sigmask, &csi->csi_sigmask, 1558 sizeof(sigset_t)); 1559 csi->csi_sigparent = p->p_sigparent; 1560 } 1561 return (error); 1562 } 1563 1564 static int 1565 elf_putfiles(struct proc *p, elf_buf_t target, struct file *ckfp) 1566 { 1567 int error = 0; 1568 int i; 1569 struct ckpt_filehdr *cfh = NULL; 1570 struct ckpt_fileinfo *cfi; 1571 struct file *fp; 1572 struct vnode *vp; 1573 /* 1574 * the duplicated loop is gross, but it was the only way 1575 * to eliminate uninitialized variable warnings 1576 */ 1577 cfh = target_reserve(target, sizeof(struct ckpt_filehdr), &error); 1578 if (cfh) { 1579 cfh->cfh_nfiles = 0; 1580 } 1581 1582 /* 1583 * ignore STDIN/STDERR/STDOUT. 
1584 */ 1585 for (i = 3; error == 0 && i < p->p_fd->fd_nfiles; i++) { 1586 fp = holdfp(p->p_fd, i, -1); 1587 if (fp == NULL) 1588 continue; 1589 /* 1590 * XXX Only checkpoint vnodes for now. 1591 */ 1592 if (fp->f_type != DTYPE_VNODE) { 1593 fdrop(fp); 1594 continue; 1595 } 1596 cfi = target_reserve(target, sizeof(struct ckpt_fileinfo), 1597 &error); 1598 if (cfi == NULL) { 1599 fdrop(fp); 1600 continue; 1601 } 1602 cfi->cfi_index = -1; 1603 cfi->cfi_type = fp->f_type; 1604 cfi->cfi_flags = fp->f_flag; 1605 cfi->cfi_offset = fp->f_offset; 1606 cfi->cfi_ckflags = 0; 1607 1608 if (fp == ckfp) 1609 cfi->cfi_ckflags |= CKFIF_ISCKPTFD; 1610 /* f_count and f_msgcount should not be saved/restored */ 1611 /* XXX save cred info */ 1612 1613 switch(fp->f_type) { 1614 case DTYPE_VNODE: 1615 vp = (struct vnode *)fp->f_data; 1616 /* 1617 * it looks like a bug in ptrace is marking 1618 * a non-vnode as a vnode - until we find the 1619 * root cause this will at least prevent 1620 * further panics from truss 1621 */ 1622 if (vp == NULL || vp->v_mount == NULL) 1623 break; 1624 cfh->cfh_nfiles++; 1625 cfi->cfi_index = i; 1626 cfi->cfi_fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 1627 error = VFS_VPTOFH(vp, &cfi->cfi_fh.fh_fid); 1628 break; 1629 default: 1630 break; 1631 } 1632 fdrop(fp); 1633 } 1634 return (error); 1635 } 1636 1637 static int 1638 elf_puttextvp(struct proc *p, elf_buf_t target) 1639 { 1640 int error = 0; 1641 int *vn_count; 1642 struct fp_closure fpc; 1643 struct ckpt_vminfo *vminfo; 1644 1645 vminfo = target_reserve(target, sizeof(struct ckpt_vminfo), &error); 1646 if (vminfo != NULL) { 1647 vminfo->cvm_dsize = p->p_vmspace->vm_dsize; 1648 vminfo->cvm_tsize = p->p_vmspace->vm_tsize; 1649 vminfo->cvm_daddr = p->p_vmspace->vm_daddr; 1650 vminfo->cvm_taddr = p->p_vmspace->vm_taddr; 1651 } 1652 1653 fpc.count = 0; 1654 vn_count = target_reserve(target, sizeof(int), &error); 1655 if (target->buf != NULL) { 1656 fpc.vnh = (struct vn_hdr *)(target->buf + target->off); 1657 
fpc.vnh_max = fpc.vnh + 1658 (target->off_max - target->off) / sizeof(struct vn_hdr); 1659 error = each_segment(p, cb_put_fp, &fpc, 0); 1660 if (vn_count) 1661 *vn_count = fpc.count; 1662 } else { 1663 error = each_segment(p, cb_fpcount_segment, &fpc.count, 0); 1664 } 1665 target->off += fpc.count * sizeof(struct vn_hdr); 1666 return (error); 1667 } 1668 1669 /* 1670 * Try to find the appropriate ABI-note section for checknote, 1671 * The entire image is searched if necessary, not only the first page. 1672 */ 1673 static boolean_t 1674 __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote, 1675 int32_t *osrel) 1676 { 1677 boolean_t valid_note_found; 1678 const Elf_Phdr *phdr, *pnote; 1679 const Elf_Ehdr *hdr; 1680 int i; 1681 1682 valid_note_found = FALSE; 1683 hdr = (const Elf_Ehdr *)imgp->image_header; 1684 phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 1685 1686 for (i = 0; i < hdr->e_phnum; i++) { 1687 if (phdr[i].p_type == PT_NOTE) { 1688 pnote = &phdr[i]; 1689 valid_note_found = check_PT_NOTE (imgp, checknote, 1690 osrel, pnote); 1691 if (valid_note_found) 1692 break; 1693 } 1694 } 1695 return valid_note_found; 1696 } 1697 1698 static boolean_t 1699 check_PT_NOTE(struct image_params *imgp, Elf_Brandnote *checknote, 1700 int32_t *osrel, const Elf_Phdr * pnote) 1701 { 1702 boolean_t limited_to_first_page; 1703 boolean_t found = FALSE; 1704 const Elf_Note *note, *note0, *note_end; 1705 const char *note_name; 1706 __ElfN(Off) noteloc, firstloc; 1707 __ElfN(Size) notesz, firstlen, endbyte; 1708 struct lwbuf *lwb; 1709 struct lwbuf lwb_cache; 1710 const char *page; 1711 char *data = NULL; 1712 int n; 1713 1714 notesz = pnote->p_filesz; 1715 noteloc = pnote->p_offset; 1716 endbyte = noteloc + notesz; 1717 limited_to_first_page = noteloc < PAGE_SIZE && endbyte < PAGE_SIZE; 1718 1719 if (limited_to_first_page) { 1720 note = (const Elf_Note *)(imgp->image_header + noteloc); 1721 note_end = (const Elf_Note *)(imgp->image_header + 
endbyte); 1722 note0 = note; 1723 } else { 1724 firstloc = noteloc & PAGE_MASK; 1725 firstlen = PAGE_SIZE - firstloc; 1726 if (notesz < sizeof(Elf_Note) || notesz > PAGE_SIZE) 1727 return (FALSE); 1728 1729 lwb = &lwb_cache; 1730 if (exec_map_page(imgp, noteloc >> PAGE_SHIFT, &lwb, &page)) 1731 return (FALSE); 1732 if (firstlen < notesz) { /* crosses page boundary */ 1733 data = kmalloc(notesz, M_TEMP, M_WAITOK); 1734 bcopy(page + firstloc, data, firstlen); 1735 1736 exec_unmap_page(lwb); 1737 lwb = &lwb_cache; 1738 if (exec_map_page(imgp, (noteloc >> PAGE_SHIFT) + 1, 1739 &lwb, &page)) { 1740 kfree(data, M_TEMP); 1741 return (FALSE); 1742 } 1743 bcopy(page, data + firstlen, notesz - firstlen); 1744 note = note0 = (const Elf_Note *)(data); 1745 note_end = (const Elf_Note *)(data + notesz); 1746 } else { 1747 note = note0 = (const Elf_Note *)(page + firstloc); 1748 note_end = (const Elf_Note *)(page + firstloc + 1749 firstlen); 1750 } 1751 } 1752 1753 for (n = 0; n < 100 && note >= note0 && note < note_end; n++) { 1754 if (!aligned(note, Elf32_Addr)) 1755 break; 1756 note_name = (const char *)(note + 1); 1757 1758 if (note->n_namesz == checknote->hdr.n_namesz 1759 && note->n_descsz == checknote->hdr.n_descsz 1760 && note->n_type == checknote->hdr.n_type 1761 && (strncmp(checknote->vendor, note_name, 1762 checknote->hdr.n_namesz) == 0)) { 1763 /* Fetch osreldata from ABI.note-tag */ 1764 if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 && 1765 checknote->trans_osrel != NULL) 1766 checknote->trans_osrel(note, osrel); 1767 found = TRUE; 1768 break; 1769 } 1770 note = (const Elf_Note *)((const char *)(note + 1) + 1771 roundup2(note->n_namesz, sizeof(Elf32_Addr)) + 1772 roundup2(note->n_descsz, sizeof(Elf32_Addr))); 1773 } 1774 1775 if (!limited_to_first_page) { 1776 if (data != NULL) 1777 kfree(data, M_TEMP); 1778 exec_unmap_page(lwb); 1779 } 1780 return (found); 1781 } 1782 1783 /* 1784 * The interpreter program header may be located beyond the first page, so 1785 * 
regardless of its location, a copy of the interpreter path is created so 1786 * that it may be safely referenced by the calling function in all case. The 1787 * memory is allocated by calling function, and the copying is done here. 1788 */ 1789 static boolean_t 1790 extract_interpreter(struct image_params *imgp, const Elf_Phdr *pinterpreter, 1791 char *data) 1792 { 1793 boolean_t limited_to_first_page; 1794 const boolean_t result_success = FALSE; 1795 const boolean_t result_failure = TRUE; 1796 __ElfN(Off) pathloc, firstloc; 1797 __ElfN(Size) pathsz, firstlen, endbyte; 1798 struct lwbuf *lwb; 1799 struct lwbuf lwb_cache; 1800 const char *page; 1801 1802 pathsz = pinterpreter->p_filesz; 1803 pathloc = pinterpreter->p_offset; 1804 endbyte = pathloc + pathsz; 1805 1806 limited_to_first_page = pathloc < PAGE_SIZE && endbyte < PAGE_SIZE; 1807 if (limited_to_first_page) { 1808 bcopy(imgp->image_header + pathloc, data, pathsz); 1809 return (result_success); 1810 } 1811 1812 firstloc = pathloc & PAGE_MASK; 1813 firstlen = PAGE_SIZE - firstloc; 1814 1815 lwb = &lwb_cache; 1816 if (exec_map_page(imgp, pathloc >> PAGE_SHIFT, &lwb, &page)) 1817 return (result_failure); 1818 1819 if (firstlen < pathsz) { /* crosses page boundary */ 1820 bcopy(page + firstloc, data, firstlen); 1821 1822 exec_unmap_page(lwb); 1823 lwb = &lwb_cache; 1824 if (exec_map_page(imgp, (pathloc >> PAGE_SHIFT) + 1, &lwb, 1825 &page)) 1826 return (result_failure); 1827 bcopy(page, data + firstlen, pathsz - firstlen); 1828 } else 1829 bcopy(page + firstloc, data, pathsz); 1830 1831 exec_unmap_page(lwb); 1832 return (result_success); 1833 } 1834 1835 static boolean_t 1836 __elfN(bsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) 1837 { 1838 uintptr_t p; 1839 1840 p = (uintptr_t)(note + 1); 1841 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1842 *osrel = *(const int32_t *)(p); 1843 1844 return (TRUE); 1845 } 1846 1847 /* 1848 * Tell kern_execve.c about it, with a little help from the linker. 
1849 */ 1850 #if defined(__x86_64__) 1851 static struct execsw elf_execsw = {exec_elf64_imgact, "ELF64"}; 1852 EXEC_SET_ORDERED(elf64, elf_execsw, SI_ORDER_FIRST); 1853 #else /* i386 assumed */ 1854 static struct execsw elf_execsw = {exec_elf32_imgact, "ELF32"}; 1855 EXEC_SET_ORDERED(elf32, elf_execsw, SI_ORDER_FIRST); 1856 #endif 1857