1 /*- 2 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.proprietary.c% 6 * 7 * @(#)kern_exec.c 7.49 (Berkeley) 11/19/91 8 */ 9 10 #include "param.h" 11 #include "systm.h" 12 #include "filedesc.h" 13 #include "kernel.h" 14 #include "proc.h" 15 #include "mount.h" 16 #include "malloc.h" 17 #include "namei.h" 18 #include "vnode.h" 19 #include "file.h" 20 #include "acct.h" 21 #include "exec.h" 22 #include "ktrace.h" 23 #include "resourcevar.h" 24 25 #include "machine/cpu.h" 26 #include "machine/reg.h" 27 28 #include "mman.h" 29 #include "vm/vm.h" 30 #include "vm/vm_param.h" 31 #include "vm/vm_map.h" 32 #include "vm/vm_kern.h" 33 #include "vm/vm_pager.h" 34 35 #include "signalvar.h" 36 #include "kinfo_proc.h" 37 38 #ifdef HPUXCOMPAT 39 #include "user.h" /* for pcb */ 40 #include "hp300/hpux/hpux_exec.h" 41 #endif 42 43 #ifdef COPY_SIGCODE 44 extern char sigcode[], esigcode[]; 45 #define szsigcode (esigcode - sigcode) 46 #else 47 #define szsigcode 0 48 #endif 49 50 /* 51 * exec system call 52 */ 53 /* ARGSUSED */ 54 execve(p, uap, retval) 55 register struct proc *p; 56 register struct args { 57 char *fname; 58 char **argp; 59 char **envp; 60 } *uap; 61 int *retval; 62 { 63 register struct ucred *cred = p->p_ucred; 64 register struct nameidata *ndp; 65 register struct filedesc *fdp = p->p_fd; 66 int na, ne, ucp, ap, cc; 67 register char *cp; 68 register int nc; 69 unsigned len; 70 int indir, uid, gid; 71 char *sharg; 72 struct vnode *vp; 73 int resid, error, paged = 0; 74 vm_offset_t execargs = 0; 75 struct vattr vattr; 76 char cfarg[MAXINTERP]; 77 union { 78 char ex_shell[MAXINTERP]; /* #! and interpreter name */ 79 struct exec ex_exec; 80 #ifdef HPUXCOMPAT 81 struct hpux_exec ex_hexec; 82 #endif 83 } exdata; 84 #ifdef HPUXCOMPAT 85 struct hpux_exec hhead; 86 #endif 87 struct nameidata nd; 88 89 ndp = &nd; 90 ndp->ni_nameiop = LOOKUP | FOLLOW | LOCKLEAF | SAVENAME; 91 ndp->ni_segflg = UIO_USERSPACE; 92 ndp->ni_dirp = uap->fname; 93 if (error = namei(ndp, p)) 94 return (error); 95 vp = ndp->ni_vp; 96 indir = 0; 97 uid = cred->cr_uid; 98 gid = cred->cr_gid; 99 if (error = VOP_GETATTR(vp, &vattr, cred, p)) 100 goto bad; 101 if (vp->v_mount->mnt_flag & MNT_NOEXEC) { 102 error = EACCES; 103 goto bad; 104 } 105 if ((vp->v_mount->mnt_flag & MNT_NOSUID) == 0) { 106 if (vattr.va_mode & VSUID) 107 uid = vattr.va_uid; 108 if (vattr.va_mode & VSGID) 109 gid = vattr.va_gid; 110 } 111 112 again: 113 if (error = VOP_ACCESS(vp, VEXEC, cred, p)) 114 goto bad; 115 if ((p->p_flag & STRC) && (error = VOP_ACCESS(vp, VREAD, cred, p))) 116 goto bad; 117 if (vp->v_type != VREG || 118 (vattr.va_mode & (VEXEC|(VEXEC>>3)|(VEXEC>>6))) == 0) { 119 error = EACCES; 120 goto bad; 121 } 122 123 /* 124 * Read in first few bytes of file for segment sizes, magic number: 125 * OMAGIC = plain executable 126 * NMAGIC = RO text 127 * ZMAGIC = demand paged RO text 128 * Also an ASCII line beginning with #! is 129 * the file name of a ``shell'' and arguments may be prepended 130 * to the argument list if given here. 131 * 132 * SHELL NAMES ARE LIMITED IN LENGTH. 133 * 134 * ONLY ONE ARGUMENT MAY BE PASSED TO THE SHELL FROM 135 * THE ASCII LINE. 136 */ 137 exdata.ex_shell[0] = '\0'; /* for zero length files */ 138 error = vn_rdwr(UIO_READ, vp, (caddr_t)&exdata, sizeof (exdata), 139 (off_t)0, UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED), cred, &resid, 140 (struct proc *)0); 141 if (error) 142 goto bad; 143 #ifndef lint 144 if (resid > sizeof(exdata) - sizeof(exdata.ex_exec) && 145 exdata.ex_shell[0] != '#') { 146 error = ENOEXEC; 147 goto bad; 148 } 149 #endif 150 #if defined(hp300) 151 switch ((int)exdata.ex_exec.a_mid) { 152 153 /* 154 * An ancient hp200 or hp300 binary, shouldn't happen anymore. 155 * Mark as invalid. 156 */ 157 case MID_ZERO: 158 exdata.ex_exec.a_magic = 0; 159 break; 160 161 /* 162 * HP200 series has a smaller page size so we cannot 163 * demand-load or even write protect text, so we just 164 * treat as OMAGIC. 165 */ 166 case MID_HP200: 167 exdata.ex_exec.a_magic = OMAGIC; 168 break; 169 170 case MID_HP300: 171 break; 172 173 #ifdef HPUXCOMPAT 174 case MID_HPUX: 175 /* 176 * Save a.out header. This is eventually saved in the pcb, 177 * but we cannot do that yet in case the exec fails before 178 * the image is overlayed. 179 */ 180 bcopy((caddr_t)&exdata.ex_hexec, 181 (caddr_t)&hhead, sizeof hhead); 182 /* 183 * If version number is 0x2bad this is a native BSD 184 * binary created via the HPUX SGS. Should not be 185 * treated as an HPUX binary. 186 */ 187 if (exdata.ex_hexec.ha_version != BSDVNUM) 188 paged |= SHPUX; /* XXX */ 189 /* 190 * Shuffle important fields to their BSD locations. 191 * Note that the order in which this is done is important. 192 */ 193 exdata.ex_exec.a_text = exdata.ex_hexec.ha_text; 194 exdata.ex_exec.a_data = exdata.ex_hexec.ha_data; 195 exdata.ex_exec.a_bss = exdata.ex_hexec.ha_bss; 196 exdata.ex_exec.a_entry = exdata.ex_hexec.ha_entry; 197 /* 198 * For ZMAGIC files, make sizes consistant with those 199 * generated by BSD ld. 200 */ 201 if (exdata.ex_exec.a_magic == ZMAGIC) { 202 exdata.ex_exec.a_text = 203 ctob(btoc(exdata.ex_exec.a_text)); 204 nc = exdata.ex_exec.a_data + exdata.ex_exec.a_bss; 205 exdata.ex_exec.a_data = 206 ctob(btoc(exdata.ex_exec.a_data)); 207 nc -= (int)exdata.ex_exec.a_data; 208 exdata.ex_exec.a_bss = (nc < 0) ? 0 : nc; 209 } 210 break; 211 #endif 212 } 213 #endif 214 switch ((int)exdata.ex_exec.a_magic) { 215 216 case OMAGIC: 217 #ifdef COFF 218 if (exdata.ex_exec.ex_fhdr.magic != COFF_MAGIC) { 219 error = ENOEXEC; 220 goto bad; 221 } 222 #endif 223 exdata.ex_exec.a_data += exdata.ex_exec.a_text; 224 exdata.ex_exec.a_text = 0; 225 break; 226 227 case ZMAGIC: 228 paged++; 229 /* FALLTHROUGH */ 230 case NMAGIC: 231 #ifdef COFF 232 if (exdata.ex_exec.ex_fhdr.magic != COFF_MAGIC) { 233 error = ENOEXEC; 234 goto bad; 235 } 236 #endif 237 if (exdata.ex_exec.a_text == 0) { 238 error = ENOEXEC; 239 goto bad; 240 } 241 break; 242 243 default: 244 if (exdata.ex_shell[0] != '#' || 245 exdata.ex_shell[1] != '!' || 246 indir) { 247 error = ENOEXEC; 248 goto bad; 249 } 250 for (cp = &exdata.ex_shell[2];; ++cp) { 251 if (cp >= &exdata.ex_shell[MAXINTERP]) { 252 error = ENOEXEC; 253 goto bad; 254 } 255 if (*cp == '\n') { 256 *cp = '\0'; 257 break; 258 } 259 if (*cp == '\t') 260 *cp = ' '; 261 } 262 cp = &exdata.ex_shell[2]; 263 while (*cp == ' ') 264 cp++; 265 ndp->ni_dirp = cp; 266 while (*cp && *cp != ' ') 267 cp++; 268 cfarg[0] = '\0'; 269 if (*cp) { 270 *cp++ = '\0'; 271 while (*cp == ' ') 272 cp++; 273 if (*cp) 274 bcopy((caddr_t)cp, (caddr_t)cfarg, MAXINTERP); 275 } 276 indir = 1; 277 vput(vp); 278 ndp->ni_segflg = UIO_SYSSPACE; 279 if (error = namei(ndp, p)) 280 return (error); 281 vp = ndp->ni_vp; 282 if (error = VOP_GETATTR(vp, &vattr, cred, p)) 283 goto bad; 284 uid = cred->cr_uid; /* shell scripts can't be setuid */ 285 gid = cred->cr_gid; 286 goto again; 287 } 288 289 /* 290 * Collect arguments on "file" in swap space. 291 */ 292 na = 0; 293 ne = 0; 294 nc = 0; 295 cc = NCARGS; 296 execargs = kmem_alloc_wait(exec_map, NCARGS); 297 #ifdef DIAGNOSTIC 298 if (execargs == (vm_offset_t)0) 299 panic("execve: kmem_alloc_wait"); 300 #endif 301 cp = (char *) execargs; 302 /* 303 * Copy arguments into file in argdev area. 304 */ 305 if (uap->argp) for (;;) { 306 ap = NULL; 307 sharg = NULL; 308 if (indir && na == 0) { 309 sharg = ndp->ni_ptr; 310 ap = (int)sharg; 311 uap->argp++; /* ignore argv[0] */ 312 } else if (indir && (na == 1 && cfarg[0])) { 313 sharg = cfarg; 314 ap = (int)sharg; 315 } else if (indir && (na == 1 || na == 2 && cfarg[0])) 316 ap = (int)uap->fname; 317 else if (uap->argp) { 318 ap = fuword((caddr_t)uap->argp); 319 uap->argp++; 320 } 321 if (ap == NULL && uap->envp) { 322 uap->argp = NULL; 323 if ((ap = fuword((caddr_t)uap->envp)) != NULL) 324 uap->envp++, ne++; 325 } 326 if (ap == NULL) 327 break; 328 na++; 329 if (ap == -1) { 330 error = EFAULT; 331 goto bad; 332 } 333 do { 334 if (nc >= NCARGS-1) { 335 error = E2BIG; 336 break; 337 } 338 if (sharg) { 339 error = copystr(sharg, cp, (unsigned)cc, &len); 340 sharg += len; 341 } else { 342 error = copyinstr((caddr_t)ap, cp, (unsigned)cc, 343 &len); 344 ap += len; 345 } 346 cp += len; 347 nc += len; 348 cc -= len; 349 } while (error == ENAMETOOLONG); 350 if (error) 351 goto bad; 352 } 353 nc = (nc + NBPW-1) & ~(NBPW-1); 354 error = getxfile(p, vp, &exdata.ex_exec, paged, nc + (na+4)*NBPW, 355 uid, gid); 356 if (error) 357 goto bad; 358 vput(vp); 359 vp = NULL; 360 361 #ifdef HPUXCOMPAT 362 /* 363 * We are now committed to the exec so we can save the exec 364 * header in the pcb where we can dump it if necessary in core() 365 */ 366 if (p->p_addr->u_pcb.pcb_flags & PCB_HPUXBIN) 367 bcopy((caddr_t)&hhead, 368 (caddr_t)p->p_addr->u_pcb.pcb_exec, sizeof hhead); 369 #endif 370 371 /* 372 * Copy back arglist. 373 */ 374 ucp = USRSTACK - szsigcode - nc - NBPW; 375 ap = ucp - na*NBPW - 3*NBPW; 376 p->p_regs[SP] = ap; 377 (void) suword((caddr_t)ap, na-ne); 378 nc = 0; 379 cp = (char *) execargs; 380 cc = NCARGS; 381 for (;;) { 382 ap += NBPW; 383 if (na == ne) { 384 (void) suword((caddr_t)ap, 0); 385 ap += NBPW; 386 } 387 if (--na < 0) 388 break; 389 (void) suword((caddr_t)ap, ucp); 390 do { 391 error = copyoutstr(cp, (caddr_t)ucp, (unsigned)cc, 392 &len); 393 ucp += len; 394 cp += len; 395 nc += len; 396 cc -= len; 397 } while (error == ENAMETOOLONG); 398 if (error == EFAULT) 399 panic("exec: EFAULT"); 400 } 401 (void) suword((caddr_t)ap, 0); 402 403 execsigs(p); 404 405 for (nc = fdp->fd_lastfile; nc >= 0; --nc) { 406 if (fdp->fd_ofileflags[nc] & UF_EXCLOSE) { 407 (void) closef(fdp->fd_ofiles[nc], p); 408 fdp->fd_ofiles[nc] = NULL; 409 fdp->fd_ofileflags[nc] = 0; 410 if (nc < fdp->fd_freefile) 411 fdp->fd_freefile = nc; 412 } 413 fdp->fd_ofileflags[nc] &= ~UF_MAPPED; 414 } 415 /* 416 * Adjust fd_lastfile to account for descriptors closed above. 417 * Don't decrement fd_lastfile past 0, as it's unsigned. 418 */ 419 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) 420 fdp->fd_lastfile--; 421 setregs(p, exdata.ex_exec.a_entry, retval); 422 #ifdef COPY_SIGCODE 423 /* 424 * Install sigcode at top of user stack. 425 */ 426 copyout((caddr_t)sigcode, (caddr_t)(USRSTACK - szsigcode), szsigcode); 427 #endif 428 /* 429 * Remember file name for accounting. 430 */ 431 p->p_acflag &= ~AFORK; 432 if (ndp->ni_namelen > MAXCOMLEN) 433 ndp->ni_namelen = MAXCOMLEN; 434 bcopy((caddr_t)ndp->ni_ptr, (caddr_t)p->p_comm, 435 (unsigned)(ndp->ni_namelen)); 436 p->p_comm[ndp->ni_namelen] = '\0'; 437 cpu_exec(p); 438 bad: 439 FREE(ndp->ni_pnbuf, M_NAMEI); 440 if (execargs) 441 kmem_free_wakeup(exec_map, execargs, NCARGS); 442 if (vp) 443 vput(vp); 444 return (error); 445 } 446 447 /* 448 * Read in and set up memory for executed file. 449 */ 450 getxfile(p, vp, ep, paged, nargc, uid, gid) 451 register struct proc *p; 452 register struct vnode *vp; 453 register struct exec *ep; 454 int paged, nargc, uid, gid; 455 { 456 segsz_t ts, ds, ss; 457 register struct ucred *cred = p->p_ucred; 458 off_t toff; 459 int error = 0; 460 vm_offset_t addr; 461 vm_size_t size; 462 struct vmspace *vm = p->p_vmspace; 463 464 #ifdef HPUXCOMPAT 465 int hpux = (paged & SHPUX); 466 paged &= ~SHPUX; 467 if (ep->a_mid == MID_HPUX) { 468 if (paged) 469 toff = CLBYTES; 470 else 471 toff = sizeof (struct hpux_exec); 472 } else 473 #endif 474 #ifdef COFF 475 toff = N_TXTOFF(*ep); 476 #else 477 if (paged) 478 toff = CLBYTES; 479 else 480 toff = sizeof (struct exec); 481 #endif 482 if (ep->a_text != 0 && (vp->v_flag & VTEXT) == 0 && 483 vp->v_writecount != 0) 484 return (ETXTBSY); 485 486 /* 487 * Compute text and data sizes and make sure not too large. 488 * NB - Check data and bss separately as they may overflow 489 * when summed together. 490 */ 491 ts = clrnd(btoc(ep->a_text)); 492 ds = clrnd(btoc(ep->a_data + ep->a_bss)); 493 ss = clrnd(SSIZE + btoc(nargc + szsigcode)); 494 495 /* 496 * If we're sharing the address space, allocate a new space 497 * and release our reference to the old one. Otherwise, 498 * empty out the existing vmspace. 499 */ 500 if (vm->vm_refcnt > 1) { 501 p->p_vmspace = vmspace_alloc(VM_MIN_ADDRESS, 502 VM_MAXUSER_ADDRESS, 1); 503 vmspace_free(vm); 504 vm = p->p_vmspace; 505 } else { 506 #ifdef SYSVSHM 507 if (vm->vm_shm) 508 shmexit(p); 509 #endif 510 (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS, 511 VM_MAXUSER_ADDRESS); 512 } 513 /* 514 * If parent is waiting for us to exec or exit, 515 * SPPWAIT will be set; clear it and wakeup parent. 516 */ 517 if (p->p_flag & SPPWAIT) { 518 p->p_flag &= ~SPPWAIT; 519 wakeup((caddr_t) p->p_pptr); 520 } 521 #ifdef HPUXCOMPAT 522 p->p_addr->u_pcb.pcb_flags &= ~(PCB_HPUXMMAP|PCB_HPUXBIN); 523 /* remember that we were loaded from an HPUX format file */ 524 if (ep->a_mid == MID_HPUX) 525 p->p_addr->u_pcb.pcb_flags |= PCB_HPUXBIN; 526 if (hpux) 527 p->p_flag |= SHPUX; 528 else 529 p->p_flag &= ~SHPUX; 530 #endif 531 #ifdef ULTRIXCOMPAT 532 /* 533 * Always start out as an ULTRIX process. 534 * A system call in crt0.o will change us to BSD system calls later. 535 */ 536 p->p_md.md_flags |= MDP_ULTRIX; 537 #endif 538 p->p_flag |= SEXEC; 539 #ifndef COFF 540 addr = VM_MIN_ADDRESS; 541 if (vm_allocate(&vm->vm_map, &addr, round_page(ctob(ts + ds)), FALSE)) { 542 uprintf("Cannot allocate text+data space\n"); 543 error = ENOMEM; /* XXX */ 544 goto badmap; 545 } 546 vm->vm_taddr = (caddr_t)VM_MIN_ADDRESS; 547 vm->vm_daddr = (caddr_t)(VM_MIN_ADDRESS + ctob(ts)); 548 #else /* COFF */ 549 addr = (vm_offset_t)ep->ex_aout.codeStart; 550 vm->vm_taddr = (caddr_t)addr; 551 if (vm_allocate(&vm->vm_map, &addr, round_page(ctob(ts)), FALSE)) { 552 uprintf("Cannot allocate text space\n"); 553 error = ENOMEM; /* XXX */ 554 goto badmap; 555 } 556 addr = (vm_offset_t)ep->ex_aout.heapStart; 557 vm->vm_daddr = (caddr_t)addr; 558 if (vm_allocate(&vm->vm_map, &addr, round_page(ctob(ds)), FALSE)) { 559 uprintf("Cannot allocate data space\n"); 560 error = ENOMEM; /* XXX */ 561 goto badmap; 562 } 563 #endif /* COFF */ 564 size = round_page(MAXSSIZ); /* XXX */ 565 #ifdef i386 566 addr = trunc_page(USRSTACK - size) - NBPG; /* XXX */ 567 #else 568 addr = trunc_page(USRSTACK - size); 569 #endif 570 if (vm_allocate(&vm->vm_map, &addr, size, FALSE)) { 571 uprintf("Cannot allocate stack space\n"); 572 error = ENOMEM; /* XXX */ 573 goto badmap; 574 } 575 size -= round_page(p->p_rlimit[RLIMIT_STACK].rlim_cur); 576 if (vm_map_protect(&vm->vm_map, addr, addr+size, VM_PROT_NONE, FALSE)) { 577 uprintf("Cannot protect stack space\n"); 578 error = ENOMEM; 579 goto badmap; 580 } 581 vm->vm_maxsaddr = (caddr_t)addr; 582 583 if (paged == 0) { 584 /* 585 * Read in data segment. 586 */ 587 (void) vn_rdwr(UIO_READ, vp, vm->vm_daddr, (int) ep->a_data, 588 (off_t)(toff + ep->a_text), UIO_USERSPACE, 589 (IO_UNIT|IO_NODELOCKED), cred, (int *)0, p); 590 /* 591 * Read in text segment if necessary (0410), 592 * and read-protect it. 593 */ 594 if (ep->a_text > 0) { 595 error = vn_rdwr(UIO_READ, vp, vm->vm_taddr, 596 (int)ep->a_text, toff, UIO_USERSPACE, 597 (IO_UNIT|IO_NODELOCKED), cred, (int *)0, p); 598 (void) vm_map_protect(&vm->vm_map, vm->vm_taddr, 599 vm->vm_taddr + trunc_page(ep->a_text), 600 VM_PROT_READ|VM_PROT_EXECUTE, FALSE); 601 } 602 } else { 603 /* 604 * Allocate a region backed by the exec'ed vnode. 605 */ 606 #ifndef COFF 607 addr = VM_MIN_ADDRESS; 608 size = round_page(ep->a_text + ep->a_data); 609 error = vm_mmap(&vm->vm_map, &addr, size, VM_PROT_ALL, 610 MAP_FILE|MAP_COPY|MAP_FIXED, 611 (caddr_t)vp, (vm_offset_t)toff); 612 (void) vm_map_protect(&vm->vm_map, addr, 613 addr + trunc_page(ep->a_text), 614 VM_PROT_READ|VM_PROT_EXECUTE, FALSE); 615 #else /* COFF */ 616 addr = (vm_offset_t)vm->vm_taddr; 617 size = round_page(ep->a_text); 618 error = vm_mmap(&vm->vm_map, &addr, size, 619 VM_PROT_READ|VM_PROT_EXECUTE, 620 MAP_FILE|MAP_COPY|MAP_FIXED, 621 (caddr_t)vp, (vm_offset_t)toff); 622 toff += size; 623 addr = (vm_offset_t)vm->vm_daddr; 624 size = round_page(ep->a_data); 625 error = vm_mmap(&vm->vm_map, &addr, size, VM_PROT_ALL, 626 MAP_FILE|MAP_COPY|MAP_FIXED, 627 (caddr_t)vp, (vm_offset_t)toff); 628 #endif /* COFF */ 629 vp->v_flag |= VTEXT; 630 } 631 badmap: 632 if (error) { 633 printf("pid %d: VM allocation failure\n", p->p_pid); 634 uprintf("sorry, pid %d was killed in exec: VM allocation\n", 635 p->p_pid); 636 psignal(p, SIGKILL); 637 p->p_flag |= SKEEP; 638 return(error); 639 } 640 641 /* 642 * set SUID/SGID protections, if no tracing 643 */ 644 if ((p->p_flag&STRC)==0) { 645 if (uid != cred->cr_uid || gid != cred->cr_gid) { 646 p->p_ucred = cred = crcopy(cred); 647 /* 648 * If process is being ktraced, turn off - unless 649 * root set it. 650 */ 651 if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) { 652 vrele(p->p_tracep); 653 p->p_tracep = NULL; 654 p->p_traceflag = 0; 655 } 656 } 657 cred->cr_uid = uid; 658 cred->cr_gid = gid; 659 } else 660 psignal(p, SIGTRAP); 661 p->p_cred->p_svuid = cred->cr_uid; 662 p->p_cred->p_svgid = cred->cr_gid; 663 vm->vm_tsize = ts; 664 vm->vm_dsize = ds; 665 vm->vm_ssize = ss; 666 p->p_stats->p_prof.pr_scale = 0; 667 #if defined(tahoe) 668 /* move this when tahoe cpu_exec is created */ 669 p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL; 670 #endif 671 return (0); 672 } 673