1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/errno.h>
27 #include <sys/exec.h>
28 #include <sys/kmem.h>
29 #include <sys/modctl.h>
30 #include <sys/model.h>
31 #include <sys/proc.h>
32 #include <sys/syscall.h>
33 #include <sys/systm.h>
34 #include <sys/thread.h>
35 #include <sys/cmn_err.h>
36 #include <sys/archsystm.h>
37 #include <sys/pathname.h>
38 #include <sys/sunddi.h>
39 
40 #include <sys/machbrand.h>
41 #include <sys/brand.h>
42 #include "s10_brand.h"
43 
/*
 * Table of NSYSCALL bytes indexed by system call number.  It is allocated
 * zero-filled in _init(), where the entries for the system calls this brand
 * interposes on are set to 1, and it is released in _fini().
 */
char *s10_emulation_table = NULL;
45 
/*
 * Forward declarations of the brand operations defined in this file; they
 * are needed by the s10_brops initializer that follows.
 */
void	s10_init_brand_data(zone_t *);
void	s10_free_brand_data(zone_t *);
void	s10_setbrand(proc_t *);
int	s10_getattr(zone_t *, int, void *, size_t *);
int	s10_setattr(zone_t *, int, void *, size_t);
int	s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
		uintptr_t, uintptr_t, uintptr_t);
void	s10_copy_procdata(proc_t *, proc_t *);
void	s10_proc_exit(struct proc *, klwp_t *);
void	s10_exec();
int	s10_initlwp(klwp_t *);
void	s10_forklwp(klwp_t *, klwp_t *);
void	s10_freelwp(klwp_t *);
void	s10_lwpexit(klwp_t *);
int	s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
	long *, int, caddr_t, cred_t *, int);
62 
/*
 * s10 brand operations vector.
 *
 * The initializers are positional, so their order has to match the member
 * order of struct brand_ops (presumably declared in <sys/brand.h>; the
 * declaration is not visible in this file).  Every entry is implemented in
 * this file except lwp_setrval, which is defined elsewhere.
 */
struct brand_ops s10_brops = {
	s10_init_brand_data,
	s10_free_brand_data,
	s10_brandsys,
	s10_setbrand,
	s10_getattr,
	s10_setattr,
	s10_copy_procdata,
	s10_proc_exit,
	s10_exec,
	lwp_setrval,
	s10_initlwp,
	s10_forklwp,
	s10_freelwp,
	s10_lwpexit,
	s10_elfexec
};
81 
/*
 * Machine-specific brand callbacks.  The initializers are positional and
 * must match struct brand_mach_ops for the platform being built (presumably
 * declared in <sys/machbrand.h>; not visible here).  The sparc variant has
 * two slots and the x86 variants have six; a NULL slot installs no callback
 * for that entry.
 */
#ifdef	sparc

struct brand_mach_ops s10_mops = {
	s10_brand_syscall_callback,
	s10_brand_syscall32_callback
};

#else	/* sparc */

#ifdef	__amd64

struct brand_mach_ops s10_mops = {
	s10_brand_sysenter_callback,
	NULL,
	s10_brand_int91_callback,
	s10_brand_syscall_callback,
	s10_brand_syscall32_callback,
	NULL
};

#else	/* ! __amd64 */

struct brand_mach_ops s10_mops = {
	s10_brand_sysenter_callback,
	NULL,
	NULL,
	s10_brand_syscall_callback,
	NULL,
	NULL
};
#endif	/* __amd64 */

#endif	/* sparc */
115 
/*
 * The brand descriptor: interface version, brand name, and pointers to the
 * generic and machine-specific operation vectors defined above.  The brand
 * callbacks in this file compare p_brand / zone_brand against its address.
 */
struct brand	s10_brand = {
	BRAND_VER_1,
	"solaris10",
	&s10_brops,
	&s10_mops
};
122 
/* Module linkage element describing this module as a brand module. */
static struct modlbrand modlbrand = {
	&mod_brandops,		/* type of module */
	"Solaris 10 Brand",	/* description of module */
	&s10_brand		/* driver ops */
};
128 
/*
 * Linkage structure passed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini(); modlbrand is its only element.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlbrand, NULL
};
132 
133 void
134 s10_setbrand(proc_t *p)
135 {
136 	ASSERT(p->p_brand == &s10_brand);
137 	ASSERT(p->p_brand_data == NULL);
138 
139 	/*
140 	 * We should only be called from exec(), when we know the process
141 	 * is single-threaded.
142 	 */
143 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
144 
145 	p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP);
146 	(void) s10_initlwp(p->p_tlist->t_lwp);
147 }
148 
149 /*ARGSUSED*/
150 int
151 s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
152 {
153 	ASSERT(zone->zone_brand == &s10_brand);
154 	if (attr == S10_EMUL_BITMAP) {
155 		if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t))
156 			return (EINVAL);
157 		if (copyout(((s10_zone_data_t *)zone->zone_brand_data)->
158 		    emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0)
159 			return (EFAULT);
160 		return (0);
161 	}
162 
163 	return (EINVAL);
164 }
165 
166 int
167 s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
168 {
169 	ASSERT(zone->zone_brand == &s10_brand);
170 	if (attr == S10_EMUL_BITMAP) {
171 		if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t))
172 			return (EINVAL);
173 		if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)->
174 		    emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0)
175 			return (EFAULT);
176 		return (0);
177 	}
178 
179 	return (EINVAL);
180 }
181 
182 #ifdef	__amd64
183 /*
184  * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's
185  * libc expects %fs to be nonzero.  This causes some committed
186  * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several
187  * libraries, including libdoor.  This function sets the specified LWP's %fs
188  * register to the legacy S10 selector value (LWPFS_SEL).
189  *
190  * The best solution to the aforementioned problem is backporting CRs
191  * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes
192  * would accept zero for %fs.  Backporting the CRs is a requirement for running
193  * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is
194  * nonzero.  Such behavior breaks 64-bit processes because Xen has to fetch the
195  * FS segments' base addresses from the LWPs' GDTs, which are only capable of
196  * 32-bit addressing.
197  */
198 /*ARGSUSED*/
199 static void
200 s10_amd64_correct_fsreg(klwp_t *l)
201 {
202 	if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
203 		kpreempt_disable();
204 		l->lwp_pcb.pcb_fs = LWPFS_SEL;
205 		l->lwp_pcb.pcb_rupdate = 1;
206 		lwptot(l)->t_post_sys = 1;	/* Guarantee update_sregs() */
207 		kpreempt_enable();
208 	}
209 }
210 #endif	/* __amd64 */
211 
212 int
213 s10_native()
214 {
215 	struct user	*up = PTOU(curproc);
216 	char		*args_new, *comm_new, *p;
217 	int		len;
218 
219 	len = sizeof (S10_NATIVE_LINKER32 " ") - 1;
220 
221 	/*
222 	 * Make sure that the process' interpreter is the native dynamic linker.
223 	 * Convention dictates that native processes executing within solaris10-
224 	 * branded zones are interpreted by the native dynamic linker (the
225 	 * process and its arguments are specified as arguments to the dynamic
226 	 * linker).  If this convention is violated (i.e.,
227 	 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be
228 	 * native), then do nothing and silently indicate success.
229 	 */
230 	if (strcmp(up->u_comm, S10_LINKER_NAME) != 0)
231 		return (0);
232 	if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0)
233 		len += 3;		/* to account for "/64" in the path */
234 	else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0)
235 		return (0);
236 
237 	args_new = strdup(&up->u_psargs[len]);
238 	if ((p = strchr(args_new, ' ')) != NULL)
239 		*p = '\0';
240 	if ((comm_new = strrchr(args_new, '/')) != NULL)
241 		comm_new = strdup(comm_new + 1);
242 	else
243 		comm_new = strdup(args_new);
244 	if (p != NULL)
245 		*p = ' ';
246 
247 	if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) {
248 		mutex_enter(&curproc->p_lock);
249 		(void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1);
250 		(void) strlcpy(up->u_psargs, args_new, PSARGSZ);
251 		mutex_exit(&curproc->p_lock);
252 	}
253 
254 	strfree(args_new);
255 	strfree(comm_new);
256 	return (0);
257 }
258 
259 /*
260  * Get the address of the user-space system call handler from the user
261  * process and attach it to the proc structure.
262  */
263 /*ARGSUSED*/
264 int
265 s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
266     uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
267 {
268 	s10_proc_data_t	*spd;
269 	s10_brand_reg_t	reg;
270 	proc_t		*p = curproc;
271 	int		err;
272 
273 	*rval = 0;
274 
275 	/*
276 	 * B_EXEC_BRAND is redundant
277 	 * since the kernel assumes a native process doing an exec
278 	 * in a branded zone is going to run a branded processes.
279 	 * hence we don't support this operation.
280 	 */
281 	if (cmd == B_EXEC_BRAND)
282 		return (ENOSYS);
283 
284 	if (cmd == B_S10_NATIVE)
285 		return (s10_native());
286 
287 	/* For all other operations this must be a branded process. */
288 	if (p->p_brand == &native_brand)
289 		return (ENOSYS);
290 
291 	ASSERT(p->p_brand == &s10_brand);
292 	ASSERT(p->p_brand_data != NULL);
293 
294 	spd = (s10_proc_data_t *)p->p_brand_data;
295 
296 	switch (cmd) {
297 	case B_EXEC_NATIVE:
298 		err = exec_common(
299 		    (char *)arg1, (const char **)arg2, (const char **)arg3,
300 		    EBA_NATIVE);
301 		return (err);
302 
303 	case B_REGISTER:
304 		if (p->p_model == DATAMODEL_NATIVE) {
305 			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
306 				return (EFAULT);
307 #if defined(_LP64)
308 		} else {
309 			s10_brand_reg32_t reg32;
310 
311 			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
312 				return (EFAULT);
313 			reg.sbr_version = reg32.sbr_version;
314 			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
315 #endif /* _LP64 */
316 		}
317 
318 		if (reg.sbr_version != S10_VERSION)
319 			return (ENOTSUP);
320 		spd->spd_handler = reg.sbr_handler;
321 		return (0);
322 
323 	case B_ELFDATA:
324 		if (p->p_model == DATAMODEL_NATIVE) {
325 			if (copyout(&spd->spd_elf_data, (void *)arg1,
326 			    sizeof (s10_elf_data_t)) != 0)
327 				return (EFAULT);
328 #if defined(_LP64)
329 		} else {
330 			s10_elf_data32_t sed32;
331 
332 			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
333 			sed32.sed_phent = spd->spd_elf_data.sed_phent;
334 			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
335 			sed32.sed_entry = spd->spd_elf_data.sed_entry;
336 			sed32.sed_base = spd->spd_elf_data.sed_base;
337 			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
338 			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
339 			if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0)
340 				return (EFAULT);
341 #endif /* _LP64 */
342 		}
343 		return (0);
344 
345 	case B_S10_PIDINFO:
346 		/*
347 		 * The s10 brand needs to be able to get the pid of the
348 		 * current process and the pid of the zone's init, and it
349 		 * needs to do this on every process startup.  Early in
350 		 * brand startup, we can't call getpid() because calls to
351 		 * getpid() represent a magical signal to some old-skool
352 		 * debuggers.  By merging all of this into one call, we
353 		 * make this quite a bit cheaper and easier to handle in
354 		 * the brand module.
355 		 */
356 		if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0)
357 			return (EFAULT);
358 		if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2,
359 		    sizeof (pid_t)) != 0)
360 			return (EFAULT);
361 		return (0);
362 
363 	case B_S10_TRUSS_POINT:
364 		/*
365 		 * This subcommand exists so that we can see truss output
366 		 * from interposed system calls that return without first
367 		 * calling any other system call, meaning they would be
368 		 * invisible to truss(1).
369 		 *
370 		 * If the second argument is set non-zero, set errno to that
371 		 * value as well.
372 		 *
373 		 * Arguments are:
374 		 *
375 		 *    arg1: syscall number
376 		 *    arg2: errno
377 		 */
378 		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
379 
380 #ifdef	__amd64
381 	case B_S10_FSREGCORRECTION:
382 		/*
383 		 * This subcommand exists so that the SYS_lwp_private and
384 		 * SYS_lwp_create syscalls can manually set the current thread's
385 		 * %fs register to the legacy S10 selector value for 64-bit x86
386 		 * processes.
387 		 */
388 		s10_amd64_correct_fsreg(ttolwp(curthread));
389 		return (0);
390 #endif	/* __amd64 */
391 	}
392 
393 	return (EINVAL);
394 }
395 
396 /*
397  * Copy the per-process brand data from a parent proc to a child.
398  */
399 void
400 s10_copy_procdata(proc_t *child, proc_t *parent)
401 {
402 	s10_proc_data_t	*spd;
403 
404 	ASSERT(parent->p_brand == &s10_brand);
405 	ASSERT(child->p_brand == &s10_brand);
406 	ASSERT(parent->p_brand_data != NULL);
407 	ASSERT(child->p_brand_data == NULL);
408 
409 	/* Just duplicate all the proc data of the parent for the child */
410 	spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP);
411 	bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t));
412 	child->p_brand_data = spd;
413 }
414 
415 /*ARGSUSED*/
416 void
417 s10_proc_exit(struct proc *p, klwp_t *l)
418 {
419 	ASSERT(p->p_brand == &s10_brand);
420 	ASSERT(p->p_brand_data != NULL);
421 
422 	/*
423 	 * We should only be called from proc_exit(), when we know that
424 	 * process is single-threaded.
425 	 */
426 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
427 
428 	/* upon exit, free our lwp brand data */
429 	(void) s10_freelwp(ttolwp(curthread));
430 
431 	/* upon exit, free our proc brand data */
432 	kmem_free(p->p_brand_data, sizeof (s10_proc_data_t));
433 	p->p_brand_data = NULL;
434 }
435 
436 void
437 s10_exec()
438 {
439 	s10_proc_data_t	*spd = curproc->p_brand_data;
440 
441 	ASSERT(curproc->p_brand == &s10_brand);
442 	ASSERT(curproc->p_brand_data != NULL);
443 	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
444 
445 	/*
446 	 * We should only be called from exec(), when we know the process
447 	 * is single-threaded.
448 	 */
449 	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
450 
451 	/* Upon exec, reset our lwp brand data. */
452 	(void) s10_freelwp(ttolwp(curthread));
453 	(void) s10_initlwp(ttolwp(curthread));
454 
455 	/*
456 	 * Upon exec, reset all the proc brand data, except for the elf
457 	 * data associated with the executable we are exec'ing.
458 	 */
459 	spd->spd_handler = NULL;
460 }
461 
/*
 * Initialize the brand state of an LWP.  No per-LWP data is allocated here;
 * lwp_brand is only set to a non-NULL marker value, which the other LWP
 * hooks in this file assert on.  Always returns 0.
 */
/*ARGSUSED*/
int
s10_initlwp(klwp_t *l)
{
	ASSERT(l->lwp_procp->p_brand == &s10_brand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand == NULL);
	/* Any non-NULL value will do; it is never dereferenced in this file. */
	l->lwp_brand = (void *)-1;
	return (0);
}
472 
/*
 * Fork hook for an LWP: p is the parent LWP and c is the child LWP.  Apart
 * from sanity checks, the only work done is the amd64 %fs correction of the
 * child described below.
 */
/*ARGSUSED*/
void
s10_forklwp(klwp_t *p, klwp_t *c)
{
	ASSERT(p->lwp_procp->p_brand == &s10_brand);
	ASSERT(c->lwp_procp->p_brand == &s10_brand);

	ASSERT(p->lwp_procp->p_brand_data != NULL);
	ASSERT(c->lwp_procp->p_brand_data != NULL);

	/* Both LWPs have already been initialized via s10_initlwp() */
	ASSERT(p->lwp_brand != NULL);
	ASSERT(c->lwp_brand != NULL);

#ifdef	__amd64
	/*
	 * Only correct the child's %fs register if the parent's %fs register
	 * is LWPFS_SEL.  If the parent's %fs register is zero, then the Solaris
	 * 10 environment that we're emulating uses a version of libc that
	 * works when %fs is zero (i.e., it contains backports of CRs 6467491
	 * and 6501650).
	 */
	if (p->lwp_pcb.pcb_fs == LWPFS_SEL)
		s10_amd64_correct_fsreg(c);
#endif	/* __amd64 */
}
499 
/*
 * Release the brand state of an LWP.  Nothing was allocated by
 * s10_initlwp(), so clearing the lwp_brand marker is all that is needed.
 */
/*ARGSUSED*/
void
s10_freelwp(klwp_t *l)
{
	ASSERT(l->lwp_procp->p_brand == &s10_brand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);
	l->lwp_brand = NULL;
}
509 
/*
 * LWP exit hook for an LWP leaving a multi-threaded process: clear the
 * lwp_brand marker of the exiting LWP.
 */
/*ARGSUSED*/
void
s10_lwpexit(klwp_t *l)
{
	ASSERT(l->lwp_procp->p_brand == &s10_brand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);

	/*
	 * We should never be called for the last thread in a process.
	 * (That case is handled by s10_proc_exit().)  Therefore this lwp
	 * must be exiting from a multi-threaded process.
	 */
	ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw);

	l->lwp_brand = NULL;
}
527 
528 void
529 s10_free_brand_data(zone_t *zone)
530 {
531 	kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t));
532 }
533 
/*
 * Allocate the zero-filled per-zone brand data for a zone of this brand.
 * The zone must not have brand data yet.  The KM_SLEEP allocation is used
 * without a NULL check.
 */
void
s10_init_brand_data(zone_t *zone)
{
	ASSERT(zone->zone_brand == &s10_brand);
	ASSERT(zone->zone_brand_data == NULL);
	zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP);
}
541 
542 #if defined(_LP64)
543 static void
544 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
545 {
546 	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
547 	dst->e_type =		src->e_type;
548 	dst->e_machine =	src->e_machine;
549 	dst->e_version =	src->e_version;
550 	dst->e_entry =		src->e_entry;
551 	dst->e_phoff =		src->e_phoff;
552 	dst->e_shoff =		src->e_shoff;
553 	dst->e_flags =		src->e_flags;
554 	dst->e_ehsize =		src->e_ehsize;
555 	dst->e_phentsize =	src->e_phentsize;
556 	dst->e_phnum =		src->e_phnum;
557 	dst->e_shentsize =	src->e_shentsize;
558 	dst->e_shnum =		src->e_shnum;
559 	dst->e_shstrndx =	src->e_shstrndx;
560 }
561 #endif /* _LP64 */
562 
563 int
564 s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
565 	int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
566 	int brand_action)
567 {
568 	vnode_t		*nvp;
569 	Ehdr		ehdr;
570 	Addr		uphdr_vaddr;
571 	intptr_t	voffset;
572 	int		interp;
573 	int		i, err;
574 	struct execenv	env;
575 	struct user	*up = PTOU(curproc);
576 	s10_proc_data_t	*spd;
577 	s10_elf_data_t	sed, *sedp;
578 	char		*linker;
579 	uintptr_t	lddata; /* lddata of executable's linker */
580 
581 	ASSERT(curproc->p_brand == &s10_brand);
582 	ASSERT(curproc->p_brand_data != NULL);
583 
584 	spd = (s10_proc_data_t *)curproc->p_brand_data;
585 	sedp = &spd->spd_elf_data;
586 
587 	args->brandname = S10_BRANDNAME;
588 
589 	/*
590 	 * We will exec the brand library and then map in the target
591 	 * application and (optionally) the brand's default linker.
592 	 */
593 	if (args->to_model == DATAMODEL_NATIVE) {
594 		args->emulator = S10_LIB;
595 		linker = S10_LINKER;
596 #if defined(_LP64)
597 	} else {
598 		args->emulator = S10_LIB32;
599 		linker = S10_LINKER32;
600 #endif /* _LP64 */
601 	}
602 
603 	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP,
604 	    &nvp)) != 0) {
605 		uprintf("%s: not found.", args->emulator);
606 		return (err);
607 	}
608 
609 	if (args->to_model == DATAMODEL_NATIVE) {
610 		err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
611 		    setid, exec_file, cred, brand_action);
612 #if defined(_LP64)
613 	} else {
614 		err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
615 		    setid, exec_file, cred, brand_action);
616 #endif /* _LP64 */
617 	}
618 	VN_RELE(nvp);
619 	if (err != 0)
620 		return (err);
621 
622 	/*
623 	 * The u_auxv vectors are set up by elfexec to point to the brand
624 	 * emulation library and linker.  Save these so they can be copied to
625 	 * the specific brand aux vectors.
626 	 */
627 	bzero(&sed, sizeof (sed));
628 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
629 		switch (up->u_auxv[i].a_type) {
630 		case AT_SUN_LDDATA:
631 			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
632 			break;
633 		case AT_BASE:
634 			sed.sed_base = up->u_auxv[i].a_un.a_val;
635 			break;
636 		case AT_ENTRY:
637 			sed.sed_entry = up->u_auxv[i].a_un.a_val;
638 			break;
639 		case AT_PHDR:
640 			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
641 			break;
642 		case AT_PHENT:
643 			sed.sed_phent = up->u_auxv[i].a_un.a_val;
644 			break;
645 		case AT_PHNUM:
646 			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
647 			break;
648 		default:
649 			break;
650 		}
651 	}
652 	/* Make sure the emulator has an entry point */
653 	ASSERT(sed.sed_entry != NULL);
654 	ASSERT(sed.sed_phdr != NULL);
655 
656 	bzero(&env, sizeof (env));
657 	if (args->to_model == DATAMODEL_NATIVE) {
658 		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
659 		    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
660 		    &env.ex_brksize, NULL);
661 #if defined(_LP64)
662 	} else {
663 		Elf32_Ehdr ehdr32;
664 		Elf32_Addr uphdr_vaddr32;
665 		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
666 		    &voffset, exec_file, &interp, &env.ex_bssbase,
667 		    &env.ex_brkbase, &env.ex_brksize, NULL);
668 		Ehdr32to64(&ehdr32, &ehdr);
669 		if (uphdr_vaddr32 == (Elf32_Addr)-1)
670 			uphdr_vaddr = (Addr)-1;
671 		else
672 			uphdr_vaddr = uphdr_vaddr32;
673 #endif /* _LP64 */
674 	}
675 	if (err != 0)
676 		return (err);
677 
678 	/*
679 	 * Save off the important properties of the executable. The brand
680 	 * library will ask us for this data later, when it is initializing
681 	 * and getting ready to transfer control to the brand application.
682 	 */
683 	if (uphdr_vaddr == (Addr)-1)
684 		sedp->sed_phdr = voffset + ehdr.e_phoff;
685 	else
686 		sedp->sed_phdr = voffset + uphdr_vaddr;
687 	sedp->sed_entry = voffset + ehdr.e_entry;
688 	sedp->sed_phent = ehdr.e_phentsize;
689 	sedp->sed_phnum = ehdr.e_phnum;
690 
691 	if (interp) {
692 		if (ehdr.e_type == ET_DYN) {
693 			/*
694 			 * This is a shared object executable, so we need to
695 			 * pick a reasonable place to put the heap. Just don't
696 			 * use the first page.
697 			 */
698 			env.ex_brkbase = (caddr_t)PAGESIZE;
699 			env.ex_bssbase = (caddr_t)PAGESIZE;
700 		}
701 
702 		/*
703 		 * If the program needs an interpreter (most do), map it in and
704 		 * store relevant information about it in the aux vector, where
705 		 * the brand library can find it.
706 		 */
707 		if ((err = lookupname(linker, UIO_SYSSPACE,
708 		    FOLLOW, NULLVPP, &nvp)) != 0) {
709 			uprintf("%s: not found.", S10_LINKER);
710 			return (err);
711 		}
712 		if (args->to_model == DATAMODEL_NATIVE) {
713 			err = mapexec_brand(nvp, args, &ehdr,
714 			    &uphdr_vaddr, &voffset, exec_file, &interp,
715 			    NULL, NULL, NULL, &lddata);
716 #if defined(_LP64)
717 		} else {
718 			Elf32_Ehdr ehdr32;
719 			Elf32_Addr uphdr_vaddr32;
720 			err = mapexec32_brand(nvp, args, &ehdr32,
721 			    &uphdr_vaddr32, &voffset, exec_file, &interp,
722 			    NULL, NULL, NULL, &lddata);
723 			Ehdr32to64(&ehdr32, &ehdr);
724 			if (uphdr_vaddr32 == (Elf32_Addr)-1)
725 				uphdr_vaddr = (Addr)-1;
726 			else
727 				uphdr_vaddr = uphdr_vaddr32;
728 #endif /* _LP64 */
729 		}
730 		VN_RELE(nvp);
731 		if (err != 0)
732 			return (err);
733 
734 		/*
735 		 * Now that we know the base address of the brand's linker,
736 		 * place it in the aux vector.
737 		 */
738 		sedp->sed_base = voffset;
739 		sedp->sed_ldentry = voffset + ehdr.e_entry;
740 		sedp->sed_lddata = voffset + lddata;
741 	} else {
742 		/*
743 		 * This program has no interpreter. The brand library will
744 		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
745 		 * so in this case, put the entry point of the main executable
746 		 * there.
747 		 */
748 		if (ehdr.e_type == ET_EXEC) {
749 			/*
750 			 * An executable with no interpreter, this must be a
751 			 * statically linked executable, which means we loaded
752 			 * it at the address specified in the elf header, in
753 			 * which case the e_entry field of the elf header is an
754 			 * absolute address.
755 			 */
756 			sedp->sed_ldentry = ehdr.e_entry;
757 			sedp->sed_entry = ehdr.e_entry;
758 			sedp->sed_lddata = NULL;
759 			sedp->sed_base = NULL;
760 		} else {
761 			/*
762 			 * A shared object with no interpreter, we use the
763 			 * calculated address from above.
764 			 */
765 			sedp->sed_ldentry = sedp->sed_entry;
766 			sedp->sed_entry = NULL;
767 			sedp->sed_phdr = NULL;
768 			sedp->sed_phent = NULL;
769 			sedp->sed_phnum = NULL;
770 			sedp->sed_lddata = NULL;
771 			sedp->sed_base = voffset;
772 
773 			if (ehdr.e_type == ET_DYN) {
774 				/*
775 				 * Delay setting the brkbase until the first
776 				 * call to brk(); see elfexec() for details.
777 				 */
778 				env.ex_bssbase = (caddr_t)0;
779 				env.ex_brkbase = (caddr_t)0;
780 				env.ex_brksize = 0;
781 			}
782 		}
783 	}
784 
785 	env.ex_magic = elfmagic;
786 	env.ex_vp = vp;
787 	setexecenv(&env);
788 
789 	/*
790 	 * It's time to manipulate the process aux vectors.  First
791 	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
792 	 * the AF_SUN_NOPLM flag.
793 	 */
794 	if (args->to_model == DATAMODEL_NATIVE) {
795 		auxv_t		auxflags_auxv;
796 
797 		if (copyin(args->auxp_auxflags, &auxflags_auxv,
798 		    sizeof (auxflags_auxv)) != 0)
799 			return (EFAULT);
800 
801 		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
802 		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
803 		if (copyout(&auxflags_auxv, args->auxp_auxflags,
804 		    sizeof (auxflags_auxv)) != 0)
805 			return (EFAULT);
806 #if defined(_LP64)
807 	} else {
808 		auxv32_t	auxflags_auxv32;
809 
810 		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
811 		    sizeof (auxflags_auxv32)) != 0)
812 			return (EFAULT);
813 
814 		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
815 		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
816 		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
817 		    sizeof (auxflags_auxv32)) != 0)
818 			return (EFAULT);
819 #endif /* _LP64 */
820 	}
821 
822 	/* Second, copy out the brand specific aux vectors. */
823 	if (args->to_model == DATAMODEL_NATIVE) {
824 		auxv_t s10_auxv[] = {
825 		    { AT_SUN_BRAND_AUX1, 0 },
826 		    { AT_SUN_BRAND_AUX2, 0 },
827 		    { AT_SUN_BRAND_AUX3, 0 }
828 		};
829 
830 		ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA);
831 		s10_auxv[0].a_un.a_val = sed.sed_lddata;
832 
833 		if (copyout(&s10_auxv, args->auxp_brand,
834 		    sizeof (s10_auxv)) != 0)
835 			return (EFAULT);
836 #if defined(_LP64)
837 	} else {
838 		auxv32_t s10_auxv32[] = {
839 		    { AT_SUN_BRAND_AUX1, 0 },
840 		    { AT_SUN_BRAND_AUX2, 0 },
841 		    { AT_SUN_BRAND_AUX3, 0 }
842 		};
843 
844 		ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA);
845 		s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
846 		if (copyout(&s10_auxv32, args->auxp_brand,
847 		    sizeof (s10_auxv32)) != 0)
848 			return (EFAULT);
849 #endif /* _LP64 */
850 	}
851 
852 	/*
853 	 * Third, the the /proc aux vectors set up by elfexec() point to brand
854 	 * emulation library and it's linker.  Copy these to the /proc brand
855 	 * specific aux vector, and update the regular /proc aux vectors to
856 	 * point to the executable (and it's linker).  This will enable
857 	 * debuggers to access the executable via the usual /proc or elf notes
858 	 * aux vectors.
859 	 *
860 	 * The brand emulation library's linker will get it's aux vectors off
861 	 * the stack, and then update the stack with the executable's aux
862 	 * vectors before jumping to the executable's linker.
863 	 *
864 	 * Debugging the brand emulation library must be done from
865 	 * the global zone, where the librtld_db module knows how to fetch the
866 	 * brand specific aux vectors to access the brand emulation libraries
867 	 * linker.
868 	 */
869 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
870 		ulong_t val;
871 
872 		switch (up->u_auxv[i].a_type) {
873 		case AT_SUN_BRAND_S10_LDDATA:
874 			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
875 			continue;
876 		case AT_BASE:
877 			val = sedp->sed_base;
878 			break;
879 		case AT_ENTRY:
880 			val = sedp->sed_entry;
881 			break;
882 		case AT_PHDR:
883 			val = sedp->sed_phdr;
884 			break;
885 		case AT_PHENT:
886 			val = sedp->sed_phent;
887 			break;
888 		case AT_PHNUM:
889 			val = sedp->sed_phnum;
890 			break;
891 		case AT_SUN_LDDATA:
892 			val = sedp->sed_lddata;
893 			break;
894 		default:
895 			continue;
896 		}
897 
898 		up->u_auxv[i].a_un.a_val = val;
899 		if (val == NULL) {
900 			/* Hide the entry for static binaries */
901 			up->u_auxv[i].a_type = AT_IGNORE;
902 		}
903 	}
904 
905 	/*
906 	 * The last thing we do here is clear spd->spd_handler.  This is
907 	 * important because if we're already a branded process and if this
908 	 * exec succeeds, there is a window between when the exec() first
909 	 * returns to the userland of the new process and when our brand
910 	 * library get's initialized, during which we don't want system
911 	 * calls to be re-directed to our brand library since it hasn't
912 	 * been initialized yet.
913 	 */
914 	spd->spd_handler = NULL;
915 
916 	return (0);
917 }
918 
919 
920 int
921 _init(void)
922 {
923 	int err;
924 
925 	/*
926 	 * Set up the table indicating which system calls we want to
927 	 * interpose on.  We should probably build this automatically from
928 	 * a list of system calls that is shared with the user-space
929 	 * library.
930 	 */
931 	s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
932 	s10_emulation_table[SYS_exec] = 1;			/*  11 */
933 	s10_emulation_table[SYS_ioctl] = 1;			/*  54 */
934 	s10_emulation_table[SYS_execve] = 1;			/*  59 */
935 	s10_emulation_table[SYS_acctctl] = 1;			/*  71 */
936 	s10_emulation_table[S10_SYS_issetugid] = 1;		/*  75 */
937 	s10_emulation_table[SYS_uname] = 1;			/* 135 */
938 	s10_emulation_table[SYS_systeminfo] = 1;		/* 139 */
939 #ifdef	__amd64
940 	s10_emulation_table[SYS_lwp_create] = 1;		/* 159 */
941 	s10_emulation_table[SYS_lwp_private] = 1;		/* 166 */
942 #endif	/* __amd64 */
943 	s10_emulation_table[SYS_pwrite] = 1;			/* 174 */
944 	s10_emulation_table[SYS_auditsys] = 1;			/* 186 */
945 	s10_emulation_table[SYS_sigqueue] = 1;			/* 190 */
946 	s10_emulation_table[SYS_lwp_mutex_timedlock] = 1;	/* 210 */
947 	s10_emulation_table[SYS_pwrite64] = 1;			/* 223 */
948 	s10_emulation_table[SYS_zone] = 1;			/* 227 */
949 	s10_emulation_table[SYS_lwp_mutex_trylock] = 1;		/* 251 */
950 
951 	err = mod_install(&modlinkage);
952 	if (err) {
953 		cmn_err(CE_WARN, "Couldn't install brand module");
954 		kmem_free(s10_emulation_table, NSYSCALL);
955 	}
956 
957 	return (err);
958 }
959 
/*
 * Module information entry point: hand the request to mod_info() together
 * with this module's linkage structure and return its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
965 
966 int
967 _fini(void)
968 {
969 	int err;
970 
971 	/*
972 	 * If there are any zones using this brand, we can't allow it to be
973 	 * unloaded.
974 	 */
975 	if (brand_zone_count(&s10_brand))
976 		return (EBUSY);
977 
978 	kmem_free(s10_emulation_table, NSYSCALL);
979 	s10_emulation_table = NULL;
980 
981 	err = mod_remove(&modlinkage);
982 	if (err)
983 		cmn_err(CE_WARN, "Couldn't unload s10 brand module");
984 
985 	return (err);
986 }
987