1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/errno.h>
27 #include <sys/exec.h>
28 #include <sys/kmem.h>
29 #include <sys/modctl.h>
30 #include <sys/model.h>
31 #include <sys/proc.h>
32 #include <sys/syscall.h>
33 #include <sys/systm.h>
34 #include <sys/thread.h>
35 #include <sys/cmn_err.h>
36 #include <sys/archsystm.h>
37 #include <sys/pathname.h>
38 #include <sys/sunddi.h>
39 
40 #include <sys/machbrand.h>
41 #include <sys/brand.h>
42 #include "s10_brand.h"
43 
44 char *s10_emulation_table = NULL;
45 
46 void	s10_init_brand_data(zone_t *);
47 void	s10_free_brand_data(zone_t *);
48 void	s10_setbrand(proc_t *);
49 int	s10_getattr(zone_t *, int, void *, size_t *);
50 int	s10_setattr(zone_t *, int, void *, size_t);
51 int	s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
52 		uintptr_t, uintptr_t, uintptr_t);
53 void	s10_copy_procdata(proc_t *, proc_t *);
54 void	s10_proc_exit(struct proc *, klwp_t *);
55 void	s10_exec();
56 int	s10_initlwp(klwp_t *);
57 void	s10_forklwp(klwp_t *, klwp_t *);
58 void	s10_freelwp(klwp_t *);
59 void	s10_lwpexit(klwp_t *);
60 int	s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
61 	long *, int, caddr_t, cred_t *, int);
62 
63 /* s10 brand */
64 struct brand_ops s10_brops = {
65 	s10_init_brand_data,
66 	s10_free_brand_data,
67 	s10_brandsys,
68 	s10_setbrand,
69 	s10_getattr,
70 	s10_setattr,
71 	s10_copy_procdata,
72 	s10_proc_exit,
73 	s10_exec,
74 	lwp_setrval,
75 	s10_initlwp,
76 	s10_forklwp,
77 	s10_freelwp,
78 	s10_lwpexit,
79 	s10_elfexec
80 };
81 
82 #ifdef	sparc
83 
84 struct brand_mach_ops s10_mops = {
85 	s10_brand_syscall_callback,
86 	s10_brand_syscall32_callback
87 };
88 
89 #else	/* sparc */
90 
91 #ifdef	__amd64
92 
93 struct brand_mach_ops s10_mops = {
94 	s10_brand_sysenter_callback,
95 	NULL,
96 	s10_brand_int91_callback,
97 	s10_brand_syscall_callback,
98 	s10_brand_syscall32_callback,
99 	NULL
100 };
101 
102 #else	/* ! __amd64 */
103 
104 struct brand_mach_ops s10_mops = {
105 	s10_brand_sysenter_callback,
106 	NULL,
107 	NULL,
108 	s10_brand_syscall_callback,
109 	NULL,
110 	NULL
111 };
112 #endif	/* __amd64 */
113 
114 #endif	/* _sparc */
115 
116 struct brand	s10_brand = {
117 	BRAND_VER_1,
118 	"solaris10",
119 	&s10_brops,
120 	&s10_mops
121 };
122 
123 static struct modlbrand modlbrand = {
124 	&mod_brandops,		/* type of module */
125 	"Solaris 10 Brand",	/* description of module */
126 	&s10_brand		/* driver ops */
127 };
128 
129 static struct modlinkage modlinkage = {
130 	MODREV_1, (void *)&modlbrand, NULL
131 };
132 
133 void
134 s10_setbrand(proc_t *p)
135 {
136 	ASSERT(p->p_brand == &s10_brand);
137 	ASSERT(p->p_brand_data == NULL);
138 
139 	/*
140 	 * We should only be called from exec(), when we know the process
141 	 * is single-threaded.
142 	 */
143 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
144 
145 	p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP);
146 	(void) s10_initlwp(p->p_tlist->t_lwp);
147 }
148 
149 int
150 s10_get_zone_emul_version(zone_t *zone)
151 {
152 	return (((s10_zone_data_t *)
153 	    zone->zone_brand_data)->s10zd_emul_version);
154 }
155 
156 int
157 s10_get_emul_version()
158 {
159 	return (s10_get_zone_emul_version(curzone));
160 }
161 
162 void
163 s10_set_emul_version(zone_t *zone, int vers)
164 {
165 	s10_zone_data_t *s10zd = (s10_zone_data_t *)zone->zone_brand_data;
166 	s10zd->s10zd_emul_version = vers;
167 }
168 
169 /*ARGSUSED*/
170 int
171 s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
172 {
173 	int num;
174 
175 	ASSERT(zone->zone_brand == &s10_brand);
176 	if (attr == S10_EMUL_VERSION_NUM) {
177 		if (*bufsize < sizeof (int))
178 			return (ERANGE);
179 		num = s10_get_emul_version();
180 		if (copyout(&num, buf, sizeof (int)) != 0)
181 			return (EFAULT);
182 		*bufsize = sizeof (int);
183 		return (0);
184 	}
185 
186 	return (EINVAL);
187 }
188 
189 int
190 s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
191 {
192 	int num;
193 
194 	ASSERT(zone->zone_brand == &s10_brand);
195 	if (attr == S10_EMUL_VERSION_NUM) {
196 		if (bufsize > sizeof (int))
197 			return (ERANGE);
198 		if (copyin(buf, &num, sizeof (num)) != 0)
199 			return (EFAULT);
200 		s10_set_emul_version(zone, num);
201 		return (0);
202 	}
203 
204 	return (EINVAL);
205 }
206 
207 #ifdef	__amd64
208 /*
209  * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's
210  * libc expects %fs to be nonzero.  This causes some committed
211  * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several
212  * libraries, including libdoor.  This function sets the specified LWP's %fs
213  * register to the legacy S10 selector value (LWPFS_SEL).
214  *
215  * The best solution to the aforementioned problem is backporting CRs
216  * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes
217  * would accept zero for %fs.  Backporting the CRs is a requirement for running
218  * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is
219  * nonzero.  Such behavior breaks 64-bit processes because Xen has to fetch the
220  * FS segments' base addresses from the LWPs' GDTs, which are only capable of
221  * 32-bit addressing.
222  */
223 /*ARGSUSED*/
224 static void
225 s10_amd64_correct_fsreg(klwp_t *l)
226 {
227 	if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
228 		kpreempt_disable();
229 		l->lwp_pcb.pcb_fs = LWPFS_SEL;
230 		l->lwp_pcb.pcb_rupdate = 1;
231 		lwptot(l)->t_post_sys = 1;	/* Guarantee update_sregs() */
232 		kpreempt_enable();
233 	}
234 }
235 #endif	/* __amd64 */
236 
237 int
238 s10_native()
239 {
240 	struct user	*up = PTOU(curproc);
241 	char		*args_new, *comm_new, *p;
242 	int		len;
243 
244 	len = sizeof (S10_NATIVE_LINKER32 " ") - 1;
245 
246 	/*
247 	 * Make sure that the process' interpreter is the native dynamic linker.
248 	 * Convention dictates that native processes executing within solaris10-
249 	 * branded zones are interpreted by the native dynamic linker (the
250 	 * process and its arguments are specified as arguments to the dynamic
251 	 * linker).  If this convention is violated (i.e.,
252 	 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be
253 	 * native), then do nothing and silently indicate success.
254 	 */
255 	if (strcmp(up->u_comm, S10_LINKER_NAME) != 0)
256 		return (0);
257 	if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0)
258 		len += 3;		/* to account for "/64" in the path */
259 	else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0)
260 		return (0);
261 
262 	args_new = strdup(&up->u_psargs[len]);
263 	if ((p = strchr(args_new, ' ')) != NULL)
264 		*p = '\0';
265 	if ((comm_new = strrchr(args_new, '/')) != NULL)
266 		comm_new = strdup(comm_new + 1);
267 	else
268 		comm_new = strdup(args_new);
269 	if (p != NULL)
270 		*p = ' ';
271 
272 	if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) {
273 		mutex_enter(&curproc->p_lock);
274 		(void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1);
275 		(void) strlcpy(up->u_psargs, args_new, PSARGSZ);
276 		mutex_exit(&curproc->p_lock);
277 	}
278 
279 	strfree(args_new);
280 	strfree(comm_new);
281 	return (0);
282 }
283 
284 /*
285  * Get the address of the user-space system call handler from the user
286  * process and attach it to the proc structure.
287  */
288 /*ARGSUSED*/
289 int
290 s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
291     uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
292 {
293 	s10_proc_data_t	*spd;
294 	s10_brand_reg_t	reg;
295 	proc_t		*p = curproc;
296 	int		err;
297 
298 	*rval = 0;
299 
300 	/*
301 	 * B_EXEC_BRAND is redundant
302 	 * since the kernel assumes a native process doing an exec
303 	 * in a branded zone is going to run a branded processes.
304 	 * hence we don't support this operation.
305 	 */
306 	if (cmd == B_EXEC_BRAND)
307 		return (ENOSYS);
308 
309 	if (cmd == B_S10_NATIVE)
310 		return (s10_native());
311 
312 	/* For all other operations this must be a branded process. */
313 	if (p->p_brand == &native_brand)
314 		return (ENOSYS);
315 
316 	ASSERT(p->p_brand == &s10_brand);
317 	ASSERT(p->p_brand_data != NULL);
318 
319 	spd = (s10_proc_data_t *)p->p_brand_data;
320 
321 	switch (cmd) {
322 	case B_EXEC_NATIVE:
323 		err = exec_common(
324 		    (char *)arg1, (const char **)arg2, (const char **)arg3,
325 		    EBA_NATIVE);
326 		return (err);
327 
328 	case B_REGISTER:
329 		if (p->p_model == DATAMODEL_NATIVE) {
330 			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
331 				return (EFAULT);
332 #if defined(_LP64)
333 		} else {
334 			s10_brand_reg32_t reg32;
335 
336 			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
337 				return (EFAULT);
338 			reg.sbr_version = reg32.sbr_version;
339 			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
340 #endif /* _LP64 */
341 		}
342 
343 		if (reg.sbr_version != S10_VERSION)
344 			return (ENOTSUP);
345 		spd->spd_handler = reg.sbr_handler;
346 		return (0);
347 
348 	case B_ELFDATA:
349 		if (p->p_model == DATAMODEL_NATIVE) {
350 			if (copyout(&spd->spd_elf_data, (void *)arg1,
351 			    sizeof (s10_elf_data_t)) != 0)
352 				return (EFAULT);
353 #if defined(_LP64)
354 		} else {
355 			s10_elf_data32_t sed32;
356 
357 			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
358 			sed32.sed_phent = spd->spd_elf_data.sed_phent;
359 			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
360 			sed32.sed_entry = spd->spd_elf_data.sed_entry;
361 			sed32.sed_base = spd->spd_elf_data.sed_base;
362 			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
363 			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
364 			if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0)
365 				return (EFAULT);
366 #endif /* _LP64 */
367 		}
368 		return (0);
369 
370 	case B_S10_PIDINFO:
371 		/*
372 		 * The s10 brand needs to be able to get the pid of the
373 		 * current process and the pid of the zone's init, and it
374 		 * needs to do this on every process startup.  Early in
375 		 * brand startup, we can't call getpid() because calls to
376 		 * getpid() represent a magical signal to some old-skool
377 		 * debuggers.  By merging all of this into one call, we
378 		 * make this quite a bit cheaper and easier to handle in
379 		 * the brand module.
380 		 */
381 		if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0)
382 			return (EFAULT);
383 		if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2,
384 		    sizeof (pid_t)) != 0)
385 			return (EFAULT);
386 		return (0);
387 
388 	case B_S10_TRUSS_POINT:
389 		/*
390 		 * This subcommand exists so that we can see truss output
391 		 * from interposed system calls that return without first
392 		 * calling any other system call, meaning they would be
393 		 * invisible to truss(1).
394 		 *
395 		 * If the second argument is set non-zero, set errno to that
396 		 * value as well.
397 		 *
398 		 * Arguments are:
399 		 *
400 		 *    arg1: syscall number
401 		 *    arg2: errno
402 		 */
403 		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
404 
405 #ifdef	__amd64
406 	case B_S10_FSREGCORRECTION:
407 		/*
408 		 * This subcommand exists so that the SYS_lwp_private and
409 		 * SYS_lwp_create syscalls can manually set the current thread's
410 		 * %fs register to the legacy S10 selector value for 64-bit x86
411 		 * processes.
412 		 */
413 		s10_amd64_correct_fsreg(ttolwp(curthread));
414 		return (0);
415 #endif	/* __amd64 */
416 	}
417 
418 	return (EINVAL);
419 }
420 
421 /*
422  * Copy the per-process brand data from a parent proc to a child.
423  */
424 void
425 s10_copy_procdata(proc_t *child, proc_t *parent)
426 {
427 	s10_proc_data_t	*spd;
428 
429 	ASSERT(parent->p_brand == &s10_brand);
430 	ASSERT(child->p_brand == &s10_brand);
431 	ASSERT(parent->p_brand_data != NULL);
432 	ASSERT(child->p_brand_data == NULL);
433 
434 	/* Just duplicate all the proc data of the parent for the child */
435 	spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP);
436 	bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t));
437 	child->p_brand_data = spd;
438 }
439 
440 /*ARGSUSED*/
441 void
442 s10_proc_exit(struct proc *p, klwp_t *l)
443 {
444 	ASSERT(p->p_brand == &s10_brand);
445 	ASSERT(p->p_brand_data != NULL);
446 
447 	/*
448 	 * We should only be called from proc_exit(), when we know that
449 	 * process is single-threaded.
450 	 */
451 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
452 
453 	/* upon exit, free our lwp brand data */
454 	(void) s10_freelwp(ttolwp(curthread));
455 
456 	/* upon exit, free our proc brand data */
457 	kmem_free(p->p_brand_data, sizeof (s10_proc_data_t));
458 	p->p_brand_data = NULL;
459 }
460 
461 void
462 s10_exec()
463 {
464 	s10_proc_data_t	*spd = curproc->p_brand_data;
465 
466 	ASSERT(curproc->p_brand == &s10_brand);
467 	ASSERT(curproc->p_brand_data != NULL);
468 	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
469 
470 	/*
471 	 * We should only be called from exec(), when we know the process
472 	 * is single-threaded.
473 	 */
474 	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
475 
476 	/* Upon exec, reset our lwp brand data. */
477 	(void) s10_freelwp(ttolwp(curthread));
478 	(void) s10_initlwp(ttolwp(curthread));
479 
480 	/*
481 	 * Upon exec, reset all the proc brand data, except for the elf
482 	 * data associated with the executable we are exec'ing.
483 	 */
484 	spd->spd_handler = NULL;
485 }
486 
487 /*ARGSUSED*/
488 int
489 s10_initlwp(klwp_t *l)
490 {
491 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
492 	ASSERT(l->lwp_procp->p_brand_data != NULL);
493 	ASSERT(l->lwp_brand == NULL);
494 	l->lwp_brand = (void *)-1;
495 	return (0);
496 }
497 
498 /*ARGSUSED*/
499 void
500 s10_forklwp(klwp_t *p, klwp_t *c)
501 {
502 	ASSERT(p->lwp_procp->p_brand == &s10_brand);
503 	ASSERT(c->lwp_procp->p_brand == &s10_brand);
504 
505 	ASSERT(p->lwp_procp->p_brand_data != NULL);
506 	ASSERT(c->lwp_procp->p_brand_data != NULL);
507 
508 	/* Both LWPs have already had been initialized via s10_initlwp() */
509 	ASSERT(p->lwp_brand != NULL);
510 	ASSERT(c->lwp_brand != NULL);
511 
512 #ifdef	__amd64
513 	/*
514 	 * Only correct the child's %fs register if the parent's %fs register
515 	 * is LWPFS_SEL.  If the parent's %fs register is zero, then the Solaris
516 	 * 10 environment that we're emulating uses a version of libc that
517 	 * works when %fs is zero (i.e., it contains backports of CRs 6467491
518 	 * and 6501650).
519 	 */
520 	if (p->lwp_pcb.pcb_fs == LWPFS_SEL)
521 		s10_amd64_correct_fsreg(c);
522 #endif	/* __amd64 */
523 }
524 
525 /*ARGSUSED*/
526 void
527 s10_freelwp(klwp_t *l)
528 {
529 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
530 	ASSERT(l->lwp_procp->p_brand_data != NULL);
531 	ASSERT(l->lwp_brand != NULL);
532 	l->lwp_brand = NULL;
533 }
534 
535 /*ARGSUSED*/
536 void
537 s10_lwpexit(klwp_t *l)
538 {
539 	proc_t	*p = l->lwp_procp;
540 
541 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
542 	ASSERT(l->lwp_procp->p_brand_data != NULL);
543 	ASSERT(l->lwp_brand != NULL);
544 
545 	/*
546 	 * We should never be called for the last thread in a process.
547 	 * (That case is handled by s10_proc_exit().)  There for this lwp
548 	 * must be exiting from a multi-threaded process.
549 	 */
550 	ASSERT(p->p_tlist != p->p_tlist->t_forw);
551 
552 	l->lwp_brand = NULL;
553 }
554 
555 void
556 s10_free_brand_data(zone_t *zone)
557 {
558 	kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t));
559 }
560 
561 void
562 s10_init_brand_data(zone_t *zone)
563 {
564 	s10_zone_data_t *data;
565 	ASSERT(zone->zone_brand == &s10_brand);
566 	ASSERT(zone->zone_brand_data == NULL);
567 	data = (s10_zone_data_t *)kmem_zalloc(sizeof (s10_zone_data_t),
568 	    KM_SLEEP);
569 	/*
570 	 * Initialize the default s10zd_emul_version to S10_EMUL_UNDEF.
571 	 * This can be changed by a call to setattr() during zone boot.
572 	 */
573 	data->s10zd_emul_version = S10_EMUL_UNDEF;
574 	zone->zone_brand_data = data;
575 }
576 
577 #if defined(_LP64)
578 static void
579 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
580 {
581 	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
582 	dst->e_type =		src->e_type;
583 	dst->e_machine =	src->e_machine;
584 	dst->e_version =	src->e_version;
585 	dst->e_entry =		src->e_entry;
586 	dst->e_phoff =		src->e_phoff;
587 	dst->e_shoff =		src->e_shoff;
588 	dst->e_flags =		src->e_flags;
589 	dst->e_ehsize =		src->e_ehsize;
590 	dst->e_phentsize =	src->e_phentsize;
591 	dst->e_phnum =		src->e_phnum;
592 	dst->e_shentsize =	src->e_shentsize;
593 	dst->e_shnum =		src->e_shnum;
594 	dst->e_shstrndx =	src->e_shstrndx;
595 }
596 #endif /* _LP64 */
597 
598 int
599 s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
600 	int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
601 	int brand_action)
602 {
603 	vnode_t		*nvp;
604 	Ehdr		ehdr;
605 	Addr		uphdr_vaddr;
606 	intptr_t	voffset;
607 	int		interp;
608 	int		i, err;
609 	struct execenv	env;
610 	struct user	*up = PTOU(curproc);
611 	s10_proc_data_t	*spd;
612 	s10_elf_data_t	sed, *sedp;
613 	char		*linker;
614 	uintptr_t	lddata; /* lddata of executable's linker */
615 
616 	ASSERT(curproc->p_brand == &s10_brand);
617 	ASSERT(curproc->p_brand_data != NULL);
618 
619 	spd = (s10_proc_data_t *)curproc->p_brand_data;
620 	sedp = &spd->spd_elf_data;
621 
622 	args->brandname = S10_BRANDNAME;
623 
624 	/*
625 	 * We will exec the brand library and then map in the target
626 	 * application and (optionally) the brand's default linker.
627 	 */
628 	if (args->to_model == DATAMODEL_NATIVE) {
629 		args->emulator = S10_LIB;
630 		linker = S10_LINKER;
631 #if defined(_LP64)
632 	} else {
633 		args->emulator = S10_LIB32;
634 		linker = S10_LINKER32;
635 #endif /* _LP64 */
636 	}
637 
638 	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP,
639 	    &nvp)) != 0) {
640 		uprintf("%s: not found.", args->emulator);
641 		return (err);
642 	}
643 
644 	if (args->to_model == DATAMODEL_NATIVE) {
645 		err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
646 		    setid, exec_file, cred, brand_action);
647 #if defined(_LP64)
648 	} else {
649 		err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
650 		    setid, exec_file, cred, brand_action);
651 #endif /* _LP64 */
652 	}
653 	VN_RELE(nvp);
654 	if (err != 0)
655 		return (err);
656 
657 	/*
658 	 * The u_auxv vectors are set up by elfexec to point to the brand
659 	 * emulation library and linker.  Save these so they can be copied to
660 	 * the specific brand aux vectors.
661 	 */
662 	bzero(&sed, sizeof (sed));
663 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
664 		switch (up->u_auxv[i].a_type) {
665 		case AT_SUN_LDDATA:
666 			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
667 			break;
668 		case AT_BASE:
669 			sed.sed_base = up->u_auxv[i].a_un.a_val;
670 			break;
671 		case AT_ENTRY:
672 			sed.sed_entry = up->u_auxv[i].a_un.a_val;
673 			break;
674 		case AT_PHDR:
675 			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
676 			break;
677 		case AT_PHENT:
678 			sed.sed_phent = up->u_auxv[i].a_un.a_val;
679 			break;
680 		case AT_PHNUM:
681 			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
682 			break;
683 		default:
684 			break;
685 		}
686 	}
687 	/* Make sure the emulator has an entry point */
688 	ASSERT(sed.sed_entry != NULL);
689 	ASSERT(sed.sed_phdr != NULL);
690 
691 	bzero(&env, sizeof (env));
692 	if (args->to_model == DATAMODEL_NATIVE) {
693 		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
694 		    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
695 		    &env.ex_brksize, NULL);
696 #if defined(_LP64)
697 	} else {
698 		Elf32_Ehdr ehdr32;
699 		Elf32_Addr uphdr_vaddr32;
700 		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
701 		    &voffset, exec_file, &interp, &env.ex_bssbase,
702 		    &env.ex_brkbase, &env.ex_brksize, NULL);
703 		Ehdr32to64(&ehdr32, &ehdr);
704 		if (uphdr_vaddr32 == (Elf32_Addr)-1)
705 			uphdr_vaddr = (Addr)-1;
706 		else
707 			uphdr_vaddr = uphdr_vaddr32;
708 #endif /* _LP64 */
709 	}
710 	if (err != 0)
711 		return (err);
712 
713 	/*
714 	 * Save off the important properties of the executable. The brand
715 	 * library will ask us for this data later, when it is initializing
716 	 * and getting ready to transfer control to the brand application.
717 	 */
718 	if (uphdr_vaddr == (Addr)-1)
719 		sedp->sed_phdr = voffset + ehdr.e_phoff;
720 	else
721 		sedp->sed_phdr = voffset + uphdr_vaddr;
722 	sedp->sed_entry = voffset + ehdr.e_entry;
723 	sedp->sed_phent = ehdr.e_phentsize;
724 	sedp->sed_phnum = ehdr.e_phnum;
725 
726 	if (interp) {
727 		if (ehdr.e_type == ET_DYN) {
728 			/*
729 			 * This is a shared object executable, so we need to
730 			 * pick a reasonable place to put the heap. Just don't
731 			 * use the first page.
732 			 */
733 			env.ex_brkbase = (caddr_t)PAGESIZE;
734 			env.ex_bssbase = (caddr_t)PAGESIZE;
735 		}
736 
737 		/*
738 		 * If the program needs an interpreter (most do), map it in and
739 		 * store relevant information about it in the aux vector, where
740 		 * the brand library can find it.
741 		 */
742 		if ((err = lookupname(linker, UIO_SYSSPACE,
743 		    FOLLOW, NULLVPP, &nvp)) != 0) {
744 			uprintf("%s: not found.", S10_LINKER);
745 			return (err);
746 		}
747 		if (args->to_model == DATAMODEL_NATIVE) {
748 			err = mapexec_brand(nvp, args, &ehdr,
749 			    &uphdr_vaddr, &voffset, exec_file, &interp,
750 			    NULL, NULL, NULL, &lddata);
751 #if defined(_LP64)
752 		} else {
753 			Elf32_Ehdr ehdr32;
754 			Elf32_Addr uphdr_vaddr32;
755 			err = mapexec32_brand(nvp, args, &ehdr32,
756 			    &uphdr_vaddr32, &voffset, exec_file, &interp,
757 			    NULL, NULL, NULL, &lddata);
758 			Ehdr32to64(&ehdr32, &ehdr);
759 			if (uphdr_vaddr32 == (Elf32_Addr)-1)
760 				uphdr_vaddr = (Addr)-1;
761 			else
762 				uphdr_vaddr = uphdr_vaddr32;
763 #endif /* _LP64 */
764 		}
765 		VN_RELE(nvp);
766 		if (err != 0)
767 			return (err);
768 
769 		/*
770 		 * Now that we know the base address of the brand's linker,
771 		 * place it in the aux vector.
772 		 */
773 		sedp->sed_base = voffset;
774 		sedp->sed_ldentry = voffset + ehdr.e_entry;
775 		sedp->sed_lddata = voffset + lddata;
776 	} else {
777 		/*
778 		 * This program has no interpreter. The brand library will
779 		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
780 		 * so in this case, put the entry point of the main executable
781 		 * there.
782 		 */
783 		if (ehdr.e_type == ET_EXEC) {
784 			/*
785 			 * An executable with no interpreter, this must be a
786 			 * statically linked executable, which means we loaded
787 			 * it at the address specified in the elf header, in
788 			 * which case the e_entry field of the elf header is an
789 			 * absolute address.
790 			 */
791 			sedp->sed_ldentry = ehdr.e_entry;
792 			sedp->sed_entry = ehdr.e_entry;
793 			sedp->sed_lddata = NULL;
794 			sedp->sed_base = NULL;
795 		} else {
796 			/*
797 			 * A shared object with no interpreter, we use the
798 			 * calculated address from above.
799 			 */
800 			sedp->sed_ldentry = sedp->sed_entry;
801 			sedp->sed_entry = NULL;
802 			sedp->sed_phdr = NULL;
803 			sedp->sed_phent = NULL;
804 			sedp->sed_phnum = NULL;
805 			sedp->sed_lddata = NULL;
806 			sedp->sed_base = voffset;
807 
808 			if (ehdr.e_type == ET_DYN) {
809 				/*
810 				 * Delay setting the brkbase until the first
811 				 * call to brk(); see elfexec() for details.
812 				 */
813 				env.ex_bssbase = (caddr_t)0;
814 				env.ex_brkbase = (caddr_t)0;
815 				env.ex_brksize = 0;
816 			}
817 		}
818 	}
819 
820 	env.ex_magic = elfmagic;
821 	env.ex_vp = vp;
822 	setexecenv(&env);
823 
824 	/*
825 	 * It's time to manipulate the process aux vectors.  First
826 	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
827 	 * the AF_SUN_NOPLM flag.
828 	 */
829 	if (args->to_model == DATAMODEL_NATIVE) {
830 		auxv_t		auxflags_auxv;
831 
832 		if (copyin(args->auxp_auxflags, &auxflags_auxv,
833 		    sizeof (auxflags_auxv)) != 0)
834 			return (EFAULT);
835 
836 		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
837 		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
838 		if (copyout(&auxflags_auxv, args->auxp_auxflags,
839 		    sizeof (auxflags_auxv)) != 0)
840 			return (EFAULT);
841 #if defined(_LP64)
842 	} else {
843 		auxv32_t	auxflags_auxv32;
844 
845 		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
846 		    sizeof (auxflags_auxv32)) != 0)
847 			return (EFAULT);
848 
849 		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
850 		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
851 		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
852 		    sizeof (auxflags_auxv32)) != 0)
853 			return (EFAULT);
854 #endif /* _LP64 */
855 	}
856 
857 	/* Second, copy out the brand specific aux vectors. */
858 	if (args->to_model == DATAMODEL_NATIVE) {
859 		auxv_t s10_auxv[] = {
860 		    { AT_SUN_BRAND_AUX1, 0 },
861 		    { AT_SUN_BRAND_AUX2, 0 },
862 		    { AT_SUN_BRAND_AUX3, 0 }
863 		};
864 
865 		ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA);
866 		s10_auxv[0].a_un.a_val = sed.sed_lddata;
867 
868 		if (copyout(&s10_auxv, args->auxp_brand,
869 		    sizeof (s10_auxv)) != 0)
870 			return (EFAULT);
871 #if defined(_LP64)
872 	} else {
873 		auxv32_t s10_auxv32[] = {
874 		    { AT_SUN_BRAND_AUX1, 0 },
875 		    { AT_SUN_BRAND_AUX2, 0 },
876 		    { AT_SUN_BRAND_AUX3, 0 }
877 		};
878 
879 		ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA);
880 		s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
881 		if (copyout(&s10_auxv32, args->auxp_brand,
882 		    sizeof (s10_auxv32)) != 0)
883 			return (EFAULT);
884 #endif /* _LP64 */
885 	}
886 
887 	/*
888 	 * Third, the the /proc aux vectors set up by elfexec() point to brand
889 	 * emulation library and it's linker.  Copy these to the /proc brand
890 	 * specific aux vector, and update the regular /proc aux vectors to
891 	 * point to the executable (and it's linker).  This will enable
892 	 * debuggers to access the executable via the usual /proc or elf notes
893 	 * aux vectors.
894 	 *
895 	 * The brand emulation library's linker will get it's aux vectors off
896 	 * the stack, and then update the stack with the executable's aux
897 	 * vectors before jumping to the executable's linker.
898 	 *
899 	 * Debugging the brand emulation library must be done from
900 	 * the global zone, where the librtld_db module knows how to fetch the
901 	 * brand specific aux vectors to access the brand emulation libraries
902 	 * linker.
903 	 */
904 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
905 		ulong_t val;
906 
907 		switch (up->u_auxv[i].a_type) {
908 		case AT_SUN_BRAND_S10_LDDATA:
909 			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
910 			continue;
911 		case AT_BASE:
912 			val = sedp->sed_base;
913 			break;
914 		case AT_ENTRY:
915 			val = sedp->sed_entry;
916 			break;
917 		case AT_PHDR:
918 			val = sedp->sed_phdr;
919 			break;
920 		case AT_PHENT:
921 			val = sedp->sed_phent;
922 			break;
923 		case AT_PHNUM:
924 			val = sedp->sed_phnum;
925 			break;
926 		case AT_SUN_LDDATA:
927 			val = sedp->sed_lddata;
928 			break;
929 		default:
930 			continue;
931 		}
932 
933 		up->u_auxv[i].a_un.a_val = val;
934 		if (val == NULL) {
935 			/* Hide the entry for static binaries */
936 			up->u_auxv[i].a_type = AT_IGNORE;
937 		}
938 	}
939 
940 	/*
941 	 * The last thing we do here is clear spd->spd_handler.  This is
942 	 * important because if we're already a branded process and if this
943 	 * exec succeeds, there is a window between when the exec() first
944 	 * returns to the userland of the new process and when our brand
945 	 * library get's initialized, during which we don't want system
946 	 * calls to be re-directed to our brand library since it hasn't
947 	 * been initialized yet.
948 	 */
949 	spd->spd_handler = NULL;
950 
951 	return (0);
952 }
953 
954 
955 int
956 _init(void)
957 {
958 	int err;
959 
960 	/*
961 	 * Set up the table indicating which system calls we want to
962 	 * interpose on.  We should probably build this automatically from
963 	 * a list of system calls that is shared with the user-space
964 	 * library.
965 	 */
966 	s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
967 	s10_emulation_table[SYS_exec] = 1;			/*  11 */
968 	s10_emulation_table[SYS_ioctl] = 1;			/*  54 */
969 	s10_emulation_table[SYS_execve] = 1;			/*  59 */
970 	s10_emulation_table[SYS_acctctl] = 1;			/*  71 */
971 	s10_emulation_table[S10_SYS_issetugid] = 1;		/*  75 */
972 	s10_emulation_table[SYS_uname] = 1;			/* 135 */
973 	s10_emulation_table[SYS_systeminfo] = 1;		/* 139 */
974 #ifdef	__amd64
975 	s10_emulation_table[SYS_lwp_create] = 1;		/* 159 */
976 	s10_emulation_table[SYS_lwp_private] = 1;		/* 166 */
977 #endif	/* __amd64 */
978 	s10_emulation_table[SYS_pwrite] = 1;			/* 174 */
979 	s10_emulation_table[SYS_auditsys] = 1;			/* 186 */
980 	s10_emulation_table[SYS_sigqueue] = 1;			/* 190 */
981 	s10_emulation_table[SYS_lwp_mutex_timedlock] = 1;	/* 210 */
982 	s10_emulation_table[SYS_pwrite64] = 1;			/* 223 */
983 	s10_emulation_table[SYS_zone] = 1;			/* 227 */
984 	s10_emulation_table[SYS_lwp_mutex_trylock] = 1;		/* 251 */
985 
986 	err = mod_install(&modlinkage);
987 	if (err) {
988 		cmn_err(CE_WARN, "Couldn't install brand module");
989 		kmem_free(s10_emulation_table, NSYSCALL);
990 	}
991 
992 	return (err);
993 }
994 
995 int
996 _info(struct modinfo *modinfop)
997 {
998 	return (mod_info(&modlinkage, modinfop));
999 }
1000 
1001 int
1002 _fini(void)
1003 {
1004 	int err;
1005 
1006 	/*
1007 	 * If there are any zones using this brand, we can't allow it to be
1008 	 * unloaded.
1009 	 */
1010 	if (brand_zone_count(&s10_brand))
1011 		return (EBUSY);
1012 
1013 	kmem_free(s10_emulation_table, NSYSCALL);
1014 	s10_emulation_table = NULL;
1015 
1016 	err = mod_remove(&modlinkage);
1017 	if (err)
1018 		cmn_err(CE_WARN, "Couldn't unload s10 brand module");
1019 
1020 	return (err);
1021 }
1022