1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <strings.h>
31 #include <unistd.h>
32 #include <thread.h>
33 #include <sys/auxv.h>
34 #include <sys/brand.h>
35 #include <sys/inttypes.h>
36 #include <sys/lwp.h>
37 #include <sys/syscall.h>
38 #include <sys/systm.h>
39 #include <sys/utsname.h>
40 #include <sys/systeminfo.h>
41 #include <sys/zone.h>
42 #include <sys/stat.h>
43 #include <sys/mntent.h>
44 #include <sys/ctfs.h>
45 #include <sys/priv.h>
46 #include <sys/acctctl.h>
47 #include <libgen.h>
48 #include <bsm/audit.h>
49 #include <sys/crypto/ioctl.h>
50 #include <sys/fs/zfs.h>
51 #include <sys/zfs_ioctl.h>
52 #include <sys/ucontext.h>
53 #include <sys/mntio.h>
54 #include <sys/mnttab.h>
55 #include <atomic.h>
56 
57 #include <s10_brand.h>
58 #include <s10_misc.h>
59 
60 /*
61  * Principles of emulation 101.
62  *
63  *
64  * *** Setting errno
65  *
 * Just don't do it.  This emulation library is loaded onto a
 * separate link map from the application whose address space we're
 * running in.  We have our own private copy of libc; therefore,
 * the errno value accessible from here is also private, and changing
 * it will not affect any errno value that the process whose address
 * space we are running in will see.  To return an error condition we
 * should return the errno value we'd like the system to return.
 * For more information about this see the comment in s10_handler().
 * Basically, when we return to the caller that initiated the system
 * call it's their responsibility to set errno.
76  *
77  *
78  * *** Recursion Considerations
79  *
80  * When emulating system calls we need to be very careful about what
81  * library calls we invoke.  Library calls should be kept to a minimum.
82  * One issue is that library calls can invoke system calls, so if we're
83  * emulating a system call and we invoke a library call that depends on
84  * that system call we will probably enter a recursive loop, which would
85  * be bad.
86  *
87  *
88  * *** Return Values.
89  *
 * When declaring new syscall emulation functions, it is very important
 * to set the proper RV_* flags in the s10_sysent_table.  Upon failure,
 * syscall emulation functions should return an errno value.  Upon success
 * syscall emulation functions should return 0 and set the sysret_t return
 * value parameters accordingly.
95  *
96  * There are five possible syscall macro wrappers used in the kernel's system
97  * call sysent table.  These turn into the following return values:
98  *	SYSENT_CL	-> SYSENT_C or SYSENT_CI
99  *	SYSENT_C	SE_64RVAL		RV_DEFAULT
100  *	SYSENT_CI	SE_32RVAL1		RV_DEFAULT
101  *	SYSENT_2CI	SE_32RVAL1|SE_32RVAL2	RV_32RVAL2
102  *	SYSENT_AP	SE_64RVAL		RV_64RVAL
103  *
104  *
105  * *** Agent lwp considerations
106  *
 * It is currently impossible to do any emulation for system calls
 * when they are being invoked on behalf of an agent lwp.  To understand why
 * it's impossible you have to understand how agent lwp syscalls work.
110  *
111  * The agent lwp syscall process works as follows:
112  *   1  The controlling process stops the target.
113  *   2  The controlling process injects an agent lwp which is also stopped.
114  *      This agent lwp assumes the userland stack and register values
115  *      of another stopped lwp in the current process.
116  *   3  The controlling process configures the agent lwp to start
117  *      executing the requested system call.
 *   4  The controlling process configures /proc to stop the agent lwp when
 *      it enters the requested system call.
 *   5  The controlling process allows the agent lwp to start executing.
 *   6  The agent lwp traps into the kernel to perform the requested system
 *      call and immediately stops.
 *   7  The controlling process copies all the arguments for the requested
 *      system call onto the agent lwp's stack.
 *   8  The controlling process configures /proc to stop the agent lwp
 *      when it completes the requested system call.
 *   9  The controlling process allows the agent lwp to start executing.
 *  10  The agent lwp executes the system call and then stops before returning
 *      to userland.
130  *  11  The controlling process copies the return value and return arguments
131  *      back from the agent lwps stack.
132  *  12  The controlling process destroys the agent lwp and restarts
133  *      the target process.
134  *
 * The fundamental problem is that when the agent executes the requested
 * system call in step 5, if we're emulating that system call then the
137  * lwp is redirected back to our emulation layer without blocking
138  * in the kernel.  But our emulation layer can't access the arguments
139  * for the system call because they haven't been copied to the stack
140  * yet and they still only exist in the controlling processes address
141  * space.  This prevents us from being able to do any emulation of
142  * agent lwp system calls.  Hence, currently our brand trap interposition
143  * callback (s10_brand_syscall_callback_common) will detect if a system
144  * call is being made by an agent lwp, and if this is the case it will
145  * never redirect the system call to this emulation library.
146  *
 * In the future, if this proves to be a problem, the easiest solution
 * would probably be to replace the branded versions of these applications
 * with their native counterparts.  I.e., truss, plimit, and pfiles could be
 * replaced with wrapper scripts that execute the native versions of these
 * applications.  In the case of plimit and pfiles this should be pretty
 * straightforward.  Truss would probably be more tricky since it can
 * execute applications which would be branded applications, so in that
 * case it might be necessary to create a loadable library which could
 * be LD_PRELOADed into truss and this library would interpose on the
 * exec() system call to allow truss to correctly execute branded
 * processes.  It should be pointed out that this solution could work
 * because "native agent lwps" (i.e., agent lwps created by native
 * processes) can be treated differently from "branded agent lwps" (i.e.,
 * agent lwps created by branded processes), since native agent lwps
 * would presumably be making native system calls and hence not need
 * any interposition.
163  *
164  */
165 
/* NOTE(review): presumably the ID of the zone we run in; not set in the code shown here -- confirm. */
static zoneid_t zoneid;
/* NOTE(review): looks like a "behave as the global zone" switch; off by default -- confirm usage. */
static boolean_t emul_global_zone = B_FALSE;
/* Feature bitmap consulted by S10_FEATURE_IS_PRESENT(). */
static s10_emul_bitmap_t emul_bitmap;
/* PID of the zone's init process; filled in by get_initpid_info(). */
pid_t zone_init_pid;
170 
171 /*
172  * S10_FEATURE_IS_PRESENT is a macro that helps facilitate conditional
173  * emulation.  For each constant N defined in the s10_emulated_features
174  * enumeration in usr/src/uts/common/brand/solaris10/s10_brand.h,
175  * S10_FEATURE_IS_PRESENT(N) is true iff the feature/backport represented by N
176  * is present in the Solaris 10 image hosted within the zone.  In other words,
177  * S10_FEATURE_IS_PRESENT(N) is true iff the file /usr/lib/brand/solaris10/M,
178  * where M is the enum value of N, was present in the zone when the zone booted.
179  *
180  *
181  * *** Sample Usage
182  *
183  * Suppose that you need to backport a fix to Solaris 10 and there is
184  * emulation in place for the fix.  Suppose further that the emulation won't be
185  * needed if the fix is backported (i.e., if the fix is present in the hosted
186  * Solaris 10 environment, then the brand won't need the emulation).  Then if
187  * you add a constant named "S10_FEATURE_X" to the end of the
188  * s10_emulated_features enumeration that represents the backported fix and
189  * S10_FEATURE_X evaluates to four, then you should create a file named
190  * /usr/lib/brand/solaris10/4 as part of your backport.  Additionally, you
191  * should retain the aforementioned emulation but modify it so that it's
192  * performed only when S10_FEATURE_IS_PRESENT(S10_FEATURE_X) is false.  Thus the
193  * emulation function should look something like the following:
194  *
195  *	static int
196  *	my_emul_function(sysret_t *rv, ...)
197  *	{
198  *		if (S10_FEATURE_IS_PRESENT(S10_FEATURE_X)) {
199  *			// Don't emulate
200  *			return (__systemcall(rv, ...));
201  *		} else {
202  *			// Emulate whatever needs to be emulated when the
203  *			// backport isn't present in the Solaris 10 image.
204  *		}
205  *	}
206  */
/*
 * Feature N is tested as bit (N & 0x7) of emul_bitmap[N >> 3].  The macro
 * argument is expanded twice, so it must not have side effects.
 */
#define	S10_FEATURE_IS_PRESENT(s10_emulated_features_constant)	\
	((emul_bitmap[(s10_emulated_features_constant) >> 3] &	\
	(1 << ((s10_emulated_features_constant) & 0x7))) != 0)
210 
/* Initializer for one s10_sysent_table entry: handler plus args/flags word. */
#define	EMULATE(cb, args)	{ (sysent_cb_t)(cb), (args) }
/* Entry for an unsupported system call: zero arguments, handled by s10_unimpl. */
#define	NOSYS			EMULATE(s10_unimpl, (0 | RV_DEFAULT))

/*
 * Emulation handler.  Declared without a prototype so that handlers taking
 * different numbers of arguments can share one pointer type.
 */
typedef long (*sysent_cb_t)();
/* One entry of the system call emulation dispatch table. */
typedef struct s10_sysent_table {
	/* Emulation handler invoked for this system call. */
	sysent_cb_t	st_callc;
	/* Argument count (extracted with NARGS_MASK) combined with RV_* flags. */
	uintptr_t	st_args;
} s10_sysent_table_t;
/* Declared with no size here; NOTE(review): the entries are presumably defined later in this file. */
s10_sysent_table_t s10_sysent_table[];
220 
/*
 * NOTE(review): presumably the release and version strings presented to
 * Solaris 10 processes by uname emulation -- confirm against their users.
 */
#define	S10_UTS_RELEASE	"5.10"
#define	S10_UTS_VERSION	"Generic_Virtual"

/*
 * Diagnostic state written by _s10_abort() just before it terminates the
 * process: the error code, the message, and the source file and line that
 * were passed in.  The variables are volatile and marked "unused" for lint
 * because nothing in the library reads them back.
 */
/*LINTED: static unused*/
static volatile int		s10_abort_err;
/*LINTED: static unused*/
static volatile const char	*s10_abort_msg;
/*LINTED: static unused*/
static volatile const char	*s10_abort_file;
/*LINTED: static unused*/
static volatile int		s10_abort_line;

/* This library's own errno; see the header comment on why it must not be set. */
extern int errno;
234 
235 /*ARGSUSED*/
236 void
237 _s10_abort(int err, const char *msg, const char *file, int line)
238 {
239 	sysret_t rval;
240 
241 	/* Save the error message into convenient globals */
242 	s10_abort_err = err;
243 	s10_abort_msg = msg;
244 	s10_abort_file = file;
245 	s10_abort_line = line;
246 
247 	/* kill ourselves */
248 	abort();
249 
250 	/* If abort() didn't work, try something stronger. */
251 	(void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGKILL);
252 }
253 
254 static int
255 s10_uucopy(const void *from, void *to, size_t size)
256 {
257 	sysret_t rval;
258 
259 	if (__systemcall(&rval, SYS_uucopy + 1024, from, to, size) != 0)
260 		return (EFAULT);
261 	return (0);
262 }
263 
264 /*
265  * ATTENTION: uucopystr() does NOT ensure that string are null terminated!
266  */
267 static int
268 s10_uucopystr(const void *from, void *to, size_t size)
269 {
270 	sysret_t rval;
271 
272 	if (__systemcall(&rval, SYS_uucopystr + 1024, from, to, size) != 0)
273 		return (EFAULT);
274 	return (0);
275 }
276 
277 /*
278  * Figures out the PID of init for the zone.  Also returns a boolean
279  * indicating whether this process currently has that pid: if so,
280  * then at this moment, we are init.
281  */
282 static boolean_t
283 get_initpid_info(void)
284 {
285 	pid_t pid;
286 	sysret_t rval;
287 	int err;
288 
289 	/*
290 	 * Determine the current process PID and the PID of the zone's init.
291 	 * We use care not to call getpid() here, because we're not supposed
292 	 * to call getpid() until after the program is fully linked-- the
293 	 * first call to getpid() is a signal from the linker to debuggers
294 	 * that linking has been completed.
295 	 */
296 	if ((err = __systemcall(&rval, SYS_brand,
297 	    B_S10_PIDINFO, &pid, &zone_init_pid)) != 0) {
298 		s10_abort(err, "Failed to get init's pid");
299 	}
300 
301 	/*
302 	 * Note that we need to be cautious with the pid we get back--
303 	 * it should not be stashed and used in place of getpid(), since
304 	 * we might fork(2).  So we keep zone_init_pid and toss the pid
305 	 * we otherwise got.
306 	 */
307 	if (pid == zone_init_pid)
308 		return (B_TRUE);
309 
310 	return (B_FALSE);
311 }
312 
313 /*
314  * This function is defined to be NOSYS but it won't be called from the
315  * the kernel since the NOSYS system calls are not enabled in the kernel.
316  * Thus, the only time this function is called is directly from within the
317  * indirect system call path.
318  */
319 /*ARGSUSED*/
320 static long
321 s10_unimpl(sysret_t *rv, uintptr_t p1)
322 {
323 	sysret_t rval;
324 
325 	/*
326 	 * We'd like to print out some kind of error message here like
327 	 * "unsupported syscall", but we can't because it's not safe to
328 	 * assume that stderr or STDERR_FILENO actually points to something
329 	 * that is a terminal, and if we wrote to those files we could
330 	 * inadvertantly write to some applications open files, which would
331 	 * be bad.
332 	 *
333 	 * Normally, if an application calls an invalid system call
334 	 * it get a SIGSYS sent to it.  So we'll just go ahead and send
335 	 * ourselves a signal here.  Note that this is far from ideal since
336 	 * if the application has registered a signal handler, that signal
337 	 * handler may recieve a ucontext_t as the third parameter to
338 	 * indicate the context of the process when the signal was
339 	 * generated, and in this case that context will not be what the
340 	 * application is expecting.  Hence, we should probably create a
341 	 * brandsys() kernel function that can deliver the signal to us
342 	 * with the correct ucontext_t.
343 	 */
344 	(void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGSYS);
345 	return (ENOSYS);
346 }
347 
#if defined(__sparc) && !defined(__sparcv9)
/*
 * Yuck.  For 32-bit sparc applications, handle indirect system calls.
 *
 * 'code' selects the entry in s10_sysent_table; a0..a7 are the arguments of
 * the system call being made indirectly.  This interface is declared with
 * the maximum number of system call arguments.  If we receive a system call
 * that uses fewer arguments, then the additional arguments will be garbage,
 * but they will also be ignored so that should be ok.
 *
 * Returns whatever the selected emulation handler returns.  Aborts if the
 * table entry carries an argument count outside 0..8.
 */
static long
s10_indir(sysret_t *rv, int code,
    uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4,
    uintptr_t a5, uintptr_t a6, uintptr_t a7)
{
	s10_sysent_table_t *sst;

	/* Validate the index before forming a pointer into the table. */
	s10_assert(code >= 0 && code < NSYSCALL);
	sst = &(s10_sysent_table[code]);

	/* Pass exactly as many arguments as the table entry declares. */
	switch (sst->st_args & NARGS_MASK) {
	case 0:
		return ((sst->st_callc)(rv));
	case 1:
		return ((sst->st_callc)(rv, a0));
	case 2:
		return ((sst->st_callc)(rv, a0, a1));
	case 3:
		return ((sst->st_callc)(rv, a0, a1, a2));
	case 4:
		return ((sst->st_callc)(rv, a0, a1, a2, a3));
	case 5:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4));
	case 6:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5));
	case 7:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6));
	case 8:
		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6, a7));
	}
	s10_abort(0, "invalid entry in s10_sysent_table");
	return (EINVAL);
}
#endif /* __sparc && !__sparcv9 */
388 
389 /* Free the thread-local storage provided by mntfs_get_mntentbuf(). */
390 static void
391 mntfs_free_mntentbuf(void *arg)
392 {
393 	struct mntentbuf *embufp = arg;
394 
395 	if (embufp == NULL)
396 		return;
397 	if (embufp->mbuf_emp)
398 		free(embufp->mbuf_emp);
399 	if (embufp->mbuf_buf)
400 		free(embufp->mbuf_buf);
401 	bzero(embufp, sizeof (struct mntentbuf));
402 	free(embufp);
403 }
404 
405 /* Provide the thread-local storage required by mntfs_ioctl(). */
406 static struct mntentbuf *
407 mntfs_get_mntentbuf(size_t size)
408 {
409 	static mutex_t keylock;
410 	static thread_key_t key;
411 	static int once_per_keyname = 0;
412 	void *tsd = NULL;
413 	struct mntentbuf *embufp;
414 
415 	/* Create the key. */
416 	if (!once_per_keyname) {
417 		(void) mutex_lock(&keylock);
418 		if (!once_per_keyname) {
419 			if (thr_keycreate(&key, mntfs_free_mntentbuf)) {
420 				(void) mutex_unlock(&keylock);
421 				return (NULL);
422 			} else {
423 				once_per_keyname++;
424 			}
425 		}
426 		(void) mutex_unlock(&keylock);
427 	}
428 
429 	/*
430 	 * The thread-specific datum for this key is the address of a struct
431 	 * mntentbuf. If this is the first time here then we allocate the struct
432 	 * and its contents, and associate its address with the thread; if there
433 	 * are any problems then we abort.
434 	 */
435 	if (thr_getspecific(key, &tsd))
436 		return (NULL);
437 	if (tsd == NULL) {
438 		if (!(embufp = calloc(1, sizeof (struct mntentbuf))) ||
439 		    !(embufp->mbuf_emp = malloc(sizeof (struct extmnttab))) ||
440 		    thr_setspecific(key, embufp)) {
441 			mntfs_free_mntentbuf(embufp);
442 			return (NULL);
443 		}
444 	} else {
445 		embufp = tsd;
446 	}
447 
448 	/* Return the buffer, resizing it if necessary. */
449 	if (size > embufp->mbuf_bufsize) {
450 		if (embufp->mbuf_buf)
451 			free(embufp->mbuf_buf);
452 		if ((embufp->mbuf_buf = malloc(size)) == NULL) {
453 			embufp->mbuf_bufsize = 0;
454 			return (NULL);
455 		} else {
456 			embufp->mbuf_bufsize = size;
457 		}
458 	}
459 	return (embufp);
460 }
461 
462 /*
463  * The MNTIOC_GETMNTENT command in this release differs from that in early
464  * versions of Solaris 10.
465  *
466  * Previously, the command would copy a pointer to a struct extmnttab to an
467  * address provided as an argument. The pointer would be somewhere within a
468  * mapping already present within the user's address space. In addition, the
469  * text to which the struct's members pointed would also be within a
470  * pre-existing mapping. Now, the user is required to allocate memory for both
471  * the struct and the text buffer, and to pass the address of each within a
472  * struct mntentbuf. In order to conceal these details from a Solaris 10 client
473  * we allocate some thread-local storage in which to create the necessary data
474  * structures; this is static, thread-safe memory that will be cleaned up
475  * without the caller's intervention.
476  *
477  * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY are new in this release; they should
478  * not work for older clients.
479  */
480 int
481 mntfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
482 {
483 	int err;
484 	struct stat statbuf;
485 	struct mntentbuf *embufp;
486 	static size_t bufsize = MNT_LINE_MAX;
487 
488 
489 	/* Do not emulate mntfs commands from up-to-date clients. */
490 	if (S10_FEATURE_IS_PRESENT(S10_FEATURE_ALTERED_MNTFS_IOCTL))
491 		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
492 
493 	/* Do not emulate mntfs commands directed at other file systems. */
494 	if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &statbuf)) != 0)
495 		return (err);
496 	if (strcmp(statbuf.st_fstype, MNTTYPE_MNTFS) != 0)
497 		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
498 
499 	if (cmd == MNTIOC_GETEXTMNTENT || cmd == MNTIOC_GETMNTANY)
500 		return (EINVAL);
501 
502 	if ((embufp = mntfs_get_mntentbuf(bufsize)) == NULL)
503 		return (ENOMEM);
504 
505 	/*
506 	 * MNTIOC_GETEXTMNTENT advances the file pointer once it has
507 	 * successfully copied out the result to the address provided. We
508 	 * therefore need to check the user-supplied address now since the
509 	 * one we'll be providing is guaranteed to work.
510 	 */
511 	if (s10_uucopy(&embufp->mbuf_emp, (void *)arg, sizeof (void *)) != 0)
512 		return (EFAULT);
513 
514 	/*
515 	 * Keep retrying for as long as we fail for want of a large enough
516 	 * buffer.
517 	 */
518 	for (;;) {
519 		if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes,
520 		    MNTIOC_GETEXTMNTENT, embufp)) != 0)
521 			return (err);
522 
523 		if (rval->sys_rval1 == MNTFS_TOOLONG) {
524 			/* The buffer wasn't large enough. */
525 			(void) atomic_swap_ulong((unsigned long *)&bufsize,
526 			    2 * embufp->mbuf_bufsize);
527 			if ((embufp = mntfs_get_mntentbuf(bufsize)) == NULL)
528 				return (ENOMEM);
529 		} else {
530 			break;
531 		}
532 	}
533 
534 	if (s10_uucopy(&embufp->mbuf_emp, (void *)arg, sizeof (void *)) != 0)
535 		return (EFAULT);
536 
537 	return (0);
538 }
539 
/*
 * Assign the structure member value from the s (source) structure to the
 * d (dest) structure.
 *
 * d and s are structure lvalues (not pointers).  val is pasted after a '.'
 * on both sides, so it may also be a nested member path such as
 * fl_list.fl_digest.
 */
#define	struct_assign(d, s, val)	(((d).val) = ((s).val))
545 
/*
 * The CRYPTO_GET_FUNCTION_LIST parameter structure crypto_function_list_t
 * changed between S10 and Nevada, so we have to emulate the old S10
 * crypto_function_list_t structure when interposing on the ioctl syscall.
 *
 * This is the S10 layout.  It is copied to and from the S10 caller as raw
 * bytes, so member order and types must not be changed.
 */
typedef struct s10_crypto_function_list {
	/* Message digest operations. */
	boolean_t fl_digest_init;
	boolean_t fl_digest;
	boolean_t fl_digest_update;
	boolean_t fl_digest_key;
	boolean_t fl_digest_final;

	/* Encryption operations. */
	boolean_t fl_encrypt_init;
	boolean_t fl_encrypt;
	boolean_t fl_encrypt_update;
	boolean_t fl_encrypt_final;

	/* Decryption operations. */
	boolean_t fl_decrypt_init;
	boolean_t fl_decrypt;
	boolean_t fl_decrypt_update;
	boolean_t fl_decrypt_final;

	/* MAC operations. */
	boolean_t fl_mac_init;
	boolean_t fl_mac;
	boolean_t fl_mac_update;
	boolean_t fl_mac_final;

	/* Signing operations. */
	boolean_t fl_sign_init;
	boolean_t fl_sign;
	boolean_t fl_sign_update;
	boolean_t fl_sign_final;
	boolean_t fl_sign_recover_init;
	boolean_t fl_sign_recover;

	/* Signature verification operations. */
	boolean_t fl_verify_init;
	boolean_t fl_verify;
	boolean_t fl_verify_update;
	boolean_t fl_verify_final;
	boolean_t fl_verify_recover_init;
	boolean_t fl_verify_recover;

	/* Combined (dual-function) operations. */
	boolean_t fl_digest_encrypt_update;
	boolean_t fl_decrypt_digest_update;
	boolean_t fl_sign_encrypt_update;
	boolean_t fl_decrypt_verify_update;

	/* Random number operations. */
	boolean_t fl_seed_random;
	boolean_t fl_generate_random;

	/* Session management. */
	boolean_t fl_session_open;
	boolean_t fl_session_close;
	boolean_t fl_session_login;
	boolean_t fl_session_logout;

	/* Object management. */
	boolean_t fl_object_create;
	boolean_t fl_object_copy;
	boolean_t fl_object_destroy;
	boolean_t fl_object_get_size;
	boolean_t fl_object_get_attribute_value;
	boolean_t fl_object_set_attribute_value;
	boolean_t fl_object_find_init;
	boolean_t fl_object_find;
	boolean_t fl_object_find_final;

	/* Key management. */
	boolean_t fl_key_generate;
	boolean_t fl_key_generate_pair;
	boolean_t fl_key_wrap;
	boolean_t fl_key_unwrap;
	boolean_t fl_key_derive;

	/* Token and PIN management. */
	boolean_t fl_init_token;
	boolean_t fl_init_pin;
	boolean_t fl_set_pin;

	/* Provider limits; the S10 layout ends after these three members. */
	boolean_t prov_is_limited;
	uint32_t prov_hash_threshold;
	uint32_t prov_hash_limit;
} s10_crypto_function_list_t;
624 
/*
 * S10 layout of the CRYPTO_GET_FUNCTION_LIST ioctl argument, as read from
 * and written back to the S10 caller by crypto_ioctl().
 */
typedef struct s10_crypto_get_function_list {
	/* Result code reported by the native ioctl. */
	uint_t				fl_return_value;
	/* Provider being queried; forwarded to the native ioctl. */
	crypto_provider_id_t		fl_provider_id;
	/* Function list in S10 layout. */
	s10_crypto_function_list_t	fl_list;
} s10_crypto_get_function_list_t;
630 
/*
 * The structure returned by the CRYPTO_GET_FUNCTION_LIST ioctl on /dev/crypto
 * increased in size due to:
 *	6482533 Threshold for HW offload via PKCS11 interface
 * between S10 and Nevada.  This is a relatively simple process of filling
 * in the S10 structure fields with the Nevada data.
 *
 * We stat the device to make sure that the ioctl is meant for /dev/crypto.
 *
 * If /dev/crypto cannot be stat'ed, or fdes does not refer to a device with
 * the same major number, the ioctl is passed to the native system call
 * unchanged.  Otherwise the S10 argument is copied in, the native ioctl is
 * issued with a native-layout argument, and the result is copied back out
 * member by member in S10 layout.
 *
 * Returns 0 on success or an errno value on failure (EFAULT if the user
 * argument cannot be read or written).
 */
static int
crypto_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
{
	int				err;
	s10_crypto_get_function_list_t	s10_param;
	crypto_get_function_list_t	native_param;
	/* Major number of /dev/crypto, looked up on first use and cached. */
	static dev_t			crypto_dev = (dev_t)-1;
	struct stat			sbuf;

	if (crypto_dev == (dev_t)-1) {
		if ((err = __systemcall(rval, SYS_stat + 1024, "/dev/crypto",
		    &sbuf)) != 0)
			goto nonemuioctl;
		crypto_dev = major(sbuf.st_rdev);
	}
	if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &sbuf)) != 0)
		return (err);
	/* Each open fd of /dev/crypto gets a new minor device. */
	if (major(sbuf.st_rdev) != crypto_dev)
		goto nonemuioctl;

	if (s10_uucopy((const void *)arg, &s10_param, sizeof (s10_param)) != 0)
		return (EFAULT);
	/* The provider id is the only member carried into the native request. */
	struct_assign(native_param, s10_param, fl_provider_id);
	if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd,
	    &native_param)) != 0)
		return (err);

	/* Translate the native reply into the S10 layout, member by member. */
	struct_assign(s10_param, native_param, fl_return_value);
	struct_assign(s10_param, native_param, fl_provider_id);

	struct_assign(s10_param, native_param, fl_list.fl_digest_init);
	struct_assign(s10_param, native_param, fl_list.fl_digest);
	struct_assign(s10_param, native_param, fl_list.fl_digest_update);
	struct_assign(s10_param, native_param, fl_list.fl_digest_key);
	struct_assign(s10_param, native_param, fl_list.fl_digest_final);

	struct_assign(s10_param, native_param, fl_list.fl_encrypt_init);
	struct_assign(s10_param, native_param, fl_list.fl_encrypt);
	struct_assign(s10_param, native_param, fl_list.fl_encrypt_update);
	struct_assign(s10_param, native_param, fl_list.fl_encrypt_final);

	struct_assign(s10_param, native_param, fl_list.fl_decrypt_init);
	struct_assign(s10_param, native_param, fl_list.fl_decrypt);
	struct_assign(s10_param, native_param, fl_list.fl_decrypt_update);
	struct_assign(s10_param, native_param, fl_list.fl_decrypt_final);

	struct_assign(s10_param, native_param, fl_list.fl_mac_init);
	struct_assign(s10_param, native_param, fl_list.fl_mac);
	struct_assign(s10_param, native_param, fl_list.fl_mac_update);
	struct_assign(s10_param, native_param, fl_list.fl_mac_final);

	struct_assign(s10_param, native_param, fl_list.fl_sign_init);
	struct_assign(s10_param, native_param, fl_list.fl_sign);
	struct_assign(s10_param, native_param, fl_list.fl_sign_update);
	struct_assign(s10_param, native_param, fl_list.fl_sign_final);
	struct_assign(s10_param, native_param, fl_list.fl_sign_recover_init);
	struct_assign(s10_param, native_param, fl_list.fl_sign_recover);

	struct_assign(s10_param, native_param, fl_list.fl_verify_init);
	struct_assign(s10_param, native_param, fl_list.fl_verify);
	struct_assign(s10_param, native_param, fl_list.fl_verify_update);
	struct_assign(s10_param, native_param, fl_list.fl_verify_final);
	struct_assign(s10_param, native_param, fl_list.fl_verify_recover_init);
	struct_assign(s10_param, native_param, fl_list.fl_verify_recover);

	struct_assign(s10_param, native_param,
	    fl_list.fl_digest_encrypt_update);
	struct_assign(s10_param, native_param,
	    fl_list.fl_decrypt_digest_update);
	struct_assign(s10_param, native_param, fl_list.fl_sign_encrypt_update);
	struct_assign(s10_param, native_param,
	    fl_list.fl_decrypt_verify_update);

	struct_assign(s10_param, native_param, fl_list.fl_seed_random);
	struct_assign(s10_param, native_param, fl_list.fl_generate_random);

	struct_assign(s10_param, native_param, fl_list.fl_session_open);
	struct_assign(s10_param, native_param, fl_list.fl_session_close);
	struct_assign(s10_param, native_param, fl_list.fl_session_login);
	struct_assign(s10_param, native_param, fl_list.fl_session_logout);

	struct_assign(s10_param, native_param, fl_list.fl_object_create);
	struct_assign(s10_param, native_param, fl_list.fl_object_copy);
	struct_assign(s10_param, native_param, fl_list.fl_object_destroy);
	struct_assign(s10_param, native_param, fl_list.fl_object_get_size);
	struct_assign(s10_param, native_param,
	    fl_list.fl_object_get_attribute_value);
	struct_assign(s10_param, native_param,
	    fl_list.fl_object_set_attribute_value);
	struct_assign(s10_param, native_param, fl_list.fl_object_find_init);
	struct_assign(s10_param, native_param, fl_list.fl_object_find);
	struct_assign(s10_param, native_param, fl_list.fl_object_find_final);

	struct_assign(s10_param, native_param, fl_list.fl_key_generate);
	struct_assign(s10_param, native_param, fl_list.fl_key_generate_pair);
	struct_assign(s10_param, native_param, fl_list.fl_key_wrap);
	struct_assign(s10_param, native_param, fl_list.fl_key_unwrap);
	struct_assign(s10_param, native_param, fl_list.fl_key_derive);

	struct_assign(s10_param, native_param, fl_list.fl_init_token);
	struct_assign(s10_param, native_param, fl_list.fl_init_pin);
	struct_assign(s10_param, native_param, fl_list.fl_set_pin);

	struct_assign(s10_param, native_param, fl_list.prov_is_limited);
	struct_assign(s10_param, native_param, fl_list.prov_hash_threshold);
	struct_assign(s10_param, native_param, fl_list.prov_hash_limit);

	/* Deliver the S10-layout result to the caller. */
	return (s10_uucopy(&s10_param, (void *)arg, sizeof (s10_param)));

nonemuioctl:
	/* Not /dev/crypto (or it could not be stat'ed): no translation. */
	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
}
754 
/*
 * The process contract CT_TGET and CT_TSET parameter structure ct_param_t
 * changed between S10 and Nevada, so we have to emulate the old S10
 * ct_param_t structure when interposing on the ioctl syscall.
 *
 * This is the S10 layout: the value is carried inline, whereas ctfs_ioctl()
 * gives the native structure a pointer to the value plus its size.
 */
typedef struct s10_ct_param {
	/* Parameter identifier; copied into the native ctpm_id. */
	uint32_t ctpm_id;
	/* Not referenced by ctfs_ioctl(). */
	uint32_t ctpm_pad;
	/* Parameter value; the native request points at this member. */
	uint64_t ctpm_value;
} s10_ct_param_t;
765 
766 /*
767  * We have to emulate process contract ioctls for init(1M) because the
768  * ioctl parameter structure changed between S10 and Nevada.  This is
769  * a relatively simple process of filling Nevada structure fields,
770  * shuffling values, and initiating a native system call.
771  *
772  * For now, we'll assume that all consumers of CT_TGET and CT_TSET will
773  * need emulation.  We'll issue a stat to make sure that the ioctl
774  * is meant for the contract file system.
775  *
776  */
777 static int
778 ctfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
779 {
780 	int err;
781 	s10_ct_param_t s10param;
782 	ct_param_t param;
783 	struct stat statbuf;
784 
785 	if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &statbuf)) != 0)
786 		return (err);
787 	if (strcmp(statbuf.st_fstype, MNTTYPE_CTFS) != 0)
788 		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
789 
790 	if (s10_uucopy((const void *)arg, &s10param, sizeof (s10param)) != 0)
791 		return (EFAULT);
792 	param.ctpm_id = s10param.ctpm_id;
793 	param.ctpm_size = sizeof (uint64_t);
794 	param.ctpm_value = &s10param.ctpm_value;
795 	if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &param))
796 	    != 0)
797 		return (err);
798 
799 	if (cmd == CT_TGET)
800 		return (s10_uucopy(&s10param, (void *)arg, sizeof (s10param)));
801 
802 	return (0);
803 }
804 
/*
 * S10 layout of the ZFS ioctl argument.  zfs_ioctl() copies it in from the
 * caller as raw bytes, so member order and types must not be changed.  Per
 * the note inside the structure, the Solaris Next layout has an additional
 * zc_iflags member between zc_obj and zc_share.
 */
typedef struct s10_zfs_cmd {
	char		zc_name[MAXPATHLEN];
	char		zc_value[MAXPATHLEN * 2];
	char		zc_string[MAXNAMELEN];
	uint64_t	zc_guid;
	uint64_t	zc_nvlist_conf;		/* really (char *) */
	uint64_t	zc_nvlist_conf_size;
	uint64_t	zc_nvlist_src;		/* really (char *) */
	uint64_t	zc_nvlist_src_size;
	uint64_t	zc_nvlist_dst;		/* really (char *) */
	uint64_t	zc_nvlist_dst_size;
	uint64_t	zc_cookie;
	uint64_t	zc_objset_type;
	uint64_t	zc_perm_action;
	uint64_t 	zc_history;		/* really (char *) */
	uint64_t 	zc_history_len;
	uint64_t	zc_history_offset;
	uint64_t	zc_obj;
	/* Solaris Next added zc_iflags member here */
	zfs_share_t	zc_share;
	dmu_objset_stats_t zc_objset_stats;
	struct drr_begin zc_begin_record;
	zinject_record_t zc_inject_record;
} s10_zfs_cmd_t;
829 
830 /*
831  * There is a difference in the zfs_cmd_t ioctl parameter between S10 and
832  * Solaris Next so we need to translate between the two structures when
833  * making ZFS ioctls.
834  */
835 static int
836 zfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
837 {
838 	int				err;
839 	s10_zfs_cmd_t			s10_param;
840 	zfs_cmd_t			native_param;
841 	static dev_t			zfs_dev = (dev_t)-1;
842 	struct stat			sbuf;
843 
844 	if (zfs_dev == (dev_t)-1) {
845 		if ((err = __systemcall(rval, SYS_stat + 1024, "/dev/zfs",
846 		    &sbuf)) != 0)
847 			goto nonemuioctl;
848 		zfs_dev = major(sbuf.st_rdev);
849 	}
850 	if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &sbuf)) != 0)
851 		return (err);
852 	if (major(sbuf.st_rdev) != zfs_dev)
853 		goto nonemuioctl;
854 
855 	if (s10_uucopy((const void *)arg, &s10_param, sizeof (s10_param)) != 0)
856 		return (EFAULT);
857 
858 	bcopy((const void *)s10_param.zc_name, (void *)native_param.zc_name,
859 	    sizeof (s10_param.zc_name));
860 	bcopy((const void *)s10_param.zc_value, (void *)native_param.zc_value,
861 	    sizeof (s10_param.zc_value));
862 	bcopy((const void *)s10_param.zc_string, (void *)native_param.zc_string,
863 	    sizeof (s10_param.zc_string));
864 	struct_assign(native_param, s10_param, zc_guid);
865 	struct_assign(native_param, s10_param, zc_nvlist_conf);
866 	struct_assign(native_param, s10_param, zc_nvlist_conf_size);
867 	struct_assign(native_param, s10_param, zc_nvlist_src);
868 	struct_assign(native_param, s10_param, zc_nvlist_src_size);
869 	struct_assign(native_param, s10_param, zc_nvlist_dst);
870 	struct_assign(native_param, s10_param, zc_nvlist_dst_size);
871 	struct_assign(native_param, s10_param, zc_cookie);
872 	struct_assign(native_param, s10_param, zc_objset_type);
873 	struct_assign(native_param, s10_param, zc_perm_action);
874 	struct_assign(native_param, s10_param, zc_history);
875 	struct_assign(native_param, s10_param, zc_history_len);
876 	struct_assign(native_param, s10_param, zc_history_offset);
877 	struct_assign(native_param, s10_param, zc_obj);
878 	native_param.zc_iflags = 0;
879 	struct_assign(native_param, s10_param, zc_share);
880 	struct_assign(native_param, s10_param, zc_objset_stats);
881 	struct_assign(native_param, s10_param, zc_begin_record);
882 	struct_assign(native_param, s10_param, zc_inject_record);
883 
884 	err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &native_param);
885 
886 	bcopy((const void *)native_param.zc_name, (void *)s10_param.zc_name,
887 	    sizeof (s10_param.zc_name));
888 	bcopy((const void *)native_param.zc_value, (void *)s10_param.zc_value,
889 	    sizeof (s10_param.zc_value));
890 	bcopy((const void *)native_param.zc_string, (void *)s10_param.zc_string,
891 	    sizeof (s10_param.zc_string));
892 	struct_assign(s10_param, native_param, zc_guid);
893 	struct_assign(s10_param, native_param, zc_nvlist_conf);
894 	struct_assign(s10_param, native_param, zc_nvlist_conf_size);
895 	struct_assign(s10_param, native_param, zc_nvlist_src);
896 	struct_assign(s10_param, native_param, zc_nvlist_src_size);
897 	struct_assign(s10_param, native_param, zc_nvlist_dst);
898 	struct_assign(s10_param, native_param, zc_nvlist_dst_size);
899 	struct_assign(s10_param, native_param, zc_cookie);
900 	struct_assign(s10_param, native_param, zc_objset_type);
901 	struct_assign(s10_param, native_param, zc_perm_action);
902 	struct_assign(s10_param, native_param, zc_history);
903 	struct_assign(s10_param, native_param, zc_history_len);
904 	struct_assign(s10_param, native_param, zc_history_offset);
905 	struct_assign(s10_param, native_param, zc_obj);
906 	struct_assign(s10_param, native_param, zc_share);
907 	struct_assign(s10_param, native_param, zc_objset_stats);
908 	struct_assign(s10_param, native_param, zc_begin_record);
909 	struct_assign(s10_param, native_param, zc_inject_record);
910 
911 	(void) s10_uucopy(&s10_param, (void *)arg, sizeof (s10_param));
912 	return (err);
913 
914 nonemuioctl:
915 	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
916 }
917 
918 int
919 s10_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
920 {
921 	switch (cmd) {
922 	case CRYPTO_GET_FUNCTION_LIST:
923 		return (crypto_ioctl(rval, fdes, cmd, arg));
924 	case CT_TGET:
925 		/*FALLTHRU*/
926 	case CT_TSET:
927 		return (ctfs_ioctl(rval, fdes, cmd, arg));
928 	case MNTIOC_GETMNTENT:
929 		/*FALLTHRU*/
930 	case MNTIOC_GETEXTMNTENT:
931 		/*FALLTHRU*/
932 	case MNTIOC_GETMNTANY:
933 		return (mntfs_ioctl(rval, fdes, cmd, arg));
934 	}
935 
936 	if ((cmd & 0xff00) == ZFS_IOC)
937 		return (zfs_ioctl(rval, fdes, cmd, arg));
938 
939 	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
940 }
941 
942 /*
943  * Unfortunately, pwrite()'s behavior differs between S10 and Nevada when
944  * applied to files opened with O_APPEND.  The offset argument is ignored and
945  * the buffer is appended to the target file in S10, whereas the current file
946  * position is ignored in Nevada (i.e., pwrite() acts as though the target file
947  * wasn't opened with O_APPEND).  This is a result of the fix for CR 6655660
948  * (pwrite() must ignore the O_APPEND/FAPPEND flag).
949  *
950  * We emulate the old S10 pwrite() behavior by checking whether the target file
951  * was opened with O_APPEND.  If it was, then invoke the write() system call
952  * instead of pwrite(); otherwise, invoke the pwrite() system call as usual.
953  */
954 static int
955 s10_pwrite(sysret_t *rval, int fd, const void *bufferp, size_t num_bytes,
956     off_t offset)
957 {
958 	int err;
959 
960 	if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0)
961 		return (err);
962 	if (rval->sys_rval1 & O_APPEND)
963 		return (__systemcall(rval, SYS_write + 1024, fd, bufferp,
964 		    num_bytes));
965 	return (__systemcall(rval, SYS_pwrite + 1024, fd, bufferp, num_bytes,
966 	    offset));
967 }
968 
969 #ifndef	_LP64
970 /*
971  * This is the large file version of the pwrite() system call for 32-bit
972  * processes.  This exists for the same reason that s10_pwrite() exists; see
973  * the comment above s10_pwrite().
974  */
975 static int
976 s10_pwrite64(sysret_t *rval, int fd, const void *bufferp, size32_t num_bytes,
977     uint32_t offset_1, uint32_t offset_2)
978 {
979 	int err;
980 
981 	if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0)
982 		return (err);
983 	if (rval->sys_rval1 & O_APPEND)
984 		return (__systemcall(rval, SYS_write + 1024, fd, bufferp,
985 		    num_bytes));
986 	return (__systemcall(rval, SYS_pwrite64 + 1024, fd, bufferp,
987 	    num_bytes, offset_1, offset_2));
988 }
989 #endif	/* !_LP64 */
990 
991 #define	S10_AC_PROC		(0x1 << 28)
992 #define	S10_AC_TASK		(0x2 << 28)
993 #define	S10_AC_FLOW		(0x4 << 28)
994 #define	S10_AC_MODE(x)		((x) & 0xf0000000)
995 #define	S10_AC_OPTION(x)	((x) & 0x0fffffff)
996 
997 /*
998  * The mode shift, mode mask and option mask for acctctl have changed.  The
999  * mode is currently the top full byte and the option is the lower 3 full bytes.
1000  */
1001 int
1002 s10_acctctl(sysret_t *rval, int cmd, void *buf, size_t bufsz)
1003 {
1004 	int mode = S10_AC_MODE(cmd);
1005 	int option = S10_AC_OPTION(cmd);
1006 
1007 	switch (mode) {
1008 	case S10_AC_PROC:
1009 		mode = AC_PROC;
1010 		break;
1011 	case S10_AC_TASK:
1012 		mode = AC_TASK;
1013 		break;
1014 	case S10_AC_FLOW:
1015 		mode = AC_FLOW;
1016 		break;
1017 	default:
1018 		return (S10_TRUSS_POINT_3(rval, SYS_acctctl, EINVAL, cmd, buf,
1019 		    bufsz));
1020 	}
1021 
1022 	return (__systemcall(rval, SYS_acctctl + 1024, mode | option, buf,
1023 	    bufsz));
1024 }
1025 
1026 /*
1027  * The Audit Policy parameters have changed due to:
1028  *    6466722 audituser and AUDIT_USER are defined, unused, undocumented and
1029  *            should be removed.
1030  *
1031  * In S10 we had the following flag:
1032  *	#define AUDIT_USER 0x0040
1033  * which doesn't exist in Solaris Next where the subsequent flags are shifted
1034  * down.  For example, in S10 we had:
1035  *	#define AUDIT_GROUP     0x0080
1036  * but on Solaris Next we have:
1037  *	#define AUDIT_GROUP     0x0040
1038  * AUDIT_GROUP has the value AUDIT_USER had in S10 and all of the subsequent
1039  * bits are also shifted one place.
1040  *
1041  * When we're getting or setting the Audit Policy parameters we need to
1042  * shift the outgoing or incoming bits into their proper positions.  Since
1043  * S10_AUDIT_USER was always unused, we always clear that bit on A_GETPOLICY.
1044  *
1045  * The command we care about, BSM_AUDITCTL, passes the most parameters (3),
1046  * so declare this function to take up to 4 args and just pass them on.
1047  * The number of parameters for s10_auditsys needs to be equal to the BSM_*
1048  * subcommand that has the most parameters, since we want to pass all
1049  * parameters through, regardless of which subcommands we interpose on.
1050  *
1051  * Note that the auditsys system call uses the SYSENT_AP macro wrapper instead
1052  * of the more common SYSENT_CI macro.  This means the return value is a
1053  * SE_64RVAL so the syscall table uses RV_64RVAL.
1054  */
1055 
1056 #define	S10_AUDIT_HMASK	0xffffffc0
1057 #define	S10_AUDIT_LMASK	0x3f
1058 
1059 int
1060 s10_auditsys(sysret_t *rval, int bsmcmd, intptr_t a0, intptr_t a1, intptr_t a2)
1061 {
1062 	int	err;
1063 	uint_t	m;
1064 
1065 	if (bsmcmd != BSM_AUDITCTL)
1066 		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1,
1067 		    a2));
1068 
1069 	if ((int)a0 == A_GETPOLICY) {
1070 		if ((err = __systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0,
1071 		    &m, a2)) != 0)
1072 			return (err);
1073 		m = ((m & S10_AUDIT_HMASK) << 1) | (m & S10_AUDIT_LMASK);
1074 		if (s10_uucopy(&m, (void *)a1, sizeof (m)) != 0)
1075 			return (EFAULT);
1076 		return (0);
1077 
1078 	} else if ((int)a0 == A_SETPOLICY) {
1079 		if (s10_uucopy((const void *)a1, &m, sizeof (m)) != 0)
1080 			return (EFAULT);
1081 		m = ((m >> 1) & S10_AUDIT_HMASK) | (m & S10_AUDIT_LMASK);
1082 		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, &m,
1083 		    a2));
1084 	}
1085 
1086 	return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1, a2));
1087 }
1088 
1089 /*
1090  * Determine whether the executable passed to SYS_exec or SYS_execve is a
1091  * native executable.  The s10_npreload.so invokes the B_S10_NATIVE brand
1092  * operation which patches up the processes exec info to eliminate any trace
1093  * of the wrapper.  That will make pgrep and other commands that examine
1094  * process' executable names and command-line parameters work properly.
1095  */
1096 static int
1097 s10_exec_native(sysret_t *rval, const char *fname, const char **argp,
1098     const char **envp)
1099 {
1100 	const char *filename = fname;
1101 	char path[64];
1102 	int err;
1103 
1104 	/* Get a copy of the executable we're trying to run */
1105 	path[0] = '\0';
1106 	(void) s10_uucopystr(filename, path, sizeof (path));
1107 
1108 	/* Check if we're trying to run a native binary */
1109 	if (strncmp(path, "/.SUNWnative/usr/lib/brand/solaris10/s10_native",
1110 	    sizeof (path)) != 0)
1111 		return (0);
1112 
1113 	/* Skip the first element in the argv array */
1114 	argp++;
1115 
1116 	/*
1117 	 * The the path of the dynamic linker is the second parameter
1118 	 * of s10_native_exec().
1119 	 */
1120 	if (s10_uucopy(argp, &filename, sizeof (char *)) != 0)
1121 		return (EFAULT);
1122 
1123 	/* If an exec call succeeds, it never returns */
1124 	err = __systemcall(rval, SYS_brand + 1024, B_EXEC_NATIVE, filename,
1125 	    argp, envp, NULL, NULL, NULL);
1126 	s10_assert(err != 0);
1127 	return (err);
1128 }
1129 
1130 /*
1131  * Interpose on the SYS_exec syscall to detect native wrappers.
1132  */
1133 int
1134 s10_exec(sysret_t *rval, const char *fname, const char **argp)
1135 {
1136 	int err;
1137 
1138 	if ((err = s10_exec_native(rval, fname, argp, NULL)) != 0)
1139 		return (err);
1140 
1141 	/* If an exec call succeeds, it never returns */
1142 	err = __systemcall(rval, SYS_exec + 1024, fname, argp);
1143 	s10_assert(err != 0);
1144 	return (err);
1145 }
1146 
1147 /*
1148  * Interpose on the SYS_execve syscall to detect native wrappers.
1149  */
1150 int
1151 s10_execve(sysret_t *rval, const char *fname, const char **argp,
1152     const char **envp)
1153 {
1154 	int err;
1155 
1156 	if ((err = s10_exec_native(rval, fname, argp, envp)) != 0)
1157 		return (err);
1158 
1159 	/* If an exec call succeeds, it never returns */
1160 	err = __systemcall(rval, SYS_execve + 1024, fname, argp, envp);
1161 	s10_assert(err != 0);
1162 	return (err);
1163 }
1164 
1165 /*
1166  * S10's issetugid() syscall is now a subcode to privsys().
1167  */
1168 static int
1169 s10_issetugid(sysret_t *rval)
1170 {
1171 	return (__systemcall(rval, SYS_privsys + 1024, PRIVSYS_ISSETUGID,
1172 	    0, 0, 0, 0, 0));
1173 }
1174 
1175 /*
1176  * New last arg "block" flag should be zero.  The block flag is used by
1177  * the Opensolaris AIO implementation, which is now part of libc.
1178  */
1179 static int
1180 s10_sigqueue(sysret_t *rval, pid_t pid, int signo, void *value, int si_code)
1181 {
1182 	return (__systemcall(rval, SYS_sigqueue + 1024, pid, signo, value,
1183 	    si_code, 0));
1184 }
1185 
1186 static long
1187 s10_uname(sysret_t *rv, uintptr_t p1)
1188 {
1189 	struct utsname un, *unp = (struct utsname *)p1;
1190 	int rev, err;
1191 
1192 	if ((err = __systemcall(rv, SYS_uname + 1024, &un)) != 0)
1193 		return (err);
1194 
1195 	rev = atoi(&un.release[2]);
1196 	s10_assert(rev >= 11);
1197 	bzero(un.release, _SYS_NMLN);
1198 	(void) strlcpy(un.release, S10_UTS_RELEASE, _SYS_NMLN);
1199 	bzero(un.version, _SYS_NMLN);
1200 	(void) strlcpy(un.version, S10_UTS_VERSION, _SYS_NMLN);
1201 
1202 	/* copy out the modified uname info */
1203 	return (s10_uucopy(&un, unp, sizeof (un)));
1204 }
1205 
1206 int
1207 s10_sysinfo(sysret_t *rv, int command, char *buf, long count)
1208 {
1209 	char *value;
1210 	int len;
1211 
1212 	/*
1213 	 * We must interpose on the sysinfo(2) commands SI_RELEASE and
1214 	 * SI_VERSION; all others get passed to the native sysinfo(2)
1215 	 * command.
1216 	 */
1217 	switch (command) {
1218 		case SI_RELEASE:
1219 			value = S10_UTS_RELEASE;
1220 			break;
1221 
1222 		case SI_VERSION:
1223 			value = S10_UTS_VERSION;
1224 			break;
1225 
1226 		default:
1227 			/*
1228 			 * The default action is to pass the command to the
1229 			 * native sysinfo(2) syscall.
1230 			 */
1231 			return (__systemcall(rv, SYS_systeminfo + 1024,
1232 			    command, buf, count));
1233 	}
1234 
1235 	len = strlen(value) + 1;
1236 	if (count > 0) {
1237 		if (s10_uucopystr(value, buf, count) != 0)
1238 			return (EFAULT);
1239 
1240 		/* Assure NULL termination of buf as s10_uucopystr() doesn't. */
1241 		if (len > count && s10_uucopy("\0", buf + (count - 1), 1) != 0)
1242 			return (EFAULT);
1243 	}
1244 
1245 	/*
1246 	 * On success, sysinfo(2) returns the size of buffer required to hold
1247 	 * the complete value plus its terminating NULL byte.
1248 	 */
1249 	(void) S10_TRUSS_POINT_3(rv, SYS_systeminfo, 0, command, buf, count);
1250 	rv->sys_rval1 = len;
1251 	rv->sys_rval2 = 0;
1252 	return (0);
1253 }
1254 
1255 #ifdef	__x86
1256 #ifdef	__amd64
1257 /*
1258  * 64-bit x86 LWPs created by SYS_lwp_create start here if they need to set
1259  * their %fs registers to the legacy Solaris 10 selector value.
1260  *
1261  * This function does three things:
1262  *
1263  *	1.  Trap to the kernel so that it can set %fs to the legacy Solaris 10
1264  *	    selector value.
1265  *	2.  Read the LWP's true entry point (the entry point supplied by libc
1266  *	    when SYS_lwp_create was invoked) from %r14.
1267  *	3.  Eliminate this function's stack frame and pass control to the LWP's
1268  *	    true entry point.
1269  *
1270  * See the comment above s10_lwp_create_correct_fs() (see below) for the reason
1271  * why this function exists.
1272  */
/*ARGSUSED*/
static void
s10_lwp_create_entry_point(void *ulwp_structp)
{
	sysret_t rval;

	/*
	 * The new LWP's %fs register is initially zero, but libc won't
	 * function correctly when %fs is zero.  Change the LWP's %fs register
	 * via SYS_brand.  The result of the brand operation is deliberately
	 * discarded.
	 */
	(void) __systemcall(&rval, SYS_brand + 1024, B_S10_FSREGCORRECTION);

	/*
	 * Jump to the true entry point, which is stored in %r14.
	 * Remove our stack frame before jumping so that
	 * s10_lwp_create_entry_point() won't be seen in stack traces.
	 *
	 * NOTE: s10_lwp_create_entry_point() pushes %r12 onto its stack frame
	 * so that it can use it as a temporary register.  We don't restore %r12
	 * in this assembly block because we don't care about its value (and
	 * neither does _lwp_start()).  Besides, the System V ABI AMD64
	 * Architecture Processor Supplement doesn't specify that %r12 should
	 * have a special value when LWPs start, so we can ignore its value when
	 * we jump to the true entry point.  Furthermore, %r12 is a callee-saved
	 * register, so the true entry point should push %r12 onto its stack
	 * before using the register.  We ignore %r14 after we read it for
	 * similar reasons.
	 *
	 * NOTE: The compiler will generate a function epilogue for this
	 * function despite the fact that the LWP will never execute it.
	 * We could hand-code this entire function in assembly to eliminate
	 * the epilogue, but the epilogue is only three or four instructions,
	 * so we wouldn't save much space.  Besides, why would we want
	 * to create yet another ugly, hard-to-maintain assembly function when
	 * we could write most of it in C?
	 */
	__asm__ __volatile__(
	    "movq %0, %%rdi\n\t"	/* pass ulwp_structp as arg1 */
	    "movq %%rbp, %%rsp\n\t"	/* eliminate the stack frame */
	    "popq %%rbp\n\t"		/* reload %rbp from the stack */
	    "jmp *%%r14\n\t"		/* jump to the true entry point */
	    : : "r" (ulwp_structp));
	/*NOTREACHED*/
}
1318 
1319 /*
1320  * The S10 libc expects that %fs will be nonzero for new 64-bit x86 LWPs but the
 * Nevada kernel clears %fs for such LWPs.  Unfortunately, new LWPs do not issue
1322  * SYS_lwp_private (see s10_lwp_private() below) after they are created, so
1323  * we must ensure that new LWPs invoke a brand operation that sets %fs to a
1324  * nonzero value immediately after their creation.
1325  *
1326  * The easiest way to do this is to make new LWPs start at a special function,
1327  * s10_lwp_create_entry_point() (see its definition above), that invokes the
1328  * brand operation that corrects %fs.  We'll store the entry points of new LWPs
1329  * in their %r14 registers so that s10_lwp_create_entry_point() can find and
1330  * call them after invoking the special brand operation.  %r14 is a callee-saved
1331  * register; therefore, any functions invoked by s10_lwp_create_entry_point()
1332  * and all functions dealing with signals (e.g., sigacthandler()) will preserve
1333  * %r14 for s10_lwp_create_entry_point().
1334  *
1335  * The Nevada kernel can safely work with nonzero %fs values because the kernel
1336  * configures per-thread %fs segment descriptors so that the legacy %fs selector
1337  * value will still work.  See the comment in lwp_load() regarding %fs and
1338  * %fsbase in 64-bit x86 processes.
1339  *
1340  * This emulation exists thanks to CRs 6467491 and 6501650.
1341  */
1342 static int
1343 s10_lwp_create_correct_fs(sysret_t *rval, ucontext_t *ucp, int flags,
1344     id_t *new_lwp)
1345 {
1346 	ucontext_t s10_uc;
1347 
1348 	/*
1349 	 * Copy the supplied ucontext_t structure to the local stack
1350 	 * frame and store the new LWP's entry point (the value of %rip
1351 	 * stored in the ucontext_t) in the new LWP's %r14 register.
1352 	 * Then make s10_lwp_create_entry_point() the new LWP's entry
1353 	 * point.
1354 	 */
1355 	if (s10_uucopy(ucp, &s10_uc, sizeof (s10_uc)) != 0)
1356 		return (EFAULT);
1357 	s10_uc.uc_mcontext.gregs[REG_R14] = s10_uc.uc_mcontext.gregs[REG_RIP];
1358 	s10_uc.uc_mcontext.gregs[REG_RIP] = (greg_t)s10_lwp_create_entry_point;
1359 
1360 	/*
1361 	 * Issue SYS_lwp_create to create the new LWP.  We pass the
1362 	 * modified ucontext_t to make sure that the new LWP starts at
1363 	 * s10_lwp_create_entry_point().
1364 	 */
1365 	return (__systemcall(rval, SYS_lwp_create + 1024, &s10_uc,
1366 	    flags, new_lwp));
1367 }
1368 #endif	/* __amd64 */
1369 
1370 /*
1371  * This function is invoked on x86 systems when SYS_lwp_create is issued but no
1372  * %fs register correction is necessary.
1373  *
1374  * See the comment above s10_lwp_create_correct_fs() above for more details.
1375  */
1376 static int
1377 s10_lwp_create(sysret_t *rval, ucontext_t *ucp, int flags, id_t *new_lwp)
1378 {
1379 	return (__systemcall(rval, SYS_lwp_create + 1024, ucp, flags, new_lwp));
1380 }
1381 
1382 /*
1383  * SYS_lwp_private is issued by libc_init() to set %fsbase in 64-bit x86
1384  * processes.  The Nevada kernel sets %fs to zero but the S10 libc expects
1385  * %fs to be nonzero.  We'll pass the issued system call to the kernel untouched
1386  * and invoke a brand operation to set %fs to the legacy S10 selector value.
1387  *
1388  * This emulation exists thanks to CRs 6467491 and 6501650.
1389  */
1390 static int
1391 s10_lwp_private(sysret_t *rval, int cmd, int which, uintptr_t base)
1392 {
1393 #ifdef	__amd64
1394 	int err;
1395 
1396 	/*
1397 	 * The current LWP's %fs register should be zero.  Determine whether the
1398 	 * Solaris 10 libc with which we're working functions correctly when %fs
1399 	 * is zero by calling thr_main() after issuing the SYS_lwp_private
1400 	 * syscall.  If thr_main() barfs (returns -1), then change the LWP's %fs
1401 	 * register via SYS_brand and patch s10_sysent_table so that issuing
1402 	 * SYS_lwp_create executes s10_lwp_create_correct_fs() rather than the
1403 	 * default s10_lwp_create().  s10_lwp_create_correct_fs() will
1404 	 * guarantee that new LWPs will have correct %fs values.
1405 	 */
1406 	if ((err = __systemcall(rval, SYS_lwp_private + 1024, cmd, which,
1407 	    base)) != 0)
1408 		return (err);
1409 	if (thr_main() == -1) {
1410 		/*
1411 		 * SYS_lwp_private is only issued by libc_init(), which is
1412 		 * executed when libc is first loaded by ld.so.1.  Thus we
1413 		 * are guaranteed to be single-threaded at this point.  Even
1414 		 * if we were multithreaded at this point, writing a 64-bit
1415 		 * value to the st_callc field of a s10_sysent_table
1416 		 * entry is guaranteed to be atomic on 64-bit x86 chips
1417 		 * as long as the field is not split across cache lines
1418 		 * (It shouldn't be.).  See chapter 8, section 1.1 of
1419 		 * "The Intel 64 and IA32 Architectures Software Developer's
1420 		 * Manual," Volume 3A for more details.
1421 		 */
1422 		s10_sysent_table[SYS_lwp_create].st_callc =
1423 		    (sysent_cb_t)s10_lwp_create_correct_fs;
1424 		return (__systemcall(rval, SYS_brand + 1024,
1425 		    B_S10_FSREGCORRECTION));
1426 	}
1427 	return (0);
1428 #else	/* !__amd64 */
1429 	return (__systemcall(rval, SYS_lwp_private + 1024, cmd, which, base));
1430 #endif	/* !__amd64 */
1431 }
1432 #endif	/* __x86 */
1433 
1434 /*
1435  * The Opensolaris versions of lwp_mutex_timedlock() and lwp_mutex_trylock()
1436  * add an extra argument to the interfaces, a uintptr_t value for the mutex's
1437  * mutex_owner field.  The Solaris 10 libc assigns the mutex_owner field at
1438  * user-level, so we just make the extra argument be zero in both syscalls.
1439  */
1440 
1441 static int
1442 s10_lwp_mutex_timedlock(sysret_t *rval, lwp_mutex_t *lp, timespec_t *tsp)
1443 {
1444 	return (__systemcall(rval, SYS_lwp_mutex_timedlock + 1024, lp, tsp, 0));
1445 }
1446 
1447 static int
1448 s10_lwp_mutex_trylock(sysret_t *rval, lwp_mutex_t *lp)
1449 {
1450 	return (__systemcall(rval, SYS_lwp_mutex_trylock + 1024, lp, 0));
1451 }
1452 
1453 /*
1454  * If the emul_global_zone flag is set then emulate some aspects of the
1455  * zone system call.  In particular, emulate the global zone ID on the
1456  * ZONE_LOOKUP subcommand and emulate some of the global zone attributes
1457  * on the ZONE_GETATTR subcommand.  If the flag is not set or we're performing
1458  * some other operation, simply pass the calls through.
1459  */
1460 int
1461 s10_zone(sysret_t *rval, int cmd, void *arg1, void *arg2, void *arg3,
1462     void *arg4)
1463 {
1464 	char		*aval;
1465 	int		len;
1466 	zoneid_t	zid;
1467 	int		attr;
1468 	char		*buf;
1469 	size_t		bufsize;
1470 
1471 	/*
1472 	 * We only emulate the zone syscall for a subset of specific commands,
1473 	 * otherwise we just pass the call through.
1474 	 */
1475 	if (!emul_global_zone)
1476 		return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2,
1477 		    arg3, arg4));
1478 
1479 	switch (cmd) {
1480 	case ZONE_LOOKUP:
1481 		(void) S10_TRUSS_POINT_1(rval, SYS_zone, 0, cmd);
1482 		rval->sys_rval1 = GLOBAL_ZONEID;
1483 		rval->sys_rval2 = 0;
1484 		return (0);
1485 
1486 	case ZONE_GETATTR:
1487 		zid = (zoneid_t)(uintptr_t)arg1;
1488 		attr = (int)(uintptr_t)arg2;
1489 		buf = (char *)arg3;
1490 		bufsize = (size_t)arg4;
1491 
1492 		/*
1493 		 * If the request is for the global zone then we're emulating
1494 		 * that, otherwise pass this thru.
1495 		 */
1496 		if (zid != GLOBAL_ZONEID)
1497 			goto passthru;
1498 
1499 		switch (attr) {
1500 		case ZONE_ATTR_NAME:
1501 			aval = GLOBAL_ZONENAME;
1502 			break;
1503 
1504 		case ZONE_ATTR_BRAND:
1505 			aval = NATIVE_BRAND_NAME;
1506 			break;
1507 		default:
1508 			/*
1509 			 * We only emulate a subset of the attrs, use the
1510 			 * real zone id to pass thru the rest.
1511 			 */
1512 			arg1 = (void *)(uintptr_t)zoneid;
1513 			goto passthru;
1514 		}
1515 
1516 		(void) S10_TRUSS_POINT_5(rval, SYS_zone, 0, cmd, zid, attr,
1517 		    buf, bufsize);
1518 
1519 		len = strlen(aval) + 1;
1520 		if (len > bufsize)
1521 			return (ENAMETOOLONG);
1522 
1523 		if (buf != NULL) {
1524 			if (len == 1) {
1525 				if (s10_uucopy("\0", buf, 1) != 0)
1526 					return (EFAULT);
1527 			} else {
1528 				if (s10_uucopystr(aval, buf, len) != 0)
1529 					return (EFAULT);
1530 
1531 				/*
1532 				 * Assure NULL termination of "buf" as
1533 				 * s10_uucopystr() does NOT.
1534 				 */
1535 				if (s10_uucopy("\0", buf + (len - 1), 1) != 0)
1536 					return (EFAULT);
1537 			}
1538 		}
1539 
1540 		rval->sys_rval1 = len;
1541 		rval->sys_rval2 = 0;
1542 		return (0);
1543 
1544 	default:
1545 		break;
1546 	}
1547 
1548 passthru:
1549 	return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2, arg3,
1550 	    arg4));
1551 }
1552 
/*
 * Tear down a libc stdio stream while leaving the file descriptor it was
 * using open.  The descriptor is duplicated before fclose() closes it, and
 * the duplicate is then moved back onto the original descriptor number.
 * Nothing is done for a NULL stream, a stream without a valid descriptor,
 * or when the descriptor cannot be duplicated.
 */
static void
s10_close_fh(FILE *file)
{
	int orig_fd, saved_fd;

	if (file == NULL || (orig_fd = fileno(file)) < 0)
		return;

	if ((saved_fd = dup(orig_fd)) == -1)
		return;

	(void) fclose(file);
	(void) dup2(saved_fd, orig_fd);
	(void) close(saved_fd);
}
1576 
1577 /*ARGSUSED*/
1578 int
1579 s10_init(int argc, char *argv[], char *envp[])
1580 {
1581 	sysret_t		rval;
1582 	s10_brand_reg_t		reg;
1583 	s10_elf_data_t		sed;
1584 	auxv_t			*ap;
1585 	uintptr_t		*p;
1586 	int			i, err;
1587 	char			*bname;
1588 
1589 	/* Sanity check our translation table return value codes */
1590 	for (i = 0; i < NSYSCALL; i++) {
1591 		s10_sysent_table_t *est = &(s10_sysent_table[i]);
1592 		s10_assert(BIT_ONLYONESET(est->st_args & RV_MASK));
1593 	}
1594 
1595 	/*
1596 	 * We need to shutdown all libc stdio.  libc stdio normally goes to
1597 	 * file descriptors, but since we're actually part of a another
1598 	 * process we don't own these file descriptors and we can't make
1599 	 * any assumptions about their state.
1600 	 */
1601 	s10_close_fh(stdin);
1602 	s10_close_fh(stdout);
1603 	s10_close_fh(stderr);
1604 
1605 	/*
1606 	 * Cache the pid of the zone's init process and determine if
1607 	 * we're init(1m) for the zone.  Remember: we might be init
1608 	 * now, but as soon as we fork(2) we won't be.
1609 	 */
1610 	(void) get_initpid_info();
1611 
1612 	/* get the current zoneid */
1613 	err = __systemcall(&rval, SYS_zone, ZONE_LOOKUP, NULL);
1614 	s10_assert(err == 0);
1615 	zoneid = (zoneid_t)rval.sys_rval1;
1616 
1617 	/* Get the zone's emulation bitmap. */
1618 	if ((err = __systemcall(&rval, SYS_zone, ZONE_GETATTR, zoneid,
1619 	    S10_EMUL_BITMAP, emul_bitmap, sizeof (emul_bitmap))) != 0) {
1620 		s10_abort(err, "The zone's patch level is unsupported");
1621 		/*NOTREACHED*/
1622 	}
1623 
1624 	bname = basename(argv[0]);
1625 
1626 	/*
1627 	 * In general we want the S10 commands that are zone-aware to continue
1628 	 * to behave as they normally do within a zone.  Since these commands
1629 	 * are zone-aware, they should continue to "do the right thing".
1630 	 * However, some zone-aware commands aren't going to work the way
1631 	 * we expect them to inside the branded zone.  In particular, the pkg
1632 	 * and patch commands will not properly manage all pkgs/patches
1633 	 * unless the commands think they are running in the global zone.  For
1634 	 * these commands we want to emulate the global zone.
1635 	 *
1636 	 * We don't do any emulation for pkgcond since it is typically used
1637 	 * in pkg/patch postinstall scripts and we want those scripts to do
1638 	 * the right thing inside a zone.
1639 	 *
1640 	 * One issue is the handling of hollow pkgs.  Since the pkgs are
1641 	 * hollow, they won't use pkgcond in their postinstall scripts.  These
1642 	 * pkgs typically are installing drivers so we handle that by
1643 	 * replacing add_drv and rem_drv in the s10_boot script.
1644 	 */
1645 	if (strcmp("pkgadd", bname) == 0 || strcmp("pkgrm", bname) == 0 ||
1646 	    strcmp("patchadd", bname) == 0 || strcmp("patchrm", bname) == 0)
1647 		emul_global_zone = B_TRUE;
1648 
1649 	/*
1650 	 * Register our syscall emulation table with the kernel.
1651 	 * Note that we don't have to do invoke (syscall_number + 1024)
1652 	 * until we've actually establised a syscall emulation callback
1653 	 * handler address, which is what we're doing with this brand
1654 	 * syscall.
1655 	 */
1656 	reg.sbr_version = S10_VERSION;
1657 	reg.sbr_handler = (caddr_t)s10_handler;
1658 	if ((err = __systemcall(&rval, SYS_brand, B_REGISTER, &reg)) != 0) {
1659 		s10_abort(err, "Failed to brand current process");
1660 		/*NOTREACHED*/
1661 	}
1662 
1663 	/* Get data about the executable we're running from the kernel. */
1664 	if ((err = __systemcall(&rval, SYS_brand + 1024,
1665 	    B_ELFDATA, (void *)&sed)) != 0) {
1666 		s10_abort(err,
1667 		    "Failed to get required brand ELF data from the kernel");
1668 		/*NOTREACHED*/
1669 	}
1670 
1671 	/*
1672 	 * Find the aux vector on the stack.
1673 	 */
1674 	p = (uintptr_t *)envp;
1675 	while (*p != NULL)
1676 		p++;
1677 
1678 	/*
1679 	 * p is now pointing at the 0 word after the environ pointers.
1680 	 * After that is the aux vectors.
1681 	 *
1682 	 * The aux vectors are currently pointing to the brand emulation
1683 	 * library and associated linker.  We're going to change them to
1684 	 * point to the brand executable and associated linker (or to no
1685 	 * linker for static binaries).  This matches the process data
1686 	 * stored within the kernel and visible from /proc, which was
	 * all set up in s10_elfexec().  We do this so that when a debugger
	 * attaches to the process it sees the process as a normal solaris
	 * process; this brand emulation library and everything on its
	 * link map will not be visible, unless our librtld_db plugin
	 * is used.  Note that this is very different from how Linux
	 * branded processes are implemented within lx branded zones.
	 * In that situation, the primary linkmap of the process is the
	 * brand emulation library's linkmap, not the Linux application's
	 * linkmap.
1696 	 *
1697 	 * We also need to clear the AF_SUN_NOPLM flag from the AT_SUN_AUXFLAGS
1698 	 * aux vector.  This flag told our linker that we don't have a
1699 	 * primary link map.  Now that our linker is done initializing, we
1700 	 * want to clear this flag before we transfer control to the
	 * application's copy of the linker, since we want that linker to have
1702 	 * a primary link map which will be the link map for the application
1703 	 * we're running.
1704 	 */
1705 	p++;
1706 	for (ap = (auxv_t *)p; ap->a_type != AT_NULL; ap++) {
1707 		switch (ap->a_type) {
1708 			case AT_BASE:
1709 				/* Hide AT_BASE if static binary */
1710 				if (sed.sed_base == NULL) {
1711 					ap->a_type = AT_IGNORE;
1712 					ap->a_un.a_val = NULL;
1713 				} else {
1714 					ap->a_un.a_val = sed.sed_base;
1715 				}
1716 				break;
1717 			case AT_ENTRY:
1718 				ap->a_un.a_val = sed.sed_entry;
1719 				break;
1720 			case AT_PHDR:
1721 				ap->a_un.a_val = sed.sed_phdr;
1722 				break;
1723 			case AT_PHENT:
1724 				ap->a_un.a_val = sed.sed_phent;
1725 				break;
1726 			case AT_PHNUM:
1727 				ap->a_un.a_val = sed.sed_phnum;
1728 				break;
1729 			case AT_SUN_AUXFLAGS:
1730 				ap->a_un.a_val &= ~AF_SUN_NOPLM;
1731 				break;
1732 			case AT_SUN_EMULATOR:
1733 				/*
				 * ld.so.1 inspects AT_SUN_EMULATOR to see if
				 * it is the linker for the brand emulation
1736 				 * library.  Hide AT_SUN_EMULATOR, as the
1737 				 * linker we are about to jump to is the linker
1738 				 * for the binary.
1739 				 */
1740 				ap->a_type = AT_IGNORE;
1741 				ap->a_un.a_val = NULL;
1742 				break;
1743 			case AT_SUN_LDDATA:
1744 				/* Hide AT_SUN_LDDATA if static binary */
1745 				if (sed.sed_lddata == NULL) {
1746 					ap->a_type = AT_IGNORE;
1747 					ap->a_un.a_val = NULL;
1748 				} else {
1749 					ap->a_un.a_val = sed.sed_lddata;
1750 				}
1751 				break;
1752 			default:
1753 				break;
1754 		}
1755 	}
1756 
1757 	s10_runexe(argv, sed.sed_ldentry);
1758 	/*NOTREACHED*/
1759 	s10_abort(0, "s10_runexe() returned");
1760 	return (-1);
1761 }
1762 
1763 /*
1764  * This table must have at least NSYSCALL entries in it.
1765  *
1766  * The second parameter of each entry in the s10_sysent_table
1767  * contains the number of parameters and flags that describe the
1768  * syscall return value encoding.  See the block comments at the
1769  * top of this file for more information about the syscall return
1770  * value flags and when they should be used.
1771  */
1772 s10_sysent_table_t s10_sysent_table[] = {
1773 #if defined(__sparc) && !defined(__sparcv9)
1774 	EMULATE(s10_indir, 9 | RV_64RVAL),	/*  0 */
1775 #else /* !__sparc || __sparcv9 */
1776 	NOSYS,					/*  0 */
1777 #endif /* !__sparc || __sparcv9 */
1778 	NOSYS,					/*   1 */
1779 	NOSYS,					/*   2 */
1780 	NOSYS,					/*   3 */
1781 	NOSYS,					/*   4 */
1782 	NOSYS,					/*   5 */
1783 	NOSYS,					/*   6 */
1784 	NOSYS,					/*   7 */
1785 	NOSYS,					/*   8 */
1786 	NOSYS,					/*   9 */
1787 	NOSYS,					/*  10 */
1788 	EMULATE(s10_exec, 2 | RV_DEFAULT),	/*  11 */
1789 	NOSYS,					/*  12 */
1790 	NOSYS,					/*  13 */
1791 	NOSYS,					/*  14 */
1792 	NOSYS,					/*  15 */
1793 	NOSYS,					/*  16 */
1794 	NOSYS,					/*  17 */
1795 	NOSYS,					/*  18 */
1796 	NOSYS,					/*  19 */
1797 	NOSYS,					/*  20 */
1798 	NOSYS,					/*  21 */
1799 	NOSYS,					/*  22 */
1800 	NOSYS,					/*  23 */
1801 	NOSYS,					/*  24 */
1802 	NOSYS,					/*  25 */
1803 	NOSYS,					/*  26 */
1804 	NOSYS,					/*  27 */
1805 	NOSYS,					/*  28 */
1806 	NOSYS,					/*  29 */
1807 	NOSYS,					/*  30 */
1808 	NOSYS,					/*  31 */
1809 	NOSYS,					/*  32 */
1810 	NOSYS,					/*  33 */
1811 	NOSYS,					/*  34 */
1812 	NOSYS,					/*  35 */
1813 	NOSYS,					/*  36 */
1814 	NOSYS,					/*  37 */
1815 	NOSYS,					/*  38 */
1816 	NOSYS,					/*  39 */
1817 	NOSYS,					/*  40 */
1818 	NOSYS,					/*  41 */
1819 	NOSYS,					/*  42 */
1820 	NOSYS,					/*  43 */
1821 	NOSYS,					/*  44 */
1822 	NOSYS,					/*  45 */
1823 	NOSYS,					/*  46 */
1824 	NOSYS,					/*  47 */
1825 	NOSYS,					/*  48 */
1826 	NOSYS,					/*  49 */
1827 	NOSYS,					/*  50 */
1828 	NOSYS,					/*  51 */
1829 	NOSYS,					/*  52 */
1830 	NOSYS,					/*  53 */
1831 	EMULATE(s10_ioctl, 3 | RV_DEFAULT),	/*  54 */
1832 	NOSYS,					/*  55 */
1833 	NOSYS,					/*  56 */
1834 	NOSYS,					/*  57 */
1835 	NOSYS,					/*  58 */
1836 	EMULATE(s10_execve, 3 | RV_DEFAULT),	/*  59 */
1837 	NOSYS,					/*  60 */
1838 	NOSYS,					/*  61 */
1839 	NOSYS,					/*  62 */
1840 	NOSYS,					/*  63 */
1841 	NOSYS,					/*  64 */
1842 	NOSYS,					/*  65 */
1843 	NOSYS,					/*  66 */
1844 	NOSYS,					/*  67 */
1845 	NOSYS,					/*  68 */
1846 	NOSYS,					/*  69 */
1847 	NOSYS,					/*  70 */
1848 	EMULATE(s10_acctctl, 3 | RV_DEFAULT),	/*  71 */
1849 	NOSYS,					/*  72 */
1850 	NOSYS,					/*  73 */
1851 	NOSYS,					/*  74 */
1852 	EMULATE(s10_issetugid, 0 | RV_DEFAULT),	/*  75 */
1853 	NOSYS,					/*  76 */
1854 	NOSYS,					/*  77 */
1855 	NOSYS,					/*  78 */
1856 	NOSYS,					/*  79 */
1857 	NOSYS,					/*  80 */
1858 	NOSYS,					/*  81 */
1859 	NOSYS,					/*  82 */
1860 	NOSYS,					/*  83 */
1861 	NOSYS,					/*  84 */
1862 	NOSYS,					/*  85 */
1863 	NOSYS,					/*  86 */
1864 	NOSYS,					/*  87 */
1865 	NOSYS,					/*  88 */
1866 	NOSYS,					/*  89 */
1867 	NOSYS,					/*  90 */
1868 	NOSYS,					/*  91 */
1869 	NOSYS,					/*  92 */
1870 	NOSYS,					/*  93 */
1871 	NOSYS,					/*  94 */
1872 	NOSYS,					/*  95 */
1873 	NOSYS,					/*  96 */
1874 	NOSYS,					/*  97 */
1875 	NOSYS,					/*  98 */
1876 	NOSYS,					/*  99 */
1877 	NOSYS,					/* 100 */
1878 	NOSYS,					/* 101 */
1879 	NOSYS,					/* 102 */
1880 	NOSYS,					/* 103 */
1881 	NOSYS,					/* 104 */
1882 	NOSYS,					/* 105 */
1883 	NOSYS,					/* 106 */
1884 	NOSYS,					/* 107 */
1885 	NOSYS,					/* 108 */
1886 	NOSYS,					/* 109 */
1887 	NOSYS,					/* 110 */
1888 	NOSYS,					/* 111 */
1889 	NOSYS,					/* 112 */
1890 	NOSYS,					/* 113 */
1891 	NOSYS,					/* 114 */
1892 	NOSYS,					/* 115 */
1893 	NOSYS,					/* 116 */
1894 	NOSYS,					/* 117 */
1895 	NOSYS,					/* 118 */
1896 	NOSYS,					/* 119 */
1897 	NOSYS,					/* 120 */
1898 	NOSYS,					/* 121 */
1899 	NOSYS,					/* 122 */
1900 	NOSYS,					/* 123 */
1901 	NOSYS,					/* 124 */
1902 	NOSYS,					/* 125 */
1903 	NOSYS,					/* 126 */
1904 	NOSYS,					/* 127 */
1905 	NOSYS,					/* 128 */
1906 	NOSYS,					/* 129 */
1907 	NOSYS,					/* 130 */
1908 	NOSYS,					/* 131 */
1909 	NOSYS,					/* 132 */
1910 	NOSYS,					/* 133 */
1911 	NOSYS,					/* 134 */
1912 	EMULATE(s10_uname, 1 | RV_DEFAULT),	/* 135 */
1913 	NOSYS,					/* 136 */
1914 	NOSYS,					/* 137 */
1915 	NOSYS,					/* 138 */
1916 	EMULATE(s10_sysinfo, 3 | RV_DEFAULT),	/* 139 */
1917 	NOSYS,					/* 140 */
1918 	NOSYS,					/* 141 */
1919 	NOSYS,					/* 142 */
1920 	NOSYS,					/* 143 */
1921 	NOSYS,					/* 144 */
1922 	NOSYS,					/* 145 */
1923 	NOSYS,					/* 146 */
1924 	NOSYS,					/* 147 */
1925 	NOSYS,					/* 148 */
1926 	NOSYS,					/* 149 */
1927 	NOSYS,					/* 150 */
1928 	NOSYS,					/* 151 */
1929 	NOSYS,					/* 152 */
1930 	NOSYS,					/* 153 */
1931 	NOSYS,					/* 154 */
1932 	NOSYS,					/* 155 */
1933 	NOSYS,					/* 156 */
1934 	NOSYS,					/* 157 */
1935 	NOSYS,					/* 158 */
1936 #ifdef	__x86
1937 	EMULATE(s10_lwp_create, 3 | RV_DEFAULT), /* 159 */
1938 #else	/* !__x86 */
1939 	NOSYS,					/* 159 */
1940 #endif	/* !__x86 */
1941 	NOSYS,					/* 160 */
1942 	NOSYS,					/* 161 */
1943 	NOSYS,					/* 162 */
1944 	NOSYS,					/* 163 */
1945 	NOSYS,					/* 164 */
1946 	NOSYS,					/* 165 */
1947 #ifdef	__x86
1948 	EMULATE(s10_lwp_private, 3 | RV_DEFAULT), /* 166 */
1949 #else	/* !__x86 */
1950 	NOSYS,					/* 166 */
1951 #endif	/* !__x86 */
1952 	NOSYS,					/* 167 */
1953 	NOSYS,					/* 168 */
1954 	NOSYS,					/* 169 */
1955 	NOSYS,					/* 170 */
1956 	NOSYS,					/* 171 */
1957 	NOSYS,					/* 172 */
1958 	NOSYS,					/* 173 */
1959 	EMULATE(s10_pwrite, 4 | RV_DEFAULT),	/* 174 */
1960 	NOSYS,					/* 175 */
1961 	NOSYS,					/* 176 */
1962 	NOSYS,					/* 177 */
1963 	NOSYS,					/* 178 */
1964 	NOSYS,					/* 179 */
1965 	NOSYS,					/* 180 */
1966 	NOSYS,					/* 181 */
1967 	NOSYS,					/* 182 */
1968 	NOSYS,					/* 183 */
1969 	NOSYS,					/* 184 */
1970 	NOSYS,					/* 185 */
1971 	EMULATE(s10_auditsys, 4 | RV_64RVAL),	/* 186 */
1972 	NOSYS,					/* 187 */
1973 	NOSYS,					/* 188 */
1974 	NOSYS,					/* 189 */
1975 	EMULATE(s10_sigqueue, 4 | RV_DEFAULT),	/* 190 */
1976 	NOSYS,					/* 191 */
1977 	NOSYS,					/* 192 */
1978 	NOSYS,					/* 193 */
1979 	NOSYS,					/* 194 */
1980 	NOSYS,					/* 195 */
1981 	NOSYS,					/* 196 */
1982 	NOSYS,					/* 197 */
1983 	NOSYS,					/* 198 */
1984 	NOSYS,					/* 199 */
1985 	NOSYS,					/* 200 */
1986 	NOSYS,					/* 201 */
1987 	NOSYS,					/* 202 */
1988 	NOSYS,					/* 203 */
1989 	NOSYS,					/* 204 */
1990 	NOSYS,					/* 205 */
1991 	NOSYS,					/* 206 */
1992 	NOSYS,					/* 207 */
1993 	NOSYS,					/* 208 */
1994 	NOSYS,					/* 209 */
1995 	EMULATE(s10_lwp_mutex_timedlock, 2 | RV_DEFAULT),	/* 210 */
1996 	NOSYS,					/* 211 */
1997 	NOSYS,					/* 212 */
1998 	NOSYS,					/* 213 */
1999 	NOSYS,					/* 214 */
2000 	NOSYS,					/* 215 */
2001 	NOSYS,					/* 216 */
2002 	NOSYS,					/* 217 */
2003 	NOSYS,					/* 218 */
2004 	NOSYS,					/* 219 */
2005 	NOSYS,					/* 220 */
2006 	NOSYS,					/* 221 */
2007 	NOSYS,					/* 222 */
2008 #ifdef	_LP64
2009 	NOSYS,					/* 223 */
2010 #else	/* !_LP64 */
2011 	EMULATE(s10_pwrite64, 5 | RV_DEFAULT),	/* 223 */
2012 #endif	/* !_LP64 */
2013 	NOSYS,					/* 224 */
2014 	NOSYS,					/* 225 */
2015 	NOSYS,					/* 226 */
2016 	EMULATE(s10_zone, 5 | RV_DEFAULT),	/* 227 */
2017 	NOSYS,					/* 228 */
2018 	NOSYS,					/* 229 */
2019 	NOSYS,					/* 230 */
2020 	NOSYS,					/* 231 */
2021 	NOSYS,					/* 232 */
2022 	NOSYS,					/* 233 */
2023 	NOSYS,					/* 234 */
2024 	NOSYS,					/* 235 */
2025 	NOSYS,					/* 236 */
2026 	NOSYS,					/* 237 */
2027 	NOSYS,					/* 238 */
2028 	NOSYS,					/* 239 */
2029 	NOSYS,					/* 240 */
2030 	NOSYS,					/* 241 */
2031 	NOSYS,					/* 242 */
2032 	NOSYS,					/* 243 */
2033 	NOSYS,					/* 244 */
2034 	NOSYS,					/* 245 */
2035 	NOSYS,					/* 246 */
2036 	NOSYS,					/* 247 */
2037 	NOSYS,					/* 248 */
2038 	NOSYS,					/* 249 */
2039 	NOSYS,					/* 250 */
2040 	EMULATE(s10_lwp_mutex_trylock, 1 | RV_DEFAULT),		/* 251 */
2041 	NOSYS,					/* 252 */
2042 	NOSYS,					/* 253 */
2043 	NOSYS,					/* 254 */
2044 	NOSYS					/* 255 */
2045 };
2046