1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <strings.h>
31 #include <unistd.h>
32 #include <thread.h>
33 #include <sys/auxv.h>
34 #include <sys/bitmap.h>
35 #include <sys/brand.h>
36 #include <sys/inttypes.h>
37 #include <sys/lwp.h>
38 #include <sys/syscall.h>
39 #include <sys/systm.h>
40 #include <sys/utsname.h>
41 #include <sys/systeminfo.h>
42 #include <sys/zone.h>
43 #include <sys/stat.h>
44 #include <sys/mntent.h>
45 #include <sys/ctfs.h>
46 #include <sys/priv.h>
47 #include <sys/acctctl.h>
48 #include <libgen.h>
49 #include <bsm/audit.h>
50 #include <sys/crypto/ioctl.h>
51 #include <sys/fs/zfs.h>
52 #include <sys/zfs_ioctl.h>
53 #include <sys/ucontext.h>
54 
55 #include <s10_brand.h>
56 #include <s10_misc.h>
57 
58 /*
59  * Principles of emulation 101.
60  *
61  *
62  * *** Setting errno
63  *
 * Just don't do it.  This emulation library is loaded onto a
 * separate link map from the application whose address space we're
 * running in.  We have our own private copy of libc, so therefore
 * the errno value accessible from here is also private and changing
 * it will not affect any errno value that the process whose address
 * space we are running in will see.  To return an error condition we
 * should return the negated errno value we'd like the system to return.
 * For more information about this see the comment in s10_handler().
 * Basically, when we return to the caller that initiated the system
 * call it's their responsibility to set errno.
74  *
75  *
76  * *** Recursion Considerations
77  *
78  * When emulating system calls we need to be very careful about what
79  * library calls we invoke.  Library calls should be kept to a minimum.
80  * One issue is that library calls can invoke system calls, so if we're
81  * emulating a system call and we invoke a library call that depends on
82  * that system call we will probably enter a recursive loop, which would
83  * be bad.
84  *
85  *
86  * *** Return Values.
87  *
 * When declaring new syscall emulation functions, it is very important
 * to set the proper RV_* flags in the s10_sysent_table.  Upon failure,
 * syscall emulation functions should return an errno value.  Upon success
 * syscall emulation functions should return 0 and set the sysret_t return
 * value parameters accordingly.
93  *
94  * There are five possible syscall macro wrappers used in the kernel's system
95  * call sysent table.  These turn into the following return values:
96  *	SYSENT_CL	-> SYSENT_C or SYSENT_CI
97  *	SYSENT_C	SE_64RVAL		RV_DEFAULT
98  *	SYSENT_CI	SE_32RVAL1		RV_DEFAULT
99  *	SYSENT_2CI	SE_32RVAL1|SE_32RVAL2	RV_32RVAL2
100  *	SYSENT_AP	SE_64RVAL		RV_64RVAL
101  *
102  *
103  * *** Agent lwp considerations
104  *
 * It is currently impossible to do any emulation for these system calls
 * when they are being invoked on behalf of an agent lwp.  To understand why
 * it's impossible you have to understand how agent lwp syscalls work.
108  *
109  * The agent lwp syscall process works as follows:
110  *   1  The controlling process stops the target.
111  *   2  The controlling process injects an agent lwp which is also stopped.
112  *      This agent lwp assumes the userland stack and register values
113  *      of another stopped lwp in the current process.
114  *   3  The controlling process configures the agent lwp to start
115  *      executing the requested system call.
 *   4  The controlling process configures /proc to stop the agent lwp when
 *      it enters the requested system call.
 *   5  The controlling process allows the agent lwp to start executing.
 *   6  The agent lwp traps into the kernel to perform the requested system
 *      call and immediately stops.
 *   7  The controlling process copies all the arguments for the requested
 *      system call onto the agent lwp's stack.
 *   8  The controlling process configures /proc to stop the agent lwp
 *      when it completes the requested system call.
 *   9  The controlling process allows the agent lwp to start executing.
 *  10  The agent lwp executes the system call and then stops before returning
 *      to userland.
128  *  11  The controlling process copies the return value and return arguments
129  *      back from the agent lwps stack.
130  *  12  The controlling process destroys the agent lwp and restarts
131  *      the target process.
132  *
 * The fundamental problem is that when the agent executes the requested
 * system call in step 5, if we're emulating that system call then the
 * lwp is redirected back to our emulation layer without blocking
 * in the kernel.  But our emulation layer can't access the arguments
 * for the system call because they haven't been copied to the stack
 * yet and they still only exist in the controlling process's address
 * space.  This prevents us from being able to do any emulation of
 * agent lwp system calls.  Hence, currently our brand trap interposition
 * callback (s10_brand_syscall_callback_common) will detect if a system
 * call is being made by an agent lwp, and if this is the case it will
 * never redirect the system call to this emulation library.
144  *
 * In the future, if this proves to be a problem the easiest solution
 * would probably be to replace the branded versions of these applications
 * with their native counterparts.  Ie,  truss, plimit, and pfiles could be
 * replaced with wrapper scripts that execute the native versions of these
 * applications.  In the case of plimit and pfiles this should be pretty
 * straightforward.  Truss would probably be more tricky since it can
 * execute applications which would be branded applications, so in that
 * case it might be necessary to create a loadable library which could
 * be LD_PRELOADed into truss and this library would interpose on the
 * exec() system call to allow truss to correctly execute branded
 * processes.  It should be pointed out that this solution could work
 * because "native agent lwps" (ie, agent lwps created by native
 * processes) can be treated differently from "branded agent lwps" (ie,
 * agent lwps created by branded processes), since native agent lwps
 * would presumably be making native system calls and hence not need
 * any interposition.
161  *
162  */
163 
164 static zoneid_t zoneid;
165 static boolean_t emul_global_zone = B_FALSE;
166 static int emul_vers;
167 pid_t zone_init_pid;
168 
169 #define	EMULATE(cb, args)	{ (sysent_cb_t)(cb), (args) }
170 #define	NOSYS			EMULATE(s10_unimpl, (0 | RV_DEFAULT))
171 
172 typedef long (*sysent_cb_t)();
173 typedef struct s10_sysent_table {
174 	sysent_cb_t	st_callc;
175 	uintptr_t	st_args;
176 } s10_sysent_table_t;
177 s10_sysent_table_t s10_sysent_table[];
178 
179 #define	S10_UTS_RELEASE	"5.10"
180 #define	S10_UTS_VERSION	"Generic_Virtual"
181 
182 /*LINTED: static unused*/
183 static volatile int		s10_abort_err;
184 /*LINTED: static unused*/
185 static volatile const char	*s10_abort_msg;
186 /*LINTED: static unused*/
187 static volatile const char	*s10_abort_file;
188 /*LINTED: static unused*/
189 static volatile int		s10_abort_line;
190 
191 extern int errno;
192 
193 /*ARGSUSED*/
194 void
195 _s10_abort(int err, const char *msg, const char *file, int line)
196 {
197 	sysret_t rval;
198 
199 	/* Save the error message into convenient globals */
200 	s10_abort_err = err;
201 	s10_abort_msg = msg;
202 	s10_abort_file = file;
203 	s10_abort_line = line;
204 
205 	/* kill ourselves */
206 	abort();
207 
208 	/* If abort() didn't work, try something stronger. */
209 	(void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGKILL);
210 }
211 
212 static int
213 s10_uucopy(const void *from, void *to, size_t size)
214 {
215 	sysret_t rval;
216 
217 	if (__systemcall(&rval, SYS_uucopy + 1024, from, to, size) != 0)
218 		return (EFAULT);
219 	return (0);
220 }
221 
222 /*
223  * ATTENTION: uucopystr() does NOT ensure that string are null terminated!
224  */
225 static int
226 s10_uucopystr(const void *from, void *to, size_t size)
227 {
228 	sysret_t rval;
229 
230 	if (__systemcall(&rval, SYS_uucopystr + 1024, from, to, size) != 0)
231 		return (EFAULT);
232 	return (0);
233 }
234 
235 /*
236  * Figures out the PID of init for the zone.  Also returns a boolean
237  * indicating whether this process currently has that pid: if so,
238  * then at this moment, we are init.
239  */
240 static boolean_t
241 get_initpid_info(void)
242 {
243 	pid_t pid;
244 	sysret_t rval;
245 	int err;
246 
247 	/*
248 	 * Determine the current process PID and the PID of the zone's init.
249 	 * We use care not to call getpid() here, because we're not supposed
250 	 * to call getpid() until after the program is fully linked-- the
251 	 * first call to getpid() is a signal from the linker to debuggers
252 	 * that linking has been completed.
253 	 */
254 	if ((err = __systemcall(&rval, SYS_brand,
255 	    B_S10_PIDINFO, &pid, &zone_init_pid)) != 0) {
256 		s10_abort(err, "Failed to get init's pid");
257 	}
258 
259 	/*
260 	 * Note that we need to be cautious with the pid we get back--
261 	 * it should not be stashed and used in place of getpid(), since
262 	 * we might fork(2).  So we keep zone_init_pid and toss the pid
263 	 * we otherwise got.
264 	 */
265 	if (pid == zone_init_pid)
266 		return (B_TRUE);
267 
268 	return (B_FALSE);
269 }
270 
271 /*
272  * This function is defined to be NOSYS but it won't be called from the
273  * the kernel since the NOSYS system calls are not enabled in the kernel.
274  * Thus, the only time this function is called is directly from within the
275  * indirect system call path.
276  */
277 /*ARGSUSED*/
278 static long
279 s10_unimpl(sysret_t *rv, uintptr_t p1)
280 {
281 	sysret_t rval;
282 
283 	/*
284 	 * We'd like to print out some kind of error message here like
285 	 * "unsupported syscall", but we can't because it's not safe to
286 	 * assume that stderr or STDERR_FILENO actually points to something
287 	 * that is a terminal, and if we wrote to those files we could
288 	 * inadvertantly write to some applications open files, which would
289 	 * be bad.
290 	 *
291 	 * Normally, if an application calls an invalid system call
292 	 * it get a SIGSYS sent to it.  So we'll just go ahead and send
293 	 * ourselves a signal here.  Note that this is far from ideal since
294 	 * if the application has registered a signal handler, that signal
295 	 * handler may recieve a ucontext_t as the third parameter to
296 	 * indicate the context of the process when the signal was
297 	 * generated, and in this case that context will not be what the
298 	 * application is expecting.  Hence, we should probably create a
299 	 * brandsys() kernel function that can deliver the signal to us
300 	 * with the correct ucontext_t.
301 	 */
302 	(void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGSYS);
303 	return (ENOSYS);
304 }
305 
306 #if defined(__sparc) && !defined(__sparcv9)
307 /*
308  * Yuck.  For 32-bit sparc applications, handle indirect system calls.
309  * Note that we declare this interface to use the maximum number of
310  * system call arguments.  If we recieve a system call that uses less
311  * arguments, then the additional arguments will be garbage, but they
312  * will also be ignored so that should be ok.
313  */
314 static long
315 s10_indir(sysret_t *rv, int code,
316     uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4,
317     uintptr_t a5, uintptr_t a6, uintptr_t a7)
318 {
319 	s10_sysent_table_t *sst = &(s10_sysent_table[code]);
320 
321 	s10_assert(code < NSYSCALL);
322 	switch (sst->st_args & NARGS_MASK) {
323 	case 0:
324 		return ((sst->st_callc)(rv));
325 	case 1:
326 		return ((sst->st_callc)(rv, a0));
327 	case 2:
328 		return ((sst->st_callc)(rv, a0, a1));
329 	case 3:
330 		return ((sst->st_callc)(rv, a0, a1, a2));
331 	case 4:
332 		return ((sst->st_callc)(rv, a0, a1, a2, a3));
333 	case 5:
334 		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4));
335 	case 6:
336 		return ((sst->st_callc)(rv, rv, a0, a1, a2, a3, a4, a5));
337 	case 7:
338 		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6));
339 	case 8:
340 		return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6, a7));
341 	}
342 	s10_abort(0, "invalid entry in s10_sysent_table");
343 	return (EINVAL);
344 }
345 #endif /* __sparc && !__sparcv9 */
346 
/*
 * Copy a single member from one structure to another.  dst is the
 * destination structure, src is the source structure and member names
 * the member (present in both) to copy; the expansion is the assignment
 * dst.member = src.member.
 */
#define	struct_assign(dst, src, member)	(((dst).member) = ((src).member))
352 
353 /*
354  * The CRYPTO_GET_FUNCTION_LIST parameter structure crypto_function_list_t
355  * changed between S10 and Nevada, so we have to emulate the old S10
356  * crypto_function_list_t structure when interposing on the ioctl syscall.
357  */
358 typedef struct s10_crypto_function_list {
359 	boolean_t fl_digest_init;
360 	boolean_t fl_digest;
361 	boolean_t fl_digest_update;
362 	boolean_t fl_digest_key;
363 	boolean_t fl_digest_final;
364 
365 	boolean_t fl_encrypt_init;
366 	boolean_t fl_encrypt;
367 	boolean_t fl_encrypt_update;
368 	boolean_t fl_encrypt_final;
369 
370 	boolean_t fl_decrypt_init;
371 	boolean_t fl_decrypt;
372 	boolean_t fl_decrypt_update;
373 	boolean_t fl_decrypt_final;
374 
375 	boolean_t fl_mac_init;
376 	boolean_t fl_mac;
377 	boolean_t fl_mac_update;
378 	boolean_t fl_mac_final;
379 
380 	boolean_t fl_sign_init;
381 	boolean_t fl_sign;
382 	boolean_t fl_sign_update;
383 	boolean_t fl_sign_final;
384 	boolean_t fl_sign_recover_init;
385 	boolean_t fl_sign_recover;
386 
387 	boolean_t fl_verify_init;
388 	boolean_t fl_verify;
389 	boolean_t fl_verify_update;
390 	boolean_t fl_verify_final;
391 	boolean_t fl_verify_recover_init;
392 	boolean_t fl_verify_recover;
393 
394 	boolean_t fl_digest_encrypt_update;
395 	boolean_t fl_decrypt_digest_update;
396 	boolean_t fl_sign_encrypt_update;
397 	boolean_t fl_decrypt_verify_update;
398 
399 	boolean_t fl_seed_random;
400 	boolean_t fl_generate_random;
401 
402 	boolean_t fl_session_open;
403 	boolean_t fl_session_close;
404 	boolean_t fl_session_login;
405 	boolean_t fl_session_logout;
406 
407 	boolean_t fl_object_create;
408 	boolean_t fl_object_copy;
409 	boolean_t fl_object_destroy;
410 	boolean_t fl_object_get_size;
411 	boolean_t fl_object_get_attribute_value;
412 	boolean_t fl_object_set_attribute_value;
413 	boolean_t fl_object_find_init;
414 	boolean_t fl_object_find;
415 	boolean_t fl_object_find_final;
416 
417 	boolean_t fl_key_generate;
418 	boolean_t fl_key_generate_pair;
419 	boolean_t fl_key_wrap;
420 	boolean_t fl_key_unwrap;
421 	boolean_t fl_key_derive;
422 
423 	boolean_t fl_init_token;
424 	boolean_t fl_init_pin;
425 	boolean_t fl_set_pin;
426 
427 	boolean_t prov_is_limited;
428 	uint32_t prov_hash_threshold;
429 	uint32_t prov_hash_limit;
430 } s10_crypto_function_list_t;
431 
432 typedef struct s10_crypto_get_function_list {
433 	uint_t				fl_return_value;
434 	crypto_provider_id_t		fl_provider_id;
435 	s10_crypto_function_list_t	fl_list;
436 } s10_crypto_get_function_list_t;
437 
438 /*
439  * The structure returned by the CRYPTO_GET_FUNCTION_LIST ioctl on /dev/crypto
440  * increased in size due to:
441  *	6482533 Threshold for HW offload via PKCS11 interface
442  * between S10 and Nevada.  This is a relatively simple process of filling
443  * in the S10 structure fields with the Nevada data.
444  *
445  * We stat the device to make sure that the ioctl is meant for /dev/crypto.
446  *
447  */
448 static int
449 crypto_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
450 {
451 	int				err;
452 	s10_crypto_get_function_list_t	s10_param;
453 	crypto_get_function_list_t	native_param;
454 	static dev_t			crypto_dev = (dev_t)-1;
455 	struct stat			sbuf;
456 
457 	if (crypto_dev == (dev_t)-1) {
458 		if ((err = __systemcall(rval, SYS_stat + 1024, "/dev/crypto",
459 		    &sbuf)) != 0)
460 			goto nonemuioctl;
461 		crypto_dev = major(sbuf.st_rdev);
462 	}
463 	if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &sbuf)) != 0)
464 		return (err);
465 	/* Each open fd of /dev/crypto gets a new minor device. */
466 	if (major(sbuf.st_rdev) != crypto_dev)
467 		goto nonemuioctl;
468 
469 	if (s10_uucopy((const void *)arg, &s10_param, sizeof (s10_param)) != 0)
470 		return (EFAULT);
471 	struct_assign(native_param, s10_param, fl_provider_id);
472 	if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd,
473 	    &native_param)) != 0)
474 		return (err);
475 
476 	struct_assign(s10_param, native_param, fl_return_value);
477 	struct_assign(s10_param, native_param, fl_provider_id);
478 
479 	struct_assign(s10_param, native_param, fl_list.fl_digest_init);
480 	struct_assign(s10_param, native_param, fl_list.fl_digest);
481 	struct_assign(s10_param, native_param, fl_list.fl_digest_update);
482 	struct_assign(s10_param, native_param, fl_list.fl_digest_key);
483 	struct_assign(s10_param, native_param, fl_list.fl_digest_final);
484 
485 	struct_assign(s10_param, native_param, fl_list.fl_encrypt_init);
486 	struct_assign(s10_param, native_param, fl_list.fl_encrypt);
487 	struct_assign(s10_param, native_param, fl_list.fl_encrypt_update);
488 	struct_assign(s10_param, native_param, fl_list.fl_encrypt_final);
489 
490 	struct_assign(s10_param, native_param, fl_list.fl_decrypt_init);
491 	struct_assign(s10_param, native_param, fl_list.fl_decrypt);
492 	struct_assign(s10_param, native_param, fl_list.fl_decrypt_update);
493 	struct_assign(s10_param, native_param, fl_list.fl_decrypt_final);
494 
495 	struct_assign(s10_param, native_param, fl_list.fl_mac_init);
496 	struct_assign(s10_param, native_param, fl_list.fl_mac);
497 	struct_assign(s10_param, native_param, fl_list.fl_mac_update);
498 	struct_assign(s10_param, native_param, fl_list.fl_mac_final);
499 
500 	struct_assign(s10_param, native_param, fl_list.fl_sign_init);
501 	struct_assign(s10_param, native_param, fl_list.fl_sign);
502 	struct_assign(s10_param, native_param, fl_list.fl_sign_update);
503 	struct_assign(s10_param, native_param, fl_list.fl_sign_final);
504 	struct_assign(s10_param, native_param, fl_list.fl_sign_recover_init);
505 	struct_assign(s10_param, native_param, fl_list.fl_sign_recover);
506 
507 	struct_assign(s10_param, native_param, fl_list.fl_verify_init);
508 	struct_assign(s10_param, native_param, fl_list.fl_verify);
509 	struct_assign(s10_param, native_param, fl_list.fl_verify_update);
510 	struct_assign(s10_param, native_param, fl_list.fl_verify_final);
511 	struct_assign(s10_param, native_param, fl_list.fl_verify_recover_init);
512 	struct_assign(s10_param, native_param, fl_list.fl_verify_recover);
513 
514 	struct_assign(s10_param, native_param,
515 	    fl_list.fl_digest_encrypt_update);
516 	struct_assign(s10_param, native_param,
517 	    fl_list.fl_decrypt_digest_update);
518 	struct_assign(s10_param, native_param, fl_list.fl_sign_encrypt_update);
519 	struct_assign(s10_param, native_param,
520 	    fl_list.fl_decrypt_verify_update);
521 
522 	struct_assign(s10_param, native_param, fl_list.fl_seed_random);
523 	struct_assign(s10_param, native_param, fl_list.fl_generate_random);
524 
525 	struct_assign(s10_param, native_param, fl_list.fl_session_open);
526 	struct_assign(s10_param, native_param, fl_list.fl_session_close);
527 	struct_assign(s10_param, native_param, fl_list.fl_session_login);
528 	struct_assign(s10_param, native_param, fl_list.fl_session_logout);
529 
530 	struct_assign(s10_param, native_param, fl_list.fl_object_create);
531 	struct_assign(s10_param, native_param, fl_list.fl_object_copy);
532 	struct_assign(s10_param, native_param, fl_list.fl_object_destroy);
533 	struct_assign(s10_param, native_param, fl_list.fl_object_get_size);
534 	struct_assign(s10_param, native_param,
535 	    fl_list.fl_object_get_attribute_value);
536 	struct_assign(s10_param, native_param,
537 	    fl_list.fl_object_set_attribute_value);
538 	struct_assign(s10_param, native_param, fl_list.fl_object_find_init);
539 	struct_assign(s10_param, native_param, fl_list.fl_object_find);
540 	struct_assign(s10_param, native_param, fl_list.fl_object_find_final);
541 
542 	struct_assign(s10_param, native_param, fl_list.fl_key_generate);
543 	struct_assign(s10_param, native_param, fl_list.fl_key_generate_pair);
544 	struct_assign(s10_param, native_param, fl_list.fl_key_wrap);
545 	struct_assign(s10_param, native_param, fl_list.fl_key_unwrap);
546 	struct_assign(s10_param, native_param, fl_list.fl_key_derive);
547 
548 	struct_assign(s10_param, native_param, fl_list.fl_init_token);
549 	struct_assign(s10_param, native_param, fl_list.fl_init_pin);
550 	struct_assign(s10_param, native_param, fl_list.fl_set_pin);
551 
552 	struct_assign(s10_param, native_param, fl_list.prov_is_limited);
553 	struct_assign(s10_param, native_param, fl_list.prov_hash_threshold);
554 	struct_assign(s10_param, native_param, fl_list.prov_hash_limit);
555 
556 	return (s10_uucopy(&s10_param, (void *)arg, sizeof (s10_param)));
557 
558 nonemuioctl:
559 	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
560 }
561 
/*
 * S10 layout of ct_param_t.
 *
 * The process contract CT_TGET and CT_TSET parameter structure ct_param_t
 * changed between S10 and Nevada, so the old S10 structure has to be
 * emulated when interposing on the ioctl syscall.  ctfs_ioctl() reads
 * ctpm_id and hands the address of ctpm_value to the native ioctl.
 */
typedef struct s10_ct_param {
	uint32_t	ctpm_id;
	uint32_t	ctpm_pad;
	uint64_t	ctpm_value;
} s10_ct_param_t;
572 
573 /*
574  * We have to emulate process contract ioctls for init(1M) because the
575  * ioctl parameter structure changed between S10 and Nevada.  This is
576  * a relatively simple process of filling Nevada structure fields,
577  * shuffling values, and initiating a native system call.
578  *
579  * For now, we'll assume that all consumers of CT_TGET and CT_TSET will
580  * need emulation.  We'll issue a stat to make sure that the ioctl
581  * is meant for the contract file system.
582  *
583  */
584 static int
585 ctfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
586 {
587 	int err;
588 	s10_ct_param_t s10param;
589 	ct_param_t param;
590 	struct stat statbuf;
591 
592 	if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &statbuf)) != 0)
593 		return (err);
594 	if (strcmp(statbuf.st_fstype, MNTTYPE_CTFS) != 0)
595 		return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
596 
597 	if (s10_uucopy((const void *)arg, &s10param, sizeof (s10param)) != 0)
598 		return (EFAULT);
599 	param.ctpm_id = s10param.ctpm_id;
600 	param.ctpm_size = sizeof (uint64_t);
601 	param.ctpm_value = &s10param.ctpm_value;
602 	if ((err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &param))
603 	    != 0)
604 		return (err);
605 
606 	if (cmd == CT_TGET)
607 		return (s10_uucopy(&s10param, (void *)arg, sizeof (s10param)));
608 
609 	return (0);
610 }
611 
612 typedef struct s10_zfs_cmd {
613 	char		zc_name[MAXPATHLEN];
614 	char		zc_value[MAXPATHLEN * 2];
615 	char		zc_string[MAXNAMELEN];
616 	uint64_t	zc_guid;
617 	uint64_t	zc_nvlist_conf;		/* really (char *) */
618 	uint64_t	zc_nvlist_conf_size;
619 	uint64_t	zc_nvlist_src;		/* really (char *) */
620 	uint64_t	zc_nvlist_src_size;
621 	uint64_t	zc_nvlist_dst;		/* really (char *) */
622 	uint64_t	zc_nvlist_dst_size;
623 	uint64_t	zc_cookie;
624 	uint64_t	zc_objset_type;
625 	uint64_t	zc_perm_action;
626 	uint64_t 	zc_history;		/* really (char *) */
627 	uint64_t 	zc_history_len;
628 	uint64_t	zc_history_offset;
629 	uint64_t	zc_obj;
630 	/* Solaris Next added zc_iflags member here */
631 	zfs_share_t	zc_share;
632 	dmu_objset_stats_t zc_objset_stats;
633 	struct drr_begin zc_begin_record;
634 	zinject_record_t zc_inject_record;
635 } s10_zfs_cmd_t;
636 
637 /*
638  * There is a difference in the zfs_cmd_t ioctl parameter between S10 and
639  * Solaris Next so we need to translate between the two structures when
640  * making ZFS ioctls.
641  */
642 static int
643 zfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
644 {
645 	int				err;
646 	s10_zfs_cmd_t			s10_param;
647 	zfs_cmd_t			native_param;
648 	static dev_t			zfs_dev = (dev_t)-1;
649 	struct stat			sbuf;
650 
651 	if (zfs_dev == (dev_t)-1) {
652 		if ((err = __systemcall(rval, SYS_stat + 1024, "/dev/zfs",
653 		    &sbuf)) != 0)
654 			goto nonemuioctl;
655 		zfs_dev = major(sbuf.st_rdev);
656 	}
657 	if ((err = __systemcall(rval, SYS_fstat + 1024, fdes, &sbuf)) != 0)
658 		return (err);
659 	if (major(sbuf.st_rdev) != zfs_dev)
660 		goto nonemuioctl;
661 
662 	if (s10_uucopy((const void *)arg, &s10_param, sizeof (s10_param)) != 0)
663 		return (EFAULT);
664 
665 	bcopy((const void *)s10_param.zc_name, (void *)native_param.zc_name,
666 	    sizeof (s10_param.zc_name));
667 	bcopy((const void *)s10_param.zc_value, (void *)native_param.zc_value,
668 	    sizeof (s10_param.zc_value));
669 	bcopy((const void *)s10_param.zc_string, (void *)native_param.zc_string,
670 	    sizeof (s10_param.zc_string));
671 	struct_assign(native_param, s10_param, zc_guid);
672 	struct_assign(native_param, s10_param, zc_nvlist_conf);
673 	struct_assign(native_param, s10_param, zc_nvlist_conf_size);
674 	struct_assign(native_param, s10_param, zc_nvlist_src);
675 	struct_assign(native_param, s10_param, zc_nvlist_src_size);
676 	struct_assign(native_param, s10_param, zc_nvlist_dst);
677 	struct_assign(native_param, s10_param, zc_nvlist_dst_size);
678 	struct_assign(native_param, s10_param, zc_cookie);
679 	struct_assign(native_param, s10_param, zc_objset_type);
680 	struct_assign(native_param, s10_param, zc_perm_action);
681 	struct_assign(native_param, s10_param, zc_history);
682 	struct_assign(native_param, s10_param, zc_history_len);
683 	struct_assign(native_param, s10_param, zc_history_offset);
684 	struct_assign(native_param, s10_param, zc_obj);
685 	native_param.zc_iflags = 0;
686 	struct_assign(native_param, s10_param, zc_share);
687 	struct_assign(native_param, s10_param, zc_objset_stats);
688 	struct_assign(native_param, s10_param, zc_begin_record);
689 	struct_assign(native_param, s10_param, zc_inject_record);
690 
691 	err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &native_param);
692 
693 	bcopy((const void *)native_param.zc_name, (void *)s10_param.zc_name,
694 	    sizeof (s10_param.zc_name));
695 	bcopy((const void *)native_param.zc_value, (void *)s10_param.zc_value,
696 	    sizeof (s10_param.zc_value));
697 	bcopy((const void *)native_param.zc_string, (void *)s10_param.zc_string,
698 	    sizeof (s10_param.zc_string));
699 	struct_assign(s10_param, native_param, zc_guid);
700 	struct_assign(s10_param, native_param, zc_nvlist_conf);
701 	struct_assign(s10_param, native_param, zc_nvlist_conf_size);
702 	struct_assign(s10_param, native_param, zc_nvlist_src);
703 	struct_assign(s10_param, native_param, zc_nvlist_src_size);
704 	struct_assign(s10_param, native_param, zc_nvlist_dst);
705 	struct_assign(s10_param, native_param, zc_nvlist_dst_size);
706 	struct_assign(s10_param, native_param, zc_cookie);
707 	struct_assign(s10_param, native_param, zc_objset_type);
708 	struct_assign(s10_param, native_param, zc_perm_action);
709 	struct_assign(s10_param, native_param, zc_history);
710 	struct_assign(s10_param, native_param, zc_history_len);
711 	struct_assign(s10_param, native_param, zc_history_offset);
712 	struct_assign(s10_param, native_param, zc_obj);
713 	struct_assign(s10_param, native_param, zc_share);
714 	struct_assign(s10_param, native_param, zc_objset_stats);
715 	struct_assign(s10_param, native_param, zc_begin_record);
716 	struct_assign(s10_param, native_param, zc_inject_record);
717 
718 	(void) s10_uucopy(&s10_param, (void *)arg, sizeof (s10_param));
719 	return (err);
720 
721 nonemuioctl:
722 	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
723 }
724 
725 int
726 s10_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg)
727 {
728 	switch (cmd) {
729 	case CRYPTO_GET_FUNCTION_LIST:
730 		return (crypto_ioctl(rval, fdes, cmd, arg));
731 	case CT_TGET:
732 		/*FALLTHRU*/
733 	case CT_TSET:
734 		return (ctfs_ioctl(rval, fdes, cmd, arg));
735 	}
736 
737 	if ((cmd & 0xff00) == ZFS_IOC)
738 		return (zfs_ioctl(rval, fdes, cmd, arg));
739 
740 	return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg));
741 }
742 
743 /*
744  * Unfortunately, pwrite()'s behavior differs between S10 and Nevada when
745  * applied to files opened with O_APPEND.  The offset argument is ignored and
746  * the buffer is appended to the target file in S10, whereas the current file
747  * position is ignored in Nevada (i.e., pwrite() acts as though the target file
748  * wasn't opened with O_APPEND).  This is a result of the fix for CR 6655660
749  * (pwrite() must ignore the O_APPEND/FAPPEND flag).
750  *
751  * We emulate the old S10 pwrite() behavior by checking whether the target file
752  * was opened with O_APPEND.  If it was, then invoke the write() system call
753  * instead of pwrite(); otherwise, invoke the pwrite() system call as usual.
754  */
755 static int
756 s10_pwrite(sysret_t *rval, int fd, const void *bufferp, size_t num_bytes,
757     off_t offset)
758 {
759 	int err;
760 
761 	if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0)
762 		return (err);
763 	if (rval->sys_rval1 & O_APPEND)
764 		return (__systemcall(rval, SYS_write + 1024, fd, bufferp,
765 		    num_bytes));
766 	return (__systemcall(rval, SYS_pwrite + 1024, fd, bufferp, num_bytes,
767 	    offset));
768 }
769 
770 #ifndef	_LP64
771 /*
772  * This is the large file version of the pwrite() system call for 32-bit
773  * processes.  This exists for the same reason that s10_pwrite() exists; see
774  * the comment above s10_pwrite().
775  */
776 static int
777 s10_pwrite64(sysret_t *rval, int fd, const void *bufferp, size32_t num_bytes,
778     uint32_t offset_1, uint32_t offset_2)
779 {
780 	int err;
781 
782 	if ((err = __systemcall(rval, SYS_fcntl + 1024, fd, F_GETFL)) != 0)
783 		return (err);
784 	if (rval->sys_rval1 & O_APPEND)
785 		return (__systemcall(rval, SYS_write + 1024, fd, bufferp,
786 		    num_bytes));
787 	return (__systemcall(rval, SYS_pwrite64 + 1024, fd, bufferp,
788 	    num_bytes, offset_1, offset_2));
789 }
790 #endif	/* !_LP64 */
791 
792 #define	S10_AC_PROC		(0x1 << 28)
793 #define	S10_AC_TASK		(0x2 << 28)
794 #define	S10_AC_FLOW		(0x4 << 28)
795 #define	S10_AC_MODE(x)		((x) & 0xf0000000)
796 #define	S10_AC_OPTION(x)	((x) & 0x0fffffff)
797 
798 /*
799  * The mode shift, mode mask and option mask for acctctl have changed.  The
800  * mode is currently the top full byte and the option is the lower 3 full bytes.
801  */
802 int
803 s10_acctctl(sysret_t *rval, int cmd, void *buf, size_t bufsz)
804 {
805 	int mode = S10_AC_MODE(cmd);
806 	int option = S10_AC_OPTION(cmd);
807 
808 	switch (mode) {
809 	case S10_AC_PROC:
810 		mode = AC_PROC;
811 		break;
812 	case S10_AC_TASK:
813 		mode = AC_TASK;
814 		break;
815 	case S10_AC_FLOW:
816 		mode = AC_FLOW;
817 		break;
818 	default:
819 		return (S10_TRUSS_POINT_3(rval, SYS_acctctl, EINVAL, cmd, buf,
820 		    bufsz));
821 	}
822 
823 	return (__systemcall(rval, SYS_acctctl + 1024, mode | option, buf,
824 	    bufsz));
825 }
826 
827 /*
828  * The Audit Policy parameters have changed due to:
829  *    6466722 audituser and AUDIT_USER are defined, unused, undocumented and
830  *            should be removed.
831  *
832  * In S10 we had the following flag:
833  *	#define AUDIT_USER 0x0040
834  * which doesn't exist in Solaris Next where the subsequent flags are shifted
835  * down.  For example, in S10 we had:
836  *	#define AUDIT_GROUP     0x0080
837  * but on Solaris Next we have:
838  *	#define AUDIT_GROUP     0x0040
839  * AUDIT_GROUP has the value AUDIT_USER had in S10 and all of the subsequent
840  * bits are also shifted one place.
841  *
842  * When we're getting or setting the Audit Policy parameters we need to
843  * shift the outgoing or incoming bits into their proper positions.  Since
844  * S10_AUDIT_USER was always unused, we always clear that bit on A_GETPOLICY.
845  *
846  * The command we care about, BSM_AUDITCTL, passes the most parameters (3),
847  * so declare this function to take up to 4 args and just pass them on.
848  * The number of parameters for s10_auditsys needs to be equal to the BSM_*
849  * subcommand that has the most parameters, since we want to pass all
850  * parameters through, regardless of which subcommands we interpose on.
851  *
852  * Note that the auditsys system call uses the SYSENT_AP macro wrapper instead
853  * of the more common SYSENT_CI macro.  This means the return value is a
854  * SE_64RVAL so the syscall table uses RV_64RVAL.
855  */
856 
857 #define	S10_AUDIT_HMASK	0xffffffc0
858 #define	S10_AUDIT_LMASK	0x3f
859 
860 int
861 s10_auditsys(sysret_t *rval, int bsmcmd, intptr_t a0, intptr_t a1, intptr_t a2)
862 {
863 	int	err;
864 	uint_t	m;
865 
866 	if (bsmcmd != BSM_AUDITCTL)
867 		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1,
868 		    a2));
869 
870 	if ((int)a0 == A_GETPOLICY) {
871 		if ((err = __systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0,
872 		    &m, a2)) != 0)
873 			return (err);
874 		m = ((m & S10_AUDIT_HMASK) << 1) | (m & S10_AUDIT_LMASK);
875 		if (s10_uucopy(&m, (void *)a1, sizeof (m)) != 0)
876 			return (EFAULT);
877 		return (0);
878 
879 	} else if ((int)a0 == A_SETPOLICY) {
880 		if (s10_uucopy((const void *)a1, &m, sizeof (m)) != 0)
881 			return (EFAULT);
882 		m = ((m >> 1) & S10_AUDIT_HMASK) | (m & S10_AUDIT_LMASK);
883 		return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, &m,
884 		    a2));
885 	}
886 
887 	return (__systemcall(rval, SYS_auditsys + 1024, bsmcmd, a0, a1, a2));
888 }
889 
890 /*
891  * Determine whether the executable passed to SYS_exec or SYS_execve is a
 * native executable.  The s10_npreload.so invokes the B_S10_NATIVE brand
 * operation which patches up the process's exec info to eliminate any trace
 * of the wrapper.  That will make pgrep and other commands that examine
 * processes' executable names and command-line parameters work properly.
896  */
897 static int
898 s10_exec_native(sysret_t *rval, const char *fname, const char **argp,
899     const char **envp)
900 {
901 	const char *filename = fname;
902 	char path[64];
903 	int err;
904 
905 	/* Get a copy of the executable we're trying to run */
906 	path[0] = '\0';
907 	(void) s10_uucopystr(filename, path, sizeof (path));
908 
909 	/* Check if we're trying to run a native binary */
910 	if (strncmp(path, "/.SUNWnative/usr/lib/brand/solaris10/s10_native",
911 	    sizeof (path)) != 0)
912 		return (0);
913 
914 	/* Skip the first element in the argv array */
915 	argp++;
916 
917 	/*
918 	 * The the path of the dynamic linker is the second parameter
919 	 * of s10_native_exec().
920 	 */
921 	if (s10_uucopy(argp, &filename, sizeof (char *)) != 0)
922 		return (EFAULT);
923 
924 	/* If an exec call succeeds, it never returns */
925 	err = __systemcall(rval, SYS_brand + 1024, B_EXEC_NATIVE, filename,
926 	    argp, envp, NULL, NULL, NULL);
927 	s10_assert(err != 0);
928 	return (err);
929 }
930 
931 /*
932  * Interpose on the SYS_exec syscall to detect native wrappers.
933  */
934 int
935 s10_exec(sysret_t *rval, const char *fname, const char **argp)
936 {
937 	int err;
938 
939 	if ((err = s10_exec_native(rval, fname, argp, NULL)) != 0)
940 		return (err);
941 
942 	/* If an exec call succeeds, it never returns */
943 	err = __systemcall(rval, SYS_exec + 1024, fname, argp);
944 	s10_assert(err != 0);
945 	return (err);
946 }
947 
948 /*
949  * Interpose on the SYS_execve syscall to detect native wrappers.
950  */
951 int
952 s10_execve(sysret_t *rval, const char *fname, const char **argp,
953     const char **envp)
954 {
955 	int err;
956 
957 	if ((err = s10_exec_native(rval, fname, argp, envp)) != 0)
958 		return (err);
959 
960 	/* If an exec call succeeds, it never returns */
961 	err = __systemcall(rval, SYS_execve + 1024, fname, argp, envp);
962 	s10_assert(err != 0);
963 	return (err);
964 }
965 
966 /*
967  * S10's issetugid() syscall is now a subcode to privsys().
968  */
969 static int
970 s10_issetugid(sysret_t *rval)
971 {
972 	return (__systemcall(rval, SYS_privsys + 1024, PRIVSYS_ISSETUGID,
973 	    0, 0, 0, 0, 0));
974 }
975 
976 /*
 * New last arg "block" flag should be zero.  The block flag is used by
 * the OpenSolaris AIO implementation, which is now part of libc.
979  */
980 static int
981 s10_sigqueue(sysret_t *rval, pid_t pid, int signo, void *value, int si_code)
982 {
983 	return (__systemcall(rval, SYS_sigqueue + 1024, pid, signo, value,
984 	    si_code, 0));
985 }
986 
987 static long
988 s10_uname(sysret_t *rv, uintptr_t p1)
989 {
990 	struct utsname un, *unp = (struct utsname *)p1;
991 	int rev, err;
992 
993 	if ((err = __systemcall(rv, SYS_uname + 1024, &un)) != 0)
994 		return (err);
995 
996 	rev = atoi(&un.release[2]);
997 	s10_assert(rev >= 11);
998 	bzero(un.release, _SYS_NMLN);
999 	(void) strlcpy(un.release, S10_UTS_RELEASE, _SYS_NMLN);
1000 	bzero(un.version, _SYS_NMLN);
1001 	(void) strlcpy(un.version, S10_UTS_VERSION, _SYS_NMLN);
1002 
1003 	/* copy out the modified uname info */
1004 	return (s10_uucopy(&un, unp, sizeof (un)));
1005 }
1006 
1007 int
1008 s10_sysinfo(sysret_t *rv, int command, char *buf, long count)
1009 {
1010 	char *value;
1011 	int len;
1012 
1013 	/*
1014 	 * We must interpose on the sysinfo(2) commands SI_RELEASE and
1015 	 * SI_VERSION; all others get passed to the native sysinfo(2)
1016 	 * command.
1017 	 */
1018 	switch (command) {
1019 		case SI_RELEASE:
1020 			value = S10_UTS_RELEASE;
1021 			break;
1022 
1023 		case SI_VERSION:
1024 			value = S10_UTS_VERSION;
1025 			break;
1026 
1027 		default:
1028 			/*
1029 			 * The default action is to pass the command to the
1030 			 * native sysinfo(2) syscall.
1031 			 */
1032 			return (__systemcall(rv, SYS_systeminfo + 1024,
1033 			    command, buf, count));
1034 	}
1035 
1036 	len = strlen(value) + 1;
1037 	if (count > 0) {
1038 		if (s10_uucopystr(value, buf, count) != 0)
1039 			return (EFAULT);
1040 
1041 		/* Assure NULL termination of buf as s10_uucopystr() doesn't. */
1042 		if (len > count && s10_uucopy("\0", buf + (count - 1), 1) != 0)
1043 			return (EFAULT);
1044 	}
1045 
1046 	/*
1047 	 * On success, sysinfo(2) returns the size of buffer required to hold
1048 	 * the complete value plus its terminating NULL byte.
1049 	 */
1050 	(void) S10_TRUSS_POINT_3(rv, SYS_systeminfo, 0, command, buf, count);
1051 	rv->sys_rval1 = len;
1052 	rv->sys_rval2 = 0;
1053 	return (0);
1054 }
1055 
1056 #ifdef	__x86
1057 #ifdef	__amd64
1058 /*
1059  * 64-bit x86 LWPs created by SYS_lwp_create start here if they need to set
1060  * their %fs registers to the legacy Solaris 10 selector value.
1061  *
1062  * This function does three things:
1063  *
1064  *	1.  Trap to the kernel so that it can set %fs to the legacy Solaris 10
1065  *	    selector value.
1066  *	2.  Read the LWP's true entry point (the entry point supplied by libc
1067  *	    when SYS_lwp_create was invoked) from %r14.
1068  *	3.  Eliminate this function's stack frame and pass control to the LWP's
1069  *	    true entry point.
1070  *
1071  * See the comment above s10_lwp_create_correct_fs() (see below) for the reason
1072  * why this function exists.
1073  */
1074 /*ARGSUSED*/
static void
s10_lwp_create_entry_point(void *ulwp_structp)
{
	/* Receives the brand syscall's return values; they are not examined. */
	sysret_t rval;

	/*
	 * The new LWP's %fs register is initially zero, but libc won't
	 * function correctly when %fs is zero.  Change the LWP's %fs register
	 * via SYS_brand.  The call's result is deliberately discarded.
	 */
	(void) __systemcall(&rval, SYS_brand + 1024, B_S10_FSREGCORRECTION);

	/*
	 * Jump to the true entry point, which is stored in %r14.
	 * Remove our stack frame before jumping so that
	 * s10_lwp_create_entry_point() won't be seen in stack traces.
	 *
	 * NOTE: s10_lwp_create_entry_point() pushes %r12 onto its stack frame
	 * so that it can use it as a temporary register.  We don't restore %r12
	 * in this assembly block because we don't care about its value (and
	 * neither does _lwp_start()).  Besides, the System V ABI AMD64
	 * Architecture Processor Supplement doesn't specify that %r12 should
	 * have a special value when LWPs start, so we can ignore its value when
	 * we jump to the true entry point.  Furthermore, %r12 is a callee-saved
	 * register, so the true entry point should push %r12 onto its stack
	 * before using the register.  We ignore %r14 after we read it for
	 * similar reasons.
	 *
	 * NOTE: The compiler will generate a function epilogue for this
	 * function despite the fact that the LWP will never execute it.
	 * We could hand-code this entire function in assembly to eliminate
	 * the epilogue, but the epilogue is only three or four instructions,
	 * so we wouldn't save much space.  Besides, why would we want
	 * to create yet another ugly, hard-to-maintain assembly function when
	 * we could write most of it in C?
	 *
	 * NOTE(review): the sequence below unwinds through %rbp, so it
	 * presumably relies on this function being compiled with a frame
	 * pointer -- confirm against the build flags.
	 */
	__asm__ __volatile__(
	    "movq %0, %%rdi\n\t"	/* pass ulwp_structp as arg1 */
	    "movq %%rbp, %%rsp\n\t"	/* eliminate the stack frame */
	    "popq %%rbp\n\t"		/* reload %rbp from the saved slot */
	    "jmp *%%r14\n\t"		/* jump to the true entry point */
	    : : "r" (ulwp_structp));	/* single input operand, no outputs */
	/*NOTREACHED*/
}
1119 
1120 /*
 * The S10 libc expects that %fs will be nonzero for new 64-bit x86 LWPs but the
 * Nevada kernel clears %fs for such LWPs.  Unfortunately, new LWPs do not
 * issue SYS_lwp_private (see s10_lwp_private() below) after they are created,
 * so we must ensure that new LWPs invoke a brand operation that sets %fs to a
 * nonzero value immediately after their creation.
1126  *
1127  * The easiest way to do this is to make new LWPs start at a special function,
1128  * s10_lwp_create_entry_point() (see its definition above), that invokes the
1129  * brand operation that corrects %fs.  We'll store the entry points of new LWPs
1130  * in their %r14 registers so that s10_lwp_create_entry_point() can find and
1131  * call them after invoking the special brand operation.  %r14 is a callee-saved
1132  * register; therefore, any functions invoked by s10_lwp_create_entry_point()
1133  * and all functions dealing with signals (e.g., sigacthandler()) will preserve
1134  * %r14 for s10_lwp_create_entry_point().
1135  *
1136  * The Nevada kernel can safely work with nonzero %fs values because the kernel
1137  * configures per-thread %fs segment descriptors so that the legacy %fs selector
1138  * value will still work.  See the comment in lwp_load() regarding %fs and
1139  * %fsbase in 64-bit x86 processes.
1140  *
1141  * This emulation exists thanks to CRs 6467491 and 6501650.
1142  */
1143 static int
1144 s10_lwp_create_correct_fs(sysret_t *rval, ucontext_t *ucp, int flags,
1145     id_t *new_lwp)
1146 {
1147 	ucontext_t s10_uc;
1148 
1149 	/*
1150 	 * Copy the supplied ucontext_t structure to the local stack
1151 	 * frame and store the new LWP's entry point (the value of %rip
1152 	 * stored in the ucontext_t) in the new LWP's %r14 register.
1153 	 * Then make s10_lwp_create_entry_point() the new LWP's entry
1154 	 * point.
1155 	 */
1156 	if (s10_uucopy(ucp, &s10_uc, sizeof (s10_uc)) != 0)
1157 		return (EFAULT);
1158 	s10_uc.uc_mcontext.gregs[REG_R14] = s10_uc.uc_mcontext.gregs[REG_RIP];
1159 	s10_uc.uc_mcontext.gregs[REG_RIP] = (greg_t)s10_lwp_create_entry_point;
1160 
1161 	/*
1162 	 * Issue SYS_lwp_create to create the new LWP.  We pass the
1163 	 * modified ucontext_t to make sure that the new LWP starts at
1164 	 * s10_lwp_create_entry_point().
1165 	 */
1166 	return (__systemcall(rval, SYS_lwp_create + 1024, &s10_uc,
1167 	    flags, new_lwp));
1168 }
1169 #endif	/* __amd64 */
1170 
1171 /*
1172  * This function is invoked on x86 systems when SYS_lwp_create is issued but no
1173  * %fs register correction is necessary.
1174  *
 * See the comment above s10_lwp_create_correct_fs() for more details.
1176  */
1177 static int
1178 s10_lwp_create(sysret_t *rval, ucontext_t *ucp, int flags, id_t *new_lwp)
1179 {
1180 	return (__systemcall(rval, SYS_lwp_create + 1024, ucp, flags, new_lwp));
1181 }
1182 
1183 /*
1184  * SYS_lwp_private is issued by libc_init() to set %fsbase in 64-bit x86
1185  * processes.  The Nevada kernel sets %fs to zero but the S10 libc expects
1186  * %fs to be nonzero.  We'll pass the issued system call to the kernel untouched
1187  * and invoke a brand operation to set %fs to the legacy S10 selector value.
1188  *
1189  * This emulation exists thanks to CRs 6467491 and 6501650.
1190  */
1191 static int
1192 s10_lwp_private(sysret_t *rval, int cmd, int which, uintptr_t base)
1193 {
1194 #ifdef	__amd64
1195 	int err;
1196 
1197 	/*
1198 	 * The current LWP's %fs register should be zero.  Determine whether the
1199 	 * Solaris 10 libc with which we're working functions correctly when %fs
1200 	 * is zero by calling thr_main() after issuing the SYS_lwp_private
1201 	 * syscall.  If thr_main() barfs (returns -1), then change the LWP's %fs
1202 	 * register via SYS_brand and patch s10_sysent_table so that issuing
1203 	 * SYS_lwp_create executes s10_lwp_create_correct_fs() rather than the
1204 	 * default s10_lwp_create().  s10_lwp_create_correct_fs() will
1205 	 * guarantee that new LWPs will have correct %fs values.
1206 	 */
1207 	if ((err = __systemcall(rval, SYS_lwp_private + 1024, cmd, which,
1208 	    base)) != 0)
1209 		return (err);
1210 	if (thr_main() == -1) {
1211 		/*
1212 		 * SYS_lwp_private is only issued by libc_init(), which is
1213 		 * executed when libc is first loaded by ld.so.1.  Thus we
1214 		 * are guaranteed to be single-threaded at this point.  Even
1215 		 * if we were multithreaded at this point, writing a 64-bit
1216 		 * value to the st_callc field of a s10_sysent_table
1217 		 * entry is guaranteed to be atomic on 64-bit x86 chips
1218 		 * as long as the field is not split across cache lines
1219 		 * (It shouldn't be.).  See chapter 8, section 1.1 of
1220 		 * "The Intel 64 and IA32 Architectures Software Developer's
1221 		 * Manual," Volume 3A for more details.
1222 		 */
1223 		s10_sysent_table[SYS_lwp_create].st_callc =
1224 		    (sysent_cb_t)s10_lwp_create_correct_fs;
1225 		return (__systemcall(rval, SYS_brand + 1024,
1226 		    B_S10_FSREGCORRECTION));
1227 	}
1228 	return (0);
1229 #else	/* !__amd64 */
1230 	return (__systemcall(rval, SYS_lwp_private + 1024, cmd, which, base));
1231 #endif	/* !__amd64 */
1232 }
1233 #endif	/* __x86 */
1234 
1235 /*
1236  * If the emul_global_zone flag is set then emulate some aspects of the
1237  * zone system call.  In particular, emulate the global zone ID on the
1238  * ZONE_LOOKUP subcommand and emulate some of the global zone attributes
1239  * on the ZONE_GETATTR subcommand.  If the flag is not set or we're performing
1240  * some other operation, simply pass the calls through.
1241  */
1242 int
1243 s10_zone(sysret_t *rval, int cmd, void *arg1, void *arg2, void *arg3,
1244     void *arg4)
1245 {
1246 	char		*aval;
1247 	int		len;
1248 	zoneid_t	zid;
1249 	int		attr;
1250 	char		*buf;
1251 	size_t		bufsize;
1252 
1253 	/*
1254 	 * We only emulate the zone syscall for a subset of specific commands,
1255 	 * otherwise we just pass the call through.
1256 	 */
1257 	if (!emul_global_zone)
1258 		return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2,
1259 		    arg3, arg4));
1260 
1261 	switch (cmd) {
1262 	case ZONE_LOOKUP:
1263 		(void) S10_TRUSS_POINT_1(rval, SYS_zone, 0, cmd);
1264 		rval->sys_rval1 = GLOBAL_ZONEID;
1265 		rval->sys_rval2 = 0;
1266 		return (0);
1267 
1268 	case ZONE_GETATTR:
1269 		zid = (zoneid_t)(uintptr_t)arg1;
1270 		attr = (int)(uintptr_t)arg2;
1271 		buf = (char *)arg3;
1272 		bufsize = (size_t)arg4;
1273 
1274 		/*
1275 		 * If the request is for the global zone then we're emulating
1276 		 * that, otherwise pass this thru.
1277 		 */
1278 		if (zid != GLOBAL_ZONEID)
1279 			goto passthru;
1280 
1281 		switch (attr) {
1282 		case ZONE_ATTR_NAME:
1283 			aval = GLOBAL_ZONENAME;
1284 			break;
1285 
1286 		case ZONE_ATTR_BRAND:
1287 			aval = NATIVE_BRAND_NAME;
1288 			break;
1289 		default:
1290 			/*
1291 			 * We only emulate a subset of the attrs, use the
1292 			 * real zone id to pass thru the rest.
1293 			 */
1294 			arg1 = (void *)(uintptr_t)zoneid;
1295 			goto passthru;
1296 		}
1297 
1298 		(void) S10_TRUSS_POINT_5(rval, SYS_zone, 0, cmd, zid, attr,
1299 		    buf, bufsize);
1300 
1301 		len = strlen(aval) + 1;
1302 		if (len > bufsize)
1303 			return (ENAMETOOLONG);
1304 
1305 		if (buf != NULL) {
1306 			if (len == 1) {
1307 				if (s10_uucopy("\0", buf, 1) != 0)
1308 					return (EFAULT);
1309 			} else {
1310 				if (s10_uucopystr(aval, buf, len) != 0)
1311 					return (EFAULT);
1312 
1313 				/*
1314 				 * Assure NULL termination of "buf" as
1315 				 * s10_uucopystr() does NOT.
1316 				 */
1317 				if (s10_uucopy("\0", buf + (len - 1), 1) != 0)
1318 					return (EFAULT);
1319 			}
1320 		}
1321 
1322 		rval->sys_rval1 = len;
1323 		rval->sys_rval2 = 0;
1324 		return (0);
1325 
1326 	default:
1327 		break;
1328 	}
1329 
1330 passthru:
1331 	return (__systemcall(rval, SYS_zone + 1024, cmd, arg1, arg2, arg3,
1332 	    arg4));
1333 }
1334 
1335 /*
1336  * Close a libc file handle, but don't actually close the underlying
1337  * file descriptor.
1338  */
static void
s10_close_fh(FILE *file)
{
	int orig_fd, saved_fd;

	/* Nothing to do without a stream or without a descriptor behind it. */
	if (file == NULL || (orig_fd = fileno(file)) < 0)
		return;

	/*
	 * Keep the open file alive through a duplicate descriptor while
	 * fclose() tears down the stream (and closes orig_fd), then put the
	 * open file back on its original descriptor number and release the
	 * duplicate.  If the duplicate cannot be made, leave the stream
	 * alone rather than lose the descriptor.
	 */
	if ((saved_fd = dup(orig_fd)) != -1) {
		(void) fclose(file);
		(void) dup2(saved_fd, orig_fd);
		(void) close(saved_fd);
	}
}
1358 
1359 /*ARGSUSED*/
1360 int
1361 s10_init(int argc, char *argv[], char *envp[])
1362 {
1363 	sysret_t		rval;
1364 	s10_brand_reg_t		reg;
1365 	s10_elf_data_t		sed;
1366 	auxv_t			*ap;
1367 	uintptr_t		*p;
1368 	int			i, err;
1369 	char			*bname;
1370 
1371 	/* Sanity check our translation table return value codes */
1372 	for (i = 0; i < NSYSCALL; i++) {
1373 		s10_sysent_table_t *est = &(s10_sysent_table[i]);
1374 		s10_assert(BIT_ONLYONESET(est->st_args & RV_MASK));
1375 	}
1376 
1377 	/*
1378 	 * We need to shutdown all libc stdio.  libc stdio normally goes to
1379 	 * file descriptors, but since we're actually part of a another
1380 	 * process we don't own these file descriptors and we can't make
1381 	 * any assumptions about their state.
1382 	 */
1383 	s10_close_fh(stdin);
1384 	s10_close_fh(stdout);
1385 	s10_close_fh(stderr);
1386 
1387 	/*
1388 	 * Cache the pid of the zone's init process and determine if
1389 	 * we're init(1m) for the zone.  Remember: we might be init
1390 	 * now, but as soon as we fork(2) we won't be.
1391 	 */
1392 	(void) get_initpid_info();
1393 
1394 	/* get the current zoneid */
1395 	err = __systemcall(&rval, SYS_zone, ZONE_LOOKUP, NULL);
1396 	s10_assert(err == 0);
1397 	zoneid = (zoneid_t)rval.sys_rval1;
1398 
1399 	/* Get the emulation version number. */
1400 	if ((err = __systemcall(&rval, SYS_zone, ZONE_GETATTR, zoneid,
1401 	    S10_EMUL_VERSION_NUM, &emul_vers, sizeof (emul_vers))) != 0 ||
1402 	    emul_vers != 0) {
1403 		s10_abort(err, "The zone's patch level is unsupported");
1404 		/*NOTREACHED*/
1405 	}
1406 
1407 	bname = basename(argv[0]);
1408 
1409 	/*
1410 	 * In general we want the S10 commands that are zone-aware to continue
1411 	 * to behave as they normally do within a zone.  Since these commands
1412 	 * are zone-aware, they should continue to "do the right thing".
1413 	 * However, some zone-aware commands aren't going to work the way
1414 	 * we expect them to inside the branded zone.  In particular, the pkg
1415 	 * and patch commands will not properly manage all pkgs/patches
1416 	 * unless the commands think they are running in the global zone.  For
1417 	 * these commands we want to emulate the global zone.
1418 	 *
1419 	 * We don't do any emulation for pkgcond since it is typically used
1420 	 * in pkg/patch postinstall scripts and we want those scripts to do
1421 	 * the right thing inside a zone.
1422 	 *
1423 	 * One issue is the handling of hollow pkgs.  Since the pkgs are
1424 	 * hollow, they won't use pkgcond in their postinstall scripts.  These
1425 	 * pkgs typically are installing drivers so we handle that by
1426 	 * replacing add_drv and rem_drv in the s10_boot script.
1427 	 */
1428 	if (strcmp("pkgadd", bname) == 0 || strcmp("pkgrm", bname) == 0 ||
1429 	    strcmp("patchadd", bname) == 0 || strcmp("patchrm", bname) == 0)
1430 		emul_global_zone = B_TRUE;
1431 
1432 	/*
1433 	 * Register our syscall emulation table with the kernel.
1434 	 * Note that we don't have to do invoke (syscall_number + 1024)
1435 	 * until we've actually establised a syscall emulation callback
1436 	 * handler address, which is what we're doing with this brand
1437 	 * syscall.
1438 	 */
1439 	reg.sbr_version = S10_VERSION;
1440 	reg.sbr_handler = (caddr_t)s10_handler;
1441 	if ((err = __systemcall(&rval, SYS_brand, B_REGISTER, &reg)) != 0) {
1442 		s10_abort(err, "Failed to brand current process");
1443 		/*NOTREACHED*/
1444 	}
1445 
1446 	/* Get data about the executable we're running from the kernel. */
1447 	if ((err = __systemcall(&rval, SYS_brand + 1024,
1448 	    B_ELFDATA, (void *)&sed)) != 0) {
1449 		s10_abort(err,
1450 		    "Failed to get required brand ELF data from the kernel");
1451 		/*NOTREACHED*/
1452 	}
1453 
1454 	/*
1455 	 * Find the aux vector on the stack.
1456 	 */
1457 	p = (uintptr_t *)envp;
1458 	while (*p != NULL)
1459 		p++;
1460 
1461 	/*
1462 	 * p is now pointing at the 0 word after the environ pointers.
1463 	 * After that is the aux vectors.
1464 	 *
1465 	 * The aux vectors are currently pointing to the brand emulation
1466 	 * library and associated linker.  We're going to change them to
1467 	 * point to the brand executable and associated linker (or to no
1468 	 * linker for static binaries).  This matches the process data
1469 	 * stored within the kernel and visible from /proc, which was
1470 	 * all setup in s10_elfexec().  We do this so that when a debugger
1471 	 * attaches to the process it sees the process as a normal solaris
1472 	 * process, this brand emulation library and everything on it's
1473 	 * link map will not be visible, unless our librtld_db plugin
1474 	 * is used.  Note that this is very different from how Linux
1475 	 * branded processes are implemented within lx branded zones.
1476 	 * In that situation, the primary linkmap of the process is the
1477 	 * brand emulation libraries linkmap, not the Linux applications
1478 	 * linkmap.
1479 	 *
1480 	 * We also need to clear the AF_SUN_NOPLM flag from the AT_SUN_AUXFLAGS
1481 	 * aux vector.  This flag told our linker that we don't have a
1482 	 * primary link map.  Now that our linker is done initializing, we
1483 	 * want to clear this flag before we transfer control to the
1484 	 * applications copy of the linker, since we want that linker to have
1485 	 * a primary link map which will be the link map for the application
1486 	 * we're running.
1487 	 */
1488 	p++;
1489 	for (ap = (auxv_t *)p; ap->a_type != AT_NULL; ap++) {
1490 		switch (ap->a_type) {
1491 			case AT_BASE:
1492 				/* Hide AT_BASE if static binary */
1493 				if (sed.sed_base == NULL) {
1494 					ap->a_type = AT_IGNORE;
1495 					ap->a_un.a_val = NULL;
1496 				} else {
1497 					ap->a_un.a_val = sed.sed_base;
1498 				}
1499 				break;
1500 			case AT_ENTRY:
1501 				ap->a_un.a_val = sed.sed_entry;
1502 				break;
1503 			case AT_PHDR:
1504 				ap->a_un.a_val = sed.sed_phdr;
1505 				break;
1506 			case AT_PHENT:
1507 				ap->a_un.a_val = sed.sed_phent;
1508 				break;
1509 			case AT_PHNUM:
1510 				ap->a_un.a_val = sed.sed_phnum;
1511 				break;
1512 			case AT_SUN_AUXFLAGS:
1513 				ap->a_un.a_val &= ~AF_SUN_NOPLM;
1514 				break;
1515 			case AT_SUN_EMULATOR:
1516 				/*
1517 				 * ld.so.1 inspects AT_SUN_EMULATOR to see if
1518 				 * if it is the linker for the brand emulation
1519 				 * library.  Hide AT_SUN_EMULATOR, as the
1520 				 * linker we are about to jump to is the linker
1521 				 * for the binary.
1522 				 */
1523 				ap->a_type = AT_IGNORE;
1524 				ap->a_un.a_val = NULL;
1525 				break;
1526 			case AT_SUN_LDDATA:
1527 				/* Hide AT_SUN_LDDATA if static binary */
1528 				if (sed.sed_lddata == NULL) {
1529 					ap->a_type = AT_IGNORE;
1530 					ap->a_un.a_val = NULL;
1531 				} else {
1532 					ap->a_un.a_val = sed.sed_lddata;
1533 				}
1534 				break;
1535 			default:
1536 				break;
1537 		}
1538 	}
1539 
1540 	s10_runexe(argv, sed.sed_ldentry);
1541 	/*NOTREACHED*/
1542 	s10_abort(0, "s10_runexe() returned");
1543 	return (-1);
1544 }
1545 
1546 /*
1547  * This table must have at least NSYSCALL entries in it.
1548  *
1549  * The second parameter of each entry in the s10_sysent_table
1550  * contains the number of parameters and flags that describe the
1551  * syscall return value encoding.  See the block comments at the
1552  * top of this file for more information about the syscall return
1553  * value flags and when they should be used.
1554  */
1555 s10_sysent_table_t s10_sysent_table[] = {
1556 #if defined(__sparc) && !defined(__sparcv9)
1557 	EMULATE(s10_indir, 9 | RV_64RVAL),	/*  0 */
1558 #else /* !__sparc || __sparcv9 */
1559 	NOSYS,					/*  0 */
1560 #endif /* !__sparc || __sparcv9 */
1561 	NOSYS,					/*   1 */
1562 	NOSYS,					/*   2 */
1563 	NOSYS,					/*   3 */
1564 	NOSYS,					/*   4 */
1565 	NOSYS,					/*   5 */
1566 	NOSYS,					/*   6 */
1567 	NOSYS,					/*   7 */
1568 	NOSYS,					/*   8 */
1569 	NOSYS,					/*   9 */
1570 	NOSYS,					/*  10 */
1571 	EMULATE(s10_exec, 2 | RV_DEFAULT),	/*  11 */
1572 	NOSYS,					/*  12 */
1573 	NOSYS,					/*  13 */
1574 	NOSYS,					/*  14 */
1575 	NOSYS,					/*  15 */
1576 	NOSYS,					/*  16 */
1577 	NOSYS,					/*  17 */
1578 	NOSYS,					/*  18 */
1579 	NOSYS,					/*  19 */
1580 	NOSYS,					/*  20 */
1581 	NOSYS,					/*  21 */
1582 	NOSYS,					/*  22 */
1583 	NOSYS,					/*  23 */
1584 	NOSYS,					/*  24 */
1585 	NOSYS,					/*  25 */
1586 	NOSYS,					/*  26 */
1587 	NOSYS,					/*  27 */
1588 	NOSYS,					/*  28 */
1589 	NOSYS,					/*  29 */
1590 	NOSYS,					/*  30 */
1591 	NOSYS,					/*  31 */
1592 	NOSYS,					/*  32 */
1593 	NOSYS,					/*  33 */
1594 	NOSYS,					/*  34 */
1595 	NOSYS,					/*  35 */
1596 	NOSYS,					/*  36 */
1597 	NOSYS,					/*  37 */
1598 	NOSYS,					/*  38 */
1599 	NOSYS,					/*  39 */
1600 	NOSYS,					/*  40 */
1601 	NOSYS,					/*  41 */
1602 	NOSYS,					/*  42 */
1603 	NOSYS,					/*  43 */
1604 	NOSYS,					/*  44 */
1605 	NOSYS,					/*  45 */
1606 	NOSYS,					/*  46 */
1607 	NOSYS,					/*  47 */
1608 	NOSYS,					/*  48 */
1609 	NOSYS,					/*  49 */
1610 	NOSYS,					/*  50 */
1611 	NOSYS,					/*  51 */
1612 	NOSYS,					/*  52 */
1613 	NOSYS,					/*  53 */
1614 	EMULATE(s10_ioctl, 3 | RV_DEFAULT),	/*  54 */
1615 	NOSYS,					/*  55 */
1616 	NOSYS,					/*  56 */
1617 	NOSYS,					/*  57 */
1618 	NOSYS,					/*  58 */
1619 	EMULATE(s10_execve, 3 | RV_DEFAULT),	/*  59 */
1620 	NOSYS,					/*  60 */
1621 	NOSYS,					/*  61 */
1622 	NOSYS,					/*  62 */
1623 	NOSYS,					/*  63 */
1624 	NOSYS,					/*  64 */
1625 	NOSYS,					/*  65 */
1626 	NOSYS,					/*  66 */
1627 	NOSYS,					/*  67 */
1628 	NOSYS,					/*  68 */
1629 	NOSYS,					/*  69 */
1630 	NOSYS,					/*  70 */
1631 	EMULATE(s10_acctctl, 3 | RV_DEFAULT),	/*  71 */
1632 	NOSYS,					/*  72 */
1633 	NOSYS,					/*  73 */
1634 	NOSYS,					/*  74 */
1635 	EMULATE(s10_issetugid, 0 | RV_DEFAULT),	/*  75 */
1636 	NOSYS,					/*  76 */
1637 	NOSYS,					/*  77 */
1638 	NOSYS,					/*  78 */
1639 	NOSYS,					/*  79 */
1640 	NOSYS,					/*  80 */
1641 	NOSYS,					/*  81 */
1642 	NOSYS,					/*  82 */
1643 	NOSYS,					/*  83 */
1644 	NOSYS,					/*  84 */
1645 	NOSYS,					/*  85 */
1646 	NOSYS,					/*  86 */
1647 	NOSYS,					/*  87 */
1648 	NOSYS,					/*  88 */
1649 	NOSYS,					/*  89 */
1650 	NOSYS,					/*  90 */
1651 	NOSYS,					/*  91 */
1652 	NOSYS,					/*  92 */
1653 	NOSYS,					/*  93 */
1654 	NOSYS,					/*  94 */
1655 	NOSYS,					/*  95 */
1656 	NOSYS,					/*  96 */
1657 	NOSYS,					/*  97 */
1658 	NOSYS,					/*  98 */
1659 	NOSYS,					/*  99 */
1660 	NOSYS,					/* 100 */
1661 	NOSYS,					/* 101 */
1662 	NOSYS,					/* 102 */
1663 	NOSYS,					/* 103 */
1664 	NOSYS,					/* 104 */
1665 	NOSYS,					/* 105 */
1666 	NOSYS,					/* 106 */
1667 	NOSYS,					/* 107 */
1668 	NOSYS,					/* 108 */
1669 	NOSYS,					/* 109 */
1670 	NOSYS,					/* 110 */
1671 	NOSYS,					/* 111 */
1672 	NOSYS,					/* 112 */
1673 	NOSYS,					/* 113 */
1674 	NOSYS,					/* 114 */
1675 	NOSYS,					/* 115 */
1676 	NOSYS,					/* 116 */
1677 	NOSYS,					/* 117 */
1678 	NOSYS,					/* 118 */
1679 	NOSYS,					/* 119 */
1680 	NOSYS,					/* 120 */
1681 	NOSYS,					/* 121 */
1682 	NOSYS,					/* 122 */
1683 	NOSYS,					/* 123 */
1684 	NOSYS,					/* 124 */
1685 	NOSYS,					/* 125 */
1686 	NOSYS,					/* 126 */
1687 	NOSYS,					/* 127 */
1688 	NOSYS,					/* 128 */
1689 	NOSYS,					/* 129 */
1690 	NOSYS,					/* 130 */
1691 	NOSYS,					/* 131 */
1692 	NOSYS,					/* 132 */
1693 	NOSYS,					/* 133 */
1694 	NOSYS,					/* 134 */
1695 	EMULATE(s10_uname, 1 | RV_DEFAULT),	/* 135 */
1696 	NOSYS,					/* 136 */
1697 	NOSYS,					/* 137 */
1698 	NOSYS,					/* 138 */
1699 	EMULATE(s10_sysinfo, 3 | RV_DEFAULT),	/* 139 */
1700 	NOSYS,					/* 140 */
1701 	NOSYS,					/* 141 */
1702 	NOSYS,					/* 142 */
1703 	NOSYS,					/* 143 */
1704 	NOSYS,					/* 144 */
1705 	NOSYS,					/* 145 */
1706 	NOSYS,					/* 146 */
1707 	NOSYS,					/* 147 */
1708 	NOSYS,					/* 148 */
1709 	NOSYS,					/* 149 */
1710 	NOSYS,					/* 150 */
1711 	NOSYS,					/* 151 */
1712 	NOSYS,					/* 152 */
1713 	NOSYS,					/* 153 */
1714 	NOSYS,					/* 154 */
1715 	NOSYS,					/* 155 */
1716 	NOSYS,					/* 156 */
1717 	NOSYS,					/* 157 */
1718 	NOSYS,					/* 158 */
1719 #ifdef	__x86
1720 	EMULATE(s10_lwp_create, 3 | RV_DEFAULT), /* 159 */
1721 #else	/* !__x86 */
1722 	NOSYS,					/* 159 */
1723 #endif	/* !__x86 */
1724 	NOSYS,					/* 160 */
1725 	NOSYS,					/* 161 */
1726 	NOSYS,					/* 162 */
1727 	NOSYS,					/* 163 */
1728 	NOSYS,					/* 164 */
1729 	NOSYS,					/* 165 */
1730 #ifdef	__x86
1731 	EMULATE(s10_lwp_private, 3 | RV_DEFAULT), /* 166 */
1732 #else	/* !__x86 */
1733 	NOSYS,					/* 166 */
1734 #endif	/* !__x86 */
1735 	NOSYS,					/* 167 */
1736 	NOSYS,					/* 168 */
1737 	NOSYS,					/* 169 */
1738 	NOSYS,					/* 170 */
1739 	NOSYS,					/* 171 */
1740 	NOSYS,					/* 172 */
1741 	NOSYS,					/* 173 */
1742 	EMULATE(s10_pwrite, 4 | RV_DEFAULT),	/* 174 */
1743 	NOSYS,					/* 175 */
1744 	NOSYS,					/* 176 */
1745 	NOSYS,					/* 177 */
1746 	NOSYS,					/* 178 */
1747 	NOSYS,					/* 179 */
1748 	NOSYS,					/* 180 */
1749 	NOSYS,					/* 181 */
1750 	NOSYS,					/* 182 */
1751 	NOSYS,					/* 183 */
1752 	NOSYS,					/* 184 */
1753 	NOSYS,					/* 185 */
1754 	EMULATE(s10_auditsys, 4 | RV_64RVAL),	/* 186 */
1755 	NOSYS,					/* 187 */
1756 	NOSYS,					/* 188 */
1757 	NOSYS,					/* 189 */
1758 	EMULATE(s10_sigqueue, 4 | RV_DEFAULT),	/* 190 */
1759 	NOSYS,					/* 191 */
1760 	NOSYS,					/* 192 */
1761 	NOSYS,					/* 193 */
1762 	NOSYS,					/* 194 */
1763 	NOSYS,					/* 195 */
1764 	NOSYS,					/* 196 */
1765 	NOSYS,					/* 197 */
1766 	NOSYS,					/* 198 */
1767 	NOSYS,					/* 199 */
1768 	NOSYS,					/* 200 */
1769 	NOSYS,					/* 201 */
1770 	NOSYS,					/* 202 */
1771 	NOSYS,					/* 203 */
1772 	NOSYS,					/* 204 */
1773 	NOSYS,					/* 205 */
1774 	NOSYS,					/* 206 */
1775 	NOSYS,					/* 207 */
1776 	NOSYS,					/* 208 */
1777 	NOSYS,					/* 209 */
1778 	NOSYS,					/* 210 */
1779 	NOSYS,					/* 211 */
1780 	NOSYS,					/* 212 */
1781 	NOSYS,					/* 213 */
1782 	NOSYS,					/* 214 */
1783 	NOSYS,					/* 215 */
1784 	NOSYS,					/* 216 */
1785 	NOSYS,					/* 217 */
1786 	NOSYS,					/* 218 */
1787 	NOSYS,					/* 219 */
1788 	NOSYS,					/* 220 */
1789 	NOSYS,					/* 221 */
1790 	NOSYS,					/* 222 */
1791 #ifdef	_LP64
1792 	NOSYS,					/* 223 */
1793 #else	/* !_LP64 */
1794 	EMULATE(s10_pwrite64, 5 | RV_DEFAULT),	/* 223 */
1795 #endif	/* !_LP64 */
1796 	NOSYS,					/* 224 */
1797 	NOSYS,					/* 225 */
1798 	NOSYS,					/* 226 */
1799 	EMULATE(s10_zone, 5 | RV_DEFAULT),	/* 227 */
1800 	NOSYS,					/* 228 */
1801 	NOSYS,					/* 229 */
1802 	NOSYS,					/* 230 */
1803 	NOSYS,					/* 231 */
1804 	NOSYS,					/* 232 */
1805 	NOSYS,					/* 233 */
1806 	NOSYS,					/* 234 */
1807 	NOSYS,					/* 235 */
1808 	NOSYS,					/* 236 */
1809 	NOSYS,					/* 237 */
1810 	NOSYS,					/* 238 */
1811 	NOSYS,					/* 239 */
1812 	NOSYS,					/* 240 */
1813 	NOSYS,					/* 241 */
1814 	NOSYS,					/* 242 */
1815 	NOSYS,					/* 243 */
1816 	NOSYS,					/* 244 */
1817 	NOSYS,					/* 245 */
1818 	NOSYS,					/* 246 */
1819 	NOSYS,					/* 247 */
1820 	NOSYS,					/* 248 */
1821 	NOSYS,					/* 249 */
1822 	NOSYS,					/* 250 */
1823 	NOSYS,					/* 251 */
1824 	NOSYS,					/* 252 */
1825 	NOSYS,					/* 253 */
1826 	NOSYS,					/* 254 */
1827 	NOSYS					/* 255 */
1828 };
1829