1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/xpv_user.h>
27 
28 #include <sys/types.h>
29 #include <sys/file.h>
30 #include <sys/errno.h>
31 #include <sys/open.h>
32 #include <sys/cred.h>
33 #include <sys/conf.h>
34 #include <sys/stat.h>
35 #include <sys/modctl.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/vmsystm.h>
39 #include <sys/hypervisor.h>
40 #include <sys/xen_errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/sdt.h>
43 
44 #include <xen/sys/privcmd.h>
45 #include <sys/privcmd_impl.h>
46 
/*
 * Bookkeeping for one buffer that is shuttled between user space and the
 * kernel around a hypercall.  import_buffer() fills the descriptor in and
 * export_buffer() consumes it.
 */
typedef struct import_export {
	void *			ie_uaddr;	/* user-space address */
	void *			ie_kaddr;	/* kernel-side copy */
	size_t			ie_size;	/* buffer size in bytes */
	uint32_t		ie_flags;	/* IE_* flags, see below */
} import_export_t;

/*
 * The empty descriptor.  export_buffer() returns immediately for it,
 * because ie_size is 0 and ie_uaddr is NULL.
 */
static import_export_t null_ie = {NULL, NULL, 0, 0};

#define	IE_IMPORT	0x0001		/* Data needs to be copied in */
#define	IE_EXPORT	0x0002		/* Data needs to be copied out */
#define	IE_FREE		0x0004		/* ie_kaddr must be kmem_free()d */
#define	IE_IMPEXP	(IE_IMPORT | IE_EXPORT)
60 
/*
 * Return the address stored in the Xen guest handle that 'field' points
 * at.  The handle is viewed through a local stand-in structure holding a
 * single pointer member.
 */
static void *
uaddr_from_handle(void *field)
{
	struct { void *p; } *handle = field;
	void *uaddr;

	/*LINTED: constant in conditional context*/
	get_xen_guest_handle(uaddr, (*handle));
	return (uaddr);
}
71 
72 
73 /*
74  * Import a buffer from user-space.  If the caller provides a kernel
75  * address, we import to that address.  If not, we kmem_alloc() the space
76  * ourselves.
77  */
78 static int
79 import_buffer(import_export_t *iep, void *uaddr, void *kaddr, size_t size,
80     uint32_t flags)
81 {
82 	iep->ie_uaddr = uaddr;
83 	iep->ie_size = size;
84 	iep->ie_flags = flags & IE_EXPORT;
85 
86 	if (size == 0 || uaddr == NULL) {
87 		*iep = null_ie;
88 		return (0);
89 	}
90 
91 	if (kaddr == NULL) {
92 		iep->ie_kaddr = kmem_alloc(size, KM_SLEEP);
93 		iep->ie_flags |= IE_FREE;
94 	} else {
95 		iep->ie_kaddr = kaddr;
96 		iep->ie_flags &= ~IE_FREE;
97 	}
98 
99 	if ((flags & IE_IMPORT) &&
100 	    (ddi_copyin(uaddr, iep->ie_kaddr, size, 0) != 0)) {
101 		if (iep->ie_flags & IE_FREE) {
102 			kmem_free(iep->ie_kaddr, iep->ie_size);
103 			iep->ie_kaddr = NULL;
104 			iep->ie_flags = 0;
105 		}
106 		return (-X_EFAULT);
107 	}
108 
109 	return (0);
110 }
111 
112 static void
113 export_buffer(import_export_t *iep, int *error)
114 {
115 	int copy_err = 0;
116 
117 	if (iep->ie_size == 0 || iep->ie_uaddr == NULL)
118 		return;
119 
120 	/*
121 	 * If the buffer was marked for export initially, and if the
122 	 * hypercall completed successfully, resync the user-space buffer
123 	 * with our in-kernel buffer.
124 	 */
125 	if ((iep->ie_flags & IE_EXPORT) && (*error >= 0) &&
126 	    (ddi_copyout(iep->ie_kaddr, iep->ie_uaddr, iep->ie_size, 0) != 0))
127 		copy_err = -X_EFAULT;
128 	if (iep->ie_flags & IE_FREE) {
129 		kmem_free(iep->ie_kaddr, iep->ie_size);
130 		iep->ie_kaddr = NULL;
131 		iep->ie_flags = 0;
132 	}
133 
134 	if (copy_err != 0 && *error >= 0)
135 		*error = copy_err;
136 }
137 
138 /*
139  * Xen 'op' structures often include pointers disguised as 'handles', which
140  * refer to addresses in user space.  This routine copies a buffer
141  * associated with an embedded pointer into kernel space, and replaces the
142  * pointer to userspace with a pointer to the new kernel buffer.
143  *
144  * Note: if Xen ever redefines the structure of a 'handle', this routine
145  * (specifically the definition of 'hdl') will need to be updated.
146  */
147 static int
148 import_handle(import_export_t *iep, void *field, size_t size, int flags)
149 {
150 	struct { void *p; } *hdl = field;
151 	void *ptr;
152 	int err;
153 
154 	ptr = uaddr_from_handle(field);
155 	err = import_buffer(iep, ptr, NULL, size, (flags));
156 	if (err == 0) {
157 		/*LINTED: constant in conditional context*/
158 		set_xen_guest_handle((*hdl), (void *)((iep)->ie_kaddr));
159 	}
160 	return (err);
161 }
162 
163 static int
164 privcmd_HYPERVISOR_mmu_update(mmu_update_t *ureq, int count, int *scount,
165     domid_t domid)
166 {
167 	mmu_update_t *kreq, single_kreq;
168 	import_export_t cnt_ie, req_ie;
169 	int error, kscount, bytes;
170 
171 	bytes = count * sizeof (*kreq);
172 	kreq = (count == 1) ? &single_kreq : kmem_alloc(bytes, KM_SLEEP);
173 
174 	error = import_buffer(&cnt_ie, scount, &kscount, sizeof (kscount),
175 	    IE_IMPEXP);
176 	if (error != 0)
177 		req_ie = null_ie;
178 	else
179 		error = import_buffer(&req_ie, ureq, kreq, bytes, IE_IMPEXP);
180 
181 	DTRACE_XPV3(mmu__update__start, int, domid, int, count, mmu_update_t *,
182 	    ((error == -X_EFAULT) ? ureq : kreq));
183 
184 	if (error == 0)
185 		error = HYPERVISOR_mmu_update(kreq, count, &kscount, domid);
186 	export_buffer(&cnt_ie, &error);
187 	export_buffer(&req_ie, &error);
188 	if (count != 1)
189 		kmem_free(kreq, bytes);
190 
191 	DTRACE_XPV1(mmu__update__end, int, error);
192 	return (error);
193 }
194 
/*
 * Marshal a domctl hypercall.  The op structure is copied in from 'opp',
 * its interface version is checked, any user buffer referenced through an
 * embedded handle is imported for the commands that carry one, and the
 * hypercall is issued on the kernel copy.  Afterwards the op structure
 * and the sub-buffer are exported again.
 *
 * Returns the hypercall result, the import_buffer() error if the op
 * cannot be copied in, -X_EACCES on an interface version mismatch,
 * -X_EINVAL for a command not listed below, or the error from importing
 * the embedded buffer.
 */
static int
privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
{
	xen_domctl_t op;
	import_export_t op_ie, sub_ie;
	int error = 0;

	if ((error = import_buffer(&op_ie, opp, &op, sizeof (op),
	    IE_IMPEXP)) != 0)
		return (error);

	/* Start out with no sub-buffer so the export below is a no-op. */
	sub_ie = null_ie;

	/*
	 * Check this first because our wrapper will forcibly overwrite it.
	 */
	if (op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) {
#ifdef DEBUG
		printf("domctl vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
		    op.cmd, op.interface_version, XEN_DOMCTL_INTERFACE_VERSION);
#endif
		error = -X_EACCES;
		export_buffer(&op_ie, &error);
		return (error);
	}

	/*
	 * Now handle any domctl ops with embedded pointers elsewhere
	 * in the user address space that also need to be tacked down
	 * while the hypervisor futzes with them.
	 */
	switch (op.cmd) {
	case XEN_DOMCTL_createdomain:
		DTRACE_XPV1(dom__create__start, xen_domctl_t *,
		    &op.u.createdomain);
		break;

	case XEN_DOMCTL_destroydomain:
		DTRACE_XPV1(dom__destroy__start, domid_t, op.domain);
		break;

	case XEN_DOMCTL_pausedomain:
		DTRACE_XPV1(dom__pause__start, domid_t, op.domain);
		break;

	case XEN_DOMCTL_unpausedomain:
		DTRACE_XPV1(dom__unpause__start, domid_t, op.domain);
		break;

	case XEN_DOMCTL_getmemlist: {
		error = import_handle(&sub_ie, &op.u.getmemlist.buffer,
		    op.u.getmemlist.max_pfns * sizeof (xen_pfn_t), IE_EXPORT);
		break;
	}

	case XEN_DOMCTL_getpageframeinfo2: {
		error = import_handle(&sub_ie, &op.u.getpageframeinfo2.array,
		    op.u.getpageframeinfo2.num * sizeof (ulong_t), IE_IMPEXP);
		break;
	}

	case XEN_DOMCTL_shadow_op: {
		size_t size;

		/* One bit per page, rounded up to a whole ulong_t. */
		size = roundup(howmany(op.u.shadow_op.pages, NBBY),
		    sizeof (ulong_t));
		error = import_handle(&sub_ie,
		    &op.u.shadow_op.dirty_bitmap, size, IE_IMPEXP);
		break;
	}

	case XEN_DOMCTL_setvcpucontext: {
		vcpu_guest_context_t *taddr;
		error = import_handle(&sub_ie, &op.u.vcpucontext.ctxt,
		    sizeof (vcpu_guest_context_t), IE_IMPORT);
		/*
		 * The probe is handed the user address if the copyin
		 * failed, and the kernel copy otherwise.
		 */
		if (error == -X_EFAULT)
			/*LINTED: constant in conditional context*/
			get_xen_guest_handle_u(taddr, op.u.vcpucontext.ctxt);
		else
			taddr = sub_ie.ie_kaddr;
		DTRACE_XPV2(setvcpucontext__start, domid_t, op.domain,
		    vcpu_guest_context_t *, taddr);
		break;
	}

	case XEN_DOMCTL_getvcpucontext: {
		error = import_handle(&sub_ie, &op.u.vcpucontext.ctxt,
		    sizeof (vcpu_guest_context_t), IE_EXPORT);
		break;
	}


	case XEN_DOMCTL_sethvmcontext: {
		error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
		    op.u.hvmcontext.size, IE_IMPORT);
		break;
	}

	case XEN_DOMCTL_gethvmcontext: {
		/* The handle's pointer member is spelled differently here. */
#if !defined(__GNUC__) && defined(__i386__)
		if (op.u.hvmcontext.buffer.u.p != NULL)
#else
		if (op.u.hvmcontext.buffer.p != NULL)
#endif
			error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
			    op.u.hvmcontext.size, IE_EXPORT);
		break;
	}

	/* Recognized commands for which no embedded buffer is imported. */
	case XEN_DOMCTL_getdomaininfo:
	case XEN_DOMCTL_getpageframeinfo:
	case XEN_DOMCTL_max_mem:
	case XEN_DOMCTL_resumedomain:
	case XEN_DOMCTL_getvcpuinfo:
	case XEN_DOMCTL_setvcpuaffinity:
	case XEN_DOMCTL_getvcpuaffinity:
	case XEN_DOMCTL_max_vcpus:
	case XEN_DOMCTL_scheduler_op:
	case XEN_DOMCTL_setdomainhandle:
	case XEN_DOMCTL_setdebugging:
	case XEN_DOMCTL_irq_permission:
	case XEN_DOMCTL_iomem_permission:
	case XEN_DOMCTL_ioport_permission:
	case XEN_DOMCTL_hypercall_init:
	case XEN_DOMCTL_arch_setup:
	case XEN_DOMCTL_settimeoffset:
	case XEN_DOMCTL_real_mode_area:
	case XEN_DOMCTL_sendtrigger:
	case XEN_DOMCTL_assign_device:
	case XEN_DOMCTL_bind_pt_irq:
	case XEN_DOMCTL_get_address_size:
	case XEN_DOMCTL_set_address_size:
	case XEN_DOMCTL_get_ext_vcpucontext:
	case XEN_DOMCTL_set_ext_vcpucontext:
	case XEN_DOMCTL_set_opt_feature:
	case XEN_DOMCTL_memory_mapping:
	case XEN_DOMCTL_ioport_mapping:
	case XEN_DOMCTL_pin_mem_cacheattr:
	case XEN_DOMCTL_test_assign_device:
	case XEN_DOMCTL_set_target:
	case XEN_DOMCTL_deassign_device:
	case XEN_DOMCTL_set_cpuid:
	case XEN_DOMCTL_get_device_group:
	case XEN_DOMCTL_get_machine_address_size:
	case XEN_DOMCTL_set_machine_address_size:
	case XEN_DOMCTL_suppress_spurious_page_faults:
		break;

	default:
#ifdef DEBUG
		printf("unrecognized HYPERVISOR_domctl %d\n", op.cmd);
#endif
		error = -X_EINVAL;
	}

	/* Only call into the hypervisor if everything was imported. */
	if (error == 0)
		error = HYPERVISOR_domctl(&op);

	export_buffer(&op_ie, &error);
	export_buffer(&sub_ie, &error);

	/* Fire the end probe matching any start probe fired above. */
	switch (op.cmd) {
	case XEN_DOMCTL_createdomain:
		DTRACE_XPV1(dom__create__end, int, error);
		break;
	case XEN_DOMCTL_destroydomain:
		DTRACE_XPV1(dom__destroy__end, int, error);
		break;
	case XEN_DOMCTL_pausedomain:
		DTRACE_XPV1(dom__pause__end, int, error);
		break;
	case XEN_DOMCTL_unpausedomain:
		DTRACE_XPV1(dom__unpause__end, int, error);
		break;
	case XEN_DOMCTL_setvcpucontext:
		DTRACE_XPV1(setvcpucontext__end, int, error);
		break;
	default:
		;
	}

	return (error);
}
378 
379 static int
380 privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp)
381 {
382 	xen_sysctl_t op, dop;
383 	import_export_t op_ie, sub_ie, sub2_ie;
384 	int error = 0;
385 
386 	if (import_buffer(&op_ie, opp, &op, sizeof (op), IE_IMPEXP) != 0)
387 		return (-X_EFAULT);
388 
389 	sub_ie = null_ie;
390 	sub2_ie = null_ie;
391 
392 	/*
393 	 * Check this first because our wrapper will forcibly overwrite it.
394 	 */
395 	if (op.interface_version != XEN_SYSCTL_INTERFACE_VERSION) {
396 		error = -X_EACCES;
397 		export_buffer(&op_ie, &error);
398 		return (error);
399 	}
400 
401 	switch (op.cmd) {
402 	case XEN_SYSCTL_readconsole: {
403 		error = import_handle(&sub_ie, &op.u.readconsole.buffer,
404 		    op.u.readconsole.count, IE_EXPORT);
405 		break;
406 	}
407 
408 	case XEN_SYSCTL_debug_keys: {
409 		error = import_handle(&sub_ie, &op.u.debug_keys.keys,
410 		    op.u.debug_keys.nr_keys, IE_IMPORT);
411 		break;
412 	}
413 
414 	case XEN_SYSCTL_tbuf_op:
415 	case XEN_SYSCTL_physinfo: {
416 		if (uaddr_from_handle(&op.u.physinfo.cpu_to_node) != NULL &&
417 		    op.u.physinfo.max_cpu_id != 0) {
418 			error = import_handle(&sub_ie,
419 			    &op.u.physinfo.cpu_to_node,
420 			    op.u.physinfo.max_cpu_id * sizeof (uint32_t),
421 			    IE_EXPORT);
422 		}
423 		break;
424 	}
425 	case XEN_SYSCTL_sched_id:
426 	case XEN_SYSCTL_availheap:
427 	case XEN_SYSCTL_cpu_hotplug:
428 		break;
429 	case XEN_SYSCTL_get_pmstat: {
430 		unsigned int maxs;
431 
432 		switch (op.u.get_pmstat.type) {
433 		case PMSTAT_get_pxstat:
434 			/*
435 			 * This interface is broken. Xen always copies out
436 			 * all the state information, and the interface
437 			 * does not specify how much space the caller has
438 			 * reserved. So, the only thing to do is just mirror
439 			 * the hypervisor and libxc behavior, and use the
440 			 * maximum amount of data.
441 			 */
442 			dop.cmd = XEN_SYSCTL_get_pmstat;
443 			dop.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
444 			dop.u.get_pmstat.cpuid = op.u.get_pmstat.cpuid;
445 			dop.u.get_pmstat.type = PMSTAT_get_max_px;
446 			error = HYPERVISOR_sysctl(&dop);
447 			if (error != 0)
448 				break;
449 
450 			maxs = dop.u.get_pmstat.u.getpx.total;
451 			if (maxs == 0) {
452 				error = -X_EINVAL;
453 				break;
454 			}
455 
456 			error = import_handle(&sub_ie,
457 			    &op.u.get_pmstat.u.getpx.trans_pt,
458 			    maxs * maxs * sizeof (uint64_t), IE_EXPORT);
459 			if (error != 0)
460 				break;
461 
462 			error = import_handle(&sub2_ie,
463 			    &op.u.get_pmstat.u.getpx.pt,
464 			    maxs * sizeof (pm_px_val_t), IE_EXPORT);
465 			break;
466 		case PMSTAT_get_cxstat:
467 			/* See above */
468 			dop.cmd = XEN_SYSCTL_get_pmstat;
469 			dop.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
470 			dop.u.get_pmstat.cpuid = op.u.get_pmstat.cpuid;
471 			dop.u.get_pmstat.type = PMSTAT_get_max_cx;
472 			error = HYPERVISOR_sysctl(&dop);
473 			if (error != 0)
474 				break;
475 
476 			maxs = dop.u.get_pmstat.u.getcx.nr;
477 			if (maxs == 0) {
478 				error = -X_EINVAL;
479 				break;
480 			}
481 
482 			error = import_handle(&sub_ie,
483 			    &op.u.get_pmstat.u.getcx.triggers,
484 			    maxs * sizeof (uint64_t), IE_EXPORT);
485 			if (error != 0)
486 				break;
487 			error = import_handle(&sub2_ie,
488 			    &op.u.get_pmstat.u.getcx.residencies,
489 			    maxs * sizeof (uint64_t), IE_EXPORT);
490 			break;
491 
492 		case PMSTAT_get_max_px:
493 		case PMSTAT_reset_pxstat:
494 		case PMSTAT_get_max_cx:
495 		case PMSTAT_reset_cxstat:
496 			break;
497 		default:
498 			error = -X_EINVAL;
499 			break;
500 		}
501 		break;
502 	}
503 
504 	case XEN_SYSCTL_perfc_op: {
505 		xen_sysctl_perfc_desc_t *scdp;
506 		/*
507 		 * If 'desc' is NULL, then the caller is asking for
508 		 * the number of counters.  If 'desc' is non-NULL,
509 		 * then we need to know how many counters there are
510 		 * before wiring down the output buffer appropriately.
511 		 */
512 		/*LINTED: constant in conditional context*/
513 		get_xen_guest_handle_u(scdp, op.u.perfc_op.desc);
514 		if (scdp != NULL) {
515 			static int numcounters = -1;
516 			static int numvals = -1;
517 
518 			if (numcounters == -1) {
519 				dop.cmd = XEN_SYSCTL_perfc_op;
520 				dop.interface_version =
521 				    XEN_SYSCTL_INTERFACE_VERSION;
522 				dop.u.perfc_op.cmd = XEN_SYSCTL_PERFCOP_query;
523 				/*LINTED: constant in conditional context*/
524 				set_xen_guest_handle_u(dop.u.perfc_op.desc,
525 				    NULL);
526 				/*LINTED: constant in conditional context*/
527 				set_xen_guest_handle_u(dop.u.perfc_op.val,
528 				    NULL);
529 
530 				error = HYPERVISOR_sysctl(&dop);
531 				if (error != 0)
532 					break;
533 				numcounters = dop.u.perfc_op.nr_counters;
534 				numvals = dop.u.perfc_op.nr_vals;
535 			}
536 			ASSERT(numcounters != -1);
537 			ASSERT(numvals != -1);
538 			error = import_handle(&sub_ie, &op.u.perfc_op.desc,
539 			    (sizeof (xen_sysctl_perfc_desc_t) * numcounters),
540 			    IE_EXPORT);
541 			error = import_handle(&sub2_ie, &op.u.perfc_op.val,
542 			    (sizeof (xen_sysctl_perfc_val_t) * numvals),
543 			    IE_EXPORT);
544 		}
545 		break;
546 	}
547 
548 	case XEN_SYSCTL_getdomaininfolist: {
549 		error = import_handle(&sub_ie, &op.u.getdomaininfolist.buffer,
550 		    (op.u.getdomaininfolist.max_domains *
551 		    sizeof (xen_domctl_getdomaininfo_t)), IE_EXPORT);
552 		break;
553 	}
554 
555 	case XEN_SYSCTL_getcpuinfo:
556 		error = import_handle(&sub_ie, &op.u.getcpuinfo.info,
557 		    op.u.getcpuinfo.max_cpus *
558 		    sizeof (xen_sysctl_cpuinfo_t), IE_EXPORT);
559 		break;
560 	default:
561 #ifdef DEBUG
562 		printf("unrecognized HYPERVISOR_sysctl %d\n", op.cmd);
563 #endif
564 		error = -X_EINVAL;
565 	}
566 
567 	if (error == 0)
568 		error = HYPERVISOR_sysctl(&op);
569 
570 	export_buffer(&op_ie, &error);
571 	export_buffer(&sub_ie, &error);
572 	export_buffer(&sub2_ie, &error);
573 
574 	return (error);
575 }
576 
577 static int
578 privcmd_HYPERVISOR_platform_op(xen_platform_op_t *opp)
579 {
580 	import_export_t op_ie, sub_ie, sub2_ie;
581 	xen_platform_op_t op;
582 	int error;
583 
584 	if (import_buffer(&op_ie, opp, &op, sizeof (op), IE_IMPEXP) != 0)
585 		return (-X_EFAULT);
586 
587 	sub_ie = null_ie;
588 	sub2_ie = null_ie;
589 
590 	/*
591 	 * Check this first because our wrapper will forcibly overwrite it.
592 	 */
593 	if (op.interface_version != XENPF_INTERFACE_VERSION) {
594 		error = -X_EACCES;
595 		export_buffer(&op_ie, &error);
596 		return (error);
597 	}
598 
599 	/*
600 	 * Now handle any platform ops with embedded pointers elsewhere
601 	 * in the user address space that also need to be tacked down
602 	 * while the hypervisor futzes with them.
603 	 */
604 	switch (op.cmd) {
605 	case XENPF_settime:
606 	case XENPF_add_memtype:
607 	case XENPF_del_memtype:
608 	case XENPF_read_memtype:
609 	case XENPF_platform_quirk:
610 	case XENPF_enter_acpi_sleep:
611 	case XENPF_change_freq:
612 	case XENPF_panic_init:
613 		break;
614 
615 	case XENPF_microcode_update:
616 		error = import_handle(&sub_ie, &op.u.microcode.data,
617 		    op.u.microcode.length, IE_IMPORT);
618 		break;
619 	case XENPF_getidletime:
620 		error = import_handle(&sub_ie, &op.u.getidletime.cpumap_bitmap,
621 		    op.u.getidletime.cpumap_nr_cpus, IE_IMPEXP);
622 		if (error != 0)
623 			break;
624 
625 		error = import_handle(&sub2_ie, &op.u.getidletime.idletime,
626 		    op.u.getidletime.cpumap_nr_cpus * sizeof (uint64_t),
627 		    IE_EXPORT);
628 		break;
629 
630 	case XENPF_set_processor_pminfo: {
631 		size_t s;
632 
633 		switch (op.u.set_pminfo.type) {
634 		case XEN_PM_PX:
635 			s = op.u.set_pminfo.u.perf.state_count *
636 			    sizeof (xen_processor_px_t);
637 			if (op.u.set_pminfo.u.perf.flags & XEN_PX_PSS) {
638 				error = import_handle(&sub_ie,
639 				    &op.u.set_pminfo.u.perf.states, s,
640 				    IE_IMPORT);
641 			}
642 			break;
643 		case XEN_PM_CX:
644 			s = op.u.set_pminfo.u.power.count *
645 			    sizeof (xen_processor_cx_t);
646 			error = import_handle(&sub_ie,
647 			    &op.u.set_pminfo.u.power.states, s, IE_IMPORT);
648 			break;
649 		case XEN_PM_TX:
650 			break;
651 		default:
652 			error = -X_EINVAL;
653 			break;
654 		}
655 		break;
656 	}
657 	case XENPF_firmware_info: {
658 		uint16_t len;
659 		void *uaddr;
660 
661 		switch (op.u.firmware_info.type) {
662 		case XEN_FW_DISK_INFO:
663 			/*
664 			 * Ugh.. another hokey interface. The first 16 bits
665 			 * of the buffer are also used as the (input) length.
666 			 */
667 			uaddr = uaddr_from_handle(
668 			    &op.u.firmware_info.u.disk_info.edd_params);
669 			error = ddi_copyin(uaddr, &len, sizeof (len), 0);
670 			if (error != 0)
671 				break;
672 			error = import_handle(&sub_ie,
673 			    &op.u.firmware_info.u.disk_info.edd_params, len,
674 			    IE_IMPEXP);
675 			break;
676 		case XEN_FW_VBEDDC_INFO:
677 			error = import_handle(&sub_ie,
678 			    &op.u.firmware_info.u.vbeddc_info.edid, 128,
679 			    IE_EXPORT);
680 			break;
681 		case XEN_FW_DISK_MBR_SIGNATURE:
682 		default:
683 			break;
684 		}
685 		break;
686 	}
687 	default:
688 		/* FIXME: see this with non-existed ID 38 ???? */
689 #ifdef DEBUG
690 		printf("unrecognized HYPERVISOR_platform_op %d pid %d\n",
691 		    op.cmd, curthread->t_procp->p_pid);
692 #endif
693 		return (-X_EINVAL);
694 	}
695 
696 	if (error == 0)
697 		error = HYPERVISOR_platform_op(&op);
698 
699 	export_buffer(&op_ie, &error);
700 	export_buffer(&sub_ie, &error);
701 	export_buffer(&sub2_ie, &error);
702 
703 	return (error);
704 }
705 
706 static int
707 privcmd_HYPERVISOR_memory_op(int cmd, void *arg)
708 {
709 	int error = 0;
710 	import_export_t op_ie, sub_ie, gpfn_ie, mfn_ie;
711 	union {
712 		domid_t domid;
713 		struct xen_memory_reservation resv;
714 		struct xen_machphys_mfn_list xmml;
715 		struct xen_add_to_physmap xatp;
716 		struct xen_memory_map mm;
717 		struct xen_foreign_memory_map fmm;
718 		struct xen_pod_target pd;
719 	} op_arg;
720 
721 	op_ie = sub_ie = gpfn_ie = mfn_ie = null_ie;
722 
723 	switch (cmd) {
724 	case XENMEM_increase_reservation:
725 	case XENMEM_decrease_reservation:
726 	case XENMEM_populate_physmap: {
727 		ulong_t *taddr;
728 
729 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.resv),
730 		    IE_IMPEXP) != 0)
731 			return (-X_EFAULT);
732 
733 		error = import_handle(&sub_ie, &op_arg.resv.extent_start,
734 		    (op_arg.resv.nr_extents * sizeof (ulong_t)), IE_IMPEXP);
735 
736 		if (error == -X_EFAULT)
737 			/*LINTED: constant in conditional context*/
738 			get_xen_guest_handle(taddr, op_arg.resv.extent_start);
739 		else
740 			taddr = sub_ie.ie_kaddr;
741 
742 		switch (cmd) {
743 		case XENMEM_increase_reservation:
744 			DTRACE_XPV4(increase__reservation__start,
745 			    domid_t, op_arg.resv.domid,
746 			    ulong_t, op_arg.resv.nr_extents,
747 			    uint_t, op_arg.resv.extent_order,
748 			    ulong_t *, taddr);
749 			break;
750 		case XENMEM_decrease_reservation:
751 			DTRACE_XPV4(decrease__reservation__start,
752 			    domid_t, op_arg.resv.domid,
753 			    ulong_t, op_arg.resv.nr_extents,
754 			    uint_t, op_arg.resv.extent_order,
755 			    ulong_t *, taddr);
756 			break;
757 		case XENMEM_populate_physmap:
758 			DTRACE_XPV3(populate__physmap__start,
759 			    domid_t, op_arg.resv.domid,
760 			    ulong_t, op_arg.resv.nr_extents,
761 			    ulong_t *, taddr);
762 			break;
763 		}
764 
765 		break;
766 	}
767 
768 	case XENMEM_maximum_ram_page:
769 		break;
770 
771 	case XENMEM_current_reservation:
772 	case XENMEM_maximum_reservation:
773 	case XENMEM_maximum_gpfn:
774 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.domid),
775 		    IE_IMPEXP) != 0)
776 			return (-X_EFAULT);
777 		break;
778 
779 	case XENMEM_machphys_mfn_list: {
780 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xmml),
781 		    IE_IMPEXP) != 0)
782 			return (-X_EFAULT);
783 
784 		error = import_handle(&sub_ie, &op_arg.xmml.extent_start,
785 		    (op_arg.xmml.max_extents * sizeof (ulong_t)), IE_IMPEXP);
786 		break;
787 	}
788 
789 	case XENMEM_add_to_physmap:
790 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xatp),
791 		    IE_IMPEXP) != 0)
792 			return (-X_EFAULT);
793 		DTRACE_XPV4(add__to__physmap__start, domid_t,
794 		    op_arg.xatp.domid, uint_t, op_arg.xatp.space, ulong_t,
795 		    op_arg.xatp.idx, ulong_t, op_arg.xatp.gpfn);
796 		break;
797 
798 	case XENMEM_memory_map:
799 	case XENMEM_machine_memory_map: {
800 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.mm),
801 		    IE_EXPORT) != 0)
802 			return (-X_EFAULT);
803 
804 		/*
805 		 * XXPV: ugh. e820entry is packed, but not in the kernel, since
806 		 * we remove all attributes; seems like this is a nice way to
807 		 * break mysteriously.
808 		 */
809 		error = import_handle(&sub_ie, &op_arg.mm.buffer,
810 		    (op_arg.mm.nr_entries * 20), IE_IMPEXP);
811 		break;
812 	}
813 
814 	case XENMEM_set_memory_map: {
815 		struct xen_memory_map *taddr;
816 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.fmm),
817 		    IE_IMPORT) != 0)
818 			return (-X_EFAULT);
819 
820 		/*
821 		 * As above.
822 		 */
823 		error = import_handle(&sub_ie, &op_arg.fmm.map.buffer,
824 		    (op_arg.fmm.map.nr_entries * 20), IE_IMPEXP);
825 
826 		if (error == -X_EFAULT)
827 			/*LINTED: constant in conditional context*/
828 			get_xen_guest_handle(taddr, op_arg.fmm.map.buffer);
829 		else
830 			taddr = sub_ie.ie_kaddr;
831 		DTRACE_XPV3(set__memory__map__start, domid_t,
832 		    op_arg.fmm.domid, int, op_arg.fmm.map.nr_entries,
833 		    struct xen_memory_map *, taddr);
834 		break;
835 	}
836 
837 	case XENMEM_set_pod_target:
838 	case XENMEM_get_pod_target:
839 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.pd),
840 		    IE_IMPEXP) != 0)
841 			return (-X_EFAULT);
842 		break;
843 
844 	default:
845 #ifdef DEBUG
846 		printf("unrecognized HYPERVISOR_memory_op %d\n", cmd);
847 #endif
848 		return (-X_EINVAL);
849 	}
850 
851 	if (error == 0)
852 		error = HYPERVISOR_memory_op(cmd,
853 		    (arg == NULL) ? NULL: &op_arg);
854 
855 	export_buffer(&op_ie, &error);
856 	export_buffer(&sub_ie, &error);
857 	export_buffer(&gpfn_ie, &error);
858 	export_buffer(&mfn_ie, &error);
859 
860 	switch (cmd) {
861 	case XENMEM_increase_reservation:
862 		DTRACE_XPV1(increase__reservation__end, int, error);
863 		break;
864 	case XENMEM_decrease_reservation:
865 		DTRACE_XPV1(decrease__reservation__end, int, error);
866 		break;
867 	case XENMEM_populate_physmap:
868 		DTRACE_XPV1(populate__physmap__end, int, error);
869 		break;
870 	case XENMEM_add_to_physmap:
871 		DTRACE_XPV1(add__to__physmap__end, int, error);
872 		break;
873 	case XENMEM_set_memory_map:
874 		DTRACE_XPV1(set__memory__map__end, int, error);
875 		break;
876 	}
877 	return (error);
878 }
879 
880 static int
881 privcmd_HYPERVISOR_event_channel_op(int cmd, void *arg)
882 {
883 	int error;
884 	size_t size;
885 	import_export_t op_ie;
886 	uint32_t flags;
887 
888 	switch (cmd) {
889 	case EVTCHNOP_alloc_unbound:
890 		size = sizeof (evtchn_alloc_unbound_t);
891 		flags = IE_IMPEXP;
892 		break;
893 	case EVTCHNOP_bind_interdomain:
894 		size = sizeof (evtchn_bind_interdomain_t);
895 		flags = IE_IMPEXP;
896 		break;
897 	case EVTCHNOP_bind_virq:
898 		size = sizeof (evtchn_bind_virq_t);
899 		flags = IE_IMPEXP;
900 		break;
901 	case EVTCHNOP_bind_pirq:
902 		size = sizeof (evtchn_bind_pirq_t);
903 		flags = IE_IMPEXP;
904 		break;
905 	case EVTCHNOP_bind_ipi:
906 		size = sizeof (evtchn_bind_ipi_t);
907 		flags = IE_IMPEXP;
908 		break;
909 	case EVTCHNOP_close:
910 		size = sizeof (evtchn_close_t);
911 		flags = IE_IMPORT;
912 		break;
913 	case EVTCHNOP_send:
914 		size = sizeof (evtchn_send_t);
915 		flags = IE_IMPORT;
916 		break;
917 	case EVTCHNOP_status:
918 		size = sizeof (evtchn_status_t);
919 		flags = IE_IMPEXP;
920 		break;
921 	case EVTCHNOP_bind_vcpu:
922 		size = sizeof (evtchn_bind_vcpu_t);
923 		flags = IE_IMPORT;
924 		break;
925 	case EVTCHNOP_unmask:
926 		size = sizeof (evtchn_unmask_t);
927 		flags = IE_IMPORT;
928 		break;
929 	case EVTCHNOP_reset:
930 		size = sizeof (evtchn_reset_t);
931 		flags = IE_IMPORT;
932 		break;
933 
934 	default:
935 #ifdef DEBUG
936 		printf("unrecognized HYPERVISOR_event_channel op %d\n", cmd);
937 #endif
938 		return (-X_EINVAL);
939 	}
940 
941 	error = import_buffer(&op_ie, arg, NULL, size, flags);
942 
943 	/*
944 	 * If there is sufficient demand, we can replace this void * with
945 	 * the proper op structure pointer.
946 	 */
947 	DTRACE_XPV2(evtchn__op__start, int, cmd, void *,
948 	    ((error == -X_EFAULT) ? arg : op_ie.ie_kaddr));
949 
950 	if (error == 0)
951 		error = HYPERVISOR_event_channel_op(cmd, op_ie.ie_kaddr);
952 	export_buffer(&op_ie, &error);
953 
954 	DTRACE_XPV1(evtchn__op__end, int, error);
955 
956 	return (error);
957 }
958 
959 static int
960 privcmd_HYPERVISOR_xen_version(int cmd, void *arg)
961 {
962 	int error;
963 	int size = 0;
964 	import_export_t op_ie;
965 	uint32_t flags = IE_EXPORT;
966 
967 	switch (cmd) {
968 	case XENVER_version:
969 		break;
970 	case XENVER_extraversion:
971 		size = sizeof (xen_extraversion_t);
972 		break;
973 	case XENVER_compile_info:
974 		size = sizeof (xen_compile_info_t);
975 		break;
976 	case XENVER_capabilities:
977 		size = sizeof (xen_capabilities_info_t);
978 		break;
979 	case XENVER_changeset:
980 		size = sizeof (xen_changeset_info_t);
981 		break;
982 	case XENVER_platform_parameters:
983 		size = sizeof (xen_platform_parameters_t);
984 		break;
985 	case XENVER_get_features:
986 		flags = IE_IMPEXP;
987 		size = sizeof (xen_feature_info_t);
988 		break;
989 	case XENVER_pagesize:
990 		break;
991 	case XENVER_guest_handle:
992 		size = sizeof (xen_domain_handle_t);
993 		break;
994 
995 	default:
996 #ifdef DEBUG
997 		printf("unrecognized HYPERVISOR_xen_version op %d\n", cmd);
998 #endif
999 		return (-X_EINVAL);
1000 	}
1001 
1002 	error = import_buffer(&op_ie, arg, NULL, size, flags);
1003 	if (error == 0)
1004 		error = HYPERVISOR_xen_version(cmd, op_ie.ie_kaddr);
1005 	export_buffer(&op_ie, &error);
1006 
1007 	return (error);
1008 }
1009 
1010 static int
1011 privcmd_HYPERVISOR_xsm_op(void *uacmctl)
1012 {
1013 	int error;
1014 	struct xen_acmctl *acmctl;
1015 	import_export_t op_ie;
1016 
1017 	error = import_buffer(&op_ie, uacmctl, NULL, sizeof (*acmctl),
1018 	    IE_IMPEXP);
1019 	if (error != 0)
1020 		return (error);
1021 
1022 	acmctl = op_ie.ie_kaddr;
1023 
1024 	if (acmctl->interface_version != ACM_INTERFACE_VERSION) {
1025 #ifdef DEBUG
1026 		printf("acm vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
1027 		    acmctl->cmd, acmctl->interface_version,
1028 		    ACM_INTERFACE_VERSION);
1029 #endif
1030 		error = -X_EACCES;
1031 		export_buffer(&op_ie, &error);
1032 		return (error);
1033 	}
1034 
1035 	/* FIXME: flask ops??? */
1036 
1037 	switch (acmctl->cmd) {
1038 	case ACMOP_setpolicy:
1039 	case ACMOP_getpolicy:
1040 	case ACMOP_dumpstats:
1041 	case ACMOP_getssid:
1042 	case ACMOP_getdecision:
1043 	case ACMOP_chgpolicy:
1044 	case ACMOP_relabeldoms:
1045 		/* flags = IE_IMPEXP; */
1046 		break;
1047 	default:
1048 #ifdef DEBUG
1049 		printf("unrecognized HYPERVISOR_xsm_op op %d\n", acmctl->cmd);
1050 #endif
1051 		return (-X_EINVAL);
1052 	}
1053 
1054 	if (error == 0)
1055 		error = HYPERVISOR_xsm_op(acmctl);
1056 	export_buffer(&op_ie, &error);
1057 
1058 	return (error);
1059 }
1060 
1061 static int
1062 privcmd_HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, uint_t *scount,
1063     domid_t domid)
1064 {
1065 	int error, bytes;
1066 	uint_t kscount;
1067 	struct mmuext_op *kop, single_kop;
1068 	import_export_t op_ie, scnt_ie;
1069 
1070 	kop = NULL;
1071 	op_ie = scnt_ie = null_ie;
1072 	error = 0;
1073 
1074 	if (count >= 1) {
1075 		bytes = count * sizeof (*kop);
1076 		kop = (count == 1) ? &single_kop : kmem_alloc(bytes, KM_SLEEP);
1077 		error = import_buffer(&op_ie, op, kop, bytes, IE_IMPORT);
1078 	}
1079 
1080 	DTRACE_XPV2(mmu__ext__op__start, int, count, struct mmuext_op *,
1081 	    ((error == -X_EFAULT) ? op : kop));
1082 
1083 	if (scount != NULL && error == 0)
1084 		error = import_buffer(&scnt_ie, scount, &kscount,
1085 		    sizeof (kscount), IE_EXPORT);
1086 
1087 	if (error == 0)
1088 		error = HYPERVISOR_mmuext_op(kop, count, &kscount, domid);
1089 	export_buffer(&op_ie, &error);
1090 	export_buffer(&scnt_ie, &error);
1091 
1092 	DTRACE_XPV1(mmu__ext__op__end, int, error);
1093 
1094 	if (count > 1)
1095 		kmem_free(kop, bytes);
1096 	return (error);
1097 }
1098 
1099 static int
1100 privcmd_HYPERVISOR_hvm_op(int cmd, void *arg)
1101 {
1102 	int error;
1103 	int size = 0;
1104 	import_export_t arg_ie;
1105 	uint32_t flags = IE_IMPORT;
1106 
1107 	switch (cmd) {
1108 	case HVMOP_set_param:
1109 	case HVMOP_get_param:
1110 		size = sizeof (struct xen_hvm_param);
1111 		flags = IE_IMPEXP;
1112 		break;
1113 	case HVMOP_set_pci_intx_level:
1114 		size = sizeof (struct xen_hvm_set_pci_intx_level);
1115 		break;
1116 	case HVMOP_set_isa_irq_level:
1117 		size = sizeof (struct xen_hvm_set_isa_irq_level);
1118 		break;
1119 	case HVMOP_set_pci_link_route:
1120 		size = sizeof (struct xen_hvm_set_pci_link_route);
1121 		break;
1122 	case HVMOP_track_dirty_vram:
1123 		size = sizeof (struct xen_hvm_track_dirty_vram);
1124 		break;
1125 	case HVMOP_modified_memory:
1126 		size = sizeof (struct xen_hvm_modified_memory);
1127 		break;
1128 	case HVMOP_set_mem_type:
1129 		size = sizeof (struct xen_hvm_set_mem_type);
1130 		break;
1131 
1132 	default:
1133 #ifdef DEBUG
1134 		printf("unrecognized HVM op 0x%x\n", cmd);
1135 #endif
1136 		return (-X_EINVAL);
1137 	}
1138 
1139 	error = import_buffer(&arg_ie, arg, NULL, size, flags);
1140 	if (error == 0)
1141 		error = HYPERVISOR_hvm_op(cmd, arg_ie.ie_kaddr);
1142 	export_buffer(&arg_ie, &error);
1143 
1144 	return (error);
1145 }
1146 
1147 static int
1148 privcmd_HYPERVISOR_sched_op(int cmd, void *arg)
1149 {
1150 	int error;
1151 	int size = 0;
1152 	import_export_t op_ie;
1153 	struct sched_remote_shutdown op;
1154 
1155 	switch (cmd) {
1156 	case SCHEDOP_remote_shutdown:
1157 		size = sizeof (struct sched_remote_shutdown);
1158 		break;
1159 	default:
1160 #ifdef DEBUG
1161 		printf("unrecognized sched op 0x%x\n", cmd);
1162 #endif
1163 		return (-X_EINVAL);
1164 	}
1165 
1166 	error = import_buffer(&op_ie, arg, &op, size, IE_IMPORT);
1167 	if (error == 0)
1168 		error = HYPERVISOR_sched_op(cmd, (arg == NULL) ? NULL : &op);
1169 	export_buffer(&op_ie, &error);
1170 
1171 	return (error);
1172 }
1173 
/*
 * When non-zero, do_privcmd_hypercall() forwards any hypercall it has no
 * dedicated wrapper for directly to __hypercall5() instead of failing it
 * with -X_EPERM.  Off by default.
 */
int allow_all_hypercalls = 0;
/*
 * NOTE(review): not referenced in this part of the file; presumably a
 * debugging switch for EFAULT handling -- confirm against its users.
 */
int privcmd_efault_debug = 0;
1176 
1177 /*ARGSUSED*/
1178 int
1179 do_privcmd_hypercall(void *uarg, int mode, cred_t *cr, int *rval)
1180 {
1181 	privcmd_hypercall_t __hc, *hc = &__hc;
1182 	int error;
1183 
1184 	if (ddi_copyin(uarg, hc, sizeof (*hc), mode))
1185 		return (EFAULT);
1186 
1187 	switch (hc->op) {
1188 	case __HYPERVISOR_mmu_update:
1189 		error = privcmd_HYPERVISOR_mmu_update(
1190 		    (mmu_update_t *)hc->arg[0], (int)hc->arg[1],
1191 		    (int *)hc->arg[2], (domid_t)hc->arg[3]);
1192 		break;
1193 	case __HYPERVISOR_domctl:
1194 		error = privcmd_HYPERVISOR_domctl(
1195 		    (xen_domctl_t *)hc->arg[0]);
1196 		break;
1197 	case __HYPERVISOR_sysctl:
1198 		error = privcmd_HYPERVISOR_sysctl(
1199 		    (xen_sysctl_t *)hc->arg[0]);
1200 		break;
1201 	case __HYPERVISOR_platform_op:
1202 		error = privcmd_HYPERVISOR_platform_op(
1203 		    (xen_platform_op_t *)hc->arg[0]);
1204 		break;
1205 	case __HYPERVISOR_memory_op:
1206 		error = privcmd_HYPERVISOR_memory_op(
1207 		    (int)hc->arg[0], (void *)hc->arg[1]);
1208 		break;
1209 	case __HYPERVISOR_event_channel_op:
1210 		error = privcmd_HYPERVISOR_event_channel_op(
1211 		    (int)hc->arg[0], (void *)hc->arg[1]);
1212 		break;
1213 	case __HYPERVISOR_xen_version:
1214 		error = privcmd_HYPERVISOR_xen_version(
1215 		    (int)hc->arg[0], (void *)hc->arg[1]);
1216 		break;
1217 	case __HYPERVISOR_mmuext_op:
1218 		error = privcmd_HYPERVISOR_mmuext_op(
1219 		    (struct mmuext_op *)hc->arg[0], (int)hc->arg[1],
1220 		    (uint_t *)hc->arg[2], (domid_t)hc->arg[3]);
1221 		break;
1222 	case __HYPERVISOR_xsm_op:
1223 		error = privcmd_HYPERVISOR_xsm_op((void *)hc->arg[0]);
1224 		break;
1225 	case __HYPERVISOR_hvm_op:
1226 		error = privcmd_HYPERVISOR_hvm_op(
1227 		    (int)hc->arg[0], (void *)hc->arg[1]);
1228 		break;
1229 	case __HYPERVISOR_sched_op:
1230 		error = privcmd_HYPERVISOR_sched_op(
1231 		    (int)hc->arg[0], (void *)hc->arg[1]);
1232 		break;
1233 	default:
1234 		if (allow_all_hypercalls)
1235 			error = __hypercall5(hc->op, hc->arg[0], hc->arg[1],
1236 			    hc->arg[2], hc->arg[3], hc->arg[4]);
1237 		else {
1238 #ifdef DEBUG
1239 			printf("unrecognized hypercall %ld\n", hc->op);
1240 #endif
1241 			error = -X_EPERM;
1242 		}
1243 		break;
1244 	}
1245 
1246 	if (error > 0) {
1247 		*rval = error;
1248 		error = 0;
1249 	} else if (error != 0)
1250 		error = xen_xlate_errcode(error);
1251 
1252 	return (error);
1253 }
1254