xref: /illumos-gate/usr/src/uts/i86pc/io/pci/pci_tools.c (revision 499fd601)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/mkdev.h>
30 #include <sys/stat.h>
31 #include <sys/sunddi.h>
32 #include <vm/seg_kmem.h>
33 #include <sys/machparam.h>
34 #include <sys/sunndi.h>
35 #include <sys/ontrap.h>
36 #include <sys/psm.h>
37 #include <sys/pcie.h>
38 #include <sys/hotplug/pci/pcihp.h>
39 #include <sys/pci_cfgspace.h>
40 #include <sys/pci_tools.h>
41 #include <io/pci/pci_tools_ext.h>
42 #include <sys/apic.h>
43 #include <io/pci/pci_var.h>
44 #include <sys/promif.h>
45 #include <sys/x86_archext.h>
46 #include <sys/cpuvar.h>
47 
48 #ifdef __xpv
49 #include <sys/hypervisor.h>
50 #endif
51 
/*
 * Bit positions of the bus, device and function numbers used when building a
 * PCI Express memory-mapped config space address (see
 * pcitool_pciex_cfg_access()).  Each one is the corresponding conventional
 * PCI_REG_*_SHIFT moved up by PCIEX_BDF_OFFSET_DELTA bits.
 */
#define	PCIEX_BDF_OFFSET_DELTA	4
#define	PCIEX_REG_FUNC_SHIFT	(PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define	PCIEX_REG_DEV_SHIFT	(PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define	PCIEX_REG_BUS_SHIFT	(PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)

/* "No error" return value used by the routines in this file. */
#define	SUCCESS	0

/* When non-zero, routines in this file print debug output via prom_printf. */
int pcitool_debug = 0;

/*
 * Offsets of BARS in config space.  First entry of 0 means config space.
 * Entries here correlate to pcitool_bars_t enumerated type.
 */
static uint8_t pci_bars[] = {
	0x0,
	PCI_CONF_BASE0,
	PCI_CONF_BASE1,
	PCI_CONF_BASE2,
	PCI_CONF_BASE3,
	PCI_CONF_BASE4,
	PCI_CONF_BASE5,
	PCI_CONF_ROM
};

/*
 * Max offset allowed into config space for a particular device.
 * pcitool_init() raises this to PCIE_CONF_HDR_SIZE when called with
 * is_pciex set.
 */
static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;

/* Forward declarations of the file-local access and mapping helpers. */
static uint64_t pcitool_swap_endian(uint64_t data, int size);
static int pcitool_pciex_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
    boolean_t write_flag);
static int pcitool_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
    boolean_t write_flag);
static int pcitool_io_access(dev_info_t *dip, pcitool_reg_t *prg,
    boolean_t write_flag);
static int pcitool_mem_access(dev_info_t *dip, pcitool_reg_t *prg,
    uint64_t virt_addr, boolean_t write_flag);
static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);

/* Extern declarations */
/* Entry point into the platform interrupt module, called for all PSM ops. */
extern int	(*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
		    psm_intr_op_t, int *);
94 
95 int
96 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
97 {
98 	int instance = ddi_get_instance(dip);
99 
100 	/* Create pcitool nodes for register access and interrupt routing. */
101 
102 	if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
103 	    PCIHP_AP_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
104 	    DDI_NT_REGACC, 0) != DDI_SUCCESS) {
105 		return (DDI_FAILURE);
106 	}
107 
108 	if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
109 	    PCIHP_AP_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
110 	    DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
111 		ddi_remove_minor_node(dip, PCI_MINOR_REG);
112 		return (DDI_FAILURE);
113 	}
114 
115 	if (is_pciex)
116 		max_cfg_size = PCIE_CONF_HDR_SIZE;
117 
118 	return (DDI_SUCCESS);
119 }
120 
121 void
122 pcitool_uninit(dev_info_t *dip)
123 {
124 	ddi_remove_minor_node(dip, PCI_MINOR_INTR);
125 	ddi_remove_minor_node(dip, PCI_MINOR_REG);
126 }
127 
128 /*ARGSUSED*/
129 static int
130 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
131 {
132 	ddi_intr_handle_impl_t info_hdl;
133 	pcitool_intr_set_t iset;
134 	uint32_t old_cpu;
135 	int ret, result;
136 	size_t copyinout_size;
137 	int rval = SUCCESS;
138 
139 	/* Version 1 of pcitool_intr_set_t doesn't have flags. */
140 	copyinout_size = (size_t)&iset.flags - (size_t)&iset;
141 
142 	if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
143 		return (EFAULT);
144 
145 	switch (iset.user_version) {
146 	case PCITOOL_V1:
147 		break;
148 
149 	case PCITOOL_V2:
150 		copyinout_size = sizeof (pcitool_intr_set_t);
151 		if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
152 			return (EFAULT);
153 		break;
154 
155 	default:
156 		iset.status = PCITOOL_OUT_OF_RANGE;
157 		rval = ENOTSUP;
158 		goto done_set_intr;
159 	}
160 
161 	if (iset.ino > APIC_MAX_VECTOR) {
162 		rval = EINVAL;
163 		iset.status = PCITOOL_INVALID_INO;
164 		goto done_set_intr;
165 	}
166 
167 	iset.status = PCITOOL_SUCCESS;
168 
169 	if ((old_cpu = pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
170 		iset.status = PCITOOL_IO_ERROR;
171 		rval = EINVAL;
172 		goto done_set_intr;
173 	}
174 
175 
176 	old_cpu &= ~PSMGI_CPU_USER_BOUND;
177 
178 	/*
179 	 * For this locally-declared and used handle, ih_private will contain a
180 	 * CPU value, not an ihdl_plat_t as used for global interrupt handling.
181 	 */
182 	info_hdl.ih_vector = iset.ino;
183 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
184 	if (pcitool_debug)
185 		prom_printf("user version:%d, flags:0x%x\n",
186 		    iset.user_version, iset.flags);
187 
188 	result = ENOTSUP;
189 	if ((iset.user_version >= PCITOOL_V2) &&
190 	    (iset.flags & PCITOOL_INTR_SET_FLAG_GROUP)) {
191 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
192 		    &result);
193 	} else {
194 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
195 		    &result);
196 	}
197 
198 	if (ret != PSM_SUCCESS) {
199 		switch (result) {
200 		case EIO:		/* Error making the change */
201 			rval = EIO;
202 			iset.status = PCITOOL_IO_ERROR;
203 			break;
204 		case ENXIO:		/* Couldn't convert vector to irq */
205 			rval = EINVAL;
206 			iset.status = PCITOOL_INVALID_INO;
207 			break;
208 		case EINVAL:		/* CPU out of range */
209 			rval = EINVAL;
210 			iset.status = PCITOOL_INVALID_CPUID;
211 			break;
212 		case ENOTSUP:		/* Requested PSM intr ops missing */
213 			rval = ENOTSUP;
214 			iset.status = PCITOOL_IO_ERROR;
215 			break;
216 		}
217 	}
218 
219 	/* Return original CPU. */
220 	iset.cpu_id = old_cpu;
221 
222 done_set_intr:
223 	iset.drvr_version = PCITOOL_VERSION;
224 	if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
225 		rval = EFAULT;
226 	return (rval);
227 }
228 
229 
230 /* It is assumed that dip != NULL */
231 static void
232 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
233 {
234 	(void) strncpy(devs->driver_name,
235 	    ddi_driver_name(dip), MAXMODCONFNAME-1);
236 	devs->driver_name[MAXMODCONFNAME] = '\0';
237 	(void) ddi_pathname(dip, devs->path);
238 	devs->dev_inst = ddi_get_instance(dip);
239 }
240 
241 
242 /*ARGSUSED*/
243 static int
244 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
245 {
246 	/* Array part isn't used here, but oh well... */
247 	pcitool_intr_get_t partial_iget;
248 	pcitool_intr_get_t *iget = &partial_iget;
249 	size_t	iget_kmem_alloc_size = 0;
250 	uint8_t num_devs_ret;
251 	int copyout_rval;
252 	int rval = SUCCESS;
253 	int circ;
254 	int i;
255 
256 	ddi_intr_handle_impl_t info_hdl;
257 	apic_get_intr_t intr_info;
258 
259 	/* Read in just the header part, no array section. */
260 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
261 	    DDI_SUCCESS)
262 		return (EFAULT);
263 
264 	/* Validate argument. */
265 	if (partial_iget.ino > APIC_MAX_VECTOR) {
266 		partial_iget.status = PCITOOL_INVALID_INO;
267 		partial_iget.num_devs_ret = 0;
268 		rval = EINVAL;
269 		goto done_get_intr;
270 	}
271 
272 	num_devs_ret = partial_iget.num_devs_ret;
273 	intr_info.avgi_dip_list = NULL;
274 	intr_info.avgi_req_flags =
275 	    PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
276 	/*
277 	 * For this locally-declared and used handle, ih_private will contain a
278 	 * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
279 	 * global interrupt handling.
280 	 */
281 	info_hdl.ih_private = &intr_info;
282 	info_hdl.ih_vector = partial_iget.ino;
283 
284 	/* Caller wants device information returned. */
285 	if (num_devs_ret > 0) {
286 
287 		intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
288 
289 		/*
290 		 * Allocate room.
291 		 * If num_devs_ret == 0 iget remains pointing to partial_iget.
292 		 */
293 		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
294 		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
295 
296 		/* Read in whole structure to verify there's room. */
297 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
298 		    SUCCESS) {
299 
300 			/* Be consistent and just return EFAULT here. */
301 			kmem_free(iget, iget_kmem_alloc_size);
302 
303 			return (EFAULT);
304 		}
305 	}
306 
307 	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
308 	iget->ino = info_hdl.ih_vector;
309 
310 	/*
311 	 * Lock device tree branch from the pci root nexus on down if info will
312 	 * be extracted from dips returned from the tree.
313 	 */
314 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
315 		ndi_devi_enter(dip, &circ);
316 	}
317 
318 	/* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
319 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
320 	    PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
321 		iget->status = PCITOOL_IO_ERROR;
322 		iget->num_devs_ret = 0;
323 		rval = EINVAL;
324 		goto done_get_intr;
325 	}
326 
327 	/*
328 	 * Fill in the pcitool_intr_get_t to be returned,
329 	 * with the CPU, num_devs_ret and num_devs.
330 	 */
331 	iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
332 
333 	/* Number of devices returned by apic. */
334 	iget->num_devs = intr_info.avgi_num_devs;
335 
336 	/* Device info was returned. */
337 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
338 
339 		/*
340 		 * num devs returned is num devs ret by apic,
341 		 * space permitting.
342 		 */
343 		iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
344 
345 		/*
346 		 * Loop thru list of dips and extract driver, name and instance.
347 		 * Fill in the pcitool_intr_dev_t's with this info.
348 		 */
349 		for (i = 0; i < iget->num_devs_ret; i++)
350 			pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
351 			    &iget->dev[i]);
352 
353 		/* Free kmem_alloc'ed memory of the apic_get_intr_t */
354 		kmem_free(intr_info.avgi_dip_list,
355 		    intr_info.avgi_num_devs * sizeof (dev_info_t *));
356 	}
357 
358 done_get_intr:
359 
360 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
361 		ndi_devi_exit(dip, circ);
362 	}
363 
364 	iget->drvr_version = PCITOOL_VERSION;
365 	copyout_rval = ddi_copyout(iget, arg,
366 	    PCITOOL_IGET_SIZE(num_devs_ret), mode);
367 
368 	if (iget_kmem_alloc_size > 0)
369 		kmem_free(iget, iget_kmem_alloc_size);
370 
371 	if (copyout_rval != DDI_SUCCESS)
372 		rval = EFAULT;
373 
374 	return (rval);
375 }
376 
377 /*ARGSUSED*/
378 static int
379 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
380 {
381 	pcitool_intr_info_t intr_info;
382 	ddi_intr_handle_impl_t info_hdl;
383 	int rval = SUCCESS;
384 
385 	/* If we need user_version, and to ret same user version as passed in */
386 	if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
387 	    DDI_SUCCESS) {
388 		if (pcitool_debug)
389 			prom_printf("Error reading arguments\n");
390 		return (EFAULT);
391 	}
392 
393 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
394 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
395 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
396 		intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
397 		intr_info.ctlr_version = 0;
398 
399 	} else {
400 		intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
401 		if (strcmp((char *)info_hdl.ih_private,
402 		    APIC_PCPLUSMP_NAME) == 0)
403 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
404 		else
405 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
406 	}
407 
408 	intr_info.num_intr = APIC_MAX_VECTOR;
409 	intr_info.drvr_version = PCITOOL_VERSION;
410 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
411 	    DDI_SUCCESS) {
412 		if (pcitool_debug)
413 			prom_printf("Error returning arguments.\n");
414 		rval = EFAULT;
415 	}
416 
417 	return (rval);
418 }
419 
420 
421 
422 /*
423  * Main function for handling interrupt CPU binding requests and queries.
424  * Need to implement later
425  */
426 /*ARGSUSED*/
427 int
428 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
429 {
430 	int rval;
431 
432 	switch (cmd) {
433 
434 	/* Associate a new CPU with a given vector */
435 	case PCITOOL_DEVICE_SET_INTR:
436 		rval = pcitool_set_intr(dip, arg, mode);
437 		break;
438 
439 	case PCITOOL_DEVICE_GET_INTR:
440 		rval = pcitool_get_intr(dip, arg, mode);
441 		break;
442 
443 	case PCITOOL_SYSTEM_INTR_INFO:
444 		rval = pcitool_intr_info(dip, arg, mode);
445 		break;
446 
447 	default:
448 		rval = ENOTSUP;
449 	}
450 
451 	return (rval);
452 }
453 
454 
455 /*
456  * A note about ontrap handling:
457  *
458  * X86 systems on which this module was tested return FFs instead of bus errors
459  * when accessing devices with invalid addresses.  Ontrap handling, which
460  * gracefully handles kernel bus errors, is installed anyway, in case future
461  * X86 platforms require it.
462  */
463 
464 /*
465  * Perform register accesses on the nexus device itself.
466  * No explicit PCI nexus device for X86, so not applicable.
467  */
468 
469 /*ARGSUSED*/
470 int
471 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
472 {
473 	return (ENOTSUP);
474 }
475 
/*
 * Swap endianness.
 *
 * Reverses the byte order of the low-order "size" bytes of data and returns
 * them in the low-order "size" bytes of the result; all higher bytes of the
 * result are zero.  Callers pass sizes of 1, 2, 4 or 8; a size of 0 or less
 * yields 0.
 */
static uint64_t
pcitool_swap_endian(uint64_t data, int size)
{
	uint64_t swapped = 0;
	int nbyte;

	/*
	 * Peel bytes off the bottom of the input and push them onto the
	 * bottom of the output, so the first byte taken ends up highest.
	 */
	for (nbyte = 0; nbyte < size; nbyte++) {
		swapped = (swapped << 8) | (data & 0xff);
		data >>= 8;
	}

	return (swapped);
}
498 
499 
500 /*
501  * Access device.  prg is modified.
502  *
503  * Extended config space is available only through memory-mapped access.
504  * Standard config space on pci express devices is available either way,
505  * so do it memory-mapped here too, for simplicity.
506  */
507 /*ARGSUSED*/
508 static int
509 pcitool_pciex_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
510     boolean_t write_flag)
511 {
512 	int rval = SUCCESS;
513 	uint64_t virt_addr;
514 	size_t	num_virt_pages;
515 
516 	prg->status = PCITOOL_SUCCESS;
517 
518 	prg->phys_addr = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
519 	    "ecfga-base-address", 0);
520 	if (prg->phys_addr == 0) {
521 		prg->status = PCITOOL_IO_ERROR;
522 		return (EIO);
523 	}
524 
525 	prg->phys_addr += prg->offset +
526 	    ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
527 	    (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
528 	    (prg->func_no << PCIEX_REG_FUNC_SHIFT));
529 
530 	virt_addr = pcitool_map(prg->phys_addr,
531 	    PCITOOL_ACC_ATTR_SIZE(prg->acc_attr), &num_virt_pages);
532 	if (virt_addr == NULL) {
533 		prg->status = PCITOOL_IO_ERROR;
534 		return (EIO);
535 	}
536 
537 	rval = pcitool_mem_access(dip, prg, virt_addr, write_flag);
538 	pcitool_unmap(virt_addr, num_virt_pages);
539 	return (rval);
540 }
541 
542 /* Access device.  prg is modified. */
543 /*ARGSUSED*/
544 static int
545 pcitool_cfg_access(dev_info_t *dip, pcitool_reg_t *prg, boolean_t write_flag)
546 {
547 	int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
548 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
549 	int rval = SUCCESS;
550 	uint64_t local_data;
551 
552 	/*
553 	 * NOTE: there is no way to verify whether or not the address is valid.
554 	 * The put functions return void and the get functions return ff on
555 	 * error.
556 	 */
557 	prg->status = PCITOOL_SUCCESS;
558 
559 	if (write_flag) {
560 
561 		if (big_endian) {
562 			local_data = pcitool_swap_endian(prg->data, size);
563 		} else {
564 			local_data = prg->data;
565 		}
566 
567 		switch (size) {
568 		case 1:
569 			(*pci_putb_func)(prg->bus_no, prg->dev_no,
570 			    prg->func_no, prg->offset, local_data);
571 			break;
572 		case 2:
573 			(*pci_putw_func)(prg->bus_no, prg->dev_no,
574 			    prg->func_no, prg->offset, local_data);
575 			break;
576 		case 4:
577 			(*pci_putl_func)(prg->bus_no, prg->dev_no,
578 			    prg->func_no, prg->offset, local_data);
579 			break;
580 		default:
581 			rval = ENOTSUP;
582 			prg->status = PCITOOL_INVALID_SIZE;
583 			break;
584 		}
585 	} else {
586 		switch (size) {
587 		case 1:
588 			local_data = (*pci_getb_func)(prg->bus_no, prg->dev_no,
589 			    prg->func_no, prg->offset);
590 			break;
591 		case 2:
592 			local_data = (*pci_getw_func)(prg->bus_no, prg->dev_no,
593 			    prg->func_no, prg->offset);
594 			break;
595 		case 4:
596 			local_data = (*pci_getl_func)(prg->bus_no, prg->dev_no,
597 			    prg->func_no, prg->offset);
598 			break;
599 		default:
600 			rval = ENOTSUP;
601 			prg->status = PCITOOL_INVALID_SIZE;
602 			break;
603 		}
604 
605 		if (rval == SUCCESS) {
606 			if (big_endian) {
607 				prg->data =
608 				    pcitool_swap_endian(local_data, size);
609 			} else {
610 				prg->data = local_data;
611 			}
612 		}
613 	}
614 	prg->phys_addr = 0;	/* Config space is not memory mapped on X86. */
615 	return (rval);
616 }
617 
618 
619 /*ARGSUSED*/
620 static int
621 pcitool_io_access(dev_info_t *dip, pcitool_reg_t *prg, boolean_t write_flag)
622 {
623 	int port = (int)prg->phys_addr;
624 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
625 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
626 	int rval = SUCCESS;
627 	on_trap_data_t otd;
628 	uint64_t local_data;
629 
630 
631 	/*
632 	 * on_trap works like setjmp.
633 	 *
634 	 * A non-zero return here means on_trap has returned from an error.
635 	 *
636 	 * A zero return here means that on_trap has just returned from setup.
637 	 */
638 	if (on_trap(&otd, OT_DATA_ACCESS)) {
639 		no_trap();
640 		if (pcitool_debug)
641 			prom_printf(
642 			    "pcitool_io_access: on_trap caught an error...\n");
643 		prg->status = PCITOOL_INVALID_ADDRESS;
644 		return (EFAULT);
645 	}
646 
647 	if (write_flag) {
648 
649 		if (big_endian) {
650 			local_data = pcitool_swap_endian(prg->data, size);
651 		} else {
652 			local_data = prg->data;
653 		}
654 
655 		if (pcitool_debug)
656 			prom_printf("Writing %ld byte(s) to port 0x%x\n",
657 			    size, port);
658 
659 		switch (size) {
660 		case 1:
661 			outb(port, (uint8_t)local_data);
662 			break;
663 		case 2:
664 			outw(port, (uint16_t)local_data);
665 			break;
666 		case 4:
667 			outl(port, (uint32_t)local_data);
668 			break;
669 		default:
670 			rval = ENOTSUP;
671 			prg->status = PCITOOL_INVALID_SIZE;
672 			break;
673 		}
674 	} else {
675 		if (pcitool_debug)
676 			prom_printf("Reading %ld byte(s) from port 0x%x\n",
677 			    size, port);
678 
679 		switch (size) {
680 		case 1:
681 			local_data = inb(port);
682 			break;
683 		case 2:
684 			local_data = inw(port);
685 			break;
686 		case 4:
687 			local_data = inl(port);
688 			break;
689 		default:
690 			rval = ENOTSUP;
691 			prg->status = PCITOOL_INVALID_SIZE;
692 			break;
693 		}
694 
695 		if (rval == SUCCESS) {
696 			if (big_endian) {
697 				prg->data =
698 				    pcitool_swap_endian(local_data, size);
699 			} else {
700 				prg->data = local_data;
701 			}
702 		}
703 	}
704 
705 	no_trap();
706 	return (rval);
707 }
708 
709 /*ARGSUSED*/
710 static int
711 pcitool_mem_access(dev_info_t *dip, pcitool_reg_t *prg, uint64_t virt_addr,
712 	boolean_t write_flag)
713 {
714 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
715 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
716 	int rval = DDI_SUCCESS;
717 	on_trap_data_t otd;
718 	uint64_t local_data;
719 
720 	/*
721 	 * on_trap works like setjmp.
722 	 *
723 	 * A non-zero return here means on_trap has returned from an error.
724 	 *
725 	 * A zero return here means that on_trap has just returned from setup.
726 	 */
727 	if (on_trap(&otd, OT_DATA_ACCESS)) {
728 		no_trap();
729 		if (pcitool_debug)
730 			prom_printf(
731 			    "pcitool_mem_access: on_trap caught an error...\n");
732 		prg->status = PCITOOL_INVALID_ADDRESS;
733 		return (EFAULT);
734 	}
735 
736 	if (write_flag) {
737 
738 		if (big_endian) {
739 			local_data = pcitool_swap_endian(prg->data, size);
740 		} else {
741 			local_data = prg->data;
742 		}
743 
744 		switch (size) {
745 		case 1:
746 			*((uint8_t *)(uintptr_t)virt_addr) = local_data;
747 			break;
748 		case 2:
749 			*((uint16_t *)(uintptr_t)virt_addr) = local_data;
750 			break;
751 		case 4:
752 			*((uint32_t *)(uintptr_t)virt_addr) = local_data;
753 			break;
754 		case 8:
755 			*((uint64_t *)(uintptr_t)virt_addr) = local_data;
756 			break;
757 		default:
758 			rval = ENOTSUP;
759 			prg->status = PCITOOL_INVALID_SIZE;
760 			break;
761 		}
762 	} else {
763 		switch (size) {
764 		case 1:
765 			local_data = *((uint8_t *)(uintptr_t)virt_addr);
766 			break;
767 		case 2:
768 			local_data = *((uint16_t *)(uintptr_t)virt_addr);
769 			break;
770 		case 4:
771 			local_data = *((uint32_t *)(uintptr_t)virt_addr);
772 			break;
773 		case 8:
774 			local_data = *((uint64_t *)(uintptr_t)virt_addr);
775 			break;
776 		default:
777 			rval = ENOTSUP;
778 			prg->status = PCITOOL_INVALID_SIZE;
779 			break;
780 		}
781 
782 		if (rval == SUCCESS) {
783 			if (big_endian) {
784 				prg->data =
785 				    pcitool_swap_endian(local_data, size);
786 			} else {
787 				prg->data = local_data;
788 			}
789 		}
790 	}
791 
792 	no_trap();
793 	return (rval);
794 }
795 
796 /*
797  * Map up to 2 pages which contain the address we want to access.
798  *
799  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
800  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
801  * We'll never have to map more than two pages.
802  */
803 
804 static uint64_t
805 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
806 {
807 
808 	uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
809 	uint64_t offset = phys_addr & MMU_PAGEOFFSET;
810 	void *virt_base;
811 	uint64_t returned_addr;
812 	pfn_t pfn;
813 
814 	if (pcitool_debug)
815 		prom_printf("pcitool_map: Called with PA:0x%p\n",
816 		    (uint8_t *)(uintptr_t)phys_addr);
817 
818 	*num_pages = 1;
819 
820 	/* Desired mapping would span more than two pages. */
821 	if ((offset + size) > (MMU_PAGESIZE * 2)) {
822 		if (pcitool_debug)
823 			prom_printf("boundary violation: "
824 			    "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
825 			    offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
826 		return (NULL);
827 
828 	} else if ((offset + size) > MMU_PAGESIZE) {
829 		(*num_pages)++;
830 	}
831 
832 	/* Get page(s) of virtual space. */
833 	virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
834 	if (virt_base == NULL) {
835 		if (pcitool_debug)
836 			prom_printf("Couldn't get virtual base address.\n");
837 		return (NULL);
838 	}
839 
840 	if (pcitool_debug)
841 		prom_printf("Got base virtual address:0x%p\n", virt_base);
842 
843 #ifdef __xpv
844 	/*
845 	 * We should only get here if we are dom0.
846 	 * We're using a real device so we need to translate the MA to a PFN.
847 	 */
848 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
849 	pfn = xen_assign_pfn(mmu_btop(page_base));
850 #else
851 	pfn = btop(page_base);
852 #endif
853 
854 	/* Now map the allocated virtual space to the physical address. */
855 	hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
856 	    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
857 	    HAT_LOAD_LOCK);
858 
859 	returned_addr = ((uintptr_t)(virt_base)) + offset;
860 
861 	if (pcitool_debug)
862 		prom_printf("pcitool_map: returning VA:0x%p\n",
863 		    (void *)(uintptr_t)returned_addr);
864 
865 	return (returned_addr);
866 }
867 
868 /* Unmap the mapped page(s). */
869 static void
870 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
871 {
872 	void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
873 
874 	hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
875 	    HAT_UNLOAD_UNLOCK);
876 	vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
877 }
878 
879 
880 /* Perform register accesses on PCI leaf devices. */
881 int
882 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
883 {
884 	boolean_t	write_flag = B_FALSE;
885 	int		rval = 0;
886 	pcitool_reg_t	prg;
887 	uint8_t		size;
888 
889 	uint64_t	base_addr;
890 	uint64_t	virt_addr;
891 	size_t		num_virt_pages;
892 
893 	switch (cmd) {
894 	case (PCITOOL_DEVICE_SET_REG):
895 		write_flag = B_TRUE;
896 
897 	/*FALLTHRU*/
898 	case (PCITOOL_DEVICE_GET_REG):
899 		if (pcitool_debug)
900 			prom_printf("pci_dev_reg_ops set/get reg\n");
901 		if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
902 		    DDI_SUCCESS) {
903 			if (pcitool_debug)
904 				prom_printf("Error reading arguments\n");
905 			return (EFAULT);
906 		}
907 
908 		if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
909 			prg.status = PCITOOL_OUT_OF_RANGE;
910 			rval = EINVAL;
911 			goto done_reg;
912 		}
913 
914 		if (pcitool_debug)
915 			prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
916 			    prg.bus_no, prg.dev_no, prg.func_no);
917 		/* Validate address arguments of bus / dev / func */
918 		if (((prg.bus_no &
919 		    (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
920 		    prg.bus_no) ||
921 		    ((prg.dev_no &
922 		    (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
923 		    prg.dev_no) ||
924 		    ((prg.func_no &
925 		    (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
926 		    prg.func_no)) {
927 			prg.status = PCITOOL_INVALID_ADDRESS;
928 			rval = EINVAL;
929 			goto done_reg;
930 		}
931 
932 		size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
933 
934 		/* Proper config space desired. */
935 		if (prg.barnum == 0) {
936 
937 			if (pcitool_debug)
938 				prom_printf(
939 				    "config access: offset:0x%" PRIx64 ", "
940 				    "phys_addr:0x%" PRIx64 "\n",
941 				    prg.offset, prg.phys_addr);
942 
943 			if (prg.offset >= max_cfg_size) {
944 				prg.status = PCITOOL_OUT_OF_RANGE;
945 				rval = EINVAL;
946 				goto done_reg;
947 			}
948 
949 			/*
950 			 * Access device.  prg is modified.
951 			 * First, check for AMD northbridges for I/O access
952 			 * (This fix will move in future to pcitool user-land)
953 			 * Next, check for PCIe devices and do
954 			 * memory-mapped access
955 			 * Lastly, check for PCI devices and do I/O access
956 			 */
957 			if ((prg.bus_no == 0) &&
958 			    (prg.dev_no >= 0x18) &&
959 			    (prg.dev_no < (0x18 + ncpus))) {
960 				if (cpuid_getvendor(CPU) == X86_VENDOR_AMD)
961 					rval = pcitool_cfg_access(dip, &prg,
962 					    write_flag);
963 			} else if (max_cfg_size == PCIE_CONF_HDR_SIZE)
964 				rval = pcitool_pciex_cfg_access(dip, &prg,
965 				    write_flag);
966 			else
967 				rval = pcitool_cfg_access(dip, &prg,
968 				    write_flag);
969 
970 			if (pcitool_debug)
971 				prom_printf(
972 				    "config access: data:0x%" PRIx64 "\n",
973 				    prg.data);
974 
975 		/* IO/ MEM/ MEM64 space. */
976 		} else {
977 
978 			pcitool_reg_t	prg2;
979 			bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
980 
981 			/*
982 			 * Translate BAR number into offset of the BAR in
983 			 * the device's config space.
984 			 */
985 			prg2.offset = pci_bars[prg2.barnum];
986 			prg2.acc_attr =
987 			    PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
988 
989 			if (pcitool_debug)
990 				prom_printf(
991 				    "barnum:%d, bar_offset:0x%" PRIx64 "\n",
992 				    prg2.barnum, prg2.offset);
993 			/*
994 			 * Get Bus Address Register (BAR) from config space.
995 			 * prg2.offset is the offset into config space of the
996 			 * BAR desired.  prg.status is modified on error.
997 			 */
998 			rval = pcitool_cfg_access(dip, &prg2, B_FALSE);
999 			if (rval != SUCCESS) {
1000 				if (pcitool_debug)
1001 					prom_printf("BAR access failed\n");
1002 				prg.status = prg2.status;
1003 				goto done_reg;
1004 			}
1005 			/*
1006 			 * Reference proper PCI space based on the BAR.
1007 			 * If 64 bit MEM space, need to load other half of the
1008 			 * BAR first.
1009 			 */
1010 
1011 			if (pcitool_debug)
1012 				prom_printf("bar returned is 0x%" PRIx64 "\n",
1013 				    prg2.data);
1014 			if (!prg2.data) {
1015 				if (pcitool_debug)
1016 					prom_printf("BAR data == 0\n");
1017 				rval = EINVAL;
1018 				prg.status = PCITOOL_INVALID_ADDRESS;
1019 				goto done_reg;
1020 			}
1021 			if (prg2.data == 0xffffffff) {
1022 				if (pcitool_debug)
1023 					prom_printf("BAR data == -1\n");
1024 				rval = EINVAL;
1025 				prg.status = PCITOOL_INVALID_ADDRESS;
1026 				goto done_reg;
1027 			}
1028 
1029 			/*
1030 			 * BAR has bits saying this space is IO space, unless
1031 			 * this is the ROM address register.
1032 			 */
1033 			if (((PCI_BASE_SPACE_M & prg2.data) ==
1034 			    PCI_BASE_SPACE_IO) &&
1035 			    (prg2.offset != PCI_CONF_ROM)) {
1036 				if (pcitool_debug)
1037 					prom_printf("IO space\n");
1038 
1039 				prg2.data &= PCI_BASE_IO_ADDR_M;
1040 				prg.phys_addr = prg2.data + prg.offset;
1041 
1042 				rval = pcitool_io_access(dip, &prg, write_flag);
1043 				if ((rval != SUCCESS) && (pcitool_debug))
1044 					prom_printf("IO access failed\n");
1045 
1046 				goto done_reg;
1047 
1048 
1049 			/*
1050 			 * BAR has bits saying this space is 64 bit memory
1051 			 * space, unless this is the ROM address register.
1052 			 *
1053 			 * The 64 bit address stored in two BAR cells is not
1054 			 * necessarily aligned on an 8-byte boundary.
1055 			 * Need to keep the first 4 bytes read,
1056 			 * and do a separate read of the high 4 bytes.
1057 			 */
1058 
1059 			} else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1060 			    (prg2.offset != PCI_CONF_ROM)) {
1061 
1062 				uint32_t low_bytes =
1063 				    (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1064 
1065 				/*
1066 				 * Don't try to read the next 4 bytes
1067 				 * past the end of BARs.
1068 				 */
1069 				if (prg2.offset >= PCI_CONF_BASE5) {
1070 					prg.status = PCITOOL_OUT_OF_RANGE;
1071 					rval = EIO;
1072 					goto done_reg;
1073 				}
1074 
1075 				/*
1076 				 * Access device.
1077 				 * prg2.status is modified on error.
1078 				 */
1079 				prg2.offset += 4;
1080 				rval = pcitool_cfg_access(dip, &prg2, B_FALSE);
1081 				if (rval != SUCCESS) {
1082 					prg.status = prg2.status;
1083 					goto done_reg;
1084 				}
1085 
1086 				if (prg2.data == 0xffffffff) {
1087 					prg.status = PCITOOL_INVALID_ADDRESS;
1088 					prg.status = EFAULT;
1089 					goto done_reg;
1090 				}
1091 
1092 				prg2.data = (prg2.data << 32) + low_bytes;
1093 				if (pcitool_debug)
1094 					prom_printf(
1095 					    "64 bit mem space.  "
1096 					    "64-bit bar is 0x%" PRIx64 "\n",
1097 					    prg2.data);
1098 
1099 			/* Mem32 space, including ROM */
1100 			} else {
1101 
1102 				if (prg2.offset == PCI_CONF_ROM) {
1103 					if (pcitool_debug)
1104 						prom_printf(
1105 						    "Additional ROM "
1106 						    "checking\n");
1107 					/* Can't write to ROM */
1108 					if (write_flag) {
1109 						prg.status = PCITOOL_ROM_WRITE;
1110 						rval = EIO;
1111 						goto done_reg;
1112 
1113 					/* ROM disabled for reading */
1114 					} else if (!(prg2.data & 0x00000001)) {
1115 						prg.status =
1116 						    PCITOOL_ROM_DISABLED;
1117 						rval = EIO;
1118 						goto done_reg;
1119 					}
1120 				}
1121 
1122 				if (pcitool_debug)
1123 					prom_printf("32 bit mem space\n");
1124 			}
1125 
1126 			/* Common code for all IO/MEM range spaces. */
1127 
1128 			base_addr = prg2.data;
1129 			if (pcitool_debug)
1130 				prom_printf(
1131 				    "addr portion of bar is 0x%" PRIx64 ", "
1132 				    "base=0x%" PRIx64 ", "
1133 				    "offset:0x%" PRIx64 "\n",
1134 				    prg2.data, base_addr, prg.offset);
1135 			/*
1136 			 * Use offset provided by caller to index into
1137 			 * desired space, then access.
1138 			 * Note that prg.status is modified on error.
1139 			 */
1140 			prg.phys_addr = base_addr + prg.offset;
1141 
1142 			virt_addr = pcitool_map(prg.phys_addr, size,
1143 			    &num_virt_pages);
1144 			if (virt_addr == NULL) {
1145 				prg.status = PCITOOL_IO_ERROR;
1146 				rval = EIO;
1147 				goto done_reg;
1148 			}
1149 
1150 			rval = pcitool_mem_access(dip, &prg, virt_addr,
1151 			    write_flag);
1152 			pcitool_unmap(virt_addr, num_virt_pages);
1153 		}
1154 done_reg:
1155 		prg.drvr_version = PCITOOL_VERSION;
1156 		if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1157 		    DDI_SUCCESS) {
1158 			if (pcitool_debug)
1159 				prom_printf("Error returning arguments.\n");
1160 			rval = EFAULT;
1161 		}
1162 		break;
1163 	default:
1164 		rval = ENOTTY;
1165 		break;
1166 	}
1167 	return (rval);
1168 }
1169