xref: /illumos-gate/usr/src/uts/i86pc/io/pci/pci_tools.c (revision c279fc79)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/mkdev.h>
28 #include <sys/stat.h>
29 #include <sys/sunddi.h>
30 #include <vm/seg_kmem.h>
31 #include <sys/machparam.h>
32 #include <sys/sunndi.h>
33 #include <sys/ontrap.h>
34 #include <sys/psm.h>
35 #include <sys/pcie.h>
36 #include <sys/hotplug/pci/pcihp.h>
37 #include <sys/pci_cfgspace.h>
38 #include <sys/pci_tools.h>
39 #include <io/pci/pci_tools_ext.h>
40 #include <sys/apic.h>
41 #include <io/pci/pci_var.h>
42 #include <sys/promif.h>
43 #include <sys/x86_archext.h>
44 #include <sys/cpuvar.h>
45 
46 #ifdef __xpv
47 #include <sys/hypervisor.h>
48 #endif
49 
/*
 * Bit positions of the bus / device / function fields inside a memory-mapped
 * PCI Express config space address: the conventional PCI_REG_*_SHIFT values
 * moved up by PCIEX_BDF_OFFSET_DELTA bits.  Used by
 * pcitool_pciex_cfg_access() to build the physical address of a register.
 */
#define	PCIEX_BDF_OFFSET_DELTA	4
#define	PCIEX_REG_FUNC_SHIFT	(PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define	PCIEX_REG_DEV_SHIFT	(PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
#define	PCIEX_REG_BUS_SHIFT	(PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)

/* Zero "no error" value used for the rval of the routines in this file. */
#define	SUCCESS	0

/* Set nonzero to enable the prom_printf() debug messages in this file. */
int pcitool_debug = 0;
58 
59 /*
60  * Offsets of BARS in config space.  First entry of 0 means config space.
61  * Entries here correlate to pcitool_bars_t enumerated type.
62  */
63 static uint8_t pci_bars[] = {
64 	0x0,
65 	PCI_CONF_BASE0,
66 	PCI_CONF_BASE1,
67 	PCI_CONF_BASE2,
68 	PCI_CONF_BASE3,
69 	PCI_CONF_BASE4,
70 	PCI_CONF_BASE5,
71 	PCI_CONF_ROM
72 };
73 
/*
 * Max offset allowed into config space for a particular device.
 *
 * Starts out as PCI_CONF_HDR_SIZE; pcitool_init() raises it to
 * PCIE_CONF_HDR_SIZE when it is called with is_pciex set.  The value is
 * file-global, so once raised it applies to every caller of
 * pcitool_dev_reg_ops().
 */
static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;

/* Forward declarations of the file-local access and mapping helpers. */
static uint64_t pcitool_swap_endian(uint64_t data, int size);
static int pcitool_pciex_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
    boolean_t write_flag);
static int pcitool_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
    boolean_t write_flag);
static int pcitool_io_access(dev_info_t *dip, pcitool_reg_t *prg,
    boolean_t write_flag);
static int pcitool_mem_access(dev_info_t *dip, pcitool_reg_t *prg,
    uint64_t virt_addr, boolean_t write_flag);
static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);

/* Extern declarations */
/* PSM interrupt operations entry point; called here with a NULL dip. */
extern int	(*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
		    psm_intr_op_t, int *);
92 
93 int
94 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
95 {
96 	int instance = ddi_get_instance(dip);
97 
98 	/* Create pcitool nodes for register access and interrupt routing. */
99 
100 	if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
101 	    PCIHP_AP_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
102 	    DDI_NT_REGACC, 0) != DDI_SUCCESS) {
103 		return (DDI_FAILURE);
104 	}
105 
106 	if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
107 	    PCIHP_AP_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
108 	    DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
109 		ddi_remove_minor_node(dip, PCI_MINOR_REG);
110 		return (DDI_FAILURE);
111 	}
112 
113 	if (is_pciex)
114 		max_cfg_size = PCIE_CONF_HDR_SIZE;
115 
116 	return (DDI_SUCCESS);
117 }
118 
/*
 * Remove the two minor nodes created by pcitool_init(), in the reverse
 * order of their creation.
 */
void
pcitool_uninit(dev_info_t *dip)
{
	ddi_remove_minor_node(dip, PCI_MINOR_INTR);
	ddi_remove_minor_node(dip, PCI_MINOR_REG);
}
125 
126 /*ARGSUSED*/
127 static int
128 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
129 {
130 	ddi_intr_handle_impl_t info_hdl;
131 	pcitool_intr_set_t iset;
132 	uint32_t old_cpu;
133 	int ret, result;
134 	size_t copyinout_size;
135 	int rval = SUCCESS;
136 
137 	/* Version 1 of pcitool_intr_set_t doesn't have flags. */
138 	copyinout_size = (size_t)&iset.flags - (size_t)&iset;
139 
140 	if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
141 		return (EFAULT);
142 
143 	switch (iset.user_version) {
144 	case PCITOOL_V1:
145 		break;
146 
147 	case PCITOOL_V2:
148 		copyinout_size = sizeof (pcitool_intr_set_t);
149 		if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
150 			return (EFAULT);
151 		break;
152 
153 	default:
154 		iset.status = PCITOOL_OUT_OF_RANGE;
155 		rval = ENOTSUP;
156 		goto done_set_intr;
157 	}
158 
159 	if (iset.ino > APIC_MAX_VECTOR) {
160 		rval = EINVAL;
161 		iset.status = PCITOOL_INVALID_INO;
162 		goto done_set_intr;
163 	}
164 
165 	iset.status = PCITOOL_SUCCESS;
166 
167 	if ((old_cpu = pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
168 		iset.status = PCITOOL_IO_ERROR;
169 		rval = EINVAL;
170 		goto done_set_intr;
171 	}
172 
173 
174 	old_cpu &= ~PSMGI_CPU_USER_BOUND;
175 
176 	/*
177 	 * For this locally-declared and used handle, ih_private will contain a
178 	 * CPU value, not an ihdl_plat_t as used for global interrupt handling.
179 	 */
180 	info_hdl.ih_vector = iset.ino;
181 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
182 	if (pcitool_debug)
183 		prom_printf("user version:%d, flags:0x%x\n",
184 		    iset.user_version, iset.flags);
185 
186 	result = ENOTSUP;
187 	if ((iset.user_version >= PCITOOL_V2) &&
188 	    (iset.flags & PCITOOL_INTR_SET_FLAG_GROUP)) {
189 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
190 		    &result);
191 	} else {
192 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
193 		    &result);
194 	}
195 
196 	if (ret != PSM_SUCCESS) {
197 		switch (result) {
198 		case EIO:		/* Error making the change */
199 			rval = EIO;
200 			iset.status = PCITOOL_IO_ERROR;
201 			break;
202 		case ENXIO:		/* Couldn't convert vector to irq */
203 			rval = EINVAL;
204 			iset.status = PCITOOL_INVALID_INO;
205 			break;
206 		case EINVAL:		/* CPU out of range */
207 			rval = EINVAL;
208 			iset.status = PCITOOL_INVALID_CPUID;
209 			break;
210 		case ENOTSUP:		/* Requested PSM intr ops missing */
211 			rval = ENOTSUP;
212 			iset.status = PCITOOL_IO_ERROR;
213 			break;
214 		}
215 	}
216 
217 	/* Return original CPU. */
218 	iset.cpu_id = old_cpu;
219 
220 done_set_intr:
221 	iset.drvr_version = PCITOOL_VERSION;
222 	if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
223 		rval = EFAULT;
224 	return (rval);
225 }
226 
227 
228 /* It is assumed that dip != NULL */
229 static void
230 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
231 {
232 	(void) strncpy(devs->driver_name,
233 	    ddi_driver_name(dip), MAXMODCONFNAME-2);
234 	devs->driver_name[MAXMODCONFNAME-1] = '\0';
235 	(void) ddi_pathname(dip, devs->path);
236 	devs->dev_inst = ddi_get_instance(dip);
237 }
238 
239 
240 /*ARGSUSED*/
241 static int
242 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
243 {
244 	/* Array part isn't used here, but oh well... */
245 	pcitool_intr_get_t partial_iget;
246 	pcitool_intr_get_t *iget = &partial_iget;
247 	size_t	iget_kmem_alloc_size = 0;
248 	uint8_t num_devs_ret;
249 	int copyout_rval;
250 	int rval = SUCCESS;
251 	int circ;
252 	int i;
253 
254 	ddi_intr_handle_impl_t info_hdl;
255 	apic_get_intr_t intr_info;
256 
257 	/* Read in just the header part, no array section. */
258 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
259 	    DDI_SUCCESS)
260 		return (EFAULT);
261 
262 	/* Validate argument. */
263 	if (partial_iget.ino > APIC_MAX_VECTOR) {
264 		partial_iget.status = PCITOOL_INVALID_INO;
265 		partial_iget.num_devs_ret = 0;
266 		rval = EINVAL;
267 		goto done_get_intr;
268 	}
269 
270 	num_devs_ret = partial_iget.num_devs_ret;
271 	intr_info.avgi_dip_list = NULL;
272 	intr_info.avgi_req_flags =
273 	    PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
274 	/*
275 	 * For this locally-declared and used handle, ih_private will contain a
276 	 * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
277 	 * global interrupt handling.
278 	 */
279 	info_hdl.ih_private = &intr_info;
280 	info_hdl.ih_vector = partial_iget.ino;
281 
282 	/* Caller wants device information returned. */
283 	if (num_devs_ret > 0) {
284 
285 		intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
286 
287 		/*
288 		 * Allocate room.
289 		 * If num_devs_ret == 0 iget remains pointing to partial_iget.
290 		 */
291 		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
292 		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
293 
294 		/* Read in whole structure to verify there's room. */
295 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
296 		    SUCCESS) {
297 
298 			/* Be consistent and just return EFAULT here. */
299 			kmem_free(iget, iget_kmem_alloc_size);
300 
301 			return (EFAULT);
302 		}
303 	}
304 
305 	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
306 	iget->ino = info_hdl.ih_vector;
307 
308 	/*
309 	 * Lock device tree branch from the pci root nexus on down if info will
310 	 * be extracted from dips returned from the tree.
311 	 */
312 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
313 		ndi_devi_enter(dip, &circ);
314 	}
315 
316 	/* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
317 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
318 	    PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
319 		iget->status = PCITOOL_IO_ERROR;
320 		iget->num_devs_ret = 0;
321 		rval = EINVAL;
322 		goto done_get_intr;
323 	}
324 
325 	/*
326 	 * Fill in the pcitool_intr_get_t to be returned,
327 	 * with the CPU, num_devs_ret and num_devs.
328 	 */
329 	iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
330 
331 	/* Number of devices returned by apic. */
332 	iget->num_devs = intr_info.avgi_num_devs;
333 
334 	/* Device info was returned. */
335 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
336 
337 		/*
338 		 * num devs returned is num devs ret by apic,
339 		 * space permitting.
340 		 */
341 		iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
342 
343 		/*
344 		 * Loop thru list of dips and extract driver, name and instance.
345 		 * Fill in the pcitool_intr_dev_t's with this info.
346 		 */
347 		for (i = 0; i < iget->num_devs_ret; i++)
348 			pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
349 			    &iget->dev[i]);
350 
351 		/* Free kmem_alloc'ed memory of the apic_get_intr_t */
352 		kmem_free(intr_info.avgi_dip_list,
353 		    intr_info.avgi_num_devs * sizeof (dev_info_t *));
354 	}
355 
356 done_get_intr:
357 
358 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
359 		ndi_devi_exit(dip, circ);
360 	}
361 
362 	iget->drvr_version = PCITOOL_VERSION;
363 	copyout_rval = ddi_copyout(iget, arg,
364 	    PCITOOL_IGET_SIZE(num_devs_ret), mode);
365 
366 	if (iget_kmem_alloc_size > 0)
367 		kmem_free(iget, iget_kmem_alloc_size);
368 
369 	if (copyout_rval != DDI_SUCCESS)
370 		rval = EFAULT;
371 
372 	return (rval);
373 }
374 
375 /*ARGSUSED*/
376 static int
377 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
378 {
379 	pcitool_intr_info_t intr_info;
380 	ddi_intr_handle_impl_t info_hdl;
381 	int rval = SUCCESS;
382 
383 	/* If we need user_version, and to ret same user version as passed in */
384 	if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
385 	    DDI_SUCCESS) {
386 		if (pcitool_debug)
387 			prom_printf("Error reading arguments\n");
388 		return (EFAULT);
389 	}
390 
391 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
392 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
393 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
394 		intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
395 		intr_info.ctlr_version = 0;
396 
397 	} else {
398 		intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
399 		if (strcmp((char *)info_hdl.ih_private,
400 		    APIC_PCPLUSMP_NAME) == 0)
401 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
402 		else
403 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
404 	}
405 
406 	intr_info.num_intr = APIC_MAX_VECTOR;
407 	intr_info.drvr_version = PCITOOL_VERSION;
408 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
409 	    DDI_SUCCESS) {
410 		if (pcitool_debug)
411 			prom_printf("Error returning arguments.\n");
412 		rval = EFAULT;
413 	}
414 
415 	return (rval);
416 }
417 
418 
419 
420 /*
421  * Main function for handling interrupt CPU binding requests and queries.
422  * Need to implement later
423  */
424 /*ARGSUSED*/
425 int
426 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
427 {
428 	int rval;
429 
430 	switch (cmd) {
431 
432 	/* Associate a new CPU with a given vector */
433 	case PCITOOL_DEVICE_SET_INTR:
434 		rval = pcitool_set_intr(dip, arg, mode);
435 		break;
436 
437 	case PCITOOL_DEVICE_GET_INTR:
438 		rval = pcitool_get_intr(dip, arg, mode);
439 		break;
440 
441 	case PCITOOL_SYSTEM_INTR_INFO:
442 		rval = pcitool_intr_info(dip, arg, mode);
443 		break;
444 
445 	default:
446 		rval = ENOTSUP;
447 	}
448 
449 	return (rval);
450 }
451 
452 
453 /*
454  * A note about ontrap handling:
455  *
456  * X86 systems on which this module was tested return FFs instead of bus errors
457  * when accessing devices with invalid addresses.  Ontrap handling, which
458  * gracefully handles kernel bus errors, is installed anyway, in case future
459  * X86 platforms require it.
460  */
461 
/*
 * Perform register accesses on the nexus device itself.
 * No explicit PCI nexus device for X86, so not applicable.
 *
 * All arguments are ignored; the routine always returns ENOTSUP.
 */

/*ARGSUSED*/
int
pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
{
	return (ENOTSUP);
}
473 
/*
 * Reverse the byte order of the first "size" bytes of data, as they are
 * laid out in memory, and return them in the same positions.  All remaining
 * bytes of the result are zero.
 *
 * size is expected to be between 0 and 8; the callers in this file pass the
 * width of the access being performed.
 */
static uint64_t
pcitool_swap_endian(uint64_t data, int size)
{
	union byte_view {
		uint64_t whole;
		uint8_t bytes[8];
	};

	union byte_view in;
	union byte_view out;
	int dst;
	int src;

	in.whole = data;
	out.whole = 0;

	/* Walk the output upwards while walking the input downwards. */
	for (dst = 0, src = size - 1; dst < size; dst++, src--)
		out.bytes[dst] = in.bytes[src];

	return (out.whole);
}
496 
497 
498 /*
499  * Access device.  prg is modified.
500  *
501  * Extended config space is available only through memory-mapped access.
502  * Standard config space on pci express devices is available either way,
503  * so do it memory-mapped here too, for simplicity, if allowed by MCFG.
504  * If anything fails, return EINVAL so caller can try I/O access.
505  */
506 /*ARGSUSED*/
507 static int
508 pcitool_pciex_cfg_access(dev_info_t *dip, pcitool_reg_t *prg,
509     boolean_t write_flag)
510 {
511 	int rval = SUCCESS;
512 	uint64_t virt_addr;
513 	size_t	num_virt_pages;
514 	int first_bus, last_bus;
515 	int64_t *ecfginfo;
516 	uint_t nelem;
517 
518 	prg->status = PCITOOL_SUCCESS;
519 
520 	if (ddi_prop_lookup_int64_array(DDI_DEV_T_ANY, dip, 0,
521 	    "ecfg", &ecfginfo, &nelem) == DDI_PROP_SUCCESS) {
522 
523 		/*
524 		 * We must have a four-element property; base addr [0] must
525 		 * be nonzero.  Also, segment [1] must be 0 for now; we don't
526 		 * handle nonzero segments (or create a property containing
527 		 * them)
528 		 */
529 		if ((nelem != 4) || (ecfginfo[0] == 0) || (ecfginfo[1] != 0)) {
530 			ddi_prop_free(ecfginfo);
531 			return (EINVAL);
532 		}
533 
534 		prg->phys_addr = ecfginfo[0];
535 		first_bus = ecfginfo[2];
536 		last_bus = ecfginfo[3];
537 
538 		ddi_prop_free(ecfginfo);
539 
540 		if (prg->bus_no < first_bus || prg->bus_no > last_bus)
541 			return (EINVAL);
542 	} else {
543 		return (EINVAL);
544 	}
545 
546 	prg->phys_addr += prg->offset +
547 	    ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
548 	    (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
549 	    (prg->func_no << PCIEX_REG_FUNC_SHIFT));
550 
551 	virt_addr = pcitool_map(prg->phys_addr,
552 	    PCITOOL_ACC_ATTR_SIZE(prg->acc_attr), &num_virt_pages);
553 
554 	if (virt_addr == NULL)
555 		return (EINVAL);
556 
557 	rval = pcitool_mem_access(dip, prg, virt_addr, write_flag);
558 	pcitool_unmap(virt_addr, num_virt_pages);
559 	return (rval);
560 }
561 
562 /* Access device.  prg is modified. */
563 /*ARGSUSED*/
564 static int
565 pcitool_cfg_access(dev_info_t *dip, pcitool_reg_t *prg, boolean_t write_flag)
566 {
567 	int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
568 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
569 	int rval = SUCCESS;
570 	uint64_t local_data;
571 
572 	/*
573 	 * NOTE: there is no way to verify whether or not the address is
574 	 * valid other than that it is within the maximum offset.  The
575 	 * put functions return void and the get functions return ff on
576 	 * error.
577 	 */
578 
579 	if (prg->offset + size - 1 > 0xFF) {
580 		prg->status = PCITOOL_INVALID_ADDRESS;
581 		return (ENOTSUP);
582 	}
583 
584 	prg->status = PCITOOL_SUCCESS;
585 
586 	if (write_flag) {
587 
588 		if (big_endian) {
589 			local_data = pcitool_swap_endian(prg->data, size);
590 		} else {
591 			local_data = prg->data;
592 		}
593 
594 		switch (size) {
595 		case 1:
596 			(*pci_putb_func)(prg->bus_no, prg->dev_no,
597 			    prg->func_no, prg->offset, local_data);
598 			break;
599 		case 2:
600 			(*pci_putw_func)(prg->bus_no, prg->dev_no,
601 			    prg->func_no, prg->offset, local_data);
602 			break;
603 		case 4:
604 			(*pci_putl_func)(prg->bus_no, prg->dev_no,
605 			    prg->func_no, prg->offset, local_data);
606 			break;
607 		default:
608 			rval = ENOTSUP;
609 			prg->status = PCITOOL_INVALID_SIZE;
610 			break;
611 		}
612 	} else {
613 		switch (size) {
614 		case 1:
615 			local_data = (*pci_getb_func)(prg->bus_no, prg->dev_no,
616 			    prg->func_no, prg->offset);
617 			break;
618 		case 2:
619 			local_data = (*pci_getw_func)(prg->bus_no, prg->dev_no,
620 			    prg->func_no, prg->offset);
621 			break;
622 		case 4:
623 			local_data = (*pci_getl_func)(prg->bus_no, prg->dev_no,
624 			    prg->func_no, prg->offset);
625 			break;
626 		default:
627 			rval = ENOTSUP;
628 			prg->status = PCITOOL_INVALID_SIZE;
629 			break;
630 		}
631 
632 		if (rval == SUCCESS) {
633 			if (big_endian) {
634 				prg->data =
635 				    pcitool_swap_endian(local_data, size);
636 			} else {
637 				prg->data = local_data;
638 			}
639 		}
640 	}
641 	prg->phys_addr = 0;	/* Config space is not memory mapped on X86. */
642 	return (rval);
643 }
644 
645 
646 /*ARGSUSED*/
647 static int
648 pcitool_io_access(dev_info_t *dip, pcitool_reg_t *prg, boolean_t write_flag)
649 {
650 	int port = (int)prg->phys_addr;
651 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
652 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
653 	int rval = SUCCESS;
654 	on_trap_data_t otd;
655 	uint64_t local_data;
656 
657 
658 	/*
659 	 * on_trap works like setjmp.
660 	 *
661 	 * A non-zero return here means on_trap has returned from an error.
662 	 *
663 	 * A zero return here means that on_trap has just returned from setup.
664 	 */
665 	if (on_trap(&otd, OT_DATA_ACCESS)) {
666 		no_trap();
667 		if (pcitool_debug)
668 			prom_printf(
669 			    "pcitool_io_access: on_trap caught an error...\n");
670 		prg->status = PCITOOL_INVALID_ADDRESS;
671 		return (EFAULT);
672 	}
673 
674 	if (write_flag) {
675 
676 		if (big_endian) {
677 			local_data = pcitool_swap_endian(prg->data, size);
678 		} else {
679 			local_data = prg->data;
680 		}
681 
682 		if (pcitool_debug)
683 			prom_printf("Writing %ld byte(s) to port 0x%x\n",
684 			    size, port);
685 
686 		switch (size) {
687 		case 1:
688 			outb(port, (uint8_t)local_data);
689 			break;
690 		case 2:
691 			outw(port, (uint16_t)local_data);
692 			break;
693 		case 4:
694 			outl(port, (uint32_t)local_data);
695 			break;
696 		default:
697 			rval = ENOTSUP;
698 			prg->status = PCITOOL_INVALID_SIZE;
699 			break;
700 		}
701 	} else {
702 		if (pcitool_debug)
703 			prom_printf("Reading %ld byte(s) from port 0x%x\n",
704 			    size, port);
705 
706 		switch (size) {
707 		case 1:
708 			local_data = inb(port);
709 			break;
710 		case 2:
711 			local_data = inw(port);
712 			break;
713 		case 4:
714 			local_data = inl(port);
715 			break;
716 		default:
717 			rval = ENOTSUP;
718 			prg->status = PCITOOL_INVALID_SIZE;
719 			break;
720 		}
721 
722 		if (rval == SUCCESS) {
723 			if (big_endian) {
724 				prg->data =
725 				    pcitool_swap_endian(local_data, size);
726 			} else {
727 				prg->data = local_data;
728 			}
729 		}
730 	}
731 
732 	no_trap();
733 	return (rval);
734 }
735 
736 /*ARGSUSED*/
737 static int
738 pcitool_mem_access(dev_info_t *dip, pcitool_reg_t *prg, uint64_t virt_addr,
739 	boolean_t write_flag)
740 {
741 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
742 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
743 	int rval = DDI_SUCCESS;
744 	on_trap_data_t otd;
745 	uint64_t local_data;
746 
747 	/*
748 	 * on_trap works like setjmp.
749 	 *
750 	 * A non-zero return here means on_trap has returned from an error.
751 	 *
752 	 * A zero return here means that on_trap has just returned from setup.
753 	 */
754 	if (on_trap(&otd, OT_DATA_ACCESS)) {
755 		no_trap();
756 		if (pcitool_debug)
757 			prom_printf(
758 			    "pcitool_mem_access: on_trap caught an error...\n");
759 		prg->status = PCITOOL_INVALID_ADDRESS;
760 		return (EFAULT);
761 	}
762 
763 	if (write_flag) {
764 
765 		if (big_endian) {
766 			local_data = pcitool_swap_endian(prg->data, size);
767 		} else {
768 			local_data = prg->data;
769 		}
770 
771 		switch (size) {
772 		case 1:
773 			*((uint8_t *)(uintptr_t)virt_addr) = local_data;
774 			break;
775 		case 2:
776 			*((uint16_t *)(uintptr_t)virt_addr) = local_data;
777 			break;
778 		case 4:
779 			*((uint32_t *)(uintptr_t)virt_addr) = local_data;
780 			break;
781 		case 8:
782 			*((uint64_t *)(uintptr_t)virt_addr) = local_data;
783 			break;
784 		default:
785 			rval = ENOTSUP;
786 			prg->status = PCITOOL_INVALID_SIZE;
787 			break;
788 		}
789 	} else {
790 		switch (size) {
791 		case 1:
792 			local_data = *((uint8_t *)(uintptr_t)virt_addr);
793 			break;
794 		case 2:
795 			local_data = *((uint16_t *)(uintptr_t)virt_addr);
796 			break;
797 		case 4:
798 			local_data = *((uint32_t *)(uintptr_t)virt_addr);
799 			break;
800 		case 8:
801 			local_data = *((uint64_t *)(uintptr_t)virt_addr);
802 			break;
803 		default:
804 			rval = ENOTSUP;
805 			prg->status = PCITOOL_INVALID_SIZE;
806 			break;
807 		}
808 
809 		if (rval == SUCCESS) {
810 			if (big_endian) {
811 				prg->data =
812 				    pcitool_swap_endian(local_data, size);
813 			} else {
814 				prg->data = local_data;
815 			}
816 		}
817 	}
818 
819 	no_trap();
820 	return (rval);
821 }
822 
823 /*
824  * Map up to 2 pages which contain the address we want to access.
825  *
826  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
827  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
828  * We'll never have to map more than two pages.
829  */
830 
831 static uint64_t
832 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
833 {
834 
835 	uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
836 	uint64_t offset = phys_addr & MMU_PAGEOFFSET;
837 	void *virt_base;
838 	uint64_t returned_addr;
839 	pfn_t pfn;
840 
841 	if (pcitool_debug)
842 		prom_printf("pcitool_map: Called with PA:0x%p\n",
843 		    (void *)(uintptr_t)phys_addr);
844 
845 	*num_pages = 1;
846 
847 	/* Desired mapping would span more than two pages. */
848 	if ((offset + size) > (MMU_PAGESIZE * 2)) {
849 		if (pcitool_debug)
850 			prom_printf("boundary violation: "
851 			    "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
852 			    offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
853 		return (NULL);
854 
855 	} else if ((offset + size) > MMU_PAGESIZE) {
856 		(*num_pages)++;
857 	}
858 
859 	/* Get page(s) of virtual space. */
860 	virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
861 	if (virt_base == NULL) {
862 		if (pcitool_debug)
863 			prom_printf("Couldn't get virtual base address.\n");
864 		return (NULL);
865 	}
866 
867 	if (pcitool_debug)
868 		prom_printf("Got base virtual address:0x%p\n", virt_base);
869 
870 #ifdef __xpv
871 	/*
872 	 * We should only get here if we are dom0.
873 	 * We're using a real device so we need to translate the MA to a PFN.
874 	 */
875 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
876 	pfn = xen_assign_pfn(mmu_btop(page_base));
877 #else
878 	pfn = btop(page_base);
879 #endif
880 
881 	/* Now map the allocated virtual space to the physical address. */
882 	hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
883 	    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
884 	    HAT_LOAD_LOCK);
885 
886 	returned_addr = ((uintptr_t)(virt_base)) + offset;
887 
888 	if (pcitool_debug)
889 		prom_printf("pcitool_map: returning VA:0x%p\n",
890 		    (void *)(uintptr_t)returned_addr);
891 
892 	return (returned_addr);
893 }
894 
895 /* Unmap the mapped page(s). */
896 static void
897 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
898 {
899 	void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
900 
901 	hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
902 	    HAT_UNLOAD_UNLOCK);
903 	vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
904 }
905 
906 
907 /* Perform register accesses on PCI leaf devices. */
908 int
909 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
910 {
911 	boolean_t	write_flag = B_FALSE;
912 	int		rval = 0;
913 	pcitool_reg_t	prg;
914 	uint8_t		size;
915 
916 	uint64_t	base_addr;
917 	uint64_t	virt_addr;
918 	size_t		num_virt_pages;
919 
920 	switch (cmd) {
921 	case (PCITOOL_DEVICE_SET_REG):
922 		write_flag = B_TRUE;
923 
924 	/*FALLTHRU*/
925 	case (PCITOOL_DEVICE_GET_REG):
926 		if (pcitool_debug)
927 			prom_printf("pci_dev_reg_ops set/get reg\n");
928 		if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
929 		    DDI_SUCCESS) {
930 			if (pcitool_debug)
931 				prom_printf("Error reading arguments\n");
932 			return (EFAULT);
933 		}
934 
935 		if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
936 			prg.status = PCITOOL_OUT_OF_RANGE;
937 			rval = EINVAL;
938 			goto done_reg;
939 		}
940 
941 		if (pcitool_debug)
942 			prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
943 			    prg.bus_no, prg.dev_no, prg.func_no);
944 		/* Validate address arguments of bus / dev / func */
945 		if (((prg.bus_no &
946 		    (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
947 		    prg.bus_no) ||
948 		    ((prg.dev_no &
949 		    (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
950 		    prg.dev_no) ||
951 		    ((prg.func_no &
952 		    (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
953 		    prg.func_no)) {
954 			prg.status = PCITOOL_INVALID_ADDRESS;
955 			rval = EINVAL;
956 			goto done_reg;
957 		}
958 
959 		size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
960 
961 		/* Proper config space desired. */
962 		if (prg.barnum == 0) {
963 
964 			if (pcitool_debug)
965 				prom_printf(
966 				    "config access: offset:0x%" PRIx64 ", "
967 				    "phys_addr:0x%" PRIx64 "\n",
968 				    prg.offset, prg.phys_addr);
969 
970 			if (prg.offset >= max_cfg_size) {
971 				prg.status = PCITOOL_OUT_OF_RANGE;
972 				rval = EINVAL;
973 				goto done_reg;
974 			}
975 
976 			/*
977 			 * Access device.  prg is modified.
978 			 * First, check for AMD northbridges for I/O access
979 			 * (This fix will move in future to pcitool user-land)
980 			 * Next, check for PCIe devices and do
981 			 * memory-mapped access
982 			 * Lastly, check for PCI devices and do I/O access
983 			 */
984 			if ((prg.bus_no == 0) &&
985 			    (prg.dev_no >= 0x18) &&
986 			    (prg.dev_no < (0x18 + ncpus)) &&
987 			    (cpuid_getvendor(CPU) == X86_VENDOR_AMD)) {
988 				rval = pcitool_cfg_access(dip, &prg,
989 				    write_flag);
990 			} else if (max_cfg_size == PCIE_CONF_HDR_SIZE) {
991 				rval = pcitool_pciex_cfg_access(dip, &prg,
992 				    write_flag);
993 				if (rval == EINVAL) {
994 					/* Not valid for MMIO; try IO */
995 					rval = pcitool_cfg_access(dip, &prg,
996 					    write_flag);
997 				}
998 			} else {
999 				rval = pcitool_cfg_access(dip, &prg,
1000 				    write_flag);
1001 			}
1002 
1003 			if (pcitool_debug)
1004 				prom_printf(
1005 				    "config access: data:0x%" PRIx64 "\n",
1006 				    prg.data);
1007 
1008 		/* IO/ MEM/ MEM64 space. */
1009 		} else {
1010 
1011 			pcitool_reg_t	prg2;
1012 			bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
1013 
1014 			/*
1015 			 * Translate BAR number into offset of the BAR in
1016 			 * the device's config space.
1017 			 */
1018 			prg2.offset = pci_bars[prg2.barnum];
1019 			prg2.acc_attr =
1020 			    PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
1021 
1022 			if (pcitool_debug)
1023 				prom_printf(
1024 				    "barnum:%d, bar_offset:0x%" PRIx64 "\n",
1025 				    prg2.barnum, prg2.offset);
1026 			/*
1027 			 * Get Bus Address Register (BAR) from config space.
1028 			 * prg2.offset is the offset into config space of the
1029 			 * BAR desired.  prg.status is modified on error.
1030 			 */
1031 			rval = pcitool_cfg_access(dip, &prg2, B_FALSE);
1032 			if (rval != SUCCESS) {
1033 				if (pcitool_debug)
1034 					prom_printf("BAR access failed\n");
1035 				prg.status = prg2.status;
1036 				goto done_reg;
1037 			}
1038 			/*
1039 			 * Reference proper PCI space based on the BAR.
1040 			 * If 64 bit MEM space, need to load other half of the
1041 			 * BAR first.
1042 			 */
1043 
1044 			if (pcitool_debug)
1045 				prom_printf("bar returned is 0x%" PRIx64 "\n",
1046 				    prg2.data);
1047 			if (!prg2.data) {
1048 				if (pcitool_debug)
1049 					prom_printf("BAR data == 0\n");
1050 				rval = EINVAL;
1051 				prg.status = PCITOOL_INVALID_ADDRESS;
1052 				goto done_reg;
1053 			}
1054 			if (prg2.data == 0xffffffff) {
1055 				if (pcitool_debug)
1056 					prom_printf("BAR data == -1\n");
1057 				rval = EINVAL;
1058 				prg.status = PCITOOL_INVALID_ADDRESS;
1059 				goto done_reg;
1060 			}
1061 
1062 			/*
1063 			 * BAR has bits saying this space is IO space, unless
1064 			 * this is the ROM address register.
1065 			 */
1066 			if (((PCI_BASE_SPACE_M & prg2.data) ==
1067 			    PCI_BASE_SPACE_IO) &&
1068 			    (prg2.offset != PCI_CONF_ROM)) {
1069 				if (pcitool_debug)
1070 					prom_printf("IO space\n");
1071 
1072 				prg2.data &= PCI_BASE_IO_ADDR_M;
1073 				prg.phys_addr = prg2.data + prg.offset;
1074 
1075 				rval = pcitool_io_access(dip, &prg, write_flag);
1076 				if ((rval != SUCCESS) && (pcitool_debug))
1077 					prom_printf("IO access failed\n");
1078 
1079 				goto done_reg;
1080 
1081 
1082 			/*
1083 			 * BAR has bits saying this space is 64 bit memory
1084 			 * space, unless this is the ROM address register.
1085 			 *
1086 			 * The 64 bit address stored in two BAR cells is not
1087 			 * necessarily aligned on an 8-byte boundary.
1088 			 * Need to keep the first 4 bytes read,
1089 			 * and do a separate read of the high 4 bytes.
1090 			 */
1091 
1092 			} else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1093 			    (prg2.offset != PCI_CONF_ROM)) {
1094 
1095 				uint32_t low_bytes =
1096 				    (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1097 
1098 				/*
1099 				 * Don't try to read the next 4 bytes
1100 				 * past the end of BARs.
1101 				 */
1102 				if (prg2.offset >= PCI_CONF_BASE5) {
1103 					prg.status = PCITOOL_OUT_OF_RANGE;
1104 					rval = EIO;
1105 					goto done_reg;
1106 				}
1107 
1108 				/*
1109 				 * Access device.
1110 				 * prg2.status is modified on error.
1111 				 */
1112 				prg2.offset += 4;
1113 				rval = pcitool_cfg_access(dip, &prg2, B_FALSE);
1114 				if (rval != SUCCESS) {
1115 					prg.status = prg2.status;
1116 					goto done_reg;
1117 				}
1118 
1119 				if (prg2.data == 0xffffffff) {
1120 					prg.status = PCITOOL_INVALID_ADDRESS;
1121 					prg.status = EFAULT;
1122 					goto done_reg;
1123 				}
1124 
1125 				prg2.data = (prg2.data << 32) + low_bytes;
1126 				if (pcitool_debug)
1127 					prom_printf(
1128 					    "64 bit mem space.  "
1129 					    "64-bit bar is 0x%" PRIx64 "\n",
1130 					    prg2.data);
1131 
1132 			/* Mem32 space, including ROM */
1133 			} else {
1134 
1135 				if (prg2.offset == PCI_CONF_ROM) {
1136 					if (pcitool_debug)
1137 						prom_printf(
1138 						    "Additional ROM "
1139 						    "checking\n");
1140 					/* Can't write to ROM */
1141 					if (write_flag) {
1142 						prg.status = PCITOOL_ROM_WRITE;
1143 						rval = EIO;
1144 						goto done_reg;
1145 
1146 					/* ROM disabled for reading */
1147 					} else if (!(prg2.data & 0x00000001)) {
1148 						prg.status =
1149 						    PCITOOL_ROM_DISABLED;
1150 						rval = EIO;
1151 						goto done_reg;
1152 					}
1153 				}
1154 
1155 				if (pcitool_debug)
1156 					prom_printf("32 bit mem space\n");
1157 			}
1158 
1159 			/* Common code for all IO/MEM range spaces. */
1160 
1161 			base_addr = prg2.data;
1162 			if (pcitool_debug)
1163 				prom_printf(
1164 				    "addr portion of bar is 0x%" PRIx64 ", "
1165 				    "base=0x%" PRIx64 ", "
1166 				    "offset:0x%" PRIx64 "\n",
1167 				    prg2.data, base_addr, prg.offset);
1168 			/*
1169 			 * Use offset provided by caller to index into
1170 			 * desired space, then access.
1171 			 * Note that prg.status is modified on error.
1172 			 */
1173 			prg.phys_addr = base_addr + prg.offset;
1174 
1175 			virt_addr = pcitool_map(prg.phys_addr, size,
1176 			    &num_virt_pages);
1177 			if (virt_addr == NULL) {
1178 				prg.status = PCITOOL_IO_ERROR;
1179 				rval = EIO;
1180 				goto done_reg;
1181 			}
1182 
1183 			rval = pcitool_mem_access(dip, &prg, virt_addr,
1184 			    write_flag);
1185 			pcitool_unmap(virt_addr, num_virt_pages);
1186 		}
1187 done_reg:
1188 		prg.drvr_version = PCITOOL_VERSION;
1189 		if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1190 		    DDI_SUCCESS) {
1191 			if (pcitool_debug)
1192 				prom_printf("Error returning arguments.\n");
1193 			rval = EFAULT;
1194 		}
1195 		break;
1196 	default:
1197 		rval = ENOTTY;
1198 		break;
1199 	}
1200 	return (rval);
1201 }
1202