xref: /illumos-gate/usr/src/uts/sun4u/io/px/px_lib4u.c (revision 4f60987d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/kmem.h>
28 #include <sys/conf.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/sunndi.h>
32 #include <sys/fm/protocol.h>
33 #include <sys/fm/util.h>
34 #include <sys/modctl.h>
35 #include <sys/disp.h>
36 #include <sys/stat.h>
37 #include <sys/ddi_impldefs.h>
38 #include <sys/vmem.h>
39 #include <sys/iommutsb.h>
40 #include <sys/cpuvar.h>
41 #include <sys/ivintr.h>
42 #include <sys/byteorder.h>
43 #include <sys/spl.h>
44 #include <px_obj.h>
45 #include <sys/pcie_pwr.h>
46 #include "px_tools_var.h"
47 #include <px_regs.h>
48 #include <px_csr.h>
49 #include <sys/machsystm.h>
50 #include "px_lib4u.h"
51 #include "px_err.h"
52 #include "oberon_regs.h"
53 #include <sys/hotplug/pci/pcie_hp.h>
54 
/*
 * jbus_stst_order() is a weak symbol; px_lib_dma_sync() compares its
 * address against NULL before calling it.
 */
#pragma weak jbus_stst_order

extern void jbus_stst_order();

/* Used by px_lib_dev_init() as the end of the "virtual-dma" range. */
ulong_t px_mmu_dvma_end = 0xfffffffful;
uint_t px_ranges_phi_mask = 0xfffffffful;
uint64_t *px_oberon_ubc_scratch_regs;
/* Set by px_lib_dev_init() to the Fire or Oberon MMU paddr mask. */
uint64_t px_paddr_mask;

/* Prototypes for static functions. */
static int px_goto_l23ready(px_t *px_p);
static int px_goto_l0(px_t *px_p);
static int px_pre_pwron_check(px_t *px_p);
static uint32_t px_identity_init(px_t *px_p);
static boolean_t px_cpr_callb(void *arg, int code);
static uint_t px_cb_intr(caddr_t arg);
70 
/*
 * ACKNAK Latency Threshold Table.
 * See Fire PRM 2.0 section 1.2.12.2, table 1-17.
 *
 * Dimensioned [LINK_MAX_PKT_ARR_SIZE][LINK_WIDTH_ARR_SIZE].  Presumably the
 * row is selected by maximum packet size and the column by link width
 * (inferred from the dimension names; confirm against the PRM table).
 */
int px_acknak_timer_table[LINK_MAX_PKT_ARR_SIZE][LINK_WIDTH_ARR_SIZE] = {
	{0xED,   0x49,  0x43,  0x30},
	{0x1A0,  0x76,  0x6B,  0x48},
	{0x22F,  0x9A,  0x56,  0x56},
	{0x42F,  0x11A, 0x96,  0x96},
	{0x82F,  0x21A, 0x116, 0x116},
	{0x102F, 0x41A, 0x216, 0x216}
};
83 
/*
 * TxLink Replay Timer Latency Table
 * See Fire PRM 2.0 section 1.2.12.3, table 1-18.
 *
 * Dimensioned [LINK_MAX_PKT_ARR_SIZE][LINK_WIDTH_ARR_SIZE].  Presumably the
 * row is selected by maximum packet size and the column by link width
 * (inferred from the dimension names; confirm against the PRM table).
 */
int px_replay_timer_table[LINK_MAX_PKT_ARR_SIZE][LINK_WIDTH_ARR_SIZE] = {
	{0x379,  0x112, 0xFC,  0xB4},
	{0x618,  0x1BA, 0x192, 0x10E},
	{0x831,  0x242, 0x143, 0x143},
	{0xFB1,  0x422, 0x233, 0x233},
	{0x1EB0, 0x7E1, 0x412, 0x412},
	{0x3CB0, 0xF61, 0x7D2, 0x7D2}
};
96 /*
97  * px_lib_map_registers
98  *
99  * This function is called from the attach routine to map the registers
100  * accessed by this driver.
101  *
102  * used by: px_attach()
103  *
104  * return value: DDI_FAILURE on failure
105  */
106 int
107 px_lib_map_regs(pxu_t *pxu_p, dev_info_t *dip)
108 {
109 	ddi_device_acc_attr_t	attr;
110 	px_reg_bank_t		reg_bank = PX_REG_CSR;
111 
112 	DBG(DBG_ATTACH, dip, "px_lib_map_regs: pxu_p:0x%p, dip 0x%p\n",
113 	    pxu_p, dip);
114 
115 	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
116 	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
117 	attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
118 
119 	/*
120 	 * PCI CSR Base
121 	 */
122 	if (ddi_regs_map_setup(dip, reg_bank, &pxu_p->px_address[reg_bank],
123 	    0, 0, &attr, &pxu_p->px_ac[reg_bank]) != DDI_SUCCESS) {
124 		goto fail;
125 	}
126 
127 	reg_bank++;
128 
129 	/*
130 	 * XBUS CSR Base
131 	 */
132 	if (ddi_regs_map_setup(dip, reg_bank, &pxu_p->px_address[reg_bank],
133 	    0, 0, &attr, &pxu_p->px_ac[reg_bank]) != DDI_SUCCESS) {
134 		goto fail;
135 	}
136 
137 	pxu_p->px_address[reg_bank] -= FIRE_CONTROL_STATUS;
138 
139 done:
140 	for (; reg_bank >= PX_REG_CSR; reg_bank--) {
141 		DBG(DBG_ATTACH, dip, "reg_bank 0x%x address 0x%p\n",
142 		    reg_bank, pxu_p->px_address[reg_bank]);
143 	}
144 
145 	return (DDI_SUCCESS);
146 
147 fail:
148 	cmn_err(CE_WARN, "%s%d: unable to map reg entry %d\n",
149 	    ddi_driver_name(dip), ddi_get_instance(dip), reg_bank);
150 
151 	for (reg_bank--; reg_bank >= PX_REG_CSR; reg_bank--) {
152 		pxu_p->px_address[reg_bank] = NULL;
153 		ddi_regs_map_free(&pxu_p->px_ac[reg_bank]);
154 	}
155 
156 	return (DDI_FAILURE);
157 }
158 
159 /*
160  * px_lib_unmap_regs:
161  *
162  * This routine unmaps the registers mapped by map_px_registers.
163  *
164  * used by: px_detach(), and error conditions in px_attach()
165  *
166  * return value: none
167  */
168 void
169 px_lib_unmap_regs(pxu_t *pxu_p)
170 {
171 	int i;
172 
173 	for (i = 0; i < PX_REG_MAX; i++) {
174 		if (pxu_p->px_ac[i])
175 			ddi_regs_map_free(&pxu_p->px_ac[i]);
176 	}
177 }
178 
/*
 * px_lib_dev_init
 *
 * sun4u platform initialization for one px instance.  In order, it:
 * identifies the chip, allocates the pxu_t, maps the registers, allocates
 * the IOMMU TSB, creates the "virtual-dma" property, calls the
 * hvio_{cb,ib,pec,mmu}_init() routines, links the pxu_t into the px state,
 * calls px_err_reg_setup_pcie() with PX_ERR_ENABLE, and stores the CSR base
 * address in *dev_hdl.
 *
 * return value: DDI_SUCCESS, or DDI_FAILURE when the chip is unidentified,
 * the registers cannot be mapped, or the chip type is not one the switch
 * below handles.
 */
int
px_lib_dev_init(dev_info_t *dip, devhandle_t *dev_hdl)
{

	caddr_t			xbc_csr_base, csr_base;
	px_dvma_range_prop_t	px_dvma_range;
	pxu_t			*pxu_p;
	uint8_t			chip_mask;
	px_t			*px_p = DIP_TO_STATE(dip);
	px_chip_type_t		chip_type = px_identity_init(px_p);

	DBG(DBG_ATTACH, dip, "px_lib_dev_init: dip 0x%p", dip);

	if (chip_type == PX_CHIP_UNIDENTIFIED) {
		cmn_err(CE_WARN, "%s%d: Unrecognized Hardware Version\n",
		    NAMEINST(dip));
		return (DDI_FAILURE);
	}

	chip_mask = BITMASK(chip_type);
	/* Global, not per-instance: the last attached chip type wins. */
	px_paddr_mask = (chip_type == PX_CHIP_FIRE) ? MMU_FIRE_PADDR_MASK :
	    MMU_OBERON_PADDR_MASK;

	/*
	 * Allocate platform specific structure and link it to
	 * the px state structure.
	 */
	pxu_p = kmem_zalloc(sizeof (pxu_t), KM_SLEEP);
	pxu_p->chip_type = chip_type;
	/* "portid" defaults to -1 when the property is absent. */
	pxu_p->portid  = ddi_getprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "portid", -1);

	/* Map in the registers */
	if (px_lib_map_regs(pxu_p, dip) == DDI_FAILURE) {
		kmem_free(pxu_p, sizeof (pxu_t));

		return (DDI_FAILURE);
	}

	xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC];
	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];

	/* The TSB is looked up by portid; size and VA come from the cookie. */
	pxu_p->tsb_cookie = iommu_tsb_alloc(pxu_p->portid);
	pxu_p->tsb_size = iommu_tsb_cookie_to_size(pxu_p->tsb_cookie);
	pxu_p->tsb_vaddr = iommu_tsb_cookie_to_va(pxu_p->tsb_cookie);

	pxu_p->tsb_paddr = va_to_pa(pxu_p->tsb_vaddr);

	/*
	 * Create "virtual-dma" property to support child devices
	 * needing to know DVMA range.
	 *
	 * The range ends at px_mmu_dvma_end and spans (tsb_size >> 3)
	 * pages (presumably one page per 8-byte TSB entry -- confirm).
	 */
	px_dvma_range.dvma_base = (uint32_t)px_mmu_dvma_end + 1
	    - ((pxu_p->tsb_size >> 3) << MMU_PAGE_SHIFT);
	px_dvma_range.dvma_len = (uint32_t)
	    px_mmu_dvma_end - px_dvma_range.dvma_base + 1;

	(void) ddi_prop_update_int_array(DDI_DEV_T_NONE, dip,
	    "virtual-dma", (int *)&px_dvma_range,
	    sizeof (px_dvma_range_prop_t) / sizeof (int));
	/*
	 * Initialize all fire hardware specific blocks.
	 */
	hvio_cb_init(xbc_csr_base, pxu_p);
	hvio_ib_init(csr_base, pxu_p);
	hvio_pec_init(csr_base, pxu_p);
	hvio_mmu_init(csr_base, pxu_p);

	px_p->px_plat_p = (void *)pxu_p;

	/*
	 * Initialize all the interrupt handlers
	 */
	switch (PX_CHIP_TYPE(pxu_p)) {
	case PX_CHIP_OBERON:
		/*
		 * Oberon hotplug uses SPARE3 field in ILU Error Log Enable
		 * register to indicate the status of leaf reset,
		 * we need to preserve the value of this bit, and keep it in
		 * px_ilu_log_mask to reflect the state of the bit
		 */
		if (CSR_BR(csr_base, ILU_ERROR_LOG_ENABLE, SPARE3))
			px_ilu_log_mask |= (1ull <<
			    ILU_ERROR_LOG_ENABLE_SPARE3);
		else
			px_ilu_log_mask &= ~(1ull <<
			    ILU_ERROR_LOG_ENABLE_SPARE3);

		px_err_reg_setup_pcie(chip_mask, csr_base, PX_ERR_ENABLE);
		break;

	case PX_CHIP_FIRE:
		px_err_reg_setup_pcie(chip_mask, csr_base, PX_ERR_ENABLE);
		break;

	default:
		/*
		 * NOTE(review): this path returns without releasing pxu_p,
		 * the register mappings, the TSB or the "virtual-dma"
		 * property, and leaves px_plat_p set.  It looks unreachable
		 * as long as px_identity_init() only returns FIRE, OBERON
		 * or UNIDENTIFIED -- confirm.
		 */
		cmn_err(CE_WARN, "%s%d: PX primary bus Unknown\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		return (DDI_FAILURE);
	}

	/* Initialize device handle */
	*dev_hdl = (devhandle_t)csr_base;

	DBG(DBG_ATTACH, dip, "px_lib_dev_init: dev_hdl 0x%llx\n", *dev_hdl);

	/* Sun4u always support fixed interrupt */
	px_p->px_supp_intr_types |= DDI_INTR_TYPE_FIXED;

	return (DDI_SUCCESS);
}
290 
291 int
292 px_lib_dev_fini(dev_info_t *dip)
293 {
294 	caddr_t			csr_base;
295 	uint8_t			chip_mask;
296 	px_t			*px_p = DIP_TO_STATE(dip);
297 	pxu_t			*pxu_p = (pxu_t *)px_p->px_plat_p;
298 
299 	DBG(DBG_DETACH, dip, "px_lib_dev_fini: dip 0x%p\n", dip);
300 
301 	/*
302 	 * Deinitialize all the interrupt handlers
303 	 */
304 	switch (PX_CHIP_TYPE(pxu_p)) {
305 	case PX_CHIP_OBERON:
306 	case PX_CHIP_FIRE:
307 		chip_mask = BITMASK(PX_CHIP_TYPE(pxu_p));
308 		csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
309 		px_err_reg_setup_pcie(chip_mask, csr_base, PX_ERR_DISABLE);
310 		break;
311 
312 	default:
313 		cmn_err(CE_WARN, "%s%d: PX primary bus Unknown\n",
314 		    ddi_driver_name(dip), ddi_get_instance(dip));
315 		return (DDI_FAILURE);
316 	}
317 
318 	iommu_tsb_free(pxu_p->tsb_cookie);
319 
320 	px_lib_unmap_regs((pxu_t *)px_p->px_plat_p);
321 	kmem_free(px_p->px_plat_p, sizeof (pxu_t));
322 	px_p->px_plat_p = NULL;
323 	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "virtual-dma");
324 
325 	return (DDI_SUCCESS);
326 }
327 
328 /*ARGSUSED*/
329 int
330 px_lib_intr_devino_to_sysino(dev_info_t *dip, devino_t devino,
331     sysino_t *sysino)
332 {
333 	px_t	*px_p = DIP_TO_STATE(dip);
334 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
335 	uint64_t	ret;
336 
337 	DBG(DBG_LIB_INT, dip, "px_lib_intr_devino_to_sysino: dip 0x%p "
338 	    "devino 0x%x\n", dip, devino);
339 
340 	if ((ret = hvio_intr_devino_to_sysino(DIP_TO_HANDLE(dip),
341 	    pxu_p, devino, sysino)) != H_EOK) {
342 		DBG(DBG_LIB_INT, dip,
343 		    "hvio_intr_devino_to_sysino failed, ret 0x%lx\n", ret);
344 		return (DDI_FAILURE);
345 	}
346 
347 	DBG(DBG_LIB_INT, dip, "px_lib_intr_devino_to_sysino: sysino 0x%llx\n",
348 	    *sysino);
349 
350 	return (DDI_SUCCESS);
351 }
352 
353 /*ARGSUSED*/
354 int
355 px_lib_intr_getvalid(dev_info_t *dip, sysino_t sysino,
356     intr_valid_state_t *intr_valid_state)
357 {
358 	uint64_t	ret;
359 
360 	DBG(DBG_LIB_INT, dip, "px_lib_intr_getvalid: dip 0x%p sysino 0x%llx\n",
361 	    dip, sysino);
362 
363 	if ((ret = hvio_intr_getvalid(DIP_TO_HANDLE(dip),
364 	    sysino, intr_valid_state)) != H_EOK) {
365 		DBG(DBG_LIB_INT, dip, "hvio_intr_getvalid failed, ret 0x%lx\n",
366 		    ret);
367 		return (DDI_FAILURE);
368 	}
369 
370 	DBG(DBG_LIB_INT, dip, "px_lib_intr_getvalid: intr_valid_state 0x%x\n",
371 	    *intr_valid_state);
372 
373 	return (DDI_SUCCESS);
374 }
375 
376 /*ARGSUSED*/
377 int
378 px_lib_intr_setvalid(dev_info_t *dip, sysino_t sysino,
379     intr_valid_state_t intr_valid_state)
380 {
381 	uint64_t	ret;
382 
383 	DBG(DBG_LIB_INT, dip, "px_lib_intr_setvalid: dip 0x%p sysino 0x%llx "
384 	    "intr_valid_state 0x%x\n", dip, sysino, intr_valid_state);
385 
386 	if ((ret = hvio_intr_setvalid(DIP_TO_HANDLE(dip),
387 	    sysino, intr_valid_state)) != H_EOK) {
388 		DBG(DBG_LIB_INT, dip, "hvio_intr_setvalid failed, ret 0x%lx\n",
389 		    ret);
390 		return (DDI_FAILURE);
391 	}
392 
393 	return (DDI_SUCCESS);
394 }
395 
396 /*ARGSUSED*/
397 int
398 px_lib_intr_getstate(dev_info_t *dip, sysino_t sysino,
399     intr_state_t *intr_state)
400 {
401 	uint64_t	ret;
402 
403 	DBG(DBG_LIB_INT, dip, "px_lib_intr_getstate: dip 0x%p sysino 0x%llx\n",
404 	    dip, sysino);
405 
406 	if ((ret = hvio_intr_getstate(DIP_TO_HANDLE(dip),
407 	    sysino, intr_state)) != H_EOK) {
408 		DBG(DBG_LIB_INT, dip, "hvio_intr_getstate failed, ret 0x%lx\n",
409 		    ret);
410 		return (DDI_FAILURE);
411 	}
412 
413 	DBG(DBG_LIB_INT, dip, "px_lib_intr_getstate: intr_state 0x%x\n",
414 	    *intr_state);
415 
416 	return (DDI_SUCCESS);
417 }
418 
419 /*ARGSUSED*/
420 int
421 px_lib_intr_setstate(dev_info_t *dip, sysino_t sysino,
422     intr_state_t intr_state)
423 {
424 	uint64_t	ret;
425 
426 	DBG(DBG_LIB_INT, dip, "px_lib_intr_setstate: dip 0x%p sysino 0x%llx "
427 	    "intr_state 0x%x\n", dip, sysino, intr_state);
428 
429 	if ((ret = hvio_intr_setstate(DIP_TO_HANDLE(dip),
430 	    sysino, intr_state)) != H_EOK) {
431 		DBG(DBG_LIB_INT, dip, "hvio_intr_setstate failed, ret 0x%lx\n",
432 		    ret);
433 		return (DDI_FAILURE);
434 	}
435 
436 	return (DDI_SUCCESS);
437 }
438 
439 /*ARGSUSED*/
440 int
441 px_lib_intr_gettarget(dev_info_t *dip, sysino_t sysino, cpuid_t *cpuid)
442 {
443 	px_t		*px_p = DIP_TO_STATE(dip);
444 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
445 	uint64_t	ret;
446 
447 	DBG(DBG_LIB_INT, dip, "px_lib_intr_gettarget: dip 0x%p sysino 0x%llx\n",
448 	    dip, sysino);
449 
450 	if ((ret = hvio_intr_gettarget(DIP_TO_HANDLE(dip), pxu_p,
451 	    sysino, cpuid)) != H_EOK) {
452 		DBG(DBG_LIB_INT, dip, "hvio_intr_gettarget failed, ret 0x%lx\n",
453 		    ret);
454 		return (DDI_FAILURE);
455 	}
456 
457 	DBG(DBG_LIB_INT, dip, "px_lib_intr_gettarget: cpuid 0x%x\n", cpuid);
458 
459 	return (DDI_SUCCESS);
460 }
461 
462 /*ARGSUSED*/
463 int
464 px_lib_intr_settarget(dev_info_t *dip, sysino_t sysino, cpuid_t cpuid)
465 {
466 	px_t		*px_p = DIP_TO_STATE(dip);
467 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
468 	uint64_t	ret;
469 
470 	DBG(DBG_LIB_INT, dip, "px_lib_intr_settarget: dip 0x%p sysino 0x%llx "
471 	    "cpuid 0x%x\n", dip, sysino, cpuid);
472 
473 	if ((ret = hvio_intr_settarget(DIP_TO_HANDLE(dip), pxu_p,
474 	    sysino, cpuid)) != H_EOK) {
475 		DBG(DBG_LIB_INT, dip, "hvio_intr_settarget failed, ret 0x%lx\n",
476 		    ret);
477 		return (DDI_FAILURE);
478 	}
479 
480 	return (DDI_SUCCESS);
481 }
482 
483 /*ARGSUSED*/
484 int
485 px_lib_intr_reset(dev_info_t *dip)
486 {
487 	devino_t	ino;
488 	sysino_t	sysino;
489 
490 	DBG(DBG_LIB_INT, dip, "px_lib_intr_reset: dip 0x%p\n", dip);
491 
492 	/* Reset all Interrupts */
493 	for (ino = 0; ino < INTERRUPT_MAPPING_ENTRIES; ino++) {
494 		if (px_lib_intr_devino_to_sysino(dip, ino,
495 		    &sysino) != DDI_SUCCESS)
496 			return (BF_FATAL);
497 
498 		if (px_lib_intr_setstate(dip, sysino,
499 		    INTR_IDLE_STATE) != DDI_SUCCESS)
500 			return (BF_FATAL);
501 	}
502 
503 	return (BF_NONE);
504 }
505 
506 /*ARGSUSED*/
507 int
508 px_lib_iommu_map(dev_info_t *dip, tsbid_t tsbid, pages_t pages,
509     io_attributes_t attr, void *addr, size_t pfn_index, int flags)
510 {
511 	px_t		*px_p = DIP_TO_STATE(dip);
512 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
513 	uint64_t	ret;
514 
515 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_map: dip 0x%p tsbid 0x%llx "
516 	    "pages 0x%x attr 0x%llx addr 0x%p pfn_index 0x%llx flags 0x%x\n",
517 	    dip, tsbid, pages, attr, addr, pfn_index, flags);
518 
519 	if ((ret = hvio_iommu_map(px_p->px_dev_hdl, pxu_p, tsbid, pages,
520 	    attr, addr, pfn_index, flags)) != H_EOK) {
521 		DBG(DBG_LIB_DMA, dip,
522 		    "px_lib_iommu_map failed, ret 0x%lx\n", ret);
523 		return (DDI_FAILURE);
524 	}
525 
526 	return (DDI_SUCCESS);
527 }
528 
529 /*ARGSUSED*/
530 int
531 px_lib_iommu_demap(dev_info_t *dip, tsbid_t tsbid, pages_t pages)
532 {
533 	px_t		*px_p = DIP_TO_STATE(dip);
534 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
535 	uint64_t	ret;
536 
537 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_demap: dip 0x%p tsbid 0x%llx "
538 	    "pages 0x%x\n", dip, tsbid, pages);
539 
540 	if ((ret = hvio_iommu_demap(px_p->px_dev_hdl, pxu_p, tsbid, pages))
541 	    != H_EOK) {
542 		DBG(DBG_LIB_DMA, dip,
543 		    "px_lib_iommu_demap failed, ret 0x%lx\n", ret);
544 
545 		return (DDI_FAILURE);
546 	}
547 
548 	return (DDI_SUCCESS);
549 }
550 
551 /*ARGSUSED*/
552 int
553 px_lib_iommu_getmap(dev_info_t *dip, tsbid_t tsbid, io_attributes_t *attr_p,
554     r_addr_t *r_addr_p)
555 {
556 	px_t	*px_p = DIP_TO_STATE(dip);
557 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
558 	uint64_t	ret;
559 
560 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_getmap: dip 0x%p tsbid 0x%llx\n",
561 	    dip, tsbid);
562 
563 	if ((ret = hvio_iommu_getmap(DIP_TO_HANDLE(dip), pxu_p, tsbid,
564 	    attr_p, r_addr_p)) != H_EOK) {
565 		DBG(DBG_LIB_DMA, dip,
566 		    "hvio_iommu_getmap failed, ret 0x%lx\n", ret);
567 
568 		return ((ret == H_ENOMAP) ? DDI_DMA_NOMAPPING:DDI_FAILURE);
569 	}
570 
571 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_getmap: attr 0x%llx "
572 	    "r_addr 0x%llx\n", *attr_p, *r_addr_p);
573 
574 	return (DDI_SUCCESS);
575 }
576 
577 
578 /*
579  * Checks dma attributes against system bypass ranges
580  * The bypass range is determined by the hardware. Return them so the
581  * common code can do generic checking against them.
582  */
583 /*ARGSUSED*/
584 int
585 px_lib_dma_bypass_rngchk(dev_info_t *dip, ddi_dma_attr_t *attr_p,
586     uint64_t *lo_p, uint64_t *hi_p)
587 {
588 	px_t	*px_p = DIP_TO_STATE(dip);
589 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
590 
591 	*lo_p = hvio_get_bypass_base(pxu_p);
592 	*hi_p = hvio_get_bypass_end(pxu_p);
593 
594 	return (DDI_SUCCESS);
595 }
596 
597 
598 /*ARGSUSED*/
599 int
600 px_lib_iommu_getbypass(dev_info_t *dip, r_addr_t ra, io_attributes_t attr,
601     io_addr_t *io_addr_p)
602 {
603 	uint64_t	ret;
604 	px_t	*px_p = DIP_TO_STATE(dip);
605 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
606 
607 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_getbypass: dip 0x%p ra 0x%llx "
608 	    "attr 0x%llx\n", dip, ra, attr);
609 
610 	if ((ret = hvio_iommu_getbypass(DIP_TO_HANDLE(dip), pxu_p, ra,
611 	    attr, io_addr_p)) != H_EOK) {
612 		DBG(DBG_LIB_DMA, dip,
613 		    "hvio_iommu_getbypass failed, ret 0x%lx\n", ret);
614 		return (DDI_FAILURE);
615 	}
616 
617 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_getbypass: io_addr 0x%llx\n",
618 	    *io_addr_p);
619 
620 	return (DDI_SUCCESS);
621 }
622 
623 /*
624  * Returns any needed IO address bit(s) for relaxed ordering in IOMMU
625  * bypass mode.
626  */
627 uint64_t
628 px_lib_ro_bypass(dev_info_t *dip, io_attributes_t attr, uint64_t ioaddr)
629 {
630 	px_t	*px_p = DIP_TO_STATE(dip);
631 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
632 
633 	if ((PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON) && (attr & PCI_MAP_ATTR_RO))
634 		return (MMU_OBERON_BYPASS_RO | ioaddr);
635 	else
636 		return (ioaddr);
637 }
638 
639 /*
640  * bus dma sync entry point.
641  */
642 /*ARGSUSED*/
643 int
644 px_lib_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
645     off_t off, size_t len, uint_t cache_flags)
646 {
647 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle;
648 	px_t	*px_p = DIP_TO_STATE(dip);
649 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
650 
651 	DBG(DBG_LIB_DMA, dip, "px_lib_dma_sync: dip 0x%p rdip 0x%p "
652 	    "handle 0x%llx off 0x%x len 0x%x flags 0x%x\n",
653 	    dip, rdip, handle, off, len, cache_flags);
654 
655 	/*
656 	 * No flush needed for Oberon
657 	 */
658 	if (PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON)
659 		return (DDI_SUCCESS);
660 
661 	/*
662 	 * jbus_stst_order is found only in certain cpu modules.
663 	 * Just return success if not present.
664 	 */
665 	if (&jbus_stst_order == NULL)
666 		return (DDI_SUCCESS);
667 
668 	if (!(mp->dmai_flags & PX_DMAI_FLAGS_INUSE)) {
669 		cmn_err(CE_WARN, "%s%d: Unbound dma handle %p.",
670 		    ddi_driver_name(rdip), ddi_get_instance(rdip), (void *)mp);
671 
672 		return (DDI_FAILURE);
673 	}
674 
675 	if (mp->dmai_flags & PX_DMAI_FLAGS_NOSYNC)
676 		return (DDI_SUCCESS);
677 
678 	/*
679 	 * No flush needed when sending data from memory to device.
680 	 * Nothing to do to "sync" memory to what device would already see.
681 	 */
682 	if (!(mp->dmai_rflags & DDI_DMA_READ) ||
683 	    ((cache_flags & PX_DMA_SYNC_DDI_FLAGS) == DDI_DMA_SYNC_FORDEV))
684 		return (DDI_SUCCESS);
685 
686 	/*
687 	 * Perform necessary cpu workaround to ensure jbus ordering.
688 	 * CPU's internal "invalidate FIFOs" are flushed.
689 	 */
690 
691 #if !defined(lint)
692 	kpreempt_disable();
693 #endif
694 	jbus_stst_order();
695 #if !defined(lint)
696 	kpreempt_enable();
697 #endif
698 	return (DDI_SUCCESS);
699 }
700 
701 /*
702  * MSIQ Functions:
703  */
/*
 * px_lib_msiq_init
 *
 * Allocate one zeroed buffer holding all event queues (msiq_cnt queues of
 * msiq_rec_cnt records each), record each queue's base pointer, reserve a
 * 512KB-aligned range from the DVMA map, map the buffer into the IOMMU at
 * that range, and call hvio_msiq_init().
 *
 * return value: DDI_SUCCESS, or DDI_FAILURE if the DVMA range cannot be
 * allocated, the IOMMU mapping fails, or hvio_msiq_init() fails.
 */
/*ARGSUSED*/
int
px_lib_msiq_init(dev_info_t *dip)
{
	px_t		*px_p = DIP_TO_STATE(dip);
	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
	px_dvma_addr_t	pg_index;
	size_t		q_sz = msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t);
	size_t		size;
	int		i, ret;

	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_init: dip 0x%p\n", dip);

	/* each queue must be exactly 8KB so that it is q_sz aligned */
	ASSERT(q_sz == 8 * 1024);

	/*
	 * Map the EQ memory into the Fire MMU (has to be 512KB aligned)
	 * and then initialize the base address register.
	 *
	 * Allocate entries from Fire IOMMU so that the resulting address
	 * is properly aligned.  Calculate the index of the first allocated
	 * entry.  Note: The size of the mapping is assumed to be a multiple
	 * of the page size.
	 */
	size = msiq_state_p->msiq_cnt * q_sz;

	msiq_state_p->msiq_buf_p = kmem_zalloc(size, KM_SLEEP);

	/* Queue i starts i * q_sz bytes into the shared buffer. */
	for (i = 0; i < msiq_state_p->msiq_cnt; i++)
		msiq_state_p->msiq_p[i].msiq_base_p = (msiqhead_t *)
		    ((caddr_t)msiq_state_p->msiq_buf_p + (i * q_sz));

	pxu_p->msiq_mapped_p = vmem_xalloc(px_p->px_mmu_p->mmu_dvma_map,
	    size, (512 * 1024), 0, 0, NULL, NULL, VM_NOSLEEP | VM_BESTFIT);

	/*
	 * NOTE(review): this return leaves msiq_buf_p allocated, unlike the
	 * two failure paths below, which call px_lib_msiq_fini().  Whether
	 * the caller releases it on failure is not visible here -- confirm
	 * before adding a free, to avoid a double free.
	 */
	if (pxu_p->msiq_mapped_p == NULL)
		return (DDI_FAILURE);

	pg_index = MMU_PAGE_INDEX(px_p->px_mmu_p,
	    MMU_BTOP((ulong_t)pxu_p->msiq_mapped_p));

	if ((ret = px_lib_iommu_map(px_p->px_dip, PCI_TSBID(0, pg_index),
	    MMU_BTOP(size), PCI_MAP_ATTR_WRITE, msiq_state_p->msiq_buf_p,
	    0, MMU_MAP_BUF)) != DDI_SUCCESS) {
		DBG(DBG_LIB_MSIQ, dip,
		    "px_lib_msiq_init: px_lib_iommu_map failed, "
		    "ret 0x%lx\n", ret);

		(void) px_lib_msiq_fini(dip);
		return (DDI_FAILURE);
	}

	if ((ret = hvio_msiq_init(DIP_TO_HANDLE(dip),
	    pxu_p)) != H_EOK) {
		DBG(DBG_LIB_MSIQ, dip,
		    "hvio_msiq_init failed, ret 0x%lx\n", ret);

		(void) px_lib_msiq_fini(dip);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
769 
770 /*ARGSUSED*/
771 int
772 px_lib_msiq_fini(dev_info_t *dip)
773 {
774 	px_t		*px_p = DIP_TO_STATE(dip);
775 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
776 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
777 	px_dvma_addr_t	pg_index;
778 	size_t		size;
779 
780 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_fini: dip 0x%p\n", dip);
781 
782 	/*
783 	 * Unmap and free the EQ memory that had been mapped
784 	 * into the Fire IOMMU.
785 	 */
786 	size = msiq_state_p->msiq_cnt *
787 	    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t);
788 
789 	pg_index = MMU_PAGE_INDEX(px_p->px_mmu_p,
790 	    MMU_BTOP((ulong_t)pxu_p->msiq_mapped_p));
791 
792 	(void) px_lib_iommu_demap(px_p->px_dip,
793 	    PCI_TSBID(0, pg_index), MMU_BTOP(size));
794 
795 	/* Free the entries from the Fire MMU */
796 	vmem_xfree(px_p->px_mmu_p->mmu_dvma_map,
797 	    (void *)pxu_p->msiq_mapped_p, size);
798 
799 	kmem_free(msiq_state_p->msiq_buf_p, msiq_state_p->msiq_cnt *
800 	    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t));
801 
802 	return (DDI_SUCCESS);
803 }
804 
805 /*ARGSUSED*/
806 int
807 px_lib_msiq_info(dev_info_t *dip, msiqid_t msiq_id, r_addr_t *ra_p,
808     uint_t *msiq_rec_cnt_p)
809 {
810 	px_t		*px_p = DIP_TO_STATE(dip);
811 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
812 	size_t		msiq_size;
813 
814 	DBG(DBG_LIB_MSIQ, dip, "px_msiq_info: dip 0x%p msiq_id 0x%x\n",
815 	    dip, msiq_id);
816 
817 	msiq_size = msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t);
818 	ra_p = (r_addr_t *)((caddr_t)msiq_state_p->msiq_buf_p +
819 	    (msiq_id * msiq_size));
820 
821 	*msiq_rec_cnt_p = msiq_state_p->msiq_rec_cnt;
822 
823 	DBG(DBG_LIB_MSIQ, dip, "px_msiq_info: ra_p 0x%p msiq_rec_cnt 0x%x\n",
824 	    ra_p, *msiq_rec_cnt_p);
825 
826 	return (DDI_SUCCESS);
827 }
828 
829 /*ARGSUSED*/
830 int
831 px_lib_msiq_getvalid(dev_info_t *dip, msiqid_t msiq_id,
832     pci_msiq_valid_state_t *msiq_valid_state)
833 {
834 	uint64_t	ret;
835 
836 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_getvalid: dip 0x%p msiq_id 0x%x\n",
837 	    dip, msiq_id);
838 
839 	if ((ret = hvio_msiq_getvalid(DIP_TO_HANDLE(dip),
840 	    msiq_id, msiq_valid_state)) != H_EOK) {
841 		DBG(DBG_LIB_MSIQ, dip,
842 		    "hvio_msiq_getvalid failed, ret 0x%lx\n", ret);
843 		return (DDI_FAILURE);
844 	}
845 
846 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_getvalid: msiq_valid_state 0x%x\n",
847 	    *msiq_valid_state);
848 
849 	return (DDI_SUCCESS);
850 }
851 
852 /*ARGSUSED*/
853 int
854 px_lib_msiq_setvalid(dev_info_t *dip, msiqid_t msiq_id,
855     pci_msiq_valid_state_t msiq_valid_state)
856 {
857 	uint64_t	ret;
858 
859 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_setvalid: dip 0x%p msiq_id 0x%x "
860 	    "msiq_valid_state 0x%x\n", dip, msiq_id, msiq_valid_state);
861 
862 	if ((ret = hvio_msiq_setvalid(DIP_TO_HANDLE(dip),
863 	    msiq_id, msiq_valid_state)) != H_EOK) {
864 		DBG(DBG_LIB_MSIQ, dip,
865 		    "hvio_msiq_setvalid failed, ret 0x%lx\n", ret);
866 		return (DDI_FAILURE);
867 	}
868 
869 	return (DDI_SUCCESS);
870 }
871 
872 /*ARGSUSED*/
873 int
874 px_lib_msiq_getstate(dev_info_t *dip, msiqid_t msiq_id,
875     pci_msiq_state_t *msiq_state)
876 {
877 	uint64_t	ret;
878 
879 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_getstate: dip 0x%p msiq_id 0x%x\n",
880 	    dip, msiq_id);
881 
882 	if ((ret = hvio_msiq_getstate(DIP_TO_HANDLE(dip),
883 	    msiq_id, msiq_state)) != H_EOK) {
884 		DBG(DBG_LIB_MSIQ, dip,
885 		    "hvio_msiq_getstate failed, ret 0x%lx\n", ret);
886 		return (DDI_FAILURE);
887 	}
888 
889 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_getstate: msiq_state 0x%x\n",
890 	    *msiq_state);
891 
892 	return (DDI_SUCCESS);
893 }
894 
895 /*ARGSUSED*/
896 int
897 px_lib_msiq_setstate(dev_info_t *dip, msiqid_t msiq_id,
898     pci_msiq_state_t msiq_state)
899 {
900 	uint64_t	ret;
901 
902 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_setstate: dip 0x%p msiq_id 0x%x "
903 	    "msiq_state 0x%x\n", dip, msiq_id, msiq_state);
904 
905 	if ((ret = hvio_msiq_setstate(DIP_TO_HANDLE(dip),
906 	    msiq_id, msiq_state)) != H_EOK) {
907 		DBG(DBG_LIB_MSIQ, dip,
908 		    "hvio_msiq_setstate failed, ret 0x%lx\n", ret);
909 		return (DDI_FAILURE);
910 	}
911 
912 	return (DDI_SUCCESS);
913 }
914 
915 /*ARGSUSED*/
916 int
917 px_lib_msiq_gethead(dev_info_t *dip, msiqid_t msiq_id,
918     msiqhead_t *msiq_head)
919 {
920 	uint64_t	ret;
921 
922 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_gethead: dip 0x%p msiq_id 0x%x\n",
923 	    dip, msiq_id);
924 
925 	if ((ret = hvio_msiq_gethead(DIP_TO_HANDLE(dip),
926 	    msiq_id, msiq_head)) != H_EOK) {
927 		DBG(DBG_LIB_MSIQ, dip,
928 		    "hvio_msiq_gethead failed, ret 0x%lx\n", ret);
929 		return (DDI_FAILURE);
930 	}
931 
932 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_gethead: msiq_head 0x%x\n",
933 	    *msiq_head);
934 
935 	return (DDI_SUCCESS);
936 }
937 
938 /*ARGSUSED*/
939 int
940 px_lib_msiq_sethead(dev_info_t *dip, msiqid_t msiq_id,
941     msiqhead_t msiq_head)
942 {
943 	uint64_t	ret;
944 
945 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_sethead: dip 0x%p msiq_id 0x%x "
946 	    "msiq_head 0x%x\n", dip, msiq_id, msiq_head);
947 
948 	if ((ret = hvio_msiq_sethead(DIP_TO_HANDLE(dip),
949 	    msiq_id, msiq_head)) != H_EOK) {
950 		DBG(DBG_LIB_MSIQ, dip,
951 		    "hvio_msiq_sethead failed, ret 0x%lx\n", ret);
952 		return (DDI_FAILURE);
953 	}
954 
955 	return (DDI_SUCCESS);
956 }
957 
958 /*ARGSUSED*/
959 int
960 px_lib_msiq_gettail(dev_info_t *dip, msiqid_t msiq_id,
961     msiqtail_t *msiq_tail)
962 {
963 	uint64_t	ret;
964 
965 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_gettail: dip 0x%p msiq_id 0x%x\n",
966 	    dip, msiq_id);
967 
968 	if ((ret = hvio_msiq_gettail(DIP_TO_HANDLE(dip),
969 	    msiq_id, msiq_tail)) != H_EOK) {
970 		DBG(DBG_LIB_MSIQ, dip,
971 		    "hvio_msiq_gettail failed, ret 0x%lx\n", ret);
972 		return (DDI_FAILURE);
973 	}
974 
975 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_gettail: msiq_tail 0x%x\n",
976 	    *msiq_tail);
977 
978 	return (DDI_SUCCESS);
979 }
980 
/*
 * px_lib_get_msiq_rec
 *
 * Decode the event queue record at msiq_head_p (interpreted as an
 * eq_rec_t) into *msiq_rec_p.
 *
 * If eq_rec_fmt_type is zero, only msiq_rec_type is set (to 0) and the
 * function returns.  Otherwise the record type is taken from
 * eq_rec_fmt_type >> 3, the type-specific data is copied, and the
 * requester id and MSI address are filled in.
 */
/*ARGSUSED*/
void
px_lib_get_msiq_rec(dev_info_t *dip, msiqhead_t *msiq_head_p,
    msiq_rec_t *msiq_rec_p)
{
	eq_rec_t	*eq_rec_p = (eq_rec_t *)msiq_head_p;

	DBG(DBG_LIB_MSIQ, dip, "px_lib_get_msiq_rec: dip 0x%p eq_rec_p 0x%p\n",
	    dip, eq_rec_p);

	if (!eq_rec_p->eq_rec_fmt_type) {
		/* Set msiq_rec_type to zero */
		msiq_rec_p->msiq_rec_type = 0;

		return;
	}

	DBG(DBG_LIB_MSIQ, dip, "px_lib_get_msiq_rec: EQ RECORD, "
	    "eq_rec_rid 0x%llx eq_rec_fmt_type 0x%llx "
	    "eq_rec_len 0x%llx eq_rec_addr0 0x%llx "
	    "eq_rec_addr1 0x%llx eq_rec_data0 0x%llx "
	    "eq_rec_data1 0x%llx\n", eq_rec_p->eq_rec_rid,
	    eq_rec_p->eq_rec_fmt_type, eq_rec_p->eq_rec_len,
	    eq_rec_p->eq_rec_addr0, eq_rec_p->eq_rec_addr1,
	    eq_rec_p->eq_rec_data0, eq_rec_p->eq_rec_data1);

	/*
	 * Only the upper 4 bits of eq_rec_fmt_type are used
	 * to identify the EQ record type.
	 */
	switch (eq_rec_p->eq_rec_fmt_type >> 3) {
	case EQ_REC_MSI32:
		msiq_rec_p->msiq_rec_type = MSI32_REC;

		msiq_rec_p->msiq_rec_data.msi.msi_data =
		    eq_rec_p->eq_rec_data0;
		break;
	case EQ_REC_MSI64:
		msiq_rec_p->msiq_rec_type = MSI64_REC;

		msiq_rec_p->msiq_rec_data.msi.msi_data =
		    eq_rec_p->eq_rec_data0;
		break;
	case EQ_REC_MSG:
		msiq_rec_p->msiq_rec_type = MSG_REC;

		/* The low 3 bits of the format/type field are the route. */
		msiq_rec_p->msiq_rec_data.msg.msg_route =
		    eq_rec_p->eq_rec_fmt_type & 7;
		msiq_rec_p->msiq_rec_data.msg.msg_targ = eq_rec_p->eq_rec_rid;
		msiq_rec_p->msiq_rec_data.msg.msg_code = eq_rec_p->eq_rec_data0;
		break;
	default:
		/*
		 * msiq_rec_type is left untouched here; the rid and MSI
		 * address below are still filled in.
		 */
		cmn_err(CE_WARN, "%s%d: px_lib_get_msiq_rec: "
		    "0x%x is an unknown EQ record type",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    (int)eq_rec_p->eq_rec_fmt_type);
		break;
	}

	msiq_rec_p->msiq_rec_rid = eq_rec_p->eq_rec_rid;
	/* Address is assembled as (addr1 << 16) | (addr0 << 2). */
	msiq_rec_p->msiq_rec_msi_addr = ((eq_rec_p->eq_rec_addr1 << 16) |
	    (eq_rec_p->eq_rec_addr0 << 2));
}
1044 
1045 /*ARGSUSED*/
1046 void
1047 px_lib_clr_msiq_rec(dev_info_t *dip, msiqhead_t *msiq_head_p)
1048 {
1049 	eq_rec_t	*eq_rec_p = (eq_rec_t *)msiq_head_p;
1050 
1051 	DBG(DBG_LIB_MSIQ, dip, "px_lib_clr_msiq_rec: dip 0x%p eq_rec_p 0x%p\n",
1052 	    dip, eq_rec_p);
1053 
1054 	if (eq_rec_p->eq_rec_fmt_type) {
1055 		/* Zero out eq_rec_fmt_type field */
1056 		eq_rec_p->eq_rec_fmt_type = 0;
1057 	}
1058 }
1059 
1060 /*
1061  * MSI Functions:
1062  */
1063 /*ARGSUSED*/
1064 int
1065 px_lib_msi_init(dev_info_t *dip)
1066 {
1067 	px_t		*px_p = DIP_TO_STATE(dip);
1068 	px_msi_state_t	*msi_state_p = &px_p->px_ib_p->ib_msi_state;
1069 	uint64_t	ret;
1070 
1071 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_init: dip 0x%p\n", dip);
1072 
1073 	if ((ret = hvio_msi_init(DIP_TO_HANDLE(dip),
1074 	    msi_state_p->msi_addr32, msi_state_p->msi_addr64)) != H_EOK) {
1075 		DBG(DBG_LIB_MSIQ, dip, "px_lib_msi_init failed, ret 0x%lx\n",
1076 		    ret);
1077 		return (DDI_FAILURE);
1078 	}
1079 
1080 	return (DDI_SUCCESS);
1081 }
1082 
1083 /*ARGSUSED*/
1084 int
1085 px_lib_msi_getmsiq(dev_info_t *dip, msinum_t msi_num,
1086     msiqid_t *msiq_id)
1087 {
1088 	uint64_t	ret;
1089 
1090 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getmsiq: dip 0x%p msi_num 0x%x\n",
1091 	    dip, msi_num);
1092 
1093 	if ((ret = hvio_msi_getmsiq(DIP_TO_HANDLE(dip),
1094 	    msi_num, msiq_id)) != H_EOK) {
1095 		DBG(DBG_LIB_MSI, dip,
1096 		    "hvio_msi_getmsiq failed, ret 0x%lx\n", ret);
1097 		return (DDI_FAILURE);
1098 	}
1099 
1100 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getmsiq: msiq_id 0x%x\n",
1101 	    *msiq_id);
1102 
1103 	return (DDI_SUCCESS);
1104 }
1105 
1106 /*ARGSUSED*/
1107 int
1108 px_lib_msi_setmsiq(dev_info_t *dip, msinum_t msi_num,
1109     msiqid_t msiq_id, msi_type_t msitype)
1110 {
1111 	uint64_t	ret;
1112 
1113 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_setmsiq: dip 0x%p msi_num 0x%x "
1114 	    "msq_id 0x%x\n", dip, msi_num, msiq_id);
1115 
1116 	if ((ret = hvio_msi_setmsiq(DIP_TO_HANDLE(dip),
1117 	    msi_num, msiq_id)) != H_EOK) {
1118 		DBG(DBG_LIB_MSI, dip,
1119 		    "hvio_msi_setmsiq failed, ret 0x%lx\n", ret);
1120 		return (DDI_FAILURE);
1121 	}
1122 
1123 	return (DDI_SUCCESS);
1124 }
1125 
1126 /*ARGSUSED*/
1127 int
1128 px_lib_msi_getvalid(dev_info_t *dip, msinum_t msi_num,
1129     pci_msi_valid_state_t *msi_valid_state)
1130 {
1131 	uint64_t	ret;
1132 
1133 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getvalid: dip 0x%p msi_num 0x%x\n",
1134 	    dip, msi_num);
1135 
1136 	if ((ret = hvio_msi_getvalid(DIP_TO_HANDLE(dip),
1137 	    msi_num, msi_valid_state)) != H_EOK) {
1138 		DBG(DBG_LIB_MSI, dip,
1139 		    "hvio_msi_getvalid failed, ret 0x%lx\n", ret);
1140 		return (DDI_FAILURE);
1141 	}
1142 
1143 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getvalid: msiq_id 0x%x\n",
1144 	    *msi_valid_state);
1145 
1146 	return (DDI_SUCCESS);
1147 }
1148 
1149 /*ARGSUSED*/
1150 int
1151 px_lib_msi_setvalid(dev_info_t *dip, msinum_t msi_num,
1152     pci_msi_valid_state_t msi_valid_state)
1153 {
1154 	uint64_t	ret;
1155 
1156 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_setvalid: dip 0x%p msi_num 0x%x "
1157 	    "msi_valid_state 0x%x\n", dip, msi_num, msi_valid_state);
1158 
1159 	if ((ret = hvio_msi_setvalid(DIP_TO_HANDLE(dip),
1160 	    msi_num, msi_valid_state)) != H_EOK) {
1161 		DBG(DBG_LIB_MSI, dip,
1162 		    "hvio_msi_setvalid failed, ret 0x%lx\n", ret);
1163 		return (DDI_FAILURE);
1164 	}
1165 
1166 	return (DDI_SUCCESS);
1167 }
1168 
1169 /*ARGSUSED*/
1170 int
1171 px_lib_msi_getstate(dev_info_t *dip, msinum_t msi_num,
1172     pci_msi_state_t *msi_state)
1173 {
1174 	uint64_t	ret;
1175 
1176 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getstate: dip 0x%p msi_num 0x%x\n",
1177 	    dip, msi_num);
1178 
1179 	if ((ret = hvio_msi_getstate(DIP_TO_HANDLE(dip),
1180 	    msi_num, msi_state)) != H_EOK) {
1181 		DBG(DBG_LIB_MSI, dip,
1182 		    "hvio_msi_getstate failed, ret 0x%lx\n", ret);
1183 		return (DDI_FAILURE);
1184 	}
1185 
1186 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getstate: msi_state 0x%x\n",
1187 	    *msi_state);
1188 
1189 	return (DDI_SUCCESS);
1190 }
1191 
1192 /*ARGSUSED*/
1193 int
1194 px_lib_msi_setstate(dev_info_t *dip, msinum_t msi_num,
1195     pci_msi_state_t msi_state)
1196 {
1197 	uint64_t	ret;
1198 
1199 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_setstate: dip 0x%p msi_num 0x%x "
1200 	    "msi_state 0x%x\n", dip, msi_num, msi_state);
1201 
1202 	if ((ret = hvio_msi_setstate(DIP_TO_HANDLE(dip),
1203 	    msi_num, msi_state)) != H_EOK) {
1204 		DBG(DBG_LIB_MSI, dip,
1205 		    "hvio_msi_setstate failed, ret 0x%lx\n", ret);
1206 		return (DDI_FAILURE);
1207 	}
1208 
1209 	return (DDI_SUCCESS);
1210 }
1211 
1212 /*
1213  * MSG Functions:
1214  */
1215 /*ARGSUSED*/
1216 int
1217 px_lib_msg_getmsiq(dev_info_t *dip, pcie_msg_type_t msg_type,
1218     msiqid_t *msiq_id)
1219 {
1220 	uint64_t	ret;
1221 
1222 	DBG(DBG_LIB_MSG, dip, "px_lib_msg_getmsiq: dip 0x%p msg_type 0x%x\n",
1223 	    dip, msg_type);
1224 
1225 	if ((ret = hvio_msg_getmsiq(DIP_TO_HANDLE(dip),
1226 	    msg_type, msiq_id)) != H_EOK) {
1227 		DBG(DBG_LIB_MSG, dip,
1228 		    "hvio_msg_getmsiq failed, ret 0x%lx\n", ret);
1229 		return (DDI_FAILURE);
1230 	}
1231 
1232 	DBG(DBG_LIB_MSI, dip, "px_lib_msg_getmsiq: msiq_id 0x%x\n",
1233 	    *msiq_id);
1234 
1235 	return (DDI_SUCCESS);
1236 }
1237 
1238 /*ARGSUSED*/
1239 int
1240 px_lib_msg_setmsiq(dev_info_t *dip, pcie_msg_type_t msg_type,
1241     msiqid_t msiq_id)
1242 {
1243 	uint64_t	ret;
1244 
1245 	DBG(DBG_LIB_MSG, dip, "px_lib_msi_setstate: dip 0x%p msg_type 0x%x "
1246 	    "msiq_id 0x%x\n", dip, msg_type, msiq_id);
1247 
1248 	if ((ret = hvio_msg_setmsiq(DIP_TO_HANDLE(dip),
1249 	    msg_type, msiq_id)) != H_EOK) {
1250 		DBG(DBG_LIB_MSG, dip,
1251 		    "hvio_msg_setmsiq failed, ret 0x%lx\n", ret);
1252 		return (DDI_FAILURE);
1253 	}
1254 
1255 	return (DDI_SUCCESS);
1256 }
1257 
1258 /*ARGSUSED*/
1259 int
1260 px_lib_msg_getvalid(dev_info_t *dip, pcie_msg_type_t msg_type,
1261     pcie_msg_valid_state_t *msg_valid_state)
1262 {
1263 	uint64_t	ret;
1264 
1265 	DBG(DBG_LIB_MSG, dip, "px_lib_msg_getvalid: dip 0x%p msg_type 0x%x\n",
1266 	    dip, msg_type);
1267 
1268 	if ((ret = hvio_msg_getvalid(DIP_TO_HANDLE(dip), msg_type,
1269 	    msg_valid_state)) != H_EOK) {
1270 		DBG(DBG_LIB_MSG, dip,
1271 		    "hvio_msg_getvalid failed, ret 0x%lx\n", ret);
1272 		return (DDI_FAILURE);
1273 	}
1274 
1275 	DBG(DBG_LIB_MSI, dip, "px_lib_msg_getvalid: msg_valid_state 0x%x\n",
1276 	    *msg_valid_state);
1277 
1278 	return (DDI_SUCCESS);
1279 }
1280 
1281 /*ARGSUSED*/
1282 int
1283 px_lib_msg_setvalid(dev_info_t *dip, pcie_msg_type_t msg_type,
1284     pcie_msg_valid_state_t msg_valid_state)
1285 {
1286 	uint64_t	ret;
1287 
1288 	DBG(DBG_LIB_MSG, dip, "px_lib_msg_setvalid: dip 0x%p msg_type 0x%x "
1289 	    "msg_valid_state 0x%x\n", dip, msg_type, msg_valid_state);
1290 
1291 	if ((ret = hvio_msg_setvalid(DIP_TO_HANDLE(dip), msg_type,
1292 	    msg_valid_state)) != H_EOK) {
1293 		DBG(DBG_LIB_MSG, dip,
1294 		    "hvio_msg_setvalid failed, ret 0x%lx\n", ret);
1295 		return (DDI_FAILURE);
1296 	}
1297 
1298 	return (DDI_SUCCESS);
1299 }
1300 
/*
 * px_panic_domain() has an empty body in this file: both arguments are
 * ignored and nothing is done.
 */
/*ARGSUSED*/
void
px_panic_domain(px_t *px_p, pcie_req_id_t bdf)
{
}
1306 
1307 /*
1308  * Suspend/Resume Functions:
1309  * Currently unsupported by hypervisor
1310  */
1311 int
1312 px_lib_suspend(dev_info_t *dip)
1313 {
1314 	px_t		*px_p = DIP_TO_STATE(dip);
1315 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
1316 	px_cb_t		*cb_p = PX2CB(px_p);
1317 	devhandle_t	dev_hdl, xbus_dev_hdl;
1318 	uint64_t	ret = H_EOK;
1319 
1320 	DBG(DBG_DETACH, dip, "px_lib_suspend: dip 0x%p\n", dip);
1321 
1322 	dev_hdl = (devhandle_t)pxu_p->px_address[PX_REG_CSR];
1323 	xbus_dev_hdl = (devhandle_t)pxu_p->px_address[PX_REG_XBC];
1324 
1325 	if ((ret = hvio_suspend(dev_hdl, pxu_p)) != H_EOK)
1326 		goto fail;
1327 
1328 	if (--cb_p->attachcnt == 0) {
1329 		ret = hvio_cb_suspend(xbus_dev_hdl, pxu_p);
1330 		if (ret != H_EOK)
1331 			cb_p->attachcnt++;
1332 	}
1333 	pxu_p->cpr_flag = PX_ENTERED_CPR;
1334 
1335 fail:
1336 	return ((ret != H_EOK) ? DDI_FAILURE: DDI_SUCCESS);
1337 }
1338 
1339 void
1340 px_lib_resume(dev_info_t *dip)
1341 {
1342 	px_t		*px_p = DIP_TO_STATE(dip);
1343 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
1344 	px_cb_t		*cb_p = PX2CB(px_p);
1345 	devhandle_t	dev_hdl, xbus_dev_hdl;
1346 	devino_t	pec_ino = px_p->px_inos[PX_INTR_PEC];
1347 	devino_t	xbc_ino = px_p->px_inos[PX_INTR_XBC];
1348 
1349 	DBG(DBG_ATTACH, dip, "px_lib_resume: dip 0x%p\n", dip);
1350 
1351 	dev_hdl = (devhandle_t)pxu_p->px_address[PX_REG_CSR];
1352 	xbus_dev_hdl = (devhandle_t)pxu_p->px_address[PX_REG_XBC];
1353 
1354 	if (++cb_p->attachcnt == 1)
1355 		hvio_cb_resume(dev_hdl, xbus_dev_hdl, xbc_ino, pxu_p);
1356 
1357 	hvio_resume(dev_hdl, pec_ino, pxu_p);
1358 }
1359 
/*
 * Generate a unique Oberon UBC ID based on the Logical System Board and
 * the IO Channel from the portid property field.
 */
1364 static uint64_t
1365 oberon_get_ubc_id(dev_info_t *dip)
1366 {
1367 	px_t	*px_p = DIP_TO_STATE(dip);
1368 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
1369 	uint64_t	ubc_id;
1370 
1371 	/*
1372 	 * Generate a unique 6 bit UBC ID using the 2 IO_Channel#[1:0] bits and
1373 	 * the 4 LSB_ID[3:0] bits from the Oberon's portid property.
1374 	 */
1375 	ubc_id = (((pxu_p->portid >> OBERON_PORT_ID_IOC) &
1376 	    OBERON_PORT_ID_IOC_MASK) | (((pxu_p->portid >>
1377 	    OBERON_PORT_ID_LSB) & OBERON_PORT_ID_LSB_MASK)
1378 	    << OBERON_UBC_ID_LSB));
1379 
1380 	return (ubc_id);
1381 }
1382 
1383 /*
1384  * Oberon does not have a UBC scratch register, so alloc an array of scratch
1385  * registers when needed and use a unique UBC ID as an index. This code
1386  * can be simplified if we use a pre-allocated array. They are currently
1387  * being dynamically allocated because it's only needed by the Oberon.
1388  */
1389 static void
1390 oberon_set_cb(dev_info_t *dip, uint64_t val)
1391 {
1392 	uint64_t	ubc_id;
1393 
1394 	if (px_oberon_ubc_scratch_regs == NULL)
1395 		px_oberon_ubc_scratch_regs =
1396 		    (uint64_t *)kmem_zalloc(sizeof (uint64_t)*
1397 		    OBERON_UBC_ID_MAX, KM_SLEEP);
1398 
1399 	ubc_id = oberon_get_ubc_id(dip);
1400 
1401 	px_oberon_ubc_scratch_regs[ubc_id] = val;
1402 
1403 	/*
1404 	 * Check if any scratch registers are still in use. If all scratch
1405 	 * registers are currently set to zero, then deallocate the scratch
1406 	 * register array.
1407 	 */
1408 	for (ubc_id = 0; ubc_id < OBERON_UBC_ID_MAX; ubc_id++) {
1409 		if (px_oberon_ubc_scratch_regs[ubc_id] != NULL)
1410 			return;
1411 	}
1412 
1413 	/*
1414 	 * All scratch registers are set to zero so deallocate the scratch
1415 	 * register array and set the pointer to NULL.
1416 	 */
1417 	kmem_free(px_oberon_ubc_scratch_regs,
1418 	    (sizeof (uint64_t)*OBERON_UBC_ID_MAX));
1419 
1420 	px_oberon_ubc_scratch_regs = NULL;
1421 }
1422 
1423 /*
1424  * Oberon does not have a UBC scratch register, so use an allocated array of
1425  * scratch registers and use the unique UBC ID as an index into that array.
1426  */
1427 static uint64_t
1428 oberon_get_cb(dev_info_t *dip)
1429 {
1430 	uint64_t	ubc_id;
1431 
1432 	if (px_oberon_ubc_scratch_regs == NULL)
1433 		return (0);
1434 
1435 	ubc_id = oberon_get_ubc_id(dip);
1436 
1437 	return (px_oberon_ubc_scratch_regs[ubc_id]);
1438 }
1439 
1440 /*
1441  * Misc Functions:
1442  * Currently unsupported by hypervisor
1443  */
1444 static uint64_t
1445 px_get_cb(dev_info_t *dip)
1446 {
1447 	px_t	*px_p = DIP_TO_STATE(dip);
1448 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
1449 
1450 	/*
1451 	 * Oberon does not currently have Scratchpad registers.
1452 	 */
1453 	if (PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON)
1454 		return (oberon_get_cb(dip));
1455 
1456 	return (CSR_XR((caddr_t)pxu_p->px_address[PX_REG_XBC], JBUS_SCRATCH_1));
1457 }
1458 
1459 static void
1460 px_set_cb(dev_info_t *dip, uint64_t val)
1461 {
1462 	px_t	*px_p = DIP_TO_STATE(dip);
1463 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
1464 
1465 	/*
1466 	 * Oberon does not currently have Scratchpad registers.
1467 	 */
1468 	if (PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON) {
1469 		oberon_set_cb(dip, val);
1470 		return;
1471 	}
1472 
1473 	CSR_XS((caddr_t)pxu_p->px_address[PX_REG_XBC], JBUS_SCRATCH_1, val);
1474 }
1475 
1476 /*ARGSUSED*/
1477 int
1478 px_lib_map_vconfig(dev_info_t *dip,
1479 	ddi_map_req_t *mp, pci_config_offset_t off,
1480 		pci_regspec_t *rp, caddr_t *addrp)
1481 {
1482 	/*
1483 	 * No special config space access services in this layer.
1484 	 */
1485 	return (DDI_FAILURE);
1486 }
1487 
1488 void
1489 px_lib_map_attr_check(ddi_map_req_t *mp)
1490 {
1491 	ddi_acc_hdl_t *hp = mp->map_handlep;
1492 
1493 	/* fire does not accept byte masks from PIO store merge */
1494 	if (hp->ah_acc.devacc_attr_dataorder == DDI_STORECACHING_OK_ACC)
1495 		hp->ah_acc.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1496 }
1497 
/*
 * Run the root complex error handler and a fabric scan after a protected
 * access to addr, then let px_err_panic() act on the two results.
 *
 * px_p - px soft state of the root complex
 * rdip - requesting device; may be NULL, in which case no BDF is derived
 *        for a MEM32 access
 * addr - address that was accessed
 *
 * This function is called only by poke, caut put and pxtool poke.
 */
void
px_lib_clr_errs(px_t *px_p, dev_info_t *rdip, uint64_t addr)
{
	px_pec_t	*pec_p = px_p->px_pec_p;
	dev_info_t	*rpdip = px_p->px_dip;
	int		rc_err, fab_err, i;
	int		acctype = pec_p->pec_safeacc_type;
	ddi_fm_error_t	derr;
	pci_ranges_t	*ranges_p;
	int		range_len;
	uint32_t	addr_high, addr_low;
	pcie_req_id_t	bdf = PCIE_INVALID_BDF;

	/* Create the derr */
	bzero(&derr, sizeof (ddi_fm_error_t));
	derr.fme_version = DDI_FME_VERSION;
	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
	derr.fme_flag = acctype;

	/* For a cautious access, also record the error on the PEC handle. */
	if (acctype == DDI_FM_ERR_EXPECTED) {
		derr.fme_status = DDI_FM_NONFATAL;
		ndi_fm_acc_err_set(pec_p->pec_acc_hdl, &derr);
	}

	/* Nothing more is done if px_fm_enter() refuses entry. */
	if (px_fm_enter(px_p) != DDI_SUCCESS)
		return;

	/* send ereport/handle/clear fire registers */
	rc_err = px_err_cmn_intr(px_p, &derr, PX_LIB_CALL, PX_FM_BLOCK_ALL);

	/* Figure out if this is a cfg or mem32 access */
	addr_high = (uint32_t)(addr >> 32);
	addr_low = (uint32_t)addr;
	range_len = px_p->px_ranges_length / sizeof (pci_ranges_t);
	i = 0;
	/* Only the first range whose parent_high matches is examined. */
	for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
		if (ranges_p->parent_high == addr_high) {
			switch (ranges_p->child_high & PCI_ADDR_MASK) {
			case PCI_ADDR_CONFIG:
				/* BDF comes from the address itself. */
				bdf = (pcie_req_id_t)(addr_low >> 12);
				addr_low = 0;
				break;
			case PCI_ADDR_MEM32:
				if (rdip)
					bdf = PCI_GET_BDF(rdip);
				else
					bdf = PCIE_INVALID_BDF;
				break;
			}
			break;
		}
	}

	(void) px_rp_en_q(px_p, bdf, addr_low, NULL);

	/*
	 * XXX - Current code scans the fabric for all px_tool accesses.
	 * In future, do not scan fabric for px_tool access to IO Root Nexus
	 */
	fab_err = px_scan_fabric(px_p, rpdip, &derr);

	/*
	 * px_err_panic() is invoked once before px_fm_exit() with B_TRUE and
	 * once after it with B_FALSE; the order is significant.
	 * NOTE(review): the meaning of the last argument is defined by
	 * px_err_panic(), which is not visible here - confirm.
	 */
	px_err_panic(rc_err, PX_RC, fab_err, B_TRUE);
	px_fm_exit(px_p);
	px_err_panic(rc_err, PX_RC, fab_err, B_FALSE);
}
1564 
/*
 * DEBUG-only counters, bumped by the peek and poke routines in this file
 * each time a protected access fails.
 */
#ifdef  DEBUG
int	px_peekfault_cnt = 0;
int	px_pokefault_cnt = 0;
#endif  /* DEBUG */
1569 
/*
 * Perform a single protected write (poke) described by in_args.
 *
 * The access runs under on_trap() protection while pec_pokefault_mutex is
 * held, with the PEC marked as doing a DDI_FM_ERR_POKE access.  After the
 * access, px_lib_clr_errs() is called for the device address.
 *
 * Returns the do_poke() result, or DDI_FAILURE if the access trapped.
 */
/*ARGSUSED*/
static int
px_lib_do_poke(dev_info_t *dip, dev_info_t *rdip,
    peekpoke_ctlops_t *in_args)
{
	px_t *px_p = DIP_TO_STATE(dip);
	px_pec_t *pec_p = px_p->px_pec_p;
	int err = DDI_SUCCESS;
	on_trap_data_t otd;

	mutex_enter(&pec_p->pec_pokefault_mutex);
	/* Publish the trap data and access type on the PEC. */
	pec_p->pec_ontrap_data = &otd;
	pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;

	/* Set up protected environment. */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		/* Use poke_fault as the trampoline only around do_poke(). */
		uintptr_t tramp = otd.ot_trampoline;

		otd.ot_trampoline = (uintptr_t)&poke_fault;
		err = do_poke(in_args->size, (void *)in_args->dev_addr,
		    (void *)in_args->host_addr);
		otd.ot_trampoline = tramp;
	} else
		err = DDI_FAILURE;

	px_lib_clr_errs(px_p, rdip, in_args->dev_addr);

	/* A data access trap recorded in otd also counts as a failure. */
	if (otd.ot_trap & OT_DATA_ACCESS)
		err = DDI_FAILURE;

	/* Take down protected environment. */
	no_trap();

	pec_p->pec_ontrap_data = NULL;
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	mutex_exit(&pec_p->pec_pokefault_mutex);

#ifdef  DEBUG
	if (err == DDI_FAILURE)
		px_pokefault_cnt++;
#endif
	return (err);
}
1613 
/*
 * Perform repcount cautious writes of the given size through the access
 * handle carried in cautacc_ctlops_arg.
 *
 * Each element is written with the i_ddi_put routine matching its size,
 * then px_lib_clr_errs() is called.  The loop stops at the first element
 * for which a data access trap has been recorded.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if a data access trap was recorded.
 */
/*ARGSUSED*/
static int
px_lib_do_caut_put(dev_info_t *dip, dev_info_t *rdip,
    peekpoke_ctlops_t *cautacc_ctlops_arg)
{
	size_t size = cautacc_ctlops_arg->size;
	uintptr_t dev_addr = cautacc_ctlops_arg->dev_addr;
	uintptr_t host_addr = cautacc_ctlops_arg->host_addr;
	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)cautacc_ctlops_arg->handle;
	size_t repcount = cautacc_ctlops_arg->repcount;
	uint_t flags = cautacc_ctlops_arg->flags;

	px_t *px_p = DIP_TO_STATE(dip);
	px_pec_t *pec_p = px_p->px_pec_p;
	int err = DDI_SUCCESS;

	/*
	 * Note that i_ndi_busop_access_enter ends up grabbing the pokefault
	 * mutex.
	 */
	i_ndi_busop_access_enter(hp->ahi_common.ah_dip, (ddi_acc_handle_t)hp);

	/* Mark both the PEC and the handle as expecting errors. */
	pec_p->pec_ontrap_data = (on_trap_data_t *)hp->ahi_err->err_ontrap;
	pec_p->pec_safeacc_type = DDI_FM_ERR_EXPECTED;
	hp->ahi_err->err_expected = DDI_FM_ERR_EXPECTED;

	if (!i_ddi_ontrap((ddi_acc_handle_t)hp)) {
		for (; repcount; repcount--) {
			/*
			 * NOTE(review): there is no default case, so a size
			 * other than 1, 2, 4 or 8 writes nothing but still
			 * advances the addresses - confirm callers never
			 * pass one.
			 */
			switch (size) {

			case sizeof (uint8_t):
				i_ddi_put8(hp, (uint8_t *)dev_addr,
				    *(uint8_t *)host_addr);
				break;

			case sizeof (uint16_t):
				i_ddi_put16(hp, (uint16_t *)dev_addr,
				    *(uint16_t *)host_addr);
				break;

			case sizeof (uint32_t):
				i_ddi_put32(hp, (uint32_t *)dev_addr,
				    *(uint32_t *)host_addr);
				break;

			case sizeof (uint64_t):
				i_ddi_put64(hp, (uint64_t *)dev_addr,
				    *(uint64_t *)host_addr);
				break;
			}

			host_addr += size;

			if (flags == DDI_DEV_AUTOINCR)
				dev_addr += size;

			/*
			 * NOTE(review): with DDI_DEV_AUTOINCR, dev_addr has
			 * already been advanced past the element just
			 * written when it is passed here - confirm this is
			 * intended.
			 */
			px_lib_clr_errs(px_p, rdip, dev_addr);

			if (pec_p->pec_ontrap_data->ot_trap & OT_DATA_ACCESS) {
				err = DDI_FAILURE;
#ifdef  DEBUG
				px_pokefault_cnt++;
#endif
				break;
			}
		}
	}

	/* Tear down in reverse: trap protection, PEC state, then the lock. */
	i_ddi_notrap((ddi_acc_handle_t)hp);
	pec_p->pec_ontrap_data = NULL;
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	i_ndi_busop_access_exit(hp->ahi_common.ah_dip, (ddi_acc_handle_t)hp);
	hp->ahi_err->err_expected = DDI_FM_ERR_UNEXPECTED;

	return (err);
}
1690 
1691 
1692 int
1693 px_lib_ctlops_poke(dev_info_t *dip, dev_info_t *rdip,
1694     peekpoke_ctlops_t *in_args)
1695 {
1696 	return (in_args->handle ? px_lib_do_caut_put(dip, rdip, in_args) :
1697 	    px_lib_do_poke(dip, rdip, in_args));
1698 }
1699 
1700 
/*
 * Perform a single protected read (peek) described by in_args.
 *
 * The access runs under on_trap() protection while pec_pokefault_mutex is
 * held, with the PEC marked as doing a DDI_FM_ERR_PEEK access.
 *
 * Returns the do_peek() result, or DDI_FAILURE if px_fm_enter() fails or
 * the access trapped.
 */
/*ARGSUSED*/
static int
px_lib_do_peek(dev_info_t *dip, peekpoke_ctlops_t *in_args)
{
	px_t *px_p = DIP_TO_STATE(dip);
	px_pec_t *pec_p = px_p->px_pec_p;
	int err = DDI_SUCCESS;
	on_trap_data_t otd;

	mutex_enter(&pec_p->pec_pokefault_mutex);
	/* The access type is updated between px_fm_enter() and px_fm_exit(). */
	if (px_fm_enter(px_p) != DDI_SUCCESS) {
		mutex_exit(&pec_p->pec_pokefault_mutex);
		return (DDI_FAILURE);
	}
	pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
	px_fm_exit(px_p);

	/* Set up protected environment. */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		/* Use peek_fault as the trampoline only around do_peek(). */
		uintptr_t tramp = otd.ot_trampoline;

		otd.ot_trampoline = (uintptr_t)&peek_fault;
		err = do_peek(in_args->size, (void *)in_args->dev_addr,
		    (void *)in_args->host_addr);
		otd.ot_trampoline = tramp;
	} else
		err = DDI_FAILURE;

	/* Take down protected environment. */
	no_trap();
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	mutex_exit(&pec_p->pec_pokefault_mutex);

#ifdef  DEBUG
	if (err == DDI_FAILURE)
		px_peekfault_cnt++;
#endif
	return (err);
}
1738 
1739 
/*
 * Perform repcount cautious reads of the given size through the access
 * handle carried in cautacc_ctlops_arg.
 *
 * A single read that traps fills the host buffer with 0xff bytes; a
 * multi-element read that traps leaves the host buffer as it stands.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if the access trapped.
 */
static int
px_lib_do_caut_get(dev_info_t *dip, peekpoke_ctlops_t *cautacc_ctlops_arg)
{
	size_t size = cautacc_ctlops_arg->size;
	uintptr_t dev_addr = cautacc_ctlops_arg->dev_addr;
	uintptr_t host_addr = cautacc_ctlops_arg->host_addr;
	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)cautacc_ctlops_arg->handle;
	size_t repcount = cautacc_ctlops_arg->repcount;
	uint_t flags = cautacc_ctlops_arg->flags;

	px_t *px_p = DIP_TO_STATE(dip);
	px_pec_t *pec_p = px_p->px_pec_p;
	int err = DDI_SUCCESS;

	/*
	 * Note that i_ndi_busop_access_enter ends up grabbing the pokefault
	 * mutex.
	 */
	i_ndi_busop_access_enter(hp->ahi_common.ah_dip, (ddi_acc_handle_t)hp);

	/* Mark both the PEC and the handle as expecting errors. */
	pec_p->pec_ontrap_data = (on_trap_data_t *)hp->ahi_err->err_ontrap;
	pec_p->pec_safeacc_type = DDI_FM_ERR_EXPECTED;
	hp->ahi_err->err_expected = DDI_FM_ERR_EXPECTED;

	if (repcount == 1) {
		if (!i_ddi_ontrap((ddi_acc_handle_t)hp)) {
			i_ddi_caut_get(size, (void *)dev_addr,
			    (void *)host_addr);
		} else {
			/* Trapped: hand back all-ones data of the same size. */
			int i;
			uint8_t *ff_addr = (uint8_t *)host_addr;
			for (i = 0; i < size; i++)
				*ff_addr++ = 0xff;

			err = DDI_FAILURE;
#ifdef  DEBUG
			px_peekfault_cnt++;
#endif
		}
	} else {
		if (!i_ddi_ontrap((ddi_acc_handle_t)hp)) {
			for (; repcount; repcount--) {
				i_ddi_caut_get(size, (void *)dev_addr,
				    (void *)host_addr);

				host_addr += size;

				/* The device address only moves on request. */
				if (flags == DDI_DEV_AUTOINCR)
					dev_addr += size;
			}
		} else {
			err = DDI_FAILURE;
#ifdef  DEBUG
			px_peekfault_cnt++;
#endif
		}
	}

	/* Tear down in reverse: trap protection, PEC state, then the lock. */
	i_ddi_notrap((ddi_acc_handle_t)hp);
	pec_p->pec_ontrap_data = NULL;
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	i_ndi_busop_access_exit(hp->ahi_common.ah_dip, (ddi_acc_handle_t)hp);
	hp->ahi_err->err_expected = DDI_FM_ERR_UNEXPECTED;

	return (err);
}
1806 
1807 /*ARGSUSED*/
1808 int
1809 px_lib_ctlops_peek(dev_info_t *dip, dev_info_t *rdip,
1810     peekpoke_ctlops_t *in_args, void *result)
1811 {
1812 	result = (void *)in_args->host_addr;
1813 	return (in_args->handle ? px_lib_do_caut_get(dip, in_args) :
1814 	    px_lib_do_peek(dip, in_args));
1815 }
1816 
1817 /*
1818  * implements PPM interface
1819  */
1820 int
1821 px_lib_pmctl(int cmd, px_t *px_p)
1822 {
1823 	ASSERT((cmd & ~PPMREQ_MASK) == PPMREQ);
1824 	switch (cmd) {
1825 	case PPMREQ_PRE_PWR_OFF:
1826 		/*
1827 		 * Currently there is no device power management for
1828 		 * the root complex (fire). When there is we need to make
1829 		 * sure that it is at full power before trying to send the
1830 		 * PME_Turn_Off message.
1831 		 */
1832 		DBG(DBG_PWR, px_p->px_dip,
1833 		    "ioctl: request to send PME_Turn_Off\n");
1834 		return (px_goto_l23ready(px_p));
1835 
1836 	case PPMREQ_PRE_PWR_ON:
1837 		DBG(DBG_PWR, px_p->px_dip, "ioctl: PRE_PWR_ON request\n");
1838 		return (px_pre_pwron_check(px_p));
1839 
1840 	case PPMREQ_POST_PWR_ON:
1841 		DBG(DBG_PWR, px_p->px_dip, "ioctl: POST_PWR_ON request\n");
1842 		return (px_goto_l0(px_p));
1843 
1844 	default:
1845 		return (DDI_FAILURE);
1846 	}
1847 }
1848 
1849 /*
1850  * sends PME_Turn_Off message to put the link in L2/L3 ready state.
1851  * called by px_ioctl.
1852  * returns DDI_SUCCESS or DDI_FAILURE
1853  * 1. Wait for link to be in L1 state (link status reg)
1854  * 2. write to PME_Turn_off reg to boradcast
1855  * 3. set timeout
1856  * 4. If timeout, return failure.
1857  * 5. If PM_TO_Ack, wait till link is in L2/L3 ready
1858  */
1859 static int
1860 px_goto_l23ready(px_t *px_p)
1861 {
1862 	pcie_pwr_t	*pwr_p;
1863 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
1864 	caddr_t	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
1865 	int		ret = DDI_SUCCESS;
1866 	clock_t		end, timeleft;
1867 	int		mutex_held = 1;
1868 
1869 	/* If no PM info, return failure */
1870 	if (!PCIE_PMINFO(px_p->px_dip) ||
1871 	    !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)))
1872 		return (DDI_FAILURE);
1873 
1874 	mutex_enter(&pwr_p->pwr_lock);
1875 	mutex_enter(&px_p->px_l23ready_lock);
1876 	/* Clear the PME_To_ACK receieved flag */
1877 	px_p->px_pm_flags &= ~PX_PMETOACK_RECVD;
1878 	/*
1879 	 * When P25 is the downstream device, after receiving
1880 	 * PME_To_ACK, fire will go to Detect state, which causes
1881 	 * the link down event. Inform FMA that this is expected.
1882 	 * In case of all other cards complaint with the pci express
1883 	 * spec, this will happen when the power is re-applied. FMA
1884 	 * code will clear this flag after one instance of LDN. Since
1885 	 * there will not be a LDN event for the spec compliant cards,
1886 	 * we need to clear the flag after receiving PME_To_ACK.
1887 	 */
1888 	px_p->px_pm_flags |= PX_LDN_EXPECTED;
1889 	if (px_send_pme_turnoff(csr_base) != DDI_SUCCESS) {
1890 		ret = DDI_FAILURE;
1891 		goto l23ready_done;
1892 	}
1893 	px_p->px_pm_flags |= PX_PME_TURNOFF_PENDING;
1894 
1895 	end = ddi_get_lbolt() + drv_usectohz(px_pme_to_ack_timeout);
1896 	while (!(px_p->px_pm_flags & PX_PMETOACK_RECVD)) {
1897 		timeleft = cv_timedwait(&px_p->px_l23ready_cv,
1898 		    &px_p->px_l23ready_lock, end);
1899 		/*
1900 		 * if cv_timedwait returns -1, it is either
1901 		 * 1) timed out or
1902 		 * 2) there was a pre-mature wakeup but by the time
1903 		 * cv_timedwait is called again end < lbolt i.e.
1904 		 * end is in the past.
1905 		 * 3) By the time we make first cv_timedwait call,
1906 		 * end < lbolt is true.
1907 		 */
1908 		if (timeleft == -1)
1909 			break;
1910 	}
1911 	if (!(px_p->px_pm_flags & PX_PMETOACK_RECVD)) {
1912 		/*
1913 		 * Either timedout or interrupt didn't get a
1914 		 * chance to grab the mutex and set the flag.
1915 		 * release the mutex and delay for sometime.
1916 		 * This will 1) give a chance for interrupt to
1917 		 * set the flag 2) creates a delay between two
1918 		 * consequetive requests.
1919 		 */
1920 		mutex_exit(&px_p->px_l23ready_lock);
1921 		delay(drv_usectohz(50 * PX_MSEC_TO_USEC));
1922 		mutex_held = 0;
1923 		if (!(px_p->px_pm_flags & PX_PMETOACK_RECVD)) {
1924 			ret = DDI_FAILURE;
1925 			DBG(DBG_PWR, px_p->px_dip, " Timed out while waiting"
1926 			    " for PME_TO_ACK\n");
1927 		}
1928 	}
1929 	px_p->px_pm_flags &=
1930 	    ~(PX_PME_TURNOFF_PENDING | PX_PMETOACK_RECVD | PX_LDN_EXPECTED);
1931 
1932 l23ready_done:
1933 	if (mutex_held)
1934 		mutex_exit(&px_p->px_l23ready_lock);
1935 	/*
1936 	 * Wait till link is in L1 idle, if sending PME_Turn_Off
1937 	 * was succesful.
1938 	 */
1939 	if (ret == DDI_SUCCESS) {
1940 		if (px_link_wait4l1idle(csr_base) != DDI_SUCCESS) {
1941 			DBG(DBG_PWR, px_p->px_dip, " Link is not at L1"
1942 			    " even though we received PME_To_ACK.\n");
1943 			/*
1944 			 * Workaround for hardware bug with P25.
1945 			 * Due to a hardware bug with P25, link state
1946 			 * will be Detect state rather than L1 after
1947 			 * link is transitioned to L23Ready state. Since
1948 			 * we don't know whether link is L23ready state
1949 			 * without Fire's state being L1_idle, we delay
1950 			 * here just to make sure that we wait till link
1951 			 * is transitioned to L23Ready state.
1952 			 */
1953 			delay(drv_usectohz(100 * PX_MSEC_TO_USEC));
1954 		}
1955 		pwr_p->pwr_link_lvl = PM_LEVEL_L3;
1956 
1957 	}
1958 	mutex_exit(&pwr_p->pwr_lock);
1959 	return (ret);
1960 }
1961 
1962 /*
1963  * Message interrupt handler intended to be shared for both
1964  * PME and PME_TO_ACK msg handling, currently only handles
1965  * PME_To_ACK message.
1966  */
1967 uint_t
1968 px_pmeq_intr(caddr_t arg)
1969 {
1970 	px_t	*px_p = (px_t *)arg;
1971 
1972 	DBG(DBG_PWR, px_p->px_dip, " PME_To_ACK received \n");
1973 	mutex_enter(&px_p->px_l23ready_lock);
1974 	cv_broadcast(&px_p->px_l23ready_cv);
1975 	if (px_p->px_pm_flags & PX_PME_TURNOFF_PENDING) {
1976 		px_p->px_pm_flags |= PX_PMETOACK_RECVD;
1977 	} else {
1978 		/*
1979 		 * This maybe the second ack received. If so then,
1980 		 * we should be receiving it during wait4L1 stage.
1981 		 */
1982 		px_p->px_pmetoack_ignored++;
1983 	}
1984 	mutex_exit(&px_p->px_l23ready_lock);
1985 	return (DDI_INTR_CLAIMED);
1986 }
1987 
1988 static int
1989 px_pre_pwron_check(px_t *px_p)
1990 {
1991 	pcie_pwr_t	*pwr_p;
1992 
1993 	/* If no PM info, return failure */
1994 	if (!PCIE_PMINFO(px_p->px_dip) ||
1995 	    !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)))
1996 		return (DDI_FAILURE);
1997 
1998 	/*
1999 	 * For the spec compliant downstream cards link down
2000 	 * is expected when the device is powered on.
2001 	 */
2002 	px_p->px_pm_flags |= PX_LDN_EXPECTED;
2003 	return (pwr_p->pwr_link_lvl == PM_LEVEL_L3 ? DDI_SUCCESS : DDI_FAILURE);
2004 }
2005 
2006 static int
2007 px_goto_l0(px_t *px_p)
2008 {
2009 	pcie_pwr_t	*pwr_p;
2010 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2011 	caddr_t csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2012 	int		ret = DDI_SUCCESS;
2013 	uint64_t	time_spent = 0;
2014 
2015 	/* If no PM info, return failure */
2016 	if (!PCIE_PMINFO(px_p->px_dip) ||
2017 	    !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)))
2018 		return (DDI_FAILURE);
2019 
2020 	mutex_enter(&pwr_p->pwr_lock);
2021 	/*
2022 	 * The following link retrain activity will cause LDN and LUP event.
2023 	 * Receiving LDN prior to receiving LUP is expected, not an error in
2024 	 * this case.  Receiving LUP indicates link is fully up to support
2025 	 * powering up down stream device, and of course any further LDN and
2026 	 * LUP outside this context will be error.
2027 	 */
2028 	px_p->px_lup_pending = 1;
2029 	if (px_link_retrain(csr_base) != DDI_SUCCESS) {
2030 		ret = DDI_FAILURE;
2031 		goto l0_done;
2032 	}
2033 
2034 	/* LUP event takes the order of 15ms amount of time to occur */
2035 	for (; px_p->px_lup_pending && (time_spent < px_lup_poll_to);
2036 	    time_spent += px_lup_poll_interval)
2037 		drv_usecwait(px_lup_poll_interval);
2038 	if (px_p->px_lup_pending)
2039 		ret = DDI_FAILURE;
2040 l0_done:
2041 	px_enable_detect_quiet(csr_base);
2042 	if (ret == DDI_SUCCESS)
2043 		pwr_p->pwr_link_lvl = PM_LEVEL_L0;
2044 	mutex_exit(&pwr_p->pwr_lock);
2045 	return (ret);
2046 }
2047 
/*
 * Extract the driver's binding name to identify which chip we're binding to.
 * Whenever a new bus bridge is created, the driver alias entry should be
 * added here to identify the device if needed.  If a device isn't added,
 * the identity defaults to PX_CHIP_UNIDENTIFIED.
 */
2054 static uint32_t
2055 px_identity_init(px_t *px_p)
2056 {
2057 	dev_info_t	*dip = px_p->px_dip;
2058 	char		*name = ddi_binding_name(dip);
2059 	uint32_t	revision = 0;
2060 
2061 	revision = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2062 	    "module-revision#", 0);
2063 
2064 	/* Check for Fire driver binding name */
2065 	if (strcmp(name, "pciex108e,80f0") == 0) {
2066 		DBG(DBG_ATTACH, dip, "px_identity_init: %s%d: "
2067 		    "(FIRE), module-revision %d\n", NAMEINST(dip),
2068 		    revision);
2069 
2070 		return ((revision >= FIRE_MOD_REV_20) ?
2071 		    PX_CHIP_FIRE : PX_CHIP_UNIDENTIFIED);
2072 	}
2073 
2074 	/* Check for Oberon driver binding name */
2075 	if (strcmp(name, "pciex108e,80f8") == 0) {
2076 		DBG(DBG_ATTACH, dip, "px_identity_init: %s%d: "
2077 		    "(OBERON), module-revision %d\n", NAMEINST(dip),
2078 		    revision);
2079 
2080 		return (PX_CHIP_OBERON);
2081 	}
2082 
2083 	DBG(DBG_ATTACH, dip, "%s%d: Unknown PCI Express Host bridge %s %x\n",
2084 	    ddi_driver_name(dip), ddi_get_instance(dip), name, revision);
2085 
2086 	return (PX_CHIP_UNIDENTIFIED);
2087 }
2088 
2089 int
2090 px_err_add_intr(px_fault_t *px_fault_p)
2091 {
2092 	dev_info_t	*dip = px_fault_p->px_fh_dip;
2093 	px_t		*px_p = DIP_TO_STATE(dip);
2094 
2095 	VERIFY(add_ivintr(px_fault_p->px_fh_sysino, PX_ERR_PIL,
2096 	    (intrfunc)px_fault_p->px_err_func, (caddr_t)px_fault_p,
2097 	    NULL, NULL) == 0);
2098 
2099 	px_ib_intr_enable(px_p, intr_dist_cpuid(), px_fault_p->px_intr_ino);
2100 
2101 	return (DDI_SUCCESS);
2102 }
2103 
2104 void
2105 px_err_rem_intr(px_fault_t *px_fault_p)
2106 {
2107 	dev_info_t	*dip = px_fault_p->px_fh_dip;
2108 	px_t		*px_p = DIP_TO_STATE(dip);
2109 
2110 	px_ib_intr_disable(px_p->px_ib_p, px_fault_p->px_intr_ino,
2111 	    IB_INTR_WAIT);
2112 
2113 	VERIFY(rem_ivintr(px_fault_p->px_fh_sysino, PX_ERR_PIL) == 0);
2114 }
2115 
2116 /*
2117  * px_cb_intr_redist() - sun4u only, CB interrupt redistribution
2118  */
2119 void
2120 px_cb_intr_redist(void *arg)
2121 {
2122 	px_cb_t		*cb_p = (px_cb_t *)arg;
2123 	px_cb_list_t	*pxl;
2124 	px_t		*pxp = NULL;
2125 	px_fault_t	*f_p = NULL;
2126 	uint32_t	new_cpuid;
2127 	intr_valid_state_t	enabled = 0;
2128 
2129 	mutex_enter(&cb_p->cb_mutex);
2130 
2131 	pxl = cb_p->pxl;
2132 	if (!pxl)
2133 		goto cb_done;
2134 
2135 	pxp = pxl->pxp;
2136 	f_p = &pxp->px_cb_fault;
2137 	for (; pxl && (f_p->px_fh_sysino != cb_p->sysino); ) {
2138 		pxl = pxl->next;
2139 		pxp = pxl->pxp;
2140 		f_p = &pxp->px_cb_fault;
2141 	}
2142 	if (pxl == NULL)
2143 		goto cb_done;
2144 
2145 	new_cpuid =  intr_dist_cpuid();
2146 	if (new_cpuid == cb_p->cpuid)
2147 		goto cb_done;
2148 
2149 	if ((px_lib_intr_getvalid(pxp->px_dip, f_p->px_fh_sysino, &enabled)
2150 	    != DDI_SUCCESS) || !enabled) {
2151 		DBG(DBG_IB, pxp->px_dip, "px_cb_intr_redist: CB not enabled, "
2152 		    "sysino(0x%x)\n", f_p->px_fh_sysino);
2153 		goto cb_done;
2154 	}
2155 
2156 	PX_INTR_DISABLE(pxp->px_dip, f_p->px_fh_sysino);
2157 
2158 	cb_p->cpuid = new_cpuid;
2159 	cb_p->sysino = f_p->px_fh_sysino;
2160 	PX_INTR_ENABLE(pxp->px_dip, cb_p->sysino, cb_p->cpuid);
2161 
2162 cb_done:
2163 	mutex_exit(&cb_p->cb_mutex);
2164 }
2165 
2166 /*
2167  * px_cb_add_intr() - Called from attach(9E) to create CB if not yet
2168  * created, to add CB interrupt vector always, but enable only once.
2169  */
2170 int
2171 px_cb_add_intr(px_fault_t *fault_p)
2172 {
2173 	px_t		*px_p = DIP_TO_STATE(fault_p->px_fh_dip);
2174 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2175 	px_cb_t		*cb_p = (px_cb_t *)px_get_cb(fault_p->px_fh_dip);
2176 	px_cb_list_t	*pxl, *pxl_new;
2177 	boolean_t	is_proxy = B_FALSE;
2178 
2179 	/* create cb */
2180 	if (cb_p == NULL) {
2181 		cb_p = kmem_zalloc(sizeof (px_cb_t), KM_SLEEP);
2182 
2183 		mutex_init(&cb_p->cb_mutex, NULL, MUTEX_DRIVER,
2184 		    (void *) ipltospl(FM_ERR_PIL));
2185 
2186 		cb_p->px_cb_func = px_cb_intr;
2187 		pxu_p->px_cb_p = cb_p;
2188 		px_set_cb(fault_p->px_fh_dip, (uint64_t)cb_p);
2189 
2190 		/* px_lib_dev_init allows only FIRE and OBERON */
2191 		px_err_reg_enable(
2192 		    (pxu_p->chip_type == PX_CHIP_FIRE) ?
2193 		    PX_ERR_JBC : PX_ERR_UBC,
2194 		    pxu_p->px_address[PX_REG_XBC]);
2195 	} else
2196 		pxu_p->px_cb_p = cb_p;
2197 
2198 	/* register cb interrupt */
2199 	VERIFY(add_ivintr(fault_p->px_fh_sysino, PX_ERR_PIL,
2200 	    (intrfunc)cb_p->px_cb_func, (caddr_t)cb_p, NULL, NULL) == 0);
2201 
2202 
2203 	/* update cb list */
2204 	mutex_enter(&cb_p->cb_mutex);
2205 	if (cb_p->pxl == NULL) {
2206 		is_proxy = B_TRUE;
2207 		pxl = kmem_zalloc(sizeof (px_cb_list_t), KM_SLEEP);
2208 		pxl->pxp = px_p;
2209 		cb_p->pxl = pxl;
2210 		cb_p->sysino = fault_p->px_fh_sysino;
2211 		cb_p->cpuid = intr_dist_cpuid();
2212 	} else {
2213 		/*
2214 		 * Find the last pxl or
2215 		 * stop short at encountering a redundent entry, or
2216 		 * both.
2217 		 */
2218 		pxl = cb_p->pxl;
2219 		for (; !(pxl->pxp == px_p) && pxl->next; pxl = pxl->next) {};
2220 		ASSERT(pxl->pxp != px_p);
2221 
2222 		/* add to linked list */
2223 		pxl_new = kmem_zalloc(sizeof (px_cb_list_t), KM_SLEEP);
2224 		pxl_new->pxp = px_p;
2225 		pxl->next = pxl_new;
2226 	}
2227 	cb_p->attachcnt++;
2228 	mutex_exit(&cb_p->cb_mutex);
2229 
2230 	if (is_proxy) {
2231 		/* add to interrupt redistribution list */
2232 		intr_dist_add(px_cb_intr_redist, cb_p);
2233 
2234 		/* enable cb hw interrupt */
2235 		px_ib_intr_enable(px_p, cb_p->cpuid, fault_p->px_intr_ino);
2236 	}
2237 
2238 	return (DDI_SUCCESS);
2239 }
2240 
/*
 * px_cb_rem_intr() - Called from detach(9E) to remove its CB
 * interrupt vector, to shift proxy to the next available px,
 * or disable CB interrupt when itself is the last.
 *
 * Sequence, as implemented below:
 *   1. Under cb_mutex, unlink and free this px's node from the CB list.
 *      If the px is not on the list a warning is logged and the routine
 *      returns without touching the interrupt registration.
 *   2. If this px's sysino is the one recorded in the CB, disable the
 *      hardware interrupt.
 *   3. If the list is now empty, drop out of interrupt redistribution.
 *   4. Remove this px's interrupt vector (failure trips VERIFY).
 *   5. If other px's remain and this px owned the CB sysino, hand the
 *      CB over to the px at the head of the list and return.
 *   6. Otherwise (list empty) disable the JBC/UBC error registers and
 *      destroy and free the CB itself.
 */
void
px_cb_rem_intr(px_fault_t *fault_p)
{
	px_t		*px_p = DIP_TO_STATE(fault_p->px_fh_dip), *pxp;
	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
	px_cb_t		*cb_p = PX2CB(px_p);
	px_cb_list_t	*pxl, *prev;
	px_fault_t	*f_p;

	ASSERT(cb_p->pxl);

	/* find and remove this px, and update cb list */
	mutex_enter(&cb_p->cb_mutex);

	pxl = cb_p->pxl;
	if (pxl->pxp == px_p) {
		/* this px is at the head of the list */
		cb_p->pxl = pxl->next;
	} else {
		/* search the remainder, tracking the predecessor node */
		prev = pxl;
		pxl = pxl->next;
		for (; pxl && (pxl->pxp != px_p); prev = pxl, pxl = pxl->next) {
		};
		if (!pxl) {
			cmn_err(CE_WARN, "px_cb_rem_intr: can't find px_p 0x%p "
			    "in registered CB list.", (void *)px_p);
			mutex_exit(&cb_p->cb_mutex);
			return;
		}
		prev->next = pxl->next;
	}
	pxu_p->px_cb_p = NULL;
	cb_p->attachcnt--;
	kmem_free(pxl, sizeof (px_cb_list_t));
	mutex_exit(&cb_p->cb_mutex);

	/*
	 * NOTE(review): cb_p->sysino and cb_p->pxl are read below without
	 * holding cb_mutex - confirm this window is safe against
	 * px_cb_intr_redist() and px_cb_intr().
	 */

	/* disable cb hw interrupt */
	if (fault_p->px_fh_sysino == cb_p->sysino)
		px_ib_intr_disable(px_p->px_ib_p, fault_p->px_intr_ino,
		    IB_INTR_WAIT);

	/* if last px, remove from interrupt redistribution list */
	if (cb_p->pxl == NULL)
		intr_dist_rem(px_cb_intr_redist, cb_p);

	/* de-register interrupt */
	VERIFY(rem_ivintr(fault_p->px_fh_sysino, PX_ERR_PIL) == 0);

	/* if not last px, assign next px to manage cb */
	mutex_enter(&cb_p->cb_mutex);
	if (cb_p->pxl) {
		if (fault_p->px_fh_sysino == cb_p->sysino) {
			/* the head of the list becomes the new owner */
			pxp = cb_p->pxl->pxp;
			f_p = &pxp->px_cb_fault;
			cb_p->sysino = f_p->px_fh_sysino;

			PX_INTR_ENABLE(pxp->px_dip, cb_p->sysino, cb_p->cpuid);
			(void) px_lib_intr_setstate(pxp->px_dip, cb_p->sysino,
			    INTR_IDLE_STATE);
		}
		mutex_exit(&cb_p->cb_mutex);
		return;
	}

	/* clean up after the last px */
	mutex_exit(&cb_p->cb_mutex);

	/* px_lib_dev_init allows only FIRE and OBERON */
	px_err_reg_disable(
	    (pxu_p->chip_type == PX_CHIP_FIRE) ? PX_ERR_JBC : PX_ERR_UBC,
	    pxu_p->px_address[PX_REG_XBC]);

	/* tear down the CB and clear the reference kept via px_set_cb() */
	mutex_destroy(&cb_p->cb_mutex);
	px_set_cb(fault_p->px_fh_dip, 0ull);
	kmem_free(cb_p, sizeof (px_cb_t));
}
2321 
2322 /*
2323  * px_cb_intr() - sun4u only,  CB interrupt dispatcher
2324  */
2325 uint_t
2326 px_cb_intr(caddr_t arg)
2327 {
2328 	px_cb_t		*cb_p = (px_cb_t *)arg;
2329 	px_t		*pxp;
2330 	px_fault_t	*f_p;
2331 	int		ret;
2332 
2333 	mutex_enter(&cb_p->cb_mutex);
2334 
2335 	if (!cb_p->pxl) {
2336 		mutex_exit(&cb_p->cb_mutex);
2337 		return (DDI_INTR_UNCLAIMED);
2338 	}
2339 
2340 	pxp = cb_p->pxl->pxp;
2341 	f_p = &pxp->px_cb_fault;
2342 
2343 	ret = f_p->px_err_func((caddr_t)f_p);
2344 
2345 	mutex_exit(&cb_p->cb_mutex);
2346 	return (ret);
2347 }
2348 
#ifdef	FMA
/*
 * px_fill_rc_status() - placeholder, compiled only when FMA is defined.
 * The body is currently empty: neither px_fault_p nor rc_status is read
 * or written.
 */
void
px_fill_rc_status(px_fault_t *px_fault_p, pciex_rc_error_regs_t *rc_status)
{
	/* populate the rc_status by reading the registers - TBD */
}
#endif /* FMA */
2356 
2357 /*
2358  * cpr callback
2359  *
2360  * disable fabric error msg interrupt prior to suspending
2361  * all device drivers; re-enable fabric error msg interrupt
2362  * after all devices are resumed.
2363  */
2364 static boolean_t
2365 px_cpr_callb(void *arg, int code)
2366 {
2367 	px_t		*px_p = (px_t *)arg;
2368 	px_ib_t		*ib_p = px_p->px_ib_p;
2369 	px_pec_t	*pec_p = px_p->px_pec_p;
2370 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2371 	caddr_t		csr_base;
2372 	devino_t	ce_ino, nf_ino, f_ino;
2373 	px_ino_t	*ce_ino_p, *nf_ino_p, *f_ino_p;
2374 	uint64_t	imu_log_enable, imu_intr_enable;
2375 	uint64_t	imu_log_mask, imu_intr_mask;
2376 
2377 	ce_ino = px_msiqid_to_devino(px_p, pec_p->pec_corr_msg_msiq_id);
2378 	nf_ino = px_msiqid_to_devino(px_p, pec_p->pec_non_fatal_msg_msiq_id);
2379 	f_ino = px_msiqid_to_devino(px_p, pec_p->pec_fatal_msg_msiq_id);
2380 	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2381 
2382 	imu_log_enable = CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE);
2383 	imu_intr_enable = CSR_XR(csr_base, IMU_INTERRUPT_ENABLE);
2384 
2385 	imu_log_mask = BITMASK(IMU_ERROR_LOG_ENABLE_FATAL_MES_NOT_EN_LOG_EN) |
2386 	    BITMASK(IMU_ERROR_LOG_ENABLE_NONFATAL_MES_NOT_EN_LOG_EN) |
2387 	    BITMASK(IMU_ERROR_LOG_ENABLE_COR_MES_NOT_EN_LOG_EN);
2388 
2389 	imu_intr_mask =
2390 	    BITMASK(IMU_INTERRUPT_ENABLE_FATAL_MES_NOT_EN_S_INT_EN) |
2391 	    BITMASK(IMU_INTERRUPT_ENABLE_NONFATAL_MES_NOT_EN_S_INT_EN) |
2392 	    BITMASK(IMU_INTERRUPT_ENABLE_COR_MES_NOT_EN_S_INT_EN) |
2393 	    BITMASK(IMU_INTERRUPT_ENABLE_FATAL_MES_NOT_EN_P_INT_EN) |
2394 	    BITMASK(IMU_INTERRUPT_ENABLE_NONFATAL_MES_NOT_EN_P_INT_EN) |
2395 	    BITMASK(IMU_INTERRUPT_ENABLE_COR_MES_NOT_EN_P_INT_EN);
2396 
2397 	switch (code) {
2398 	case CB_CODE_CPR_CHKPT:
2399 		/* disable imu rbne on corr/nonfatal/fatal errors */
2400 		CSR_XS(csr_base, IMU_ERROR_LOG_ENABLE,
2401 		    imu_log_enable & (~imu_log_mask));
2402 
2403 		CSR_XS(csr_base, IMU_INTERRUPT_ENABLE,
2404 		    imu_intr_enable & (~imu_intr_mask));
2405 
2406 		/* disable CORR intr mapping */
2407 		px_ib_intr_disable(ib_p, ce_ino, IB_INTR_NOWAIT);
2408 
2409 		/* disable NON FATAL intr mapping */
2410 		px_ib_intr_disable(ib_p, nf_ino, IB_INTR_NOWAIT);
2411 
2412 		/* disable FATAL intr mapping */
2413 		px_ib_intr_disable(ib_p, f_ino, IB_INTR_NOWAIT);
2414 
2415 		break;
2416 
2417 	case CB_CODE_CPR_RESUME:
2418 		pxu_p->cpr_flag = PX_NOT_CPR;
2419 		mutex_enter(&ib_p->ib_ino_lst_mutex);
2420 
2421 		ce_ino_p = px_ib_locate_ino(ib_p, ce_ino);
2422 		nf_ino_p = px_ib_locate_ino(ib_p, nf_ino);
2423 		f_ino_p = px_ib_locate_ino(ib_p, f_ino);
2424 
2425 		/* enable CORR intr mapping */
2426 		if (ce_ino_p)
2427 			px_ib_intr_enable(px_p, ce_ino_p->ino_cpuid, ce_ino);
2428 		else
2429 			cmn_err(CE_WARN, "px_cpr_callb: RESUME unable to "
2430 			    "reenable PCIe Correctable msg intr.\n");
2431 
2432 		/* enable NON FATAL intr mapping */
2433 		if (nf_ino_p)
2434 			px_ib_intr_enable(px_p, nf_ino_p->ino_cpuid, nf_ino);
2435 		else
2436 			cmn_err(CE_WARN, "px_cpr_callb: RESUME unable to "
2437 			    "reenable PCIe Non Fatal msg intr.\n");
2438 
2439 		/* enable FATAL intr mapping */
2440 		if (f_ino_p)
2441 			px_ib_intr_enable(px_p, f_ino_p->ino_cpuid, f_ino);
2442 		else
2443 			cmn_err(CE_WARN, "px_cpr_callb: RESUME unable to "
2444 			    "reenable PCIe Fatal msg intr.\n");
2445 
2446 		mutex_exit(&ib_p->ib_ino_lst_mutex);
2447 
2448 		/* enable corr/nonfatal/fatal not enable error */
2449 		CSR_XS(csr_base, IMU_ERROR_LOG_ENABLE, (imu_log_enable |
2450 		    (imu_log_mask & px_imu_log_mask)));
2451 		CSR_XS(csr_base, IMU_INTERRUPT_ENABLE, (imu_intr_enable |
2452 		    (imu_intr_mask & px_imu_intr_mask)));
2453 
2454 		break;
2455 	}
2456 
2457 	return (B_TRUE);
2458 }
2459 
2460 uint64_t
2461 px_get_rng_parent_hi_mask(px_t *px_p)
2462 {
2463 	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
2464 	uint64_t mask;
2465 
2466 	switch (PX_CHIP_TYPE(pxu_p)) {
2467 	case PX_CHIP_OBERON:
2468 		mask = OBERON_RANGE_PROP_MASK;
2469 		break;
2470 	case PX_CHIP_FIRE:
2471 		mask = PX_RANGE_PROP_MASK;
2472 		break;
2473 	default:
2474 		mask = PX_RANGE_PROP_MASK;
2475 	}
2476 
2477 	return (mask);
2478 }
2479 
2480 /*
2481  * fetch chip's range propery's value
2482  */
2483 uint64_t
2484 px_get_range_prop(px_t *px_p, pci_ranges_t *rp, int bank)
2485 {
2486 	uint64_t mask, range_prop;
2487 
2488 	mask = px_get_rng_parent_hi_mask(px_p);
2489 	range_prop = (((uint64_t)(rp[bank].parent_high & mask)) << 32) |
2490 	    rp[bank].parent_low;
2491 
2492 	return (range_prop);
2493 }
2494 
2495 /*
2496  * fetch the config space base addr of the root complex
2497  * note this depends on px structure being initialized
2498  */
2499 uint64_t
2500 px_lib_get_cfgacc_base(dev_info_t *dip)
2501 {
2502 	int		instance = DIP_TO_INST(dip);
2503 	px_t		*px_p = INST_TO_STATE(instance);
2504 	pci_ranges_t	*rp = px_p->px_ranges_p;
2505 	int		bank = PCI_REG_ADDR_G(PCI_ADDR_CONFIG);
2506 
2507 	/* Get Fire's Physical Base Address */
2508 	return (px_get_range_prop(px_p, rp, bank));
2509 }
2510 
2511 /*
2512  * add cpr callback
2513  */
2514 void
2515 px_cpr_add_callb(px_t *px_p)
2516 {
2517 	px_p->px_cprcb_id = callb_add(px_cpr_callb, (void *)px_p,
2518 	    CB_CL_CPR_POST_USER, "px_cpr");
2519 }
2520 
/*
 * remove cpr callback
 *
 * Deletes the callback identified by px_p->px_cprcb_id; the result of
 * callb_delete() is deliberately ignored.
 */
void
px_cpr_rem_callb(px_t *px_p)
{
	(void) callb_delete(px_p->px_cprcb_id);
}
2529 
2530 /*ARGSUSED*/
2531 static uint_t
2532 px_hp_intr(caddr_t arg1, caddr_t arg2)
2533 {
2534 	px_t		*px_p = (px_t *)arg1;
2535 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2536 	int		rval;
2537 
2538 	rval = pcie_intr(px_p->px_dip);
2539 
2540 #ifdef  DEBUG
2541 	if (rval == DDI_INTR_UNCLAIMED)
2542 		cmn_err(CE_WARN, "%s%d: UNCLAIMED intr\n",
2543 		    ddi_driver_name(px_p->px_dip),
2544 		    ddi_get_instance(px_p->px_dip));
2545 #endif
2546 
2547 	/* Set the interrupt state to idle */
2548 	if (px_lib_intr_setstate(px_p->px_dip,
2549 	    pxu_p->hp_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
2550 		return (DDI_INTR_UNCLAIMED);
2551 
2552 	return (rval);
2553 }
2554 
2555 int
2556 px_lib_hotplug_init(dev_info_t *dip, void *arg)
2557 {
2558 	px_t	*px_p = DIP_TO_STATE(dip);
2559 	pxu_t 	*pxu_p = (pxu_t *)px_p->px_plat_p;
2560 	uint64_t ret;
2561 
2562 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2563 	    "hotplug-capable") == 0)
2564 		return (DDI_FAILURE);
2565 
2566 	if ((ret = hvio_hotplug_init(dip, arg)) == DDI_SUCCESS) {
2567 		if (px_lib_intr_devino_to_sysino(px_p->px_dip,
2568 		    px_p->px_inos[PX_INTR_HOTPLUG], &pxu_p->hp_sysino) !=
2569 		    DDI_SUCCESS) {
2570 #ifdef	DEBUG
2571 			cmn_err(CE_WARN, "%s%d: devino_to_sysino fails\n",
2572 			    ddi_driver_name(px_p->px_dip),
2573 			    ddi_get_instance(px_p->px_dip));
2574 #endif
2575 			return (DDI_FAILURE);
2576 		}
2577 
2578 		VERIFY(add_ivintr(pxu_p->hp_sysino, PCIE_INTR_PRI,
2579 		    (intrfunc)px_hp_intr, (caddr_t)px_p, NULL, NULL) == 0);
2580 
2581 		px_ib_intr_enable(px_p, intr_dist_cpuid(),
2582 		    px_p->px_inos[PX_INTR_HOTPLUG]);
2583 	}
2584 
2585 	return (ret);
2586 }
2587 
2588 void
2589 px_lib_hotplug_uninit(dev_info_t *dip)
2590 {
2591 	if (hvio_hotplug_uninit(dip) == DDI_SUCCESS) {
2592 		px_t	*px_p = DIP_TO_STATE(dip);
2593 		pxu_t 	*pxu_p = (pxu_t *)px_p->px_plat_p;
2594 
2595 		px_ib_intr_disable(px_p->px_ib_p,
2596 		    px_p->px_inos[PX_INTR_HOTPLUG], IB_INTR_WAIT);
2597 
2598 		VERIFY(rem_ivintr(pxu_p->hp_sysino, PCIE_INTR_PRI) == 0);
2599 	}
2600 }
2601 
2602 /*
2603  * px_hp_intr_redist() - sun4u only, HP interrupt redistribution
2604  */
2605 void
2606 px_hp_intr_redist(px_t *px_p)
2607 {
2608 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(px_p->px_dip);
2609 
2610 	if (px_p && PCIE_IS_PCIE_HOTPLUG_ENABLED(bus_p)) {
2611 		px_ib_intr_dist_en(px_p->px_dip, intr_dist_cpuid(),
2612 		    px_p->px_inos[PX_INTR_HOTPLUG], B_FALSE);
2613 	}
2614 }
2615 
2616 boolean_t
2617 px_lib_is_in_drain_state(px_t *px_p)
2618 {
2619 	pxu_t 	*pxu_p = (pxu_t *)px_p->px_plat_p;
2620 	caddr_t csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2621 	uint64_t drain_status;
2622 
2623 	if (PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON) {
2624 		drain_status = CSR_BR(csr_base, DRAIN_CONTROL_STATUS, DRAIN);
2625 	} else {
2626 		drain_status = CSR_BR(csr_base, TLU_STATUS, DRAIN);
2627 	}
2628 
2629 	return (drain_status);
2630 }
2631 
2632 pcie_req_id_t
2633 px_lib_get_bdf(px_t *px_p)
2634 {
2635 	pxu_t 	*pxu_p = (pxu_t *)px_p->px_plat_p;
2636 	caddr_t csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2637 	pcie_req_id_t bdf;
2638 
2639 	bdf = CSR_BR(csr_base, DMC_PCI_EXPRESS_CONFIGURATION, REQ_ID);
2640 
2641 	return (bdf);
2642 }
2643 
2644 /*ARGSUSED*/
2645 int
2646 px_lib_get_root_complex_mps(px_t *px_p, dev_info_t *dip, int *mps)
2647 {
2648 	pxu_t	*pxu_p;
2649 	caddr_t csr_base;
2650 
2651 	pxu_p = (pxu_t *)px_p->px_plat_p;
2652 
2653 	if (pxu_p == NULL)
2654 		return (DDI_FAILURE);
2655 
2656 	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2657 
2658 
2659 	*mps = CSR_XR(csr_base, TLU_DEVICE_CAPABILITIES) &
2660 	    TLU_DEVICE_CAPABILITIES_MPS_MASK;
2661 
2662 	return (DDI_SUCCESS);
2663 }
2664 
2665 /*ARGSUSED*/
2666 int
2667 px_lib_set_root_complex_mps(px_t *px_p,  dev_info_t *dip, int mps)
2668 {
2669 	pxu_t	*pxu_p;
2670 	caddr_t csr_base;
2671 	uint64_t dev_ctrl;
2672 	int link_width, val;
2673 	px_chip_type_t chip_type = px_identity_init(px_p);
2674 
2675 	pxu_p = (pxu_t *)px_p->px_plat_p;
2676 
2677 	if (pxu_p == NULL)
2678 		return (DDI_FAILURE);
2679 
2680 	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2681 
2682 	dev_ctrl = CSR_XR(csr_base, TLU_DEVICE_CONTROL);
2683 	dev_ctrl |= (mps << TLU_DEVICE_CONTROL_MPS);
2684 
2685 	CSR_XS(csr_base, TLU_DEVICE_CONTROL, dev_ctrl);
2686 
2687 	link_width = CSR_FR(csr_base, TLU_LINK_STATUS, WIDTH);
2688 
2689 	/*
2690 	 * Convert link_width to match timer array configuration.
2691 	 */
2692 	switch (link_width) {
2693 	case 1:
2694 		link_width = 0;
2695 		break;
2696 	case 4:
2697 		link_width = 1;
2698 		break;
2699 	case 8:
2700 		link_width = 2;
2701 		break;
2702 	case 16:
2703 		link_width = 3;
2704 		break;
2705 	default:
2706 		link_width = 0;
2707 	}
2708 
2709 	val = px_replay_timer_table[mps][link_width];
2710 	CSR_XS(csr_base, LPU_TXLINK_REPLAY_TIMER_THRESHOLD, val);
2711 
2712 	if (chip_type == PX_CHIP_OBERON)
2713 		return (DDI_SUCCESS);
2714 
2715 	val = px_acknak_timer_table[mps][link_width];
2716 	CSR_XS(csr_base, LPU_TXLINK_FREQUENT_NAK_LATENCY_TIMER_THRESHOLD, val);
2717 
2718 	return (DDI_SUCCESS);
2719 }
2720 
/*
 * px_lib_fabric_sync() - does nothing with dip and always returns
 * DDI_SUCCESS.
 */
/*ARGSUSED*/
int
px_lib_fabric_sync(dev_info_t *dip)
{
	/* a no-op on the sun4u platform */
	return (DDI_SUCCESS);
}
2728