1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/linker.h>
38 #include <sys/fcntl.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/endian.h>
44 #include <sys/machintr.h>
45 
46 #include <machine/msi_machdep.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 
52 #include <sys/bus.h>
53 #include <sys/rman.h>
54 #include <sys/device.h>
55 
56 #include <sys/pciio.h>
57 #include <bus/pci/pcireg.h>
58 #include <bus/pci/pcivar.h>
59 #include <bus/pci/pci_private.h>
60 
61 #include <bus/u4b/controller/xhcireg.h>
62 #include <bus/u4b/controller/ehcireg.h>
63 #include <bus/u4b/controller/ohcireg.h>
64 #include <bus/u4b/controller/uhcireg.h>
65 
66 #include "pcib_if.h"
67 #include "pci_if.h"
68 
69 #ifdef __HAVE_ACPI
70 #include <contrib/dev/acpica/acpi.h>
71 #include "acpi_if.h"
72 #else
73 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
74 #endif
75 
76 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
77 
78 static uint32_t		pci_mapbase(unsigned mapreg);
79 static const char	*pci_maptype(unsigned mapreg);
80 static int		pci_mapsize(unsigned testval);
81 static int		pci_maprange(unsigned mapreg);
82 static void		pci_fixancient(pcicfgregs *cfg);
83 
84 static int		pci_porten(device_t pcib, int b, int s, int f);
85 static int		pci_memen(device_t pcib, int b, int s, int f);
86 static void		pci_assign_interrupt(device_t bus, device_t dev,
87 			    int force_route);
88 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
89 			    int b, int s, int f, int reg,
90 			    struct resource_list *rl, int force, int prefetch);
91 static int		pci_probe(device_t dev);
92 static int		pci_attach(device_t dev);
93 static void		pci_child_detached(device_t, device_t);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_setup_msix_vector(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix_vector(device_t dev, u_int index);
115 static void		pci_unmask_msix_vector(device_t dev, u_int index);
116 static void		pci_mask_msix_allvectors(device_t dev);
117 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
118 static int		pci_msi_blacklisted(void);
119 static void		pci_resume_msi(device_t dev);
120 static void		pci_resume_msix(device_t dev);
121 static int		pcie_slotimpl(const pcicfgregs *);
122 static void		pci_print_verbose_expr(const pcicfgregs *);
123 
124 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
128 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
129 static void		pci_read_cap_subvendor(device_t, int, int,
130 			    pcicfgregs *);
131 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
132 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
133 
134 static device_method_t pci_methods[] = {
135 	/* Device interface */
136 	DEVMETHOD(device_probe,		pci_probe),
137 	DEVMETHOD(device_attach,	pci_attach),
138 	DEVMETHOD(device_detach,	bus_generic_detach),
139 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
140 	DEVMETHOD(device_suspend,	pci_suspend),
141 	DEVMETHOD(device_resume,	pci_resume),
142 
143 	/* Bus interface */
144 	DEVMETHOD(bus_print_child,	pci_print_child),
145 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
146 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
147 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
148 	DEVMETHOD(bus_driver_added,	pci_driver_added),
149 	DEVMETHOD(bus_child_detached,	pci_child_detached),
150 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
151 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
152 
153 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
154 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
155 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
156 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
157 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
158 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
159 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
160 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
161 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
162 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
163 
164 	/* PCI interface */
165 	DEVMETHOD(pci_read_config,	pci_read_config_method),
166 	DEVMETHOD(pci_write_config,	pci_write_config_method),
167 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
168 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
169 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
170 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
171 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
172 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
173 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
174 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
175 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
176 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
177 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
178 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
179 	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
180 	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
181 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
182 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
183 
184 	DEVMETHOD_END
185 };
186 
187 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
188 
189 static devclass_t pci_devclass;
190 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
191 MODULE_VERSION(pci, 1);
192 
193 static char	*pci_vendordata;
194 static size_t	pci_vendordata_size;
195 
196 
197 static const struct pci_read_cap {
198 	int		cap;
199 	pci_read_cap_t	read_cap;
200 } pci_read_caps[] = {
201 	{ PCIY_PMG,		pci_read_cap_pmgt },
202 	{ PCIY_HT,		pci_read_cap_ht },
203 	{ PCIY_MSI,		pci_read_cap_msi },
204 	{ PCIY_MSIX,		pci_read_cap_msix },
205 	{ PCIY_VPD,		pci_read_cap_vpd },
206 	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
207 	{ PCIY_PCIX,		pci_read_cap_pcix },
208 	{ PCIY_EXPRESS,		pci_read_cap_express },
209 	{ 0, NULL } /* required last entry */
210 };
211 
212 struct pci_quirk {
213 	uint32_t devid;	/* Vendor/device of the card */
214 	int	type;
215 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
216 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
217 #define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
218 	int	arg1;
219 	int	arg2;
220 };
221 
222 struct pci_quirk pci_quirks[] = {
223 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
224 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
225 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
226 	/* As does the ServerWorks OSB4 (the SMBus mapping register) */
227 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
228 
229 	/*
230 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
231 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
232 	 */
233 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
234 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235 
236 	/*
237 	 * MSI doesn't work on earlier Intel chipsets including
238 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
239 	 */
240 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
242 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
243 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
244 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
245 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
246 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247 
248 	/*
249 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
250 	 * bridge.
251 	 */
252 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
253 
254 	/*
255 	 * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have
256 	 * a bug whereby the MSI interrupt does not assert if the
257 	 * PCIM_CMD_INTxDIS bit of the command register is set.
258 	 */
259 	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
260 	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
261 	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
262 	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
263 	{ 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
264 
265 	{ 0 }
266 };
267 
268 /* map register information */
269 #define	PCI_MAPMEM	0x01	/* memory map */
270 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
271 #define	PCI_MAPPORT	0x04	/* port map */
272 
273 #define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
274 #define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */
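/* For example, SYS_RES_IRQ rid 1 maps to MSI-X vector 0, and vice versa. */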
275 
276 struct devlist pci_devq;
277 uint32_t pci_generation;
278 uint32_t pci_numdevs = 0;
279 static int pcie_chipset, pcix_chipset;
280 
281 /* sysctl vars */
282 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
283 
284 static int pci_enable_io_modes = 1;
285 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
286 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
287     &pci_enable_io_modes, 1,
288     "Enable I/O and memory bits in the config register.  Some BIOSes do not"
289     " enable these bits correctly.  We'd like to do this all the time, but"
290     " there are some peripherals that this causes problems with.");
291 
292 static int pci_do_power_nodriver = 0;
293 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
294 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
295     &pci_do_power_nodriver, 0,
296     "Place a function into D3 state when no driver attaches to it.  0 means"
297     " disable.  1 means conservatively place devices into D3 state.  2 means"
298     " aggressively place devices into D3 state.  3 means put absolutely"
299     " everything in D3 state.");
300 
301 static int pci_do_power_resume = 1;
302 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
303 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
304     &pci_do_power_resume, 1,
305   "Transition from D3 -> D0 on resume.");
306 
307 static int pci_do_msi = 1;
308 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
309 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
310     "Enable support for MSI interrupts");
311 
312 static int pci_do_msix = 1;
313 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
314 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
315     "Enable support for MSI-X interrupts");
316 
317 static int pci_honor_msi_blacklist = 1;
318 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
319 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
320     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
321 
322 #if defined(__x86_64__)
323 static int pci_usb_takeover = 1;
324 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
325 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD,
326     &pci_usb_takeover, 1,
327     "Enable early takeover of USB controllers. Disable this if you depend on"
328     " BIOS emulation of USB devices, that is you use USB devices (like"
329     " keyboard or mouse) but do not load USB drivers");
330 #endif
331 
332 static int pci_msi_cpuid;
333 
334 static int
335 pci_has_quirk(uint32_t devid, int quirk)
336 {
337 	const struct pci_quirk *q;
338 
339 	for (q = &pci_quirks[0]; q->devid; q++) {
340 		if (q->devid == devid && q->type == quirk)
341 			return (1);
342 	}
343 	return (0);
344 }
345 
346 /* Find a device_t by bus/slot/function in domain 0 */
347 
348 device_t
349 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
350 {
351 
352 	return (pci_find_dbsf(0, bus, slot, func));
353 }
354 
355 /* Find a device_t by domain/bus/slot/function */
356 
357 device_t
358 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
359 {
360 	struct pci_devinfo *dinfo;
361 
362 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
363 		if ((dinfo->cfg.domain == domain) &&
364 		    (dinfo->cfg.bus == bus) &&
365 		    (dinfo->cfg.slot == slot) &&
366 		    (dinfo->cfg.func == func)) {
367 			return (dinfo->cfg.dev);
368 		}
369 	}
370 
371 	return (NULL);
372 }
373 
374 /* Find a device_t by vendor/device ID */
375 
376 device_t
377 pci_find_device(uint16_t vendor, uint16_t device)
378 {
379 	struct pci_devinfo *dinfo;
380 
381 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
382 		if ((dinfo->cfg.vendor == vendor) &&
383 		    (dinfo->cfg.device == device)) {
384 			return (dinfo->cfg.dev);
385 		}
386 	}
387 
388 	return (NULL);
389 }
390 
391 device_t
392 pci_find_class(uint8_t class, uint8_t subclass)
393 {
394 	struct pci_devinfo *dinfo;
395 
396 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
397 		if (dinfo->cfg.baseclass == class &&
398 		    dinfo->cfg.subclass == subclass) {
399 			return (dinfo->cfg.dev);
400 		}
401 	}
402 
403 	return (NULL);
404 }
405 
406 device_t
407 pci_iterate_class(struct pci_devinfo **dinfop, uint8_t class, uint8_t subclass)
408 {
409 	struct pci_devinfo *dinfo;
410 
411 	if (*dinfop)
412 		dinfo = STAILQ_NEXT(*dinfop, pci_links);
413 	else
414 		dinfo = STAILQ_FIRST(&pci_devq);
415 
416 	while (dinfo) {
417 		if (dinfo->cfg.baseclass == class &&
418 		    dinfo->cfg.subclass == subclass) {
419 			*dinfop = dinfo;
420 			return (dinfo->cfg.dev);
421 		}
422 		dinfo = STAILQ_NEXT(dinfo, pci_links);
423 	}
424 	*dinfop = NULL;
425 	return (NULL);
426 }
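/*
 * Illustrative (non-compiled) sketch of walking every device of a
 * given class with pci_iterate_class(): the pci_devinfo pointer is
 * the iteration cursor, and a NULL cursor starts a new scan.  The USB
 * class/subclass values are just an example.
 */
#if 0
static void
pci_example_iterate_usb(void)
{
	struct pci_devinfo *dinfo = NULL;
	device_t dev;

	while ((dev = pci_iterate_class(&dinfo, PCIC_SERIALBUS,
	    PCIS_SERIALBUS_USB)) != NULL)
		device_printf(dev, "USB controller found\n");
}
#endif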
427 
428 /* return base address of memory or port map */
429 
430 static uint32_t
431 pci_mapbase(uint32_t mapreg)
432 {
433 
434 	if (PCI_BAR_MEM(mapreg))
435 		return (mapreg & PCIM_BAR_MEM_BASE);
436 	else
437 		return (mapreg & PCIM_BAR_IO_BASE);
438 }
439 
440 /* return map type of memory or port map */
441 
442 static const char *
443 pci_maptype(unsigned mapreg)
444 {
445 
446 	if (PCI_BAR_IO(mapreg))
447 		return ("I/O Port");
448 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
449 		return ("Prefetchable Memory");
450 	return ("Memory");
451 }
452 
453 /* return log2 of map size decoded for memory or port map */
454 
455 static int
456 pci_mapsize(uint32_t testval)
457 {
458 	int ln2size;
459 
460 	testval = pci_mapbase(testval);
461 	ln2size = 0;
462 	if (testval != 0) {
463 		while ((testval & 1) == 0)
464 		{
465 			ln2size++;
466 			testval >>= 1;
467 		}
468 	}
469 	return (ln2size);
470 }
471 
472 /* return log2 of address range supported by map register */
473 
474 static int
475 pci_maprange(unsigned mapreg)
476 {
477 	int ln2range = 0;
478 
479 	if (PCI_BAR_IO(mapreg))
480 		ln2range = 32;
481 	else
482 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
483 		case PCIM_BAR_MEM_32:
484 			ln2range = 32;
485 			break;
486 		case PCIM_BAR_MEM_1MB:
487 			ln2range = 20;
488 			break;
489 		case PCIM_BAR_MEM_64:
490 			ln2range = 64;
491 			break;
492 		}
493 	return (ln2range);
494 }
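/*
 * Non-compiled sketch of the standard BAR sizing probe that the four
 * helpers above support (REG/WREG stand for the config-space accessor
 * macros defined later in this file, reg for the BAR's config offset):
 * write all 1s to the BAR, read back the mask, restore the BAR, and
 * the decoded size is 2^pci_mapsize(testval).  E.g. a 32-bit memory
 * BAR that reads back 0xfffff000 decodes to ln2size 12, a 4KB window.
 */
#if 0
	map = REG(reg, 4);		/* save the original value */
	WREG(reg, 0xffffffff, 4);	/* size probe */
	testval = REG(reg, 4);
	WREG(reg, map, 4);		/* restore */

	kprintf("%s BAR, base 0x%x, size 2^%d, range 2^%d\n",
	    pci_maptype(map), pci_mapbase(map),
	    pci_mapsize(testval), pci_maprange(map));
#endif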
495 
496 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
497 
498 static void
499 pci_fixancient(pcicfgregs *cfg)
500 {
501 	if (cfg->hdrtype != 0)
502 		return;
503 
504 	/* PCI to PCI bridges use header type 1 */
505 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
506 		cfg->hdrtype = 1;
507 }
508 
509 /* extract header type specific config data */
510 
511 static void
512 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
513 {
514 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
515 	switch (cfg->hdrtype) {
516 	case 0:
517 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
518 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
519 		cfg->nummaps	    = PCI_MAXMAPS_0;
520 		break;
521 	case 1:
522 		cfg->nummaps	    = PCI_MAXMAPS_1;
523 		break;
524 	case 2:
525 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
526 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
527 		cfg->nummaps	    = PCI_MAXMAPS_2;
528 		break;
529 	}
530 #undef REG
531 }
532 
533 /* read configuration header into pcicfgregs structure */
534 struct pci_devinfo *
535 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
536 {
537 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
538 	pcicfgregs *cfg = NULL;
539 	struct pci_devinfo *devlist_entry;
540 	struct devlist *devlist_head;
541 
542 	devlist_head = &pci_devq;
543 
544 	devlist_entry = NULL;
545 
546 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
547 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
548 
549 		cfg = &devlist_entry->cfg;
550 
551 		cfg->domain		= d;
552 		cfg->bus		= b;
553 		cfg->slot		= s;
554 		cfg->func		= f;
555 		cfg->vendor		= REG(PCIR_VENDOR, 2);
556 		cfg->device		= REG(PCIR_DEVICE, 2);
557 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
558 		cfg->statreg		= REG(PCIR_STATUS, 2);
559 		cfg->baseclass		= REG(PCIR_CLASS, 1);
560 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
561 		cfg->progif		= REG(PCIR_PROGIF, 1);
562 		cfg->revid		= REG(PCIR_REVID, 1);
563 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
564 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
565 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
566 		cfg->intpin		= REG(PCIR_INTPIN, 1);
567 		cfg->intline		= REG(PCIR_INTLINE, 1);
568 
569 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
570 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
571 
572 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
573 		cfg->hdrtype		&= ~PCIM_MFDEV;
574 
575 		pci_fixancient(cfg);
576 		pci_hdrtypedata(pcib, b, s, f, cfg);
577 
578 		pci_read_capabilities(pcib, cfg);
579 
580 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
581 
582 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
583 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
584 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
585 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
586 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
587 
588 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
589 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
590 		devlist_entry->conf.pc_vendor = cfg->vendor;
591 		devlist_entry->conf.pc_device = cfg->device;
592 
593 		devlist_entry->conf.pc_class = cfg->baseclass;
594 		devlist_entry->conf.pc_subclass = cfg->subclass;
595 		devlist_entry->conf.pc_progif = cfg->progif;
596 		devlist_entry->conf.pc_revid = cfg->revid;
597 
598 		pci_numdevs++;
599 		pci_generation++;
600 	}
601 	return (devlist_entry);
602 #undef REG
603 }
604 
605 static int
606 pci_fixup_nextptr(int *nextptr0)
607 {
608 	int nextptr = *nextptr0;
609 
610 	/* "Next pointer" is only one byte */
611 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
612 
613 	if (nextptr & 0x3) {
614 		/*
615 		 * PCI local bus spec 3.0:
616 		 *
617 		 * "... The bottom two bits of all pointers are reserved
618 		 *  and must be implemented as 00b although software must
619 		 *  mask them to allow for future uses of these bits ..."
620 		 */
621 		if (bootverbose) {
622 			kprintf("Illegal PCI extended capability "
623 				"offset, fixup 0x%02x -> 0x%02x\n",
624 				nextptr, nextptr & ~0x3);
625 		}
626 		nextptr &= ~0x3;
627 	}
628 	*nextptr0 = nextptr;
629 
630 	if (nextptr < 0x40) {
631 		if (nextptr != 0) {
632 			kprintf("Illegal PCI extended capability "
633 				"offset 0x%02x", nextptr);
634 		}
635 		return 0;
636 	}
637 	return 1;
638 }
639 
640 static void
641 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
642 {
643 #define REG(n, w)	\
644 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
645 
646 	struct pcicfg_pp *pp = &cfg->pp;
647 
648 	if (pp->pp_cap)
649 		return;
650 
651 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
652 	pp->pp_status = ptr + PCIR_POWER_STATUS;
653 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
654 
655 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
656 		/*
657 		 * XXX
658 		 * We should write to data_select and read back from
659 		 * data_scale to determine whether data register is
660 		 * implemented.
661 		 */
662 #ifdef foo
663 		pp->pp_data = ptr + PCIR_POWER_DATA;
664 #else
665 		pp->pp_data = 0;
666 #endif
667 	}
668 
669 #undef REG
670 }
671 
672 static void
673 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
674 {
675 #if defined(__x86_64__)
676 
677 #define REG(n, w)	\
678 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
679 
680 	struct pcicfg_ht *ht = &cfg->ht;
681 	uint64_t addr;
682 	uint32_t val;
683 
684 	/* Determine HT-specific capability type. */
685 	val = REG(ptr + PCIR_HT_COMMAND, 2);
686 
687 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
688 		cfg->ht.ht_slave = ptr;
689 
690 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
691 		return;
692 
693 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
694 		/* Sanity check the mapping window. */
695 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
696 		addr <<= 32;
697 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
698 		if (addr != MSI_X86_ADDR_BASE) {
699 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
700 				"has non-default MSI window 0x%llx\n",
701 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
702 				(long long)addr);
703 		}
704 	} else {
705 		addr = MSI_X86_ADDR_BASE;
706 	}
707 
708 	ht->ht_msimap = ptr;
709 	ht->ht_msictrl = val;
710 	ht->ht_msiaddr = addr;
711 
712 #undef REG
713 
714 #endif	/* __x86_64__ */
715 }
716 
717 static void
718 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
719 {
720 #define REG(n, w)	\
721 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
722 
723 	struct pcicfg_msi *msi = &cfg->msi;
724 
725 	msi->msi_location = ptr;
726 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
727 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
728 
729 #undef REG
730 }
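/*
 * Worked example of the MMC decode above: PCIM_MSICTRL_MMC_MASK covers
 * bits 3:1 of the message control register, so e.g. msi_ctrl = 0x0086
 * gives an MMC field of 0x6 >> 1 = 3 and msi_msgnum = 1 << 3 = 8
 * requested messages.
 */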
731 
732 static void
733 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
734 {
735 #define REG(n, w)	\
736 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
737 
738 	struct pcicfg_msix *msix = &cfg->msix;
739 	uint32_t val;
740 
741 	msix->msix_location = ptr;
742 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
743 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
744 
745 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
746 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
747 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
748 
749 	val = REG(ptr + PCIR_MSIX_PBA, 4);
750 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
751 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
752 
753 	TAILQ_INIT(&msix->msix_vectors);
754 
755 #undef REG
756 }
757 
758 static void
759 pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
760 {
761 	cfg->vpd.vpd_reg = ptr;
762 }
763 
764 static void
765 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
766 {
767 #define REG(n, w)	\
768 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
769 
770 	/* Should always be true. */
771 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
772 		uint32_t val;
773 
774 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
775 		cfg->subvendor = val & 0xffff;
776 		cfg->subdevice = val >> 16;
777 	}
778 
779 #undef REG
780 }
781 
782 static void
783 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
784 {
785 	/*
786 	 * Assume we have a PCI-X chipset if we have
787 	 * at least one PCI-PCI bridge with a PCI-X
788 	 * capability.  Note that some systems with
789 	 * PCI-express or HT chipsets might match on
790 	 * this check as well.
791 	 */
792 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
793 		pcix_chipset = 1;
794 
795 	cfg->pcix.pcix_ptr = ptr;
796 }
797 
798 static int
799 pcie_slotimpl(const pcicfgregs *cfg)
800 {
801 	const struct pcicfg_expr *expr = &cfg->expr;
802 	uint16_t port_type;
803 
804 	/*
805 	 * - The slot implemented bit is meaningful iff the current port
806 	 *   is a root port or a downstream port.
807 	 * - Testing for a root port or a downstream port is meaningful
808 	 *   iff the PCI configuration header is type 1.
809 	 */
810 
811 	if (cfg->hdrtype != 1)
812 		return 0;
813 
814 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
815 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
816 		return 0;
817 
818 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
819 		return 0;
820 
821 	return 1;
822 }
823 
824 static void
825 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
826 {
827 #define REG(n, w)	\
828 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
829 
830 	struct pcicfg_expr *expr = &cfg->expr;
831 
832 	/*
833 	 * Assume we have a PCI-express chipset if we have
834 	 * at least one PCI-express device.
835 	 */
836 	pcie_chipset = 1;
837 
838 	expr->expr_ptr = ptr;
839 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
840 
841 	/*
842 	 * Read slot capabilities.  Slot capabilities exist iff the
843 	 * current port's slot is implemented.
844 	 */
845 	if (pcie_slotimpl(cfg))
846 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
847 
848 #undef REG
849 }
850 
851 static void
852 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
853 {
854 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
855 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
856 
857 	uint32_t val;
858 	int nextptr, ptrptr;
859 
860 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
861 		/* No capabilities */
862 		return;
863 	}
864 
865 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
866 	case 0:
867 	case 1:
868 		ptrptr = PCIR_CAP_PTR;
869 		break;
870 	case 2:
871 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
872 		break;
873 	default:
874 		return;				/* no capabilities support */
875 	}
876 	nextptr = REG(ptrptr, 1);	/* sanity check? */
877 
878 	/*
879 	 * Read capability entries.
880 	 */
881 	while (pci_fixup_nextptr(&nextptr)) {
882 		const struct pci_read_cap *rc;
883 		int ptr = nextptr;
884 
885 		/* Find the next entry */
886 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
887 
888 		/* Process this entry */
889 		val = REG(ptr + PCICAP_ID, 1);
890 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
891 			if (rc->cap == val) {
892 				rc->read_cap(pcib, ptr, nextptr, cfg);
893 				break;
894 			}
895 		}
896 	}
897 
898 #if defined(__x86_64__)
899 	/*
900 	 * Enable the MSI mapping window for all HyperTransport
901 	 * slaves.  PCI-PCI bridges have their windows enabled via
902 	 * PCIB_MAP_MSI().
903 	 */
904 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
905 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
906 		device_printf(pcib,
907 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
908 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
909 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
910 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
911 		     2);
912 	}
913 #endif
914 
915 /* The REG and WREG macros carry through for use by the functions below */
916 }
917 
918 /*
919  * PCI Vital Product Data
920  */
921 
922 #define	PCI_VPD_TIMEOUT		1000000
923 
924 static int
925 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
926 {
927 	int count = PCI_VPD_TIMEOUT;
928 
929 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
930 
931 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
932 
933 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
934 		if (--count < 0)
935 			return (ENXIO);
936 		DELAY(1);	/* limit looping */
937 	}
938 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
939 
940 	return (0);
941 }
942 
943 #if 0
944 static int
945 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
946 {
947 	int count = PCI_VPD_TIMEOUT;
948 
949 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
950 
951 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
952 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
953 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
954 		if (--count < 0)
955 			return (ENXIO);
956 		DELAY(1);	/* limit looping */
957 	}
958 
959 	return (0);
960 }
961 #endif
962 
963 #undef PCI_VPD_TIMEOUT
964 
965 struct vpd_readstate {
966 	device_t	pcib;
967 	pcicfgregs	*cfg;
968 	uint32_t	val;
969 	int		bytesinval;
970 	int		off;
971 	uint8_t		cksum;
972 };
973 
974 static int
975 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
976 {
977 	uint32_t reg;
978 	uint8_t byte;
979 
980 	if (vrs->bytesinval == 0) {
981 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
982 			return (ENXIO);
983 		vrs->val = le32toh(reg);
984 		vrs->off += 4;
985 		byte = vrs->val & 0xff;
986 		vrs->bytesinval = 3;
987 	} else {
988 		vrs->val = vrs->val >> 8;
989 		byte = vrs->val & 0xff;
990 		vrs->bytesinval--;
991 	}
992 
993 	vrs->cksum += byte;
994 	*data = byte;
995 	return (0);
996 }
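/*
 * Worked example of the unpacking above: if a 32-bit VPD read returns
 * 0x44434241 (little-endian for "ABCD"), successive calls deliver
 * 0x41 'A', 0x42 'B', 0x43 'C' and 0x44 'D' before the next 4-byte
 * window is fetched.  The running cksum sums every byte consumed so
 * that the "RV" checksum word can be validated in pci_read_vpd().
 */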
997 
998 int
999 pcie_slot_implemented(device_t dev)
1000 {
1001 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1002 
1003 	return pcie_slotimpl(&dinfo->cfg);
1004 }
1005 
1006 void
1007 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
1008 {
1009 	uint8_t expr_ptr;
1010 	uint16_t val;
1011 
1012 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
1013 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
1014 		panic("%s: invalid max read request size 0x%02x",
1015 		      device_get_nameunit(dev), rqsize);
1016 	}
1017 
1018 	expr_ptr = pci_get_pciecap_ptr(dev);
1019 	if (!expr_ptr)
1020 		panic("%s: not a PCIe device", device_get_nameunit(dev));
1021 
1022 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1023 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
1024 		if (bootverbose)
1025 			device_printf(dev, "adjust device control 0x%04x", val);
1026 
1027 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
1028 		val |= rqsize;
1029 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
1030 
1031 		if (bootverbose)
1032 			kprintf(" -> 0x%04x\n", val);
1033 	}
1034 }
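/*
 * Non-compiled usage sketch: a PCIe device driver would typically
 * raise the maximum read request size from its attach routine, after
 * checking that the device actually has a PCIe capability, e.g.:
 */
#if 0
	if (pci_get_pciecap_ptr(dev) != 0)
		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096);
#endif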
1035 
1036 uint16_t
1037 pcie_get_max_readrq(device_t dev)
1038 {
1039 	uint8_t expr_ptr;
1040 	uint16_t val;
1041 
1042 	expr_ptr = pci_get_pciecap_ptr(dev);
1043 	if (!expr_ptr)
1044 		panic("%s: not a PCIe device", device_get_nameunit(dev));
1045 
1046 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1047 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
1048 }
1049 
1050 static void
1051 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
1052 {
1053 	struct vpd_readstate vrs;
1054 	int state;
1055 	int name;
1056 	int remain;
1057 	int i;
1058 	int alloc, off;		/* alloc/off for RO/W arrays */
1059 	int cksumvalid;
1060 	int dflen;
1061 	uint8_t byte;
1062 	uint8_t byte2;
1063 
1064 	/* init vpd reader */
1065 	vrs.bytesinval = 0;
1066 	vrs.off = 0;
1067 	vrs.pcib = pcib;
1068 	vrs.cfg = cfg;
1069 	vrs.cksum = 0;
1070 
1071 	state = 0;
1072 	name = remain = i = 0;	/* shut up stupid gcc */
1073 	alloc = off = 0;	/* shut up stupid gcc */
1074 	dflen = 0;		/* shut up stupid gcc */
1075 	cksumvalid = -1;
1076 	while (state >= 0) {
1077 		if (vpd_nextbyte(&vrs, &byte)) {
1078 			state = -2;
1079 			break;
1080 		}
1081 #if 0
1082 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1083 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1084 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1085 #endif
1086 		switch (state) {
1087 		case 0:		/* item name */
1088 			if (byte & 0x80) {
1089 				if (vpd_nextbyte(&vrs, &byte2)) {
1090 					state = -2;
1091 					break;
1092 				}
1093 				remain = byte2;
1094 				if (vpd_nextbyte(&vrs, &byte2)) {
1095 					state = -2;
1096 					break;
1097 				}
1098 				remain |= byte2 << 8;
1099 				if (remain > (0x7f*4 - vrs.off)) {
1100 					state = -1;
1101 					kprintf(
1102 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1103 					    cfg->domain, cfg->bus, cfg->slot,
1104 					    cfg->func, remain);
1105 				}
1106 				name = byte & 0x7f;
1107 			} else {
1108 				remain = byte & 0x7;
1109 				name = (byte >> 3) & 0xf;
1110 			}
1111 			switch (name) {
1112 			case 0x2:	/* String */
1113 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1114 				    M_DEVBUF, M_WAITOK);
1115 				i = 0;
1116 				state = 1;
1117 				break;
1118 			case 0xf:	/* End */
1119 				state = -1;
1120 				break;
1121 			case 0x10:	/* VPD-R */
1122 				alloc = 8;
1123 				off = 0;
1124 				cfg->vpd.vpd_ros = kmalloc(alloc *
1125 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1126 				    M_WAITOK | M_ZERO);
1127 				state = 2;
1128 				break;
1129 			case 0x11:	/* VPD-W */
1130 				alloc = 8;
1131 				off = 0;
1132 				cfg->vpd.vpd_w = kmalloc(alloc *
1133 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1134 				    M_WAITOK | M_ZERO);
1135 				state = 5;
1136 				break;
1137 			default:	/* Invalid data, abort */
1138 				state = -1;
1139 				break;
1140 			}
1141 			break;
1142 
1143 		case 1:	/* Identifier String */
1144 			cfg->vpd.vpd_ident[i++] = byte;
1145 			remain--;
1146 			if (remain == 0)  {
1147 				cfg->vpd.vpd_ident[i] = '\0';
1148 				state = 0;
1149 			}
1150 			break;
1151 
1152 		case 2:	/* VPD-R Keyword Header */
1153 			if (off == alloc) {
1154 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1155 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1156 				    M_DEVBUF, M_WAITOK | M_ZERO);
1157 			}
1158 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1159 			if (vpd_nextbyte(&vrs, &byte2)) {
1160 				state = -2;
1161 				break;
1162 			}
1163 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1164 			if (vpd_nextbyte(&vrs, &byte2)) {
1165 				state = -2;
1166 				break;
1167 			}
1168 			dflen = byte2;
1169 			if (dflen == 0 &&
1170 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1171 			    2) == 0) {
1172 				/*
1173 				 * if this happens, we can't trust the rest
1174 				 * of the VPD.
1175 				 */
1176 				kprintf(
1177 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1178 				    cfg->domain, cfg->bus, cfg->slot,
1179 				    cfg->func, dflen);
1180 				cksumvalid = 0;
1181 				state = -1;
1182 				break;
1183 			} else if (dflen == 0) {
1184 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1185 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1186 				    M_DEVBUF, M_WAITOK);
1187 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1188 			} else
1189 				cfg->vpd.vpd_ros[off].value = kmalloc(
1190 				    (dflen + 1) *
1191 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1192 				    M_DEVBUF, M_WAITOK);
1193 			remain -= 3;
1194 			i = 0;
1195 			/* keep in sync w/ state 3's transitions */
1196 			if (dflen == 0 && remain == 0)
1197 				state = 0;
1198 			else if (dflen == 0)
1199 				state = 2;
1200 			else
1201 				state = 3;
1202 			break;
1203 
1204 		case 3:	/* VPD-R Keyword Value */
1205 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1206 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1207 			    "RV", 2) == 0 && cksumvalid == -1) {
1208 				if (vrs.cksum == 0)
1209 					cksumvalid = 1;
1210 				else {
1211 					if (bootverbose)
1212 						kprintf(
1213 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1214 						    cfg->domain, cfg->bus,
1215 						    cfg->slot, cfg->func,
1216 						    vrs.cksum);
1217 					cksumvalid = 0;
1218 					state = -1;
1219 					break;
1220 				}
1221 			}
1222 			dflen--;
1223 			remain--;
1224 			/* keep in sync w/ state 2's transitions */
1225 			if (dflen == 0)
1226 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1227 			if (dflen == 0 && remain == 0) {
1228 				cfg->vpd.vpd_rocnt = off;
1229 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1230 				    off * sizeof(*cfg->vpd.vpd_ros),
1231 				    M_DEVBUF, M_WAITOK | M_ZERO);
1232 				state = 0;
1233 			} else if (dflen == 0)
1234 				state = 2;
1235 			break;
1236 
1237 		case 4:
1238 			remain--;
1239 			if (remain == 0)
1240 				state = 0;
1241 			break;
1242 
1243 		case 5:	/* VPD-W Keyword Header */
1244 			if (off == alloc) {
1245 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1246 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1247 				    M_DEVBUF, M_WAITOK | M_ZERO);
1248 			}
1249 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1250 			if (vpd_nextbyte(&vrs, &byte2)) {
1251 				state = -2;
1252 				break;
1253 			}
1254 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1255 			if (vpd_nextbyte(&vrs, &byte2)) {
1256 				state = -2;
1257 				break;
1258 			}
1259 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1260 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1261 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1262 			    sizeof(*cfg->vpd.vpd_w[off].value),
1263 			    M_DEVBUF, M_WAITOK);
1264 			remain -= 3;
1265 			i = 0;
1266 			/* keep in sync w/ state 6's transitions */
1267 			if (dflen == 0 && remain == 0)
1268 				state = 0;
1269 			else if (dflen == 0)
1270 				state = 5;
1271 			else
1272 				state = 6;
1273 			break;
1274 
1275 		case 6:	/* VPD-W Keyword Value */
1276 			cfg->vpd.vpd_w[off].value[i++] = byte;
1277 			dflen--;
1278 			remain--;
1279 			/* keep in sync w/ state 5's transitions */
1280 			if (dflen == 0)
1281 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1282 			if (dflen == 0 && remain == 0) {
1283 				cfg->vpd.vpd_wcnt = off;
1284 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1285 				    off * sizeof(*cfg->vpd.vpd_w),
1286 				    M_DEVBUF, M_WAITOK | M_ZERO);
1287 				state = 0;
1288 			} else if (dflen == 0)
1289 				state = 5;
1290 			break;
1291 
1292 		default:
1293 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1294 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1295 			    state);
1296 			state = -1;
1297 			break;
1298 		}
1299 	}
1300 
1301 	if (cksumvalid == 0 || state < -1) {
1302 		/* read-only data bad, clean up */
1303 		if (cfg->vpd.vpd_ros != NULL) {
1304 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1305 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1306 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1307 			cfg->vpd.vpd_ros = NULL;
1308 		}
1309 	}
1310 	if (state < -1) {
1311 		/* I/O error, clean up */
1312 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1313 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1314 		if (cfg->vpd.vpd_ident != NULL) {
1315 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1316 			cfg->vpd.vpd_ident = NULL;
1317 		}
1318 		if (cfg->vpd.vpd_w != NULL) {
1319 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1320 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1321 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1322 			cfg->vpd.vpd_w = NULL;
1323 		}
1324 	}
1325 	cfg->vpd.vpd_cached = 1;
1326 #undef REG
1327 #undef WREG
1328 }
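/*
 * For reference, a typical VPD image walked by the state machine
 * above looks like this (tag bytes in hex):
 *
 *   0x82 len  "Product name"	large resource: identifier string
 *   0x90 len  VPD-R section	"PN" part number, "EC" engineering
 *				change level, "SN" serial, "RV" checksum
 *   0x91 len  VPD-W section	"V0".."VZ" vendor specific, writable
 *   0x78			small resource: end tag
 *
 * The first byte of the "RV" value must make the byte sum of
 * everything up to and including it equal zero, which is exactly what
 * vrs.cksum verifies in state 3.
 */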
1329 
1330 int
1331 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1332 {
1333 	struct pci_devinfo *dinfo = device_get_ivars(child);
1334 	pcicfgregs *cfg = &dinfo->cfg;
1335 
1336 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1337 		pci_read_vpd(device_get_parent(dev), cfg);
1338 
1339 	*identptr = cfg->vpd.vpd_ident;
1340 
1341 	if (*identptr == NULL)
1342 		return (ENXIO);
1343 
1344 	return (0);
1345 }
1346 
1347 int
1348 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1349 	const char **vptr)
1350 {
1351 	struct pci_devinfo *dinfo = device_get_ivars(child);
1352 	pcicfgregs *cfg = &dinfo->cfg;
1353 	int i;
1354 
1355 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1356 		pci_read_vpd(device_get_parent(dev), cfg);
1357 
1358 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1359 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1360 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1361 			*vptr = cfg->vpd.vpd_ros[i].value;
1362 			return (0);
1363 		}
1364 
1365 	/* no matching keyword found */
1366 
1367 	*vptr = NULL;
1368 	return (ENXIO);
1369 }
1370 
1371 /*
1372  * Return the offset in configuration space of the requested extended
1373  * capability entry or 0 if the specified capability was not found.
1374  */
1375 int
1376 pci_find_extcap_method(device_t dev, device_t child, int capability,
1377     int *capreg)
1378 {
1379 	struct pci_devinfo *dinfo = device_get_ivars(child);
1380 	pcicfgregs *cfg = &dinfo->cfg;
1381 	u_int32_t status;
1382 	u_int8_t ptr;
1383 
1384 	/*
1385 	 * Check the CAP_LIST bit of the PCI status register first.
1386 	 */
1387 	status = pci_read_config(child, PCIR_STATUS, 2);
1388 	if (!(status & PCIM_STATUS_CAPPRESENT))
1389 		return (ENXIO);
1390 
1391 	/*
1392 	 * Determine the start pointer of the capabilities list.
1393 	 */
1394 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1395 	case 0:
1396 	case 1:
1397 		ptr = PCIR_CAP_PTR;
1398 		break;
1399 	case 2:
1400 		ptr = PCIR_CAP_PTR_2;
1401 		break;
1402 	default:
1403 		/* XXX: panic? */
1404 		return (ENXIO);		/* no extended capabilities support */
1405 	}
1406 	ptr = pci_read_config(child, ptr, 1);
1407 
1408 	/*
1409 	 * Traverse the capabilities list.
1410 	 */
1411 	while (ptr != 0) {
1412 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1413 			if (capreg != NULL)
1414 				*capreg = ptr;
1415 			return (0);
1416 		}
1417 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1418 	}
1419 
1420 	return (ENOENT);
1421 }
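/*
 * Non-compiled sketch of the usual consumer of this method: a driver
 * locating its PCIe capability block through the pci_find_extcap()
 * wrapper and reading a register relative to the returned offset.
 */
#if 0
	int ptr;
	uint16_t devctl;

	if (pci_find_extcap(dev, PCIY_EXPRESS, &ptr) == 0) {
		devctl = pci_read_config(dev, ptr + PCIER_DEVCTRL, 2);
		/* ... inspect or modify devctl ... */
	}
#endif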
1422 
1423 /*
1424  * Support for MSI-X message interrupts.
1425  */
1426 static void
1427 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1428     uint32_t data)
1429 {
1430 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1431 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1432 	uint32_t offset;
1433 
1434 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1435 	offset = msix->msix_table_offset + index * 16;
1436 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1437 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1438 	bus_write_4(msix->msix_table_res, offset + 8, data);
1439 
1440 	/* Enable MSI -> HT mapping. */
1441 	pci_ht_map_msi(dev, address);
1442 }
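/*
 * Layout of the 16-byte MSI-X table entry programmed above:
 *
 *   offset + 0:	message address, low 32 bits
 *   offset + 4:	message address, high 32 bits
 *   offset + 8:	message data
 *   offset + 12:	vector control (bit 0 is the per-vector mask,
 *			toggled by the mask/unmask functions below)
 */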
1443 
1444 static void
1445 pci_mask_msix_vector(device_t dev, u_int index)
1446 {
1447 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1448 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1449 	uint32_t offset, val;
1450 
1451 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1452 	offset = msix->msix_table_offset + index * 16 + 12;
1453 	val = bus_read_4(msix->msix_table_res, offset);
1454 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1455 		val |= PCIM_MSIX_VCTRL_MASK;
1456 		bus_write_4(msix->msix_table_res, offset, val);
1457 	}
1458 }
1459 
1460 static void
1461 pci_unmask_msix_vector(device_t dev, u_int index)
1462 {
1463 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1464 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1465 	uint32_t offset, val;
1466 
1467 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1468 	offset = msix->msix_table_offset + index * 16 + 12;
1469 	val = bus_read_4(msix->msix_table_res, offset);
1470 	if (val & PCIM_MSIX_VCTRL_MASK) {
1471 		val &= ~PCIM_MSIX_VCTRL_MASK;
1472 		bus_write_4(msix->msix_table_res, offset, val);
1473 	}
1474 }
1475 
1476 int
1477 pci_pending_msix_vector(device_t dev, u_int index)
1478 {
1479 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1480 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1481 	uint32_t offset, bit;
1482 
1483 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1484 	    ("MSI-X is not setup yet"));
1485 
1486 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1487 	offset = msix->msix_pba_offset + (index / 32) * 4;
1488 	bit = 1 << index % 32;
1489 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1490 }
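/*
 * Worked example of the PBA math above: for index 37 the offset is
 * msix_pba_offset + (37 / 32) * 4 = msix_pba_offset + 4 and the bit
 * tested is 1 << (37 % 32) = 1 << 5, i.e. bit 5 of the second 32-bit
 * pending word.
 */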
1491 
1492 /*
1493  * Restore MSI-X registers and table during resume.  If MSI-X is
1494  * enabled then walk the virtual table to restore the actual MSI-X
1495  * table.
1496  */
1497 static void
1498 pci_resume_msix(device_t dev)
1499 {
1500 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1501 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1502 
1503 	if (msix->msix_table_res != NULL) {
1504 		const struct msix_vector *mv;
1505 
1506 		pci_mask_msix_allvectors(dev);
1507 
1508 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1509 			u_int vector;
1510 
1511 			if (mv->mv_address == 0)
1512 				continue;
1513 
1514 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1515 			pci_setup_msix_vector(dev, vector,
1516 			    mv->mv_address, mv->mv_data);
1517 			pci_unmask_msix_vector(dev, vector);
1518 		}
1519 	}
1520 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1521 	    msix->msix_ctrl, 2);
1522 }
1523 
1524 /*
1525  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1526  *
1527  * After this function returns, the MSI-X's rid will be saved in rid0.
1528  */
1529 int
1530 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1531     int *rid0, int cpuid)
1532 {
1533 	struct pci_devinfo *dinfo = device_get_ivars(child);
1534 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1535 	struct msix_vector *mv;
1536 	struct resource_list_entry *rle;
1537 	int error, irq, rid;
1538 
1539 	KASSERT(msix->msix_table_res != NULL &&
1540 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1541 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1542 	KASSERT(vector < msix->msix_msgnum,
1543 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1544 
1545 	if (bootverbose) {
1546 		device_printf(child,
1547 		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
1548 		    vector, msix->msix_msgnum);
1549 	}
1550 
1551 	/* Set rid according to vector number */
1552 	rid = PCI_MSIX_VEC2RID(vector);
1553 
1554 	/* Vector has already been allocated */
1555 	mv = pci_find_msix_vector(child, rid);
1556 	if (mv != NULL)
1557 		return EBUSY;
1558 
1559 	/* Allocate a message. */
1560 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1561 	if (error)
1562 		return error;
1563 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1564 	    irq, irq, 1, cpuid);
1565 
1566 	if (bootverbose) {
1567 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1568 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1569 		    rle->start, cpuid);
1570 	}
1571 
1572 	/* Update counts of alloc'd messages. */
1573 	msix->msix_alloc++;
1574 
1575 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1576 	mv->mv_rid = rid;
1577 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1578 
1579 	*rid0 = rid;
1580 	return 0;
1581 }
1582 
1583 int
1584 pci_release_msix_vector_method(device_t dev, device_t child, int rid)
1585 {
1586 	struct pci_devinfo *dinfo = device_get_ivars(child);
1587 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1588 	struct resource_list_entry *rle;
1589 	struct msix_vector *mv;
1590 	int irq, cpuid;
1591 
1592 	KASSERT(msix->msix_table_res != NULL &&
1593 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1594 	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
1595 	KASSERT(rid > 0, ("invalid rid %d", rid));
1596 
1597 	mv = pci_find_msix_vector(child, rid);
1598 	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
1599 	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not torn down", rid));
1600 
1601 	/* Make sure resource is no longer allocated. */
1602 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1603 	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
1604 	KASSERT(rle->res == NULL,
1605 	    ("MSI-X resource is still allocated, rid %d", rid));
1606 
1607 	irq = rle->start;
1608 	cpuid = rle->cpuid;
1609 
1610 	/* Free the resource list entries. */
1611 	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);
1612 
1613 	/* Release the IRQ. */
1614 	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);
1615 
1616 	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
1617 	kfree(mv, M_DEVBUF);
1618 
1619 	msix->msix_alloc--;
1620 	return (0);
1621 }
1622 
1623 /*
1624  * Return the maximum number of MSI-X messages this device supports.
1625  * Basically, assuming the MD code can alloc messages, this function
1626  * should return the maximum value that pci_alloc_msix() can return.
1627  * Thus, it is subject to the tunables, etc.
1628  */
1629 int
1630 pci_msix_count_method(device_t dev, device_t child)
1631 {
1632 	struct pci_devinfo *dinfo = device_get_ivars(child);
1633 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1634 
1635 	if (pci_do_msix && msix->msix_location != 0)
1636 		return (msix->msix_msgnum);
1637 	return (0);
1638 }
1639 
1640 int
1641 pci_setup_msix(device_t dev)
1642 {
1643 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1644 	pcicfgregs *cfg = &dinfo->cfg;
1645 	struct resource_list_entry *rle;
1646 	struct resource *table_res, *pba_res;
1647 
1648 	KASSERT(cfg->msix.msix_table_res == NULL &&
1649 	    cfg->msix.msix_pba_res == NULL, ("MSI-X has already been setup"));
1650 
1651 	/* If rid 0 is allocated, then fail. */
1652 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1653 	if (rle != NULL && rle->res != NULL)
1654 		return (ENXIO);
1655 
1656 	/* Already have allocated MSIs? */
1657 	if (cfg->msi.msi_alloc != 0)
1658 		return (ENXIO);
1659 
1660 	/* If MSI is blacklisted for this system, fail. */
1661 	if (pci_msi_blacklisted())
1662 		return (ENXIO);
1663 
1664 	/* MSI-X capability present? */
1665 	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
1666 	    !pci_do_msix)
1667 		return (ENODEV);
1668 
1669 	KASSERT(cfg->msix.msix_alloc == 0 &&
1670 	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
1671 	    ("MSI-X vector has been allocated"));
1672 
1673 	/* Make sure the appropriate BARs are mapped. */
1674 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1675 	    cfg->msix.msix_table_bar);
1676 	if (rle == NULL || rle->res == NULL ||
1677 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1678 		return (ENXIO);
1679 	table_res = rle->res;
1680 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1681 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1682 		    cfg->msix.msix_pba_bar);
1683 		if (rle == NULL || rle->res == NULL ||
1684 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1685 			return (ENXIO);
1686 	}
1687 	pba_res = rle->res;
1688 
1689 	cfg->msix.msix_table_res = table_res;
1690 	cfg->msix.msix_pba_res = pba_res;
1691 
1692 	pci_mask_msix_allvectors(dev);
1693 
1694 	return 0;
1695 }
1696 
1697 void
1698 pci_teardown_msix(device_t dev)
1699 {
1700 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1701 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1702 
1703 	KASSERT(msix->msix_table_res != NULL &&
1704 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1705 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1706 	    ("MSI-X vector is still allocated"));
1707 
1708 	pci_disable_msix(dev);
1709 	pci_mask_msix_allvectors(dev);
1710 
1711 	msix->msix_table_res = NULL;
1712 	msix->msix_pba_res = NULL;
1713 }
1714 
1715 void
1716 pci_enable_msix(device_t dev)
1717 {
1718 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1719 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1720 
1721 	KASSERT(msix->msix_table_res != NULL &&
1722 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1723 
1724 	/* Update control register to enable MSI-X. */
1725 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1726 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1727 	    msix->msix_ctrl, 2);
1728 }
1729 
1730 void
1731 pci_disable_msix(device_t dev)
1732 {
1733 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1734 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1735 
1736 	KASSERT(msix->msix_table_res != NULL &&
1737 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1738 
1739 	/* Disable MSI -> HT mapping. */
1740 	pci_ht_map_msi(dev, 0);
1741 
1742 	/* Update control register to disable MSI-X. */
1743 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1744 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1745 	    msix->msix_ctrl, 2);
1746 }
1747 
1748 static void
1749 pci_mask_msix_allvectors(device_t dev)
1750 {
1751 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1752 	u_int i;
1753 
1754 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1755 		pci_mask_msix_vector(dev, i);
1756 }
1757 
1758 static struct msix_vector *
1759 pci_find_msix_vector(device_t dev, int rid)
1760 {
1761 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1762 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1763 	struct msix_vector *mv;
1764 
1765 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1766 		if (mv->mv_rid == rid)
1767 			return mv;
1768 	}
1769 	return NULL;
1770 }
1771 
1772 /*
1773  * HyperTransport MSI mapping control
1774  */
1775 void
1776 pci_ht_map_msi(device_t dev, uint64_t addr)
1777 {
1778 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1779 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1780 
1781 	if (!ht->ht_msimap)
1782 		return;
1783 
1784 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1785 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1786 		/* Enable MSI -> HT mapping. */
1787 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1788 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1789 		    ht->ht_msictrl, 2);
1790 	}
1791 
1792 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1793 		/* Disable MSI -> HT mapping. */
1794 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1795 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1796 		    ht->ht_msictrl, 2);
1797 	}
1798 }
1799 
1800 /*
1801  * Support for MSI message signalled interrupts.
1802  */
1803 static void
1804 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1805 {
1806 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1807 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1808 
1809 	/* Write data and address values. */
1810 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1811 	    address & 0xffffffff, 4);
1812 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1813 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1814 		    address >> 32, 4);
1815 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1816 		    data, 2);
1817 	} else
1818 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1819 		    2);
1820 
1821 	/* Enable MSI in the control register. */
1822 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1823 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1824 	    2);
1825 
1826 	/* Enable MSI -> HT mapping. */
1827 	pci_ht_map_msi(dev, address);
1828 }
1829 
1830 static void
1831 pci_disable_msi(device_t dev)
1832 {
1833 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1834 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1835 
1836 	/* Disable MSI -> HT mapping. */
1837 	pci_ht_map_msi(dev, 0);
1838 
1839 	/* Disable MSI in the control register. */
1840 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1841 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1842 	    2);
1843 }
1844 
1845 /*
1846  * Restore MSI registers during resume.  If MSI is enabled then
1847  * restore the data and address registers in addition to the control
1848  * register.
1849  */
1850 static void
1851 pci_resume_msi(device_t dev)
1852 {
1853 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1854 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1855 	uint64_t address;
1856 	uint16_t data;
1857 
1858 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1859 		address = msi->msi_addr;
1860 		data = msi->msi_data;
1861 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1862 		    address & 0xffffffff, 4);
1863 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1864 			pci_write_config(dev, msi->msi_location +
1865 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1866 			pci_write_config(dev, msi->msi_location +
1867 			    PCIR_MSI_DATA_64BIT, data, 2);
1868 		} else
1869 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1870 			    data, 2);
1871 	}
1872 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1873 	    2);
1874 }
1875 
1876 /*
1877  * Returns true if the specified device is blacklisted because MSI
1878  * doesn't work.
1879  */
1880 int
1881 pci_msi_device_blacklisted(device_t dev)
1882 {
1883 	struct pci_quirk *q;
1884 
1885 	if (!pci_honor_msi_blacklist)
1886 		return (0);
1887 
1888 	for (q = &pci_quirks[0]; q->devid; q++) {
1889 		if (q->devid == pci_get_devid(dev) &&
1890 		    q->type == PCI_QUIRK_DISABLE_MSI)
1891 			return (1);
1892 	}
1893 	return (0);
1894 }
1895 
1896 /*
1897  * Determine if MSI is blacklisted globally on this system.  Currently,
1898  * we just check for blacklisted chipsets as represented by the
1899  * host-PCI bridge at device 0:0:0.  In the future, it may become
1900  * necessary to check other system attributes, such as the kenv values
1901  * that give the motherboard manufacturer and model number.
1902  */
1903 static int
1904 pci_msi_blacklisted(void)
1905 {
1906 	device_t dev;
1907 
1908 	if (!pci_honor_msi_blacklist)
1909 		return (0);
1910 
1911 	/*
1912 	 * Always assume that MSI-X works in virtual machines.  This is,
1913 	 * for example, needed for most (or all) QEMU-based setups, since
1914 	 * the emulated chipsets tend to be very old.
1915 	 */
1916 	if (vmm_guest != VMM_GUEST_NONE)
1917 		return (0);
1918 
1919 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1920 	if (!(pcie_chipset || pcix_chipset))
1921 		return (1);
1922 
1923 	dev = pci_find_bsf(0, 0, 0);
1924 	if (dev != NULL)
1925 		return (pci_msi_device_blacklisted(dev));
1926 	return (0);
1927 }
1928 
1929 /*
1930  * Attempt to allocate count MSI messages on start_cpuid.
1931  *
1932  * If start_cpuid < 0, then the MSI messages' target CPU will be
1933  * selected automatically.
1934  *
1935  * If the caller explicitly specified the MSI messages' target CPU,
1936  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1937  * messages on the specified CPU; if the allocation fails because the
1938  * MD code does not have enough vectors (EMSGSIZE), then we will try
1939  * the next available CPU, until the allocation fails on all CPUs.
1940  *
1941  * EMSGSIZE will be returned if none of the available CPUs has
1942  * enough vectors for the requested number of MSI messages.  The
1943  * caller should either reduce the number of MSI messages to be
1944  * requested, or simply give up on using MSI.
1945  *
1946  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1947  * returned in the 'rid' array, if the allocation succeeds.
1948  */
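/*
 * Typical driver-side usage, as a rough sketch (this assumes the
 * pci_alloc_msi() convenience wrapper mirrors this method's
 * rid/count/start_cpuid arguments; error handling omitted):
 *
 *	int rid[1];
 *	struct resource *irq_res;
 *
 *	if (pci_alloc_msi(dev, rid, 1, -1) == 0) {
 *		irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 *		    &rid[0], RF_ACTIVE);
 *		...
 *	}
 */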
1949 int
1950 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1951     int start_cpuid)
1952 {
1953 	struct pci_devinfo *dinfo = device_get_ivars(child);
1954 	pcicfgregs *cfg = &dinfo->cfg;
1955 	struct resource_list_entry *rle;
1956 	int error, i, irqs[32], cpuid = 0;
1957 	uint16_t ctrl;
1958 
1959 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1960 	    ("invalid MSI count %d", count));
1961 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1962 
1963 	/* If rid 0 is allocated, then fail. */
1964 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1965 	if (rle != NULL && rle->res != NULL)
1966 		return (ENXIO);
1967 
1968 	/* Already have allocated messages? */
1969 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1970 		return (ENXIO);
1971 
1972 	/* If MSI is blacklisted for this system, fail. */
1973 	if (pci_msi_blacklisted())
1974 		return (ENXIO);
1975 
1976 	/* MSI capability present? */
1977 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1978 	    !pci_do_msi)
1979 		return (ENODEV);
1980 
1981 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1982 	    count, cfg->msi.msi_msgnum));
1983 
1984 	if (bootverbose) {
1985 		device_printf(child,
1986 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1987 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1988 	}
1989 
1990 	if (start_cpuid < 0)
1991 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1992 
1993 	error = EINVAL;
1994 	for (i = 0; i < ncpus; ++i) {
1995 		cpuid = (start_cpuid + i) % ncpus;
1996 
1997 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1998 		    cfg->msi.msi_msgnum, irqs, cpuid);
1999 		if (error == 0)
2000 			break;
2001 		else if (error != EMSGSIZE)
2002 			return error;
2003 	}
2004 	if (error)
2005 		return error;
2006 
2007 	/*
2008 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
2009 	 * the irqs[] array, so add new resources starting at rid 1.
2010 	 */
2011 	for (i = 0; i < count; i++) {
2012 		rid[i] = i + 1;
2013 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2014 		    irqs[i], irqs[i], 1, cpuid);
2015 	}
2016 
2017 	if (bootverbose) {
2018 		if (count == 1) {
2019 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
2020 			    irqs[0], cpuid);
2021 		} else {
2022 			int run;
2023 
2024 			/*
2025 			 * Be fancy and try to print contiguous runs
2026 			 * of IRQ values as ranges.  'run' is true if
2027 			 * we are in a range.
2028 			 */
2029 			device_printf(child, "using IRQs %d", irqs[0]);
2030 			run = 0;
2031 			for (i = 1; i < count; i++) {
2032 
2033 				/* Still in a run? */
2034 				if (irqs[i] == irqs[i - 1] + 1) {
2035 					run = 1;
2036 					continue;
2037 				}
2038 
2039 				/* Finish previous range. */
2040 				if (run) {
2041 					kprintf("-%d", irqs[i - 1]);
2042 					run = 0;
2043 				}
2044 
2045 				/* Start new range. */
2046 				kprintf(",%d", irqs[i]);
2047 			}
2048 
2049 			/* Unfinished range? */
2050 			if (run)
2051 				kprintf("-%d", irqs[count - 1]);
2052 			kprintf(" for MSI on cpu%d\n", cpuid);
2053 		}
2054 	}
2055 
2056 	/* Update control register with count. */
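	/*
	 * The MME (Multiple Message Enable) field in bits 6:4 holds log2
	 * of the number of enabled messages; since count is a power of 2,
	 * ffs(count) - 1 yields exactly that log2 value.
	 */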
2057 	ctrl = cfg->msi.msi_ctrl;
2058 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2059 	ctrl |= (ffs(count) - 1) << 4;
2060 	cfg->msi.msi_ctrl = ctrl;
2061 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2062 
2063 	/* Update counts of alloc'd messages. */
2064 	cfg->msi.msi_alloc = count;
2065 	cfg->msi.msi_handlers = 0;
2066 	return (0);
2067 }
2068 
2069 /* Release the MSI messages associated with this device. */
2070 int
2071 pci_release_msi_method(device_t dev, device_t child)
2072 {
2073 	struct pci_devinfo *dinfo = device_get_ivars(child);
2074 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2075 	struct resource_list_entry *rle;
2076 	int i, irqs[32], cpuid = -1;
2077 
2078 	/* Do we have any messages to release? */
2079 	if (msi->msi_alloc == 0)
2080 		return (ENODEV);
2081 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2082 
2083 	/* Make sure none of the resources are allocated. */
2084 	if (msi->msi_handlers > 0)
2085 		return (EBUSY);
2086 	for (i = 0; i < msi->msi_alloc; i++) {
2087 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2088 		KASSERT(rle != NULL, ("missing MSI resource"));
2089 		if (rle->res != NULL)
2090 			return (EBUSY);
2091 		if (i == 0) {
2092 			cpuid = rle->cpuid;
2093 			KASSERT(cpuid >= 0 && cpuid < ncpus,
2094 			    ("invalid MSI target cpuid %d", cpuid));
2095 		} else {
2096 			KASSERT(rle->cpuid == cpuid,
2097 			    ("MSI targets different cpus, "
2098 			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
2099 		}
2100 		irqs[i] = rle->start;
2101 	}
2102 
2103 	/* Update control register with 0 count. */
2104 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2105 	    ("%s: MSI still enabled", __func__));
2106 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2107 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2108 	    msi->msi_ctrl, 2);
2109 
2110 	/* Release the messages. */
2111 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
2112 	    cpuid);
2113 	for (i = 0; i < msi->msi_alloc; i++)
2114 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2115 
2116 	/* Update alloc count. */
2117 	msi->msi_alloc = 0;
2118 	msi->msi_addr = 0;
2119 	msi->msi_data = 0;
2120 	return (0);
2121 }
2122 
2123 /*
2124  * Return the maximum number of MSI messages this device supports.
2125  * Basically, assuming the MD code can allocate messages, this
2126  * function should return the maximum value that pci_alloc_msi() can
2127  * return.  Thus, it is subject to the tunables, etc.
2128  */
2129 int
2130 pci_msi_count_method(device_t dev, device_t child)
2131 {
2132 	struct pci_devinfo *dinfo = device_get_ivars(child);
2133 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2134 
2135 	if (pci_do_msi && msi->msi_location != 0)
2136 		return (msi->msi_msgnum);
2137 	return (0);
2138 }
2139 
2140 /* kfree the pcicfgregs structure and all dependent data structures */
2141 
2142 int
2143 pci_freecfg(struct pci_devinfo *dinfo)
2144 {
2145 	struct devlist *devlist_head;
2146 	int i;
2147 
2148 	devlist_head = &pci_devq;
2149 
2150 	if (dinfo->cfg.vpd.vpd_reg) {
2151 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2152 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2153 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2154 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2155 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2156 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2157 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2158 	}
2159 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2160 	kfree(dinfo, M_DEVBUF);
2161 
2162 	/* increment the generation count */
2163 	pci_generation++;
2164 
2165 	/* we're losing one device */
2166 	pci_numdevs--;
2167 	return (0);
2168 }
2169 
2170 /*
2171  * PCI power management
2172  */
2173 int
2174 pci_set_powerstate_method(device_t dev, device_t child, int state)
2175 {
2176 	struct pci_devinfo *dinfo = device_get_ivars(child);
2177 	pcicfgregs *cfg = &dinfo->cfg;
2178 	uint16_t status;
2179 	int oldstate, highest, delay;
2180 
2181 	if (cfg->pp.pp_cap == 0)
2182 		return (EOPNOTSUPP);
2183 
2184 	/*
2185 	 * Optimize a no state change request away.  While it would be OK to
2186 	 * write to the hardware in theory, some devices have shown odd
2187 	 * behavior when going from D3 -> D3.
2188 	 */
2189 	oldstate = pci_get_powerstate(child);
2190 	if (oldstate == state)
2191 		return (0);
2192 
2193 	/*
2194 	 * The PCI power management specification states that after a state
2195 	 * transition between PCI power states, system software must
2196 	 * guarantee a minimal delay before the function accesses the device.
2197 	 * Compute the worst case delay that we need to guarantee before we
2198 	 * access the device.  Many devices will be responsive much more
2199 	 * quickly than this delay, but there are some that don't respond
2200 	 * instantly to state changes.  Transitions to/from D3 state require
2201 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2202 	 * is done below with DELAY rather than a sleeper function because
2203 	 * this function can be called from contexts where we cannot sleep.
2204 	 */
2205 	highest = (oldstate > state) ? oldstate : state;
2206 	if (highest == PCI_POWERSTATE_D3)
2207 	    delay = 10000;
2208 	else if (highest == PCI_POWERSTATE_D2)
2209 	    delay = 200;
2210 	else
2211 	    delay = 0;
2212 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2213 	    & ~PCIM_PSTAT_DMASK;
2214 	switch (state) {
2215 	case PCI_POWERSTATE_D0:
2216 		status |= PCIM_PSTAT_D0;
2217 		break;
2218 	case PCI_POWERSTATE_D1:
2219 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2220 			return (EOPNOTSUPP);
2221 		status |= PCIM_PSTAT_D1;
2222 		break;
2223 	case PCI_POWERSTATE_D2:
2224 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2225 			return (EOPNOTSUPP);
2226 		status |= PCIM_PSTAT_D2;
2227 		break;
2228 	case PCI_POWERSTATE_D3:
2229 		status |= PCIM_PSTAT_D3;
2230 		break;
2231 	default:
2232 		return (EINVAL);
2233 	}
2234 
2235 	if (bootverbose)
2236 		kprintf(
2237 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2238 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2239 		    dinfo->cfg.func, oldstate, state);
2240 
2241 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2242 	if (delay)
2243 		DELAY(delay);
2244 	return (0);
2245 }
2246 
2247 int
2248 pci_get_powerstate_method(device_t dev, device_t child)
2249 {
2250 	struct pci_devinfo *dinfo = device_get_ivars(child);
2251 	pcicfgregs *cfg = &dinfo->cfg;
2252 	uint16_t status;
2253 	int result;
2254 
2255 	if (cfg->pp.pp_cap != 0) {
2256 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2257 		switch (status & PCIM_PSTAT_DMASK) {
2258 		case PCIM_PSTAT_D0:
2259 			result = PCI_POWERSTATE_D0;
2260 			break;
2261 		case PCIM_PSTAT_D1:
2262 			result = PCI_POWERSTATE_D1;
2263 			break;
2264 		case PCIM_PSTAT_D2:
2265 			result = PCI_POWERSTATE_D2;
2266 			break;
2267 		case PCIM_PSTAT_D3:
2268 			result = PCI_POWERSTATE_D3;
2269 			break;
2270 		default:
2271 			result = PCI_POWERSTATE_UNKNOWN;
2272 			break;
2273 		}
2274 	} else {
2275 		/* No support, device is always at D0 */
2276 		result = PCI_POWERSTATE_D0;
2277 	}
2278 	return (result);
2279 }
2280 
2281 /*
2282  * Some convenience functions for PCI device drivers.
2283  */
2284 
2285 static __inline void
2286 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2287 {
2288 	uint16_t	command;
2289 
2290 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2291 	command |= bit;
2292 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2293 }
2294 
2295 static __inline void
2296 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2297 {
2298 	uint16_t	command;
2299 
2300 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2301 	command &= ~bit;
2302 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2303 }
2304 
2305 int
2306 pci_enable_busmaster_method(device_t dev, device_t child)
2307 {
2308 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2309 	return (0);
2310 }
2311 
2312 int
2313 pci_disable_busmaster_method(device_t dev, device_t child)
2314 {
2315 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2316 	return (0);
2317 }
2318 
2319 int
2320 pci_enable_io_method(device_t dev, device_t child, int space)
2321 {
2322 	uint16_t command;
2323 	uint16_t bit;
2324 	char *error;
2325 
2326 	bit = 0;
2327 	error = NULL;
2328 
2329 	switch(space) {
2330 	case SYS_RES_IOPORT:
2331 		bit = PCIM_CMD_PORTEN;
2332 		error = "port";
2333 		break;
2334 	case SYS_RES_MEMORY:
2335 		bit = PCIM_CMD_MEMEN;
2336 		error = "memory";
2337 		break;
2338 	default:
2339 		return (EINVAL);
2340 	}
2341 	pci_set_command_bit(dev, child, bit);
2342 	/* Some devices seem to need a brief stall here; what to do? */
2343 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2344 	if (command & bit)
2345 		return (0);
2346 	device_printf(child, "failed to enable %s mapping!\n", error);
2347 	return (ENXIO);
2348 }
2349 
2350 int
2351 pci_disable_io_method(device_t dev, device_t child, int space)
2352 {
2353 	uint16_t command;
2354 	uint16_t bit;
2355 	char *error;
2356 
2357 	bit = 0;
2358 	error = NULL;
2359 
2360 	switch(space) {
2361 	case SYS_RES_IOPORT:
2362 		bit = PCIM_CMD_PORTEN;
2363 		error = "port";
2364 		break;
2365 	case SYS_RES_MEMORY:
2366 		bit = PCIM_CMD_MEMEN;
2367 		error = "memory";
2368 		break;
2369 	default:
2370 		return (EINVAL);
2371 	}
2372 	pci_clear_command_bit(dev, child, bit);
2373 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2374 	if (command & bit) {
2375 		device_printf(child, "failed to disable %s mapping!\n", error);
2376 		return (ENXIO);
2377 	}
2378 	return (0);
2379 }
2380 
2381 /*
2382  * New style pci driver.  Parent device is either a pci-host-bridge or a
2383  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2384  */
2385 
2386 void
2387 pci_print_verbose(struct pci_devinfo *dinfo)
2388 {
2389 
2390 	if (bootverbose) {
2391 		pcicfgregs *cfg = &dinfo->cfg;
2392 
2393 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2394 		    cfg->vendor, cfg->device, cfg->revid);
2395 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2396 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2397 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2398 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2399 		    cfg->mfdev);
2400 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2401 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2402 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2403 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2404 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2405 		if (cfg->intpin > 0)
2406 			kprintf("\tintpin=%c, irq=%d\n",
2407 			    cfg->intpin + 'a' - 1, cfg->intline);
2408 		if (cfg->pp.pp_cap) {
2409 			uint16_t status;
2410 
2411 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2412 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2413 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2414 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2415 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2416 			    status & PCIM_PSTAT_DMASK);
2417 		}
2418 		if (cfg->msi.msi_location) {
2419 			int ctrl;
2420 
2421 			ctrl = cfg->msi.msi_ctrl;
2422 			kprintf("\tMSI supports %d message%s%s%s\n",
2423 			    cfg->msi.msi_msgnum,
2424 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2425 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2426 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2427 		}
2428 		if (cfg->msix.msix_location) {
2429 			kprintf("\tMSI-X supports %d message%s ",
2430 			    cfg->msix.msix_msgnum,
2431 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2432 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2433 				kprintf("in map 0x%x\n",
2434 				    cfg->msix.msix_table_bar);
2435 			else
2436 				kprintf("in maps 0x%x and 0x%x\n",
2437 				    cfg->msix.msix_table_bar,
2438 				    cfg->msix.msix_pba_bar);
2439 		}
2440 		pci_print_verbose_expr(cfg);
2441 	}
2442 }
2443 
2444 static void
2445 pci_print_verbose_expr(const pcicfgregs *cfg)
2446 {
2447 	const struct pcicfg_expr *expr = &cfg->expr;
2448 	const char *port_name;
2449 	uint16_t port_type;
2450 
2451 	if (!bootverbose)
2452 		return;
2453 
2454 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2455 		return;
2456 
2457 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2458 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2459 
2460 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2461 
2462 	switch (port_type) {
2463 	case PCIE_END_POINT:
2464 		port_name = "DEVICE";
2465 		break;
2466 	case PCIE_LEG_END_POINT:
2467 		port_name = "LEGDEV";
2468 		break;
2469 	case PCIE_ROOT_PORT:
2470 		port_name = "ROOT";
2471 		break;
2472 	case PCIE_UP_STREAM_PORT:
2473 		port_name = "UPSTREAM";
2474 		break;
2475 	case PCIE_DOWN_STREAM_PORT:
2476 		port_name = "DOWNSTRM";
2477 		break;
2478 	case PCIE_PCIE2PCI_BRIDGE:
2479 		port_name = "PCIE2PCI";
2480 		break;
2481 	case PCIE_PCI2PCIE_BRIDGE:
2482 		port_name = "PCI2PCIE";
2483 		break;
2484 	case PCIE_ROOT_END_POINT:
2485 		port_name = "ROOTDEV";
2486 		break;
2487 	case PCIE_ROOT_EVT_COLL:
2488 		port_name = "ROOTEVTC";
2489 		break;
2490 	default:
2491 		port_name = NULL;
2492 		break;
2493 	}
2494 	if ((port_type == PCIE_ROOT_PORT ||
2495 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2496 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2497 		port_name = NULL;
2498 	if (port_name != NULL)
2499 		kprintf("[%s]", port_name);
2500 
2501 	if (pcie_slotimpl(cfg)) {
2502 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2503 		if (expr->expr_slotcap & PCIEM_SLOTCAP_HP_CAP)
2504 			kprintf("[HOTPLUG]");
2505 	}
2506 	kprintf("\n");
2507 }
2508 
2509 static int
2510 pci_porten(device_t pcib, int b, int s, int f)
2511 {
2512 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2513 		& PCIM_CMD_PORTEN) != 0;
2514 }
2515 
2516 static int
2517 pci_memen(device_t pcib, int b, int s, int f)
2518 {
2519 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2520 		& PCIM_CMD_MEMEN) != 0;
2521 }
2522 
2523 /*
2524  * Add a resource based on a PCI map register.  Return 1 if the map
2525  * register is a 32-bit map register or 2 if it is a 64-bit register.
2526  */
2527 static int
2528 pci_add_map(device_t pcib, device_t bus, device_t dev,
2529     int b, int s, int f, int reg, struct resource_list *rl, int force,
2530     int prefetch)
2531 {
2532 	uint32_t map;
2533 	uint16_t old_cmd;
2534 	pci_addr_t base;
2535 	pci_addr_t start, end, count;
2536 	uint8_t ln2size;
2537 	uint8_t ln2range;
2538 	uint32_t testval;
2539 	uint16_t cmd;
2540 	int type;
2541 	int barlen;
2542 	struct resource *res;
2543 
2544 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2545 
2546 	/* Disable access to device memory */
2547 	old_cmd = 0;
2548 	if (PCI_BAR_MEM(map)) {
2549 		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2550 		cmd = old_cmd & ~PCIM_CMD_MEMEN;
2551 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2552 	}
2553 
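	/*
	 * Standard BAR sizing probe: write all 1s, read the value back
	 * (the device hardwires the low address bits to zero according
	 * to its decoded size), then restore the original mapping.
	 */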
2554 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2555 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2556 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2557 
2558 	/* Restore memory access mode */
2559 	if (PCI_BAR_MEM(map)) {
2560 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
2561 	}
2562 
2563 	if (PCI_BAR_MEM(map)) {
2564 		type = SYS_RES_MEMORY;
2565 		if (map & PCIM_BAR_MEM_PREFETCH)
2566 			prefetch = 1;
2567 	} else
2568 		type = SYS_RES_IOPORT;
2569 	ln2size = pci_mapsize(testval);
2570 	ln2range = pci_maprange(testval);
2571 	base = pci_mapbase(map);
2572 	barlen = ln2range == 64 ? 2 : 1;
2573 
2574 	/*
2575 	 * For I/O registers, if the bottom bit is set and the next bit
2576 	 * up isn't clear, we know we have a BAR that doesn't conform to
2577 	 * the spec, so ignore it.  Also, sanity check the size of the
2578 	 * data areas against the type of memory involved.  Memory must
2579 	 * be at least 16 bytes in size, while I/O ranges must be at least 4.
2580 	 */
2581 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2582 		return (barlen);
2583 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2584 	    (type == SYS_RES_IOPORT && ln2size < 2))
2585 		return (barlen);
2586 
2587 	if (ln2range == 64)
2588 		/* Read the other half of a 64-bit map register */
2589 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2590 	if (bootverbose) {
2591 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2592 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2593 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2594 			kprintf(", port disabled\n");
2595 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2596 			kprintf(", memory disabled\n");
2597 		else
2598 			kprintf(", enabled\n");
2599 	}
2600 
2601 	/*
2602 	 * If base is 0, then we have problems.  It is best to ignore
2603 	 * such entries for the moment.  These will be allocated later if
2604 	 * the driver specifically requests them.  However, some
2605 	 * removable busses look better when all resources are allocated,
2606 	 * so allow '0' to be overridden.
2607 	 *
2608 	 * Similarly treat maps whose value is the same as the test value
2609 	 * read back.  These maps have had all f's written to them by the
2610 	 * BIOS in an attempt to disable the resources.
2611 	 */
2612 	if (!force && (base == 0 || map == testval))
2613 		return (barlen);
2614 	if ((u_long)base != base) {
2615 		device_printf(bus,
2616 		    "pci%d:%d:%d:%d bar %#x too many address bits\n",
2617 		    pci_get_domain(dev), b, s, f, reg);
2618 		return (barlen);
2619 	}
2620 
2621 	/*
2622 	 * This code theoretically does the right thing, but has
2623 	 * undesirable side effects in some cases where peripherals
2624 	 * respond oddly to having these bits enabled.  Let the user
2625 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2626 	 * default).
2627 	 */
2628 	if (pci_enable_io_modes) {
2629 		/* Turn on resources that have been left off by a lazy BIOS */
2630 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2631 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2632 			cmd |= PCIM_CMD_PORTEN;
2633 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2634 		}
2635 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2636 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2637 			cmd |= PCIM_CMD_MEMEN;
2638 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2639 		}
2640 	} else {
2641 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2642 			return (barlen);
2643 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2644 			return (barlen);
2645 	}
2646 
2647 	count = 1 << ln2size;
2648 	if (base == 0 || base == pci_mapbase(testval)) {
2649 		start = 0;	/* Let the parent decide. */
2650 		end = ~0ULL;
2651 	} else {
2652 		start = base;
2653 		end = base + (1 << ln2size) - 1;
2654 	}
2655 	resource_list_add(rl, type, reg, start, end, count, -1);
2656 
2657 	/*
2658 	 * Try to allocate the resource for this BAR from our parent
2659 	 * so that this resource range is already reserved.  The
2660 	 * driver for this device will later inherit this resource in
2661 	 * pci_alloc_resource().
2662 	 */
2663 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2664 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2665 	if (res == NULL) {
2666 		/*
2667 		 * If the allocation fails, delete the resource list
2668 		 * entry to force pci_alloc_resource() to allocate
2669 		 * resources from the parent.
2670 		 */
2671 		resource_list_delete(rl, type, reg);
2672 #ifdef PCI_BAR_CLEAR
2673 		/* Clear the BAR */
2674 		start = 0;
2675 #else	/* !PCI_BAR_CLEAR */
2676 		/*
2677 		 * Don't clear the BAR here.  Some BIOSes list the HPET
2678 		 * as a PCI function; clearing the BAR would cause the
2679 		 * HPET timer to stop ticking.
2680 		 */
2681 		if (bootverbose) {
2682 			kprintf("pci:%d:%d:%d: resource reservation failed "
2683 				"%#jx - %#jx\n", b, s, f,
2684 				(intmax_t)start, (intmax_t)end);
2685 		}
2686 		return (barlen);
2687 #endif	/* PCI_BAR_CLEAR */
2688 	} else {
2689 		start = rman_get_start(res);
2690 	}
2691 	pci_write_config(dev, reg, start, 4);
2692 	if (ln2range == 64)
2693 		pci_write_config(dev, reg + 4, start >> 32, 4);
2694 	return (barlen);
2695 }
2696 
2697 /*
2698  * For ATA devices we need to decide early what addressing mode to use.
2699  * Legacy mode demands that the primary and secondary ATA ports sit at
2700  * the same addresses that old ISA hardware did.  This dictates that we
2701  * use those addresses and ignore the BARs if we cannot set PCI native
2702  * addressing mode.
2703  */
2704 static void
2705 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2706     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2707 {
2708 	int rid, type, progif;
2709 #if 0
2710 	/* if this device supports PCI native addressing use it */
2711 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2712 	if ((progif & 0x8a) == 0x8a) {
2713 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2714 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2715 			kprintf("Trying ATA native PCI addressing mode\n");
2716 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2717 		}
2718 	}
2719 #endif
2720 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2721 	type = SYS_RES_IOPORT;
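	/*
	 * Each channel's mode bit in the programming interface selects
	 * either native mode (use the BARs probed below) or compatibility
	 * mode (the fixed legacy ISA addresses).
	 */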
2722 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2723 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2724 		    prefetchmask & (1 << 0));
2725 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2726 		    prefetchmask & (1 << 1));
2727 	} else {
2728 		rid = PCIR_BAR(0);
2729 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2730 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2731 		    0, -1);
2732 		rid = PCIR_BAR(1);
2733 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2734 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2735 		    0, -1);
2736 	}
2737 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2738 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2739 		    prefetchmask & (1 << 2));
2740 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2741 		    prefetchmask & (1 << 3));
2742 	} else {
2743 		rid = PCIR_BAR(2);
2744 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2745 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2746 		    0, -1);
2747 		rid = PCIR_BAR(3);
2748 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2749 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2750 		    0, -1);
2751 	}
2752 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2753 	    prefetchmask & (1 << 4));
2754 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2755 	    prefetchmask & (1 << 5));
2756 }
2757 
2758 static void
2759 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2760 {
2761 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2762 	pcicfgregs *cfg = &dinfo->cfg;
2763 	char tunable_name[64];
2764 	int irq;
2765 
2766 	/* Has to have an intpin to have an interrupt. */
2767 	if (cfg->intpin == 0)
2768 		return;
2769 
2770 	/* Let the user override the IRQ with a tunable. */
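	/*
	 * For example, a hypothetical hw.pci0.0.12.0.INTA.irq=10 set in
	 * the loader would force INTA of device 0:0:12:0 onto IRQ 10.
	 */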
2771 	irq = PCI_INVALID_IRQ;
2772 	ksnprintf(tunable_name, sizeof(tunable_name),
2773 	    "hw.pci%d.%d.%d.%d.INT%c.irq",
2774 	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
2775 	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
2776 		if (irq >= 255 || irq <= 0) {
2777 			irq = PCI_INVALID_IRQ;
2778 		} else {
2779 			if (machintr_legacy_intr_find(irq,
2780 			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
2781 				device_printf(dev,
2782 				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
2783 				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
2784 				    cfg->intpin + 'A' - 1, irq);
2785 				irq = PCI_INVALID_IRQ;
2786 			} else {
2787 				BUS_CONFIG_INTR(bus, dev, irq,
2788 				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2789 			}
2790 		}
2791 	}
2792 
2793 	/*
2794 	 * If we didn't get an IRQ via the tunable, then we either use the
2795 	 * IRQ value in the intline register or we ask the bus to route an
2796 	 * interrupt for us.  If force_route is true, then we only use the
2797 	 * value in the intline register if the bus was unable to assign an
2798 	 * IRQ.
2799 	 */
2800 	if (!PCI_INTERRUPT_VALID(irq)) {
2801 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2802 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2803 		if (!PCI_INTERRUPT_VALID(irq))
2804 			irq = cfg->intline;
2805 	}
2806 
2807 	/* If after all that we don't have an IRQ, just bail. */
2808 	if (!PCI_INTERRUPT_VALID(irq))
2809 		return;
2810 
2811 	/* Update the config register if it changed. */
2812 	if (irq != cfg->intline) {
2813 		cfg->intline = irq;
2814 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2815 	}
2816 
2817 	/* Add this IRQ as rid 0 interrupt resource. */
2818 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
2819 	    machintr_legacy_intr_cpuid(irq));
2820 }
2821 
2822 /* Perform early OHCI takeover from SMM. */
2823 static void
2824 ohci_early_takeover(device_t self)
2825 {
2826 	struct resource *res;
2827 	uint32_t ctl;
2828 	int rid;
2829 	int i;
2830 
2831 	rid = PCIR_BAR(0);
2832 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2833 	if (res == NULL)
2834 		return;
2835 
2836 	ctl = bus_read_4(res, OHCI_CONTROL);
2837 	if (ctl & OHCI_IR) {
2838 		if (bootverbose)
2839 			kprintf("ohci early: "
2840 			    "SMM active, request owner change\n");
2841 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2842 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2843 			DELAY(1000);
2844 			ctl = bus_read_4(res, OHCI_CONTROL);
2845 		}
2846 		if (ctl & OHCI_IR) {
2847 			if (bootverbose)
2848 				kprintf("ohci early: "
2849 				    "SMM does not respond, resetting\n");
2850 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2851 		}
2852 		/* Disable interrupts */
2853 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2854 	}
2855 
2856 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2857 }
2858 
2859 /* Perform early UHCI takeover from SMM. */
2860 static void
2861 uhci_early_takeover(device_t self)
2862 {
2863 	struct resource *res;
2864 	int rid;
2865 
2866 	/*
2867 	 * Set the PIRQD enable bit and switch off all the others. We don't
2868 	 * want legacy support to interfere with us.  XXX Does this also mean
2869 	 * that the BIOS won't touch the keyboard anymore if it is connected
2870 	 * to the ports of the root hub?
2871 	 */
2872 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2873 
2874 	/* Disable interrupts */
2875 	rid = PCI_UHCI_BASE_REG;
2876 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2877 	if (res != NULL) {
2878 		bus_write_2(res, UHCI_INTR, 0);
2879 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2880 	}
2881 }
2882 
2883 /* Perform early EHCI takeover from SMM. */
2884 static void
2885 ehci_early_takeover(device_t self)
2886 {
2887 	struct resource *res;
2888 	uint32_t cparams;
2889 	uint32_t eec;
2890 	uint32_t eecp;
2891 	uint32_t bios_sem;
2892 	uint32_t offs;
2893 	int rid;
2894 	int i;
2895 
2896 	rid = PCIR_BAR(0);
2897 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2898 	if (res == NULL)
2899 		return;
2900 
2901 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
2902 
2903 	/* Synchronise with the BIOS if it owns the controller. */
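	/*
	 * EHCI_HCC_EECP(cparams) yields the PCI config space offset of
	 * the first extended capability; each capability's NEXT field
	 * chains to the following one, with 0 terminating the list.
	 */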
2904 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
2905 	    eecp = EHCI_EECP_NEXT(eec)) {
2906 		eec = pci_read_config(self, eecp, 4);
2907 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
2908 			continue;
2909 		}
2910 		bios_sem = pci_read_config(self, eecp +
2911 		    EHCI_LEGSUP_BIOS_SEM, 1);
2912 		if (bios_sem == 0) {
2913 			continue;
2914 		}
2915 		if (bootverbose)
2916 			kprintf("ehci early: "
2917 			    "SMM active, request owner change\n");
2918 
2919 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
2920 
2921 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
2922 			DELAY(1000);
2923 			bios_sem = pci_read_config(self, eecp +
2924 			    EHCI_LEGSUP_BIOS_SEM, 1);
2925 		}
2926 
2927 		if (bios_sem != 0) {
2928 			if (bootverbose)
2929 				kprintf("ehci early: "
2930 				    "SMM does not respond\n");
2931 		}
2932 		/* Disable interrupts */
2933 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
2934 		bus_write_4(res, offs + EHCI_USBINTR, 0);
2935 	}
2936 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2937 }
2938 
2939 /* Perform early XHCI takeover from SMM. */
2940 static void
2941 xhci_early_takeover(device_t self)
2942 {
2943 	struct resource *res;
2944 	uint32_t cparams;
2945 	uint32_t eec;
2946 	uint32_t eecp;
2947 	uint32_t bios_sem;
2948 	uint32_t offs;
2949 	int rid;
2950 	int i;
2951 
2952 	rid = PCIR_BAR(0);
2953 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2954 	if (res == NULL)
2955 		return;
2956 
2957 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
2958 
2959 	eec = -1;
2960 
2961 	/* Synchronise with the BIOS if it owns the controller. */
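	/*
	 * Unlike EHCI, the xHCI extended capabilities live in MMIO space
	 * and are chained by dword offsets, hence the << 2 scaling below.
	 */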
2962 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
2963 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
2964 		eec = bus_read_4(res, eecp);
2965 
2966 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
2967 			continue;
2968 
2969 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
2970 
2971 		if (bios_sem == 0) {
2972 			if (bootverbose)
2973 				kprintf("xhci early: xhci is not owned by SMM\n");
2974 
2975 			continue;
2976 		}
2977 
2978 		if (bootverbose)
2979 			kprintf("xhci early: "
2980 			    "SMM active, request owner change\n");
2981 
2982 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
2983 
2984 		/* wait a maximum of 5 seconds */
2985 
2986 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
2987 			DELAY(1000);
2988 
2989 			bios_sem = bus_read_1(res, eecp +
2990 			    XHCI_XECP_BIOS_SEM);
2991 		}
2992 
2993 		if (bios_sem != 0) {
2994 			if (bootverbose) {
2995 				kprintf("xhci early: "
2996 				    "SMM does not respond\n");
2997 				kprintf("xhci early: "
2998 				    "taking xhci by force\n");
2999 			}
3000 			bus_write_1(res, eecp + XHCI_XECP_BIOS_SEM, 0x00);
3001 		} else {
3002 			if (bootverbose)
3003 				kprintf("xhci early: "
3004 				    "handover successful\n");
3005 		}
3006 
3007 		/* Disable interrupts */
3008 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3009 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3010 		bus_read_4(res, offs + XHCI_USBSTS);
3011 	}
3012 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3013 }
3014 
3015 void
3016 pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
3017 {
3018 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3019 	pcicfgregs *cfg = &dinfo->cfg;
3020 	struct resource_list *rl = &dinfo->resources;
3021 	struct pci_quirk *q;
3022 	int b, i, f, s;
3023 
3024 	b = cfg->bus;
3025 	s = cfg->slot;
3026 	f = cfg->func;
3027 
3028 	/* ATA devices need special map treatment */
3029 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3030 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3031 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3032 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3033 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3034 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
3035 	else
3036 		for (i = 0; i < cfg->nummaps;)
3037 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
3038 			    rl, force, prefetchmask & (1 << i));
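	/*
	 * (pci_add_map() returns 2 for a 64-bit BAR, so the loop above
	 * automatically skips the register holding the upper half.)
	 */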
3039 
3040 	/*
3041 	 * Add additional, quirked resources.
3042 	 */
3043 	for (q = &pci_quirks[0]; q->devid; q++) {
3044 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
3045 		    && q->type == PCI_QUIRK_MAP_REG)
3046 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
3047 			  force, 0);
3048 	}
3049 
3050 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3051 		/*
3052 		 * Try to re-route interrupts. Sometimes the BIOS or
3053 		 * firmware may leave bogus values in these registers.
3054 		 * If the re-route fails, then just stick with what we
3055 		 * have.
3056 		 */
3057 		pci_assign_interrupt(bus, dev, 1);
3058 	}
3059 
3060 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3061 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3062 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3063 			xhci_early_takeover(dev);
3064 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3065 			ehci_early_takeover(dev);
3066 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3067 			ohci_early_takeover(dev);
3068 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3069 			uhci_early_takeover(dev);
3070 	}
3071 }
3072 
3073 void
3074 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3075 {
3076 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3077 	device_t pcib = device_get_parent(dev);
3078 	struct pci_devinfo *dinfo;
3079 	int maxslots;
3080 	int s, f, pcifunchigh;
3081 	uint8_t hdrtype;
3082 
3083 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3084 	    ("dinfo_size too small"));
3085 	maxslots = PCIB_MAXSLOTS(pcib);
3086 	for (s = 0; s <= maxslots; s++) {
3087 		pcifunchigh = 0;
3088 		f = 0;
3089 		DELAY(1);
3090 		hdrtype = REG(PCIR_HDRTYPE, 1);
3091 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3092 			continue;
3093 		if (hdrtype & PCIM_MFDEV)
3094 			pcifunchigh = PCI_FUNCMAX;
3095 		for (f = 0; f <= pcifunchigh; f++) {
3096 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3097 			    dinfo_size);
3098 			if (dinfo != NULL) {
3099 				pci_add_child(dev, dinfo);
3100 			}
3101 		}
3102 	}
3103 #undef REG
3104 }
3105 
3106 void
3107 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3108 {
3109 	device_t pcib;
3110 
3111 	pcib = device_get_parent(bus);
3112 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3113 	device_set_ivars(dinfo->cfg.dev, dinfo);
3114 	resource_list_init(&dinfo->resources);
3115 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3116 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3117 	pci_print_verbose(dinfo);
3118 	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
3119 }
3120 
3121 static int
3122 pci_probe(device_t dev)
3123 {
3124 	device_set_desc(dev, "PCI bus");
3125 
3126 	/* Allow other subclasses to override this driver. */
3127 	return (-1000);
3128 }
3129 
3130 static int
3131 pci_attach(device_t dev)
3132 {
3133 	int busno, domain;
3134 
3135 	/*
3136 	 * Since there can be multiple independently numbered PCI
3137 	 * busses on systems with multiple PCI domains, we can't use
3138 	 * the unit number to decide which bus we are probing. We ask
3139 	 * the parent pcib what our domain and bus numbers are.
3140 	 */
3141 	domain = pcib_get_domain(dev);
3142 	busno = pcib_get_bus(dev);
3143 	if (bootverbose)
3144 		device_printf(dev, "domain=%d, physical bus=%d\n",
3145 		    domain, busno);
3146 
3147 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3148 
3149 	return (bus_generic_attach(dev));
3150 }
3151 
3152 int
3153 pci_suspend(device_t dev)
3154 {
3155 	int dstate, error, i, numdevs;
3156 	device_t acpi_dev, child, *devlist;
3157 	struct pci_devinfo *dinfo;
3158 
3159 	/*
3160 	 * Save the PCI configuration space for each child and set the
3161 	 * device in the appropriate power state for this sleep state.
3162 	 */
3163 	acpi_dev = NULL;
3164 	if (pci_do_power_resume)
3165 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3166 	device_get_children(dev, &devlist, &numdevs);
3167 	for (i = 0; i < numdevs; i++) {
3168 		child = devlist[i];
3169 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3170 		pci_cfg_save(child, dinfo, 0);
3171 	}
3172 
3173 	/* Suspend devices before potentially powering them down. */
3174 	error = bus_generic_suspend(dev);
3175 	if (error) {
3176 		kfree(devlist, M_TEMP);
3177 		return (error);
3178 	}
3179 
3180 	/*
3181 	 * Always set the device to D3.  If ACPI suggests a different
3182 	 * power state, use it instead.  If ACPI is not present, the
3183 	 * firmware is responsible for managing device power.  Skip
3184 	 * children who aren't attached since they are powered down
3185 	 * separately.  Only manage type 0 devices for now.
3186 	 */
3187 	for (i = 0; acpi_dev && i < numdevs; i++) {
3188 		child = devlist[i];
3189 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3190 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
3191 			dstate = PCI_POWERSTATE_D3;
3192 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
3193 			pci_set_powerstate(child, dstate);
3194 		}
3195 	}
3196 	kfree(devlist, M_TEMP);
3197 	return (0);
3198 }
3199 
3200 int
3201 pci_resume(device_t dev)
3202 {
3203 	int i, numdevs;
3204 	device_t acpi_dev, child, *devlist;
3205 	struct pci_devinfo *dinfo;
3206 
3207 	/*
3208 	 * Set each child to D0 and restore its PCI configuration space.
3209 	 */
3210 	acpi_dev = NULL;
3211 	if (pci_do_power_resume)
3212 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3213 	device_get_children(dev, &devlist, &numdevs);
3214 	for (i = 0; i < numdevs; i++) {
3215 		/*
3216 		 * Notify ACPI we're going to D0 but ignore the result.  If
3217 		 * ACPI is not present, the firmware is responsible for
3218 		 * managing device power.  Only manage type 0 devices for now.
3219 		 */
3220 		child = devlist[i];
3221 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3222 		if (acpi_dev && device_is_attached(child) &&
3223 		    dinfo->cfg.hdrtype == 0) {
3224 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
3225 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
3226 		}
3227 
3228 		/* Now the device is powered up, restore its config space. */
3229 		pci_cfg_restore(child, dinfo);
3230 	}
3231 	kfree(devlist, M_TEMP);
3232 	return (bus_generic_resume(dev));
3233 }
3234 
3235 static void
3236 pci_load_vendor_data(void)
3237 {
3238 	caddr_t vendordata, info;
3239 
3240 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3241 		info = preload_search_info(vendordata, MODINFO_ADDR);
3242 		pci_vendordata = *(char **)info;
3243 		info = preload_search_info(vendordata, MODINFO_SIZE);
3244 		pci_vendordata_size = *(size_t *)info;
3245 		/* terminate the database */
3246 		pci_vendordata[pci_vendordata_size] = '\n';
3247 	}
3248 }
3249 
3250 void
3251 pci_driver_added(device_t dev, driver_t *driver)
3252 {
3253 	int numdevs;
3254 	device_t *devlist;
3255 	device_t child;
3256 	struct pci_devinfo *dinfo;
3257 	int i;
3258 
3259 	if (bootverbose)
3260 		device_printf(dev, "driver added\n");
3261 	DEVICE_IDENTIFY(driver, dev);
3262 	device_get_children(dev, &devlist, &numdevs);
3263 	for (i = 0; i < numdevs; i++) {
3264 		child = devlist[i];
3265 		if (device_get_state(child) != DS_NOTPRESENT)
3266 			continue;
3267 		dinfo = device_get_ivars(child);
3268 		pci_print_verbose(dinfo);
3269 		if (bootverbose)
3270 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
3271 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
3272 			    dinfo->cfg.func);
3273 		pci_cfg_restore(child, dinfo);
3274 		if (device_probe_and_attach(child) != 0)
3275 			pci_cfg_save(child, dinfo, 1);
3276 	}
3277 	kfree(devlist, M_TEMP);
3278 }
3279 
3280 static void
3281 pci_child_detached(device_t parent __unused, device_t child)
3282 {
3283 	/* Turn child's power off */
3284 	pci_cfg_save(child, device_get_ivars(child), 1);
3285 }
3286 
3287 int
3288 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3289     driver_intr_t *intr, void *arg, void **cookiep,
3290     lwkt_serialize_t serializer, const char *desc)
3291 {
3292 	int rid, error;
3293 	void *cookie;
3294 
3295 	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
3296 	    arg, &cookie, serializer, desc);
3297 	if (error)
3298 		return (error);
3299 
3300 	/* If this is not a direct child, just bail out. */
3301 	if (device_get_parent(child) != dev) {
3302 		*cookiep = cookie;
3303 		return(0);
3304 	}
3305 
3306 	rid = rman_get_rid(irq);
3307 	if (rid == 0) {
3308 		/* Make sure that INTx is enabled */
3309 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3310 	} else {
3311 		struct pci_devinfo *dinfo = device_get_ivars(child);
3312 		uint64_t addr;
3313 		uint32_t data;
3314 
3315 		/*
3316 		 * Check to see if the interrupt is MSI or MSI-X.
3317 		 * Ask our parent to map the MSI and give
3318 		 * us the address and data register values.
3319 		 * If we fail for some reason, teardown the
3320 		 * interrupt handler.
3321 		 */
3322 		if (dinfo->cfg.msi.msi_alloc > 0) {
3323 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3324 
3325 			if (msi->msi_addr == 0) {
3326 				KASSERT(msi->msi_handlers == 0,
3327 				    ("MSI has handlers, but vectors not mapped"));
3328 				error = PCIB_MAP_MSI(device_get_parent(dev),
3329 				    child, rman_get_start(irq), &addr, &data,
3330 				    rman_get_cpuid(irq));
3331 				if (error)
3332 					goto bad;
3333 				msi->msi_addr = addr;
3334 				msi->msi_data = data;
3335 				pci_enable_msi(child, addr, data);
3336 			}
3337 			msi->msi_handlers++;
3338 		} else {
3339 			struct msix_vector *mv;
3340 			u_int vector;
3341 
3342 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3343 			    ("No MSI-X or MSI rid %d allocated", rid));
3344 
3345 			mv = pci_find_msix_vector(child, rid);
3346 			KASSERT(mv != NULL,
3347 			    ("MSI-X rid %d is not allocated", rid));
3348 			KASSERT(mv->mv_address == 0,
3349 			    ("MSI-X rid %d has been setup", rid));
3350 
3351 			error = PCIB_MAP_MSI(device_get_parent(dev),
3352 			    child, rman_get_start(irq), &addr, &data,
3353 			    rman_get_cpuid(irq));
3354 			if (error)
3355 				goto bad;
3356 			mv->mv_address = addr;
3357 			mv->mv_data = data;
3358 
3359 			vector = PCI_MSIX_RID2VEC(rid);
3360 			pci_setup_msix_vector(child, vector,
3361 			    mv->mv_address, mv->mv_data);
3362 			pci_unmask_msix_vector(child, vector);
3363 		}
3364 
3365 		/*
3366 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
3367 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
3368 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
3369 		 */
3370 		if (!pci_has_quirk(pci_get_devid(child),
3371 		    PCI_QUIRK_MSI_INTX_BUG))
3372 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3373 		else
3374 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3375 	bad:
3376 		if (error) {
3377 			(void)bus_generic_teardown_intr(dev, child, irq,
3378 			    cookie);
3379 			return (error);
3380 		}
3381 	}
3382 	*cookiep = cookie;
3383 	return (0);
3384 }
3385 
3386 int
3387 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3388     void *cookie)
3389 {
3390 	int rid, error;
3391 
3392 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3393 		return (EINVAL);
3394 
3395 	/* If this isn't a direct child, just bail out */
3396 	if (device_get_parent(child) != dev)
3397 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3398 
3399 	rid = rman_get_rid(irq);
3400 	if (rid == 0) {
3401 		/* Mask INTx */
3402 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3403 	} else {
3404 		struct pci_devinfo *dinfo = device_get_ivars(child);
3405 
3406 		/*
3407 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3408 		 * decrement the appropriate handlers count and mask the
3409 		 * MSI-X message, or disable MSI messages if the count
3410 		 * drops to 0.
3411 		 */
3412 		if (dinfo->cfg.msi.msi_alloc > 0) {
3413 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3414 
3415 			KASSERT(rid <= msi->msi_alloc,
3416 			    ("MSI index too high"));
3417 			KASSERT(msi->msi_handlers > 0,
3418 			    ("MSI rid %d is not setup", rid));
3419 
3420 			msi->msi_handlers--;
3421 			if (msi->msi_handlers == 0)
3422 				pci_disable_msi(child);
3423 		} else {
3424 			struct msix_vector *mv;
3425 
3426 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3427 			    ("No MSI or MSI-X rid %d allocated", rid));
3428 
3429 			mv = pci_find_msix_vector(child, rid);
3430 			KASSERT(mv != NULL,
3431 			    ("MSI-X rid %d is not allocated", rid));
3432 			KASSERT(mv->mv_address != 0,
3433 			    ("MSI-X rid %d has not been setup", rid));
3434 
3435 			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
3436 			mv->mv_address = 0;
3437 			mv->mv_data = 0;
3438 		}
3439 	}
3440 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3441 	if (rid > 0)
3442 		KASSERT(error == 0,
3443 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3444 	return (error);
3445 }
3446 
3447 int
3448 pci_print_child(device_t dev, device_t child)
3449 {
3450 	struct pci_devinfo *dinfo;
3451 	struct resource_list *rl;
3452 	int retval = 0;
3453 
3454 	dinfo = device_get_ivars(child);
3455 	rl = &dinfo->resources;
3456 
3457 	retval += bus_print_child_header(dev, child);
3458 
3459 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3460 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3461 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3462 	if (device_get_flags(dev))
3463 		retval += kprintf(" flags %#x", device_get_flags(dev));
3464 
3465 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3466 	    pci_get_function(child));
3467 
3468 	retval += bus_print_child_footer(dev, child);
3469 
3470 	return (retval);
3471 }
3472 
3473 static struct
3474 {
3475 	int	class;
3476 	int	subclass;
3477 	char	*desc;
3478 } pci_nomatch_tab[] = {
3479 	{PCIC_OLD,		-1,			"old"},
3480 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3481 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3482 	{PCIC_STORAGE,		-1,			"mass storage"},
3483 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3484 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3485 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3486 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3487 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3488 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3489 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3490 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3491 	{PCIC_NETWORK,		-1,			"network"},
3492 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3493 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3494 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3495 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3496 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3497 	{PCIC_DISPLAY,		-1,			"display"},
3498 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3499 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3500 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3501 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3502 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3503 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3504 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3505 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3506 	{PCIC_MEMORY,		-1,			"memory"},
3507 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3508 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3509 	{PCIC_BRIDGE,		-1,			"bridge"},
3510 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3511 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3512 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3513 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3514 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3515 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3516 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3517 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3518 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3519 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3520 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3521 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3522 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3523 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3524 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3525 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3526 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3527 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3528 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3529 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3530 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3531 	{PCIC_INPUTDEV,		-1,			"input device"},
3532 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3533 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3534 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3535 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3536 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3537 	{PCIC_DOCKING,		-1,			"docking station"},
3538 	{PCIC_PROCESSOR,	-1,			"processor"},
3539 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3540 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3541 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3542 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3543 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3544 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3545 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3546 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3547 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3548 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3549 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3550 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3551 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3552 	{PCIC_SATCOM,		-1,			"satellite communication"},
3553 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3554 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3555 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3556 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3557 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3558 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3559 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3560 	{PCIC_DASP,		-1,			"dasp"},
3561 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3562 	{0, 0,		NULL}
3563 };
3564 
3565 void
3566 pci_probe_nomatch(device_t dev, device_t child)
3567 {
3568 	int	i;
3569 	char	*cp, *scp, *device;
3570 
3571 	/*
3572 	 * Look for a listing for this device in a loaded device database.
3573 	 */
3574 	if ((device = pci_describe_device(child)) != NULL) {
3575 		device_printf(dev, "<%s>", device);
3576 		kfree(device, M_DEVBUF);
3577 	} else {
3578 		/*
3579 		 * Scan the class/subclass descriptions for a general
3580 		 * description.
3581 		 */
3582 		cp = "unknown";
3583 		scp = NULL;
3584 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3585 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3586 				if (pci_nomatch_tab[i].subclass == -1) {
3587 					cp = pci_nomatch_tab[i].desc;
3588 				} else if (pci_nomatch_tab[i].subclass ==
3589 				    pci_get_subclass(child)) {
3590 					scp = pci_nomatch_tab[i].desc;
3591 				}
3592 			}
3593 		}
3594 		device_printf(dev, "<%s%s%s>",
3595 		    cp ? cp : "",
3596 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3597 		    scp ? scp : "");
3598 	}
3599 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3600 		pci_get_vendor(child), pci_get_device(child),
3601 		pci_get_slot(child), pci_get_function(child));
3602 	if (pci_get_intpin(child) > 0) {
3603 		int irq;
3604 
3605 		irq = pci_get_irq(child);
3606 		if (PCI_INTERRUPT_VALID(irq))
3607 			kprintf(" irq %d", irq);
3608 	}
3609 	kprintf("\n");
3610 
3611 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3612 }
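
/*
 * Reconstructed from the format strings above, an unmatched device with
 * no database entry would be reported roughly as (illustrative values):
 *
 *	pci0: <network, ethernet> (vendor 0x8086, dev 0x1229) at device 25.0 irq 11
 */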
3613 
3614 /*
3615  * Parse the PCI device database, if loaded, and return a pointer to a
3616  * description of the device.
3617  *
3618  * The database is flat text formatted as follows:
3619  *
3620  * Any line not in a valid format is ignored.
3621  * Lines are terminated with newline '\n' characters.
3622  *
3623  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3624  * the vendor name.
3625  *
3626  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3627  * - devices cannot be listed without a corresponding VENDOR line.
3628  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3629  * another TAB, then the device name.
3630  */
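
/*
 * For illustration, a short hypothetical excerpt in this format (the IDs
 * and names below are examples only, not taken from any particular
 * database file; <TAB> marks a literal tab character):
 *
 *	8086<TAB>Intel Corporation
 *	<TAB>1229<TAB>82557/8/9 EtherExpress Pro/100(B) Ethernet
 *	<TAB>7190<TAB>82443BX/ZX/DX Host bridge
 *	10de<TAB>NVIDIA Corporation
 *
 * pci_describe_parse_line() below returns one such entry per call: for
 * the first line it would set *vendor = 0x8086 and leave *device = -1;
 * for the second, *device = 0x1229 and *vendor = -1.
 */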
3631 
3632 /*
3633  * Assuming (ptr) points to the beginning of a line in the database,
3634  * return the vendor or device and description of the next entry.
3635  * Whichever of (vendor) or (device) is inappropriate for the entry
3636  * type is set to -1.  Returns nonzero at the end of the database.
3637  *
3638  * Note that this is not entirely robust in the face of corrupt data;
3639  * we attempt to safeguard against this by spamming the end of the
3640  * database with a newline when we initialise.
3641  */
3642 static int
3643 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3644 {
3645 	char	*cp = *ptr;
3646 	int	left;
3647 
3648 	*device = -1;
3649 	*vendor = -1;
3650 	**desc = '\0';
3651 	for (;;) {
3652 		left = pci_vendordata_size - (cp - pci_vendordata);
3653 		if (left <= 0) {
3654 			*ptr = cp;
3655 			return(1);
3656 		}
3657 
3658 		/* vendor entry? (%79 leaves room for the NUL) */
3659 		if (*cp != '\t' &&
3660 		    ksscanf(cp, "%x\t%79[^\n]", vendor, *desc) == 2)
3661 			break;
3662 		/* device entry? */
3663 		if (*cp == '\t' &&
3664 		    ksscanf(cp, "%x\t%79[^\n]", device, *desc) == 2)
3665 			break;
3666 
3667 		/* skip to next line */
3668 		while (left > 0 && *cp != '\n') {
3669 			cp++;
3670 			left--;
3671 		}
3672 		if (left > 0 && *cp == '\n') {
3673 			cp++;
3674 			left--;
3675 		}
3676 	}
3677 	/* skip to next line */
3678 	while (left > 0 && *cp != '\n') {
3679 		cp++;
3680 		left--;
3681 	}
3682 	if (left > 0 && *cp == '\n')
3683 		cp++;
3684 	*ptr = cp;
3685 	return(0);
3686 }
3687 
3688 static char *
3689 pci_describe_device(device_t dev)
3690 {
3691 	int	vendor, device;
3692 	char	*desc, *vp, *dp, *line;
3693 
3694 	desc = vp = dp = NULL;
3695 
3696 	/*
3697 	 * If we have no vendor data, we can't do anything.
3698 	 */
3699 	if (pci_vendordata == NULL)
3700 		goto out;
3701 
3702 	/*
3703 	 * Scan the vendor data looking for this device
3704 	 */
3705 	line = pci_vendordata;
3706 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3707 		goto out;
3708 	for (;;) {
3709 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3710 			goto out;
3711 		if (vendor == pci_get_vendor(dev))
3712 			break;
3713 	}
3714 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3715 		goto out;
3716 	for (;;) {
3717 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3718 			*dp = 0;
3719 			break;
3720 		}
3721 		if (vendor != -1) {
3722 			*dp = 0;
3723 			break;
3724 		}
3725 		if (device == pci_get_device(dev))
3726 			break;
3727 	}
3728 	if (dp[0] == '\0')
3729 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3730 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3731 	    NULL)
3732 		ksprintf(desc, "%s, %s", vp, dp);
3733  out:
3734 	if (vp != NULL)
3735 		kfree(vp, M_DEVBUF);
3736 	if (dp != NULL)
3737 		kfree(dp, M_DEVBUF);
3738 	return(desc);
3739 }
3740 
3741 int
3742 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3743 {
3744 	struct pci_devinfo *dinfo;
3745 	pcicfgregs *cfg;
3746 
3747 	dinfo = device_get_ivars(child);
3748 	cfg = &dinfo->cfg;
3749 
3750 	switch (which) {
3751 	case PCI_IVAR_ETHADDR:
3752 		/*
3753 		 * The generic accessor doesn't deal with failure, so
3754 		 * we set the return value, then return an error.
3755 		 */
3756 		*((uint8_t **) result) = NULL;
3757 		return (EINVAL);
3758 	case PCI_IVAR_SUBVENDOR:
3759 		*result = cfg->subvendor;
3760 		break;
3761 	case PCI_IVAR_SUBDEVICE:
3762 		*result = cfg->subdevice;
3763 		break;
3764 	case PCI_IVAR_VENDOR:
3765 		*result = cfg->vendor;
3766 		break;
3767 	case PCI_IVAR_DEVICE:
3768 		*result = cfg->device;
3769 		break;
3770 	case PCI_IVAR_DEVID:
3771 		*result = (cfg->device << 16) | cfg->vendor;
3772 		break;
3773 	case PCI_IVAR_CLASS:
3774 		*result = cfg->baseclass;
3775 		break;
3776 	case PCI_IVAR_SUBCLASS:
3777 		*result = cfg->subclass;
3778 		break;
3779 	case PCI_IVAR_PROGIF:
3780 		*result = cfg->progif;
3781 		break;
3782 	case PCI_IVAR_REVID:
3783 		*result = cfg->revid;
3784 		break;
3785 	case PCI_IVAR_INTPIN:
3786 		*result = cfg->intpin;
3787 		break;
3788 	case PCI_IVAR_IRQ:
3789 		*result = cfg->intline;
3790 		break;
3791 	case PCI_IVAR_DOMAIN:
3792 		*result = cfg->domain;
3793 		break;
3794 	case PCI_IVAR_BUS:
3795 		*result = cfg->bus;
3796 		break;
3797 	case PCI_IVAR_SLOT:
3798 		*result = cfg->slot;
3799 		break;
3800 	case PCI_IVAR_FUNCTION:
3801 		*result = cfg->func;
3802 		break;
3803 	case PCI_IVAR_CMDREG:
3804 		*result = cfg->cmdreg;
3805 		break;
3806 	case PCI_IVAR_CACHELNSZ:
3807 		*result = cfg->cachelnsz;
3808 		break;
3809 	case PCI_IVAR_MINGNT:
3810 		*result = cfg->mingnt;
3811 		break;
3812 	case PCI_IVAR_MAXLAT:
3813 		*result = cfg->maxlat;
3814 		break;
3815 	case PCI_IVAR_LATTIMER:
3816 		*result = cfg->lattimer;
3817 		break;
3818 	case PCI_IVAR_PCIXCAP_PTR:
3819 		*result = cfg->pcix.pcix_ptr;
3820 		break;
3821 	case PCI_IVAR_PCIECAP_PTR:
3822 		*result = cfg->expr.expr_ptr;
3823 		break;
3824 	case PCI_IVAR_VPDCAP_PTR:
3825 		*result = cfg->vpd.vpd_reg;
3826 		break;
3827 	default:
3828 		return (ENOENT);
3829 	}
3830 	return (0);
3831 }
3832 
3833 int
3834 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3835 {
3836 	struct pci_devinfo *dinfo;
3837 
3838 	dinfo = device_get_ivars(child);
3839 
3840 	switch (which) {
3841 	case PCI_IVAR_INTPIN:
3842 		dinfo->cfg.intpin = value;
3843 		return (0);
3844 	case PCI_IVAR_ETHADDR:
3845 	case PCI_IVAR_SUBVENDOR:
3846 	case PCI_IVAR_SUBDEVICE:
3847 	case PCI_IVAR_VENDOR:
3848 	case PCI_IVAR_DEVICE:
3849 	case PCI_IVAR_DEVID:
3850 	case PCI_IVAR_CLASS:
3851 	case PCI_IVAR_SUBCLASS:
3852 	case PCI_IVAR_PROGIF:
3853 	case PCI_IVAR_REVID:
3854 	case PCI_IVAR_IRQ:
3855 	case PCI_IVAR_DOMAIN:
3856 	case PCI_IVAR_BUS:
3857 	case PCI_IVAR_SLOT:
3858 	case PCI_IVAR_FUNCTION:
3859 		return (EINVAL);	/* disallow for now */
3860 
3861 	default:
3862 		return (ENOENT);
3863 	}
3864 }
3865 #ifdef notyet
3866 #include "opt_ddb.h"
3867 #ifdef DDB
3868 #include <ddb/ddb.h>
3869 #include <sys/cons.h>
3870 
3871 /*
3872  * List resources based on PCI map registers, for use within ddb.
3873  */
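
/*
 * The command prints one line per device; a purely illustrative line,
 * reconstructed from the db_kprintf() format string below, would look
 * like:
 *
 *	em0@pci0:0:25:0:	class=0x020000 card=0x00008086 chip=0x10d38086 rev=0x00 hdr=0x00
 */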
3874 
3875 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3876 {
3877 	struct pci_devinfo *dinfo;
3878 	struct devlist *devlist_head;
3879 	struct pci_conf *p;
3880 	const char *name;
3881 	int i, error, none_count;
3882 
3883 	none_count = 0;
3884 	/* get the head of the device queue */
3885 	devlist_head = &pci_devq;
3886 
3887 	/*
3888 	 * Go through the list of devices and print out devices
3889 	 */
3890 	for (error = 0, i = 0,
3891 	     dinfo = STAILQ_FIRST(devlist_head);
3892 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3893 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3894 
3895 		/* Populate pd_name and pd_unit */
3896 		name = NULL;
3897 		if (dinfo->cfg.dev)
3898 			name = device_get_name(dinfo->cfg.dev);
3899 
3900 		p = &dinfo->conf;
3901 		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3902 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3903 			(name && *name) ? name : "none",
3904 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3905 			none_count++,
3906 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3907 			p->pc_sel.pc_func, (p->pc_class << 16) |
3908 			(p->pc_subclass << 8) | p->pc_progif,
3909 			(p->pc_subdevice << 16) | p->pc_subvendor,
3910 			(p->pc_device << 16) | p->pc_vendor,
3911 			p->pc_revid, p->pc_hdr);
3912 	}
3913 }
3914 #endif /* DDB */
3915 #endif
3916 
3917 static struct resource *
3918 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3919     u_long start, u_long end, u_long count, u_int flags)
3920 {
3921 	struct pci_devinfo *dinfo = device_get_ivars(child);
3922 	struct resource_list *rl = &dinfo->resources;
3923 	struct resource_list_entry *rle;
3924 	struct resource *res;
3925 	pci_addr_t map, testval;
3926 	int mapsize;
3927 
3928 	/*
3929 	 * Weed out the bogons, and figure out how large the BAR/map
3930 	 * is.  BARs that read back 0 here are bogus and unimplemented.
3931 	 * Note: atapci devices in legacy mode are special and handled
3932 	 * elsewhere in the code.  If you have an atapci device in legacy
3933 	 * mode and it fails here, that other code is broken.
3934 	 */
3935 	res = NULL;
3936 	map = pci_read_config(child, *rid, 4);
3937 	pci_write_config(child, *rid, 0xffffffff, 4);
3938 	testval = pci_read_config(child, *rid, 4);
3939 	if (pci_maprange(testval) == 64)
3940 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3941 	if (pci_mapbase(testval) == 0)
3942 		goto out;
3943 
3944 	/*
3945 	 * Restore the original value of the BAR.  We may have reprogrammed
3946 	 * the BAR of the low-level console device and when booting verbose,
3947 	 * we need the console device addressable.
3948 	 */
3949 	pci_write_config(child, *rid, map, 4);
3950 
3951 	if (PCI_BAR_MEM(testval)) {
3952 		if (type != SYS_RES_MEMORY) {
3953 			if (bootverbose)
3954 				device_printf(dev,
3955 				    "child %s requested type %d for rid %#x,"
3956 				    " but the BAR says it is an memio\n",
3957 				    device_get_nameunit(child), type, *rid);
3958 			goto out;
3959 		}
3960 	} else {
3961 		if (type != SYS_RES_IOPORT) {
3962 			if (bootverbose)
3963 				device_printf(dev,
3964 				    "child %s requested type %d for rid %#x,"
3965 				    " but the BAR says it is an ioport\n",
3966 				    device_get_nameunit(child), type, *rid);
3967 			goto out;
3968 		}
3969 	}
3970 	/*
3971 	 * For real BARs, we need to override the size that
3972 	 * the driver requests, because that's what the BAR
3973 	 * actually uses and we would otherwise have a
3974 	 * situation where we might allocate the excess to
3975 	 * another driver, which won't work.
3976 	 */
3977 	mapsize = pci_mapsize(testval);
3978 	count = 1UL << mapsize;
3979 	if (RF_ALIGNMENT(flags) < mapsize)
3980 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3981 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3982 		flags |= RF_PREFETCHABLE;
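
	/*
	 * Worked example (illustrative): a 32-bit memory BAR that reads
	 * back 0xfffff000 after the all-ones write has 12 low address
	 * bits hardwired to zero, so pci_mapsize() yields 12 and the BAR
	 * decodes count = 1 << 12 = 4096 bytes, which is also enforced
	 * as the minimum alignment.
	 */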
3983 
3984 	/*
3985 	 * Allocate enough resource, and then write back the
3986 	 * appropriate bar for that resource.
3987 	 */
3988 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3989 	    start, end, count, flags, -1);
3990 	if (res == NULL) {
3991 		device_printf(child,
3992 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3993 		    count, *rid, type, start, end);
3994 		goto out;
3995 	}
3996 	resource_list_add(rl, type, *rid, start, end, count, -1);
3997 	rle = resource_list_find(rl, type, *rid);
3998 	if (rle == NULL)
3999 		panic("pci_alloc_map: unexpectedly can't find resource.");
4000 	rle->res = res;
4001 	rle->start = rman_get_start(res);
4002 	rle->end = rman_get_end(res);
4003 	rle->count = count;
4004 	if (bootverbose)
4005 		device_printf(child,
4006 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4007 		    count, *rid, type, rman_get_start(res));
4008 	map = rman_get_start(res);
4009 out:;
4010 	pci_write_config(child, *rid, map, 4);
4011 	if (pci_maprange(testval) == 64)
4012 		pci_write_config(child, *rid + 4, map >> 32, 4);
4013 	return (res);
4014 }
4015 
4016 
4017 struct resource *
4018 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4019     u_long start, u_long end, u_long count, u_int flags, int cpuid)
4020 {
4021 	struct pci_devinfo *dinfo = device_get_ivars(child);
4022 	struct resource_list *rl = &dinfo->resources;
4023 	struct resource_list_entry *rle;
4024 	pcicfgregs *cfg = &dinfo->cfg;
4025 
4026 	/*
4027 	 * Perform lazy resource allocation
4028 	 */
4029 	if (device_get_parent(child) == dev) {
4030 		switch (type) {
4031 		case SYS_RES_IRQ:
4032 			/*
4033 			 * Can't alloc legacy interrupt once MSI messages
4034 			 * have been allocated.
4035 			 */
4036 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4037 			    cfg->msix.msix_alloc > 0))
4038 				return (NULL);
4039 			/*
4040 			 * If the child device doesn't have an
4041 			 * interrupt routed and is deserving of an
4042 			 * interrupt, try to assign it one.
4043 			 */
4044 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4045 			    (cfg->intpin != 0))
4046 				pci_assign_interrupt(dev, child, 0);
4047 			break;
4048 		case SYS_RES_IOPORT:
4049 		case SYS_RES_MEMORY:
4050 			if (*rid < PCIR_BAR(cfg->nummaps)) {
4051 				/*
4052 				 * Enable the I/O mode.  We should
4053 				 * also be assigning resources when
4054 				 * none are present.  The call to
4055 				 * resource_list_alloc() at the end of
4056 				 * this function only partly does this...
4057 				 */
4058 				if (PCI_ENABLE_IO(dev, child, type))
4059 					return (NULL);
4060 			}
4061 			rle = resource_list_find(rl, type, *rid);
4062 			if (rle == NULL)
4063 				return (pci_alloc_map(dev, child, type, rid,
4064 				    start, end, count, flags));
4065 			break;
4066 		}
4067 		/*
4068 		 * If we've already allocated the resource, then
4069 		 * return it now.  But first we may need to activate
4070 		 * it, since we don't allocate the resource as active
4071 		 * above.  Normally this would be done down in the
4072 		 * nexus, but since we short-circuit that path we have
4073 		 * to do its job here.  Not sure if we should kfree the
4074 		 * resource if it fails to activate.
4075 		 */
4076 		rle = resource_list_find(rl, type, *rid);
4077 		if (rle != NULL && rle->res != NULL) {
4078 			if (bootverbose)
4079 				device_printf(child,
4080 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
4081 				    rman_get_size(rle->res), *rid, type,
4082 				    rman_get_start(rle->res));
4083 			if ((flags & RF_ACTIVE) &&
4084 			    bus_generic_activate_resource(dev, child, type,
4085 			    *rid, rle->res) != 0)
4086 				return (NULL);
4087 			return (rle->res);
4088 		}
4089 	}
4090 	return (resource_list_alloc(rl, dev, child, type, rid,
4091 	    start, end, count, flags, cpuid));
4092 }
4093 
4094 void
4095 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4096 {
4097 	struct pci_devinfo *dinfo;
4098 	struct resource_list *rl;
4099 	struct resource_list_entry *rle;
4100 
4101 	if (device_get_parent(child) != dev)
4102 		return;
4103 
4104 	dinfo = device_get_ivars(child);
4105 	rl = &dinfo->resources;
4106 	rle = resource_list_find(rl, type, rid);
4107 	if (rle) {
4108 		if (rle->res) {
4109 			if (rman_get_device(rle->res) != dev ||
4110 			    rman_get_flags(rle->res) & RF_ACTIVE) {
4111 				device_printf(dev, "delete_resource: "
4112 				    "Resource still owned by child, oops. "
4113 				    "(type=%d, rid=%d, addr=%lx)\n",
4114 				    rle->type, rle->rid,
4115 				    rman_get_start(rle->res));
4116 				return;
4117 			}
4118 			bus_release_resource(dev, type, rid, rle->res);
4119 		}
4120 		resource_list_delete(rl, type, rid);
4121 	}
4122 	/*
4123 	 * Why do we turn off the PCI configuration BAR when we delete a
4124 	 * resource? -- imp
4125 	 */
4126 	pci_write_config(child, rid, 0, 4);
4127 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
4128 }
4129 
4130 struct resource_list *
4131 pci_get_resource_list(device_t dev, device_t child)
4132 {
4133 	struct pci_devinfo *dinfo = device_get_ivars(child);
4134 
4135 	if (dinfo == NULL)
4136 		return (NULL);
4137 
4138 	return (&dinfo->resources);
4139 }
4140 
4141 uint32_t
4142 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4143 {
4144 	struct pci_devinfo *dinfo = device_get_ivars(child);
4145 	pcicfgregs *cfg = &dinfo->cfg;
4146 
4147 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4148 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4149 }
4150 
4151 void
4152 pci_write_config_method(device_t dev, device_t child, int reg,
4153     uint32_t val, int width)
4154 {
4155 	struct pci_devinfo *dinfo = device_get_ivars(child);
4156 	pcicfgregs *cfg = &dinfo->cfg;
4157 
4158 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4159 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4160 }
4161 
4162 int
4163 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4164     size_t buflen)
4165 {
4166 
4167 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4168 	    pci_get_function(child));
4169 	return (0);
4170 }
4171 
4172 int
4173 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4174     size_t buflen)
4175 {
4176 	struct pci_devinfo *dinfo;
4177 	pcicfgregs *cfg;
4178 
4179 	dinfo = device_get_ivars(child);
4180 	cfg = &dinfo->cfg;
4181 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4182 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4183 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4184 	    cfg->progif);
4185 	return (0);
4186 }
4187 
4188 int
4189 pci_assign_interrupt_method(device_t dev, device_t child)
4190 {
4191 	struct pci_devinfo *dinfo = device_get_ivars(child);
4192 	pcicfgregs *cfg = &dinfo->cfg;
4193 
4194 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4195 	    cfg->intpin));
4196 }
4197 
4198 static int
4199 pci_modevent(module_t mod, int what, void *arg)
4200 {
4201 	static struct cdev *pci_cdev;
4202 
4203 	switch (what) {
4204 	case MOD_LOAD:
4205 		STAILQ_INIT(&pci_devq);
4206 		pci_generation = 0;
4207 		pci_cdev = make_dev(&pci_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4208 				    "pci");
4209 		pci_load_vendor_data();
4210 		break;
4211 
4212 	case MOD_UNLOAD:
4213 		destroy_dev(pci_cdev);
4214 		break;
4215 	}
4216 
4217 	return (0);
4218 }
4219 
4220 void
4221 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4222 {
4223 	int i;
4224 
4225 	/*
4226 	 * Only do header type 0 devices.  Type 1 devices are bridges,
4227 	 * which we know need special treatment.  Type 2 devices are
4228 	 * cardbus bridges which also require special treatment.
4229 	 * Other types are unknown, and we err on the side of safety
4230 	 * by ignoring them.
4231 	 */
4232 	if (dinfo->cfg.hdrtype != 0)
4233 		return;
4234 
4235 	/*
4236 	 * Restore the device to full power mode.  We must do this
4237 	 * before we restore the registers because moving from D3 to
4238 	 * D0 will cause the chip's BARs and some other registers to
4239 	 * be reset to some unknown power on reset values.  Cut down
4240 	 * the noise on boot by doing nothing if we are already in
4241 	 * state D0.
4242 	 */
4243 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
4244 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4245 	}
4246 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4247 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
4248 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
4249 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4250 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4251 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4252 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4253 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4254 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4255 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4256 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4257 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4258 
4259 	/* Restore MSI and MSI-X configurations if they are present. */
4260 	if (dinfo->cfg.msi.msi_location != 0)
4261 		pci_resume_msi(dev);
4262 	if (dinfo->cfg.msix.msix_location != 0)
4263 		pci_resume_msix(dev);
4264 }
4265 
4266 void
4267 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4268 {
4269 	int i;
4270 	uint32_t cls;
4271 	int ps;
4272 
4273 	/*
4274 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4275 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4276 	 * which also require special treatment.  Other types are unknown, and
4277 	 * we err on the side of safety by ignoring them.  Powering down
4278 	 * bridges should not be undertaken lightly.
4279 	 */
4280 	if (dinfo->cfg.hdrtype != 0)
4281 		return;
4282 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4283 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4284 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4285 
4286 	/*
4287 	 * Some drivers apparently write to these registers w/o updating our
4288 	 * cached copy.  No harm happens if we update the copy, so do so here
4289 	 * so we can restore them.  The COMMAND register is modified by the
4290 	 * bus w/o updating the cache.  This should represent the normally
4291 	 * writable portion of the 'defined' part of type 0 headers.  In
4292 	 * theory we also need to save/restore the PCI capability structures
4293 	 * we know about, but apart from power we don't know any that are
4294 	 * writable.
4295 	 */
4296 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4297 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4298 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4299 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4300 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4301 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4302 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4303 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4304 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4305 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4306 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4307 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4308 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4309 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4310 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4311 
4312 	/*
4313 	 * Don't set the state for display devices, base peripherals and
4314 	 * memory devices, since bad things happen when they are powered down.
4315 	 * We should (a) have drivers that can easily detach and (b) use
4316 	 * generic drivers for these devices so that some device actually
4317 	 * attaches.  We need to make sure that when we implement (a) we don't
4318 	 * power the device down on a reattach.
4319 	 */
4320 	cls = pci_get_class(dev);
4321 	if (!setstate)
4322 		return;
4323 	switch (pci_do_power_nodriver)
4324 	{
4325 		case 0:		/* NO powerdown at all */
4326 			return;
4327 		case 1:		/* Conservative about what to power down */
4328 			if (cls == PCIC_STORAGE)
4329 				return;
4330 			/*FALLTHROUGH*/
4331 		case 2:		/* Aggressive about what to power down */
4332 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4333 			    cls == PCIC_BASEPERIPH)
4334 				return;
4335 			/*FALLTHROUGH*/
4336 		case 3:		/* Power down everything */
4337 			break;
4338 	}
4339 	/*
4340 	 * PCI spec says we can only go into D3 state from D0 state.
4341 	 * Transition from D[12] into D0 before going to D3 state.
4342 	 */
4343 	ps = pci_get_powerstate(dev);
4344 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4345 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4346 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4347 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4348 }
4349 
4350 int
4351 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4352 {
4353 	int rid, type;
4354 	u_int flags;
4355 
4356 	rid = 0;
4357 	type = PCI_INTR_TYPE_LEGACY;
4358 	flags = RF_SHAREABLE | RF_ACTIVE;
4359 
4360 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4361 	if (msi_enable) {
4362 		int cpu;
4363 
4364 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4365 		if (cpu >= ncpus)
4366 			cpu = ncpus - 1;
4367 
4368 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4369 			flags &= ~RF_SHAREABLE;
4370 			type = PCI_INTR_TYPE_MSI;
4371 		}
4372 	}
4373 
4374 	*rid0 = rid;
4375 	*flags0 = flags;
4376 
4377 	return type;
4378 }
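
/*
 * A minimal usage sketch for pci_alloc_1intr() (hypothetical driver
 * attach code, for illustration only; "sc" and "msi_enable" are
 * assumptions, and error handling is abbreviated):
 *
 *	u_int irq_flags;
 *	int irq_rid;
 *
 *	sc->intr_type = pci_alloc_1intr(dev, msi_enable,
 *	    &irq_rid, &irq_flags);
 *	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 *	    &irq_rid, irq_flags);
 *	if (sc->irq_res == NULL) {
 *		if (sc->intr_type == PCI_INTR_TYPE_MSI)
 *			pci_release_msi(dev);
 *		return (ENXIO);
 *	}
 */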
4379 
4380 /* Wrapper APIs suitable for device driver use. */
4381 void
4382 pci_save_state(device_t dev)
4383 {
4384 	struct pci_devinfo *dinfo;
4385 
4386 	dinfo = device_get_ivars(dev);
4387 	pci_cfg_save(dev, dinfo, 0);
4388 }
4389 
4390 void
4391 pci_restore_state(device_t dev)
4392 {
4393 	struct pci_devinfo *dinfo;
4394 
4395 	dinfo = device_get_ivars(dev);
4396 	pci_cfg_restore(dev, dinfo);
4397 }
4398