xref: /dragonfly/sys/bus/pci/pci.c (revision 267c04fd)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 #include "opt_compat_oldpci.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 #include <sys/machintr.h>
46 
47 #include <machine/msi_machdep.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52 
53 #include <sys/bus.h>
54 #include <sys/rman.h>
55 #include <sys/device.h>
56 
57 #include <sys/pciio.h>
58 #include <bus/pci/pcireg.h>
59 #include <bus/pci/pcivar.h>
60 #include <bus/pci/pci_private.h>
61 
62 #include <bus/u4b/controller/xhcireg.h>
63 #include <bus/u4b/controller/ehcireg.h>
64 #include <bus/u4b/controller/ohcireg.h>
65 #include <bus/u4b/controller/uhcireg.h>
66 
67 #include "pcib_if.h"
68 #include "pci_if.h"
69 
70 #ifdef __HAVE_ACPI
71 #include <contrib/dev/acpica/acpi.h>
72 #include "acpi_if.h"
73 #else
74 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
75 #endif
76 
77 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
78 
79 static uint32_t		pci_mapbase(unsigned mapreg);
80 static const char	*pci_maptype(unsigned mapreg);
81 static int		pci_mapsize(unsigned testval);
82 static int		pci_maprange(unsigned mapreg);
83 static void		pci_fixancient(pcicfgregs *cfg);
84 
85 static int		pci_porten(device_t pcib, int b, int s, int f);
86 static int		pci_memen(device_t pcib, int b, int s, int f);
87 static void		pci_assign_interrupt(device_t bus, device_t dev,
88 			    int force_route);
89 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
90 			    int b, int s, int f, int reg,
91 			    struct resource_list *rl, int force, int prefetch);
92 static int		pci_probe(device_t dev);
93 static int		pci_attach(device_t dev);
94 static void		pci_child_detached(device_t, device_t);
95 static void		pci_load_vendor_data(void);
96 static int		pci_describe_parse_line(char **ptr, int *vendor,
97 			    int *device, char **desc);
98 static char		*pci_describe_device(device_t dev);
99 static int		pci_modevent(module_t mod, int what, void *arg);
100 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
101 			    pcicfgregs *cfg);
102 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
103 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t *data);
105 #if 0
106 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t data);
108 #endif
109 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
110 static void		pci_disable_msi(device_t dev);
111 static void		pci_enable_msi(device_t dev, uint64_t address,
112 			    uint16_t data);
113 static void		pci_setup_msix_vector(device_t dev, u_int index,
114 			    uint64_t address, uint32_t data);
115 static void		pci_mask_msix_vector(device_t dev, u_int index);
116 static void		pci_unmask_msix_vector(device_t dev, u_int index);
117 static void		pci_mask_msix_allvectors(device_t dev);
118 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
119 static int		pci_msi_blacklisted(void);
120 static void		pci_resume_msi(device_t dev);
121 static void		pci_resume_msix(device_t dev);
122 static int		pcie_slotimpl(const pcicfgregs *);
123 static void		pci_print_verbose_expr(const pcicfgregs *);
124 
125 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
128 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
129 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
130 static void		pci_read_cap_subvendor(device_t, int, int,
131 			    pcicfgregs *);
132 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
133 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
134 
/*
 * Method dispatch table binding the generic device and bus kobj
 * interfaces, plus the PCI-specific interface (see "pci_if.h"),
 * to this driver's implementations.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
187 
/* Declare the "pci" driver class using the method table above. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
/* Attach the pci driver beneath pcib (PCI bridge) devices. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/*
 * Vendor/device description data loaded by pci_load_vendor_data()
 * and consumed by pci_describe_device().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
196 
197 
/*
 * Dispatch table mapping PCI capability IDs to the routine that
 * records that capability's registers into a pcicfgregs.  Walked
 * by pci_read_capabilities().
 */
static const struct pci_read_cap {
	int		cap;		/* PCIY_* capability ID */
	pci_read_cap_t	read_cap;	/* parser for this capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
212 
/*
 * Quirk table entry: devid is the combined device(hi)/vendor(lo)
 * 32-bit ID as read from PCIR_DEVVENDOR.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-type specific (e.g. map register offset) */
	int	arg2;	/* quirk-type specific */
};

/* Devices needing special handling; terminated by a zero devid. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
256 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

#define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
#define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */

/* Global list of all enumerated PCI functions (see pci_read_device). */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped each time pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set while parsing capabilities if any PCIe/PCI-X device is found. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

#if defined(__x86_64__)
static int pci_usb_takeover = 1;
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
#endif

static int pci_msi_cpuid;
321 
322 /* Find a device_t by bus/slot/function in domain 0 */
323 
324 device_t
325 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
326 {
327 
328 	return (pci_find_dbsf(0, bus, slot, func));
329 }
330 
331 /* Find a device_t by domain/bus/slot/function */
332 
333 device_t
334 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
335 {
336 	struct pci_devinfo *dinfo;
337 
338 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
339 		if ((dinfo->cfg.domain == domain) &&
340 		    (dinfo->cfg.bus == bus) &&
341 		    (dinfo->cfg.slot == slot) &&
342 		    (dinfo->cfg.func == func)) {
343 			return (dinfo->cfg.dev);
344 		}
345 	}
346 
347 	return (NULL);
348 }
349 
350 /* Find a device_t by vendor/device ID */
351 
352 device_t
353 pci_find_device(uint16_t vendor, uint16_t device)
354 {
355 	struct pci_devinfo *dinfo;
356 
357 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
358 		if ((dinfo->cfg.vendor == vendor) &&
359 		    (dinfo->cfg.device == device)) {
360 			return (dinfo->cfg.dev);
361 		}
362 	}
363 
364 	return (NULL);
365 }
366 
367 device_t
368 pci_find_class(uint8_t class, uint8_t subclass)
369 {
370 	struct pci_devinfo *dinfo;
371 
372 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
373 		if (dinfo->cfg.baseclass == class &&
374 		    dinfo->cfg.subclass == subclass) {
375 			return (dinfo->cfg.dev);
376 		}
377 	}
378 
379 	return (NULL);
380 }
381 
382 /* return base address of memory or port map */
383 
384 static uint32_t
385 pci_mapbase(uint32_t mapreg)
386 {
387 
388 	if (PCI_BAR_MEM(mapreg))
389 		return (mapreg & PCIM_BAR_MEM_BASE);
390 	else
391 		return (mapreg & PCIM_BAR_IO_BASE);
392 }
393 
394 /* return map type of memory or port map */
395 
396 static const char *
397 pci_maptype(unsigned mapreg)
398 {
399 
400 	if (PCI_BAR_IO(mapreg))
401 		return ("I/O Port");
402 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
403 		return ("Prefetchable Memory");
404 	return ("Memory");
405 }
406 
407 /* return log2 of map size decoded for memory or port map */
408 
409 static int
410 pci_mapsize(uint32_t testval)
411 {
412 	int ln2size;
413 
414 	testval = pci_mapbase(testval);
415 	ln2size = 0;
416 	if (testval != 0) {
417 		while ((testval & 1) == 0)
418 		{
419 			ln2size++;
420 			testval >>= 1;
421 		}
422 	}
423 	return (ln2size);
424 }
425 
426 /* return log2 of address range supported by map register */
427 
428 static int
429 pci_maprange(unsigned mapreg)
430 {
431 	int ln2range = 0;
432 
433 	if (PCI_BAR_IO(mapreg))
434 		ln2range = 32;
435 	else
436 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
437 		case PCIM_BAR_MEM_32:
438 			ln2range = 32;
439 			break;
440 		case PCIM_BAR_MEM_1MB:
441 			ln2range = 20;
442 			break;
443 		case PCIM_BAR_MEM_64:
444 			ln2range = 64;
445 			break;
446 		}
447 	return (ln2range);
448 }
449 
450 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
451 
452 static void
453 pci_fixancient(pcicfgregs *cfg)
454 {
455 	if (cfg->hdrtype != 0)
456 		return;
457 
458 	/* PCI to PCI bridges use header type 1 */
459 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
460 		cfg->hdrtype = 1;
461 }
462 
/*
 * Extract the header-type specific configuration registers:
 * subsystem vendor/device IDs, number of BARs, and (when
 * COMPAT_OLDPCI) the secondary bus number.  Header types are
 * 0 = plain device, 1 = PCI-PCI bridge, 2 = cardbus bridge.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		cfg->nummaps	    = PCI_MAXMAPS_1;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
#endif
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
#endif
		break;
	}
#undef REG
}
492 
/*
 * Read the configuration header of the function at domain d,
 * bus b, slot s, function f into a freshly allocated pci_devinfo
 * of 'size' bytes ('size' must be at least sizeof(struct
 * pci_devinfo); callers may over-allocate for private state).
 *
 * On success the new entry is appended to the global pci_devq
 * list and returned; returns NULL if nothing decodes the address
 * (the vendor/device register reads back as all-ones).
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones means no device responds at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		pci_read_capabilities(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the registers into the pciio pci_conf view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
564 
565 static int
566 pci_fixup_nextptr(int *nextptr0)
567 {
568 	int nextptr = *nextptr0;
569 
570 	/* "Next pointer" is only one byte */
571 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
572 
573 	if (nextptr & 0x3) {
574 		/*
575 		 * PCI local bus spec 3.0:
576 		 *
577 		 * "... The bottom two bits of all pointers are reserved
578 		 *  and must be implemented as 00b although software must
579 		 *  mask them to allow for future uses of these bits ..."
580 		 */
581 		if (bootverbose) {
582 			kprintf("Illegal PCI extended capability "
583 				"offset, fixup 0x%02x -> 0x%02x\n",
584 				nextptr, nextptr & ~0x3);
585 		}
586 		nextptr &= ~0x3;
587 	}
588 	*nextptr0 = nextptr;
589 
590 	if (nextptr < 0x40) {
591 		if (nextptr != 0) {
592 			kprintf("Illegal PCI extended capability "
593 				"offset 0x%02x", nextptr);
594 		}
595 		return 0;
596 	}
597 	return 1;
598 }
599 
/*
 * Record the power management capability: capability word and the
 * config-space offsets of the status and PMCSR registers.  Only the
 * first PM capability encountered is used.
 */
static void
pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_pp *pp = &cfg->pp;

	/* Already seen a PM capability for this function. */
	if (pp->pp_cap)
		return;

	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
	pp->pp_status = ptr + PCIR_POWER_STATUS;
	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;

	if ((nextptr - ptr) > PCIR_POWER_DATA) {
		/*
		 * XXX
		 * We should write to data_select and read back from
		 * data_scale to determine whether data register is
		 * implemented.
		 */
#ifdef foo
		pp->pp_data = ptr + PCIR_POWER_DATA;
#else
		pp->pp_data = 0;
#endif
	}

#undef REG
}
631 
/*
 * Record HyperTransport capability data (x86_64 only): remembers
 * slave-interface blocks and, for MSI-mapping blocks, the mapping
 * window address so MSI interrupts can be routed through the bridge.
 */
static void
pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#if defined(__x86_64__)

#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_ht *ht = &cfg->ht;
	uint64_t addr;
	uint32_t val;

	/* Determine HT-specific capability type. */
	val = REG(ptr + PCIR_HT_COMMAND, 2);

	/* Slave/primary blocks are typed by the top 3 command bits only. */
	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
		cfg->ht.ht_slave = ptr;

	/* Everything below only applies to MSI-mapping blocks. */
	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
		return;

	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
		/* Sanity check the mapping window. */
		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
		addr <<= 32;
		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
		if (addr != MSI_X86_ADDR_BASE) {
			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
				"has non-default MSI window 0x%llx\n",
				cfg->domain, cfg->bus, cfg->slot, cfg->func,
				(long long)addr);
		}
	} else {
		addr = MSI_X86_ADDR_BASE;
	}

	ht->ht_msimap = ptr;
	ht->ht_msictrl = val;
	ht->ht_msiaddr = addr;

#undef REG

#endif	/* __x86_64__ */
}
676 
/*
 * Record the MSI capability: its location, control word, and the
 * number of messages the device supports.
 */
static void
pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msi *msi = &cfg->msi;

	msi->msi_location = ptr;
	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
	/* The MMC field encodes log2 of the supported message count. */
	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);

#undef REG
}
691 
/*
 * Record the MSI-X capability: message count and the BAR/offset
 * locations of the vector table and pending bit array (PBA).
 */
static void
pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msix *msix = &cfg->msix;
	uint32_t val;

	msix->msix_location = ptr;
	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
	/* Table size field is encoded as N-1. */
	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;

	/* Low bits select the BAR, the rest is the offset within it. */
	val = REG(ptr + PCIR_MSIX_TABLE, 4);
	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;

	val = REG(ptr + PCIR_MSIX_PBA, 4);
	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;

	TAILQ_INIT(&msix->msix_vectors);

#undef REG
}
717 
/*
 * Remember where the VPD capability lives; the actual VPD contents
 * are parsed lazily by pci_read_vpd().
 */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
723 
/*
 * Record the subsystem vendor/device IDs from the bridge subvendor
 * capability (only valid on header type 1 devices, which lack the
 * type 0 subsystem ID registers).
 */
static void
pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	/* Should always be true. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
		uint32_t val;

		/* Vendor in the low half, device in the high half. */
		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
		cfg->subvendor = val & 0xffff;
		cfg->subdevice = val >> 16;
	}

#undef REG
}
741 
/*
 * Record the PCI-X capability location and note that the system
 * appears to have a PCI-X chipset.
 */
static void
pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	/*
	 * Assume we have a PCI-X chipset if we have
	 * at least one PCI-PCI bridge with a PCI-X
	 * capability.  Note that some systems with
	 * PCI-express or HT chipsets might match on
	 * this check as well.
	 */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
		pcix_chipset = 1;

	cfg->pcix.pcix_ptr = ptr;
}
757 
758 static int
759 pcie_slotimpl(const pcicfgregs *cfg)
760 {
761 	const struct pcicfg_expr *expr = &cfg->expr;
762 	uint16_t port_type;
763 
764 	/*
765 	 * - Slot implemented bit is meaningful iff current port is
766 	 *   root port or down stream port.
767 	 * - Testing for root port or down stream port is meanningful
768 	 *   iff PCI configure has type 1 header.
769 	 */
770 
771 	if (cfg->hdrtype != 1)
772 		return 0;
773 
774 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
775 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
776 		return 0;
777 
778 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
779 		return 0;
780 
781 	return 1;
782 }
783 
/*
 * Record the PCI Express capability: its location, the capability
 * word, and (when a slot is implemented) the slot capabilities.
 * Also notes that the system has a PCIe chipset.
 */
static void
pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_expr *expr = &cfg->expr;

	/*
	 * Assume we have a PCI-express chipset if we have
	 * at least one PCI-express device.
	 */
	pcie_chipset = 1;

	expr->expr_ptr = ptr;
	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);

	/*
	 * Read slot capabilities.  Slot capabilities exists iff
	 * current port's slot is implemented
	 */
	if (pcie_slotimpl(cfg))
		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);

#undef REG
}
810 
/*
 * Walk the device's capability list, dispatching each entry to the
 * matching parser in pci_read_caps[] to populate *cfg.  On x86_64,
 * also enables the MSI mapping window on HyperTransport slaves.
 */
static void
pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)

	uint32_t val;
	int nextptr, ptrptr;

	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
		/* No capabilities */
		return;
	}

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;				/* no capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (pci_fixup_nextptr(&nextptr)) {
		const struct pci_read_cap *rc;
		int ptr = nextptr;

		/* Find the next entry */
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		val = REG(ptr + PCICAP_ID, 1);
		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
			if (rc->cap == val) {
				rc->read_cap(pcib, ptr, nextptr, cfg);
				break;
			}
		}
	}

#if defined(__x86_64__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif

/* REG and WREG use carry through to next functions */
}
877 
878 /*
879  * PCI Vital Product Data
880  */
881 
882 #define	PCI_VPD_TIMEOUT		1000000
883 
884 static int
885 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
886 {
887 	int count = PCI_VPD_TIMEOUT;
888 
889 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
890 
891 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
892 
893 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
894 		if (--count < 0)
895 			return (ENXIO);
896 		DELAY(1);	/* limit looping */
897 	}
898 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
899 
900 	return (0);
901 }
902 
#if 0
/*
 * Write one 32-bit word of VPD data at 'reg' (must be 4-byte
 * aligned): loads the data register, then writes the offset with
 * bit 15 set and polls until the hardware clears it to signal
 * completion.  Returns 0 on success, ENXIO on timeout.
 * Currently unused (compiled out).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
922 
923 #undef PCI_VPD_TIMEOUT
924 
/*
 * Cursor state for sequentially reading a device's VPD through the
 * 32-bit VPD address/data register window (see vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running byte sum for checksum */
};
933 
934 static int
935 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
936 {
937 	uint32_t reg;
938 	uint8_t byte;
939 
940 	if (vrs->bytesinval == 0) {
941 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
942 			return (ENXIO);
943 		vrs->val = le32toh(reg);
944 		vrs->off += 4;
945 		byte = vrs->val & 0xff;
946 		vrs->bytesinval = 3;
947 	} else {
948 		vrs->val = vrs->val >> 8;
949 		byte = vrs->val & 0xff;
950 		vrs->bytesinval--;
951 	}
952 
953 	vrs->cksum += byte;
954 	*data = byte;
955 	return (0);
956 }
957 
958 int
959 pcie_slot_implemented(device_t dev)
960 {
961 	struct pci_devinfo *dinfo = device_get_ivars(dev);
962 
963 	return pcie_slotimpl(&dinfo->cfg);
964 }
965 
966 void
967 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
968 {
969 	uint8_t expr_ptr;
970 	uint16_t val;
971 
972 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
973 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
974 		panic("%s: invalid max read request size 0x%02x",
975 		      device_get_nameunit(dev), rqsize);
976 	}
977 
978 	expr_ptr = pci_get_pciecap_ptr(dev);
979 	if (!expr_ptr)
980 		panic("%s: not PCIe device", device_get_nameunit(dev));
981 
982 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
983 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
984 		if (bootverbose)
985 			device_printf(dev, "adjust device control 0x%04x", val);
986 
987 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
988 		val |= rqsize;
989 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
990 
991 		if (bootverbose)
992 			kprintf(" -> 0x%04x\n", val);
993 	}
994 }
995 
996 uint16_t
997 pcie_get_max_readrq(device_t dev)
998 {
999 	uint8_t expr_ptr;
1000 	uint16_t val;
1001 
1002 	expr_ptr = pci_get_pciecap_ptr(dev);
1003 	if (!expr_ptr)
1004 		panic("%s: not PCIe device", device_get_nameunit(dev));
1005 
1006 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1007 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
1008 }
1009 
/*
 * Read and parse a device's Vital Product Data (VPD) into cfg->vpd.
 *
 * Bytes are pulled one at a time through vpd_nextbyte() and decoded
 * by a state machine:
 *
 *    0  item header (resource tag)
 *    1  Identifier String body
 *    2  VPD-R keyword header
 *    3  VPD-R keyword value
 *    4  skip body of an unhandled item
 *    5  VPD-W keyword header
 *    6  VPD-W keyword value
 *   -1  normal termination (End tag, or data judged invalid)
 *   -2  config-space read error
 *
 * On a checksum failure the parsed read-only data is discarded; on a
 * read error everything parsed so far is discarded.  vpd_cached is
 * set in every case so the (possibly failed) read is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;		/* state-machine state; < 0 terminates */
	int name;		/* resource tag of the current item */
	int remain;		/* bytes left in the current item */
	int i;			/* write index into the current value */
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;		/* -1 unknown, 0 bad, 1 good */
	int dflen;		/* data length of the current keyword */
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/*
				 * 0x7f * 4 = 508 bytes is the most data
				 * the VPD address register can reach;
				 * larger claims mean corrupt data.
				 */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					kprintf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: length in the tag byte. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = kmalloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the keyword array when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				kprintf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = kmalloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* Three header bytes consumed: keyword + length. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword carries the checksum byte; the
			 * running sum of all bytes through it must be 0.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						kprintf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to the final count. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip the remaining bytes of an unhandled item. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where the writable data begins in VPD. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done even on failure so the read is not retried. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1289 
1290 int
1291 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1292 {
1293 	struct pci_devinfo *dinfo = device_get_ivars(child);
1294 	pcicfgregs *cfg = &dinfo->cfg;
1295 
1296 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1297 		pci_read_vpd(device_get_parent(dev), cfg);
1298 
1299 	*identptr = cfg->vpd.vpd_ident;
1300 
1301 	if (*identptr == NULL)
1302 		return (ENXIO);
1303 
1304 	return (0);
1305 }
1306 
1307 int
1308 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1309 	const char **vptr)
1310 {
1311 	struct pci_devinfo *dinfo = device_get_ivars(child);
1312 	pcicfgregs *cfg = &dinfo->cfg;
1313 	int i;
1314 
1315 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1316 		pci_read_vpd(device_get_parent(dev), cfg);
1317 
1318 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1319 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1320 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1321 			*vptr = cfg->vpd.vpd_ros[i].value;
1322 		}
1323 
1324 	if (i != cfg->vpd.vpd_rocnt)
1325 		return (0);
1326 
1327 	*vptr = NULL;
1328 	return (ENXIO);
1329 }
1330 
1331 /*
1332  * Return the offset in configuration space of the requested extended
1333  * capability entry or 0 if the specified capability was not found.
1334  */
1335 int
1336 pci_find_extcap_method(device_t dev, device_t child, int capability,
1337     int *capreg)
1338 {
1339 	struct pci_devinfo *dinfo = device_get_ivars(child);
1340 	pcicfgregs *cfg = &dinfo->cfg;
1341 	u_int32_t status;
1342 	u_int8_t ptr;
1343 
1344 	/*
1345 	 * Check the CAP_LIST bit of the PCI status register first.
1346 	 */
1347 	status = pci_read_config(child, PCIR_STATUS, 2);
1348 	if (!(status & PCIM_STATUS_CAPPRESENT))
1349 		return (ENXIO);
1350 
1351 	/*
1352 	 * Determine the start pointer of the capabilities list.
1353 	 */
1354 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1355 	case 0:
1356 	case 1:
1357 		ptr = PCIR_CAP_PTR;
1358 		break;
1359 	case 2:
1360 		ptr = PCIR_CAP_PTR_2;
1361 		break;
1362 	default:
1363 		/* XXX: panic? */
1364 		return (ENXIO);		/* no extended capabilities support */
1365 	}
1366 	ptr = pci_read_config(child, ptr, 1);
1367 
1368 	/*
1369 	 * Traverse the capabilities list.
1370 	 */
1371 	while (ptr != 0) {
1372 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1373 			if (capreg != NULL)
1374 				*capreg = ptr;
1375 			return (0);
1376 		}
1377 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1378 	}
1379 
1380 	return (ENOENT);
1381 }
1382 
1383 /*
1384  * Support for MSI-X message interrupts.
1385  */
1386 static void
1387 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1388     uint32_t data)
1389 {
1390 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1391 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1392 	uint32_t offset;
1393 
1394 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1395 	offset = msix->msix_table_offset + index * 16;
1396 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1397 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1398 	bus_write_4(msix->msix_table_res, offset + 8, data);
1399 
1400 	/* Enable MSI -> HT mapping. */
1401 	pci_ht_map_msi(dev, address);
1402 }
1403 
1404 static void
1405 pci_mask_msix_vector(device_t dev, u_int index)
1406 {
1407 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1408 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1409 	uint32_t offset, val;
1410 
1411 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1412 	offset = msix->msix_table_offset + index * 16 + 12;
1413 	val = bus_read_4(msix->msix_table_res, offset);
1414 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1415 		val |= PCIM_MSIX_VCTRL_MASK;
1416 		bus_write_4(msix->msix_table_res, offset, val);
1417 	}
1418 }
1419 
1420 static void
1421 pci_unmask_msix_vector(device_t dev, u_int index)
1422 {
1423 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1424 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1425 	uint32_t offset, val;
1426 
1427 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1428 	offset = msix->msix_table_offset + index * 16 + 12;
1429 	val = bus_read_4(msix->msix_table_res, offset);
1430 	if (val & PCIM_MSIX_VCTRL_MASK) {
1431 		val &= ~PCIM_MSIX_VCTRL_MASK;
1432 		bus_write_4(msix->msix_table_res, offset, val);
1433 	}
1434 }
1435 
1436 int
1437 pci_pending_msix_vector(device_t dev, u_int index)
1438 {
1439 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1440 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1441 	uint32_t offset, bit;
1442 
1443 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1444 	    ("MSI-X is not setup yet"));
1445 
1446 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1447 	offset = msix->msix_pba_offset + (index / 32) * 4;
1448 	bit = 1 << index % 32;
1449 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1450 }
1451 
1452 /*
1453  * Restore MSI-X registers and table during resume.  If MSI-X is
1454  * enabled then walk the virtual table to restore the actual MSI-X
1455  * table.
1456  */
1457 static void
1458 pci_resume_msix(device_t dev)
1459 {
1460 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1461 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1462 
1463 	if (msix->msix_table_res != NULL) {
1464 		const struct msix_vector *mv;
1465 
1466 		pci_mask_msix_allvectors(dev);
1467 
1468 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1469 			u_int vector;
1470 
1471 			if (mv->mv_address == 0)
1472 				continue;
1473 
1474 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1475 			pci_setup_msix_vector(dev, vector,
1476 			    mv->mv_address, mv->mv_data);
1477 			pci_unmask_msix_vector(dev, vector);
1478 		}
1479 	}
1480 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1481 	    msix->msix_ctrl, 2);
1482 }
1483 
1484 /*
1485  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1486  *
1487  * After this function returns, the MSI-X's rid will be saved in rid0.
1488  */
int
pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
    int *rid0, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_vector *mv;
	struct resource_list_entry *rle;
	int error, irq, rid;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
	KASSERT(vector < msix->msix_msgnum,
	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));

	if (bootverbose) {
		device_printf(child,
		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
		    vector, msix->msix_msgnum);
	}

	/* Set rid according to vector number */
	rid = PCI_MSIX_VEC2RID(vector);

	/* Vector has already been allocated */
	mv = pci_find_msix_vector(child, rid);
	if (mv != NULL)
		return EBUSY;

	/* Allocate a message. */
	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
	if (error)
		return error;
	/* Publish the IRQ as a SYS_RES_IRQ resource under the new rid. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
	    irq, irq, 1, cpuid);

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
		    rle->start, cpuid);
	}

	/* Update counts of alloc'd messages. */
	msix->msix_alloc++;

	/*
	 * Track the allocated rid; mv_address stays 0 (M_ZERO) until
	 * the vector is set up.
	 */
	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
	mv->mv_rid = rid;
	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);

	*rid0 = rid;
	return 0;
}
1542 
/*
 * Release the MSI-X vector identified by its resource id.  The
 * vector must already be torn down (mv_address == 0) and its
 * SYS_RES_IRQ resource must no longer be held by the driver.
 */
int
pci_release_msix_vector_method(device_t dev, device_t child, int rid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	struct msix_vector *mv;
	int irq, cpuid;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
	KASSERT(rid > 0, ("invalid rid %d", rid));

	mv = pci_find_msix_vector(child, rid);
	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown", rid));

	/* Make sure resource is no longer allocated. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
	KASSERT(rle->res == NULL,
	    ("MSI-X resource is still allocated, rid %d", rid));

	/* Capture IRQ and target CPU before deleting the entry. */
	irq = rle->start;
	cpuid = rle->cpuid;

	/* Free the resource list entries. */
	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);

	/* Release the IRQ. */
	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);

	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
	kfree(mv, M_DEVBUF);

	msix->msix_alloc--;
	return (0);
}
1582 
1583 /*
1584  * Return the max supported MSI-X messages this device supports.
1585  * Basically, assuming the MD code can alloc messages, this function
1586  * should return the maximum value that pci_alloc_msix() can return.
1587  * Thus, it is subject to the tunables, etc.
1588  */
1589 int
1590 pci_msix_count_method(device_t dev, device_t child)
1591 {
1592 	struct pci_devinfo *dinfo = device_get_ivars(child);
1593 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1594 
1595 	if (pci_do_msix && msix->msix_location != 0)
1596 		return (msix->msix_msgnum);
1597 	return (0);
1598 }
1599 
/*
 * Prepare the device for MSI-X usage: verify MSI-X may be used
 * (no IRQ rid 0 or plain MSI allocated, system not blacklisted,
 * capability present), verify the BARs holding the vector table
 * and pending bit array are mapped and active, record those
 * resources, and mask all vectors.  Returns 0 on success.
 */
int
pci_setup_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct resource *table_res, *pba_res;

	KASSERT(cfg->msix.msix_table_res == NULL &&
	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet"));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated MSIs? */
	if (cfg->msi.msi_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
	    !pci_do_msix)
		return (ENODEV);

	KASSERT(cfg->msix.msix_alloc == 0 &&
	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
	    ("MSI-X vector has been allocated"));

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/*
	 * When the PBA shares the table's BAR, rle still points at the
	 * table's entry here, so pba_res == table_res.
	 */
	pba_res = rle->res;

	cfg->msix.msix_table_res = table_res;
	cfg->msix.msix_pba_res = pba_res;

	pci_mask_msix_allvectors(dev);

	return 0;
}
1656 
1657 void
1658 pci_teardown_msix(device_t dev)
1659 {
1660 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1661 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1662 
1663 	KASSERT(msix->msix_table_res != NULL &&
1664 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1665 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1666 	    ("MSI-X vector is still allocated"));
1667 
1668 	pci_mask_msix_allvectors(dev);
1669 
1670 	msix->msix_table_res = NULL;
1671 	msix->msix_pba_res = NULL;
1672 }
1673 
1674 void
1675 pci_enable_msix(device_t dev)
1676 {
1677 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1678 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1679 
1680 	KASSERT(msix->msix_table_res != NULL &&
1681 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1682 
1683 	/* Update control register to enable MSI-X. */
1684 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1685 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1686 	    msix->msix_ctrl, 2);
1687 }
1688 
1689 void
1690 pci_disable_msix(device_t dev)
1691 {
1692 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1693 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1694 
1695 	KASSERT(msix->msix_table_res != NULL &&
1696 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1697 
1698 	/* Disable MSI -> HT mapping. */
1699 	pci_ht_map_msi(dev, 0);
1700 
1701 	/* Update control register to disable MSI-X. */
1702 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1703 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1704 	    msix->msix_ctrl, 2);
1705 }
1706 
1707 static void
1708 pci_mask_msix_allvectors(device_t dev)
1709 {
1710 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1711 	u_int i;
1712 
1713 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1714 		pci_mask_msix_vector(dev, i);
1715 }
1716 
1717 static struct msix_vector *
1718 pci_find_msix_vector(device_t dev, int rid)
1719 {
1720 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1721 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1722 	struct msix_vector *mv;
1723 
1724 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1725 		if (mv->mv_rid == rid)
1726 			return mv;
1727 	}
1728 	return NULL;
1729 }
1730 
1731 /*
1732  * HyperTransport MSI mapping control
1733  */
1734 void
1735 pci_ht_map_msi(device_t dev, uint64_t addr)
1736 {
1737 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1738 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1739 
1740 	if (!ht->ht_msimap)
1741 		return;
1742 
1743 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1744 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1745 		/* Enable MSI -> HT mapping. */
1746 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1747 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1748 		    ht->ht_msictrl, 2);
1749 	}
1750 
1751 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1752 		/* Disable MSI -> HT mapping. */
1753 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1754 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1755 		    ht->ht_msictrl, 2);
1756 	}
1757 }
1758 
1759 /*
1760  * Support for MSI message signalled interrupts.
1761  */
/*
 * Program the MSI address and data registers, then set the MSI
 * enable bit in the control register.  With the 64-bit variant of
 * the capability (PCIM_MSICTRL_64BIT) the upper address dword is
 * also written and the data register sits at the 64-bit offset.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1788 
1789 void
1790 pci_disable_msi(device_t dev)
1791 {
1792 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1793 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1794 
1795 	/* Disable MSI -> HT mapping. */
1796 	pci_ht_map_msi(dev, 0);
1797 
1798 	/* Disable MSI in the control register. */
1799 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1800 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1801 	    2);
1802 }
1803 
1804 /*
1805  * Restore MSI registers during resume.  If MSI is enabled then
1806  * restore the data and address registers in addition to the control
1807  * register.
1808  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Address/data only need restoring if MSI was left enabled. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capability: high dword + shifted data reg. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1834 
1835 /*
1836  * Returns true if the specified device is blacklisted because MSI
1837  * doesn't work.
1838  */
1839 int
1840 pci_msi_device_blacklisted(device_t dev)
1841 {
1842 	struct pci_quirk *q;
1843 
1844 	if (!pci_honor_msi_blacklist)
1845 		return (0);
1846 
1847 	for (q = &pci_quirks[0]; q->devid; q++) {
1848 		if (q->devid == pci_get_devid(dev) &&
1849 		    q->type == PCI_QUIRK_DISABLE_MSI)
1850 			return (1);
1851 	}
1852 	return (0);
1853 }
1854 
1855 /*
1856  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1857  * we just check for blacklisted chipsets as represented by the
1858  * host-PCI bridge at device 0:0:0.  In the future, it may become
1859  * necessary to check other system attributes, such as the kenv values
1860  * that give the motherboard manufacturer and model number.
1861  */
1862 static int
1863 pci_msi_blacklisted(void)
1864 {
1865 	device_t dev;
1866 
1867 	if (!pci_honor_msi_blacklist)
1868 		return (0);
1869 
1870 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1871 	if (!(pcie_chipset || pcix_chipset))
1872 		return (1);
1873 
1874 	dev = pci_find_bsf(0, 0, 0);
1875 	if (dev != NULL)
1876 		return (pci_msi_device_blacklisted(dev));
1877 	return (0);
1878 }
1879 
1880 /*
1881  * Attempt to allocate count MSI messages on start_cpuid.
1882  *
1883  * If start_cpuid < 0, then the MSI messages' target CPU will be
1884  * selected automaticly.
1885  *
1886  * If the caller explicitly specified the MSI messages' target CPU,
1887  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1888  * messages on the specified CPU, if the allocation fails due to MD
1889  * does not have enough vectors (EMSGSIZE), then we will try next
1890  * available CPU, until the allocation fails on all CPUs.
1891  *
1892  * EMSGSIZE will be returned, if all available CPUs does not have
1893  * enough vectors for the requested amount of MSI messages.  Caller
1894  * should either reduce the amount of MSI messages to be requested,
1895  * or simply giving up using MSI.
1896  *
1897  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1898  * returned in 'rid' array, if the allocation succeeds.
1899  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
    int start_cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32], cpuid = 0;
	uint16_t ctrl;

	KASSERT(count != 0 && count <= 32 && powerof2(count),
	    ("invalid MSI count %d", count));
	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
	    !pci_do_msi)
		return (ENODEV);

	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
	    count, cfg->msi.msi_msgnum));

	if (bootverbose) {
		device_printf(child,
		    "attempting to allocate %d MSI vector%s (%d supported)\n",
		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
	}

	/* Round-robin the starting CPU when the caller does not care. */
	if (start_cpuid < 0)
		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;

	/*
	 * Try each CPU starting at start_cpuid; move on to the next
	 * one only when the MD code ran out of vectors (EMSGSIZE).
	 * Any other error aborts the allocation.
	 */
	error = EINVAL;
	for (i = 0; i < ncpus; ++i) {
		cpuid = (start_cpuid + i) % ncpus;

		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
		    cfg->msi.msi_msgnum, irqs, cpuid);
		if (error == 0)
			break;
		else if (error != EMSGSIZE)
			return error;
	}
	if (error)
		return error;

	/*
	 * We now have N messages mapped onto SYS_RES_IRQ resources in
	 * the irqs[] array, so add new resources starting at rid 1.
	 */
	for (i = 0; i < count; i++) {
		rid[i] = i + 1;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1, cpuid);
	}

	if (bootverbose) {
		if (count == 1) {
			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
			    irqs[0], cpuid);
		} else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < count; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irqs[count - 1]);
			kprintf(" for MSI on cpu%d\n", cpuid);
		}
	}

	/*
	 * Update control register with count: the Multiple Message
	 * Enable field encodes log2(count).
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(count) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = count;
	cfg->msi.msi_handlers = 0;
	return (0);
}
2019 
2020 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int i, irqs[32], cpuid = -1;

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		if (i == 0) {
			/* All messages must target the same CPU. */
			cpuid = rle->cpuid;
			KASSERT(cpuid >= 0 && cpuid < ncpus,
			    ("invalid MSI target cpuid %d", cpuid));
		} else {
			KASSERT(rle->cpuid == cpuid,
			    ("MSI targets different cpus, "
			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
		}
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
	    cpuid);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2073 
2074 /*
2075  * Return the max supported MSI messages this device supports.
2076  * Basically, assuming the MD code can alloc messages, this function
2077  * should return the maximum value that pci_alloc_msi() can return.
2078  * Thus, it is subject to the tunables, etc.
2079  */
2080 int
2081 pci_msi_count_method(device_t dev, device_t child)
2082 {
2083 	struct pci_devinfo *dinfo = device_get_ivars(child);
2084 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2085 
2086 	if (pci_do_msi && msi->msi_location != 0)
2087 		return (msi->msi_msgnum);
2088 	return (0);
2089 }
2090 
2091 /* kfree pcicfgregs structure and all depending data structures */
2092 
2093 int
2094 pci_freecfg(struct pci_devinfo *dinfo)
2095 {
2096 	struct devlist *devlist_head;
2097 	int i;
2098 
2099 	devlist_head = &pci_devq;
2100 
2101 	if (dinfo->cfg.vpd.vpd_reg) {
2102 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2103 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2104 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2105 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2106 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2107 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2108 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2109 	}
2110 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2111 	kfree(dinfo, M_DEVBUF);
2112 
2113 	/* increment the generation count */
2114 	pci_generation++;
2115 
2116 	/* we're losing one device */
2117 	pci_numdevs--;
2118 	return (0);
2119 }
2120 
2121 /*
2122  * PCI power manangement
2123  */
2124 int
2125 pci_set_powerstate_method(device_t dev, device_t child, int state)
2126 {
2127 	struct pci_devinfo *dinfo = device_get_ivars(child);
2128 	pcicfgregs *cfg = &dinfo->cfg;
2129 	uint16_t status;
2130 	int oldstate, highest, delay;
2131 
2132 	if (cfg->pp.pp_cap == 0)
2133 		return (EOPNOTSUPP);
2134 
2135 	/*
2136 	 * Optimize a no state change request away.  While it would be OK to
2137 	 * write to the hardware in theory, some devices have shown odd
2138 	 * behavior when going from D3 -> D3.
2139 	 */
2140 	oldstate = pci_get_powerstate(child);
2141 	if (oldstate == state)
2142 		return (0);
2143 
2144 	/*
2145 	 * The PCI power management specification states that after a state
2146 	 * transition between PCI power states, system software must
2147 	 * guarantee a minimal delay before the function accesses the device.
2148 	 * Compute the worst case delay that we need to guarantee before we
2149 	 * access the device.  Many devices will be responsive much more
2150 	 * quickly than this delay, but there are some that don't respond
2151 	 * instantly to state changes.  Transitions to/from D3 state require
2152 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2153 	 * is done below with DELAY rather than a sleeper function because
2154 	 * this function can be called from contexts where we cannot sleep.
2155 	 */
2156 	highest = (oldstate > state) ? oldstate : state;
2157 	if (highest == PCI_POWERSTATE_D3)
2158 	    delay = 10000;
2159 	else if (highest == PCI_POWERSTATE_D2)
2160 	    delay = 200;
2161 	else
2162 	    delay = 0;
2163 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2164 	    & ~PCIM_PSTAT_DMASK;
2165 	switch (state) {
2166 	case PCI_POWERSTATE_D0:
2167 		status |= PCIM_PSTAT_D0;
2168 		break;
2169 	case PCI_POWERSTATE_D1:
2170 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2171 			return (EOPNOTSUPP);
2172 		status |= PCIM_PSTAT_D1;
2173 		break;
2174 	case PCI_POWERSTATE_D2:
2175 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2176 			return (EOPNOTSUPP);
2177 		status |= PCIM_PSTAT_D2;
2178 		break;
2179 	case PCI_POWERSTATE_D3:
2180 		status |= PCIM_PSTAT_D3;
2181 		break;
2182 	default:
2183 		return (EINVAL);
2184 	}
2185 
2186 	if (bootverbose)
2187 		kprintf(
2188 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2189 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2190 		    dinfo->cfg.func, oldstate, state);
2191 
2192 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2193 	if (delay)
2194 		DELAY(delay);
2195 	return (0);
2196 }
2197 
2198 int
2199 pci_get_powerstate_method(device_t dev, device_t child)
2200 {
2201 	struct pci_devinfo *dinfo = device_get_ivars(child);
2202 	pcicfgregs *cfg = &dinfo->cfg;
2203 	uint16_t status;
2204 	int result;
2205 
2206 	if (cfg->pp.pp_cap != 0) {
2207 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2208 		switch (status & PCIM_PSTAT_DMASK) {
2209 		case PCIM_PSTAT_D0:
2210 			result = PCI_POWERSTATE_D0;
2211 			break;
2212 		case PCIM_PSTAT_D1:
2213 			result = PCI_POWERSTATE_D1;
2214 			break;
2215 		case PCIM_PSTAT_D2:
2216 			result = PCI_POWERSTATE_D2;
2217 			break;
2218 		case PCIM_PSTAT_D3:
2219 			result = PCI_POWERSTATE_D3;
2220 			break;
2221 		default:
2222 			result = PCI_POWERSTATE_UNKNOWN;
2223 			break;
2224 		}
2225 	} else {
2226 		/* No support, device is always at D0 */
2227 		result = PCI_POWERSTATE_D0;
2228 	}
2229 	return (result);
2230 }
2231 
2232 /*
2233  * Some convenience functions for PCI device drivers.
2234  */
2235 
2236 static __inline void
2237 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2238 {
2239 	uint16_t	command;
2240 
2241 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2242 	command |= bit;
2243 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2244 }
2245 
2246 static __inline void
2247 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2248 {
2249 	uint16_t	command;
2250 
2251 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2252 	command &= ~bit;
2253 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2254 }
2255 
/* Enable bus mastering by setting PCIM_CMD_BUSMASTEREN; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2262 
/* Disable bus mastering by clearing PCIM_CMD_BUSMASTEREN; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2269 
2270 int
2271 pci_enable_io_method(device_t dev, device_t child, int space)
2272 {
2273 	uint16_t command;
2274 	uint16_t bit;
2275 	char *error;
2276 
2277 	bit = 0;
2278 	error = NULL;
2279 
2280 	switch(space) {
2281 	case SYS_RES_IOPORT:
2282 		bit = PCIM_CMD_PORTEN;
2283 		error = "port";
2284 		break;
2285 	case SYS_RES_MEMORY:
2286 		bit = PCIM_CMD_MEMEN;
2287 		error = "memory";
2288 		break;
2289 	default:
2290 		return (EINVAL);
2291 	}
2292 	pci_set_command_bit(dev, child, bit);
2293 	/* Some devices seem to need a brief stall here, what do to? */
2294 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2295 	if (command & bit)
2296 		return (0);
2297 	device_printf(child, "failed to enable %s mapping!\n", error);
2298 	return (ENXIO);
2299 }
2300 
2301 int
2302 pci_disable_io_method(device_t dev, device_t child, int space)
2303 {
2304 	uint16_t command;
2305 	uint16_t bit;
2306 	char *error;
2307 
2308 	bit = 0;
2309 	error = NULL;
2310 
2311 	switch(space) {
2312 	case SYS_RES_IOPORT:
2313 		bit = PCIM_CMD_PORTEN;
2314 		error = "port";
2315 		break;
2316 	case SYS_RES_MEMORY:
2317 		bit = PCIM_CMD_MEMEN;
2318 		error = "memory";
2319 		break;
2320 	default:
2321 		return (EINVAL);
2322 	}
2323 	pci_clear_command_bit(dev, child, bit);
2324 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2325 	if (command & bit) {
2326 		device_printf(child, "failed to disable %s mapping!\n", error);
2327 		return (ENXIO);
2328 	}
2329 	return (0);
2330 }
2331 
2332 /*
2333  * New style pci driver.  Parent device is either a pci-host-bridge or a
2334  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2335  */
2336 
/*
 * Dump the interesting parts of a device's config space when booting
 * verbose: IDs, location, class, command/status, timing, interrupt
 * routing, power-management, MSI and MSI-X capability summaries.
 * No-op unless `bootverbose' is set.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 0 means the function uses no interrupt pin. */
		if (cfg->intpin > 0)
			kprintf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Report supported D-states and the current one. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			kprintf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			kprintf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				kprintf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				kprintf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
		pci_print_verbose_expr(cfg);
	}
}
2394 
/*
 * Verbose-boot helper: print the PCI Express capability summary for a
 * device (capability version, port type, and slot capabilities when a
 * slot is implemented).  No-op unless bootverbose is set and the
 * device has a PCIe capability.
 */
static void
pci_print_verbose_expr(const pcicfgregs *cfg)
{
	const struct pcicfg_expr *expr = &cfg->expr;
	const char *port_name;
	uint16_t port_type;

	if (!bootverbose)
		return;

	if (expr->expr_ptr == 0) /* No PCI Express capability */
		return;

	kprintf("\tPCI Express ver.%d cap=0x%04x",
		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);

	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;

	/* Map the port type field to a short human-readable tag. */
	switch (port_type) {
	case PCIE_END_POINT:
		port_name = "DEVICE";
		break;
	case PCIE_LEG_END_POINT:
		port_name = "LEGDEV";
		break;
	case PCIE_ROOT_PORT:
		port_name = "ROOT";
		break;
	case PCIE_UP_STREAM_PORT:
		port_name = "UPSTREAM";
		break;
	case PCIE_DOWN_STREAM_PORT:
		port_name = "DOWNSTRM";
		break;
	case PCIE_PCIE2PCI_BRIDGE:
		port_name = "PCIE2PCI";
		break;
	case PCIE_PCI2PCIE_BRIDGE:
		port_name = "PCI2PCIE";
		break;
	case PCIE_ROOT_END_POINT:
		port_name = "ROOTDEV";
		break;
	case PCIE_ROOT_EVT_COLL:
		port_name = "ROOTEVTC";
		break;
	default:
		port_name = NULL;
		break;
	}
	/*
	 * Root/downstream ports without a slot implemented are not
	 * interesting as ports; suppress the tag for those.
	 */
	if ((port_type == PCIE_ROOT_PORT ||
	     port_type == PCIE_DOWN_STREAM_PORT) &&
	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
		port_name = NULL;
	if (port_name != NULL)
		kprintf("[%s]", port_name);

	if (pcie_slotimpl(cfg)) {
		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
			kprintf("[HOTPLUG]");
	}
	kprintf("\n");
}
2459 
2460 static int
2461 pci_porten(device_t pcib, int b, int s, int f)
2462 {
2463 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2464 		& PCIM_CMD_PORTEN) != 0;
2465 }
2466 
2467 static int
2468 pci_memen(device_t pcib, int b, int s, int f)
2469 {
2470 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2471 		& PCIM_CMD_MEMEN) != 0;
2472 }
2473 
2474 /*
2475  * Add a resource based on a pci map register. Return 1 if the map
2476  * register is a 32bit map register or 2 if it is a 64bit register.
2477  */
2478 static int
2479 pci_add_map(device_t pcib, device_t bus, device_t dev,
2480     int b, int s, int f, int reg, struct resource_list *rl, int force,
2481     int prefetch)
2482 {
2483 	uint32_t map;
2484 	uint16_t old_cmd;
2485 	pci_addr_t base;
2486 	pci_addr_t start, end, count;
2487 	uint8_t ln2size;
2488 	uint8_t ln2range;
2489 	uint32_t testval;
2490 	uint16_t cmd;
2491 	int type;
2492 	int barlen;
2493 	struct resource *res;
2494 
2495 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2496 
2497         /* Disable access to device memory */
2498 	old_cmd = 0;
2499 	if (PCI_BAR_MEM(map)) {
2500 		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2501 		cmd = old_cmd & ~PCIM_CMD_MEMEN;
2502 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2503 	}
2504 
2505 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2506 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2507 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2508 
2509         /* Restore memory access mode */
2510 	if (PCI_BAR_MEM(map)) {
2511 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
2512 	}
2513 
2514 	if (PCI_BAR_MEM(map)) {
2515 		type = SYS_RES_MEMORY;
2516 		if (map & PCIM_BAR_MEM_PREFETCH)
2517 			prefetch = 1;
2518 	} else
2519 		type = SYS_RES_IOPORT;
2520 	ln2size = pci_mapsize(testval);
2521 	ln2range = pci_maprange(testval);
2522 	base = pci_mapbase(map);
2523 	barlen = ln2range == 64 ? 2 : 1;
2524 
2525 	/*
2526 	 * For I/O registers, if bottom bit is set, and the next bit up
2527 	 * isn't clear, we know we have a BAR that doesn't conform to the
2528 	 * spec, so ignore it.  Also, sanity check the size of the data
2529 	 * areas to the type of memory involved.  Memory must be at least
2530 	 * 16 bytes in size, while I/O ranges must be at least 4.
2531 	 */
2532 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2533 		return (barlen);
2534 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2535 	    (type == SYS_RES_IOPORT && ln2size < 2))
2536 		return (barlen);
2537 
2538 	if (ln2range == 64)
2539 		/* Read the other half of a 64bit map register */
2540 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2541 	if (bootverbose) {
2542 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2543 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2544 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2545 			kprintf(", port disabled\n");
2546 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2547 			kprintf(", memory disabled\n");
2548 		else
2549 			kprintf(", enabled\n");
2550 	}
2551 
2552 	/*
2553 	 * If base is 0, then we have problems.  It is best to ignore
2554 	 * such entries for the moment.  These will be allocated later if
2555 	 * the driver specifically requests them.  However, some
2556 	 * removable busses look better when all resources are allocated,
2557 	 * so allow '0' to be overriden.
2558 	 *
2559 	 * Similarly treat maps whose values is the same as the test value
2560 	 * read back.  These maps have had all f's written to them by the
2561 	 * BIOS in an attempt to disable the resources.
2562 	 */
2563 	if (!force && (base == 0 || map == testval))
2564 		return (barlen);
2565 	if ((u_long)base != base) {
2566 		device_printf(bus,
2567 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2568 		    pci_get_domain(dev), b, s, f, reg);
2569 		return (barlen);
2570 	}
2571 
2572 	/*
2573 	 * This code theoretically does the right thing, but has
2574 	 * undesirable side effects in some cases where peripherals
2575 	 * respond oddly to having these bits enabled.  Let the user
2576 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2577 	 * default).
2578 	 */
2579 	if (pci_enable_io_modes) {
2580 		/* Turn on resources that have been left off by a lazy BIOS */
2581 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2582 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2583 			cmd |= PCIM_CMD_PORTEN;
2584 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2585 		}
2586 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2587 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2588 			cmd |= PCIM_CMD_MEMEN;
2589 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2590 		}
2591 	} else {
2592 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2593 			return (barlen);
2594 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2595 			return (barlen);
2596 	}
2597 
2598 	count = 1 << ln2size;
2599 	if (base == 0 || base == pci_mapbase(testval)) {
2600 		start = 0;	/* Let the parent decide. */
2601 		end = ~0ULL;
2602 	} else {
2603 		start = base;
2604 		end = base + (1 << ln2size) - 1;
2605 	}
2606 	resource_list_add(rl, type, reg, start, end, count, -1);
2607 
2608 	/*
2609 	 * Try to allocate the resource for this BAR from our parent
2610 	 * so that this resource range is already reserved.  The
2611 	 * driver for this device will later inherit this resource in
2612 	 * pci_alloc_resource().
2613 	 */
2614 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2615 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2616 	if (res == NULL) {
2617 		/*
2618 		 * If the allocation fails, delete the resource list
2619 		 * entry to force pci_alloc_resource() to allocate
2620 		 * resources from the parent.
2621 		 */
2622 		resource_list_delete(rl, type, reg);
2623 #ifdef PCI_BAR_CLEAR
2624 		/* Clear the BAR */
2625 		start = 0;
2626 #else	/* !PCI_BAR_CLEAR */
2627 		/*
2628 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2629 		 * PCI function, clearing the BAR causes HPET timer
2630 		 * stop ticking.
2631 		 */
2632 		if (bootverbose) {
2633 			kprintf("pci:%d:%d:%d: resource reservation failed "
2634 				"%#jx - %#jx\n", b, s, f,
2635 				(intmax_t)start, (intmax_t)end);
2636 		}
2637 		return (barlen);
2638 #endif	/* PCI_BAR_CLEAR */
2639 	} else {
2640 		start = rman_get_start(res);
2641 	}
2642 	pci_write_config(dev, reg, start, 4);
2643 	if (ln2range == 64)
2644 		pci_write_config(dev, reg + 4, start >> 32, 4);
2645 	return (barlen);
2646 }
2647 
2648 /*
2649  * For ATA devices we need to decide early what addressing mode to use.
2650  * Legacy demands that the primary and secondary ATA ports sits on the
2651  * same addresses that old ISA hardware did. This dictates that we use
2652  * those addresses and ignore the BAR's if we cannot set PCI native
2653  * addressing mode.
2654  */
2655 static void
2656 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2657     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2658 {
2659 	int rid, type, progif;
2660 #if 0
2661 	/* if this device supports PCI native addressing use it */
2662 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2663 	if ((progif & 0x8a) == 0x8a) {
2664 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2665 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2666 			kprintf("Trying ATA native PCI addressing mode\n");
2667 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2668 		}
2669 	}
2670 #endif
2671 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2672 	type = SYS_RES_IOPORT;
2673 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2674 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2675 		    prefetchmask & (1 << 0));
2676 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2677 		    prefetchmask & (1 << 1));
2678 	} else {
2679 		rid = PCIR_BAR(0);
2680 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2681 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2682 		    0, -1);
2683 		rid = PCIR_BAR(1);
2684 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2685 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2686 		    0, -1);
2687 	}
2688 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2689 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2690 		    prefetchmask & (1 << 2));
2691 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2692 		    prefetchmask & (1 << 3));
2693 	} else {
2694 		rid = PCIR_BAR(2);
2695 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2696 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2697 		    0, -1);
2698 		rid = PCIR_BAR(3);
2699 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2700 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2701 		    0, -1);
2702 	}
2703 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2704 	    prefetchmask & (1 << 4));
2705 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2706 	    prefetchmask & (1 << 5));
2707 }
2708 
/*
 * Work out which IRQ the device's INTx pin should use and record it as
 * the rid-0 SYS_RES_IRQ resource.  Sources, in priority order: a
 * hw.pciD.B.S.F.INTx.irq tunable, the bus's interrupt routing method
 * (PCI_ASSIGN_INTERRUPT), and finally the intline register.  If
 * `force_route' is set the bus routing is tried even when intline
 * already looks valid.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
		/* Only legacy IRQs 1-254 are acceptable from the tunable. */
		if (irq >= 255 || irq <= 0) {
			irq = PCI_INVALID_IRQ;
		} else {
			if (machintr_legacy_intr_find(irq,
			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
				device_printf(dev,
				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
				    cfg->intpin + 'A' - 1, irq);
				irq = PCI_INVALID_IRQ;
			} else {
				/* PCI INTx is level-triggered, active-low. */
				BUS_CONFIG_INTR(bus, dev, irq,
				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
			}
		}
	}

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
	    machintr_legacy_intr_cpuid(irq));
}
2772 
/*
 * Perform early OHCI takeover from SMM.
 *
 * If the BIOS/SMM still owns the controller (OHCI_IR set), request an
 * ownership change and poll up to ~100ms for SMM to let go; if it
 * does not respond, reset the host controller.  Interrupts are
 * disabled afterwards so stale SMM state cannot raise them.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Operational registers live behind BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			kprintf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				kprintf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2809 
/*
 * Perform early UHCI takeover from SMM.
 *
 * UHCI legacy support is controlled through PCI config space rather
 * than a BIOS semaphore: program the LEGSUP register, then clear the
 * controller's interrupt enable register.
 */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2833 
/*
 * Perform early EHCI takeover from SMM.
 *
 * Walk the extended-capability list in config space looking for the
 * USB legacy-support capability; for each one where the BIOS
 * semaphore is held, set the OS semaphore and poll up to ~100ms for
 * the BIOS to release, then disable controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint32_t eecp;
	uint32_t bios_sem;
	uint32_t offs;
	int rid;
	int i;

	/* Capability/operational registers live behind BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not own this controller. */
			continue;
		}
		if (bootverbose)
			kprintf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to release. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				kprintf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2889 
/*
 * Perform early XHCI takeover from SMM.
 *
 * Walk the MMIO extended-capability list for the USB legacy-support
 * capability; where the BIOS semaphore is held, set the OS semaphore
 * and poll up to 5 seconds for release, forcibly clearing the BIOS
 * semaphore if it never responds.  Finally stop the controller
 * (USBCMD = 0) so no stale interrupts fire.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint32_t eecp;
	uint32_t bios_sem;
	uint32_t offs;
	int rid;
	int i;

	/* Capability/operational registers live behind BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);

		if (bios_sem == 0) {
			if (bootverbose)
				kprintf("xhci early: xhci is not owned by SMM\n");

			continue;
		}

		if (bootverbose)
			kprintf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);

			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose) {
				kprintf("xhci early: "
				    "SMM does not respond\n");
				kprintf("xhci early: "
				    "taking xhci by force\n");
			}
			/* Clear the BIOS semaphore ourselves. */
			bus_write_1(res, eecp + XHCI_XECP_BIOS_SEM, 0x00);
		} else {
			if (bootverbose)
				kprintf("xhci early:"
				    "handover successful\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2965 
/*
 * Discover and reserve all resources belonging to `dev': BARs (with
 * special-case handling for legacy ATA), quirked extra map registers,
 * the INTx interrupt, and — for USB host controllers — an early
 * takeover from BIOS/SMM ownership.
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}

	/* Wrest USB host controllers away from BIOS/SMM ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3023 
/*
 * Enumerate every slot/function on bus `busno' in domain `domain' and
 * add a child device for each function found.  `dinfo_size' lets
 * subclassed busses allocate a larger per-device info structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Shorthand for a config read of the slot/function being scanned. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);	/* brief settle before probing the slot */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots with absent or unknown header types. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions 0..7. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3056 
/*
 * Add one probed PCI function as a new child of `bus': create the
 * device, hang the devinfo off it as ivars, initialize its resource
 * list and populate the BAR resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * Snapshot the config space, then restore it; the save/restore
	 * pair runs before resources are added.  NOTE(review): presumably
	 * this normalizes the device's power state -- confirm against
	 * pci_cfg_save()/pci_cfg_restore().
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
3071 
3072 static int
3073 pci_probe(device_t dev)
3074 {
3075 	device_set_desc(dev, "PCI bus");
3076 
3077 	/* Allow other subclasses to override this driver. */
3078 	return (-1000);
3079 }
3080 
3081 static int
3082 pci_attach(device_t dev)
3083 {
3084 	int busno, domain;
3085 
3086 	/*
3087 	 * Since there can be multiple independantly numbered PCI
3088 	 * busses on systems with multiple PCI domains, we can't use
3089 	 * the unit number to decide which bus we are probing. We ask
3090 	 * the parent pcib what our domain and bus numbers are.
3091 	 */
3092 	domain = pcib_get_domain(dev);
3093 	busno = pcib_get_bus(dev);
3094 	if (bootverbose)
3095 		device_printf(dev, "domain=%d, physical bus=%d\n",
3096 		    domain, busno);
3097 
3098 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3099 
3100 	return (bus_generic_attach(dev));
3101 }
3102 
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, then (if ACPI is present and power management is enabled)
 * place each attached type-0 child into the sleep power state.
 * Returns 0 or the error from bus_generic_suspend().
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		/* devlist was allocated by device_get_children(). */
		kfree(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	kfree(devlist, M_TEMP);
	return (0);
}
3150 
/*
 * Bus resume method: bring each child back to D0 (via ACPI when
 * available), restore its saved config space, then resume the
 * children through the generic bus code.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	kfree(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
3185 
/*
 * Locate the PCI vendor/device description database preloaded by the
 * boot loader (module type "pci_vendor_data") and record its address
 * and size for pci_describe_device().  Does nothing if no database
 * was preloaded.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t vendordata, info;

	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
		info = preload_search_info(vendordata, MODINFO_ADDR);
		pci_vendordata = *(char **)info;
		info = preload_search_info(vendordata, MODINFO_SIZE);
		pci_vendordata_size = *(size_t *)info;
		/*
		 * terminate the database
		 *
		 * NOTE(review): this stores one byte at index
		 * pci_vendordata_size, i.e. one past the reported size;
		 * assumes the preloaded image has a spare byte there --
		 * confirm against the loader's allocation.
		 */
		pci_vendordata[pci_vendordata_size] = '\n';
	}
}
3200 
/*
 * Bus callback invoked when a new driver is loaded: give the driver a
 * chance to identify new children, then re-probe every child that is
 * still unclaimed (DS_NOTPRESENT).  Config space is restored before
 * the probe and re-saved (with power-down) if no driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children already claimed by some driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	kfree(devlist, M_TEMP);
}
3230 
3231 static void
3232 pci_child_detached(device_t parent __unused, device_t child)
3233 {
3234 	/* Turn child's power off */
3235 	pci_cfg_save(child, device_get_ivars(child), 1);
3236 }
3237 
/*
 * Bus setup_intr method.  After the generic interrupt hookup, direct
 * PCI children get additional treatment: rid 0 (legacy INTx) has the
 * INTx-disable command bit cleared; any other rid is an MSI or MSI-X
 * message, which is mapped through the parent bridge and programmed
 * into the device, with INTx disabled.  On a mapping failure the
 * just-installed handler is torn down again and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep,
    lwkt_serialize_t serializer, const char *desc)
{
	int rid, error;
	void *cookie;

	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer, desc);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		uint64_t addr;
		uint32_t data;

		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			/* Map the MSI lazily, on the first handler. */
			if (msi->msi_addr == 0) {
				KASSERT(msi->msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data,
				    rman_get_cpuid(irq));
				if (error)
					goto bad;
				msi->msi_addr = addr;
				msi->msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			msi->msi_handlers++;
		} else {
			struct msix_vector *mv;
			u_int vector;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI-X or MSI rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address == 0,
			    ("MSI-X rid %d has been setup", rid));

			error = PCIB_MAP_MSI(device_get_parent(dev),
			    child, rman_get_start(irq), &addr, &data,
			    rman_get_cpuid(irq));
			if (error)
				goto bad;
			mv->mv_address = addr;
			mv->mv_data = data;

			/* Program and unmask the MSI-X table entry. */
			vector = PCI_MSIX_RID2VEC(rid);
			pci_setup_msix_vector(child, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(child, vector);
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* error is 0 on the fall-through (success) path. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3328 
/*
 * Bus teardown_intr method, the inverse of pci_setup_intr().  For
 * direct children: rid 0 masks INTx via the command register; other
 * rids decrement the MSI handler count (disabling MSI when it reaches
 * zero) or mask and unmap the corresponding MSI-X vector.  Returns
 * EINVAL for a NULL or inactive irq resource, otherwise the result of
 * the generic teardown.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	int rid, error;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);

		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			KASSERT(rid <= msi->msi_alloc,
			    ("MSI-X index too high"));
			KASSERT(msi->msi_handlers > 0,
			    ("MSI rid %d is not setup", rid));

			msi->msi_handlers--;
			if (msi->msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			struct msix_vector *mv;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address != 0,
			    ("MSI-X rid %d has not been setup", rid));

			/* Mask the vector and forget its mapping. */
			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
			mv->mv_address = 0;
			mv->mv_data = 0;
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3389 
3390 int
3391 pci_print_child(device_t dev, device_t child)
3392 {
3393 	struct pci_devinfo *dinfo;
3394 	struct resource_list *rl;
3395 	int retval = 0;
3396 
3397 	dinfo = device_get_ivars(child);
3398 	rl = &dinfo->resources;
3399 
3400 	retval += bus_print_child_header(dev, child);
3401 
3402 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3403 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3404 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3405 	if (device_get_flags(dev))
3406 		retval += kprintf(" flags %#x", device_get_flags(dev));
3407 
3408 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3409 	    pci_get_function(child));
3410 
3411 	retval += bus_print_child_footer(dev, child);
3412 
3413 	return (retval);
3414 }
3415 
/*
 * Class/subclass to human-readable description table, used by
 * pci_probe_nomatch() to describe devices no driver claimed.  A
 * subclass of -1 supplies the generic description for the whole
 * class.  The table is terminated by a NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3507 
/*
 * Bus probe_nomatch method: report a child that no driver claimed.
 * Prefer a description from the loaded vendor database; otherwise
 * fall back to the class/subclass table.  Ends by saving the child's
 * config space and powering it down.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int	i;
	char	*cp, *scp, *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		kfree(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		/* cp is never NULL here; the checks are defensive. */
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
		pci_get_vendor(child), pci_get_device(child),
		pci_get_slot(child), pci_get_function(child));
	if (pci_get_intpin(child) > 0) {
		int irq;

		irq = pci_get_irq(child);
		if (PCI_INTERRUPT_VALID(irq))
			kprintf(" irq %d", irq);
	}
	kprintf("\n");

	/* Power the unclaimed device down after saving its config. */
	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
}
3556 
3557 /*
3558  * Parse the PCI device database, if loaded, and return a pointer to a
3559  * description of the device.
3560  *
3561  * The database is flat text formatted as follows:
3562  *
3563  * Any line not in a valid format is ignored.
3564  * Lines are terminated with newline '\n' characters.
3565  *
3566  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3567  * the vendor name.
3568  *
3569  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3570  * - devices cannot be listed without a corresponding VENDOR line.
3571  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3572  * another TAB, then the device name.
3573  */
3574 
3575 /*
3576  * Assuming (ptr) points to the beginning of a line in the database,
3577  * return the vendor or device and description of the next entry.
3578  * The value of (vendor) or (device) inappropriate for the entry type
3579  * is set to -1.  Returns nonzero at the end of the database.
3580  *
 * Note that this is not fully robust in the face of corrupt data;
3582  * we attempt to safeguard against this by spamming the end of the
3583  * database with a newline when we initialise.
3584  */
/*
 * Parse the next entry of the vendor database starting at *ptr.
 * On a match, sets *vendor (vendor line) or *device (device line) --
 * the other stays -1 -- and copies up to 80 description characters
 * into *desc.  Advances *ptr past the consumed line.  Returns nonzero
 * at the end of the database, 0 otherwise.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining in the database from cp onward. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3630 
3631 static char *
3632 pci_describe_device(device_t dev)
3633 {
3634 	int	vendor, device;
3635 	char	*desc, *vp, *dp, *line;
3636 
3637 	desc = vp = dp = NULL;
3638 
3639 	/*
3640 	 * If we have no vendor data, we can't do anything.
3641 	 */
3642 	if (pci_vendordata == NULL)
3643 		goto out;
3644 
3645 	/*
3646 	 * Scan the vendor data looking for this device
3647 	 */
3648 	line = pci_vendordata;
3649 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3650 		goto out;
3651 	for (;;) {
3652 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3653 			goto out;
3654 		if (vendor == pci_get_vendor(dev))
3655 			break;
3656 	}
3657 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3658 		goto out;
3659 	for (;;) {
3660 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3661 			*dp = 0;
3662 			break;
3663 		}
3664 		if (vendor != -1) {
3665 			*dp = 0;
3666 			break;
3667 		}
3668 		if (device == pci_get_device(dev))
3669 			break;
3670 	}
3671 	if (dp[0] == '\0')
3672 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3673 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3674 	    NULL)
3675 		ksprintf(desc, "%s, %s", vp, dp);
3676  out:
3677 	if (vp != NULL)
3678 		kfree(vp, M_DEVBUF);
3679 	if (dp != NULL)
3680 		kfree(dp, M_DEVBUF);
3681 	return(desc);
3682 }
3683 
/*
 * Bus read_ivar method: export the cached config-space fields of a
 * child (from its struct pci_devinfo) through the instance-variable
 * interface used by the pci_get_*() accessors.  Returns ENOENT for
 * unknown ivars; PCI_IVAR_ETHADDR always fails with EINVAL here.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3775 
/*
 * Bus write_ivar method.  Only PCI_IVAR_INTPIN may be changed; all
 * other known ivars are read-only and return EINVAL, while unknown
 * ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3808 #ifdef notyet
3809 #include "opt_ddb.h"
3810 #ifdef DDB
3811 #include <ddb/ddb.h>
3812 #include <sys/cons.h>
3813 
3814 /*
3815  * List resources based on pci map registers, used for within ddb
3816  */
3817 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * one summary line (driver name/unit, location, class, ids, revision,
 * header type) per known PCI function.  Stops early if the pager is
 * quit.  (Currently compiled out via the enclosing #ifdef notyet.)
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices get "none<N>" with a running count. */
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3857 #endif /* DDB */
3858 #endif
3859 
/*
 * Lazily allocate a resource for a BAR that has no entry in the
 * child's resource list yet.  Sizes the BAR with the standard PCI
 * probe (write all-ones, read back), validates that the requested
 * resource type matches the BAR type, allocates a suitably sized and
 * aligned range from the parent, records it in the resource list, and
 * programs the BAR with the assigned address.  Returns the resource
 * or NULL on failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* 64-bit BARs carry the upper half in the next dword. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were given. */
	map = rman_get_start(res);
out:;
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3958 
3959 
/*
 * Bus alloc_resource method.  For direct children this performs the
 * PCI-specific lazy allocation: legacy IRQs are refused once MSI/
 * MSI-X is in use and are routed on demand, BAR-backed port/memory
 * rids get their decode enabled and are sized/allocated via
 * pci_alloc_map() when not yet in the resource list, and previously
 * reserved entries are returned (activating them if RF_ACTIVE).
 * Everything else falls through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags, cpuid));
}
4036 
4037 void
4038 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4039 {
4040 	struct pci_devinfo *dinfo;
4041 	struct resource_list *rl;
4042 	struct resource_list_entry *rle;
4043 
4044 	if (device_get_parent(child) != dev)
4045 		return;
4046 
4047 	dinfo = device_get_ivars(child);
4048 	rl = &dinfo->resources;
4049 	rle = resource_list_find(rl, type, rid);
4050 	if (rle) {
4051 		if (rle->res) {
4052 			if (rman_get_device(rle->res) != dev ||
4053 			    rman_get_flags(rle->res) & RF_ACTIVE) {
4054 				device_printf(dev, "delete_resource: "
4055 				    "Resource still owned by child, oops. "
4056 				    "(type=%d, rid=%d, addr=%lx)\n",
4057 				    rle->type, rle->rid,
4058 				    rman_get_start(rle->res));
4059 				return;
4060 			}
4061 			bus_release_resource(dev, type, rid, rle->res);
4062 		}
4063 		resource_list_delete(rl, type, rid);
4064 	}
4065 	/*
4066 	 * Why do we turn off the PCI configuration BAR when we delete a
4067 	 * resource? -- imp
4068 	 */
4069 	pci_write_config(child, rid, 0, 4);
4070 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
4071 }
4072 
4073 struct resource_list *
4074 pci_get_resource_list (device_t dev, device_t child)
4075 {
4076 	struct pci_devinfo *dinfo = device_get_ivars(child);
4077 
4078 	if (dinfo == NULL)
4079 		return (NULL);
4080 
4081 	return (&dinfo->resources);
4082 }
4083 
4084 uint32_t
4085 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4086 {
4087 	struct pci_devinfo *dinfo = device_get_ivars(child);
4088 	pcicfgregs *cfg = &dinfo->cfg;
4089 
4090 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4091 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4092 }
4093 
4094 void
4095 pci_write_config_method(device_t dev, device_t child, int reg,
4096     uint32_t val, int width)
4097 {
4098 	struct pci_devinfo *dinfo = device_get_ivars(child);
4099 	pcicfgregs *cfg = &dinfo->cfg;
4100 
4101 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4102 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4103 }
4104 
4105 int
4106 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4107     size_t buflen)
4108 {
4109 
4110 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4111 	    pci_get_function(child));
4112 	return (0);
4113 }
4114 
4115 int
4116 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4117     size_t buflen)
4118 {
4119 	struct pci_devinfo *dinfo;
4120 	pcicfgregs *cfg;
4121 
4122 	dinfo = device_get_ivars(child);
4123 	cfg = &dinfo->cfg;
4124 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4125 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4126 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4127 	    cfg->progif);
4128 	return (0);
4129 }
4130 
4131 int
4132 pci_assign_interrupt_method(device_t dev, device_t child)
4133 {
4134 	struct pci_devinfo *dinfo = device_get_ivars(child);
4135 	pcicfgregs *cfg = &dinfo->cfg;
4136 
4137 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4138 	    cfg->intpin));
4139 }
4140 
4141 static int
4142 pci_modevent(module_t mod, int what, void *arg)
4143 {
4144 	static struct cdev *pci_cdev;
4145 
4146 	switch (what) {
4147 	case MOD_LOAD:
4148 		STAILQ_INIT(&pci_devq);
4149 		pci_generation = 0;
4150 		pci_cdev = make_dev(&pci_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4151 				    "pci");
4152 		pci_load_vendor_data();
4153 		break;
4154 
4155 	case MOD_UNLOAD:
4156 		destroy_dev(pci_cdev);
4157 		break;
4158 	}
4159 
4160 	return (0);
4161 }
4162 
/*
 * Restore a device's config-space registers from the snapshot held in
 * dinfo (taken earlier by pci_cfg_save()).  Only plain type 0 headers
 * are handled; the power state is raised to D0 first because leaving
 * D3 can reset BARs and other registers.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Re-program the BARs and the rest of the saved header fields. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4208 
/*
 * Snapshot a device's writable type 0 config-space registers into dinfo
 * so pci_cfg_restore() can replay them later.  When setstate is
 * non-zero, optionally power the device down to D3 afterwards,
 * subject to the pci_do_power_nodriver policy and the device's class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4292 
4293 #ifdef COMPAT_OLDPCI
4294 
4295 /*
4296  * Locate the parent of a PCI device by scanning the PCI devlist
4297  * and return the entry for the parent.
4298  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4299  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4300  */
4301 pcicfgregs *
4302 pci_devlist_get_parent(pcicfgregs *cfg)
4303 {
4304 	struct devlist *devlist_head;
4305 	struct pci_devinfo *dinfo;
4306 	pcicfgregs *bridge_cfg;
4307 	int i;
4308 
4309 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4310 
4311 	/* If the device is on PCI bus 0, look for the host */
4312 	if (cfg->bus == 0) {
4313 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4314 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4315 			bridge_cfg = &dinfo->cfg;
4316 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4317 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4318 		    		&& bridge_cfg->bus == cfg->bus) {
4319 				return bridge_cfg;
4320 			}
4321 		}
4322 	}
4323 
4324 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4325 	if (cfg->bus > 0) {
4326 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4327 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4328 			bridge_cfg = &dinfo->cfg;
4329 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4330 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4331 				&& bridge_cfg->secondarybus == cfg->bus) {
4332 				return bridge_cfg;
4333 			}
4334 		}
4335 	}
4336 
4337 	return NULL;
4338 }
4339 
4340 #endif	/* COMPAT_OLDPCI */
4341 
4342 int
4343 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4344 {
4345 	int rid, type;
4346 	u_int flags;
4347 
4348 	rid = 0;
4349 	type = PCI_INTR_TYPE_LEGACY;
4350 	flags = RF_SHAREABLE | RF_ACTIVE;
4351 
4352 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4353 	if (msi_enable) {
4354 		int cpu;
4355 
4356 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4357 		if (cpu >= ncpus)
4358 			cpu = ncpus - 1;
4359 
4360 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4361 			flags &= ~RF_SHAREABLE;
4362 			type = PCI_INTR_TYPE_MSI;
4363 		}
4364 	}
4365 
4366 	*rid0 = rid;
4367 	*flags0 = flags;
4368 
4369 	return type;
4370 }
4371 
4372 /* Wrapper APIs suitable for device driver use. */
4373 void
4374 pci_save_state(device_t dev)
4375 {
4376 	struct pci_devinfo *dinfo;
4377 
4378 	dinfo = device_get_ivars(dev);
4379 	pci_cfg_save(dev, dinfo, 0);
4380 }
4381 
4382 void
4383 pci_restore_state(device_t dev)
4384 {
4385 	struct pci_devinfo *dinfo;
4386 
4387 	dinfo = device_get_ivars(dev);
4388 	pci_cfg_restore(dev, dinfo);
4389 }
4390